diff -ru --exclude=.svn /home/jm/ftp/spamassassin/MANIFEST /home/jm/ftp/sa/bug-3109/MANIFEST --- /home/jm/ftp/spamassassin/MANIFEST 2006-07-09 18:11:49.000000000 +0100 +++ /home/jm/ftp/sa/bug-3109/MANIFEST 2006-06-30 20:31:20.000000000 +0100 @@ -444,6 +444,7 @@ rules/25_textcat.cf rules/25_uribl.cf rules/60_awl.cf +rules/60_shortcircuit.cf rules/60_whitelist.cf rules/60_whitelist_dkim.cf rules/60_whitelist_spf.cf @@ -481,3 +482,4 @@ t/data/nice/dkim/Simple_02 t/dkim.t t/uribl.t +t/shortcircuit.t diff -ru --exclude=.svn /home/jm/ftp/spamassassin/lib/Mail/SpamAssassin/Conf.pm /home/jm/ftp/sa/bug-3109/lib/Mail/SpamAssassin/Conf.pm --- /home/jm/ftp/spamassassin/lib/Mail/SpamAssassin/Conf.pm 2006-07-09 18:11:49.000000000 +0100 +++ /home/jm/ftp/sa/bug-3109/lib/Mail/SpamAssassin/Conf.pm 2006-07-09 17:49:11.000000000 +0100 @@ -2218,6 +2218,146 @@ type => $CONF_TYPE_HASH_KEY_VALUE }); +=item shortcircuit SYMBOLIC_TEST_NAME {ham|spam|on|off} + +Shortcircuiting a test will force all other pending rules to be skipped, if +that test is hit. + +Recommended usage is to use C<priority> to set rules with strong S/O values (i.e. +1.0) to be run first, and make instant spam or ham classification based on +that. + +To override a test that uses shortcircuiting, you can set the classification +type to C<off>. + +=over 4 + +=item on + +Shortcircuits the rest of the tests, but does not make a strict classification +of spam or ham. Rather, it uses the default score for the rule being +shortcircuited. This would allow you, for example, to define a rule such as + +=over 4 + + body TEST /test/ + describe TEST test rule that scores barely over spam threshold + score TEST 5.5 + priority TEST -100 + shortcircuit TEST on + +=back + +The result of a message hitting the above rule would be a final score of 5.5, +as opposed to 100 (default) if it were classified as spam. + +=item off + +Disables shortcircuiting on said rule. 
+ +=item spam + +Shortcircuit the rule using a set of defaults; override the default score of +this rule with the score from C<shortcircuit_spam_score>, set the +C<noautolearn> tflag, and set priority to C<-100>. In other words, +equivalent to: + +=over 4 + + shortcircuit TEST on + priority TEST -100 + score TEST 100 + tflags TEST noautolearn + +=back + +=item ham + +Shortcircuit the rule using a set of defaults; override the default score of +this rule with the score from C<shortcircuit_ham_score>, set the C<noautolearn> +and C<nice> tflags, and set priority to C<-100>. In other words, equivalent +to: + +=over 4 + + shortcircuit TEST on + priority TEST -100 + score TEST -100 + tflags TEST noautolearn nice + +=back + +=back + +=cut + + push (@cmds, { + setting => 'shortcircuit', + code => sub { + my ($self, $key, $value, $line) = @_; + my ($rule,$type); + unless (defined $value && $value !~ /^$/) { + return $MISSING_REQUIRED_VALUE; + } + if ($value =~ /^(\S+)\s+(\S+)$/) { + $rule=$1; + $type=$2; + } else { + return $INVALID_VALUE; + } + + if ($type =~ m/^(?:spam|ham)$/) { + dbg("shortcircuit: adding $rule using abbreviation $type"); + + # set the defaults: + $self->{shortcircuit}->{$rule} = $type; + $self->{priority}->{$rule} = -100; + + my $tf = $self->{tflags}->{$rule}; + $self->{tflags}->{$rule} = ($tf ? $tf." " : "") . + ($type eq 'ham' ? "nice " : "") . + "noautolearn"; + } + elsif ($type eq "on") { + $self->{shortcircuit}->{$rule} = "on"; + } + elsif ($type eq "off") { + delete $self->{shortcircuit}->{$rule}; + } + else { + return $INVALID_VALUE; + } + } + }); + +=item shortcircuit_spam_score n.nn (default: 100) + +When shortcircuit is used on a rule, and the shortcircuit classification type +is set to C<spam>, this value should be applied in place of the default score +for that rule. 
+ +=cut + + push (@cmds, { + setting => 'shortcircuit_spam_score', + default => 100, + type => $CONF_TYPE_NUMERIC + }); + +=item shortcircuit_ham_score n.nn (default: -100) + +When shortcircuit is used on a rule, and the shortcircuit classification type +is set to C<ham>, this value should be applied in place of the default score +for that rule. + +=cut + + push (@cmds, { + setting => 'shortcircuit_ham_score', + default => -100, + type => $CONF_TYPE_NUMERIC + }); + =back =head1 ADMINISTRATOR SETTINGS @@ -2754,6 +2894,9 @@ _DCCR_ DCC's results _PYZOR_ Pyzor results _RBL_ full results for positive RBL queries in DNS URI format + _SC_ shortcircuit status (classification and rule name) + _SCRULE_ rulename that caused the shortcircuit + _SCTYPE_ shortcircuit classification ("spam", "ham", "no") _LANGUAGES_ possible languages of mail _PREVIEW_ content preview _REPORT_ terse report of tests hit (for header reports) diff -ru --exclude=.svn /home/jm/ftp/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm /home/jm/ftp/sa/bug-3109/lib/Mail/SpamAssassin/PerMsgStatus.pm --- /home/jm/ftp/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm 2006-07-09 18:11:49.000000000 +0100 +++ /home/jm/ftp/sa/bug-3109/lib/Mail/SpamAssassin/PerMsgStatus.pm 2006-07-09 17:52:25.000000000 +0100 @@ -170,11 +170,15 @@ # happen in Conf.pm when we switch a rules from one priority to another next unless ($self->{conf}->{priorities}->{$priority} > 0); + # if shortcircuiting is hit, we skip all other priorities... 
+ last if (exists $self->{shortcircuit_type}); + dbg("check: running tests for priority: $priority"); # only harvest the dnsbl queries once priority HARVEST_DNSBL_PRIORITY # has been reached and then only run once - if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p) { + if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p && !exists $self->{shortcircuit_type}) + { # harvest the DNS results $self->harvest_dnsbl_queries(); $needs_dnsbl_harvest_p = 0; @@ -209,12 +213,20 @@ # sanity check, it is possible that no rules >= HARVEST_DNSBL_PRIORITY ran so the harvest # may not have run yet. Check, and if so, go ahead and harvest here. if ($needs_dnsbl_harvest_p) { - # harvest the DNS results - $self->harvest_dnsbl_queries(); + if (!exists $self->{shortcircuit_type}) { + # harvest the DNS results + $self->harvest_dnsbl_queries(); + } # finish the DNS results $self->rbl_finish(); - $self->{main}->call_plugins ("check_post_dnsbl", { permsgstatus => $self }); + + if (!exists $self->{shortcircuit_type}) { + # TODO: should we call this even if we're short-circuiting? + # in URIDNSBL, it used to be a time-consuming operation. + $self->{main}->call_plugins("check_post_dnsbl", { permsgstatus => $self }); + } + $self->{resolver}->finish_socket() if $self->{resolver}; } @@ -1239,6 +1251,17 @@ AUTOLEARN => sub { return $self->get_autolearn_status(); }, + SC => sub { + my $rule = $self->{shortcircuit_rule}; + my $type = $self->{shortcircuit_type}; + return "$rule ($type)" if ($rule); + return "no"; + }, + + SCRULE => sub { return ($self->{shortcircuit_rule} || "none") ; }, + + SCTYPE => sub { return ($self->{shortcircuit_type} || "no") ; }, + TESTS => sub { my $arg = (shift || ','); return (join($arg, sort(@{$self->{test_names_hit}})) || "none"); @@ -1707,6 +1730,8 @@ my ($self, $priority) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + # note: we do this only once for all head pattern tests. Only # eval tests need to use stuff in here. 
$self->{test_log_msgs} = (); # clear test state @@ -1825,6 +1850,8 @@ sub do_body_tests { my ($self, $priority, $textary) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running body-text per-line regexp tests; score so far=".$self->{score}); @@ -2240,6 +2267,8 @@ my ($self, $priority, @uris) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + dbg("uri: running uri tests; score so far=".$self->{score}); my $doing_user_rules = @@ -2332,6 +2362,8 @@ my ($self, $priority, $textary) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + dbg("rules: running raw-body-text per-line regexp tests; score so far=".$self->{score}); my $doing_user_rules = @@ -2422,6 +2454,8 @@ sub do_full_tests { my ($self, $priority, $fullmsgref) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running full-text regexp tests; score so far=".$self->{score}); @@ -2524,6 +2558,8 @@ sub do_meta_tests { my ($self, $priority) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running meta tests; score so far=" . $self->{score} ); my $conf = $self->{conf}; @@ -2707,6 +2743,8 @@ sub run_eval_tests { my ($self, $evalhash, $prepend2desc, @extraevalargs) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); # look these up once in advance to save repeated lookups in loop below my $debugenabled = would_log('dbg'); @@ -2717,6 +2755,7 @@ my $scoreset = $self->{conf}->get_score_set(); while (my ($rulename, $test) = each %{$evalhash}) { + last if (exists $self->{shortcircuit_type}); # Score of 0, skip it. 
my $score = $scoresref->{$rulename}; @@ -2870,7 +2909,15 @@ } sub _handle_hit { - my ($self, $rule, $score, $area, $desc) = @_; + my ($self, $rule, $score, $area, $desc, $scrule) = @_; + + # if this was a shortcircuited rule hit, lets do some cleanup first + if ($scrule) { + undef $self->{test_names_hit}; # reset rule hits + $self->{score} = 0; # reset score + $self->{tag_data}->{REPORT} = ''; # reset tag data + $self->{tag_data}->{SUMMARY} = ''; # reset tag data + } # ignore meta-match sub-rules. if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; } @@ -2925,6 +2972,8 @@ my ($self, $rule, $area, $value) = @_; $value ||= 1; + return if (exists $self->{shortcircuit_type}); + my $already_hit = $self->{tests_already_hit}->{$rule} || 0; $self->{tests_already_hit}->{$rule} = $already_hit + $value; @@ -2936,7 +2985,25 @@ my $score = $self->{conf}->{scores}->{$rule}; - $self->_handle_hit($rule, $score, $area, $desc); + my $sctype = $self->{conf}->{shortcircuit}->{$rule}; + if ($sctype) { + $self->{shortcircuit_rule} = $rule; + if ($sctype eq 'on') { # guess by rule score + $self->{shortcircuit_type} = ($score < 0 ? 
'ham' : 'spam'); + dbg("shortcircuit: s/c due to $rule, using score of $score"); + } + else { + $self->{shortcircuit_type} = $sctype; + if ($sctype eq 'ham') { + $score = $self->{conf}->{shortcircuit_ham_score}; + } else { + $score = $self->{conf}->{shortcircuit_spam_score}; + } + dbg("shortcircuit: s/c $sctype due to $rule, using score of $score"); + } + } + + $self->_handle_hit($rule, $score, $area, $desc, $self->{shortcircuit_rule}); } sub test_log { diff -ru --exclude=.svn /home/jm/ftp/spamassassin/rules/10_default_prefs.cf /home/jm/ftp/sa/bug-3109/rules/10_default_prefs.cf --- /home/jm/ftp/spamassassin/rules/10_default_prefs.cf 2006-04-10 14:50:49.000000000 +0100 +++ /home/jm/ftp/sa/bug-3109/rules/10_default_prefs.cf 2006-04-15 19:49:22.000000000 +0100 @@ -33,7 +33,7 @@ report report Content preview: _PREVIEW_ report -report Content analysis details: (_SCORE_ points, _REQD_ required) +report Content analysis details: (_SCORE_ points, _REQD_ required, s/c _SCTYPE_) report report " pts rule name description" report ---- ---------------------- -------------------------------------------------- @@ -92,7 +92,7 @@ # FROM_HAS_MIXED_NUMS3,HOME_EMPLOYMENT,INVALID_DATE,INVALID_MSGID # LINES_OF_YELLING,MSGID_HAS_NO_AT,NO_REAL_NAME,ONCE_IN_LIFETIME # UNDISC_RECIPS autolearn=spam version=2.60-cvs -add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_" +add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_" ########################################################################### # Default prefs values: users can override these in their diff -ru --exclude=.svn /home/jm/ftp/spamassassin/spamd/spamd.raw /home/jm/ftp/sa/bug-3109/spamd/spamd.raw --- /home/jm/ftp/spamassassin/spamd/spamd.raw 2006-06-30 16:45:25.000000000 +0100 +++ /home/jm/ftp/sa/bug-3109/spamd/spamd.raw 2006-06-30 20:26:01.000000000 +0100 @@ -1393,6 +1393,7 @@ 
push(@extra, "bayes=".sprintf("%06f", $status->{bayes_score})); } push(@extra, "autolearn=".$status->get_autolearn_status()); + push(@extra, "shortcircuit=".$status->get_tag("SCTYPE")); my $yorn = $status->is_spam() ? 'Y' : '.'; my $score = $status->get_score(); diff -ru --exclude=.svn /home/jm/ftp/spamassassin/t/shortcircuit.t /home/jm/ftp/sa/bug-3109/t/shortcircuit.t --- /dev/null 2006-05-22 15:25:23.000000000 +0100 +++ t/shortcircuit.t 2006-04-19 19:31:34.000000000 +0100 @@ -0,0 +1,43 @@ +#!/usr/bin/perl + +use lib '.'; use lib 't'; +use SATest; sa_t_init("shortcircuit"); +use Test; BEGIN { plan tests => 8 }; + +# --------------------------------------------------------------------------- + +%anti_patterns = ( +q{ autolearn=ham } => 'autolearned as ham' +); + +tstlocalrules (' + + # hits spam/001 + body X_FOO /Congratulations/ + header X_BAR From =~ /sb55/ + # this should still fire, fixing the meta dependency ordering automatically + meta SC_PRI_SPAM_001 (X_FOO && X_BAR) + shortcircuit SC_PRI_SPAM_001 spam + priority SC_PRI_SPAM_001 -1000 + + # hits spam/002 + header SC_002 Subject =~ /ADV/ + shortcircuit SC_002 on + priority SC_002 -100 + score SC_002 50 + +'); + +%patterns = ( + q{ SC_PRI_SPAM_001 }, 'hit', + q{ shortcircuit=spam }, 'sc', +); +ok (sarun ("-L -t < data/spam/001", \&patterns_run_cb)); +ok_all_patterns(); + +%patterns = ( + q{ SC_002 }, 'hit', + q{ shortcircuit=spam }, 'sc', +); +ok (sarun ("-L -t < data/spam/002", \&patterns_run_cb)); +ok_all_patterns();