Index: MANIFEST =================================================================== --- MANIFEST (revision 388340) +++ MANIFEST (working copy) @@ -439,6 +439,7 @@ rules/25_textcat.cf rules/25_uribl.cf rules/60_awl.cf +rules/60_shortcircuit.cf rules/60_whitelist.cf rules/60_whitelist_spf.cf rules/60_whitelist_subject.cf Index: lib/Mail/SpamAssassin/Constants.pm =================================================================== --- lib/Mail/SpamAssassin/Constants.pm (revision 388340) +++ lib/Mail/SpamAssassin/Constants.pm (working copy) @@ -259,8 +259,8 @@ # --------------------------------------------------------------------------- -use constant META_TEST_MIN_PRIORITY => 500; -use constant HARVEST_DNSBL_PRIORITY => 500; +use constant META_TEST_MIN_PRIORITY => -500; +use constant HARVEST_DNSBL_PRIORITY => -800; # regular expression that matches message separators in The University of # Washington's MBX mailbox format Index: lib/Mail/SpamAssassin/PerMsgStatus.pm =================================================================== --- lib/Mail/SpamAssassin/PerMsgStatus.pm (revision 388340) +++ lib/Mail/SpamAssassin/PerMsgStatus.pm (working copy) @@ -169,11 +169,14 @@ # happen in Conf.pm when we switch a rules from one priority to another next unless ($self->{conf}->{priorities}->{$priority} > 0); + # if shortcircuiting is it, we skip all other priorities... + last if (exists $self->{shortcircuit_type}); + dbg("check: running tests for priority: $priority"); # only harvest the dnsbl queries once priority HARVEST_DNSBL_PRIORITY # has been reached and then only run once - if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p) { + if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p && !exists $self->{shortcircuit_type}) { # harvest the DNS results $self->harvest_dnsbl_queries(); $needs_dnsbl_harvest_p = 0; @@ -212,7 +215,7 @@ # sanity check, it is possible that no rules >= HARVEST_DNSBL_PRIORITY ran so the harvest # may not have run yet. 
Check, and if so, go ahead and harvest here. - if ($needs_dnsbl_harvest_p) { + if ($needs_dnsbl_harvest_p && !exists $self->{shortcircuit_type}) { # harvest the DNS results $self->harvest_dnsbl_queries(); @@ -1228,6 +1231,17 @@ AUTOLEARN => sub { return $self->get_autolearn_status(); }, + SC => sub { + my $rule = $self->{shortcircuit_rule}; + my $type = $self->{shortcircuit_type}; + return "$rule ($type)" if ($rule); + return "none"; + }, + + SCRULE => sub { return ($self->{shortcircuit_rule} || "none") ; }, + + SCTYPE => sub { return ($self->{shortcircuit_type} || "none") ; }, + TESTS => sub { my $arg = (shift || ','); return (join($arg, sort(@{$self->{test_names_hit}})) || "none"); @@ -1678,6 +1692,8 @@ my ($self, $priority) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + # note: we do this only once for all head pattern tests. Only # eval tests need to use stuff in here. $self->{test_log_msgs} = (); # clear test state @@ -1707,6 +1723,7 @@ my %ordered = (); while (my($rulename, $rule) = each %{$self->{conf}{head_tests}->{$priority}}) { + last if (exists $self->{shortcircuit_type}); my $def = ''; my ($hdrname, $testtype, $pat) = $rule =~ /^\s*(\S+)\s*(\=|\!)\~\s*(\S.*?\S)\s*$/; @@ -1796,6 +1813,8 @@ sub do_body_tests { my ($self, $priority, $textary) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running body-text per-line regexp tests; score so far=".$self->{score}); @@ -1820,6 +1839,7 @@ my $evalstr2 = ''; while (my($rulename, $pat) = each %{$self->{conf}{body_tests}->{$priority}}) { + last if (exists $self->{shortcircuit_type}); $evalstr .= ' if ($self->{conf}->{scores}->{q{'.$rulename.'}}) { # call procedurally as it is faster. 
@@ -2211,6 +2231,8 @@ my ($self, $priority, @uris) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + dbg("uri: running uri tests; score so far=".$self->{score}); my $doing_user_rules = @@ -2234,6 +2256,8 @@ my $evalstr2 = ''; while (my($rulename, $pat) = each %{$self->{conf}{uri_tests}->{$priority}}) { + last if (exists $self->{shortcircuit_type}); + $evalstr .= ' if ($self->{conf}->{scores}->{q{'.$rulename.'}}) { '.$rulename.'_uri_test($self, @_); # call procedurally for speed @@ -2303,6 +2327,8 @@ my ($self, $priority, $textary) = @_; local ($_); + return if (exists $self->{shortcircuit_type}); + dbg("rules: running raw-body-text per-line regexp tests; score so far=".$self->{score}); my $doing_user_rules = @@ -2326,6 +2352,7 @@ my $evalstr2 = ''; while (my($rulename, $pat) = each %{$self->{conf}{rawbody_tests}->{$priority}}) { + last if (exists $self->{shortcircuit_type}); $evalstr .= ' if ($self->{conf}->{scores}->{q{'.$rulename.'}}) { '.$rulename.'_rawbody_test($self, @_); # call procedurally for speed @@ -2393,6 +2420,8 @@ sub do_full_tests { my ($self, $priority, $fullmsgref) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running full-text regexp tests; score so far=".$self->{score}); @@ -2417,6 +2446,7 @@ my $evalstr = $self->start_rules_plugin_code("full"); while (my($rulename, $pat) = each %{$self->{conf}{full_tests}->{$priority}}) { + last if (exists $self->{shortcircuit_type}); $evalstr .= ' if ($self->{conf}->{scores}->{q{'.$rulename.'}}) { '.$self->hash_line_for_rule($rulename).' @@ -2495,6 +2525,8 @@ sub do_meta_tests { my ($self, $priority) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); dbg("rules: running meta tests; score so far=" . 
$self->{score} ); @@ -2522,6 +2554,7 @@ # Go through each rule and figure out what we need to do foreach $rulename (@metas) { + last if (exists $self->{shortcircuit_type}); my $rule = $self->{conf}->{meta_tests}->{$priority}->{$rulename}; my $token; @@ -2639,11 +2672,14 @@ sub run_eval_tests { my ($self, $evalhash, $prepend2desc, @extraevalargs) = @_; local ($_); + + return if (exists $self->{shortcircuit_type}); my $debugenabled = would_log('dbg'); my $scoreset = $self->{conf}->get_score_set(); while (my ($rulename, $test) = each %{$evalhash}) { + if (exists $self->{shortcircuit_type}) { keys %{$evalhash}; last; } # reset the each() iterator so the next scan of this hash restarts at the first rule # Score of 0, skip it. next unless ($self->{conf}->{scores}->{$rulename}); @@ -2793,8 +2829,16 @@ } sub _handle_hit { - my ($self, $rule, $score, $area, $desc) = @_; + my ($self, $rule, $score, $area, $desc, $scrule) = @_; + # if this was a shortcircuited rule hit, lets do some cleanup first + if ($scrule) { + $self->{test_names_hit} = []; # reset rule hits (keep an array ref for later push/sort) + $self->{score} = 0; # reset score + $self->{tag_data}->{REPORT} = ''; # reset tag data + $self->{tag_data}->{SUMMARY} = ''; # reset tag data + } + # ignore meta-match sub-rules.
if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; } @@ -2842,6 +2886,8 @@ my ($self, $rule, $area, $value) = @_; $value ||= 1; + return if (exists $self->{shortcircuit_type}); + my $already_hit = $self->{tests_already_hit}->{$rule} || 0; $self->{tests_already_hit}->{$rule} = $already_hit + $value; @@ -2853,7 +2899,26 @@ my $score = $self->{conf}->{scores}->{$rule}; - $self->_handle_hit($rule, $score, $area, $desc); + my $sc = $self->{conf}->{shortcircuit}; + + if (exists $sc->{$rule}) { + $self->{shortcircuit_rule} = $rule; + $self->{shortcircuit_type} = $sc->{$rule}; + + if ($self->{shortcircuit_type} eq "spam") { + $score = $self->{conf}->{shortcircuit_spam_score}; + dbg("shortcircuit: s/c as spam, score of $score"); + } + elsif ($self->{shortcircuit_type} eq "ham") { + $score = $self->{conf}->{shortcircuit_ham_score}; + dbg("shortcircuit: s/c as ham, score of $score"); + } + else { + dbg("shortcircuit: s/c classification not specified, using default score of $score"); + } + } + + $self->_handle_hit($rule, $score, $area, $desc, $self->{shortcircuit_rule}); } sub test_log { Index: lib/Mail/SpamAssassin/Conf/Parser.pm =================================================================== --- lib/Mail/SpamAssassin/Conf/Parser.pm (revision 388340) +++ lib/Mail/SpamAssassin/Conf/Parser.pm (working copy) @@ -836,6 +836,12 @@ $conf->{tests}->{$name} = $text; $conf->{test_types}->{$name} = $type; $conf->{tflags}->{$name} ||= ''; + if ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { + $conf->{priority}->{$name} ||= 500; + } + else { + $conf->{priority}->{$name} ||= 0; + } $conf->{priority}->{$name} ||= 0; $conf->{source_file}->{$name} = $self->{currentfile}; $conf->{if_stack}->{$name} = $self->get_if_stack_as_string(); Index: lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm =================================================================== --- lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (revision 388340) +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm 
(working copy) @@ -400,6 +400,8 @@ my $scan = $opts->{permsgstatus}; my $scanstate = $scan->{uribl_scanstate}; + return if (exists $scan->{shortcircuit_type}); + # try to complete a few more if (!$self->complete_lookups($scanstate, 0.1)) { my $secs_to_wait = $scan->{conf}->{uridnsbl_timeout}; Index: lib/Mail/SpamAssassin/Conf.pm =================================================================== --- lib/Mail/SpamAssassin/Conf.pm (revision 388340) +++ lib/Mail/SpamAssassin/Conf.pm (working copy) @@ -2196,8 +2196,105 @@ type => $CONF_TYPE_HASH_KEY_VALUE }); +=item shortcircuit SYMBOLIC_TEST_NAME [spam|ham|default|off] + +Short Circuiting a test will force all other pending rules to be +skipped. Recommended usage would be in conjunction with priority to +set rules with strong S/O values (i.e. 1.0) to be run first and make +instant spam or ham classification based on that. To override a +test that uses shortcircuiting, you can set the classification +type to off. + +=over 4 + +=item spam + + override the default score of this rule with the score from C<shortcircuit_spam_score>. + +=item ham + + override the default score of this rule with the score from C<shortcircuit_ham_score>. + +=item default + +shortcircuits the rest of the tests, but does not make a strict classification of spam or +ham. rather, it uses the default score for the rule being shortcircuited. this would +allow you, for example, to define a rule such as + +=over 4 + + body TEST /test/ + describe TEST test rule that scores barely over spam threshold + score TEST 5.5 + priority TEST -10 + shortcircuit TEST default + =back +the result of a message hitting the above rule would be a final score of 5.5, as opposed +to 100 (default) if it were classified as spam. + +=item off + + disables shortcircuiting on said rule.
+ +=back + +=cut + + push (@cmds, { + setting => 'shortcircuit', + code => sub { + my ($self, $key, $value, $line) = @_; + my ($rule,$type); + unless (defined $value && $value !~ /^$/) { + return $MISSING_REQUIRED_VALUE; + } + if ($value =~ /^(\S+)\s+(\S+)$/) { + $rule=$1; + $type=$2; + } else { + return $INVALID_VALUE; + } + if ($type =~ m/^(spam|ham|default)$/) { + dbg("shortcircuit: adding $rule $type"); + $self->{main}->{conf}->{shortcircuit}->{$rule} = $type; + } elsif ($type eq "off") { + delete $self->{main}->{conf}->{shortcircuit}->{$rule}; # delete, not undef: the hit handler tests this key with exists() + } else { + return $INVALID_VALUE; + } + } + }); + +=item shortcircuit_spam_score n.nn (default: 100) + +when shortcircuit is used on a rule, and the shortcircuit classification type is +set to C<spam>, this value should be applied in place of the default score for that rule. + +=cut + + push (@cmds, { + setting => 'shortcircuit_spam_score', + default => 100, + type => $CONF_TYPE_NUMERIC + }); + +=item shortcircuit_ham_score n.nn (default: -100) + +when shortcircuit is used on a rule, and the shortcircuit classification type is +set to C<ham>, this value should be applied in place of the default score for that rule.
+ +=cut + + push (@cmds, { + setting => 'shortcircuit_ham_score', + default => -100, + type => $CONF_TYPE_NUMERIC + }); + +=back + =head1 ADMINISTRATOR SETTINGS These settings differ from the ones above, in that they are considered 'more @@ -2732,6 +2829,9 @@ _DCCR_ DCC's results _PYZOR_ Pyzor results _RBL_ full results for positive RBL queries in DNS URI format + _SC_ shortcircuit status (classification and rule name) + _SCRULE_ rulename that caused the shortcircuit + _SCTYPE_ shortcircuit classification ("spam", "ham", "default", "none") _LANGUAGES_ possible languages of mail _PREVIEW_ content preview _REPORT_ terse report of tests hit (for header reports) Index: rules/10_default_prefs.cf =================================================================== --- rules/10_default_prefs.cf (revision 388340) +++ rules/10_default_prefs.cf (working copy) @@ -33,11 +33,12 @@ report report Content preview: _PREVIEW_ report -report Content analysis details: (_SCORE_ points, _REQD_ required) +report Content analysis details: (_SCORE_ points, _REQD_ required, s/c _SCTYPE_ ) report report " pts rule name description" report ---- ---------------------- -------------------------------------------------- report _SUMMARY_ + # ........................................................................ # A 'contact address' users should contact for more info. 
(replaces @@ -91,7 +92,7 @@ # FROM_HAS_MIXED_NUMS3,HOME_EMPLOYMENT,INVALID_DATE,INVALID_MSGID # LINES_OF_YELLING,MSGID_HAS_NO_AT,NO_REAL_NAME,ONCE_IN_LIFETIME # UNDISC_RECIPS autolearn=spam version=2.60-cvs -add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_" +add_header all Status "_YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ shortcircuit=_SCTYPE_ autolearn=_AUTOLEARN_ version=_VERSION_" ########################################################################### # Default prefs values: users can override these in their Index: rules/60_shortcircuit.cf =================================================================== --- rules/60_shortcircuit.cf (revision 0) +++ rules/60_shortcircuit.cf (revision 0) @@ -0,0 +1,117 @@ +# SpamAssassin rules file: spam and ham shortcircuiting using priorities +# +# Please don't modify this file as your changes will be overwritten with +# the next update. Use /etc/mail/spamassassin/local.cf instead. +# See 'perldoc Mail::SpamAssassin::Conf' for details. +# +# <@LICENSE> +# Copyright 2004 Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +########################################################################### +# HIGH PRIORITY RULES + +priority USER_IN_WHITELIST -1000 +priority USER_IN_DEF_WHITELIST -1000 +priority USER_IN_ALL_SPAM_TO -1000 +priority SUBJECT_IN_WHITELIST -1000 + +priority ALL_TRUSTED -950 + +priority SUBJECT_IN_BLACKLIST -900 +priority USER_IN_BLACKLIST_TO -900 +priority USER_IN_BLACKLIST -900 + +# HARVEST_DNSBL_PRIORITY=-800, so must run dns tests after that + +priority HABEAS_ACCREDITED_COI -750 +priority HABEAS_ACCREDITED_SOI -750 +priority RCVD_IN_BSP_TRUSTED -750 +priority RCVD_IN_IADB_VOUCHED -750 + +ifplugin Mail::SpamAssassin::Plugin::URIDNSBL +priority URIBL_BLACK -500 +priority URIBL_JP_SURBL -500 +priority URIBL_SC_SURBL -500 +priority URIBL_OB_SURBL -500 +endif + +# META_TEST_MIN_PRIORITY=-500, so high prio metas must run after that +# make sure meta dependencies have a higher priority (lower number) than the actual meta itself! + +priority SC_URIBL_SURBL -490 +priority SC_URIBL_SBL -480 + +ifplugin Mail::SpamAssassin::Plugin::Razor2 +priority RAZOR2_CHECK -450 +endif +ifplugin Mail::SpamAssassin::Plugin::DCC +priority DCC_CHECK -450 +endif +ifplugin Mail::SpamAssassin::Plugin::Pyzor +priority PYZOR_CHECK -450 +endif + +priority SC_URIBL_HASH -440 +priority SC_DIGEST_MULTI -430 + +priority BAYES_99 -400 +priority SC_URIBL_BAYES -390 + +########################################################################### +# SHORTCIRCUIT METAS + +# meta SC_URIBL_SURBL (URIBL_BLACK && (URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL )) +# meta SC_URIBL_HASH ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && (RAZOR2_CHECK || DCC_CHECK || PYZOR_CHECK)) +# meta SC_URIBL_SBL ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && URIBL_SBL) +# meta SC_URIBL_BAYES ((URIBL_BLACK || URIBL_SC_SURBL || URIBL_JP_SURBL || URIBL_OB_SURBL) && BAYES_99) + +# describe SC_URIBL_SURBL Multiple URIBLs hit +# describe SC_URIBL_HASH URI/Checksum Combo hit +# describe
SC_URIBL_SBL URI/NS Blacklist Combo hit +# describe SC_URIBL_BAYES URIBL/Bayes99 Combo hit + +########################################################################### +# SHORTCIRCUITS + +shortcircuit USER_IN_WHITELIST default +shortcircuit USER_IN_DEF_WHITELIST default +shortcircuit USER_IN_ALL_SPAM_TO default +shortcircuit SUBJECT_IN_WHITELIST default + +# shortcircuit ALL_TRUSTED default + +shortcircuit USER_IN_BLACKLIST default +shortcircuit USER_IN_BLACKLIST_TO default +shortcircuit SUBJECT_IN_BLACKLIST default + +# shortcircuit HABEAS_ACCREDITED_COI default +# shortcircuit HABEAS_ACCREDITED_SOI default +# shortcircuit RCVD_IN_BSP_TRUSTED default +# shortcircuit RCVD_IN_IADB_VOUCHED default + +# shortcircuit SC_URIBL_SURBL spam +# shortcircuit SC_URIBL_HASH spam +# shortcircuit SC_URIBL_SBL spam +# shortcircuit SC_URIBL_BAYES spam + +# shortcircuit DIGEST_MULTIPLE spam +# shortcircuit BAYES_99 spam +# shortcircuit BAYES_00 ham + +########################################################################### +# EOF +