Index: lib/Mail/SpamAssassin.pm =================================================================== --- lib/Mail/SpamAssassin.pm (revision 380789) +++ lib/Mail/SpamAssassin.pm (working copy) @@ -300,6 +300,8 @@ $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self); + $self->{conf}->load_plugin('Mail::SpamAssassin::Plugin::BodyTests'); + $self; } Index: lib/Mail/SpamAssassin/PerMsgStatus.pm =================================================================== --- lib/Mail/SpamAssassin/PerMsgStatus.pm (revision 380789) +++ lib/Mail/SpamAssassin/PerMsgStatus.pm (working copy) @@ -156,7 +156,6 @@ $self->run_rbl_eval_tests ($self->{conf}->{rbl_evals}); my $needs_dnsbl_harvest_p = 1; # harvest needs to be run - my $decoded = $self->get_decoded_stripped_body_text_array(); my $bodytext = $self->get_decoded_body_text_array(); my $fulltext = $self->{msg}->get_pristine(); @@ -193,9 +192,7 @@ $self->do_head_tests($priority); $self->do_head_eval_tests($priority); - $self->do_body_tests($priority, $decoded); $self->do_body_uri_tests($priority, @uris); - $self->do_body_eval_tests($priority, $decoded); $self->do_rawbody_tests($priority, $bodytext); $self->do_rawbody_eval_tests($priority, $bodytext); @@ -203,6 +200,8 @@ $self->do_full_tests($priority, \$fulltext); $self->do_full_eval_tests($priority, \$fulltext); + $self->{main}->call_plugins ("check_priority", { permsgstatus => $self, priority => $priority }); + # we may need to call this more often than once through the loop, but # it needs to be done at least once, either at the beginning or the end. $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); @@ -222,7 +221,6 @@ # finished running rules delete $self->{current_rule_name}; - undef $decoded; undef $bodytext; undef $fulltext; @@ -2447,12 +2445,6 @@ $self->run_eval_tests ($self->{conf}->{head_evals}->{$priority}, ''); } -sub do_body_eval_tests { - my ($self, $priority, $bodystring) = @_; - return unless (defined($self->{conf}->{body_evals}->{$priority})); - $self->run_eval_tests ($self->{conf}->{body_evals}->{$priority}, 'BODY: ', $bodystring); -} - sub do_rawbody_eval_tests { my ($self, $priority, $bodystring) = @_; return unless (defined($self->{conf}->{rawbody_evals}->{$priority})); Index: lib/Mail/SpamAssassin/Conf/Parser.pm =================================================================== --- lib/Mail/SpamAssassin/Conf/Parser.pm (revision 380789) +++ lib/Mail/SpamAssassin/Conf/Parser.pm (working copy) @@ -490,8 +490,6 @@ } # Let's do some linting here ... -# This is called from _parse(), BTW, so we can check for $conf->{tests} -# easily before finish_parsing() is called and deletes it. # sub lint_check { my ($self) = @_; @@ -502,13 +500,13 @@ { # Check for description and score issues in lint fashion while ( ($k,$v) = each %{$conf->{descriptions}} ) { - if (!exists $conf->{tests}->{$k}) { + if (!exists $conf->{source_file}->{$k}) { $self->lint_warn("config: warning: description exists for non-existent rule $k\n", $k); } } while ( my($sk) = each %{$conf->{scores}} ) { - if (!exists $conf->{tests}->{$sk}) { + if (!exists $conf->{source_file}->{$sk}) { $self->lint_warn("config: warning: score set for non-existent rule $sk\n", $sk); } } @@ -525,7 +523,7 @@ my $conf = $self->{conf}; my ($k, $v); - while ( ($k,$v) = each %{$conf->{tests}} ) { + while ( ($k,$v) = each %{$conf->{source_file}} ) { if ( ! exists $conf->{scores}->{$k} ) { # T_ rules (in a testing probationary period) get low, low scores my $set_score = ($k =~/^T_/) ? 0.01 : 1.0; @@ -647,10 +645,24 @@ ########################################################################### +sub prioritize_tests { + my ($self, $rulelist); + my $conf = $self->{conf}; + my $rv = {}; + while (my ($name, $def) = each %$rulelist) { + my $priority = $conf->{priority}->{$name} || 0; + $conf->{priorities}->{$priority}++; + + $rv->{$priority}->{$name} = $def; + } + return $rv; +} + sub finish_parsing { my ($self) = @_; my $conf = $self->{conf}; +#BUG4778: below to be removed while (my ($name, $text) = each %{$conf->{tests}}) { my $type = $conf->{test_types}->{$name}; my $priority = $conf->{priority}->{$name} || 0; @@ -677,9 +689,6 @@ if ($args) { $self->lint_warn("syntax error (unparsable argument: $args) for eval function: $name: $text", $name); } - elsif ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS) { - $conf->{body_evals}->{$priority}->{$name} = \@args; - } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS) { $conf->{head_evals}->{$priority}->{$name} = \@args; } @@ -706,10 +715,7 @@ } # non-eval tests else { - if ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS) { - $conf->{body_tests}->{$priority}->{$name} = $text; - } - elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS) { + if ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS) { $conf->{head_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { @@ -738,6 +744,7 @@ } } } +#BUG4778: above to be removed $self->lint_trusted_networks(); @@ -788,15 +795,16 @@ ########################################################################### -sub add_test { - my ($self, $name, $text, $type) = @_; +sub define_test { + my ($self, $name, $autolearn) = @_; my $conf = $self->{conf}; + $autolearn ||= $Mail::SpamAssassin::Conf::AUTOLEARN_NONE; # Don't allow invalid names ... if ($name !~ /^\D\w*$/) { $self->lint_warn("config: error: rule '$name' has invalid characters ". "(not Alphanumeric + Underscore + starting with a non-digit)\n", $name); - return; + return 0; } # Also set a hard limit for ALL rules (rule names longer than 242 @@ -805,7 +813,7 @@ if (length $name > 200) { $self->lint_warn("config: error: rule '$name' is way too long ". "(recommended maximum length is 22 characters)\n", $name); - return; + return 0; } # Warn about, but use, long rule names during --lint @@ -816,9 +824,56 @@ } } + $conf->{tflags}->{$name} ||= ''; + $conf->{priority}->{$name} ||= 0; + $conf->{source_file}->{$name} = $self->{currentfile}; + $conf->{if_stack}->{$name} = $self->get_if_stack_as_string(); + $conf->{autolearn}->{$name} = $autolearn; + + if ($self->{scoresonly}) { + $conf->{user_defined_rules}->{$name} = 1; + } + + return 1; +} + +sub parse_eval_test { + my ($self, $name, $text) = @_; + my @args; + if (my ($function, $args) = ($text =~ m/(.*?)\s*\((.*?)\)\s*$/)) { + if ($args) { + # bug 4419: Parse quoted strings, unquoted alphanumerics/floats and + # both unquoted IPv4 and IPv6 addresses. s// is used so that we can + # determine whether or not we successfully parsed ALL arguments. + while ($args =~ s/^\s*(?:['"](.*?)['"]|([\d\.:A-Za-z]+?))\s*(?:,\s*|$)//) { + if (defined $1) { + push @args, $1; + } + else { + push @args, $2; + } + } + } + unshift(@args, $function); + if ($args) { + $self->lint_warn("syntax error (unparsable argument: $args) for eval function: $name: $text", $name); + return; + } + } + else { + $self->lint_warn("syntax error for eval function $name: $text", $name); + return; + } + return \@args; +} + +#BUG4778: function to be removed +sub add_test { + my ($self, $name, $text, $type) = @_; + my $conf = $self->{conf}; + # all of these rule types are regexps - if ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS || - $type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS || + if ($type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS || $type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS || $type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) { @@ -835,16 +890,25 @@ return unless $self->is_meta_valid($name, $text); } + my $autolearn = $Mail::SpamAssassin::Conf::AUTOLEARN_NONE; + if (($type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) || + ($type == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS)) { + $autolearn = $Mail::SpamAssassin::Conf::AUTOLEARN_BODY; + } + elsif (($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS) || + ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS)) { + $autolearn = $Mail::SpamAssassin::Conf::AUTOLEARN_HEAD; + } + elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { + $autolearn = $Mail::SpamAssassin::Conf::AUTOLEARN_META; + } + + return unless $self->define_test($name, $autolearn); + $conf->{tests}->{$name} = $text; $conf->{test_types}->{$name} = $type; - $conf->{tflags}->{$name} ||= ''; - $conf->{priority}->{$name} ||= 0; - $conf->{source_file}->{$name} = $self->{currentfile}; - $conf->{if_stack}->{$name} = $self->get_if_stack_as_string(); - if ($self->{scoresonly}) { $conf->{user_rules_to_compile}->{$type} = 1; - $conf->{user_defined_rules}->{$name} = 1; } } Index: lib/Mail/SpamAssassin/Plugin/BodyTests.pm =================================================================== --- lib/Mail/SpamAssassin/Plugin/BodyTests.pm (revision 0) +++ lib/Mail/SpamAssassin/Plugin/BodyTests.pm (revision 0) @@ -0,0 +1,110 @@ +package Mail::SpamAssassin::Plugin::BodyTests; + +use Mail::SpamAssassin::Plugin; +use Mail::SpamAssassin::Logger; +use Mail::SpamAssassin::Conf; + +use strict; +use warnings; + +use vars qw(@ISA); +@ISA = qw(Mail::SpamAssassin::Plugin); + +sub new { + my $class = shift; + my $main = shift; + + $class = ref($class) || $class; + my $self = $class->SUPER::new($main); + bless ($self, $class); + + $self->set_config($main->{conf}); + + return $self; +} + +sub set_config { + my ($self, $conf) = @_; + my @cmds; + + $self->{body_evals} = { }; + $self->{body_tests} = { }; + +=item body SYMBOLIC_TEST_NAME /pattern/modifiers + +Define a body pattern test. C is a Perl regular expression. Note: +as per the header tests, C<#> must be escaped (C<\#>) or else it is considered +the beginning of a comment. + +The 'body' in this case is the textual parts of the message body; +any non-text MIME parts are stripped, and the message decoded from +Quoted-Printable or Base-64-encoded format if necessary. The message +Subject header is considered part of the body and becomes the first +paragraph when running the rules. All HTML tags and line breaks will +be removed before matching. + +=item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args]) + +Define a body eval test. See above. + +=cut + + push (@cmds, { + setting => 'body', + is_frequent => 1, + is_priv => 1, + code => sub { + my ($self, $key, $value, $line) = @_; + if ($value =~ /^(\S+)\s+eval:(.*)$/) { + my $name = $1; + my $def = $conf->{parser}->parse_eval_test($name, $2); + if (defined($def) && + $conf->{parser}->define_test($name, $Mail::SpamAssassin::Conf::AUTOLEARN_BODY)) { + $self->{body_evals}->{$name} = $def; + } + else { + return $Mail::SpamAssassin::Conf::INVALID_VALUE; + } + } + else { + my @values = split(/\s+/, $value, 2); + if (@values != 2) { + return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; + } + my ($name, $text) = @values; + unless ($conf->{parser}->is_delimited_regexp_valid($name, $text) && + $conf->{parser}->define_test($name, $Mail::SpamAssassin::Conf::AUTOLEARN_BODY)) { + return $Mail::SpamAssassin::Conf::INVALID_VALUE; + } + $self->{body_tests}->{$name} = $text; + } + } + }); + + $conf->{parser}->register_commands(\@cmds); +} + +sub finish_parsing_end { + my ($self, $arg) = @_; + my $conf = $arg->{conf}; + $self->{body_tests} = $conf->{parser}->prioritize_tests($self->{body_tests}); + $self->{body_evals} = $conf->{parser}->prioritize_tests($self->{body_evals}); +} + +sub check_priority { + my ($self, $arg) = @_; + my $permsgstatus = $arg->{permsgstatus}; + my $priority = $arg->{priority}; + my $conf = $permsgstatus->{conf}; + + return unless $self->{body_tests}->{$priority} || + $self->{body_evals}->{$priority}; + + my $decoded = $permsgstatus->get_decoded_stripped_body_text_array(); + + $permsgstatus->do_body_tests($priority, $decoded) + if $self->{body_tests}->{$priority}; + + $permsgstatus->run_eval_tests($self->{body_evals}->{$priority}, 'BODY: ', $decoded) + if $self->{body_evals}->{$priority}; +} Property changes on: lib/Mail/SpamAssassin/Plugin/BodyTests.pm ___________________________________________________________________ Name: svn:eol-style + native Index: lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm =================================================================== --- lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (revision 380789) +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (working copy) @@ -173,7 +173,7 @@ $scanstate->{active_rules_rhsbl} = { }; $scanstate->{active_rules_revipbl} = { }; foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) { - next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename)); + next unless ($scanner->{conf}->is_rule_active($rulename)); my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename}; if ($rulecf->{is_rhsbl}) { Index: lib/Mail/SpamAssassin/Conf.pm =================================================================== --- lib/Mail/SpamAssassin/Conf.pm (revision 380789) +++ lib/Mail/SpamAssassin/Conf.pm (working copy) @@ -88,19 +88,20 @@ @MIGRATED_SETTINGS $TYPE_HEAD_TESTS $TYPE_HEAD_EVALS -$TYPE_BODY_TESTS $TYPE_BODY_EVALS $TYPE_FULL_TESTS $TYPE_FULL_EVALS +$TYPE_FULL_TESTS $TYPE_FULL_EVALS $TYPE_RAWBODY_TESTS $TYPE_RAWBODY_EVALS $TYPE_URI_TESTS $TYPE_URI_EVALS $TYPE_META_TESTS $TYPE_RBL_EVALS + +$AUTOLEARN_NONE $AUTOLEARN_HEAD $AUTOLEARN_BODY $AUTOLEARN_META }; @ISA = qw(); +#BUG4778: below to be removed # odd => eval test. Not constants so they can be shared with Parser # TODO: move to Constants.pm? $TYPE_HEAD_TESTS = 0x0008; $TYPE_HEAD_EVALS = 0x0009; -$TYPE_BODY_TESTS = 0x000a; -$TYPE_BODY_EVALS = 0x000b; $TYPE_FULL_TESTS = 0x000c; $TYPE_FULL_EVALS = 0x000d; $TYPE_RAWBODY_TESTS = 0x000e; @@ -110,11 +111,17 @@ $TYPE_META_TESTS = 0x0012; $TYPE_RBL_EVALS = 0x0013; -my @rule_types = ("body_tests", "uri_tests", "uri_evals", - "head_tests", "head_evals", "body_evals", "full_tests", +my @rule_types = ("body_tests_obsolete", "uri_tests", "uri_evals", + "head_tests", "head_evals", "body_evals_obsolete", "full_tests", "full_evals", "rawbody_tests", "rawbody_evals", "rbl_evals", "meta_tests"); +#BUG4778: above to be removed +$AUTOLEARN_NONE = 0; +$AUTOLEARN_HEAD = 1; +$AUTOLEARN_BODY = 2; +$AUTOLEARN_META = 3; + $VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later # these are variables instead of constants so that other classes can @@ -1943,44 +1950,6 @@ } }); -=item body SYMBOLIC_TEST_NAME /pattern/modifiers - -Define a body pattern test. C is a Perl regular expression. Note: -as per the header tests, C<#> must be escaped (C<\#>) or else it is considered -the beginning of a comment. - -The 'body' in this case is the textual parts of the message body; -any non-text MIME parts are stripped, and the message decoded from -Quoted-Printable or Base-64-encoded format if necessary. The message -Subject header is considered part of the body and becomes the first -paragraph when running the rules. All HTML tags and line breaks will -be removed before matching. - -=item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args]) - -Define a body eval test. See above. - -=cut - - push (@cmds, { - setting => 'body', - is_frequent => 1, - is_priv => 1, - code => sub { - my ($self, $key, $value, $line) = @_; - if ($value =~ /^(\S+)\s+eval:(.*)$/) { - $self->{parser}->add_test ($1, $2, $TYPE_BODY_EVALS); - } - else { - my @values = split(/\s+/, $value, 2); - if (@values != 2) { - return $MISSING_REQUIRED_VALUE; - } - $self->{parser}->add_test (@values, $TYPE_BODY_TESTS); - } - } - }); - =item uri SYMBOLIC_TEST_NAME /pattern/modifiers Define a uri pattern test. C is a Perl regular expression. Note: as @@ -2822,12 +2791,10 @@ # after parsing, tests are refiled into these hashes for each test type. # this allows e.g. a full-text test to be rewritten as a body test in # the user's user_prefs file. - $self->{body_tests} = { }; $self->{uri_tests} = { }; $self->{uri_evals} = { }; # not used/implemented yet $self->{head_tests} = { }; $self->{head_evals} = { }; - $self->{body_evals} = { }; $self->{full_tests} = { }; $self->{full_evals} = { }; $self->{rawbody_tests} = { }; @@ -2912,11 +2879,13 @@ return $self->{scoreset_current}; } +#BUG4778: function to be removed sub get_rule_types { my ($self) = @_; return @rule_types; } +#BUG4778: function to be removed sub get_rule_keys { my ($self, $test_type, $priority) = @_; @@ -2937,6 +2906,7 @@ } } +#BUG4778: function to be removed sub get_rule_value { my ($self, $test_type, $rulename, $priority) = @_; @@ -2984,6 +2954,7 @@ # Remove all rules that don't match the given regexp (or are sub-rules of # meta-tests that match the regexp). +#BUG4778: below to be reimplemented sub trim_rules { my ($self, $regexp) = @_; @@ -3019,6 +2990,7 @@ } } # trim_rules() +#BUG4778: function to be moved into meta rule type plugin and/or reimplemented sub add_meta_depends { my ($self, $meta) = @_; @@ -3042,32 +3014,10 @@ } # add_meta_depends() sub is_rule_active { - my ($self, $test_type, $rulename, $priority) = @_; + my ($self, $rulename) = @_; - # special case rbl_evals since they do not have a priority - if ($test_type eq 'rbl_evals') { - return 0 unless ($self->{$test_type}->{$rulename}); - return ($self->{scores}->{$rulename}); - } + return 0 unless $self->{source_file}->{$rulename}; - # first determine if the rule is defined - if (defined($priority)) { - # we have a specific priority - return 0 unless ($self->{$test_type}->{$priority}->{$rulename}); - } - else { - # no specific priority so we must loop over all currently defined - # priorities to see if the rule is defined - my $found_p = 0; - foreach my $pri (keys %{$self->{priorities}}) { - if ($self->{$test_type}->{$pri}->{$rulename}) { - $found_p = 1; - last; - } - } - return 0 unless ($found_p); - } - return ($self->{scores}->{$rulename}); } @@ -3112,13 +3062,13 @@ sub maybe_header_only { my($self,$rulename) = @_; - my $type = $self->{test_types}->{$rulename}; - return 0 if (!defined ($type)); + my $autolearn = $self->{autolearn}->{$rulename}; + return 0 if (!defined ($autolearn)); - if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) { + if ($autolearn == $AUTOLEARN_HEAD) { return 1; - } elsif ($type == $TYPE_META_TESTS) { + } elsif ($autolearn == $AUTOLEARN_META) { my $tflags = $self->{tflags}->{$rulename}; $tflags ||= ''; if ($tflags =~ m/\bnet\b/i) { return 0; @@ -3132,16 +3082,13 @@ sub maybe_body_only { my($self,$rulename) = @_; - my $type = $self->{test_types}->{$rulename}; - return 0 if (!defined ($type)); + my $autolearn = $self->{autolearn}->{$rulename}; + return 0 if (!defined ($autolearn)); - if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS) - || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS)) - { - # some rawbody go off of headers... + if ($autolearn == $AUTOLEARN_BODY) { return 1; - } elsif ($type == $TYPE_META_TESTS) { + } elsif ($autolearn == $AUTOLEARN_META) { my $tflags = $self->{tflags}->{$rulename}; $tflags ||= ''; if ($tflags =~ m/\bnet\b/i) { return 0;