--- lib/Mail/SpamAssassin/Message.pm (revision 631982)
+++ lib/Mail/SpamAssassin/Message.pm (working copy)
@@ -1074,7 +1074,8 @@
     next if ($parts[$pt]->{'type'} eq 'text/calendar');
 
     push(@{$self->{text_decoded}}, "\n") if ( @{$self->{text_decoded}} );
-    push(@{$self->{text_decoded}}, $parts[$pt]->decode());
+    push(@{$self->{text_decoded}},
+         split_into_array_of_short_paragraphs($parts[$pt]->decode()));
   }
 
   return $self->{text_decoded};
@@ -1099,6 +1100,39 @@
 
 # ---------------------------------------------------------------------------
 
+# Split a text into an array of paragraphs (chunks) whose sizes lie between
+# $chunk_size and 2 * $chunk_size characters, and return the resulting list.
+# Only the final chunk may be shorter. Concatenating the chunks yields the
+# original text. The text is read from $_[0] directly to avoid copying a
+# potentially large string.
+#
+# Each split point is looked for only within the window of $chunk_size
+# characters following the minimal chunk: preferably after a newline,
+# otherwise after a space, otherwise right after the window's first
+# character. Restricting the search to that window is what guarantees the
+# upper bound on the chunk size even when the next newline or space is far
+# away, and it keeps every search step short.
+
+sub split_into_array_of_short_paragraphs {
+  my @result;
+  my $chunk_size = 1024;
+  my $text_l = length($_[0]);
+  my($j,$ofs);
+  for ($ofs = 0;  $text_l - $ofs > 2 * $chunk_size;  $ofs = $j+1) {
+    # candidate split positions: $ofs+$chunk_size .. $ofs+2*$chunk_size-1
+    my $window = substr($_[0], $ofs+$chunk_size, $chunk_size);
+    my $k = index($window, "\n");
+    $k = index($window, " ")  if $k < 0;
+    $k = 0  if $k < 0;  # no delimiter in the window: hard split
+    $j = $ofs + $chunk_size + $k;
+    push(@result, substr($_[0], $ofs, $j-$ofs+1));  # delimiter stays in chunk
+  }
+  push(@result, substr($_[0], $ofs))  if $ofs < $text_l;
+  @result;
+}
+
+# ---------------------------------------------------------------------------
+
 1;
 
 =back
--- lib/Mail/SpamAssassin/Plugin/BodyEval.pm (revision 631982)
+++ lib/Mail/SpamAssassin/Plugin/BodyEval.pm (working copy)
@@ -240,7 +240,15 @@
   my ($type, $rnd) = $part->rendered();
   return unless $type;
 
-  foreach ( $rnd =~ /^\s*([^:\s][^:\n]{2,29})\s*:\s*\S/mg ) {
+  # bug 5644,5717: avoid pathological cases where a regexp takes massive amount
+  # of time by applying the regexp to limited-size text chunks, one at a time
+
+  foreach my $rnd_chunk (
+    Mail::SpamAssassin::Message::split_into_array_of_short_paragraphs($rnd))
+  {
+  foreach ( $rnd_chunk =~ /^\s*([^:\s][^:\n]{2,29})\s*:\s*\S/mg ) {
+
+  # indentation intentionally wrong for 3.2.5 to minimize a patch size
     my $str = lc $_;
     $str =~ tr/a-z//cd;
     #$str =~ s/([a-z])0([a-z])/$1o$2/g;
@@ -262,6 +270,7 @@
       $hits{$1}++;
       dbg("eval: stock info hit: $1");
     }
+  }
   }
 
   $pms->{stock_info} = scalar keys %hits;