Lines 50-55
Link Here
|
50 |
use warnings; |
50 |
use warnings; |
51 |
use Carp; |
51 |
use Carp; |
52 |
|
52 |
|
|
|
53 |
# was used to performance-test rules with the various tmethods. |
54 |
# BEGIN { |
55 |
# eval { require Time::HiRes }; |
56 |
# Time::HiRes->import( qw(gettimeofday) ) unless $@; |
57 |
# Time::HiRes->import( qw(tv_interval) ) unless $@; |
58 |
# } |
59 |
|
53 |
use Mail::SpamAssassin::Constants qw(:sa); |
60 |
use Mail::SpamAssassin::Constants qw(:sa); |
54 |
use Mail::SpamAssassin::EvalTests; |
61 |
use Mail::SpamAssassin::EvalTests; |
55 |
use Mail::SpamAssassin::Conf; |
62 |
use Mail::SpamAssassin::Conf; |
Lines 1772-1778
Link Here
|
1772 |
my ($self, $priority, $textary) = @_; |
1779 |
my ($self, $priority, $textary) = @_; |
1773 |
local ($_); |
1780 |
local ($_); |
1774 |
|
1781 |
|
1775 |
dbg("rules: running body-text per-line regexp tests; score so far=".$self->{score}); |
1782 |
dbg("rules: running body-text regexp tests; score so far=".$self->{score}); |
1776 |
|
1783 |
|
1777 |
my $doing_user_rules = |
1784 |
my $doing_user_rules = |
1778 |
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_BODY_TESTS}; |
1785 |
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_BODY_TESTS}; |
Lines 1810-1824
Link Here
|
1810 |
$evalstr2 .= ' |
1817 |
$evalstr2 .= ' |
1811 |
sub '.$rulename.'_body_test { |
1818 |
sub '.$rulename.'_body_test { |
1812 |
my $self = shift; |
1819 |
my $self = shift; |
1813 |
foreach (@_) { |
1820 |
|
1814 |
'.$self->hash_line_for_rule($rulename).' |
1821 |
# currently using Time::HiRes to do performance testing on |
1815 |
if ('.$pat.') { |
1822 |
# individual rules... |
1816 |
$self->got_pattern_hit(q{'.$rulename.'}, "BODY: "); |
1823 |
# my $start_time = [gettimeofday]; |
1817 |
'. $self->hit_rule_plugin_code($rulename, "body") . ' |
1824 |
|
1818 |
# Ok, we hit, stop now. |
1825 |
my $tmethod = $self->{conf}->{tmethod}->{'.$rulename.'}->{type} || "line"; |
1819 |
last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/; |
1826 |
my $args = $self->{conf}->{tmethod}->{'.$rulename.'}->{args} || ""; |
1820 |
} |
1827 |
|
|
|
1828 |
# tmethod: line and linerange |
1829 |
if ($tmethod =~ m/^line/) { |
1830 |
|
1831 |
my $linecount=1; |
1832 |
my $maxlines = '.$self->{conf}->{tmethod_max_lines}.'; |
1833 |
|
1834 |
foreach (@_) { |
1835 |
|
1836 |
# support predefined line number scanning |
1837 |
if ($tmethod eq "line" && $args =~ m/^\d+$/) { |
1838 |
next unless ($linecount == $args); |
1839 |
} |
1840 |
|
1841 |
# support line range scanning |
1842 |
# do not allow the start/stop delta to exceed the |
1843 |
# config option tmethod_max_lines |
1844 |
elsif ($tmethod eq "linerange" && $args =~ m/(\-?\d+):(\-?\d+)/) { |
1845 |
my ($start,$stop); |
1846 |
if (defined $1) { $start = $1; } else { $start = 0; } |
1847 |
if (defined $2) { $stop = $2; } else { $stop = scalar @_; } |
1848 |
if ($stop - $start > $maxlines) { |
1849 |
$stop = $start + $maxlines; |
1850 |
} |
1851 |
next unless ($linecount >= $start && $linecount < $stop); |
1852 |
} |
1853 |
|
1854 |
$linecount++; |
1855 |
'.$self->hash_line_for_rule($rulename).' |
1856 |
if ('.$pat.') { |
1857 |
$self->got_pattern_hit(q{'.$rulename.'}, "BODY: "); |
1858 |
'. $self->hit_rule_plugin_code($rulename, "body") . ' |
1859 |
# Ok, we hit, stop now. |
1860 |
last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/; |
1861 |
} |
1862 |
} |
1821 |
} |
1863 |
} |
|
|
1864 |
|
1865 |
elsif ($tmethod =~ m/^string/) { |
1866 |
|
1867 |
my ($fastbody,$start_pos,$bytes); |
1868 |
my $maxbytes = '.$self->{conf}->{tmethod_max_bytes}.'; |
1869 |
|
1870 |
# if args are passed to method string, we will set start |
1871 |
# position and number of bytes which will feed into |
1872 |
# a substr call later. if start and stop are not defined |
1873 |
# we manually set start=0 and bytes=tmethod_max_bytes |
1874 |
if ($args && $args =~ m/(\-?\d+):(\-?\d+)/) { |
1875 |
if (defined $1) { $start_pos = $1; } else { $start_pos = 0; } |
1876 |
if (defined $2) { $bytes = $2; } else { $bytes = $maxbytes } |
1877 |
} |
1878 |
else { |
1879 |
$start_pos = 0; |
1880 |
$bytes = $maxbytes; |
1881 |
} |
1882 |
|
1883 |
# append all content lines to fastbody scalar |
1884 |
# until the size of the scalar exceeds tmethod_max_bytes |
1885 |
foreach (@_) { |
1886 |
$fastbody .= $_; |
1887 |
if (length $fastbody > $maxbytes) { |
1888 |
last; |
1889 |
} |
1890 |
} |
1891 |
|
1892 |
# substr the content down based on start pos and # of bytes |
1893 |
$fastbody = substr($fastbody,$start_pos,$bytes); |
1894 |
|
1895 |
# if the length of fastbody exceeds the config option tmethod_max_bytes |
1896 |
# we need to substr it down further to avoid expensive regexp tests |
1897 |
# ie, string <startpos> 512000 could be expensive depending on startpos |
1898 |
my $fblen = length $fastbody; |
1899 |
if ($fblen > $maxbytes) { |
1900 |
$fastbody = substr($fastbody,0,$maxbytes); |
1901 |
} |
1902 |
|
1903 |
# if the tmethod is stringtrim, we need to convert |
1904 |
# newlines to space, and then convert excess whitespace |
1905 |
# to a single space. this is most beneficial in rawbody |
1906 |
# ruletypes as there is no efficient way currently to get |
1907 |
# html into a single trimmed string. |
1908 |
if ($tmethod eq "stringtrim") { |
1909 |
$fastbody =~ s/[\n\r]/ /gs; |
1910 |
$fastbody =~ s/\s+/ /g; |
1911 |
} |
1912 |
|
1913 |
'.$self->hash_line_for_rule($rulename).' |
1914 |
if ($fastbody && $fastbody =~ '.$pat.') { |
1915 |
$self->got_pattern_hit(q{'.$rulename.'}, "BODY: "); |
1916 |
'. $self->hit_rule_plugin_code($rulename, "body") . ' |
1917 |
} |
1918 |
# dbg("rules: '.$rulename.' - start position $start_pos, bytes $bytes, total length $fblen"); |
1919 |
} |
1920 |
else { |
1921 |
warn("rules: invalid method type defined for rule '.$rulename.'"); |
1922 |
} |
1923 |
|
1924 |
# my $elapsed_time = tv_interval ($start_time, [gettimeofday]); |
1925 |
# dbg("rules: '.$rulename.' took $elapsed_time seconds, using method=[$tmethod] args=[$args]"); |
1926 |
|
1822 |
} |
1927 |
} |
1823 |
'; |
1928 |
'; |
1824 |
} |
1929 |
} |
Lines 2312-2329
Link Here
|
2312 |
|
2417 |
|
2313 |
$evalstr2 .= ' |
2418 |
$evalstr2 .= ' |
2314 |
sub '.$rulename.'_rawbody_test { |
2419 |
sub '.$rulename.'_rawbody_test { |
2315 |
my $self = shift; |
2420 |
my $self = shift; |
2316 |
foreach (@_) { |
2421 |
|
2317 |
'.$self->hash_line_for_rule($rulename).' |
2422 |
# currently using Time::HiRes to do performance testing on |
2318 |
if ('.$pat.') { |
2423 |
# individual rules... |
2319 |
$self->got_pattern_hit(q{'.$rulename.'}, "RAW: "); |
2424 |
# my $start_time = [gettimeofday]; |
2320 |
'. $self->hit_rule_plugin_code($rulename, "rawbody") . ' |
2425 |
|
2321 |
# Ok, we hit, stop now. |
2426 |
my $tmethod = $self->{conf}->{tmethod}->{'.$rulename.'}->{type} || "line"; |
2322 |
last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/; |
2427 |
my $args = $self->{conf}->{tmethod}->{'.$rulename.'}->{args} || ""; |
2323 |
} |
2428 |
|
2324 |
} |
2429 |
# tmethod: line and linerange |
2325 |
} |
2430 |
if ($tmethod =~ m/^line/) { |
2326 |
'; |
2431 |
|
|
|
2432 |
my $linecount=1; |
2433 |
my $maxlines = '.$self->{conf}->{tmethod_max_lines_raw}.'; |
2434 |
|
2435 |
foreach (@_) { |
2436 |
|
2437 |
# support predefined line number scanning |
2438 |
if ($tmethod eq "line" && $args =~ m/^\d+$/) { |
2439 |
next unless ($linecount == $args); |
2440 |
} |
2441 |
|
2442 |
# support line range scanning |
2443 |
# do not allow the start/stop delta to exceed the |
2444 |
# config option tmethod_max_lines_raw |
2445 |
elsif ($tmethod eq "linerange" && $args =~ m/(\-?\d+):(\-?\d+)/) { |
2446 |
my ($start,$stop); |
2447 |
if (defined $1) { $start = $1; } else { $start = 0; } |
2448 |
if (defined $2) { $stop = $2; } else { $stop = scalar @_; } |
2449 |
if ($stop - $start > $maxlines) { |
2450 |
$stop = $start + $maxlines; |
2451 |
} |
2452 |
next unless ($linecount >= $start && $linecount < $stop); |
2453 |
} |
2454 |
|
2455 |
$linecount++; |
2456 |
'.$self->hash_line_for_rule($rulename).' |
2457 |
if ('.$pat.') { |
2458 |
$self->got_pattern_hit(q{'.$rulename.'}, "RAW: "); |
2459 |
'. $self->hit_rule_plugin_code($rulename, "rawbody") . ' |
2460 |
# Ok, we hit, stop now. |
2461 |
last unless $self->{conf}->{tflags}->{q{'.$rulename.'}} =~ /\bmultiple\b/; |
2462 |
} |
2463 |
} |
2464 |
} |
2465 |
|
2466 |
elsif ($tmethod =~ m/^string/) { |
2467 |
|
2468 |
my ($fastbody,$start_pos,$bytes); |
2469 |
my $maxbytes = '.$self->{conf}->{tmethod_max_bytes_raw}.'; |
2470 |
|
2471 |
# if args are passed to method string, we will set start |
2472 |
# position and number of bytes which will feed into |
2473 |
# a substr call later. if start and stop are not defined |
2474 |
# we manually set start=0 and bytes=tmethod_max_bytes_raw |
2475 |
if ($args && $args =~ m/(\-?\d+):(\-?\d+)/) { |
2476 |
if (defined $1) { $start_pos = $1; } else { $start_pos = 0; } |
2477 |
if (defined $2) { $bytes = $2; } else { $bytes = $maxbytes } |
2478 |
} |
2479 |
else { |
2480 |
$start_pos = 0; |
2481 |
$bytes = $maxbytes; |
2482 |
} |
2483 |
|
2484 |
# append all content lines to fastbody scalar |
2485 |
# until the size of the scalar exceeds tmethod_max_bytes_raw |
2486 |
foreach (@_) { |
2487 |
$fastbody .= $_; |
2488 |
if (length $fastbody > $maxbytes) { |
2489 |
last; |
2490 |
} |
2491 |
} |
2492 |
|
2493 |
# substr the content down based on start pos and # of bytes |
2494 |
$fastbody = substr($fastbody,$start_pos,$bytes); |
2495 |
|
2496 |
# if the length of fastbody exceeds the config option tmethod_max_bytes_raw |
2497 |
# we need to substr it down further to avoid expensive regexp tests |
2498 |
# ie, string <startpos> 512000 could be expensive depending on startpos |
2499 |
my $fblen = length $fastbody; |
2500 |
if ($fblen > $maxbytes) { |
2501 |
$fastbody = substr($fastbody,0,$maxbytes); |
2502 |
} |
2503 |
|
2504 |
# if the tmethod is stringtrim, we need to convert |
2505 |
# newlines to space, and then convert excess whitespace |
2506 |
# to a single space. this is most beneficial in rawbody |
2507 |
# ruletypes as there is no efficient way currently to get |
2508 |
# html into a single trimmed string. |
2509 |
if ($tmethod eq "stringtrim") { |
2510 |
$fastbody =~ s/[\n\r]+/ /gs; |
2511 |
$fastbody =~ s/\s+/ /g; |
2512 |
} |
2513 |
|
2514 |
'.$self->hash_line_for_rule($rulename).' |
2515 |
if ($fastbody && $fastbody =~ '.$pat.') { |
2516 |
$self->got_pattern_hit(q{'.$rulename.'}, "RAW: "); |
2517 |
'. $self->hit_rule_plugin_code($rulename, "rawbody") . ' |
2518 |
} |
2519 |
#dbg("rules: '.$rulename.' - start position $start_pos, bytes $bytes, total length $fblen"); |
2520 |
} |
2521 |
else { |
2522 |
warn("rules: invalid method type defined for rule '.$rulename.'"); |
2523 |
} |
2524 |
|
2525 |
# my $elapsed_time = tv_interval ($start_time, [gettimeofday]); |
2526 |
# dbg("rules: '.$rulename.' took $elapsed_time seconds, using method=[$tmethod] args=[$args]"); |
2527 |
} |
2528 |
'; |
2327 |
} |
2529 |
} |
2328 |
|
2530 |
|
2329 |
# clear out a previous version of this fn, if already defined |
2531 |
# clear out a previous version of this fn, if already defined |