--- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/AsyncLoop.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/AsyncLoop.pm Wed Oct 17 16:42:11 2007 @@ -43,4 +43,17 @@ our @ISA = qw(); +# Load Time::HiRes if it's available +BEGIN { + use vars qw($timer_resolution); + eval { + require Time::HiRes or die "Error loading Time::HiRes: $@, $!"; + Time::HiRes->import( qw(time CLOCK_REALTIME) ); + $timer_resolution = Time::HiRes::clock_getres(CLOCK_REALTIME()); + 1; + } or do { + $timer_resolution = 1; # Perl's builtin timer ticks at one second + }; +} + ############################################################################# @@ -52,9 +65,10 @@ my $self = { main => $main, - last_count => 0, - times_count_was_same => 0, queries_started => 0, queries_completed => 0, - pending_lookups => { } + total_queries_started => 0, + total_queries_completed => 0, + pending_lookups => { }, + timing_by_query => { }, }; @@ -103,9 +117,46 @@ =item completed_callback (optional) -A code reference, which will be called when the lookup has been reported as -complete via C or C. +A code reference which will be called when an asynchronous task (e.g. a +DNS lookup) is completed, either normally, or aborted, e.g. by a timeout. + +When a task has been reported as completed via C +the response (as provided to C) is stored in +$ent->{response_packet} (possibly undef, its semantics is defined by the +caller). When completion is reported via C or a +task was aborted, the $ent->{response_packet} is guaranteed to be undef. +If it is necessary to distinguish between the last two cases, the +$ent->{status} may be examined for a string 'ABORTING' or 'FINISHED'. The code reference will be called with one argument, the C<$ent> object. +=item zone (optional) + +A zone specification (typically a DNS zone name - e.g. host, domain, or RBL) +which may be used as a key to look up per-zone settings. No semantics on this +parameter is imposed by this module. + +=item timeout_initial (optional) + +An initial value of elapsed time for which we are willing to wait for a +response (time in seconds, floating point value is allowed). When elapsed +time since a query started exceeds the timeout value and there are no other +queries to wait for, the query is aborted. The actual timeout value ranges +from timeout_initial and gradually approaches timeout_min (see next parameter) +as the number of already completed queries approaches the number of all +queries started. + +If a caller does not explicitly provide this parameter or its value is +undefined, a default initial timeout value is settable by a configuration +variable rbl_timeout. + +If a value of the timeout_initial parameter is below timeout_min, the initial +timeout is set to timeout_min. + +=item timeout_min (optional) + +A lower bound (in seconds) to which the actual timeout approaches as the +number of queries completed approaches the number of all queries started. +Defaults to 0.2 * timeout_initial. + =back @@ -121,7 +172,33 @@ die "oops, no type" unless $ent->{type}; + my $now = time; + my $key = $ent->{key}; + my $id = $ent->{id}; + $ent->{status} = 'STARTED'; + $ent->{start_time} = $now if !defined $ent->{start_time}; + + my $t_init = $ent->{timeout_initial}; # application-specified has precedence + $t_init = $self->{main}->{conf}->{rbl_timeout} if !defined $t_init; + $t_init = 0 if !defined $t_init; # last-resort default, just in case + + my $t_end = $ent->{timeout_min}; # application-specified has precedence + $t_end = 0.2 * $t_init if !defined $t_end; + $t_end = 0 if $t_end < 0; # just in case + + $t_init = $t_end if $t_init < $t_end; + $ent->{timeout_initial} = $t_init; + $ent->{timeout_min} = $t_end; + + $ent->{display_id} = # identifies entry in debug logging and similar + join(", ", grep { defined } + map { ref $ent->{$_} ? @{$ent->{$_}} : $ent->{$_} } + qw(sets rules rulename type key) ); + $self->{queries_started}++; - $self->{pending_lookups}->{$ent->{key}} = $ent; - $self->{last_start_lookup_time} = time; + $self->{total_queries_started}++; + $self->{pending_lookups}->{$key} = $ent; + + dbg("async: starting: %s (timeout %.1fs, min %.1fs)", + $ent->{display_id}, $ent->{timeout_initial}, $ent->{timeout_min}); $ent; } @@ -165,4 +242,20 @@ # --------------------------------------------------------------------------- +=item $async->log_lookups_timing() + +Log sorted timing for all completed lookups. + +=cut + +sub log_lookups_timing { + my ($self) = @_; + my $timings = $self->{timing_by_query}; + for my $key (sort { $timings->{$a} <=> $timings->{$b} } keys %$timings) { + dbg("async: timing: %.3f %s", $timings->{$key}, $key); + } +} + +# --------------------------------------------------------------------------- + =item $alldone = $async->complete_lookups() @@ -177,16 +270,46 @@ sub complete_lookups { - my ($self, $timeout) = @_; - my %typecount = (); - my $stillwaiting = 0; + my ($self, $timeout, $allow_aborting_of_expired) = @_; + my $alldone = 0; + my $anydone = 0; + my $waiting_time = 0; + my $allexpired = 1; + my %typecount; my $pending = $self->{pending_lookups}; - if (scalar keys %{$pending} <= 0) { - return 1; # nothing left to do - } - $self->{queries_started} = 0; $self->{queries_completed} = 0; + my $now = time; + + if (defined $timeout && $timeout > 0 && + %$pending && $self->{total_queries_started} > 0) + { + # shrink a 'select' timeout if a caller specified unnecessarily long + # value beyond the latest deadline of any outstanding request; + # can save needless wait time (up to 1 second in harvest_dnsbl_queries) + my $r = $self->{total_queries_completed} / $self->{total_queries_started}; + my $r2 = $r * $r; # 0..1 + my $max_deadline; + while (my($key,$ent) = each %$pending) { + my $t_init = $ent->{timeout_initial}; + my $dt = $t_init - ($t_init - $ent->{timeout_min}) * $r2; + my $deadline = $ent->{start_time} + $dt; + $max_deadline = $deadline if $deadline > $max_deadline; + } + if (defined $max_deadline) { + # adjust to timer resolution, only deals with 1s and with fine resolution + $max_deadline = 1 + int $max_deadline + if $timer_resolution == 1 && $max_deadline > int $max_deadline; + my $sufficient_timeout = $max_deadline - $now; + $sufficient_timeout = 0 if $sufficient_timeout < 0; + if ($timeout > $sufficient_timeout) { + dbg("async: reducing select timeout from %.1f to %.1f s", + $timeout, $sufficient_timeout); + $timeout = $sufficient_timeout; + } + } + } + # trap this loop in an eval { } block, as Net::DNS could throw # die()s our way; in particular, process_dnsbl_results() has @@ -194,73 +317,84 @@ eval { - my $nfound = $self->{main}->{resolver}->poll_responses($timeout); - $nfound ||= 'no'; - dbg ("async: select found $nfound socks ready"); - - foreach my $key (keys %{$pending}) { - my $ent = $pending->{$key}; + if (%$pending) { # any outstanding requests still? + $self->{last_poll_responses_time} = $now; + my $nfound = $self->{main}->{resolver}->poll_responses($timeout); + dbg("async: select found %s responses ready (t.o.=%.1f)", + !$nfound ? 'no' : $nfound, $timeout); + } + $now = time; # capture new timestamp, after possible sleep in 'select' - # call a "poll_callback" sub, if one exists - if (defined $ent->{poll_callback}) { + while (my($key,$ent) = each %$pending) { + my $id = $ent->{id}; + if (defined $ent->{poll_callback}) { # call a "poll_callback" if exists + # be nice, provide fresh info to a callback routine + $ent->{status} = 'FINISHED' if exists $self->{finished}->{$id}; + # a callback might call set_response_packet() or report_id_complete() + # dbg("async: calling poll_callback on key $key"); $ent->{poll_callback}->($ent); } - - my $type = $ent->{type}; - if (!exists ($self->{finished}->{$ent->{id}})) { - $typecount{$type}++; - next; - } - - $ent->{response_packet} = delete $self->{finished}->{$ent->{id}}; - if (defined $ent->{completed_callback}) { - $ent->{completed_callback}->($ent); + my $finished = exists $self->{finished}->{$id}; + if ($finished) { + $anydone = 1; + delete $self->{finished}->{$id}; + $ent->{status} = 'FINISHED'; + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + my $elapsed = $ent->{finish_time} - $ent->{start_time}; + dbg("async: completed in %.3f s: %s", $elapsed, $ent->{display_id}); + + # call a "completed_callback" sub, if one exists + if (defined $ent->{completed_callback}) { + # dbg("async: calling completed_callback on key $key"); + $ent->{completed_callback}->($ent); + } + $self->{timing_by_query}->{". $key"} += $elapsed; + $self->{queries_completed}++; + $self->{total_queries_completed}++; + delete $pending->{$key}; } - - $self->{queries_completed}++; - delete $self->{pending_lookups}->{$key}; } - dbg("async: queries completed: ".$self->{queries_completed}. - " started: ".$self->{queries_started}); - - if (1) { - dbg("async: queries active: ". - join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ". - localtime(time)); + if (%$pending) { # still any requests outstanding? are they expired? + my $r = + !$allow_aborting_of_expired || !$self->{total_queries_started} ? 1.0 + : $self->{total_queries_completed} / $self->{total_queries_started}; + my $r2 = $r * $r; # 0..1 + while (my($key,$ent) = each %$pending) { + $typecount{$ent->{type}}++; + my $t_init = $ent->{timeout_initial}; + my $dt = $t_init - ($t_init - $ent->{timeout_min}) * $r2; + # adjust to timer resolution, only deals with 1s and fine resolution + $dt = 1 + int $dt if $timer_resolution == 1 && $dt > int $dt; + $allexpired = 0 if $now <= $ent->{start_time} + $dt; + } + dbg("async: queries completed: %d, started: %d", + $self->{queries_completed}, $self->{queries_started}); } # ensure we don't get stuck if a request gets lost in the ether. - if (!$stillwaiting) { - my $numkeys = scalar keys %{$self->{pending_lookups}}; - if ($numkeys == 0) { - $stillwaiting = 0; - - } else { - $stillwaiting = 1; - - # avoid looping forever if we haven't got all results. - if ($self->{last_count} == $numkeys) { - $self->{times_count_was_same}++; - if ($self->{times_count_was_same} > 20) - { - dbg("async: escaping: must have lost requests"); - $self->abort_remaining_lookups(); - $stillwaiting = 0; - } - } else { - $self->{last_count} = $numkeys; - $self->{times_count_was_same} = 0; - } - } + if (! %$pending) { + $alldone = 1; } + elsif ($allexpired && $allow_aborting_of_expired) { + # avoid looping forever if we haven't got all results. + dbg("async: escaping: lost or timed out requests or responses"); + $self->abort_remaining_lookups(); + $alldone = 1; + } + else { + dbg("async: queries active: %s%s at %s", + join (' ', map { "$_=$typecount{$_}" } sort keys %typecount), + $allexpired ? ', all expired' : '', scalar(localtime(time))); + $alldone = 0; + } + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("async: caught complete_lookups death, aborting: $eval_stat"); + $alldone = 1; # abort remaining }; - if ($@) { - dbg("async: caught complete_lookups death, aborting: $@"); - $stillwaiting = 0; # abort remaining - } - - return (!$stillwaiting); + return wantarray ? ($alldone,$anydone,$waiting_time) : $alldone; } @@ -277,25 +411,43 @@ my $pending = $self->{pending_lookups}; - my $foundone = 0; - foreach my $key (keys %{$pending}) - { - if (!$foundone) { - dbg("async: aborting remaining lookups"); - $foundone = 1; + my $foundcnt = 0; + my $now = time; + while (my($key,$ent) = each %$pending) { + dbg("async: aborting after %.3f s, %s: %s", + $now - $ent->{start_time}, + (defined $ent->{timeout_initial} && + $now > $ent->{start_time} + $ent->{timeout_initial} + ? 'past original deadline' : 'deadline shrunk'), + $ent->{display_id} ); + $foundcnt++; + $self->{timing_by_query}->{"X $key"} = $now - $ent->{start_time}; + + if (defined $ent->{completed_callback}) { + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + $ent->{response_packet} = undef; + $ent->{status} = 'ABORTING'; + # to avoid breaking third-party plugins which may not test for undefined + # 'response_packet', avoid the callback on aborts for now (3.2.4); + # the following call will be enabled for 3.3.0 : + # $ent->{completed_callback}->($ent); } delete $pending->{$key}; } - delete $self->{last_start_lookup_time}; + dbg("async: aborted %d remaining lookups", $foundcnt) if $foundcnt > 0; + delete $self->{last_poll_responses_time}; $self->{main}->{resolver}->bgabort(); + 1; } # --------------------------------------------------------------------------- -=item $async->set_response_packet($id, $pkt) +=item $async->set_response_packet($id, $pkt, $key, $timestamp) Register a "response packet" for a given query. C<$id> is the ID for the query, and must match the C supplied in C. C<$pkt> is the -packet object for the response. +packet object for the response. A parameter C<$key> identifies an entry in a +hash %{$self->{pending_lookups}} where the object which spawned this query can +be found, and through which futher information about the query is accessible. If this was called, C<$pkt> will be available in the C @@ -308,9 +460,34 @@ sub set_response_packet { - my ($self, $id, $pkt) = @_; - $self->{finished}->{$id} = $pkt; + my ($self, $id, $pkt, $key, $timestamp) = @_; + $self->{finished}->{$id} = 1; # only key existence matters, any value + $timestamp = time if !defined $timestamp; + my $pending = $self->{pending_lookups}; + if (!defined $key) { # backwards compatibility with 3.2.3 and older plugins + # a third-party plugin did not provide $key in a call, search for it: + if ($id eq $pending->{$id}->{id}) { # I feel lucky, key==id ? + $key = $id; + } else { # then again, maybe not, be more systematic + for my $tkey (keys %$pending) { + if ($id eq $pending->{$tkey}->{id}) { $key = $tkey; last } + } + } + dbg("async: got response on id $id, search found key $key"); + } + if (!defined $key) { + info("async: no key, response packet not remembered, id $id"); + } else { + my $ent = $pending->{$key}; + if ($id ne $ent->{id}) { + info("async: ignoring response, mismatched id $id, expected $ent->{id}"); + } else { + $ent->{finish_time} = $timestamp; + $ent->{response_packet} = $pkt; + } + } + 1; } -=item $async->report_id_complete($id) +=item $async->report_id_complete($id,$key,$key,$timestamp) Register that a query has completed, and is no longer "pending". C<$id> is the @@ -323,24 +500,23 @@ sub report_id_complete { - my ($self, $id) = @_; - $self->{finished}->{$id} = undef; + my ($self, $id, $key, $timestamp) = @_; + $self->set_response_packet($id, undef, $key, $timestamp); } # --------------------------------------------------------------------------- -=item $time = $async->get_last_start_lookup_time() +=item $time = $async->last_poll_responses_time() -Get the time of the last call to C. If C was -never called or C has been called -C will return undef. +Get the time of the last call to C (which is called +from C. If C was never called or +C has been called C +will return undef. =cut -sub get_last_start_lookup_time { +sub last_poll_responses_time { my ($self) = @_; - return $self->{last_start_lookup_time}; + return $self->{last_poll_responses_time}; } - -# --------------------------------------------------------------------------- 1; --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Conf.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Conf.pm Wed Oct 17 16:45:20 2007 @@ -2412,23 +2412,27 @@ }); -=item rbl_timeout n (default: 15) +=item rbl_timeout t [t_min] (default: 15 3) -All DNS queries are made at the beginning of a check and we try to read the -results at the end. This value specifies the maximum period of time to wait -for an DNS query. If most of the DNS queries have succeeded for a particular -message, then SpamAssassin will not wait for the full period to avoid wasting -time on unresponsive server(s). For the default 15 second timeout, here is a -chart of queries remaining versus the effective timeout in seconds: - - queries left 100% 90% 80% 70% 60% 50% 40% 30% 20% 10% 0% - timeout 15 15 14 14 13 11 10 8 5 3 0 - -In addition, whenever the effective timeout is lowered due to additional query -results returning, the remaining queries are always given at least one more -second before timing out, but the wait time will never exceed rbl_timeout. - -For example, if 20 queries are made at the beginning of a message check and 16 -queries have returned (leaving 20%), the remaining 4 queries must finish -within 5 seconds of the beginning of the check or they will be timed out. +All DNS queries are made at the beginning of a check and we try to read +the results at the end. This value specifies the maximum period of time +(in seconds) to wait for an DNS query. If most of the DNS queries have +succeeded for a particular message, then SpamAssassin will not wait for +the full period to avoid wasting time on unresponsive server(s), but will +shrink the timeout according to a percentage of queries already completed. +As the number of queries remaining approaches 0, the timeout value will +gradually approach a t_min value, which is an optional second parameter +and defaults to 0.2 * t. If t is smaller than t_min, the initial timeout +is set to t_min. Here is a chart of queries remaining versus the timeout +in seconds, for the default 15 second / 3 second timeout setting: + + queries left 100% 90% 80% 70% 60% 50% 40% 30% 20% 10% 0% + timeout 15 14.9 14.5 13.9 13.1 12.0 10.7 9.1 7.3 5.3 3 + +For example, if 20 queries are made at the beginning of a message check +and 16 queries have returned (leaving 20%), the remaining 4 queries should +finish within 7.3 seconds since their query started or they will be timed out. +Note that timed out queries are only aborted when there is nothing else left +for SpamAssassin to do - long evaluation of other rules may grant queries +additional time. =cut @@ -2438,5 +2442,17 @@ is_admin => 1, default => 15, - type => $CONF_TYPE_NUMERIC + code => sub { + my ($self, $key, $value, $line) = @_; + unless (defined $value && $value !~ /^$/) { + return $MISSING_REQUIRED_VALUE; + } + local ($1,$2); + unless ($value =~ /^ ( [+-]? \d+ (?: \. \d*)? ) + (?: \s+ ( [+-]? \d+ (?: \. \d*)? ) )? $/xs) { + return $INVALID_VALUE; + } + $self->{rbl_timeout} = $1+0; + $self->{rbl_timeout_min} = $2+0 if defined $2; + } }); --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Dns.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Dns.pm Wed Oct 17 16:36:37 2007 @@ -108,4 +108,5 @@ my $ent = { key => $key, + zone => $host, # may serve to fetch other per-zone settings type => "DNSBL-".$type, sets => [ ], # filled in below @@ -115,8 +116,7 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -157,4 +157,5 @@ my $ent = { key => $key, + zone => $host, # may serve to fetch other per-zone settings type => "DNSBL-".$type, rules => [ $rule ], @@ -163,8 +164,7 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -329,41 +330,25 @@ my ($self, $rule) = @_; - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_until_rule_completes"); + my $result = 0; + my $total_waiting_time = 0; + + for (my $first=1; ; $first=0) { + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; - - # should not give up before at least attempting to collect some responses - # even if previous checks already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) - - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; - - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_until_rule_completes: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; - - if ($self->is_rule_complete($rule)) { - return 1; - } + $result = 1 if $self->is_rule_complete($rule); + last if $result || $alldone; + dbg("dns: harvest_until_rule_completes - check_tick"); $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); - - # complete_lookups could cause a change in get_last_start_lookup_time - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; } + dbg("dns: timing: %.3f s sleeping in harvest_until_rule_completes", + $total_waiting_time) if $total_waiting_time > 0; + + return $result; } @@ -371,57 +356,50 @@ my ($self) = @_; - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_dnsbl_queries"); + my $total_waiting_time = 0; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; + for (my $first=1; ; $first=0) { - # should not give up before at least attempting to collect some responses - # (which may have arrived by now), even if previous checks (like Razor, - # dcc, Botnet, rules) already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; + # the first time around we specify a 0 timeout, which gives + # complete_lookups a chance to ripe any available results and + # abort overdue requests, without needlessly waiting for more - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_dnsbl_queries: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); + last if $alldone; - # complete_lookups() may have called completed_callback, which may call - # start_lookup() again (like in URIDNSBL), so get_last_start_lookup_time - # may have changed and deadline needs to be recomputed - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; # and loop again - } - - dbg("dns: success for " . ($total - @left) . " of $total queries"); - - # timeouts - @left = $self->{async}->get_pending_lookups(); - $now = time; - for my $query (@left) { - my $string = join(", ", grep { defined } - map { ref $query->{$_} ? @{$query->{$_}} : $query->{$_} } - qw(sets rules rulename type key) ); - my $delay = $now - $self->{async}->get_last_start_lookup_time(); - dbg("dns: timeout for $string after $delay seconds"); + dbg("dns: harvest_dnsbl_queries - check_tick"); + $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); } - # and explicitly abort anything left + # explicitly abort anything left $self->{async}->abort_remaining_lookups(); + $self->{async}->log_lookups_timing(); $self->mark_all_async_rules_complete(); + dbg("dns: timing: %.3f s sleeping in harvest_dnsbl_queries", + $total_waiting_time) if $total_waiting_time > 0; + 1; +} + +# collect and process whatever DNS responses have already arrived, +# don't waste time waiting for more, don't poll too often. +# don't abort any queries even if overdue, +sub harvest_completed_queries { + my ($self) = @_; + + # don't bother collecting responses too often + my $last_poll_time = $self->{async}->last_poll_responses_time(); + return if defined $last_poll_time && time - $last_poll_time < 0.1; + + my ($alldone,$anydone) = $self->{async}->complete_lookups(0, 0); + if ($anydone) { + dbg("dns: harvested completed queries"); +# $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); + } } @@ -481,9 +459,10 @@ } $nsrecords = $self->{dnscache}->{NS}->{$dom} = [ @nses ]; - }; - if ($@) { - dbg("dns: NS lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: NS lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -515,9 +494,10 @@ $mxrecords = $self->{dnscache}->{MX}->{$dom} = [ @ips ]; - }; - if ($@) { - dbg("dns: MX lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: MX lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -573,10 +553,10 @@ $name = $self->{dnscache}->{PTR}->{$dom} = $name; - }; - - if ($@) { - dbg("dns: PTR lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: PTR lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } dbg("dns: PTR for '$dom': '$name'"); @@ -615,10 +595,10 @@ } $self->{dnscache}->{A}->{$name} = [ @addrs ]; - }; - - if ($@) { - dbg("dns: A lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: A lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -640,5 +620,6 @@ if ($dnsopt eq "test" && $diff > $dnsint) { $IS_DNS_AVAILABLE = undef; - dbg("dns: is_dns_available() last checked $diff seconds ago; re-checking"); + dbg("dns: is_dns_available() last checked %.1f seconds ago; re-checking", + $diff); } --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/DnsResolver.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/DnsResolver.pm Wed Oct 17 15:46:34 2007 @@ -50,4 +50,10 @@ our @ISA = qw(); +# Load Time::HiRes if it's available +BEGIN { + eval { require Time::HiRes }; + Time::HiRes->import( qw(time) ) unless $@; +} + ########################################################################### @@ -294,11 +300,12 @@ # a bit noisy, so commented by default... #dbg("dns: new DNS packet time=".time()." host=$host type=$type id=".$packet->id); - }; - - if ($@) { + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; # this can happen if Net::DNS isn't available -- but in this # case this function should never be called! - warn "dns: cannot create Net::DNS::Packet, but new_dns_packet() was called: $@ $!"; - } + warn "dns: cannot create Net::DNS::Packet, but new_dns_packet() was called: $eval_stat"; + }; + return $packet; } @@ -339,16 +346,17 @@ will default to C and C, respectively. -The callback sub will be called with two arguments -- the packet that was -delivered and an id string that fingerprints the query packet and the expected reply. -It is expected that a closure callback be used, like so: +The callback sub will be called with three arguments -- the packet that was +delivered, and an id string that fingerprints the query packet and the expected +reply. The third argument is a timestamp (Unix time, floating point), captured +at the time the packet was collected. It is expected that a closure callback +be used, like so: my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $reply = shift; - my $reply_id = shift; + my ($reply, $reply_id, $timestamp) = @_; $self->got_a_reply ($reply, $reply_id); }); -The callback can ignore the reply as an invalid packet sent to the listening port -if the reply id does not match the return value from bgsend. +The callback can ignore the reply as an invalid packet sent to the listening +port if the reply id does not match the return value from bgsend. =cut @@ -385,44 +393,51 @@ return if $self->{no_resolver}; return if !$self->{sock}; + my $cnt = 0; + my $waiting_time = 0; my $rin = $self->{sock_as_vec}; my $rout; - my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); - if (!defined $nfound || $nfound < 0) { - warn "dns: select failed: $!"; - return; - } + for (;;) { + my $now_before = time; + my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); + if (!defined $nfound || $nfound < 0) { + warn "dns: select failed: $!"; + return; + } - if ($nfound == 0) { - return 0; # nothing's ready yet - } + my $now = time; + if ($now > $now_before && (!defined($timeout) || $timeout > 0)) { + $waiting_time += $now - $now_before; + } + $timeout = 0; # next time around collect whatever is available, then exit + last if $nfound == 0; - my $packet = $self->{res}->bgread($self->{sock}); - my $err = $self->{res}->errorstring; + my $packet = $self->{res}->bgread($self->{sock}); + my $err = $self->{res}->errorstring; - if (defined $packet && - defined $packet->header && - defined $packet->question && - defined $packet->answer) - { - my $id = $self->_packet_id($packet); - - my $cb = delete $self->{id_to_callback}->{$id}; - if (!$cb) { - dbg("dns: no callback for id: $id, ignored; packet: ". - ($packet ? $packet->string : "undef")); - return 0; + if (defined $packet && + defined $packet->header && + defined $packet->question && + defined $packet->answer) + { + my $id = $self->_packet_id($packet); + + my $cb = delete $self->{id_to_callback}->{$id}; + if (!$cb) { + dbg("dns: no callback for id: %s, ignored; packet: %s", + $id, $packet ? $packet->string : "undef" ); + } else { + $cb->($packet, $id, $now); + $cnt++; + } + } + else { + dbg("dns: no packet! err=%s packet=%s", + $err, $packet ? $packet->string : "undef" ); } - - $cb->($packet, $id); - return 1; - } - else { - dbg("dns: no packet! err=$err packet=". - ($packet ? $packet->string : "undef")); } - return 0; + return wantarray ? ($cnt, $waiting_time) : $cnt; } @@ -458,4 +473,5 @@ my $timeout = $retrans; my $answerpkt; + my $answerpkt_avail = 0; for (my $i = 0; (($i < $retries) && !defined($answerpkt)); @@ -465,5 +481,6 @@ # note nifty use of a closure here. I love closures ;) $self->bgsend($name, $type, $class, sub { - $answerpkt = shift; + my ($reply, $reply_id, $timestamp) = @_; + $answerpkt = $reply; $answerpkt_avail = 1; }); @@ -471,10 +488,9 @@ my $deadline = $now + $timeout; - while (($now < $deadline) && (!defined($answerpkt))) { + while (!$answerpkt_avail) { + if ($now >= $deadline) { $self->{send_timed_out} = 1; last } $self->poll_responses(1); - last if defined $answerpkt; $now = time; } - $self->{send_timed_out} = 1 unless ($now < $deadline); } return $answerpkt; @@ -540,6 +556,9 @@ foreach my $sock (@fhlist) { my $fno = fileno($sock); - warn "dns: oops! fileno now undef for $sock" unless defined($fno); - vec ($rin, $fno, 1) = 1; + if (!defined $fno) { + warn "dns: oops! fileno now undef for $sock"; + } else { + vec ($rin, $fno, 1) = 1; + } } return $rin; --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Logger.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Logger.pm Wed Oct 17 15:46:34 2007 @@ -195,7 +195,8 @@ # remember to avoid deep recursion, my friend sub _log { - my ($level, $message) = @_; + my ($level, $message, @args) = @_; my $facility = "generic"; + local ($1,$2); if ($message =~ /^(\S+?): (.*)/s) { $facility = $1; @@ -210,4 +211,5 @@ } + if (@args && index($message,'%') >= 0) { $message = sprintf($message,@args) } $message =~ s/\n+$//s; $message =~ s/^/${facility}: /mg; --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/ASN.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/ASN.pm Wed Oct 17 16:35:22 2007 @@ -213,11 +213,17 @@ # do the DNS query, have the callback process the result rather than poll for them later my $zone_index = $index; - my $id = $scanner->{main}->{resolver}->bgsend("${reversed_ip_quad}.$entry->{zone}", 'TXT', undef, sub { - my $pkt = shift; + my $zone = $reversed_ip_quad . '.' . $entry->{zone}; + my $key = "asnlookup-${zone_index}-$entry->{zone}"; + my $id = $scanner->{main}->{resolver}->bgsend($zone, 'TXT', undef, sub { + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); $self->process_dns_result($scanner, $pkt, $zone_index); }); - - $scanner->{async}->start_lookup({ key=>"asnlookup-${zone_index}-$entry->{zone}", id=>$id, type=>'TXT' }); - dbg("asn: launched DNS TXT query for ${reversed_ip_quad}.$entry->{zone} in background"); + my $ent = { + key=>$key, id=>$id, type=>'TXT', + zone => $zone, # serves to fetch other per-zone settings + }; + $scanner->{async}->start_lookup($ent); + dbg("asn: launched DNS TXT query for $reversed_ip_quad.$entry->{zone} in background"); $index++; @@ -234,5 +240,5 @@ my $route_tag = $conf->{asnlookups}[$zone_index]->{route_tag}; - my @answer = $response->answer; + my @answer = !defined $response ? () : $response->answer; foreach my $rr (@answer) { --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/Check.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/Check.pm Wed Oct 17 15:46:34 2007 @@ -92,4 +92,5 @@ } + $pms->harvest_completed_queries(); # allow other, plugin-defined rule types to be called here $self->{main}->call_plugins ("check_rules_at_priority", @@ -98,21 +99,32 @@ # do head tests $self->do_head_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_head_eval_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_body_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_uri_tests($pms, $priority, @uris); + $pms->harvest_completed_queries(); $self->do_body_eval_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_rawbody_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_rawbody_eval_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_full_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_full_eval_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_meta_tests($pms, $priority); + $pms->harvest_completed_queries(); # we may need to call this more often than once through the loop, but # it needs to be done at least once, either at the beginning or the end. $self->{main}->call_plugins ("check_tick", { permsgstatus => $pms }); + $pms->harvest_completed_queries(); } --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Wed Oct 17 16:35:22 2007 @@ -474,5 +474,7 @@ # dig $dom ns - my $ent = $self->start_lookup ($scanner, 'NS', $self->res_bgsend($scanner, $dom, 'NS'), $key); + my $ent = $self->start_lookup($scanner, $dom, 'NS', + $self->res_bgsend($scanner, $dom, 'NS', $key), + $key); $ent->{obj} = $obj; } @@ -482,5 +484,5 @@ my $packet = $ent->{response_packet}; - my @answer = $packet->answer; + my @answer = !defined $packet ? () : $packet->answer; my $IPV4_ADDRESS = IPV4_ADDRESS; @@ -518,5 +520,7 @@ # dig $hname a - my $ent = $self->start_lookup ($scanner, 'A', $self->res_bgsend($scanner, $hname, 'A'), $key); + my $ent = $self->start_lookup($scanner, $hname, 'A', + $self->res_bgsend($scanner, $hname, 'A', $key), + $key); $ent->{obj} = $obj; } @@ -525,5 +529,7 @@ my ($self, $scanner, $ent, $hname) = @_; - foreach my $rr ($ent->{response_packet}->answer) { + my $packet = $ent->{response_packet}; + my @answer = !defined $packet ? () : $packet->answer; + foreach my $rr (@answer) { my $str = $rr->string; $self->log_dns_result ("A for NS $hname: $str"); @@ -559,6 +565,7 @@ # dig $ip txt - my $ent = $self->start_lookup ($scanner, 'DNSBL', - $self->res_bgsend($scanner, $item, $qtype), $key); + my $ent = $self->start_lookup($scanner, $item, 'DNSBL', + $self->res_bgsend($scanner, $item, $qtype, $key), + $key); $ent->{obj} = $obj; $ent->{rulename} = $rulename; @@ -575,5 +582,5 @@ my $packet = $ent->{response_packet}; - my @answer = $packet->answer; + my @answer = !defined $packet ? () : $packet->answer; my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}}; @@ -646,13 +653,16 @@ sub start_lookup { - my ($self, $scanner, $type, $id, $key) = @_; + my ($self, $scanner, $zone, $type, $id, $key) = @_; my $ent = { key => $key, + zone => $zone, # serves to fetch other per-zone settings type => "URI-".$type, id => $id, completed_callback => sub { my $ent = shift; - $self->completed_lookup_callback ($scanner, $ent); + if (defined $ent->{response_packet}) { # not aborted or empty + $self->completed_lookup_callback ($scanner, $ent); + } } }; @@ -675,7 +685,4 @@ elsif ($type eq 'URI-DNSBL') { $self->complete_dnsbl_lookup ($scanner, $ent, $val); - my $totalsecs = (time - $ent->{obj}->{querystart}); - dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ". - $totalsecs." seconds to look up ($val)"); } } @@ -684,10 +691,9 @@ sub res_bgsend { - my ($self, $scanner, $host, $type) = @_; + my ($self, $scanner, $host, $type, $key) = @_; return $self->{main}->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; - $scanner->{async}->set_response_packet($id, $pkt); + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); }); } --- Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Wed Aug 8 15:19:11 2007 +++ lib/Mail/SpamAssassin/Util/DependencyInfo.pm Wed Oct 17 15:46:34 2007 @@ -140,6 +140,7 @@ module => 'Time::HiRes', version => '0.00', - desc => 'If this module is installed, the processing times are logged/reported - more precisely in spamd.', + desc => 'If this module is installed, asynchronous DNS lookup timeouts operate + with subsecond precision and the processing times are logged/reported + more accurately. Other modules and plugins may benefit too.', }, {