--- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/AsyncLoop.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/AsyncLoop.pm Wed Oct 10 19:26:08 2007 @@ -44,2 +44,8 @@ +# Load Time::HiRes if it's available +BEGIN { + eval { require Time::HiRes }; + Time::HiRes->import( qw(time) ) unless $@; +} + ############################################################################# @@ -53,7 +59,8 @@ main => $main, - last_count => 0, - times_count_was_same => 0, queries_started => 0, queries_completed => 0, - pending_lookups => { } + total_queries_started => 0, + total_queries_completed => 0, + pending_lookups => { }, + timing_by_query => { }, }; @@ -104,4 +111,12 @@ -A code reference, which will be called when the lookup has been reported as -complete via C or C. +A code reference which will be called when an asynchronous task (e.g. a +DNS lookup) is completed, either normally, or aborted, e.g. by a timeout. + +When a task has been reported as completed via C +the response (as provided to C) is stored in +$ent->{response_packet} (possibly undef, its semantics is defined by the +caller). When completion is reported via C or a +task was aborted, the $ent->{response_packet} is guaranteed to be undef. +If it is necessary to distinguish between the last two cases, the +$ent->{status} may be examined for a string 'ABORTING' or 'FINISHED'. @@ -109,2 +124,24 @@ +=item timeout_initial (optional) + +An initial value of elapsed time for which we are willing to wait for a +response (time in seconds, floating point value is allowed). When elapsed +time since a query started exceeds the timeout value and there are no other +queries to wait for, the query is aborted. The actual timeout value may +shrink dynamically by some factor from a timeout_initial value, the factor +is derived from a number of queries that have already completed, but the +value can not fall below timeout_min (see next parameter). + +If a caller does not explicitly provide this parameter or its value is +undefined, a default initial timeout value is settable by a configuration +variable rbl_timeout. + +If value of the timeout_initial parameter is already below timeout_min, +it is pushed up to timeout_min. + +=item timeout_min (optional) + +A lower bound (in seconds) below which the timeout value can not be +reduced by a dynamic timeout shrinkage formula. Defaults to 1 second. + =back @@ -122,5 +159,25 @@ + my $now = time; + my $key = $ent->{key}; + my $id = $ent->{id}; + $ent->{status} = 'STARTED'; + $ent->{start_time} = $now if !defined $ent->{start_time}; + + $ent->{timeout_min} = 1 if !defined $ent->{timeout_min}; # limits shrinkage + my $timeout = $ent->{timeout_initial}; + $timeout = $self->{main}->{conf}->{rbl_timeout} if !defined $timeout; + $timeout = $ent->{timeout_min} if $timeout < $ent->{timeout_min}; + $ent->{timeout_initial} = $timeout; + + $ent->{display_id} = # identifies entry in debug logging and similar + join(", ", grep { defined } + map { ref $ent->{$_} ? @{$ent->{$_}} : $ent->{$_} } + qw(sets rules rulename type key) ); + $self->{queries_started}++; - $self->{pending_lookups}->{$ent->{key}} = $ent; - $self->{last_start_lookup_time} = time; + $self->{total_queries_started}++; + $self->{pending_lookups}->{$key} = $ent; + + dbg("async: starting: %s (timeout %.1fs, min %.1fs)", + $ent->{display_id}, $ent->{timeout_initial}, $ent->{timeout_min}); $ent; @@ -166,2 +223,18 @@ +=item $async->log_lookups_timing() + +Log sorted timing for all completed lookups. + +=cut + +sub log_lookups_timing { + my ($self) = @_; + my $timings = $self->{timing_by_query}; + for my $key (sort { $timings->{$a} <=> $timings->{$b} } keys %$timings) { + dbg("async: timing: %.3f %s", $timings->{$key}, $key); + } +} + +# --------------------------------------------------------------------------- + =item $alldone = $async->complete_lookups() @@ -178,11 +251,10 @@ sub complete_lookups { - my ($self, $timeout) = @_; - my %typecount = (); - my $stillwaiting = 0; + my ($self, $timeout, $allow_aborting_of_expired) = @_; + my $alldone = 0; + my $anydone = 0; + my $waiting_time = 0; + my $allexpired = 1; + my %typecount; my $pending = $self->{pending_lookups}; - if (scalar keys %{$pending} <= 0) { - return 1; # nothing left to do - } - $self->{queries_started} = 0; @@ -190,2 +262,32 @@ + my $now = time; + + if (defined $timeout && $timeout > 0 && + %$pending && $self->{total_queries_started} > 0) + { + # shrink 'select' timeout if a caller specified unnecessarily long + # value, beyond the latest deadline of any outstanding request; + # can save needless wait time (up to 1 second in harvest_dnsbl_queries) + my $timeout_shrink_factor = + 1 - 0.7 * ( ($self->{total_queries_completed} / + $self->{total_queries_started}) ** 2 ); + my $max_deadline; + while (my($key,$ent) = each %$pending) { + my $dt = $ent->{timeout_initial} * $timeout_shrink_factor; + # don't shrink below a timeout's lower bound + $dt = $ent->{timeout_min} if $dt < $ent->{timeout_min}; + my $deadline = $ent->{start_time} + $dt; + if (!defined $max_deadline || $deadline > $max_deadline) { + $max_deadline = $deadline; + } + } + my $sufficient_timeout = $max_deadline - $now; + $sufficient_timeout = 0 if $sufficient_timeout < 0; + if (defined $max_deadline && $timeout > $sufficient_timeout) { + dbg("async: reducing select timeout from %.1f to %.1f s", + $timeout, $sufficient_timeout); + $timeout = $sufficient_timeout; + } + } + # trap this loop in an eval { } block, as Net::DNS could throw @@ -195,36 +297,57 @@ - my $nfound = $self->{main}->{resolver}->poll_responses($timeout); - $nfound ||= 'no'; - dbg ("async: select found $nfound socks ready"); - - foreach my $key (keys %{$pending}) { - my $ent = $pending->{$key}; + if (%$pending) { # any outstanding requests still? + $self->{last_poll_responses_time} = $now; + my $nfound = $self->{main}->{resolver}->poll_responses($timeout); + dbg("async: select found %s responses ready (t.o.=%.1f)", + !$nfound ? 'no' : $nfound, $timeout); + } + $now = time; # capture new timestamp, after possible sleep in 'select' - # call a "poll_callback" sub, if one exists - if (defined $ent->{poll_callback}) { + while (my($key,$ent) = each %$pending) { + my $id = $ent->{id}; + if (defined $ent->{poll_callback}) { # call a "poll_callback" if exists + # be nice, provide fresh info to a callback routine + $ent->{status} = 'FINISHED' if exists $self->{finished}->{$id}; + # a callback might call set_response_packet() or report_id_complete() + # dbg("async: calling poll_callback on key $key"); $ent->{poll_callback}->($ent); } - - my $type = $ent->{type}; - if (!exists ($self->{finished}->{$ent->{id}})) { - $typecount{$type}++; - next; - } - - $ent->{response_packet} = delete $self->{finished}->{$ent->{id}}; - if (defined $ent->{completed_callback}) { - $ent->{completed_callback}->($ent); + my $finished = exists $self->{finished}->{$id}; + if ($finished) { + $anydone = 1; + delete $self->{finished}->{$id}; + $ent->{status} = 'FINISHED'; + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + my $elapsed = $ent->{finish_time} - $ent->{start_time}; + dbg("async: completed in %.3f s: %s", $elapsed, $ent->{display_id}); + + # call a "completed_callback" sub, if one exists + if (defined $ent->{completed_callback}) { + # dbg("async: calling completed_callback on key $key"); + $ent->{completed_callback}->($ent); + } + $self->{timing_by_query}->{". $key"} += $elapsed; + $self->{queries_completed}++; + $self->{total_queries_completed}++; + delete $pending->{$key}; } - - $self->{queries_completed}++; - delete $self->{pending_lookups}->{$key}; } - dbg("async: queries completed: ".$self->{queries_completed}. - " started: ".$self->{queries_started}); - - if (1) { - dbg("async: queries active: ". - join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ". - localtime(time)); + if (%$pending) { # still any requests outstanding? are they expired? + my $timeout_shrink_factor = + !$allow_aborting_of_expired || !$self->{total_queries_started} ? 1.0 + : 1 - 0.7 * ( ($self->{total_queries_completed} / + $self->{total_queries_started}) ** 2 ); + dbg("async: timeout shrink factor: %.2f", + $timeout_shrink_factor) if $timeout_shrink_factor != 1; + + while (my($key,$ent) = each %$pending) { + $typecount{$ent->{type}}++; + my $dt = $ent->{timeout_initial} * $timeout_shrink_factor; + # don't shrink below a timeout's lower bound + $dt = $ent->{timeout_min} if $dt < $ent->{timeout_min}; + $allexpired = 0 if $now <= $ent->{start_time} + $dt; + } + dbg("async: queries completed: %d, started: %d", + $self->{queries_completed}, $self->{queries_started}); } @@ -232,34 +355,26 @@ # ensure we don't get stuck if a request gets lost in the ether. - if (!$stillwaiting) { - my $numkeys = scalar keys %{$self->{pending_lookups}}; - if ($numkeys == 0) { - $stillwaiting = 0; - - } else { - $stillwaiting = 1; - - # avoid looping forever if we haven't got all results. - if ($self->{last_count} == $numkeys) { - $self->{times_count_was_same}++; - if ($self->{times_count_was_same} > 20) - { - dbg("async: escaping: must have lost requests"); - $self->abort_remaining_lookups(); - $stillwaiting = 0; - } - } else { - $self->{last_count} = $numkeys; - $self->{times_count_was_same} = 0; - } - } + if (! %$pending) { + $alldone = 1; } + elsif ($allexpired && $allow_aborting_of_expired) { + # avoid looping forever if we haven't got all results. + dbg("async: escaping: lost or timed out requests or responses"); + $self->abort_remaining_lookups(); + $alldone = 1; + } + else { + dbg("async: queries active: %s%s at %s", + join (' ', map { "$_=$typecount{$_}" } sort keys %typecount), + $allexpired ? ', all expired' : '', scalar(localtime(time))); + $alldone = 0; + } + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("async: caught complete_lookups death, aborting: $eval_stat"); + $alldone = 1; # abort remaining }; - if ($@) { - dbg("async: caught complete_lookups death, aborting: $@"); - $stillwaiting = 0; # abort remaining - } - - return (!$stillwaiting); + return wantarray ? ($alldone,$anydone,$waiting_time) : $alldone; } @@ -278,8 +393,22 @@ my $pending = $self->{pending_lookups}; - my $foundone = 0; - foreach my $key (keys %{$pending}) - { - if (!$foundone) { - dbg("async: aborting remaining lookups"); - $foundone = 1; + my $foundcnt = 0; + my $now = time; + while (my($key,$ent) = each %$pending) { + dbg("async: aborting after %.3f s, %s: %s", + $now - $ent->{start_time}, + (defined $ent->{timeout_initial} && + $now > $ent->{start_time} + $ent->{timeout_initial} + ? 'past original deadline' : 'deadline shrunk'), + $ent->{display_id} ); + $foundcnt++; + $self->{timing_by_query}->{"X $key"} = $now - $ent->{start_time}; + + if (defined $ent->{completed_callback}) { + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + $ent->{response_packet} = undef; + $ent->{status} = 'ABORTING'; + # to avoid breaking third-party plugins which may not test for undefined + # 'response_packet', avoid the callback on aborts for now (3.2.4); + # the following call will be enabled for 3.3.0 : + # $ent->{completed_callback}->($ent); } @@ -288,4 +417,6 @@ } - delete $self->{last_start_lookup_time}; + dbg("async: aborted %d remaining lookups", $foundcnt) if $foundcnt > 0; + delete $self->{last_poll_responses_time}; $self->{main}->{resolver}->bgabort(); + 1; } @@ -294,3 +425,3 @@ -=item $async->set_response_packet($id, $pkt) +=item $async->set_response_packet($id, $pkt, $key, $timestamp) @@ -298,3 +429,5 @@ query, and must match the C supplied in C. C<$pkt> is the -packet object for the response. +packet object for the response. A parameter C<$key> identifies an entry in a +hash %{$self->{pending_lookups}} where the object which spawned this query can +be found, and through which futher information about the query is accessible. @@ -309,7 +442,32 @@ sub set_response_packet { - my ($self, $id, $pkt) = @_; - $self->{finished}->{$id} = $pkt; + my ($self, $id, $pkt, $key, $timestamp) = @_; + $self->{finished}->{$id} = 1; # only key existence matters, any value + $timestamp = time if !defined $timestamp; + my $pending = $self->{pending_lookups}; + if (!defined $key) { # backwards compatibility with 3.2.3 and older plugins + # a third-party plugin did not provide $key in a call, search for it: + if ($id eq $pending->{$id}->{id}) { # I feel lucky, key==id ? + $key = $id; + } else { # then again, maybe not, be more systematic + for my $tkey (keys %$pending) { + if ($id eq $pending->{$tkey}->{id}) { $key = $tkey; last } + } + } + dbg("async: got response on id $id, search found key $key"); + } + if (!defined $key) { + info("async: no key, response packet not remembered, id $id"); + } else { + my $ent = $pending->{$key}; + if ($id ne $ent->{id}) { + info("async: ignoring response, mismatched id $id, expected $ent->{id}"); + } else { + $ent->{finish_time} = $timestamp; + $ent->{response_packet} = $pkt; + } + } + 1; } -=item $async->report_id_complete($id) +=item $async->report_id_complete($id,$key,$key,$timestamp) @@ -324,4 +482,4 @@ sub report_id_complete { - my ($self, $id) = @_; - $self->{finished}->{$id} = undef; + my ($self, $id, $key, $timestamp) = @_; + $self->set_response_packet($id, undef, $key, $timestamp); } @@ -330,7 +488,8 @@ -=item $time = $async->get_last_start_lookup_time() +=item $time = $async->last_poll_responses_time() -Get the time of the last call to C. If C was -never called or C has been called -C will return undef. +Get the time of the last call to C (which is called +from C. If C was never called or +C has been called C +will return undef. @@ -338,8 +497,6 @@ -sub get_last_start_lookup_time { +sub last_poll_responses_time { my ($self) = @_; - return $self->{last_start_lookup_time}; + return $self->{last_poll_responses_time}; } - -# --------------------------------------------------------------------------- --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Dns.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Dns.pm Wed Oct 10 19:26:08 2007 @@ -109,2 +109,3 @@ key => $key, + timeout_initial => $self->{conf}->{rbl_timeout}, type => "DNSBL-".$type, @@ -116,6 +117,5 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -158,2 +158,3 @@ key => $key, + timeout_initial => $self->{conf}->{rbl_timeout}, type => "DNSBL-".$type, @@ -164,6 +165,5 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -228,2 +228,3 @@ +# called as a completion routine to bgsend by DnsResolver::poll_responses; # returns 1 on successful packet processing @@ -330,39 +331,23 @@ - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_until_rule_completes"); + my $result = 0; + my $total_waiting_time = 0; + + for (my $first=1; ; $first=0) { + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; - - # should not give up before at least attempting to collect some responses - # even if previous checks already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) - - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; - - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_until_rule_completes: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; - - if ($self->is_rule_complete($rule)) { - return 1; - } + $result = 1 if $self->is_rule_complete($rule); + last if $result || $alldone; + dbg("dns: harvest_until_rule_completes - check_tick"); $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); - - # complete_lookups could cause a change in get_last_start_lookup_time - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; } + dbg("dns: timing: %.3f s sleeping in harvest_until_rule_completes", + $total_waiting_time) if $total_waiting_time > 0; + + return $result; } @@ -372,55 +357,48 @@ - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_dnsbl_queries"); + my $total_waiting_time = 0; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; + for (my $first=1; ; $first=0) { - # should not give up before at least attempting to collect some responses - # (which may have arrived by now), even if previous checks (like Razor, - # dcc, Botnet, rules) already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; + # the first time around we specify a 0 timeout, which gives + # complete_lookups a chance to ripe any available results and + # abort overdue requests, without needlessly waiting for more - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_dnsbl_queries: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); + last if $alldone; - # complete_lookups() may have called completed_callback, which may call - # start_lookup() again (like in URIDNSBL), so get_last_start_lookup_time - # may have changed and deadline needs to be recomputed - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; # and loop again - } - - dbg("dns: success for " . ($total - @left) . " of $total queries"); - - # timeouts - @left = $self->{async}->get_pending_lookups(); - $now = time; - for my $query (@left) { - my $string = join(", ", grep { defined } - map { ref $query->{$_} ? @{$query->{$_}} : $query->{$_} } - qw(sets rules rulename type key) ); - my $delay = $now - $self->{async}->get_last_start_lookup_time(); - dbg("dns: timeout for $string after $delay seconds"); + dbg("dns: harvest_dnsbl_queries - check_tick"); + $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); } - # and explicitly abort anything left + # explicitly abort anything left $self->{async}->abort_remaining_lookups(); + $self->{async}->log_lookups_timing(); $self->mark_all_async_rules_complete(); + dbg("dns: timing: %.3f s sleeping in harvest_dnsbl_queries", + $total_waiting_time) if $total_waiting_time > 0; + 1; +} + +# collect and process whatever DNS responses have already arrived, +# don't waste time waiting for more, don't poll too often. +# don't abort any queries even if overdue, +sub harvest_completed_queries { + my ($self) = @_; + + # don't bother collecting responses too often + my $last_poll_time = $self->{async}->last_poll_responses_time(); + return if defined $last_poll_time && time - $last_poll_time < 0.1; + + my ($alldone,$anydone) = $self->{async}->complete_lookups(0, 0); + if ($anydone) { + dbg("dns: harvested completed queries"); +# $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); + } } @@ -482,7 +460,8 @@ $nsrecords = $self->{dnscache}->{NS}->{$dom} = [ @nses ]; - }; - if ($@) { - dbg("dns: NS lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: NS lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -516,7 +495,8 @@ $mxrecords = $self->{dnscache}->{MX}->{$dom} = [ @ips ]; - }; - if ($@) { - dbg("dns: MX lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: MX lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -574,8 +554,8 @@ $name = $self->{dnscache}->{PTR}->{$dom} = $name; - }; - - if ($@) { - dbg("dns: PTR lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: PTR lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -616,8 +596,8 @@ $self->{dnscache}->{A}->{$name} = [ @addrs ]; - }; - - if ($@) { - dbg("dns: A lookup failed horribly, perhaps bad resolv.conf setting? ($@)"); + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("dns: A lookup failed horribly, perhaps bad resolv.conf setting? ($eval_stat)"); return undef; - } + }; } @@ -641,3 +621,4 @@ $IS_DNS_AVAILABLE = undef; - dbg("dns: is_dns_available() last checked $diff seconds ago; re-checking"); + dbg("dns: is_dns_available() last checked %.1f seconds ago; re-checking", + $diff); } --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/DnsResolver.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/DnsResolver.pm Wed Oct 10 19:25:50 2007 @@ -51,2 +51,8 @@ +# Load Time::HiRes if it's available +BEGIN { + eval { require Time::HiRes }; + Time::HiRes->import( qw(time) ) unless $@; +} + ########################################################################### @@ -295,9 +301,10 @@ #dbg("dns: new DNS packet time=".time()." host=$host type=$type id=".$packet->id); - }; - - if ($@) { + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; # this can happen if Net::DNS isn't available -- but in this # case this function should never be called! - warn "dns: cannot create Net::DNS::Packet, but new_dns_packet() was called: $@ $!"; - } + warn "dns: cannot create Net::DNS::Packet, but new_dns_packet() was called: $eval_stat"; + }; + return $packet; @@ -340,9 +347,10 @@ -The callback sub will be called with two arguments -- the packet that was -delivered and an id string that fingerprints the query packet and the expected reply. -It is expected that a closure callback be used, like so: +The callback sub will be called with three arguments -- the packet that was +delivered, and an id string that fingerprints the query packet and the expected +reply. The third argument is a timestamp (Unix time, floating point), captured +at the time the packet was collected. It is expected that a closure callback +be used, like so: my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $reply = shift; - my $reply_id = shift; + my ($reply, $reply_id, $timestamp) = @_; $self->got_a_reply ($reply, $reply_id); @@ -350,4 +358,4 @@ -The callback can ignore the reply as an invalid packet sent to the listening port -if the reply id does not match the return value from bgsend. +The callback can ignore the reply as an invalid packet sent to the listening +port if the reply id does not match the return value from bgsend. @@ -386,2 +394,4 @@ return if !$self->{sock}; + my $cnt = 0; + my $waiting_time = 0; @@ -389,39 +399,44 @@ my $rout; - my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); - if (!defined $nfound || $nfound < 0) { - warn "dns: select failed: $!"; - return; - } + for (;;) { + my $now_before = time; + my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); + if (!defined $nfound || $nfound < 0) { + warn "dns: select failed: $!"; + return; + } - if ($nfound == 0) { - return 0; # nothing's ready yet - } + my $now = time; + if ($now > $now_before && (!defined($timeout) || $timeout > 0)) { + $waiting_time += $now - $now_before; + } + $timeout = 0; # next time around collect whatever is available, then exit + last if $nfound == 0; - my $packet = $self->{res}->bgread($self->{sock}); - my $err = $self->{res}->errorstring; + my $packet = $self->{res}->bgread($self->{sock}); + my $err = $self->{res}->errorstring; - if (defined $packet && - defined $packet->header && - defined $packet->question && - defined $packet->answer) - { - my $id = $self->_packet_id($packet); - - my $cb = delete $self->{id_to_callback}->{$id}; - if (!$cb) { - dbg("dns: no callback for id: $id, ignored; packet: ". - ($packet ? $packet->string : "undef")); - return 0; + if (defined $packet && + defined $packet->header && + defined $packet->question && + defined $packet->answer) + { + my $id = $self->_packet_id($packet); + + my $cb = delete $self->{id_to_callback}->{$id}; + if (!$cb) { + dbg("dns: no callback for id: %s, ignored; packet: %s", + $id, $packet ? $packet->string : "undef" ); + } else { + $cb->($packet, $id, $now); + $cnt++; + } + } + else { + dbg("dns: no packet! err=%s packet=%s", + $err, $packet ? $packet->string : "undef" ); } - - $cb->($packet, $id); - return 1; - } - else { - dbg("dns: no packet! err=$err packet=". - ($packet ? $packet->string : "undef")); } - return 0; + return wantarray ? ($cnt, $waiting_time) : $cnt; } @@ -459,2 +474,3 @@ my $answerpkt; + my $answerpkt_avail = 0; for (my $i = 0; @@ -466,3 +482,4 @@ $self->bgsend($name, $type, $class, sub { - $answerpkt = shift; + my ($reply, $reply_id, $timestamp) = @_; + $answerpkt = $reply; $answerpkt_avail = 1; }); @@ -472,8 +489,7 @@ - while (($now < $deadline) && (!defined($answerpkt))) { + while (!$answerpkt_avail) { + if ($now >= $deadline) { $self->{send_timed_out} = 1; last } $self->poll_responses(1); - last if defined $answerpkt; $now = time; } - $self->{send_timed_out} = 1 unless ($now < $deadline); } @@ -541,4 +557,7 @@ my $fno = fileno($sock); - warn "dns: oops! fileno now undef for $sock" unless defined($fno); - vec ($rin, $fno, 1) = 1; + if (!defined $fno) { + warn "dns: oops! fileno now undef for $sock"; + } else { + vec ($rin, $fno, 1) = 1; + } } --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Logger.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Logger.pm Wed Oct 10 19:25:50 2007 @@ -196,5 +196,6 @@ sub _log { - my ($level, $message) = @_; + my ($level, $message, @args) = @_; my $facility = "generic"; + local ($1,$2); if ($message =~ /^(\S+?): (.*)/s) { @@ -211,2 +212,3 @@ + if (@args && index($message,'%') >= 0) { $message = sprintf($message,@args) } $message =~ s/\n+$//s; --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/ASN.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/ASN.pm Wed Oct 10 19:25:50 2007 @@ -214,9 +214,14 @@ my $zone_index = $index; + my $key = "asnlookup-${zone_index}-$entry->{zone}"; my $id = $scanner->{main}->{resolver}->bgsend("${reversed_ip_quad}.$entry->{zone}", 'TXT', undef, sub { - my $pkt = shift; + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); $self->process_dns_result($scanner, $pkt, $zone_index); }); - - $scanner->{async}->start_lookup({ key=>"asnlookup-${zone_index}-$entry->{zone}", id=>$id, type=>'TXT' }); - dbg("asn: launched DNS TXT query for ${reversed_ip_quad}.$entry->{zone} in background"); + my $ent = { + key=>$key, id=>$id, type=>'TXT', + # timeout => ... # defaults to $scanner->{conf}->{rbl_timeout} + }; + $scanner->{async}->start_lookup($ent); + dbg("asn: launched DNS TXT query for $reversed_ip_quad.$entry->{zone} in background"); @@ -235,3 +240,3 @@ - my @answer = $response->answer; + my @answer = !defined $response ? () : $response->answer; --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/Check.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/Check.pm Wed Oct 10 19:25:50 2007 @@ -93,2 +93,3 @@ + $pms->harvest_completed_queries(); # allow other, plugin-defined rule types to be called here @@ -99,15 +100,25 @@ $self->do_head_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_head_eval_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_body_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_uri_tests($pms, $priority, @uris); + $pms->harvest_completed_queries(); $self->do_body_eval_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_rawbody_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_rawbody_eval_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_full_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_full_eval_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_meta_tests($pms, $priority); + $pms->harvest_completed_queries(); @@ -116,2 +127,3 @@ $self->{main}->call_plugins ("check_tick", { permsgstatus => $pms }); + $pms->harvest_completed_queries(); } --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Wed Oct 10 19:26:08 2007 @@ -475,3 +475,5 @@ # dig $dom ns - my $ent = $self->start_lookup ($scanner, 'NS', $self->res_bgsend($scanner, $dom, 'NS'), $key); + my $ent = $self->start_lookup($scanner, 'NS', + $self->res_bgsend($scanner, $dom, 'NS', $key), + $key); $ent->{obj} = $obj; @@ -483,3 +485,3 @@ my $packet = $ent->{response_packet}; - my @answer = $packet->answer; + my @answer = !defined $packet ? () : $packet->answer; @@ -519,3 +521,5 @@ # dig $hname a - my $ent = $self->start_lookup ($scanner, 'A', $self->res_bgsend($scanner, $hname, 'A'), $key); + my $ent = $self->start_lookup($scanner, 'A', + $self->res_bgsend($scanner, $hname, 'A', $key), + $key); $ent->{obj} = $obj; @@ -526,3 +530,5 @@ - foreach my $rr ($ent->{response_packet}->answer) { + my $packet = $ent->{response_packet}; + my @answer = !defined $packet ? () : $packet->answer; + foreach my $rr (@answer) { my $str = $rr->string; @@ -560,4 +566,5 @@ # dig $ip txt - my $ent = $self->start_lookup ($scanner, 'DNSBL', - $self->res_bgsend($scanner, $item, $qtype), $key); + my $ent = $self->start_lookup($scanner, 'DNSBL', + $self->res_bgsend($scanner, $item, $qtype, $key), + $key); $ent->{obj} = $obj; @@ -576,3 +583,3 @@ my $packet = $ent->{response_packet}; - my @answer = $packet->answer; + my @answer = !defined $packet ? () : $packet->answer; @@ -651,2 +658,3 @@ key => $key, + timeout_initial => $scanner->{conf}->{rbl_timeout}, type => "URI-".$type, @@ -655,3 +663,5 @@ my $ent = shift; - $self->completed_lookup_callback ($scanner, $ent); + if (defined $ent->{response_packet}) { # not aborted or empty + $self->completed_lookup_callback ($scanner, $ent); + } } @@ -676,5 +686,2 @@ $self->complete_dnsbl_lookup ($scanner, $ent, $val); - my $totalsecs = (time - $ent->{obj}->{querystart}); - dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ". - $totalsecs." seconds to look up ($val)"); } @@ -685,8 +692,7 @@ sub res_bgsend { - my ($self, $scanner, $host, $type) = @_; + my ($self, $scanner, $host, $type, $key) = @_; return $self->{main}->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; - $scanner->{async}->set_response_packet($id, $pkt); + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); }); --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Wed Aug 8 15:19:11 2007 +++ lib/Mail/SpamAssassin/Util/DependencyInfo.pm Wed Oct 10 19:25:50 2007 @@ -141,4 +141,5 @@ version => '0.00', - desc => 'If this module is installed, the processing times are logged/reported - more precisely in spamd.', + desc => 'If this module is installed, asynchronous DNS lookup timeouts operate + with subsecond precision and the processing times are logged/reported + more accurately. Other modules and plugins may benefit too.', },