--- ../ori/lib/Mail/SpamAssassin/AsyncLoop.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/AsyncLoop.pm Thu Sep 27 16:32:16 2007 @@ -43,4 +43,10 @@ our @ISA = qw(); +# Load Time::HiRes if it's available +BEGIN { + eval { require Time::HiRes }; + Time::HiRes->import( qw(time) ) unless $@; +} + ############################################################################# @@ -52,9 +58,10 @@ my $self = { main => $main, - last_count => 0, - times_count_was_same => 0, queries_started => 0, queries_completed => 0, - pending_lookups => { } + total_queries_started => 0, + total_queries_completed => 0, + pending_lookups => { }, + timing_by_query => { }, }; @@ -103,6 +110,14 @@ =item completed_callback (optional) -A code reference, which will be called when the lookup has been reported as -complete via C or C. +A code reference which will be called when an asynchronous task (e.g. a +DNS lookup) is completed, either normally, or aborted, e.g. by a timeout. + +When a task has been reported as completed via C +the response (as provided to C) is stored in +$ent->{response_packet} (possibly undef, its semantics is defined by the +caller). When completion is reported via C or a +task was aborted, the $ent->{response_packet} is guaranteed to be undef. +If it is necessary to distinguish between the last two cases, the +$ent->{status} may be examined for a string 'ABORTING' or 'FINISHED'. The code reference will be called with one argument, the C<$ent> object. @@ -117,11 +132,26 @@ my ($self, $ent) = @_; - die "oops, no id" unless $ent->{id}; - die "oops, no key" unless $ent->{key}; - die "oops, no type" unless $ent->{type}; + die "oops, no id" unless $ent->{id} ne ''; + die "oops, no key" unless $ent->{key} ne ''; + die "oops, no type" unless $ent->{type} ne ''; + + my $now = time; + my $key = $ent->{key}; + my $id = $ent->{id}; + $ent->{status} = 'STARTED'; + $ent->{start_time} = $now if !defined $ent->{start_time}; + $ent->{timeout} = + $self->{main}->{conf}->{rbl_timeout} if !defined $ent->{timeout}; + $ent->{display_id} = # identifies entry in debug logging and similar + join(", ", grep { defined } + map { ref $ent->{$_} ? @{$ent->{$_}} : $ent->{$_} } + qw(sets rules rulename type key) ); $self->{queries_started}++; - $self->{pending_lookups}->{$ent->{key}} = $ent; - $self->{last_start_lookup_time} = time; + $self->{total_queries_started}++; + $self->{pending_lookups}->{$key} = $ent; + + dbg("async: starting: %s (timeout %.1f s)", + $ent->{display_id}, $ent->{timeout}); $ent; } @@ -165,4 +195,20 @@ # --------------------------------------------------------------------------- +=item $async->log_lookups_timing() + +Log sorted timing for all completed lookups. + +=cut + +sub log_lookups_timing { + my ($self) = @_; + my $timings = $self->{timing_by_query}; + for my $key (sort { $timings->{$a} <=> $timings->{$b} } keys %$timings) { + dbg("async: timing: %.3f %s", $timings->{$key}, $key); + } +} + +# --------------------------------------------------------------------------- + =item $alldone = $async->complete_lookups() @@ -177,16 +223,47 @@ sub complete_lookups { - my ($self, $timeout) = @_; - my %typecount = (); - my $stillwaiting = 0; + my ($self, $timeout, $allow_aborting_of_expired) = @_; + my $alldone = 0; + my $anydone = 0; + my $waiting_time = 0; + my $allexpired = 1; + my %typecount; my $pending = $self->{pending_lookups}; - if (scalar keys %{$pending} <= 0) { - return 1; # nothing left to do - } - $self->{queries_started} = 0; $self->{queries_completed} = 0; + my $now = time; + + if (defined $timeout && $timeout > 0 && + %$pending && $self->{total_queries_started} > 0) + { + # shrink 'select' timeout if a caller specified unnecessarily long + # value, beyond the latest deadline of any outstanding request; + # can save needless wait time (up to 1 second in harvest_dnsbl_queries) + my $timeout_shrink_factor = + 1 - 0.7 * ( ($self->{total_queries_completed} / + $self->{total_queries_started}) ** 2 ); + my $max_deadline; + while (my($key,$ent) = each %$pending) { + my $dt = $ent->{timeout}; + if ($dt > 1.0) { # only allow shrinking of timeouts over 1 second + $dt *= $timeout_shrink_factor; + $dt = 1.0 if $dt < 1.0; # don't shrink below 1 second + } + my $deadline = $ent->{start_time} + $dt; + if (!defined $max_deadline || $deadline > $max_deadline) { + $max_deadline = $deadline; + } + } + my $sufficient_timeout = $max_deadline - $now; + $sufficient_timeout = 0 if $sufficient_timeout < 0; + if (defined $max_deadline && $timeout > $sufficient_timeout) { + dbg("async: reducing select timeout from %.1f to %.1f s", + $timeout, $sufficient_timeout); + $timeout = $sufficient_timeout; + } + } + # trap this loop in an eval { } block, as Net::DNS could throw # die()s our way; in particular, process_dnsbl_results() has @@ -194,73 +271,86 @@ eval { - my $nfound = $self->{main}->{resolver}->poll_responses($timeout); - $nfound ||= 'no'; - dbg ("async: select found $nfound socks ready"); - - foreach my $key (keys %{$pending}) { - my $ent = $pending->{$key}; + if (%$pending) { # any outstanding requests still? + $self->{last_poll_responses_time} = $now; + my $nfound = $self->{main}->{resolver}->poll_responses($timeout); + dbg("async: select found %s responses ready (t.o.=%.1f)", + !$nfound ? 'no' : $nfound, $timeout); + } + $now = time; # capture new timestamp, after possible sleep in 'select' + while (my($key,$ent) = each %$pending) { + my $id = $ent->{id}; + my $finished = exists $self->{finished}->{$id}; + if ($finished) { + $anydone = 1; + delete $self->{finished}->{$id}; + $ent->{status} = 'FINISHED'; + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + } # call a "poll_callback" sub, if one exists if (defined $ent->{poll_callback}) { $ent->{poll_callback}->($ent); } - - my $type = $ent->{type}; - if (!exists ($self->{finished}->{$ent->{id}})) { - $typecount{$type}++; - next; - } - - $ent->{response_packet} = delete $self->{finished}->{$ent->{id}}; - if (defined $ent->{completed_callback}) { - $ent->{completed_callback}->($ent); + if ($finished) { + my $elapsed = $ent->{finish_time} - $ent->{start_time}; + dbg("async: completed in %.3f s: %s", $elapsed, $ent->{display_id}); + + # call a "completed_callback" sub, if one exists + if (defined $ent->{completed_callback}) { + $ent->{completed_callback}->($ent); + } + $self->{timing_by_query}->{". $key"} += $elapsed; + $self->{queries_completed}++; + $self->{total_queries_completed}++; + delete $pending->{$key}; } - - $self->{queries_completed}++; - delete $self->{pending_lookups}->{$key}; } - dbg("async: queries completed: ".$self->{queries_completed}. - " started: ".$self->{queries_started}); - - if (1) { - dbg("async: queries active: ". - join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ". - localtime(time)); + if (%$pending) { # still any requests outstanding? are they expired? + my $timeout_shrink_factor = + !$allow_aborting_of_expired || !$self->{total_queries_started} ? 1.0 + : 1 - 0.7 * ( ($self->{total_queries_completed} / + $self->{total_queries_started}) ** 2 ); + dbg("async: timeout shrink factor: %.2f", + $timeout_shrink_factor) if $timeout_shrink_factor != 1; + + while (my($key,$ent) = each %$pending) { + $typecount{$ent->{type}}++; + my $dt = $ent->{timeout}; + if ($dt > 1.0) { # only allow shrinking of timeouts over 1 second + $dt *= $timeout_shrink_factor; + $dt = 1.0 if $dt < 1.0; # don't shrink below 1 second + } + $allexpired = 0 if $now <= $ent->{start_time} + $dt; + } + dbg("async: queries completed: %d, started: %d", + $self->{queries_completed}, $self->{queries_started}); } # ensure we don't get stuck if a request gets lost in the ether. - if (!$stillwaiting) { - my $numkeys = scalar keys %{$self->{pending_lookups}}; - if ($numkeys == 0) { - $stillwaiting = 0; - - } else { - $stillwaiting = 1; - - # avoid looping forever if we haven't got all results. - if ($self->{last_count} == $numkeys) { - $self->{times_count_was_same}++; - if ($self->{times_count_was_same} > 20) - { - dbg("async: escaping: must have lost requests"); - $self->abort_remaining_lookups(); - $stillwaiting = 0; - } - } else { - $self->{last_count} = $numkeys; - $self->{times_count_was_same} = 0; - } - } + if (! %$pending) { + $alldone = 1; } + elsif ($allexpired && $allow_aborting_of_expired) { + # avoid looping forever if we haven't got all results. + dbg("async: escaping: lost or timed out requests or responses"); + $self->abort_remaining_lookups(); + $alldone = 1; + } + else { + dbg("async: queries active: %s%s at %s", + join (' ', map { "$_=$typecount{$_}" } sort keys %typecount), + $allexpired ? ', all expired' : '', scalar(localtime(time))); + $alldone = 0; + } + 1; + } or do { + my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; + dbg("async: caught complete_lookups death, aborting: %s", $eval_stat); + $alldone = 1; # abort remaining }; - if ($@) { - dbg("async: caught complete_lookups death, aborting: $@"); - $stillwaiting = 0; # abort remaining - } - - return (!$stillwaiting); + return wantarray ? ($alldone,$anydone,$waiting_time) : $alldone; } @@ -277,16 +367,27 @@ my $pending = $self->{pending_lookups}; - my $foundone = 0; - foreach my $key (keys %{$pending}) - { - if (!$foundone) { - dbg("async: aborting remaining lookups"); - $foundone = 1; + my $foundcnt = 0; + my $now = time; + while (my($key,$ent) = each %$pending) { + dbg("async: aborting after %.3f s, %s: %s", + $now - $ent->{start_time}, + defined $ent->{timeout} && $now > $ent->{start_time} + $ent->{timeout} + ? 'past original deadline' : 'deadline shrunk', + $ent->{display_id} ); + $foundcnt++; + $self->{timing_by_query}->{"X $key"} = $now - $ent->{start_time}; + + if (defined $ent->{completed_callback}) { + $ent->{finish_time} = $now if !defined $ent->{finish_time}; + $ent->{response_packet} = undef; + $ent->{status} = 'ABORTING'; + $ent->{completed_callback}->($ent); } - delete $pending->{$key}; } - delete $self->{last_start_lookup_time}; + dbg("async: aborted %d remaining lookups", $foundcnt) if $foundcnt > 0; + delete $self->{last_poll_responses_time}; $self->{main}->{resolver}->bgabort(); + 1; } @@ -308,9 +409,16 @@ sub set_response_packet { - my ($self, $id, $pkt) = @_; - $self->{finished}->{$id} = $pkt; + my ($self, $id, $pkt, $key, $timestamp) = @_; + $self->{finished}->{$id} = 1; # only key existence matters, any value + $timestamp = time if !defined $timestamp; + my $ent = $self->{pending_lookups}->{$key}; + $id eq $ent->{id} + or die "set_response_packet: PANIC - mismatched id $id, $ent->{id}"; + $ent->{finish_time} = $timestamp; + $ent->{response_packet} = $pkt; + 1; } -=item $async->report_id_complete($id) +=item $async->report_id_complete($id,$key) Register that a query has completed, and is no longer "pending". C<$id> is the @@ -323,24 +431,23 @@ sub report_id_complete { - my ($self, $id) = @_; - $self->{finished}->{$id} = undef; + my ($self, $id, $key, $timestamp) = @_; + $self->set_response_packet($id, undef, $key, $timestamp); } # --------------------------------------------------------------------------- -=item $time = $async->get_last_start_lookup_time() +=item $time = $async->last_poll_responses_time() -Get the time of the last call to C. If C was -never called or C has been called -C will return undef. +Get the time of the last call to C (which is called +from C. If C was never called or +C has been called C +will return undef. =cut -sub get_last_start_lookup_time { +sub last_poll_responses_time { my ($self) = @_; - return $self->{last_start_lookup_time}; + return $self->{last_poll_responses_time}; } - -# --------------------------------------------------------------------------- 1; --- ../ori/lib/Mail/SpamAssassin/Dns.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Dns.pm Thu Sep 27 16:21:13 2007 @@ -104,8 +104,9 @@ # only make a specific query once if (!$existing) { - dbg("dns: launching DNS $type query for $host in background"); + dbg("dns: launching DNS %s query for %s in background", $type,$host); my $ent = { key => $key, + timeout => $self->{conf}->{rbl_timeout}, type => "DNSBL-".$type, sets => [ ], # filled in below @@ -115,8 +116,7 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -153,8 +153,9 @@ return if $self->{async}->get_lookup($key); - dbg("dns: launching DNS $type query for $host in background"); + dbg("dns: launching DNS %s query for %s in background", $type,$host); my $ent = { key => $key, + timeout => $self->{conf}->{rbl_timeout}, type => "DNSBL-".$type, rules => [ $rule ], @@ -163,8 +164,7 @@ my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; + my ($pkt, $id, $timestamp) = @_; $self->process_dnsbl_result($ent, $pkt); - $self->{async}->report_id_complete($id); + $self->{async}->report_id_complete($id,$key,$timestamp); }); @@ -329,41 +330,25 @@ my ($self, $rule) = @_; - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_until_rule_completes"); + my $result = 0; + my $total_waiting_time = 0; + + for (my $first=1; ; $first=0) { + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; - - # should not give up before at least attempting to collect some responses - # even if previous checks already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) - - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; - - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_until_rule_completes: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; - - if ($self->is_rule_complete($rule)) { - return 1; - } + $result = 1 if $self->is_rule_complete($rule); + last if $result || $alldone; + dbg("dns: harvest_until_rule_completes - check_tick"); $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); - - # complete_lookups could cause a change in get_last_start_lookup_time - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; } + dbg("dns: timing: %.3f s sleeping in harvest_until_rule_completes", + $total_waiting_time) if $total_waiting_time > 0; + + return $result; } @@ -371,57 +356,50 @@ my ($self) = @_; - return if !defined $self->{async}->get_last_start_lookup_time(); + dbg("dns: harvest_dnsbl_queries"); + my $total_waiting_time = 0; - my $deadline = $self->{conf}->{rbl_timeout} + $self->{async}->get_last_start_lookup_time(); - my $now = time; + for (my $first=1; ; $first=0) { - # should not give up before at least attempting to collect some responses - # (which may have arrived by now), even if previous checks (like Razor, - # dcc, Botnet, rules) already exceeded rbl_timeout - my $notbefore = $now + 1.2; # at least 1 second from now (time is integer) + # complete_lookups() may call completed_callback(), which may + # call start_lookup() again (like in Plugin::URIDNSBL) - my @left = $self->{async}->get_pending_lookups(); - my $total = scalar @left; + # the first time around we specify a 0 timeout, which gives + # complete_lookups a chance to ripe any available results and + # abort overdue requests, without needlessly waiting for more - while ( (($now < $deadline) || ($now < $notbefore)) && - !$self->{async}->complete_lookups(1)) - { - dbg(sprintf("dns: harvest_dnsbl_queries: on extended time, ". - "overdue by %.3f s, still %.3f s", - $now-$deadline, $notbefore-$now)) if $now >= $deadline; + my ($alldone,$anydone,$waiting_time) = + $self->{async}->complete_lookups($first ? 0 : 1.0, 1); + $total_waiting_time += $waiting_time; - $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); - @left = $self->{async}->get_pending_lookups(); + last if $alldone; - # complete_lookups() may have called completed_callback, which may call - # start_lookup() again (like in URIDNSBL), so get_last_start_lookup_time - # may have changed and deadline needs to be recomputed - $deadline = $self->{conf}->{rbl_timeout} + - $self->{async}->get_last_start_lookup_time(); - - # dynamic timeout - my $dynamic = (int($self->{conf}->{rbl_timeout} - * (1 - 0.7*(($total - @left) / $total) ** 2) + 1) - + $self->{async}->get_last_start_lookup_time()); - $deadline = $dynamic if ($dynamic < $deadline); - $now = time; # and loop again - } - - dbg("dns: success for " . ($total - @left) . " of $total queries"); - - # timeouts - @left = $self->{async}->get_pending_lookups(); - $now = time; - for my $query (@left) { - my $string = join(", ", grep { defined } - map { ref $query->{$_} ? @{$query->{$_}} : $query->{$_} } - qw(sets rules rulename type key) ); - my $delay = $now - $self->{async}->get_last_start_lookup_time(); - dbg("dns: timeout for $string after $delay seconds"); + dbg("dns: harvest_dnsbl_queries - check_tick"); + $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); } - # and explicitly abort anything left + # explicitly abort anything left $self->{async}->abort_remaining_lookups(); + $self->{async}->log_lookups_timing(); $self->mark_all_async_rules_complete(); + dbg("dns: timing: %.3f s sleeping in harvest_dnsbl_queries", + $total_waiting_time) if $total_waiting_time > 0; + 1; +} + +# collect and process whatever DNS responses have already arrived, +# don't waste time waiting for more, don't poll too often. +# don't abort any queries even if overdue, +sub harvest_completed_queries { + my ($self) = @_; + + # don't bother collecting responses too often + my $last_poll_time = $self->{async}->last_poll_responses_time(); + return if defined $last_poll_time && time - $last_poll_time < 0.1; + + my ($alldone,$anydone) = $self->{async}->complete_lookups(0, 0); + if ($anydone) { + dbg("dns: harvested completed queries"); +# $self->{main}->call_plugins ("check_tick", { permsgstatus => $self }); + } } @@ -640,5 +618,6 @@ if ($dnsopt eq "test" && $diff > $dnsint) { $IS_DNS_AVAILABLE = undef; - dbg("dns: is_dns_available() last checked $diff seconds ago; re-checking"); + dbg("dns: is_dns_available() last checked %.1f seconds ago; re-checking", + $diff); } --- ../ori/lib/Mail/SpamAssassin/DnsResolver.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/DnsResolver.pm Thu Sep 27 16:21:13 2007 @@ -50,4 +50,10 @@ our @ISA = qw(); +# Load Time::HiRes if it's available +BEGIN { + eval { require Time::HiRes }; + Time::HiRes->import( qw(time) ) unless $@; +} + ########################################################################### @@ -339,16 +347,17 @@ will default to C and C, respectively. -The callback sub will be called with two arguments -- the packet that was -delivered and an id string that fingerprints the query packet and the expected reply. -It is expected that a closure callback be used, like so: +The callback sub will be called with three arguments -- the packet that was +delivered, and an id string that fingerprints the query packet and the expected +reply. The third argument is a timestamp (Unix time, floating point), captured +at the time the packet was collected. It is expected that a closure callback +be used, like so: my $id = $self->{resolver}->bgsend($host, $type, undef, sub { - my $reply = shift; - my $reply_id = shift; + my ($reply, $reply_id, $timestamp) = @_; $self->got_a_reply ($reply, $reply_id); }); -The callback can ignore the reply as an invalid packet sent to the listening port -if the reply id does not match the return value from bgsend. +The callback can ignore the reply as an invalid packet sent to the listening +port if the reply id does not match the return value from bgsend. =cut @@ -385,44 +394,51 @@ return if $self->{no_resolver}; return if !$self->{sock}; + my $cnt = 0; + my $waiting_time = 0; my $rin = $self->{sock_as_vec}; my $rout; - my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); - if (!defined $nfound || $nfound < 0) { - warn "dns: select failed: $!"; - return; - } + for (;;) { + my $now_before = time; + my ($nfound, $timeleft) = select($rout=$rin, undef, undef, $timeout); + if (!defined $nfound || $nfound < 0) { + warn "dns: select failed: $!"; + return; + } - if ($nfound == 0) { - return 0; # nothing's ready yet - } + my $now = time; + if ($now > $now_before && (!defined($timeout) || $timeout > 0)) { + $waiting_time += $now - $now_before; + } + $timeout = 0; # next time around collect whatever is available, then exit + last if $nfound == 0; - my $packet = $self->{res}->bgread($self->{sock}); - my $err = $self->{res}->errorstring; + my $packet = $self->{res}->bgread($self->{sock}); + my $err = $self->{res}->errorstring; - if (defined $packet && - defined $packet->header && - defined $packet->question && - defined $packet->answer) - { - my $id = $self->_packet_id($packet); - - my $cb = delete $self->{id_to_callback}->{$id}; - if (!$cb) { - dbg("dns: no callback for id: $id, ignored; packet: ". - ($packet ? $packet->string : "undef")); - return 0; + if (defined $packet && + defined $packet->header && + defined $packet->question && + defined $packet->answer) + { + my $id = $self->_packet_id($packet); + + my $cb = delete $self->{id_to_callback}->{$id}; + if (!$cb) { + dbg("dns: no callback for id: %s, ignored; packet: %s", + $id, $packet ? $packet->string : "undef" ); + } else { + $cb->($packet, $id, $now); + $cnt++; + } + } + else { + dbg("dns: no packet! err=%s packet=%s", + $err, $packet ? $packet->string : "undef" ); } - - $cb->($packet, $id); - return 1; - } - else { - dbg("dns: no packet! err=$err packet=". - ($packet ? $packet->string : "undef")); } - return 0; + return wantarray ? ($cnt, $waiting_time) : $cnt; } @@ -458,4 +474,5 @@ my $timeout = $retrans; my $answerpkt; + my $answerpkt_avail = 0; for (my $i = 0; (($i < $retries) && !defined($answerpkt)); @@ -465,5 +482,6 @@ # note nifty use of a closure here. I love closures ;) $self->bgsend($name, $type, $class, sub { - $answerpkt = shift; + my ($reply, $reply_id, $timestamp) = @_; + $answerpkt = $reply; $answerpkt_avail = 1; }); @@ -471,10 +489,9 @@ my $deadline = $now + $timeout; - while (($now < $deadline) && (!defined($answerpkt))) { + while (!$answerpkt_avail) { + if ($now >= $deadline) { $self->{send_timed_out} = 1; last } $self->poll_responses(1); - last if defined $answerpkt; $now = time; } - $self->{send_timed_out} = 1 unless ($now < $deadline); } return $answerpkt; @@ -540,6 +557,9 @@ foreach my $sock (@fhlist) { my $fno = fileno($sock); - warn "dns: oops! fileno now undef for $sock" unless defined($fno); - vec ($rin, $fno, 1) = 1; + if (!defined $fno) { + warn "dns: oops! fileno now undef for $sock"; + } else { + vec ($rin, $fno, 1) = 1; + } } return $rin; --- ../ori/lib/Mail/SpamAssassin/Logger.pm Wed Aug 8 15:19:15 2007 +++ lib/Mail/SpamAssassin/Logger.pm Thu Sep 27 16:21:13 2007 @@ -195,7 +195,7 @@ # remember to avoid deep recursion, my friend sub _log { - my ($level, $message) = @_; - + my ($level, $message, @args) = @_; my $facility = "generic"; + local ($1,$2); if ($message =~ /^(\S+?): (.*)/s) { $facility = $1; @@ -210,4 +210,5 @@ } + if (@args && index($message,'%') >= 0) { $message = sprintf($message,@args) } $message =~ s/\n+$//s; $message =~ s/^/${facility}: /mg; --- ../ori/lib/Mail/SpamAssassin/Plugin/ASN.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/ASN.pm Thu Sep 27 16:21:13 2007 @@ -213,11 +213,17 @@ # do the DNS query, have the callback process the result rather than poll for them later my $zone_index = $index; + my $key = "asnlookup-${zone_index}-$entry->{zone}"; my $id = $scanner->{main}->{resolver}->bgsend("${reversed_ip_quad}.$entry->{zone}", 'TXT', undef, sub { - my $pkt = shift; + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); $self->process_dns_result($scanner, $pkt, $zone_index); }); - - $scanner->{async}->start_lookup({ key=>"asnlookup-${zone_index}-$entry->{zone}", id=>$id, type=>'TXT' }); - dbg("asn: launched DNS TXT query for ${reversed_ip_quad}.$entry->{zone} in background"); + my $ent = { + key=>$key, id=>$id, type=>'TXT', + # timeout => ... # defaults to $scanner->{conf}->{rbl_timeout} + }; + $scanner->{async}->start_lookup($ent); + dbg("asn: launched DNS TXT query for %s.%s in background", + $reversed_ip_quad, $entry->{zone}); $index++; --- ../ori/lib/Mail/SpamAssassin/Plugin/Check.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/Check.pm Thu Sep 27 16:21:13 2007 @@ -92,4 +92,5 @@ } + $pms->harvest_completed_queries(); # allow other, plugin-defined rule types to be called here $self->{main}->call_plugins ("check_rules_at_priority", @@ -98,21 +99,32 @@ # do head tests $self->do_head_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_head_eval_tests($pms, $priority); + $pms->harvest_completed_queries(); $self->do_body_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_uri_tests($pms, $priority, @uris); + $pms->harvest_completed_queries(); $self->do_body_eval_tests($pms, $priority, $decoded); + $pms->harvest_completed_queries(); $self->do_rawbody_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_rawbody_eval_tests($pms, $priority, $bodytext); + $pms->harvest_completed_queries(); $self->do_full_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_full_eval_tests($pms, $priority, \$fulltext); + $pms->harvest_completed_queries(); $self->do_meta_tests($pms, $priority); + $pms->harvest_completed_queries(); # we may need to call this more often than once through the loop, but # it needs to be done at least once, either at the beginning or the end. $self->{main}->call_plugins ("check_tick", { permsgstatus => $pms }); + $pms->harvest_completed_queries(); } --- ../Mail-SpamAssassin-3.2.3/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Wed Aug 8 15:19:14 2007 +++ lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Thu Sep 27 16:29:42 2007 @@ -474,5 +474,7 @@ # dig $dom ns - my $ent = $self->start_lookup ($scanner, 'NS', $self->res_bgsend($scanner, $dom, 'NS'), $key); + my $ent = $self->start_lookup($scanner, 'NS', + $self->res_bgsend($scanner, $dom, 'NS', $key), + $key); $ent->{obj} = $obj; } @@ -518,5 +520,7 @@ # dig $hname a - my $ent = $self->start_lookup ($scanner, 'A', $self->res_bgsend($scanner, $hname, 'A'), $key); + my $ent = $self->start_lookup($scanner, 'A', + $self->res_bgsend($scanner, $hname, 'A', $key), + $key); $ent->{obj} = $obj; } @@ -559,6 +563,7 @@ # dig $ip txt - my $ent = $self->start_lookup ($scanner, 'DNSBL', - $self->res_bgsend($scanner, $item, $qtype), $key); + my $ent = $self->start_lookup($scanner, 'DNSBL', + $self->res_bgsend($scanner, $item, $qtype, $key), + $key); $ent->{obj} = $obj; $ent->{rulename} = $rulename; @@ -650,9 +655,12 @@ my $ent = { key => $key, + timeout => $scanner->{conf}->{rbl_timeout}, type => "URI-".$type, id => $id, completed_callback => sub { my $ent = shift; - $self->completed_lookup_callback ($scanner, $ent); + if (defined $ent->{response_packet}) { # not aborted or empty + $self->completed_lookup_callback ($scanner, $ent); + } } }; @@ -675,7 +683,4 @@ elsif ($type eq 'URI-DNSBL') { $self->complete_dnsbl_lookup ($scanner, $ent, $val); - my $totalsecs = (time - $ent->{obj}->{querystart}); - dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ". - $totalsecs." seconds to look up ($val)"); } } @@ -684,10 +689,9 @@ sub res_bgsend { - my ($self, $scanner, $host, $type) = @_; + my ($self, $scanner, $host, $type, $key) = @_; return $self->{main}->{resolver}->bgsend($host, $type, undef, sub { - my $pkt = shift; - my $id = shift; - $scanner->{async}->set_response_packet($id, $pkt); + my ($pkt, $id, $timestamp) = @_; + $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp); }); }