diff -ur Mail-SpamAssassin-2.51.orig/lib/Mail/SpamAssassin/Bayes.pm Mail-SpamAssassin-2.51/lib/Mail/SpamAssassin/Bayes.pm --- Mail-SpamAssassin-2.51.orig/lib/Mail/SpamAssassin/Bayes.pm Wed Mar 19 00:27:46 2003 +++ Mail-SpamAssassin-2.51/lib/Mail/SpamAssassin/Bayes.pm Fri Mar 21 18:17:24 2003 @@ -41,6 +41,7 @@ $MIN_SPAM_CORPUS_SIZE_FOR_BAYES $MIN_HAM_CORPUS_SIZE_FOR_BAYES %HEADER_NAME_COMPRESSION + $OPPORTUNISTIC_LOCK_VALID }; @ISA = qw(); @@ -153,6 +154,9 @@ $MIN_SPAM_CORPUS_SIZE_FOR_BAYES = 200; $MIN_HAM_CORPUS_SIZE_FOR_BAYES = 200; +# How many seconds should the opportunistic_expire lock be valid? +$OPPORTUNISTIC_LOCK_VALID = 300; + # Should we use the Robinson f(w) equation from # http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html ? # It gives better results, in that scores are more likely to distribute @@ -940,14 +944,7 @@ $self->{store}->add_touches_to_journal(); $self->{store}->scan_count_increment(); - # handle expiry and journal syncing - if ($self->{store}->expiry_due()) { - dbg ("expiration is due: expiring old tokens now..."); - $self->{store}->sync_journal(); - $self->{store}->expire_old_tokens(); - dbg ("expiration done"); - } - + $self->opportunistic_expire(); $self->{store}->untie_db(); return $score; @@ -957,6 +954,22 @@ return 0.5; # nice and neutral } +sub opportunistic_expire { + my($self) = @_; + + # Is an expire or journal sync running? 
+ my $running_expire = $self->{store}->get_running_expire_tok(); + if ( defined $running_expire && $running_expire+$OPPORTUNISTIC_LOCK_VALID > time() ) { return; } + + # handle expiry and journal syncing + if ($self->{store}->expiry_due()) { + dbg ("expiration is due: expiring old tokens now..."); + $self->{store}->sync_journal(); + $self->{store}->expire_old_tokens(); + dbg ("expiration done"); + } +} + ########################################################################### sub dbg { Mail::SpamAssassin::dbg (@_); } diff -ur Mail-SpamAssassin-2.51.orig/lib/Mail/SpamAssassin/BayesStore.pm Mail-SpamAssassin-2.51/lib/Mail/SpamAssassin/BayesStore.pm --- Mail-SpamAssassin-2.51.orig/lib/Mail/SpamAssassin/BayesStore.pm Fri Mar 21 18:18:14 2003 +++ Mail-SpamAssassin-2.51/lib/Mail/SpamAssassin/BayesStore.pm Fri Mar 21 18:27:16 2003 @@ -19,7 +19,7 @@ @DBNAMES @DB_EXTENSIONS $NSPAM_MAGIC_TOKEN $NHAM_MAGIC_TOKEN $LAST_EXPIRE_MAGIC_TOKEN $NTOKENS_MAGIC_TOKEN $OLDEST_TOKEN_AGE_MAGIC_TOKEN - $SCANCOUNT_BASE_MAGIC_TOKEN + $SCANCOUNT_BASE_MAGIC_TOKEN $RUNNING_EXPIRE_MAGIC_TOKEN }; @ISA = qw(); @@ -66,6 +66,7 @@ $LAST_EXPIRE_MAGIC_TOKEN = '**LASTEXPIRE'; $NTOKENS_MAGIC_TOKEN = '**NTOKENS'; $SCANCOUNT_BASE_MAGIC_TOKEN = '**SCANBASE'; +$RUNNING_EXPIRE_MAGIC_TOKEN = '**RUNNINGEXPIRE'; use constant MAX_SIZE_FOR_SCAN_COUNT_FILE => 5000; @@ -275,8 +276,13 @@ sub expire_old_tokens_trapped { my ($self, $opts) = @_; - if (!$self->expiry_due() && !$self->{bayes}->{main}->{learn_force_expire}) - { return 0; } + # Flag that we're doing work + $self->set_running_expire_tok(); + + if (!$self->expiry_due() && !$self->{bayes}->{main}->{learn_force_expire}) { + $self->remove_running_expire_tok(); + return 0; + } my $too_old = $self->scan_count_get(); $too_old = ($too_old < $self->{expiry_count} ? 
@@ -316,7 +322,8 @@ || $tok eq $LAST_EXPIRE_MAGIC_TOKEN || $tok eq $NTOKENS_MAGIC_TOKEN || $tok eq $OLDEST_TOKEN_AGE_MAGIC_TOKEN - || $tok eq $SCANCOUNT_BASE_MAGIC_TOKEN); + || $tok eq $SCANCOUNT_BASE_MAGIC_TOKEN + || $tok eq $RUNNING_EXPIRE_MAGIC_TOKEN); my ($ts, $th, $atime) = $self->tok_get ($tok); @@ -341,8 +348,9 @@ } } - if ($showdots && (($kept + $deleted) % 1000) == 0) { - print STDERR "."; + if ((($kept + $deleted) % 1000) == 0) { + if ($showdots) { print STDERR "."; } + $self->set_running_expire_tok(); } } @@ -366,7 +374,7 @@ @deleted_toks = (); # free 'em up $deleted -= $reprieved; - # and add the magic tokens + # and add the magic tokens. don't add the expire_running token. $new_toks{$SCANCOUNT_BASE_MAGIC_TOKEN} = $self->{db_toks}->{$SCANCOUNT_BASE_MAGIC_TOKEN}; $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $self->scan_count_get(); $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest; @@ -388,10 +396,8 @@ } } - # ok, once that's done we can re-tie. Call untie_db() first so - # we unlock correctly etc. first + # Call untie_db() first so we unlock correctly etc. 
$self->untie_db(); - $self->tie_db_writable(); my $done = time(); @@ -478,6 +484,21 @@ ($ns || 0, $nn || 0); } +sub get_running_expire_tok { + my ($self) = @_; + return $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN}; +} + +sub set_running_expire_tok { + my ($self) = @_; + $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN} = time(); +} + +sub remove_running_expire_tok { + my ($self) = @_; + delete $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN}; +} + ########################################################################### # db abstraction: allow deferred writes, since we will be frequently @@ -561,7 +582,7 @@ }; my $err = $@; - # ok, untie from write-mode, delete the retired journal + # ok, untie from write-mode $self->untie_db(); # handle any errors that may have occurred @@ -576,6 +597,9 @@ sub sync_journal_trapped { my ($self, $opts, $path) = @_; + # Flag that we're doing work + $self->set_running_expire_tok(); + my $started = time(); my $count = 0; my $total_count = 0; @@ -625,8 +649,9 @@ while( my($k,$v) = each %tokens ) { $self->tok_touch_token ($v, $k); - if ($showdots && (++$count % 1000) == 0) { - print STDERR "."; + if ((++$count % 1000) == 0) { + if ($showdots) { print STDERR "."; } + $self->set_running_expire_tok(); } }