Lines 546-624
Link Here
|
546 |
|
546 |
|
547 |
sub sync_journal { |
547 |
sub sync_journal { |
548 |
my ($self, $opts) = @_; |
548 |
my ($self, $opts) = @_; |
|
|
549 |
my $ret = 0; |
549 |
|
550 |
|
550 |
my $path = $self->get_journal_filename(); |
551 |
my $path = $self->get_journal_filename(); |
551 |
|
552 |
|
552 |
if (!-f $path) { return 0; } |
553 |
# if $path doesn't exist, or it's not a file, or is 0 bytes in length, return |
|
|
554 |
if ( !stat($path) || !-f _ || -z _ ) { return 0; } |
553 |
|
555 |
|
554 |
# retire the journal, so we can update the db files from it in peace. |
556 |
eval { |
555 |
# TODO: use locking here |
557 |
local $SIG{'__DIE__'}; # do not run user die() traps in here |
556 |
my $retirepath = $path.".old"; |
558 |
if ($self->tie_db_writable()) { |
557 |
if (!rename ($path, $retirepath)) { |
559 |
$ret = $self->sync_journal_trapped($opts, $path); |
558 |
warn "bayes: failed rename $path to $retirepath\n"; |
560 |
} |
|
|
561 |
}; |
562 |
my $err = $@; |
563 |
|
564 |
# ok, untie from write-mode, delete the retired journal |
565 |
$self->untie_db(); |
566 |
|
567 |
# handle any errors that may have occurred |
568 |
if ($err) { |
569 |
warn "bayes: $err\n"; |
559 |
return 0; |
570 |
return 0; |
560 |
} |
571 |
} |
561 |
|
572 |
|
|
|
573 |
$ret; |
574 |
} |
575 |
|
576 |
sub sync_journal_trapped { |
577 |
my ($self, $opts, $path) = @_; |
578 |
|
562 |
my $started = time(); |
579 |
my $started = time(); |
563 |
my $count = 0; |
580 |
my $count = 0; |
564 |
|
581 |
my $total_count = 0; |
|
|
582 |
my %tokens = (); |
565 |
my $showdots = $opts->{showdots}; |
583 |
my $showdots = $opts->{showdots}; |
|
|
584 |
my $retirepath = $path.".old"; |
566 |
|
585 |
|
567 |
# now read the retired journal |
586 |
# now read the retired journal |
568 |
if (!open (JOURNAL, "<".$retirepath)) { |
587 |
if (!open (JOURNAL, "<$path")) { |
569 |
warn "bayes: cannot open read $retirepath\n"; |
588 |
warn "bayes: cannot open read $path\n"; |
570 |
rename($retirepath,$path); # try to put it back if we can... |
|
|
571 |
return 0; |
589 |
return 0; |
572 |
} |
590 |
} |
573 |
|
591 |
|
574 |
my $ok_to_remove = 0; |
592 |
# retire the journal, so we can update the db files from it in peace. |
575 |
eval { |
593 |
# TODO: use locking here |
576 |
local $SIG{'__DIE__'}; # do not run user die() traps in here |
594 |
if (!rename ($path, $retirepath)) { |
|
|
595 |
warn "bayes: failed rename $path to $retirepath\n"; |
596 |
close(JOURNAL); |
597 |
return 0; |
598 |
} |
577 |
|
599 |
|
578 |
if ($self->tie_db_writable()) { |
600 |
# Read the journal |
579 |
while (<JOURNAL>) { |
601 |
while (<JOURNAL>) { |
580 |
$count++; |
602 |
$total_count++; |
581 |
if (/^c (-?\d+) (-?\d+) (\d+) (.*)$/) { |
603 |
|
582 |
$self->tok_sync_counters ($1+0, $2+0, $3+0, $4); |
604 |
if (/^t (\d+) (.*)$/) { # Token timestamp update, cache resultant entries |
583 |
} elsif (/^t (\d+) (.*)$/) { |
605 |
$tokens{$2} = $1+0; |
584 |
$self->tok_touch_token ($1+0, $2); |
606 |
# elsif (/^c (-?\d+) (-?\d+) (\d+) (.*)$/) { # Add/full token update |
585 |
} elsif (/^n (-?\d+) (-?\d+)$/) { |
607 |
# $self->tok_sync_counters ($1+0, $2+0, $3+0, $4); |
586 |
$self->tok_sync_nspam_nham ($1+0, $2+0); |
608 |
# $count++; |
587 |
} else { |
609 |
# } elsif (/^n (-?\d+) (-?\d+)$/) { # update ham/spam count |
588 |
warn "Bayes journal: gibberish: $_"; |
610 |
# $self->tok_sync_nspam_nham ($1+0, $2+0); |
589 |
} |
611 |
# $count++; |
590 |
|
612 |
} else { |
591 |
if ($showdots && ($count % 1000) == 0) { |
613 |
warn "Bayes journal: gibberish entry found: $_"; |
592 |
print STDERR "."; |
|
|
593 |
} |
594 |
} |
595 |
$ok_to_remove = 1; |
596 |
} |
614 |
} |
597 |
}; |
|
|
598 |
my $err = $@; |
599 |
|
600 |
if ($showdots) { print STDERR "\n"; } |
601 |
|
615 |
|
602 |
# ok, untie from write-mode, delete the retired journal |
616 |
# if ($showdots && ($count % 1000) == 0) { |
603 |
$self->untie_db(); |
617 |
# print STDERR "."; |
604 |
close JOURNAL; |
618 |
# } |
605 |
if ($ok_to_remove) { |
|
|
606 |
unlink ($retirepath); |
607 |
} |
619 |
} |
608 |
else { |
620 |
close JOURNAL; |
609 |
warn "bayes: Detected problem syncing journal, trying to rename $retirepath to $path\n"; |
621 |
|
610 |
rename($retirepath,$path) or warn "bayes: rename failed"; # try to put it back if we can... |
622 |
# Now that we've determined what tokens we need to update and their |
611 |
|
623 |
# final values, update the DB. Should be much smaller than the full |
612 |
# handle any errors that may have occurred |
624 |
# journal entries. |
613 |
if ($err) { |
625 |
while( my($k,$v) = each %tokens ) { |
614 |
warn "bayes: $err\n"; |
626 |
$self->tok_touch_token ($v, $k); |
615 |
return 0; |
627 |
|
|
|
628 |
if ($showdots && (++$count % 1000) == 0) { |
629 |
print STDERR "."; |
616 |
} |
630 |
} |
617 |
} |
631 |
} |
618 |
|
632 |
|
|
|
633 |
if ($showdots) { print STDERR "\n"; } |
634 |
|
635 |
# we're all done, so unlink the old journal file |
636 |
unlink ($retirepath) || warn "bayes: can't unlink $retirepath: $!\n"; |
637 |
|
619 |
my $done = time(); |
638 |
my $done = time(); |
620 |
my $msg = ("synced Bayes databases from journal in ".($done - $started). |
639 |
my $msg = ("synced Bayes databases from journal in ".($done - $started). |
621 |
" seconds: $count entries"); |
640 |
" seconds: $count unique entries ($total_count total entries)"); |
622 |
|
641 |
|
623 |
if ($opts->{verbose}) { |
642 |
if ($opts->{verbose}) { |
624 |
print $msg,"\n"; |
643 |
print $msg,"\n"; |