Index: spamassassin-trunk/contrib/run-masses =================================================================== --- spamassassin-trunk/contrib/run-masses (revision 111236) +++ spamassassin-trunk/contrib/run-masses (working copy) @@ -1,7 +1,7 @@ #!/bin/sh # run-masses, Theo Van Dinter (c) 2002 -# $Id: run-masses,v 1.1 2003/06/10 17:11:12 felicity Exp $ +# $Id$ # # This script will run a mass-check against all mbox files in a given Index: spamassassin-trunk/spamc/libspamc.h =================================================================== --- spamassassin-trunk/spamc/libspamc.h (revision 111236) +++ spamassassin-trunk/spamc/libspamc.h (working copy) @@ -73,6 +73,8 @@ #define EX_NOTSPAM 0 #define EX_ISSPAM 1 +#define EX_LEARNED 5 +#define EX_NOTLEARNED 6 #define EX_TOOBIG 866 /* Aug 14, 2002 bj: Bitflags instead of lots of bool parameters */ @@ -97,6 +99,9 @@ /* log to stderr */ #define SPAMC_LOG_TO_STDERR (1<<22) +/* Nov 24, 2004 NP: added learning support */ +#define SPAMC_LEARN (1<<21) + /* Aug 14, 2002 bj: A struct for storing a message-in-progress */ typedef enum { @@ -135,6 +140,8 @@ int out_len; /* Output from spamd. Either the filtered message, or the check-only response. Or else, a pointer to msg above. */ + int is_learned; /* Output from spamd. Gives state + about learn resp. unlearn process */ /* these members added in SpamAssassin version 2.60: */ struct libspamc_private_message *priv; @@ -204,7 +211,7 @@ * failover, more than one host is defined, but if there is only one there, * no failover is done. */ -int message_filter(struct transport *tp, const char *username, +int message_filter(struct transport *tp, const char *username, int learntype, int flags, struct message *m); /* Dump the message. If there is any data in the message (typically, m->type @@ -216,7 +223,7 @@ /* Do a message_read->message_filter->message_write sequence, handling errors * appropriately with dump_message or appropriate CHECK_ONLY output. 
Returns * EX_OK or EX_ISSPAM/EX_NOTSPAM on success, some error EX on error. */ -int message_process(struct transport *trans, char *username, int max_size, +int message_process(struct transport *trans, char *username, int learntype, int max_size, int in_fd, int out_fd, const int flags); /* Cleanup the resources we allocated for storing the message. Call after @@ -224,7 +231,7 @@ void message_cleanup(struct message *m); /* Aug 14, 2002 bj: This is now legacy, don't use it. */ -int process_message(struct transport *tp, char *username, +int process_message(struct transport *tp, char *username, int learntype, int max_size, int in_fd, int out_fd, const int check_only, const int safe_fallback); Index: spamassassin-trunk/spamc/spamc.c =================================================================== --- spamassassin-trunk/spamc/spamc.c (revision 111236) +++ spamassassin-trunk/spamc/spamc.c (working copy) @@ -140,7 +140,8 @@ " [default: 250k]\n"); usg(" -u username User for spamd to process this message under.\n" " [default: current user]\n"); - + usg(" -L learntype Message gets learned as spam (0),\n" + " or learned as ham (1), or forgotten (2)\n"); usg(" -B Assume input is a single BSMTP-formatted\n" " message.\n"); @@ -173,13 +174,13 @@ */ int read_args(int argc, char **argv, - int *max_size, char **username, + int *max_size, char **username, int *learntype, struct transport *ptrn) { #ifndef _WIN32 - const char *opts = "-BcrRd:e:fyp:t:s:u:xSHU:ElhV"; + const char *opts = "-BcrRd:e:fyp:t:s:u:L:xSHU:ElhV"; #else - const char *opts = "-BcrRd:fyp:t:s:u:xSHElhV"; + const char *opts = "-BcrRd:fyp:t:s:u:L:xSHElhV"; #endif int opt; int ret = EX_OK; @@ -280,6 +281,12 @@ *username = optarg; break; } + case 'L': + { + flags |= SPAMC_LEARN; + *learntype = (optarg[0] >= '0' && optarg[0] <= '2' && optarg[1] == '\0') ? optarg[0] - '0' : -1; /* accept exactly one digit 0-2; anything else becomes -1, which message_filter rejects, instead of atoi() silently mapping junk to 0 (learn as spam) */ + break; + } #ifndef _WIN32 case 'U': { @@ -298,7 +305,6 @@ flags |= SPAMC_SYMBOLS; break; } - case '?': case ':': { @@ -460,6 +466,7 @@ int out_fd = -1; int result; int ret; + int learntype = 0;
transport_init(&trans); @@ -476,12 +483,13 @@ /* Now parse the command line arguments. First, set the defaults. */ max_size = 250 * 1024; username = NULL; - if ((ret = read_args(argc, argv, &max_size, &username, &trans)) != EX_OK) { + + if ((ret = read_args(argc, argv, &max_size, &username, &learntype, &trans)) != EX_OK) { if (ret == EX_TEMPFAIL ) ret = EX_OK; goto finish; } - + ret = get_current_user(&username); if (ret != EX_OK) goto finish; @@ -518,14 +526,39 @@ if (ret == EX_OK) { - ret = message_filter(&trans, username, flags, &m); + ret = message_filter(&trans, username, learntype, flags, &m); free(username); username = NULL; - - if (ret == EX_OK) { - get_output_fd(&out_fd); - if (message_write(out_fd, &m) >= 0) { + if (ret == EX_OK) { + get_output_fd(&out_fd); + if (flags & SPAMC_LEARN) + { + + if (m.is_learned == 1) + { + + printf( "Message successfully un/learned\n" ); + + ret = EX_LEARNED; + + } + else + { + + printf( "Message was already un/learned\n" ); + + ret = EX_NOTLEARNED; + + } + message_cleanup(&m); + + goto finish; + + + } + else if (message_write(out_fd, &m) >= 0) { + result = m.is_spam; if ((flags & SPAMC_CHECK_ONLY) && result != EX_TOOBIG) { message_cleanup(&m); @@ -559,19 +592,24 @@ message_cleanup(&m); ret = EX_NOTSPAM; } - else { + else if (flags & SPAMC_LEARN ) + { + message_cleanup(&m); + ret = EX_OSERR; + } + else { message_dump(STDIN_FILENO, out_fd, &m); message_cleanup(&m); if (ret == EX_TOOBIG) { ret = 0; } - else if (use_exit_code) { + else if (use_exit_code) { ret = result; } else if (flags & SPAMC_SAFE_FALLBACK) { ret = EX_OK; } - } + } finish: #ifdef _WIN32 Index: spamassassin-trunk/spamc/libspamc.c =================================================================== --- spamassassin-trunk/spamc/libspamc.c (revision 111236) +++ spamassassin-trunk/spamc/libspamc.c (working copy) @@ -754,6 +754,7 @@ { char is_spam[6]; char s_str[21], t_str[21]; + char is_learned[4]; UNUSED_VARIABLE(len); @@ -800,12 +801,30 @@ } return EX_OK; } + 
else if (sscanf(buf, "Learned: %3s", is_learned) == 1) + { + if(strcmp(is_learned,"yes")==0||strcmp(is_learned,"Yes")==0) + { + m->is_learned = 1; + } + else if(strcmp(is_learned,"no")==0||strcmp(is_learned,"No")==0) + { + m->is_learned = 0; + } + else + { + libspamc_log(flags, LOG_ERR, "spamd responded with bad Learned-state '%s'", + buf); + return EX_PROTOCOL; + } + return EX_OK; + } libspamc_log(flags, LOG_ERR, "spamd responded with bad header '%s'", buf); return EX_PROTOCOL; } -int message_filter(struct transport *tp, const char *username, +int message_filter(struct transport *tp, const char *username, int learntype, int flags, struct message *m) { char buf[8192]; @@ -814,6 +833,7 @@ int sock = -1; int rc; char versbuf[20]; + char strlearntype[12]; /* decimal text of any 32-bit int plus NUL; a 1-byte buffer was overflowed by sprintf below */ float version; int response; int failureval; @@ -843,9 +863,9 @@ } m->out_len = 0; + /* Build spamd protocol header */ - /* Build spamd protocol header */ - if (flags & SPAMC_CHECK_ONLY) + if (flags & SPAMC_CHECK_ONLY) strcpy(buf, "CHECK "); else if (flags & SPAMC_REPORT_IFSPAM) strcpy(buf, "REPORT_IFSPAM "); @@ -853,7 +873,12 @@ strcpy(buf, "REPORT "); else if (flags & SPAMC_SYMBOLS) strcpy(buf, "SYMBOLS "); - else + else if (flags & SPAMC_LEARN ) + { + strcpy(buf, "LEARN "); + len = strlen(buf); + } + else strcpy(buf, "PROCESS "); len = strlen(buf); @@ -868,6 +893,23 @@ strcat(buf, "\r\n"); len = strlen(buf); + + if (flags & SPAMC_LEARN) + { + if ((learntype > 2) || (learntype < 0 )) /* only 0 (spam), 1 (ham), 2 (forget) are valid */ + { + free(m->out); + m->out = m->msg; + m->out_len = m->msg_len; + return EX_OSERR; + } + sprintf(strlearntype,"%d",learntype); + strcpy(buf + len, "Learn-type: "); + strcat(buf + len, strlearntype); + strcat(buf + len, "\r\n"); + len += strlen(buf + len); + } + if (username != NULL) { if (strlen(username) + 8 >= (bufsiz - len)) { free(m->out); @@ -880,7 +922,8 @@ strcat(buf + len, "\r\n"); len += strlen(buf + len); } - if ((m->msg_len > 9999999) || ((len + 27) >= (bufsiz - len))) { + + if ((m->msg_len > 9999999) || ((len + 27) >=
(bufsiz - len))) { free(m->out); m->out = m->msg; m->out_len = m->msg_len; @@ -948,6 +991,7 @@ m->score = 0; m->threshold = 0; m->is_spam = EX_TOOBIG; + m->is_learned = 0; while (1) { failureval = _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz); @@ -960,7 +1004,7 @@ } if (_handle_spamd_header(m, flags, buf, len) < 0) { - failureval = EX_PROTOCOL; + failureval = EX_PROTOCOL; goto failure; } } @@ -977,10 +1021,18 @@ } return EX_OK; } + else if (flags & SPAMC_LEARN) + { + shutdown(sock, SHUT_RD); + closesocket(sock); + sock = -1; + return EX_OK; + } else { if (m->content_length < 0) { /* should have got a length too. */ - failureval = EX_PROTOCOL; + + failureval = EX_PROTOCOL; goto failure; } @@ -1048,7 +1100,7 @@ } -int message_process(struct transport *trans, char *username, int max_size, +int message_process(struct transport *trans, char *username, int learntype, int max_size, int in_fd, int out_fd, const int flags) { int ret; @@ -1060,7 +1112,7 @@ ret = message_read(in_fd, flags, &m); if (ret != EX_OK) goto FAIL; - ret = message_filter(trans, username, flags, &m); + ret = message_filter(trans, username, learntype, flags, &m); if (ret != EX_OK) goto FAIL; if (message_write(out_fd, &m) < 0) @@ -1085,6 +1137,7 @@ } } + void message_cleanup(struct message *m) { if (m->out != NULL && m->pre != NULL && m->out != m->pre+m->pre_len) @@ -1097,7 +1150,7 @@ } /* Aug 14, 2002 bj: Obsolete! 
*/ -int process_message(struct transport *tp, char *username, int max_size, +int process_message(struct transport *tp, char *username, int learntype, int max_size, int in_fd, int out_fd, const int my_check_only, const int my_safe_fallback) { @@ -1109,7 +1162,7 @@ if (my_safe_fallback) flags |= SPAMC_SAFE_FALLBACK; - return message_process(tp, username, max_size, in_fd, out_fd, flags); + return message_process(tp, username, learntype, max_size, in_fd, out_fd, flags); } /* @@ -1389,4 +1442,18 @@ exit(0); } + + + #endif /* LIBSPAMC_UNIT_TESTS */ + + + + + + + + + + + Index: spamassassin-trunk/spamd/spamd.raw =================================================================== --- spamassassin-trunk/spamd/spamd.raw (revision 111236) +++ spamassassin-trunk/spamd/spamd.raw (working copy) @@ -1083,6 +1083,10 @@ check( $1, $2, $start, $remote_hostname, $remote_hostaddr ); } + elsif (/(LEARN) SPAMC\/(.*)/) { + learn( $1, $2, $start, $remote_hostname, $remote_hostaddr ); + } + # Looks like a client is just seeing if we're alive. elsif (/PING SPAMC\/(.*)/) { @@ -1100,6 +1104,27 @@ return 1; } +sub handle_setuid_to_user { + + if ( $spamtest->{paranoid} ) { + logmsg("PARANOID: still running as root, closing connection."); + die; + } + logmsg( "Still running as root: user not specified with -u, " + . "not found, or set to root. Fall back to nobody." 
); + my ( $uid, $gid ) = ( getpwnam('nobody') )[ 2, 3 ]; + $uid =~ /^(\d+)$/ and $uid = $1; # de-taint + $gid =~ /^(\d+)$/ and $gid = $1; # de-taint + + $) = "$gid $gid"; # eGID + $> = $uid; # eUID + if ( !defined($uid) || ( $> != $uid and $> != ( $uid - 2**32 ) ) ) { + logmsg("fatal: setuid to nobody failed"); + die; + } + +} + sub check { my ( $method, $version, $start_time, $remote_hostname, $remote_hostaddr ) = @_; local ($_); @@ -1124,25 +1149,8 @@ $expected_length = $hdrs->{expected_length}; } - if ( $setuid_to_user && $> == 0 ) { - if ( $spamtest->{paranoid} ) { - logmsg("PARANOID: still running as root, closing connection."); - die; - } - logmsg( "Still running as root: user not specified with -u, " - . "not found, or set to root. Fall back to nobody." ); - my ( $uid, $gid ) = ( getpwnam('nobody') )[ 2, 3 ]; - $uid =~ /^(\d+)$/ and $uid = $1; # de-taint - $gid =~ /^(\d+)$/ and $gid = $1; # de-taint + handle_setuid_to_user if ($setuid_to_user && $> == 0); - $) = "$gid $gid"; # eGID - $> = $uid; # eUID - if ( !defined($uid) || ( $> != $uid and $> != ( $uid - 2**32 ) ) ) { - logmsg("fatal: setuid to nobody failed"); - die; - } - } - if ( $opt{'sql-config'} && !defined($current_user) ) { unless ( handle_user_sql('nobody') ) { service_unavailable_error("Error fetching user preferences via SQL"); @@ -1318,6 +1326,140 @@ $mail->finish(); } +# XXX - yuck too much shared code with check +sub learn { + my ( $method, $version, $start_time, $remote_hostname, $remote_hostaddr ) = @_; + local ($_); + my $expected_length; + my $learn_type; + my $forget = 0; + my $isspam = 1; + + my $hdrs = {}; + + # parse_headers returns !=0 on failure + return 1 + if parse_headers( + $hdrs, $client, + { + 'Content-length' => \&got_clen_header, + 'User' => \&got_user_header, + 'Learn-type' => \&got_learn_type_header, + } + ); + + $expected_length = $hdrs->{expected_length}; + $learn_type = $hdrs->{learn_type}; + + &handle_setuid_to_user if ($setuid_to_user && $> == 0); + + if ( 
$opt{'sql-config'} && !defined($current_user) ) { + unless ( handle_user_sql('nobody') ) { + service_unavailable_error("Error fetching user preferences via SQL"); + return 1; + } + } + + if ( $opt{'ldap-config'} && !defined($current_user) ) { + handle_user_ldap('nobody'); + } + + my $resp = "EX_OK"; + + # Now read in message + my @msglines; + my $actual_length = 0; + while ( $_ = $client->getline() ) { + $actual_length += length($_); + push(@msglines, $_); + last if (defined $expected_length && $actual_length >= $expected_length); + } + + my $mail = $spamtest->parse(\@msglines); + # Free some mem. + undef @msglines; + + if ( $mail->get_header("X-Spam-Checker-Version") ) { + my $new_mail = $spamtest->parse($spamtest->remove_spamassassin_markup($mail), 1); + $mail->finish(); + $mail = $new_mail; + } + + # Extract the Message-Id(s) for logging purposes. + my $msgid = $mail->get_pristine_header("Message-Id"); + my $rmsgid = $mail->get_pristine_header("Resent-Message-Id"); + foreach my $id ((\$msgid, \$rmsgid)) { + if ( $$id ) { + while ( $$id =~ s/\([^\(\)]*\)// ) + { } # remove comments and + $$id =~ s/^\s+|\s+$//g; # leading and trailing spaces + $$id =~ s/\s+/ /g; # collapse whitespaces + $$id =~ s/^.*?<(.*?)>.*$/$1/; # keep only the id itself + $$id =~ s/[^\x21-\x7e]/?/g; # replace all weird chars + $$id =~ s/[<>]/?/g; # plus all dangling angle brackets + $$id =~ s/^(.+)$/<$1>/; # re-bracket the id (if not empty) + } + } + + $msgid ||= "(unknown)"; + $current_user ||= "(unknown)"; + + my $learn_type_desc; + my $learn_type_desc_past; + + if ($learn_type == 0) { + $learn_type_desc = "learning spam"; + $learn_type_desc_past = "learned spam"; + $isspam = 1; + } + elsif ($learn_type == 1) { + $learn_type_desc = "learning ham"; + $learn_type_desc_past = "learned ham"; + $isspam = 0; + } + elsif ($learn_type == 2) { + $learn_type_desc = "forgetting"; + $learn_type_desc_past = "forgot"; + $forget = 1; + } + + logmsg( $learn_type_desc + . " message $msgid" + . ( $rmsgid ?
" aka $rmsgid" : "" ) + . " for ${current_user}:$>" + . "." ); + + # Check length if we're supposed to. + if ( defined $expected_length && $actual_length != $expected_length ) { + protocol_error( + "(Content-Length mismatch: Expected $expected_length bytes, got $actual_length bytes)" + ); + $mail->finish(); + return 1; + } + + my $status = $spamtest->learn( $mail, undef, $isspam, $forget ); + my $hdr; + + if ($status->did_learn()) { + $hdr .= "Learned: Yes"; + } + else { + $hdr .= "Learned: No"; + } + + print $client "SPAMD/1.1 $resphash{$resp} $resp\r\n", + $hdr . "\r\n\r\n"; + + my $scantime = sprintf( "%.1f", time - $start_time ); + + + logmsg( "$learn_type_desc_past message for $current_user:$> in" + . " $scantime seconds, $actual_length bytes." ); + $status->finish(); # added by jm to allow GC'ing + $mail->finish(); +} + ########################################################################### # generalised header parser. @@ -1422,6 +1564,21 @@ return 0; } +sub got_learn_type_header { + my ( $hdrs, $header, $value ) = @_; + if ( $value !~ /^(\d+)$/ ) { + protocol_error("(Learn-type contains non-numeric bytes)"); + return 1; + } + my $type = $1; + if ($type != 0 && $type != 1 && $type != 2) { + protocol_error("(Learn-type contains invalid type)"); + return 1; + } + $hdrs->{learn_type} = $type; + return 0; +} + sub protocol_error { my ($err) = @_; my $resp = "EX_PROTOCOL";