|
[SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Bayes.pm,1.94,1.95 BayesSto: msg#00068 mail.spam.spamassassin.cvs
Update of /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin In directory sc8-pr-cvs1:/tmp/cvs-serv3327/lib/Mail/SpamAssassin Modified Files: Bayes.pm BayesStore.pm CmdLearn.pm PerMsgLearner.pm PerMsgStatus.pm Log Message: bug 1396: SA 2.60 chokes on messages without Message-ID Index: Bayes.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/Bayes.pm,v retrieving revision 1.94 retrieving revision 1.95 diff -b -w -u -d -r1.94 -r1.95 --- Bayes.pm 8 Sep 2003 01:07:04 -0000 1.94 +++ Bayes.pm 19 Sep 2003 04:02:40 -0000 1.95 @@ -611,7 +611,7 @@ ########################################################################### sub learn { - my ($self, $isspam, $msg) = @_; + my ($self, $isspam, $msg, $id) = @_; if (!$self->{conf}->{use_bayes}) { return; } if (!defined $msg) { return; } @@ -629,7 +629,7 @@ } if ($ok) { - $ret = $self->learn_trapped ($isspam, $msg, $body); + $ret = $self->learn_trapped ($isspam, $msg, $body, $id); if (!$self->{main}->{learn_caller_will_untie}) { $self->{store}->untie_db(); @@ -648,9 +648,9 @@ # this function is trapped by the wrapper above sub learn_trapped { - my ($self, $isspam, $msg, $body) = @_; + my ($self, $isspam, $msg, $body, $msgid) = @_; - my $msgid = $self->get_msgid ($msg); + $msgid ||= $self->get_msgid ($msg); my $seen = $self->{store}->seen_get ($msgid); if (defined ($seen)) { if (($seen eq 's' && $isspam) || ($seen eq 'h' && !$isspam)) { @@ -687,13 +687,15 @@ $self->{store}->seen_put ($msgid, ($isspam ? 
's' : 'h')); $self->{store}->add_touches_to_journal(); + + dbg("bayes: Learned '$msgid'"); 1; } ########################################################################### sub forget { - my ($self, $msg) = @_; + my ($self, $msg, $id) = @_; if (!$self->{conf}->{use_bayes}) { return; } if (!defined $msg) { return; } @@ -706,7 +708,7 @@ local $SIG{'__DIE__'}; # do not run user die() traps in here if ($self->{store}->tie_db_writable()) { - $ret = $self->forget_trapped ($msg, $body); + $ret = $self->forget_trapped ($msg, $body, $id); if (!$self->{main}->{learn_caller_will_untie}) { $self->{store}->untie_db(); @@ -725,9 +727,9 @@ # this function is trapped by the wrapper above sub forget_trapped { - my ($self, $msg, $body) = @_; + my ($self, $msg, $body, $msgid) = @_; - my $msgid = $self->get_msgid ($msg); + $msgid ||= $self->get_msgid ($msg); my $seen = $self->{store}->seen_get ($msgid); my $isspam; if (defined ($seen)) { @@ -773,7 +775,7 @@ my ($self, $msg) = @_; my $msgid = $msg->get_header("Message-Id"); - if ( !defined $msgid || $msgid eq '' ) { # generate a best effort unique id + if (!defined $msgid || $msgid eq '' || $msgid =~ /^\s*<\s*>.*$/) { # generate a best effort unique id # Use sha1(Date:, last received: and top N bytes of body) # where N is MIN(1024 bytes, 1/2 of body length) # @@ -822,7 +824,7 @@ if (!defined $body) { # why?! - warn "failed to get body for ".$self->{msg}->get_header("Message-Id")."\n"; + warn "failed to get body for ".$self->get_msgid($self->{msg})."\n"; return [ ]; } Index: BayesStore.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/BayesStore.pm,v retrieving revision 1.89 retrieving revision 1.90 diff -b -w -u -d -r1.89 -r1.90 --- BayesStore.pm 10 Sep 2003 17:25:30 -0000 1.89 +++ BayesStore.pm 19 Sep 2003 04:02:40 -0000 1.90 @@ -255,6 +255,7 @@ elsif ( !$found ) { # new DB, make sure we know that ... 
$self->{db_version} = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN} = DB_VERSION; $self->{db_toks}->{$NTOKENS_MAGIC_TOKEN} = 0; # no tokens in the db ... + dbg("bayes: new db, set db version ".$self->{db_version}." and 0 tokens"); } return 1; Index: CmdLearn.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/CmdLearn.pm,v retrieving revision 1.48 retrieving revision 1.49 diff -b -w -u -d -r1.48 -r1.49 --- CmdLearn.pm 15 Sep 2003 23:00:55 -0000 1.48 +++ CmdLearn.pm 19 Sep 2003 04:02:40 -0000 1.49 @@ -284,7 +284,7 @@ } $ma->{noexit} = 1; - my $status = $spamtest->learn ($ma, $id, $isspam, $forget); + my $status = $spamtest->learn ($ma, undef, $isspam, $forget); if ($status->did_learn()) { $learnedcount++; Index: PerMsgLearner.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgLearner.pm,v retrieving revision 1.7 retrieving revision 1.8 diff -b -w -u -d -r1.7 -r1.8 --- PerMsgLearner.pm 19 Sep 2003 01:51:28 -0000 1.7 +++ PerMsgLearner.pm 19 Sep 2003 04:02:40 -0000 1.8 @@ -47,7 +47,7 @@ sub new { my $class = shift; $class = ref($class) || $class; - my ($main, $msg, $id) = @_; + my ($main, $msg) = @_; my $self = { 'main' => $main, @@ -59,27 +59,24 @@ $self->{bayes_scanner} = $self->{main}->{bayes_scanner}; - $id ||= $self->{msg}->get_header ("Message-Id"); - $id ||= $self->{msg}->get_header ("Message-ID"); - $id ||= 'no_id.$$.'.rand(); - $id =~ s/[-\0\s\;\:]/_/gs; - - $self->{id} = $id; - bless ($self, $class); $self; } ########################################################################### -=item $status->learn_spam() +=item $status->learn_spam($id) Learn the message as spam. +C<$id> is an optional message-identification string, used internally +to tag the message. If it is C<undef>, the Message-Id of the message +will be used. It should be unique to that message. 
+ =cut sub learn_spam { - my ($self) = @_; + my ($self, $id) = @_; if ($self->{main}->{learn_with_whitelist}) { $self->{main}->add_all_addresses_to_blacklist ($self->{msg}); @@ -88,43 +85,51 @@ # use the real message-id here instead of mass-check's idea of an "id", # as we may deliver the msg into another mbox format but later need # to forget it's training. - $self->{learned} = $self->{bayes_scanner}->learn (1, $self->{msg}); + $self->{learned} = $self->{bayes_scanner}->learn (1, $self->{msg}, $id); } ########################################################################### -=item $status->learn_ham() +=item $status->learn_ham($id) Learn the message as ham. +C<$id> is an optional message-identification string, used internally +to tag the message. If it is C<undef>, the Message-Id of the message +will be used. It should be unique to that message. + =cut sub learn_ham { - my ($self) = @_; + my ($self, $id) = @_; if ($self->{main}->{learn_with_whitelist}) { $self->{main}->add_all_addresses_to_whitelist ($self->{msg}); } - $self->{learned} = $self->{bayes_scanner}->learn (0, $self->{msg}); + $self->{learned} = $self->{bayes_scanner}->learn (0, $self->{msg}, $id); } ########################################################################### -=item $status->forget() +=item $status->forget($id) Forget about a previously-learned message. +C<$id> is an optional message-identification string, used internally +to tag the message. If it is C<undef>, the Message-Id of the message +will be used. It should be unique to that message. 
+ =cut sub forget { - my ($self) = @_; + my ($self, $id) = @_; if ($self->{main}->{learn_with_whitelist}) { $self->{main}->remove_all_addresses_from_whitelist ($self->{msg}); } - $self->{learned} = $self->{bayes_scanner}->forget ($self->{msg}); + $self->{learned} = $self->{bayes_scanner}->forget ($self->{msg}, $id); } ########################################################################### Index: PerMsgStatus.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm,v retrieving revision 1.353 retrieving revision 1.354 diff -b -w -u -d -r1.353 -r1.354 --- PerMsgStatus.pm 19 Sep 2003 01:51:28 -0000 1.353 +++ PerMsgStatus.pm 19 Sep 2003 04:02:40 -0000 1.354 @@ -298,8 +298,7 @@ dbg ("auto-learn? yes, ".($isspam?"spam ($hits > $max)":"ham ($hits < $min)")); eval { - my $learnstatus = $self->{main}->learn ($self->{msg}, - scalar($self->get("Message-Id")), $isspam, 0); + my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0); $learnstatus->finish(); if ( $learnstatus->did_learn() ) { $self->{auto_learn_status} = $isspam; ------------------------------------------------------- This sf.net email is sponsored by:ThinkGeek Welcome to geek heaven. http://thinkgeek.com/sf |
|
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| Previous by Date: | [SACVS] CVS: spamassassin/lib/Mail SpamAssassin.pm,1.205,1.206: 00068, Daniel Quinlan |
|---|---|
| Next by Date: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin BayesStore.pm,1.90,1.91: 00068, Theo Van Dinter |
| Previous by Thread: | [SACVS] CVS: spamassassin/lib/Mail SpamAssassin.pm,1.205,1.206: 00068, Daniel Quinlan |
| Next by Thread: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin BayesStore.pm,1.90,1.91: 00068, Theo Van Dinter |
| Indexes: | [Date] [Thread] [Top] [All Lists] |
| News | FAQ | advertise |