logo       

[SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Bayes.pm,1.94,1.95 BayesSto: msg#00068

mail.spam.spamassassin.cvs

Subject: [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Bayes.pm,1.94,1.95 BayesStore.pm,1.89,1.90 CmdLearn.pm,1.48,1.49 PerMsgLearner.pm,1.7,1.8 PerMsgStatus.pm,1.353,1.354

Update of /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin
In directory sc8-pr-cvs1:/tmp/cvs-serv3327/lib/Mail/SpamAssassin

Modified Files:
Bayes.pm BayesStore.pm CmdLearn.pm PerMsgLearner.pm
PerMsgStatus.pm
Log Message:
bug 1396: SA 2.60 chokes on messages without Message-ID


Index: Bayes.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/Bayes.pm,v
retrieving revision 1.94
retrieving revision 1.95
diff -b -w -u -d -r1.94 -r1.95
--- Bayes.pm 8 Sep 2003 01:07:04 -0000 1.94
+++ Bayes.pm 19 Sep 2003 04:02:40 -0000 1.95
@@ -611,7 +611,7 @@
###########################################################################

sub learn {
- my ($self, $isspam, $msg) = @_;
+ my ($self, $isspam, $msg, $id) = @_;

if (!$self->{conf}->{use_bayes}) { return; }
if (!defined $msg) { return; }
@@ -629,7 +629,7 @@
}

if ($ok) {
- $ret = $self->learn_trapped ($isspam, $msg, $body);
+ $ret = $self->learn_trapped ($isspam, $msg, $body, $id);

if (!$self->{main}->{learn_caller_will_untie}) {
$self->{store}->untie_db();
@@ -648,9 +648,9 @@

# this function is trapped by the wrapper above
sub learn_trapped {
- my ($self, $isspam, $msg, $body) = @_;
+ my ($self, $isspam, $msg, $body, $msgid) = @_;

- my $msgid = $self->get_msgid ($msg);
+ $msgid ||= $self->get_msgid ($msg);
my $seen = $self->{store}->seen_get ($msgid);
if (defined ($seen)) {
if (($seen eq 's' && $isspam) || ($seen eq 'h' && !$isspam)) {
@@ -687,13 +687,15 @@

$self->{store}->seen_put ($msgid, ($isspam ? 's' : 'h'));
$self->{store}->add_touches_to_journal();
+
+ dbg("bayes: Learned '$msgid'");
1;
}

###########################################################################

sub forget {
- my ($self, $msg) = @_;
+ my ($self, $msg, $id) = @_;

if (!$self->{conf}->{use_bayes}) { return; }
if (!defined $msg) { return; }
@@ -706,7 +708,7 @@
local $SIG{'__DIE__'}; # do not run user die() traps in here

if ($self->{store}->tie_db_writable()) {
- $ret = $self->forget_trapped ($msg, $body);
+ $ret = $self->forget_trapped ($msg, $body, $id);

if (!$self->{main}->{learn_caller_will_untie}) {
$self->{store}->untie_db();
@@ -725,9 +727,9 @@

# this function is trapped by the wrapper above
sub forget_trapped {
- my ($self, $msg, $body) = @_;
+ my ($self, $msg, $body, $msgid) = @_;

- my $msgid = $self->get_msgid ($msg);
+ $msgid ||= $self->get_msgid ($msg);
my $seen = $self->{store}->seen_get ($msgid);
my $isspam;
if (defined ($seen)) {
@@ -773,7 +775,7 @@
my ($self, $msg) = @_;

my $msgid = $msg->get_header("Message-Id");
- if ( !defined $msgid || $msgid eq '' ) { # generate a best effort unique id
+ if (!defined $msgid || $msgid eq '' || $msgid =~ /^\s*<\s*>.*$/) { # generate a best effort unique id
# Use sha1(Date:, last received: and top N bytes of body)
# where N is MIN(1024 bytes, 1/2 of body length)
#
@@ -822,7 +824,7 @@

if (!defined $body) {
# why?!
- warn "failed to get body for ".$self->{msg}->get_header("Message-Id")."\n";
+ warn "failed to get body for ".$self->get_msgid($self->{msg})."\n";
return [ ];
}


Index: BayesStore.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/BayesStore.pm,v
retrieving revision 1.89
retrieving revision 1.90
diff -b -w -u -d -r1.89 -r1.90
--- BayesStore.pm 10 Sep 2003 17:25:30 -0000 1.89
+++ BayesStore.pm 19 Sep 2003 04:02:40 -0000 1.90
@@ -255,6 +255,7 @@
elsif ( !$found ) { # new DB, make sure we know that ...
$self->{db_version} = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN} = DB_VERSION;
$self->{db_toks}->{$NTOKENS_MAGIC_TOKEN} = 0; # no tokens in the db ...
+ dbg("bayes: new db, set db version ".$self->{db_version}." and 0 tokens");
}

return 1;

Index: CmdLearn.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/CmdLearn.pm,v
retrieving revision 1.48
retrieving revision 1.49
diff -b -w -u -d -r1.48 -r1.49
--- CmdLearn.pm 15 Sep 2003 23:00:55 -0000 1.48
+++ CmdLearn.pm 19 Sep 2003 04:02:40 -0000 1.49
@@ -284,7 +284,7 @@
}

$ma->{noexit} = 1;
- my $status = $spamtest->learn ($ma, $id, $isspam, $forget);
+ my $status = $spamtest->learn ($ma, undef, $isspam, $forget);

if ($status->did_learn()) {
$learnedcount++;

Index: PerMsgLearner.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgLearner.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -w -u -d -r1.7 -r1.8
--- PerMsgLearner.pm 19 Sep 2003 01:51:28 -0000 1.7
+++ PerMsgLearner.pm 19 Sep 2003 04:02:40 -0000 1.8
@@ -47,7 +47,7 @@
sub new {
my $class = shift;
$class = ref($class) || $class;
- my ($main, $msg, $id) = @_;
+ my ($main, $msg) = @_;

my $self = {
'main' => $main,
@@ -59,27 +59,24 @@

$self->{bayes_scanner} = $self->{main}->{bayes_scanner};

- $id ||= $self->{msg}->get_header ("Message-Id");
- $id ||= $self->{msg}->get_header ("Message-ID");
- $id ||= 'no_id.$$.'.rand();
- $id =~ s/[-\0\s\;\:]/_/gs;
-
- $self->{id} = $id;
-
bless ($self, $class);
$self;
}

###########################################################################

-=item $status->learn_spam()
+=item $status->learn_spam($id)

Learn the message as spam.

+C<$id> is an optional message-identification string, used internally
+to tag the message. If it is C<undef>, the Message-Id of the message
+will be used. It should be unique to that message.
+
=cut

sub learn_spam {
- my ($self) = @_;
+ my ($self, $id) = @_;

if ($self->{main}->{learn_with_whitelist}) {
$self->{main}->add_all_addresses_to_blacklist ($self->{msg});
@@ -88,43 +85,51 @@
# use the real message-id here instead of mass-check's idea of an "id",
# as we may deliver the msg into another mbox format but later need
# to forget it's training.
- $self->{learned} = $self->{bayes_scanner}->learn (1, $self->{msg});
+ $self->{learned} = $self->{bayes_scanner}->learn (1, $self->{msg}, $id);
}

###########################################################################

-=item $status->learn_ham()
+=item $status->learn_ham($id)

Learn the message as ham.

+C<$id> is an optional message-identification string, used internally
+to tag the message. If it is C<undef>, the Message-Id of the message
+will be used. It should be unique to that message.
+
=cut

sub learn_ham {
- my ($self) = @_;
+ my ($self, $id) = @_;

if ($self->{main}->{learn_with_whitelist}) {
$self->{main}->add_all_addresses_to_whitelist ($self->{msg});
}

- $self->{learned} = $self->{bayes_scanner}->learn (0, $self->{msg});
+ $self->{learned} = $self->{bayes_scanner}->learn (0, $self->{msg}, $id);
}

###########################################################################

-=item $status->forget()
+=item $status->forget($id)

Forget about a previously-learned message.

+C<$id> is an optional message-identification string, used internally
+to tag the message. If it is C<undef>, the Message-Id of the message
+will be used. It should be unique to that message.
+
=cut

sub forget {
- my ($self) = @_;
+ my ($self, $id) = @_;

if ($self->{main}->{learn_with_whitelist}) {
$self->{main}->remove_all_addresses_from_whitelist ($self->{msg});
}

- $self->{learned} = $self->{bayes_scanner}->forget ($self->{msg});
+ $self->{learned} = $self->{bayes_scanner}->forget ($self->{msg}, $id);
}

###########################################################################

Index: PerMsgStatus.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm,v
retrieving revision 1.353
retrieving revision 1.354
diff -b -w -u -d -r1.353 -r1.354
--- PerMsgStatus.pm 19 Sep 2003 01:51:28 -0000 1.353
+++ PerMsgStatus.pm 19 Sep 2003 04:02:40 -0000 1.354
@@ -298,8 +298,7 @@

dbg ("auto-learn? yes, ".($isspam?"spam ($hits > $max)":"ham ($hits < $min)"));
eval {
- my $learnstatus = $self->{main}->learn ($self->{msg},
- scalar($self->get("Message-Id")), $isspam, 0);
+ my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0);
$learnstatus->finish();
if ( $learnstatus->did_learn() ) {
$self->{auto_learn_status} = $isspam;



-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf


<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | FAQ | advertise