logo       

[SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Conf.pm,1.258,1.259 EvalTes: msg#00098

mail.spam.spamassassin.cvs

Subject: [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Conf.pm,1.258,1.259 EvalTests.pm,1.448,1.449 PerMsgStatus.pm,1.354,1.355

Update of /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin
In directory sc8-pr-cvs1:/tmp/cvs-serv18462/lib/Mail/SpamAssassin

Modified Files:
Conf.pm EvalTests.pm PerMsgStatus.pm
Log Message:
bug 2459: reorganize auto-whitelist implementation


Index: Conf.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/Conf.pm,v
retrieving revision 1.258
retrieving revision 1.259
diff -b -w -u -d -r1.258 -r1.259
--- Conf.pm 21 Sep 2003 22:17:55 -0000 1.258
+++ Conf.pm 23 Sep 2003 21:37:02 -0000 1.259
@@ -163,6 +163,8 @@
# Mail::SpamAssassin module users who set that configuration setting,
# to receive the correct values.

+ $self->{use_auto_whitelist} = 1;
+ $self->{auto_whitelist_factory} = "Mail::SpamAssassin::DBBasedAddrList";
$self->{auto_whitelist_path} = "__userstate__/auto-whitelist";
$self->{auto_whitelist_file_mode} = '0700';
$self->{auto_whitelist_factor} = 0.5;
@@ -2284,6 +2286,35 @@

if (/^dcc_options\s+([A-Z -]+)/) {
$self->{dcc_options} = $1; next;
+ }
+
+=item use_auto_whitelist ( 0 | 1 ) (default: 1)
+
+Whether to use auto-whitelists. Auto-whitelists track the long-term
+average score for each sender and then shift the score of new messages
+toward that long-term average. This can increase or decrease the score
+for messages, depending on the long-term behavior of the particular
+correspondent.
+
+For more information about the auto-whitelist system, please look at the
+C<Automatic Whitelist System> section of the README file. The
+auto-whitelist is not intended as a general-purpose replacement for static
+whitelist entries added to your config files.
+
+=cut
+
+ if (/^use_auto_whitelist\s+(\d+)$/) {
+ $self->{use_auto_whitelist} = $1; next;
+ }
+
+=item auto_whitelist_factory module (default: Mail::SpamAssassin::DBBasedAddrList)
+
+Select alternative whitelist factory module.
+
+=cut
+
+ if (/^auto_whitelist_factory\s+(.*)$/) {
+ $self->{auto_whitelist_factory} = $1; next;
}

=item auto_whitelist_path /path/to/file (default: ~/.spamassassin/auto-whitelist)

Index: EvalTests.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/EvalTests.pm,v
retrieving revision 1.448
retrieving revision 1.449
diff -b -w -u -d -r1.448 -r1.449
--- EvalTests.pm 21 Sep 2003 22:34:18 -0000 1.448
+++ EvalTests.pm 23 Sep 2003 21:37:02 -0000 1.449
@@ -916,6 +916,95 @@

###########################################################################

+sub check_from_in_auto_whitelist {
+ my ($self) = @_;
+
+ return unless $self->{main}->{conf}->{use_auto_whitelist};
+
+ if (!defined $self->{main}->{pers_addr_list_factory}) {
+ my $factory;
+ return unless $self->{main}->{conf}->{auto_whitelist_factory};
+ my $type = $self->{main}->{conf}->{auto_whitelist_factory};
+ if ($type =~ /^([_A-Za-z0-9:]+)$/) {
+ $type = $1;
+ }
+ else {
+ warn "illegal auto_whitelist_factory setting\n";
+ return;
+ }
+ eval '
+ require '.$type.';
+ $factory = '.$type.'->new();
+ ';
+ if ($@) { warn $@; undef $factory; }
+ $self->{main}->set_persistent_address_list_factory($factory);
+ }
+
+ local $_ = lc $self->get('From:addr');
+ return 0 unless /\S/;
+
+ # find the earliest usable "originating IP". ignore reserved nets
+ my $origip;
+ foreach my $rly (reverse (@{$self->{relays_trusted}}, @{$self->{relays_untrusted}}))
+ {
+ next if ($rly->{ip_is_reserved});
+ if ($rly->{ip}) {
+ $origip = $rly->{ip}; last;
+ }
+ }
+
+ # Create the AWL object, catching 'die's
+ my $whitelist;
+ my $evalok = eval {
+ $whitelist = Mail::SpamAssassin::AutoWhitelist->new($self->{main});
+
+ # check
+ my $meanscore = $whitelist->check_address($_, $origip);
+ my $delta = 0;
+
+ dbg("AWL active, pre-score: " . $self->{hits} . ", mean: " .
+ ($meanscore || 'undef') . ", IP: " . ($origip || 'undef'));
+
+ if (defined ($meanscore)) {
+ $delta = ($meanscore - $self->{hits}) * $self->{main}->{conf}->{auto_whitelist_factor};
+ $self->{tag_data}->{AWL} = sprintf("%2.1f",$delta);
+ # Save this for _AWL_ tag
+ }
+
+ # Update the AWL *before* adding the new score, otherwise
+ # early high-scoring messages are reinforced compared to
+ # later ones. http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=159704
+ if (!$self->{disable_auto_learning}) {
+ $whitelist->add_score($self->{hits});
+ }
+
+ # current AWL score changes with each hit
+ for my $set (0..3) {
+ $self->{conf}->{scoreset}->[$set]->{"AWL"} = sprintf("%0.3f", $delta);
+ }
+
+ if ($delta != 0) {
+ $self->_handle_hit("AWL", $delta, "AWL: ",
+ $self->{main}->{conf}->{descriptions}->{AWL});
+ }
+
+ dbg("Post AWL score: ".$self->{hits});
+ $whitelist->finish();
+ 1;
+ };
+
+ if (!$evalok) {
+ dbg ("open of AWL file failed: $@");
+ # try an unlock, in case we got that far
+ eval { $whitelist->finish(); };
+ }
+
+ # test hit is above
+ return 0;
+}
+
+###########################################################################
+
sub _check_whitelist_rcvd {
my ($self, $list, $addr) = @_;


Index: PerMsgStatus.pm
===================================================================
RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm,v
retrieving revision 1.354
retrieving revision 1.355
diff -b -w -u -d -r1.354 -r1.355
--- PerMsgStatus.pm 19 Sep 2003 04:02:40 -0000 1.354
+++ PerMsgStatus.pm 23 Sep 2003 21:37:02 -0000 1.355
@@ -182,12 +182,8 @@
# auto-learning
$self->learn();

- # add points from Bayes, before adjusting the AWL
+ # add points from learning systems (Bayes and AWL)
$self->{hits} += $self->{learned_hits};
-
- # Do AWL tests last, since these need the score to have already been
- # calculated
- $self->do_awl_tests();
}

$self->delete_fulltext_tmpfile();
@@ -262,7 +258,7 @@
foreach my $test ( @{$self->{test_names_hit}} ) {
# ignore tests with 0 score in this scoreset or if the test is a learning or userconf test
next if ( $self->{conf}->{scores}->{$test} == 0 );
- next if ( exists $self->{conf}->{tflags}->{$test} && $self->{conf}->{tflags}->{$test} =~ /\b(?:learn|userconf)\b/ );
+ next if ( exists $self->{conf}->{tflags}->{$test} && $self->{conf}->{tflags}->{$test} =~ /\bnoautolearn\b/ );

$hits += $self->{conf}->{scores}->{$test};
}
@@ -2001,75 +1997,6 @@

###########################################################################

-sub do_awl_tests {
- my($self) = @_;
-
- return unless (defined $self->{main}->{pers_addr_list_factory});
-
- local $_ = lc $self->get('From:addr');
- return 0 unless /\S/;
-
- # find the earliest usable "originating IP". ignore reserved nets
- my $origip;
- foreach my $rly (reverse (@{$self->{relays_trusted}}, @{$self->{relays_untrusted}}))
- {
- next if ($rly->{ip_is_reserved});
- if ($rly->{ip}) {
- $origip = $rly->{ip}; last;
- }
- }
-
- # Create the AWL object, catching 'die's
- my $whitelist;
- my $evalok = eval {
- $whitelist = Mail::SpamAssassin::AutoWhitelist->new($self->{main});
-
- # check
- my $meanscore = $whitelist->check_address($_, $origip);
- my $delta = 0;
-
- dbg("AWL active, pre-score: ".$self->{hits}.", mean: ".($meanscore||'undef').
- ", originating-ip: ".($origip||'undef'));
-
- if(defined($meanscore))
- {
- $delta = ($meanscore - $self->{hits}) * $self->{main}->{conf}->{auto_whitelist_factor};
- $self->{tag_data}->{AWL} = sprintf("%2.1f",$delta);
- # Save this for _AWL_ tag
- }
-
- # Update the AWL *before* adding the new score, otherwise
- # early high-scoring messages are reinforced compared to
- # later ones. See
- # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=159704
- #
- if (!$self->{disable_auto_learning}) {
- $whitelist->add_score($self->{hits});
- }
-
- # current AWL score changes with each hit
- for my $set (0..3) {
- $self->{conf}->{scoreset}->[$set]->{"AWL"} = sprintf("%0.3f", $delta);
- }
-
- if ($delta != 0) {
- $self->_handle_hit("AWL",$delta,"AWL: ","Auto-whitelist adjustment");
- }
-
- dbg("Post AWL score: ".$self->{hits});
- $whitelist->finish();
- 1;
- };
-
- if (!$evalok) {
- dbg ("open of AWL file failed: $@");
- # try an unlock, in case we got that far
- eval { $whitelist->finish(); };
- }
-}
-
-###########################################################################
-
sub do_meta_tests {
my ($self) = @_;
local ($_);
@@ -2199,13 +2126,15 @@
# Score of 0, skip it.
next unless ($self->{conf}->{scores}->{$rulename});

- # If the rule is a net rule, and we're in a non-net enabled scoreset, skip it.
+ # If the rule is a net rule, and we're in a non-net scoreset, skip it.
next if (exists $self->{conf}->{tflags}->{$rulename} &&
- (($scoreset & 1) == 0) && $self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/);
+ (($scoreset & 1) == 0) &&
+ $self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/);

- # If the rule is a learn rule, and we're in a non-learn enabled scoreset, skip it.
+ # If the rule is a bayes rule, and we're in a non-bayes scoreset, skip it.
next if (exists $self->{conf}->{tflags}->{$rulename} &&
- (($scoreset & 2) == 0) && $self->{conf}->{tflags}->{$rulename} =~ /\blearn\b/);
+ (($scoreset & 2) == 0) &&
+ $self->{conf}->{tflags}->{$rulename} =~ /\bbayes\b/);

my $score = $self->{conf}{scores}{$rulename};
my $result;
@@ -2309,9 +2238,8 @@

my $tflags = $self->{conf}->{tflags}->{$rule}; $tflags ||= '';

- # ignore 'learn' or 'userconf' rules, when considering score for
- # Bayesian auto-learning
- if ($tflags =~ /\b(?:learn|userconf)\b/i) {
+ # ignore 'noautolearn' rules when considering score for Bayes auto-learning
+ if ($tflags =~ /\bnoautolearn\b/i) {
$self->{learned_hits} += $score;
}
else {



-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf


<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | FAQ | advertise