|
[SACVS] CVS: spamassassin/lib/Mail/SpamAssassin Conf.pm,1.258,1.259 EvalTes: msg#00098mail.spam.spamassassin.cvs
Update of /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin In directory sc8-pr-cvs1:/tmp/cvs-serv18462/lib/Mail/SpamAssassin Modified Files: Conf.pm EvalTests.pm PerMsgStatus.pm Log Message: bug 2459: reorganize auto-whitelist implementation Index: Conf.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/Conf.pm,v retrieving revision 1.258 retrieving revision 1.259 diff -b -w -u -d -r1.258 -r1.259 --- Conf.pm 21 Sep 2003 22:17:55 -0000 1.258 +++ Conf.pm 23 Sep 2003 21:37:02 -0000 1.259 @@ -163,6 +163,8 @@ # Mail::SpamAssassin module users who set that configuration setting, # to receive the correct values. + $self->{use_auto_whitelist} = 1; + $self->{auto_whitelist_factory} = "Mail::SpamAssassin::DBBasedAddrList"; $self->{auto_whitelist_path} = "__userstate__/auto-whitelist"; $self->{auto_whitelist_file_mode} = '0700'; $self->{auto_whitelist_factor} = 0.5; @@ -2284,6 +2286,35 @@ if (/^dcc_options\s+([A-Z -]+)/) { $self->{dcc_options} = $1; next; + } + +=item use_auto_whitelist ( 0 | 1 ) (default: 1) + +Whether to use auto-whitelists. Auto-whitelists track the long-term +average score for each sender and then shift the score of new messages +toward that long-term average. This can increase or decrease the score +for messages, depending on the long-term behavior of the particular +correspondent. + +For more information about the auto-whitelist system, please look at the +the C<Automatic Whitelist System> section of the README file. The +auto-whitelist is not intended as a general-purpose replacement for static +whitelist entries added to your config files. + +=cut + + if (/^use_auto_whitelist\s+(\d+)$/) { + $self->{use_auto_whitelist} = $1; next; + } + +=item auto_whitelist_factory module (default: Mail::SpamAssassin::DBBasedAddrList) + +Select alternative whitelist factory module. 
+ +=cut + + if (/^auto_whitelist_factory\s+(.*)$/) { + $self->{auto_whitelist_factory} = $1; next; } =item auto_whitelist_path /path/to/file (default: ~/.spamassassin/auto-whitelist) Index: EvalTests.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/EvalTests.pm,v retrieving revision 1.448 retrieving revision 1.449 diff -b -w -u -d -r1.448 -r1.449 --- EvalTests.pm 21 Sep 2003 22:34:18 -0000 1.448 +++ EvalTests.pm 23 Sep 2003 21:37:02 -0000 1.449 @@ -916,6 +916,95 @@ ########################################################################### +sub check_from_in_auto_whitelist { + my ($self) = @_; + + return unless $self->{main}->{conf}->{use_auto_whitelist}; + + if (!defined $self->{main}->{pers_addr_list_factory}) { + my $factory; + return unless $self->{main}->{conf}->{auto_whitelist_factory}; + my $type = $self->{main}->{conf}->{auto_whitelist_factory}; + if ($type =~ /^([_A-Za-z0-9:]+)$/) { + $type = $1; + } + else { + warn "illegal auto_whitelist_factory setting\n"; + return; + } + eval ' + require '.$type.'; + $factory = '.$type.'->new(); + '; + if ($@) { warn $@; undef $factory; } + $self->{main}->set_persistent_address_list_factory($factory); + } + + local $_ = lc $self->get('From:addr'); + return 0 unless /\S/; + + # find the earliest usable "originating IP". ignore reserved nets + my $origip; + foreach my $rly (reverse (@{$self->{relays_trusted}}, @{$self->{relays_untrusted}})) + { + next if ($rly->{ip_is_reserved}); + if ($rly->{ip}) { + $origip = $rly->{ip}; last; + } + } + + # Create the AWL object, catching 'die's + my $whitelist; + my $evalok = eval { + $whitelist = Mail::SpamAssassin::AutoWhitelist->new($self->{main}); + + # check + my $meanscore = $whitelist->check_address($_, $origip); + my $delta = 0; + + dbg("AWL active, pre-score: " . $self->{hits} . ", mean: " . + ($meanscore || 'undef') . ", IP: " . 
($origip || 'undef')); + + if (defined ($meanscore)) { + $delta = ($meanscore - $self->{hits}) * $self->{main}->{conf}->{auto_whitelist_factor}; + $self->{tag_data}->{AWL} = sprintf("%2.1f",$delta); + # Save this for _AWL_ tag + } + + # Update the AWL *before* adding the new score, otherwise + # early high-scoring messages are reinforced compared to + # later ones. http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=159704 + if (!$self->{disable_auto_learning}) { + $whitelist->add_score($self->{hits}); + } + + # current AWL score changes with each hit + for my $set (0..3) { + $self->{conf}->{scoreset}->[$set]->{"AWL"} = sprintf("%0.3f", $delta); + } + + if ($delta != 0) { + $self->_handle_hit("AWL", $delta, "AWL: ", + $self->{main}->{conf}->{descriptions}->{AWL}); + } + + dbg("Post AWL score: ".$self->{hits}); + $whitelist->finish(); + 1; + }; + + if (!$evalok) { + dbg ("open of AWL file failed: $@"); + # try an unlock, in case we got that far + eval { $whitelist->finish(); }; + } + + # test hit is above + return 0; +} + +########################################################################### + sub _check_whitelist_rcvd { my ($self, $list, $addr) = @_; Index: PerMsgStatus.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/PerMsgStatus.pm,v retrieving revision 1.354 retrieving revision 1.355 diff -b -w -u -d -r1.354 -r1.355 --- PerMsgStatus.pm 19 Sep 2003 04:02:40 -0000 1.354 +++ PerMsgStatus.pm 23 Sep 2003 21:37:02 -0000 1.355 @@ -182,12 +182,8 @@ # auto-learning $self->learn(); - # add points from Bayes, before adjusting the AWL + # add points from learning systems (Bayes and AWL) $self->{hits} += $self->{learned_hits}; - - # Do AWL tests last, since these need the score to have already been - # calculated - $self->do_awl_tests(); } $self->delete_fulltext_tmpfile(); @@ -262,7 +258,7 @@ foreach my $test ( @{$self->{test_names_hit}} ) { # ignore tests with 0 score in this 
scoreset or if the test is a learning or userconf test next if ( $self->{conf}->{scores}->{$test} == 0 ); - next if ( exists $self->{conf}->{tflags}->{$test} && $self->{conf}->{tflags}->{$test} =~ /\b(?:learn|userconf)\b/ ); + next if ( exists $self->{conf}->{tflags}->{$test} && $self->{conf}->{tflags}->{$test} =~ /\bnoautolearn\b/ ); $hits += $self->{conf}->{scores}->{$test}; } @@ -2001,75 +1997,6 @@ ########################################################################### -sub do_awl_tests { - my($self) = @_; - - return unless (defined $self->{main}->{pers_addr_list_factory}); - - local $_ = lc $self->get('From:addr'); - return 0 unless /\S/; - - # find the earliest usable "originating IP". ignore reserved nets - my $origip; - foreach my $rly (reverse (@{$self->{relays_trusted}}, @{$self->{relays_untrusted}})) - { - next if ($rly->{ip_is_reserved}); - if ($rly->{ip}) { - $origip = $rly->{ip}; last; - } - } - - # Create the AWL object, catching 'die's - my $whitelist; - my $evalok = eval { - $whitelist = Mail::SpamAssassin::AutoWhitelist->new($self->{main}); - - # check - my $meanscore = $whitelist->check_address($_, $origip); - my $delta = 0; - - dbg("AWL active, pre-score: ".$self->{hits}.", mean: ".($meanscore||'undef'). - ", originating-ip: ".($origip||'undef')); - - if(defined($meanscore)) - { - $delta = ($meanscore - $self->{hits}) * $self->{main}->{conf}->{auto_whitelist_factor}; - $self->{tag_data}->{AWL} = sprintf("%2.1f",$delta); - # Save this for _AWL_ tag - } - - # Update the AWL *before* adding the new score, otherwise - # early high-scoring messages are reinforced compared to - # later ones. 
See - # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=159704 - # - if (!$self->{disable_auto_learning}) { - $whitelist->add_score($self->{hits}); - } - - # current AWL score changes with each hit - for my $set (0..3) { - $self->{conf}->{scoreset}->[$set]->{"AWL"} = sprintf("%0.3f", $delta); - } - - if ($delta != 0) { - $self->_handle_hit("AWL",$delta,"AWL: ","Auto-whitelist adjustment"); - } - - dbg("Post AWL score: ".$self->{hits}); - $whitelist->finish(); - 1; - }; - - if (!$evalok) { - dbg ("open of AWL file failed: $@"); - # try an unlock, in case we got that far - eval { $whitelist->finish(); }; - } -} - -########################################################################### - sub do_meta_tests { my ($self) = @_; local ($_); @@ -2199,13 +2126,15 @@ # Score of 0, skip it. next unless ($self->{conf}->{scores}->{$rulename}); - # If the rule is a net rule, and we're in a non-net enabled scoreset, skip it. + # If the rule is a net rule, and we're in a non-net scoreset, skip it. next if (exists $self->{conf}->{tflags}->{$rulename} && - (($scoreset & 1) == 0) && $self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/); + (($scoreset & 1) == 0) && + $self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/); - # If the rule is a learn rule, and we're in a non-learn enabled scoreset, skip it. + # If the rule is a bayes rule, and we're in a non-bayes scoreset, skip it. 
next if (exists $self->{conf}->{tflags}->{$rulename} && - (($scoreset & 2) == 0) && $self->{conf}->{tflags}->{$rulename} =~ /\blearn\b/); + (($scoreset & 2) == 0) && + $self->{conf}->{tflags}->{$rulename} =~ /\bbayes\b/); my $score = $self->{conf}{scores}{$rulename}; my $result; @@ -2309,9 +2238,8 @@ my $tflags = $self->{conf}->{tflags}->{$rule}; $tflags ||= ''; - # ignore 'learn' or 'userconf' rules, when considering score for - # Bayesian auto-learning - if ($tflags =~ /\b(?:learn|userconf)\b/i) { + # ignore 'noautolearn' rules when considering score for Bayes auto-learning + if ($tflags =~ /\bnoautolearn\b/i) { $self->{learned_hits} += $score; } else { ------------------------------------------------------- This sf.net email is sponsored by:ThinkGeek Welcome to geek heaven. http://thinkgeek.com/sf |
|
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| Previous by Date: | [SACVS] CVS: spamassassin spamassassin.raw,1.105,1.106: 00098, Daniel Quinlan |
|---|---|
| Next by Date: | [SACVS] CVS: spamassassin/spamd spamd.raw,1.219,1.220: 00098, Daniel Quinlan |
| Previous by Thread: | [SACVS] CVS: spamassassin spamassassin.raw,1.105,1.106: 00098, Daniel Quinlan |
| Next by Thread: | [SACVS] CVS: spamassassin/spamd spamd.raw,1.219,1.220: 00098, Daniel Quinlan |
| Indexes: | [Date] [Thread] [Top] [All Lists] |
| News | FAQ | advertise |