|
[SACVS] CVS: spamassassin/lib/Mail/SpamAssassin/MIME Parser.pm,1.2,1.3: msg#00114 mail.spam.spamassassin.cvs
Update of /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/MIME In directory sc8-pr-cvs1:/tmp/cvs-serv2010/lib/Mail/SpamAssassin/MIME Modified Files: Parser.pm Log Message: a bit more code cleanup Index: Parser.pm =================================================================== RCS file: /cvsroot/spamassassin/spamassassin/lib/Mail/SpamAssassin/MIME/Parser.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -b -w -u -d -r1.2 -r1.3 --- Parser.pm 24 Sep 2003 19:30:32 -0000 1.2 +++ Parser.pm 24 Sep 2003 21:33:33 -0000 1.3 @@ -5,16 +5,12 @@ # MIME Message parser, for email and nntp engines. +use Mail::SpamAssassin; use Mail::SpamAssassin::MIME; use MIME::Base64; use MIME::QuotedPrint; use Carp; -sub debug { - - # warn((caller)[2], @_); -} - =head2 This is how mail messages can come in: =over 4 @@ -73,8 +69,7 @@ # constructor sub parse { - my $class = shift; - my $message = shift; + my($self,$message) = @_; # now go generate stuff my @message = split ( /^/m, $message ); @@ -101,7 +96,7 @@ if ($header) { my ( $key, $value ) = split ( /:\s*/, $header, 2 ); if ( $key =~ /^content-type(?:-encoding)?$/i ) { - $msg->header( $key, $class->decode_header($value), $value ); + $msg->header( $key, $self->decode_header($value), $value ); } } @@ -111,15 +106,15 @@ # Parse out the body ... 
my ($boundary) = $msg->header('content-type') =~ /boundary\s*=\s*["']?([^"';]+)["']?/i; - $class->parse_body( $msg, $msg, $boundary, \@message ); + $self->parse_body( $msg, $msg, $boundary, \@message ); return $msg; } sub parse_body { - my $class = shift; - my ( $msg, $_msg, $boundary, $body ) = @_; + my($self, $msg, $_msg, $boundary, $body) = @_; + # CRLF -> LF for ( @{$body} ) { s/\r\n/\n/; } @@ -129,41 +124,40 @@ # warn "Parsing message of type: $type\n"; if ( $type =~ /^text\/plain/i ) { - debug("Parse text/plain\n"); - $class->parse_normal( $msg, $_msg, $boundary, $body ); + dbg("Parse text/plain\n"); + $self->parse_normal( $msg, $_msg, $boundary, $body ); } elsif ( $type =~ /^text\/html/i ) { - debug("Parse text/html\n"); - $class->parse_normal( $msg, $_msg, $boundary, $body ); + dbg("Parse text/html\n"); + $self->parse_normal( $msg, $_msg, $boundary, $body ); } elsif ( $type =~ /^multipart\/alternative/i ) { - debug("Parse multipart/alternative\n"); - $class->parse_multipart_alternate( $msg, $_msg, $boundary, $body ); + dbg("Parse multipart/alternative\n"); + $self->parse_multipart_alternate( $msg, $_msg, $boundary, $body ); } elsif ( $type =~ /^multipart\//i ) { - debug("Parse $type\n"); - $class->parse_multipart_mixed( $msg, $_msg, $boundary, $body ); + dbg("Parse $type\n"); + $self->parse_multipart_mixed( $msg, $_msg, $boundary, $body ); } else { - debug("Regular attachment\n"); - $class->decode_attachment( $msg, $_msg, $boundary, $body ); + dbg("Regular attachment\n"); + $self->decode_attachment( $msg, $_msg, $boundary, $body ); } if ( !$msg->body() ) { - debug("No message body found. Reparsing\n"); - my $part_fh = []; + dbg("No message body found. 
Reparsing as blank.\n"); + my $part_array = []; my $part_msg = Mail::SpamAssassin::MIME->new(); - $class->decode_body( $msg, $part_msg, $boundary, $part_fh ); + $self->decode_body( $msg, $part_msg, $boundary, $part_array ); } } sub parse_multipart_alternate { - my $class = shift; - my ( $msg, $_msg, $boundary, $body ) = @_; + my($self, $msg, $_msg, $boundary, $body ) = @_; my $preamble = ''; - debug("m/a got boundary: $boundary\n"); + dbg("m/a got boundary: $boundary\n"); # extract preamble (normally contains "This message is in Multipart/MIME format") while ( my $line = shift @{$body} ) { @@ -171,48 +165,46 @@ $preamble .= $line; } - debug("preamble: [[$preamble]]\n"); + dbg("preamble: [[$preamble]]\n"); my $in_body = 0; my $header; - my $part_fh; + my $part_array; my $part_msg = Mail::SpamAssassin::MIME->new(); my $line_count = @{$body}; foreach ( @{$body} ) { - - # debug($_); if ( --$line_count == 0 || /^\-\-\Q$boundary\E/ ) { - debug("m/a got end of section\n"); + dbg("m/a got end of section\n"); # end of part my $line = $_; # per rfc 1521, the CRLF before the boundary is part of the boundary ... 
- if ($part_fh) { - chomp( $part_fh->[ scalar @{$part_fh} - 1 ] ); - splice @{$part_fh}, -1 - if ( $part_fh->[ scalar @{$part_fh} - 1 ] eq '' ); + if ($part_array) { + chomp( $part_array->[ scalar @{$part_array} - 1 ] ); + splice @{$part_array}, -1 + if ( $part_array->[ scalar @{$part_array} - 1 ] eq '' ); } # assume body part if it's text if ( $part_msg->header('content-type') =~ /^text/i ) { - $class->decode_body( $msg, $part_msg, $boundary, $part_fh ); + $self->decode_body( $msg, $part_msg, $boundary, $part_array ); } else { - debug("Likely virus?\n"); - $class->decode_attachment( $msg, $part_msg, $boundary, $part_fh ); + dbg("Likely virus?\n"); + $self->decode_attachment( $msg, $part_msg, $boundary, $part_array ); } last if $line =~ /^\-\-\Q$boundary\E\-\-$/; $in_body = 0; $part_msg = Mail::SpamAssassin::MIME->new(); - undef $part_fh; + undef $part_array; next; } if ($in_body) { - push ( @{$part_fh}, $_ ); + push ( @{$part_array}, $_ ); } else { @@ -242,10 +234,9 @@ } sub parse_multipart_mixed { - my $class = shift; - my ( $msg, $_msg, $boundary, $body ) = @_; + my($self, $msg, $_msg, $boundary, $body) = @_; - debug("m/m Got boundary: $boundary\n"); + dbg("m/m Got boundary: $boundary\n"); my $preamble = ''; # extract preamble (normally contains "This message is in Multipart/MIME format") @@ -254,45 +245,45 @@ $preamble .= $line; } - debug("Extracted preamble: [[$preamble]]\n"); + dbg("Extracted preamble: [[$preamble]]\n"); my $part_msg = Mail::SpamAssassin::MIME->new(); # just used for headers storage my $in_body = 0; my $header; - my $part_fh; + my $part_array; my $line_count = @{$body}; foreach ( @{$body} ) { if ( --$line_count == 0 || /^\-\-\Q$boundary\E/ ) { # end of part - debug("Got end of MIME section: $_\n"); + dbg("Got end of MIME section: $_\n"); my $line = $_; # per rfc 1521, the CRLF before the boundary is part of the boundary ... 
- if ($part_fh) { - chomp( $part_fh->[ scalar @{$part_fh} - 1 ] ); - splice @{$part_fh}, -1 - if ( $part_fh->[ scalar @{$part_fh} - 1 ] eq '' ); + if ($part_array) { + chomp( $part_array->[ scalar @{$part_array} - 1 ] ); + splice @{$part_array}, -1 + if ( $part_array->[ scalar @{$part_array} - 1 ] eq '' ); } my ($p_boundary) = $part_msg->header('content-type') =~ /boundary\s*=\s*["']?([^"';]+)["']?/i; $p_boundary ||= $boundary; - $class->parse_body( $msg, $part_msg, $p_boundary, $part_fh ); + $self->parse_body( $msg, $part_msg, $p_boundary, $part_array ); last if $line =~ /^\-\-\Q${boundary}\E\-\-$/; $in_body = 0; $part_msg = Mail::SpamAssassin::MIME->new(); - undef $part_fh; + undef $part_array; next; } if ($in_body) { - push ( @{$part_fh}, $_ ); + push ( @{$part_array}, $_ ); } else { s/\s+$//; @@ -320,11 +311,10 @@ } sub parse_normal { - my $class = shift; - my ( $msg, $_msg, $boundary, $body ) = @_; + my($self, $msg, $_msg, $boundary, $body) = @_; # extract body, store it in $msg - $class->decode_body( $msg, $_msg, $boundary, $body ); + $self->decode_body( $msg, $_msg, $boundary, $body ); } use File::Path qw(rmtree); @@ -349,8 +339,7 @@ # decode according to RFC2047 sub decode_header { - my $class = shift; - my ($header) = @_; + my($self, $header) = @_; return '' unless $header; return $header unless $header =~ /=\?/; @@ -361,49 +350,30 @@ } sub decode_body { - my $class = shift; - my ( $msg, $part_msg, $boundary, $body ) = @_; + my($self, $msg, $part_msg, $boundary, $body ) = @_; - my ( $type, $content ) = $class->decode( $part_msg, $body ); + my ( $type, $content ) = $self->decode( $part_msg, $body ); - debug("got body: $type\n"); + dbg("got body: $type\n"); $msg->add_body_part( $type, $content, $body, $boundary ); } sub decode_attachment { - my $class = shift; - my ( $msg, $part_msg, $boundary, $body ) = @_; + my($self, $msg, $part_msg, $boundary, $body ) = @_; - debug("decoding attachment\n"); + dbg("decoding attachment\n"); - my ( $type, $content, 
$filename ) = $class->decode( $part_msg, $body ); + my ( $type, $content, $filename ) = $self->decode( $part_msg, $body ); $msg->add_attachment( $type, $content, $filename, $body, $boundary ); } sub decode { - my $class = shift; - my ( $msg, $body ) = @_; - - # tvd - 2003/09/24 - The original code used Text::Iconv to deal with UTF-8 stuff and the like. - # I haven't quite decided if we need to deal with the content-type stuff or not, so ... - # - # my $converter = NullConverter->new(); - # if ($msg->header('content-type') && ($msg->header('content-type') =~ /^text\//i)) { - # # text type - might need to translate to UTF8 - # my $type = $msg->header('content-type'); - # # remember to strip charset portion - we can always add it later. - # if ($type =~ s/charset="?([^\";]+)"?;?//i) { - # my $charset = $1; - # $charset =~ s/us-ascii/ISO-8859-15/i; # some mailers are broken this way. - # $converter = $class->converter($charset); - # $msg->header('content-type', $type); - # } - # } + my($self, $msg, $body ) = @_; if ( lc( $msg->header('content-transfer-encoding') ) eq 'quoted-printable' ) { - debug("decoding QP file\n"); + dbg("decoding QP file\n"); my @output = split ( /^/m, MIME::QuotedPrint::decode_qp( join ( "", @{$body} ) ) ); @@ -411,13 +381,13 @@ my ($filename) = ( $msg->header('content-disposition') =~ /name="?([^\";]+)"?/i ); if ( !$filename ) { - ($filename) = ( $msg->header('content-type') =~ /name="?([^\";]+)"?/i ); + ($filename) = ( $type =~ /name="?([^\";]+)"?/i ); } return $type, \@output, $filename; } elsif ( lc( $msg->header('content-transfer-encoding') ) eq 'base64' ) { - debug("decoding B64 file\n"); + dbg("decoding B64 file\n"); my $output = [ MIME::Base64::decode_base64( join ( "", @{$body} ) ) ]; my $type = $msg->header('content-type'); @@ -433,7 +403,7 @@ return $type, $output, $filename; } else { - debug("decoding other encoding\n"); + dbg("decoding other encoding\n"); # Encoding is one of 7bit, 8bit, binary or x-something - just save. 
my @output = @{$body}; @@ -442,12 +412,14 @@ my ($filename) = ( $msg->header('content-disposition') =~ /name="?([^\";]+)"?/i ); if ( !$filename ) { - ($filename) = ( $msg->header('content-type') =~ /name="?([^\";]+)"?/i ); + ($filename) = ( $type =~ /name="?([^\";]+)"?/i ); } return $type, \@output, $filename; } } + +sub dbg { Mail::SpamAssassin::dbg (@_); } 1; __END__ ------------------------------------------------------- This sf.net email is sponsored by:ThinkGeek Welcome to geek heaven. http://thinkgeek.com/sf |
|
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| Previous by Date: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin MIME.pm,1.2,1.3: 00114, Theo Van Dinter |
|---|---|
| Next by Date: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin/MIME Parser.pm,1.3,1.4: 00114, Theo Van Dinter |
| Previous by Thread: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin MIME.pm,1.2,1.3: 00114, Theo Van Dinter |
| Next by Thread: | [SACVS] CVS: spamassassin/lib/Mail/SpamAssassin/MIME Parser.pm,1.3,1.4: 00114, Theo Van Dinter |
| Indexes: | [Date] [Thread] [Top] [All Lists] |
| News | FAQ | advertise |