#
# Courier::Filter::Module::Parts class
#
# (C) 2003-2008 Julian Mehnle <julian@mehnle.net>
# $Id: Parts.pm 210 2008-03-21 19:30:31Z julian $
#
###############################################################################
=head1 NAME
Courier::Filter::Module::Parts - Message (MIME multipart and ZIP archive)
parts filter module for the Courier::Filter framework
=cut
package Courier::Filter::Module::Parts;
use warnings;
use strict;
use base 'Courier::Filter::Module';
use MIME::Parser 5.4;
use IO::InnerFile 2.110;
# Require either MIME::Parser 5.413 or lower, or IO::InnerFile 2.110+
# (where IO::InnerFile::seek() properly returns TRUE when appropriate).
use Digest::MD5;
use File::Spec;
# In-memory processing doesn't work, see comments in match_mime_part().
use constant TRUE => (0 == 0);
use constant FALSE => not TRUE;
use constant default_response => 'Prohibited message part detected.';
=head1 SYNOPSIS
use Courier::Filter::Module::Parts;
my $module = Courier::Filter::Module::Parts->new(
max_message_size
=> $max_message_size,
max_part_size => $max_part_size,
views => ['raw', 'zip'],
signatures => [
{
# One or more of the following options:
mime_type => 'text/html' || qr/html/i,
file_name => 'file_name.ext' || qr/\.(com|exe)$/i,
size => 106496,
digest_md5 => 'b09e26c292759d654633d3c8ed00d18d',
encrypted => 0,
# Optionally any of the following:
views => ['raw', 'zip'],
response => $response_text
},
...
],
logger => $logger,
inverse => 0,
trusting => 0,
testing => 0,
debugging => 0
);
my $filter = Courier::Filter->new(
...
modules => [ $module ],
...
);
=head1 DESCRIPTION
This class is a filter module class for use with Courier::Filter. It matches a
message if one of the message's parts (MIME parts, or files in a ZIP archive)
matches one of the configured signatures.
=cut
# Implementation:
###############################################################################
=head2 Constructor
The following constructor is provided:
=over
=item B<new(%options)>: returns I<Courier::Filter::Module::Parts>
Creates a new B<Parts> filter module.
%options is a list of key/value pairs representing any of the following
options:
=over
=item B<views>
An arrayref containing the global default set of I<views> the filter module
should apply to message parts when matching the configured signatures against
them. A view is the way how a MIME part's (MIME-decoded) data is interpreted.
Defaults to B<['raw']>.
The following views are supported:
=over
=item B<raw>
The MIME part is MIME-decoded but not otherwise transformed. The raw MIME part
is then matched against the configured signatures.
=item B<zip>
If the MIME part has a file name ending in C<.zip>, it is considered a ZIP
archive, and all unencrypted files in the archive are matched as individual
message parts against the configured signatures. The zip view requires the
B<Archive::Zip> Perl module to be installed.
=back
=item B<max_message_size>
=item B<max_size> (DEPRECATED)
An integer value controlling the maximum size (in bytes) of the overall message
text for a message to be processed by this filter module. Messages larger than
this value will never be processed, and thus will never match. If B<undef>,
there is no size limit. Defaults to B<1024**2> (1MB).
As MIME multipart and ZIP archive processing can be quite CPU- and
memory-intensive (although the B<Parts> filter module makes use of temporary
files since version 0.13), you should definitely restrict the message size to
some sensible value that easily fits in your server's memory. 1024**2 (1MB)
should be appropriate for most uses of this filter module.
The C<max_message_size> option was previously called C<max_size>, but the
latter is now deprecated and may not be supported in future versions of the
B<Parts> filter module.
=item B<max_part_size>
An integer value controlling the maximum size (in bytes) of any single message
part (i.e. MIME part in a message, or file in an archive) for that part to be
processed by this filter module. Parts larger than this value will never be
processed, and thus will never match. If B<undef>, there is no size limit.
Defaults to the value of the C<max_message_size> option, so you don't really
need to specify a part size limit if you are comfortable with using the same
value for both. See the C<max_message_size> option for its default.
If you make use of the B<'zip'> view, be aware of the risk posed by so-called
I<decompression bombs>, which allow messages to easily fall below the overall
message size limit, while a file in a small attached ZIP archive can decompress
to a huge size. The part size limit prevents huge files from being
decompressed.
=item B<signatures>
I<Required>. A reference to an array containing the list of I<signatures>
against which message parts are to be matched. A signature in turn is a
reference to a hash containing one or more so-called signature I<aspects> (as
key/value pairs) and any signature I<options> (also as key/value pairs).
I<Signature aspects>
Aspects may either be scalar values (for exact, case-sensitive matches), or
regular expression objects created with the C<qr//> operator (for inexact,
partial matches). For a signature to match a message part, I<all> of the
signature's specified aspects must match those of the message part. For the
filter module to match a message, I<any> of the signatures must match I<any> of
the message's parts.
A signature aspect can be any of the following:
=over
=item B<mime_type>
The MIME type of the message part ('type/sub-type').
=item B<file_name>
The file name of the message part.
=item B<size>
The exact size (in bytes) of the decoded message part.
=item B<digest_md5>
The MD5 digest of the decoded message part (32 hex digits, as printed by
`md5sum`).
=item B<encrypted>
A boolean value denoting whether the message part is encrypted and its contents
are inaccessible to the B<Parts> filter module.
=back
I<Signature options>
A signature option can be any of the following:
=over
=item B<views>
An arrayref containing the set of I<views> the filter module should apply to
message parts when matching I<this> signature against them. For a list of
supported views, see the description of the constructor's C<views> option.
Defaults to the global set of views specified to the constructor.
=item B<response>
A string that is to be returned as the match result in case of a match.
Defaults to B<"Prohibited message part detected.">.
=back
I<Example>
So for instance, a signature list could look like this:
signatures => [
{
mime_type => qr/html/i,
response => 'No HTML mail, please.'
},
{
file_name => qr/\.(com|exe|lnk|pif|scr|vbs)$/i,
response => 'Executable content detected'
},
{
size => 106496,
digest_md5 => 'b09e26c292759d654633d3c8ed00d18d',
views => ['raw', 'zip'], # Look into ZIP archives, too!
response => 'Worm detected: W32.Swen'
},
{
size => 22528,
# Cannot set a specific digest_md5 since W32.Mydoom
# is polymorphic.
response => 'Worm suspected: W32.Mydoom'
},
{
encrypted => 1,
views => ['zip'],
response => 'Worm suspected ' .
'(only worms and fools use ZIP encryption)'
}
]
=back
All options of the B<Courier::Filter::Module> constructor are also supported
by the constructor of the B<Parts> filter module. Please see
L<Courier::Filter::Module/"new"> for their descriptions.
=cut
sub new {
my ($class, %options) = @_;
my $mime_parser = MIME::Parser->new();
#$mime_parser->output_to_core(TRUE);
# In-memory processing doesn't work, see comments in match_mime_part().
$mime_parser->output_under(File::Spec->tmpdir);
#$mime_parser->tmp_to_core(TRUE);
# In-memory processing doesn't work, see comments in match_mime_part().
$mime_parser->use_inner_files(TRUE);
my $self = $class->SUPER::new(
%options,
mime_parser => $mime_parser
);
# Default "max_message_size" option to the deprecated "max_size" option,
# or to 1024**2 (1MB):
$self->{max_message_size} = (
exists($self->{max_size}) ? $self->{max_size} : 1024**2
)
if not exists($self->{max_message_size});
# Default "max_part_size" option to the "max_message_size" option:
$self->{max_part_size} = $self->{max_message_size}
if not exists($self->{max_part_size});
# Default "views" option to 'raw':
my $views = $self->{views} || { 'raw' => TRUE };
# Transform "views" option into hashref if it was given as an arrayref:
$views = { map(($_ => TRUE), @$views) }
if ref($views) eq 'ARRAY';
my $used_views = { %$views };
foreach my $signature ( @{$self->{signatures}} ) {
# Default "views" option to global "views" option:
my $signature_views = $signature->{views} || $views;
# Transform "views" option into hashref if it was given as an arrayref:
$signature_views = { map(($_ => TRUE), @$signature_views) }
if ref($signature_views) eq 'ARRAY';
# Add any signature-specific views to the global set of used views:
%$used_views = (%$used_views, %$signature_views);
$signature->{views} = $signature_views;
$self->compile_signature($signature);
}
$self->{used_views} = $used_views;
return $self;
}
=back
=head2 Instance methods
See L<Courier::Filter::Module/"Instance methods"> for a description of the
provided instance methods.
=cut
sub match {
my ($self, $message) = @_;
return undef
if defined($self->{max_message_size})
and -s $message->file_name > $self->{max_message_size};
#my $text = $message->text;
#my $part = $self->{mime_parser}->parse_data($text);
# In-memory processing doesn't work, see comments in match_mime_part().
my $part = $self->{mime_parser}->parse_open($message->file_name);
my ($result, @code) = $self->match_mime_part($part);
$result &&= 'Parts: ' . $result;
$self->{mime_parser}->filer->purge();
# In-memory processing doesn't work, see comments in match_mime_part().
rmdir($self->{mime_parser}->filer->output_dir);
#if MIME::Tools->VERSION < 6.0;
# Purging also doesn't work properly
# (bug filed: <http://rt.cpan.org/NoAuth/Bug.html?id=7858>).
return ($result, @code);
}
sub match_mime_part {
my ($self, $part) = @_;
if (my $body = $part->bodyhandle) {
# No sub-parts, match this part.
#my $handle = $body->open('r');
# In-memory processing doesn't work because MIME::Body::open()
# doesn't provide a fully-IO::Handle-compatible I/O handle object
# (opened() method is missing, no bug filed). Working around that
# by alternatively creating a Perl 5.8 style in-memory file
# object...
# my $body_as_string = $body->as_string;
# open(my $handle, '+<', \$body_as_string);
# ...doesn't work either because Archive::Zip::_isSeekable() is
# broken (erroneously considers Perl 5.8 style in-memory IO::File
# objects not to be seekable,
# bug filed: <http://rt.cpan.org/NoAuth/Bug.html?id=7855>).
# All of this forces us to make MIME::Parser use temporary files
# instead of doing everything exclusively in-memory. Aaargh!!
# First, we gather signature makers for all possible (and enabled)
# views of the MIME part, then we actually test each view in turn
# against the configured test signatures.
my @views;
# Raw view (the MIME part itself) (if enabled):
my $rawsig = $self->make_signature_from_mime_part($part);
if ($self->{used_views}->{'raw'}) {
push(
@views,
{
name => 'raw',
sig_maker => sub { $rawsig }
}
)
if not defined($self->{max_part_size})
or $rawsig->{size} <= $self->{max_part_size};
}
# ZIP archive members view (if enabled and MIME part is a ZIP archive):
if (
$self->{used_views}->{'zip'} and
defined($rawsig->{file_name}) and
$rawsig->{file_name} =~ /\.zip$/i
) {
require Archive::Zip;
my $archive = Archive::Zip->new();
#$archive->readFromFileHandle($handle);
# In-memory processing doesn't work, see above.
$archive->read($body->path);
# Make a view for each archive member:
foreach my $member ($archive->members) {
push(
@views,
{
name => 'zip',
sig_maker => sub {
$self->make_signature_from_zip_archive_member($member)
}
}
)
if not defined($self->{max_part_size})
or $member->uncompressedSize <= $self->{max_part_size};
}
}
# Now, for each view, try matching the configured signatures:
foreach my $view (@views) {
# Make signature from data view:
my $datasig = $view->{sig_maker}->();
# Test that signature against the configured signatures:
foreach my $signature ( @{$self->{signatures}} ) {
# Skip this signature if it doesn't apply to the current view:
next if not $signature->{views}->{ $view->{name} };
my ($result, @code) = $signature->{matcher}->($datasig);
return ($result, @code) if defined($result);
}
}
}
else {
# Match all sub-parts:
foreach my $subpart ($part->parts) {
my ($result, @code) = $self->match_mime_part($subpart);
return ($result, @code) if defined($result);
}
}
return undef;
}
sub make_signature_from_mime_part {
my ($self, $part) = @_;
my $head = $part->head;
my $body = $part->bodyhandle;
my $text = $body->as_string;
return {
mime_type => $head->mime_type,
file_name => $head->recommended_filename,
size => length($text),
digest_md5 => Digest::MD5::md5_hex($text),
encrypted => FALSE
};
}
sub make_signature_from_zip_archive_member {
my ($self, $member) = @_;
return {
mime_type => undef,
file_name => $member->fileName,
size => $member->uncompressedSize,
digest_md5 => $member->isEncrypted ?
undef
: Digest::MD5::md5_hex(scalar($member->contents)),
encrypted => $member->isEncrypted
};
}
sub compile_signature {
my ($self, $signature) = @_;
my %matchers;
my @aspects = grep(!/^(?:response|views)$/, keys(%$signature));
foreach my $aspect (@aspects) {
my $pattern = $signature->{$aspect};
my $matcher;
if (ref($pattern) eq 'Regexp') {
$matcher = sub { $_[0] =~ $pattern };
}
elsif (ref($pattern) eq 'CODE') {
$matcher = $pattern;
}
else {
if ($aspect =~ /^(?:encrypted)$/) {
# Aspect is of boolean type:
$matcher = sub { not ($_[0] xor $pattern) };
}
else {
$matcher = sub { $_[0] eq $pattern };
}
}
$matchers{$aspect} = $matcher;
}
my @response =
ref($signature->{response}) eq 'ARRAY' ?
@{ $signature->{response} }
: ($signature->{response} || $self->default_response);
my $matcher = sub {
# Closure with regard to %matchers.
my ($signature) = @_;
foreach my $aspect (keys(%matchers)) {
my $value = $signature->{$aspect};
return undef
if not defined($value)
or not $matchers{$aspect}->($value);
}
return @response;
};
$signature->{matcher} = $matcher;
return;
}
=head1 SEE ALSO
L<Courier::Filter::Module>, L<Courier::Filter::Overview>.
For AVAILABILITY, SUPPORT, and LICENSE information, see
L<Courier::Filter::Overview>.
=head1 AUTHOR
Julian Mehnle <julian@mehnle.net>
=cut
TRUE;