View | Details | Raw Unified | Return to bug 7548
Collapse All | Expand All

(-)Makefile.PL (+1 lines)
Lines 1128-1133 Link Here
1128
	$(PERL) -MFile::Copy -e "copy(q[rules/v330.pre], q{$(B_CONFDIR)/v330.pre}) unless -f q{$(B_CONFDIR)/v330.pre}"
1128
	$(PERL) -MFile::Copy -e "copy(q[rules/v330.pre], q{$(B_CONFDIR)/v330.pre}) unless -f q{$(B_CONFDIR)/v330.pre}"
1129
	$(PERL) -MFile::Copy -e "copy(q[rules/v340.pre], q{$(B_CONFDIR)/v340.pre}) unless -f q{$(B_CONFDIR)/v340.pre}"
1129
	$(PERL) -MFile::Copy -e "copy(q[rules/v340.pre], q{$(B_CONFDIR)/v340.pre}) unless -f q{$(B_CONFDIR)/v340.pre}"
1130
	$(PERL) -MFile::Copy -e "copy(q[rules/v341.pre], q{$(B_CONFDIR)/v341.pre}) unless -f q{$(B_CONFDIR)/v341.pre}"
1130
	$(PERL) -MFile::Copy -e "copy(q[rules/v341.pre], q{$(B_CONFDIR)/v341.pre}) unless -f q{$(B_CONFDIR)/v341.pre}"
1131
	$(PERL) -MFile::Copy -e "copy(q[rules/v342.pre], q{$(B_CONFDIR)/v342.pre}) unless -f q{$(B_CONFDIR)/v342.pre}"
1131
  
1132
  
1132
1133
1133
data__install:
1134
data__install:
(-)lib/Mail/SpamAssassin/Plugin/HashBL.pm (+266 lines)
Line 0 Link Here
1
# Author: Steve Freegard <steve.freegard@fsl.com>
2
# Copyright 2016 Steve Freegard
3
#
4
# <@LICENSE>
5
# Licensed to the Apache Software Foundation (ASF) under one or more
6
# contributor license agreements.  See the NOTICE file distributed with
7
# this work for additional information regarding copyright ownership.
8
# The ASF licenses this file to you under the Apache License, Version 2.0
9
# (the "License"); you may not use this file except in compliance with
10
# the License.  You may obtain a copy of the License at:
11
#
12
#     http://www.apache.org/licenses/LICENSE-2.0
13
#
14
# Unless required by applicable law or agreed to in writing, software
15
# distributed under the License is distributed on an "AS IS" BASIS,
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
# See the License for the specific language governing permissions and
18
# limitations under the License.
19
# </@LICENSE>
20
#
21
22
=head1 NAME
23
24
HashBL - seearch email addresses in HashBL blocklists
25
26
=head1 SYNOPSIS
27
28
  loadplugin Mail::SpamAssassin::Plugin::HashBL
29
  header   HASHBL_EMAIL       eval:check_hashbl_emails('ebl.msbl.org')
30
  describe HASHBL_EMAIL       Message contains email address found on EBL
31
32
=head1 DESCRIPTION
33
34
The Email Blocklist (EBL) contains email addresses used to receive responses to spam emails.
35
These email addresses are sometimes called contact email addresses or 
36
drop boxes.
37
The initial target of this blocklist was "Nigerian" 419 Advance Fee Fraud spam. As time passed and more types of spam that used drop boxes was identified, 
38
these drop boxes also were listed.
39
The EBL now lists significant numbers of drop boxes used in spam sent 
40
by Chinese manufacturers of high-tech and light industrial products, 
41
SEO/web development companies, direct spam services, list sellers, and a number
42
of fraudulent or outright illegal products sold by botnets.
43
44
=cut
45
46
package Mail::SpamAssassin::Plugin::HashBL;
47
use strict;
48
use warnings;
49
my $VERSION = 0.001;
50
51
use Mail::SpamAssassin::Plugin;
52
use Mail::SpamAssassin::PerMsgStatus;
53
use Mail::SpamAssassin::Util;
54
use Digest::SHA qw(sha1_hex);
55
use Digest::MD5 qw(md5_hex);
56
57
use vars qw(@ISA $email_whitelist $skip_replyto_envfrom);
58
@ISA = qw(Mail::SpamAssassin::Plugin);
59
60
sub dbg { Mail::SpamAssassin::Plugin::dbg ("HashBL: @_"); }
61
62
sub new {
63
    my ($class, $mailsa) = @_;
64
65
    $class = ref($class) || $class;
66
    my $self = $class->SUPER::new($mailsa);
67
    bless ($self, $class);
68
69
    $self->{hashbl_available} = 1;
70
    $self->set_config($mailsa->{conf});
71
    $self->register_eval_rule("check_hashbl_emails");
72
73
    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
74
    # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
75
    # full email regex v0.02
76
    $self->{email_regex} = qr/
77
      (?=.{0,64}\@)				# limit userpart to 64 chars (and speed up searching?)
78
      (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
79
      (						# capture email
80
      [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+		# no dot in beginning
81
      (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)*	# no consecutive dots, no ending dot
82
      \@
83
      (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
84
      $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
85
      )
86
      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
87
    /xi;
88
89
    return $self;
90
}
91
92
sub set_config {
93
    my ($self, $conf) = @_;
94
    my @cmds;
95
}
96
97
sub parse_config {
98
    my ($self, $opts) = @_;
99
    return 0;
100
}
101
102
sub _parse_headers {
103
    my ($self, $pms) = @_;
104
105
    if (not defined $pms->{hashbl_email_cache}) {
106
        %{$pms->{hashbl_email_cache}{'headers'}} = ();
107
    }
108
109
    my @headers = ('EnvelopeFrom', 'Sender', 'From', 'Reply-To');
110
111
    foreach my $header (@headers) {
112
        my $email = $pms->get($header . ':addr');
113
        if ($email) {
114
            dbg("Found email $email in header $header");
115
            $pms->{hashbl_email_cache}{'headers'}{$email} = 1;
116
        }
117
    }
118
119
    return 1;
120
}
121
122
sub _parse_body {
123
    my ($self, $pms) = @_;
124
125
    # Parse body
126
    if (not defined $pms->{hashbl_email_cache}) {
127
        %{$pms->{hashbl_email_cache}{'body'}} = ();
128
    }
129
130
    my %seen;
131
    my @body_emails;
132
    # get all <a href="mailto:", since they don't show up on stripped_body
133
    my $parsed = $pms->get_uri_detail_list();
134
    while (my($uri, $info) = each %{$parsed}) {
135
        if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
136
            if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
137
                my $email = lc($1);
138
                push(@body_emails, $email) unless defined $seen{$email};
139
                $seen{$email} = 1;
140
                last if scalar @body_emails >= 20; # sanity
141
            }
142
        }
143
    }
144
    # scan stripped normalized body
145
    # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
146
    my $body = $pms->get_decoded_stripped_body_text_array();
147
    BODY: foreach (@$body) {
148
        # strip urls with possible emails inside
149
        s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
150
        # strip emails contained in <>, not mailto:
151
        # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
152
        s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
153
        while (/$self->{email_regex}/g) {
154
            my $email = lc($1);
155
            push(@body_emails, $email) unless defined $seen{$email};
156
            $seen{$email} = 1;
157
            last BODY if scalar @body_emails >= 40; # sanity
158
        }
159
    }
160
    foreach my $email (@body_emails) {
161
        dbg("Found email $email in body");
162
        $pms->{hashbl_email_cache}{'body'}{$email} = 1;
163
    }
164
    
165
    return 1;
166
}
167
168
sub _got_hit {
169
    my ($self, $pms, $rulename, $email, $desc) = @_;
170
171
    if (defined $pms->{conf}->{descriptions}->{$rulename}) {
172
        $desc = $pms->{conf}->{descriptions}->{$rulename};
173
    }
174
175
    $email =~ s/\@/[at]/g;
176
    $pms->got_hit($rulename, "", description => $desc." ($email)", ruletype => 'eval');
177
}
178
179
sub _submit_email_query {
180
    my ($self, $pms, $list, $type, $email) = @_;
181
    my $rulename = $pms->get_current_eval_rule_name();
182
    my ($hash, $lookup, $key);
183
    if (uc($type) eq 'SHA1') {
184
        $hash = sha1_hex($email);
185
    }
186
    elsif (uc($type) eq 'MD5') {
187
        $hash = md5_hex($email);
188
    }
189
    $lookup = "$hash.$list.";
190
    my $obj = { email => $email };
191
    dbg("list: $list, type: $type, email: $email, hash: $hash, lookup: $lookup");
192
    $key = "HASHBL_EMAIL:$lookup";
193
    my $ent = {
194
        key => $key,
195
        zone => $list,
196
        obj => $obj,
197
        type => 'HASHBL',
198
        rulename => $rulename,
199
    };
200
201
    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
202
        my ($ent2, $pkt) = @_;
203
        $self->_finish_email_lookup($pms, $ent2, $pkt);
204
    }, master_deadline => $pms->{master_deadline} );
205
206
    return $ent;   
207
}
208
209
sub _finish_email_lookup {
210
  my ($self, $pms, $ent, $pkt) = @_;
211
212
  if (!$pkt) {
213
      # $pkt will be undef if the DNS query was aborted (e.g. timed out)
214
      dbg("_finish_email_lookup aborted: ",
215
          $ent->{rulename}, $ent->{key});
216
      return;
217
  }
218
219
  my $email = $ent->{obj}->{email};
220
221
  dbg("_finish_email_lookup: ", $ent->{rulename}, $ent->{key}, $email);
222
 
223
  my @answer = $pkt->answer;
224
  foreach my $rr (@answer) {
225
      if ($rr->address =~ /^127\./) {
226
          $self->_got_hit($pms, $ent->{rulename}, $email);
227
          $pms->register_async_rule_finish($ent->{rulename});
228
      }
229
  }
230
}
231
232
sub check_hashbl_emails {
233
    my ($self, $pms, $list, $type) = @_;
234
235
    return 0 unless $self->{hashbl_available};
236
237
    my $rulename = $pms->get_current_eval_rule_name();
238
239
    # First we lookup all unique email addresses found in the headers
240
    return 0 unless $self->_parse_headers($pms);
241
    foreach my $email (keys %{$pms->{hashbl_email_cache}{'headers'}}) {
242
        # Remove this from the body hash
243
        delete $pms->{hashbl_email_cache}{'body'}{$email};
244
        dbg("HEADER: $email");
245
        $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
246
    }
247
248
    # Check any e-mail addresses found in the message body
249
    return 0 unless $self->_parse_body($pms);
250
251
    my (@emails) = keys %{$pms->{hashbl_email_cache}{'body'}};
252
253
    # Randomize order and truncate the array to 10 items maximum
254
    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@emails);
255
    $#emails = 9 if (scalar @emails > 10);
256
257
    foreach my $email (@emails) {
258
        #$self->_got_hit($pms, $email, "Email found in list $list");
259
        dbg("BODY: $email");
260
        $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
261
    }
262
263
    return 0;
264
}
265
266
1;
(-)rules/25_hashbl.cf (+37 lines)
Line 0 Link Here
1
# SpamAssassin - HashBL rules
2
#
3
# Please don't modify this file as your changes will be overwritten with
4
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
5
# See 'perldoc Mail::SpamAssassin::Conf' for details.
6
#
7
# <@LICENSE>
8
# Licensed to the Apache Software Foundation (ASF) under one or more
9
# contributor license agreements.  See the NOTICE file distributed with
10
# this work for additional information regarding copyright ownership.
11
# The ASF licenses this file to you under the Apache License, Version 2.0
12
# (the "License"); you may not use this file except in compliance with
13
# the License.  You may obtain a copy of the License at:
14
# 
15
#     http://www.apache.org/licenses/LICENSE-2.0
16
# 
17
# Unless required by applicable law or agreed to in writing, software
18
# distributed under the License is distributed on an "AS IS" BASIS,
19
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
# See the License for the specific language governing permissions and
21
# limitations under the License.
22
# </@LICENSE>
23
#
24
###########################################################################
25
26
# Requires the Mail::SpamAssassin::Plugin::HashBL plugin be loaded.
27
28
# This plugin queries EBL rbl that lists significant numbers of drop boxes 
29
# used in spam sent by Chinese manufacturers of high-tech and 
30
# light industrial products, SEO/web development companies, 
31
# direct spam services, list sellers, and a number of fraudulent 
32
# or outright illegal products sold by botnets.
33
34
ifplugin Mail::SpamAssassin::Plugin::HashBL
35
    header   HASHBL_EMAIL	eval:check_hashbl_emails('ebl.msbl.org')
36
    describe HASHBL_EMAIL	Message contains email address found on the EBL
37
endif	# Mail::SpamAssassin::Plugin::HashBL
(-)rules/v342.pre (+20 lines)
Line 0 Link Here
1
# This is the right place to customize your installation of SpamAssassin.
2
#
3
# See 'perldoc Mail::SpamAssassin::Conf' for details of what can be
4
# tweaked.
5
#
6
# This file was installed during the installation of SpamAssassin 3.4.1,
7
# and contains plugin loading commands for the new plugins added in that
8
# release.  It will not be overwritten during future SpamAssassin installs,
9
# so you can modify it to enable some disabled-by-default plugins below,
10
# if you so wish.
11
#
12
# There are now multiple files read to enable plugins in the
13
# /etc/mail/spamassassin directory; previously only one, "init.pre" was
14
# read.  Now both "init.pre", "v310.pre", and any other files ending in
15
# ".pre" will be read.  As future releases are made, new plugins will be
16
# added to new files, named according to the release they're added in.
17
###########################################################################
18
19
# HashBL - Use EBL email blocklist
20
# loadplugin Mail::SpamAssassin::Plugin::HashBL

Return to bug 7548