View | Details | Raw Unified | Return to bug 7215
Collapse All | Expand All

(-)lib/Mail/SpamAssassin/Conf.pm (-12 / +18 lines)
Lines 82-94 Link Here
82
# use bytes;
82
# use bytes;
83
use re 'taint';
83
use re 'taint';
84
84
85
use Mail::SpamAssassin::Util;
86
use Mail::SpamAssassin::NetSet;
85
use Mail::SpamAssassin::NetSet;
87
use Mail::SpamAssassin::Constants qw(:sa :ip);
86
use Mail::SpamAssassin::Constants qw(:sa :ip);
88
use Mail::SpamAssassin::Conf::Parser;
87
use Mail::SpamAssassin::Conf::Parser;
89
use Mail::SpamAssassin::Logger;
88
use Mail::SpamAssassin::Logger;
90
use Mail::SpamAssassin::Util::TieOneStringHash;
89
use Mail::SpamAssassin::Util::TieOneStringHash;
91
use Mail::SpamAssassin::Util qw(untaint_var);
90
use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii);
92
use File::Spec;
91
use File::Spec;
93
92
94
use vars qw{
93
use vars qw{
Lines 3477-3484 Link Here
3477
3476
3478
=item util_rb_tld tld1 tld2 ...
3477
=item util_rb_tld tld1 tld2 ...
3479
3478
3480
This option maintains list of valid TLDs in the RegistryBoundaries code. 
3479
This option maintains a list of valid TLDs in the RegistryBoundaries code. 
3481
TLDs include things like com, net, org, etc.
3480
Top-level domains (TLDs) include things like com, net, org, xn--p1ai, рф, ...
3481
International domain names may be specified in ASCII-compatible encoding (ACE),
3482
e.g. xn--p1ai, xn--qxam, or with Unicode labels encoded as UTF-8 octets,
3483
e.g. рф, ελ.
3482
3484
3483
=cut
3485
=cut
3484
3486
Lines 3541-3547 Link Here
3541
    xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
3543
    xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
3542
    xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
3544
    xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
3543
    youtube yt za zm zone zw
3545
    youtube yt za zm zone zw
3544
    /) { $self->{valid_tlds}{lc $_} = 1; }
3546
    /) { $self->{valid_tlds}{idn_to_ascii($_)} = 1 }
3545
3547
3546
  push (@cmds, {
3548
  push (@cmds, {
3547
    setting => 'util_rb_tld',
3549
    setting => 'util_rb_tld',
Lines 3555-3561 Link Here
3555
	return $INVALID_VALUE;
3557
	return $INVALID_VALUE;
3556
      }
3558
      }
3557
      foreach (split(/\s+/, $value)) {
3559
      foreach (split(/\s+/, $value)) {
3558
        $self->{valid_tlds}{lc $_} = 1;
3560
        $self->{valid_tlds}{idn_to_ascii($_)} = 1;
3559
      }
3561
      }
3560
      dbg("config: added tld list - $value");
3562
      dbg("config: added tld list - $value");
3561
    }
3563
    }
Lines 3564-3570 Link Here
3564
=item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3566
=item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3565
3567
3566
This option maintains list of valid 2nd-level TLDs in the RegistryBoundaries
3568
This option maintains a list of valid 2nd-level TLDs in the RegistryBoundaries
3567
code.  2TLDs include things like co.uk, fed.us, etc.
3569
code.  2TLDs include things like co.uk, fed.us, etc.  International domain
3570
names may be specified in ASCII-compatible encoding (ACE), or with Unicode
3571
labels encoded as UTF-8 octets.
3568
3572
3569
=cut
3573
=cut
3570
3574
Lines 3735-3741 Link Here
3735
    net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
3739
    net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
3736
    mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
3740
    mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
3737
    com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
3741
    com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
3738
    /) { $self->{two_level_domains}{lc $_} = 1; }
3742
    /) { $self->{two_level_domains}{idn_to_ascii($_)} = 1 }
3739
3743
3740
  push (@cmds, {
3744
  push (@cmds, {
3741
    setting => 'util_rb_2tld',
3745
    setting => 'util_rb_2tld',
Lines 3749-3755 Link Here
3749
	return $INVALID_VALUE;
3753
	return $INVALID_VALUE;
3750
      }
3754
      }
3751
      foreach (split(/\s+/, $value)) {
3755
      foreach (split(/\s+/, $value)) {
3752
        $self->{two_level_domains}{lc $_} = 1;
3756
        $self->{two_level_domains}{idn_to_ascii($_)} = 1;
3753
      }
3757
      }
3754
    }
3758
    }
3755
  });
3759
  });
Lines 3757-3763 Link Here
3757
=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3761
=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3758
3762
3759
This option maintains list of valid 3rd-level TLDs in the RegistryBoundaries
3763
This option maintains a list of valid 3rd-level TLDs in the RegistryBoundaries
3760
code.  3TLDs include things like demon.co.uk, plc.co.im, etc.
3764
code.  3TLDs include things like demon.co.uk, plc.co.im, etc.  International
3765
domain names may be specified in ASCII-compatible encoding (ACE), or with
3766
Unicode labels encoded as UTF-8 octets.
3761
3767
3762
=cut
3768
=cut
3763
3769
Lines 3766-3772 Link Here
3766
  # sa-update 20_aux_tlds.cf.
3772
  # sa-update 20_aux_tlds.cf.
3767
  foreach (qw/
3773
  foreach (qw/
3768
    demon.co.uk esc.edu.ar lkd.co.im plc.co.im
3774
    demon.co.uk esc.edu.ar lkd.co.im plc.co.im
3769
    /) { $self->{three_level_domains}{lc $_} = 1; }
3775
    /) { $self->{three_level_domains}{idn_to_ascii($_)} = 1 }
3770
3776
3771
  push (@cmds, {
3777
  push (@cmds, {
3772
    setting => 'util_rb_3tld',
3778
    setting => 'util_rb_3tld',
Lines 3780-3786 Link Here
3780
	return $INVALID_VALUE;
3786
	return $INVALID_VALUE;
3781
      }
3787
      }
3782
      foreach (split(/\s+/, $value)) {
3788
      foreach (split(/\s+/, $value)) {
3783
        $self->{three_level_domains}{lc $_} = 1;
3789
        $self->{three_level_domains}{idn_to_ascii($_)} = 1;
3784
      }
3790
      }
3785
    }
3791
    }
3786
  });
3792
  });
(-)lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (-2 / +1 lines)
Lines 1048-1056 Link Here
1048
  return 0 if $from eq '' || $to eq '';
1048
  return 0 if $from eq '' || $to eq '';
1049
  return 0 if $from =~ /^SRS\d=/;
1049
  return 0 if $from =~ /^SRS\d=/;
1050
1050
1051
  if ($to =~ /^([^@]+)@(.+)$/) {
1051
  if ($to =~ /^([^@]+)\@(.+)$/) {
1052
    my($user,$dom) = ($1,$2);
1052
    my($user,$dom) = ($1,$2);
1053
    $dom = idn_to_ascii($dom);
1054
    $dom = $self->{main}->{registryboundaries}->trim_domain($dom);
1053
    $dom = $self->{main}->{registryboundaries}->trim_domain($dom);
1055
    return unless
1054
    return unless
1056
        ($self->{main}->{registryboundaries}->is_domain_valid($dom));
1055
        ($self->{main}->{registryboundaries}->is_domain_valid($dom));
(-)lib/Mail/SpamAssassin/RegistryBoundaries.pm (-5 / +12 lines)
Lines 33-38 Link Here
33
our @ISA = qw();
33
our @ISA = qw();
34
use vars qw(%US_STATES);
34
use vars qw(%US_STATES);
35
35
36
use Mail::SpamAssassin::Logger;
37
use Mail::SpamAssassin::Util qw(idn_to_ascii);
38
36
# called from SpamAssassin->init() to create $self->{util_rb}
39
# called from SpamAssassin->init() to create $self->{util_rb}
37
sub new {
40
sub new {
38
  my $class = shift;
41
  my $class = shift;
Lines 46-52 Link Here
46
  bless ($self, $class);
49
  bless ($self, $class);
47
50
48
  # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
51
  # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
49
  if ($self->{conf}->{valid_tlds}) {
52
  if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {
53
    # International domain names are already in ASCII-compatible encoding (ACE)
50
    my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
54
    my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
51
    # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
55
    # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
52
    $self->{valid_tlds_re} = qr/(?:$tlds)/i;
56
    $self->{valid_tlds_re} = qr/(?:$tlds)/i;
Lines 87-95 Link Here
87
=cut
91
=cut
88
92
89
sub split_domain {
93
sub split_domain {
90
  my $self = shift;
94
  my ($self, $domain) = @_;
91
  my $domain = lc shift;
92
95
96
  $domain = idn_to_ascii($domain);
93
  my $hostname = '';
97
  my $hostname = '';
94
98
95
  if (defined $domain && $domain ne '') {
99
  if (defined $domain && $domain ne '') {
Lines 126-131 Link Here
126
        }
130
        }
127
        else {
131
        else {
128
          my $temp = join(".", @domparts);
132
          my $temp = join(".", @domparts);
133
          # International domain names in ASCII-compatible encoding (ACE)
129
          last if ($self->{conf}->{three_level_domains}{$temp});
134
          last if ($self->{conf}->{three_level_domains}{$temp});
130
        }
135
        }
131
      }
136
      }
Lines 132-137 Link Here
132
      elsif (@domparts == 2) {
137
      elsif (@domparts == 2) {
133
        # co.uk, etc.
138
        # co.uk, etc.
134
        my $temp = join(".", @domparts);
139
        my $temp = join(".", @domparts);
140
        # International domain names in ASCII-compatible encoding (ACE)
135
        last if ($self->{conf}->{two_level_domains}{$temp});
141
        last if ($self->{conf}->{two_level_domains}{$temp});
136
      }
142
      }
137
      push(@hostname, shift @domparts);
143
      push(@hostname, shift @domparts);
Lines 185-196 Link Here
185
=cut
191
=cut
186
192
187
sub is_domain_valid {
193
sub is_domain_valid {
188
  my $self = shift;
194
  my ($self, $dom) = @_;
189
  my $dom = lc shift;
190
195
191
  # domains don't have whitespace
196
  # domains don't have whitespace
192
  return 0 if ($dom =~ /\s/);
197
  return 0 if ($dom =~ /\s/);
193
198
199
  $dom = idn_to_ascii($dom);
200
194
  # ensure it ends in a known-valid TLD, and has at least 1 dot
201
  # ensure it ends in a known-valid TLD, and has at least 1 dot
195
  return 0 unless ($dom =~ /\.([^.]+)$/);
202
  return 0 unless ($dom =~ /\.([^.]+)$/);
196
  return 0 unless ($self->{conf}->{valid_tlds}{$1});
203
  return 0 unless ($self->{conf}->{valid_tlds}{$1});
(-)lib/Mail/SpamAssassin/Util.pm (-2 / +4 lines)
Lines 440-446 Link Here
440
    info("util: idn_to_ascii: not valid UTF-8: /%s/, called from %s line %d",
440
    info("util: idn_to_ascii: not valid UTF-8: /%s/, called from %s line %d",
441
         $s, $package, $line);
441
         $s, $package, $line);
442
    $s = lc $s;  # garbage-in / garbage-out
442
    $s = lc $s;  # garbage-in / garbage-out
443
  } else {
443
  } else {  # is valid UTF-8 but not all-ASCII
444
    my $chars;
444
    my $chars;
445
    # RFC 3490 (IDNA): Whenever dots are used as label separators, the
445
    # RFC 3490 (IDNA): Whenever dots are used as label separators, the
446
    # following characters MUST be recognized as dots: U+002E (full stop),
446
    # following characters MUST be recognized as dots: U+002E (full stop),
Lines 450-456 Link Here
450
      info("util: idn_to_ascii: alternative dots normalized: /%s/ -> /%s/",
450
      info("util: idn_to_ascii: alternative dots normalized: /%s/ -> /%s/",
451
           $_[0], $s);
451
           $_[0], $s);
452
    }
452
    }
453
    if ($have_libidn) {
453
    if (!$have_libidn) {
454
      $s = lc $s;
455
    } else {
454
      # to ASCII-compatible encoding (ACE), lowercased
456
      # to ASCII-compatible encoding (ACE), lowercased
455
      my $sa = Net::LibIDN::idn_to_ascii($s, 'UTF-8');
457
      my $sa = Net::LibIDN::idn_to_ascii($s, 'UTF-8');
456
      if (!defined $sa) {
458
      if (!defined $sa) {
(-)rules/20_aux_tlds.cf (+2 lines)
Lines 52-57 Link Here
52
#
52
#
53
# For an up to date list of IDN TLDs that can be pasted into this block, run this command:
53
# For an up to date list of IDN TLDs that can be pasted into this block, run this command:
54
#  wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | grep -i '^xn--' | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/'
54
#  wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | grep -i '^xn--' | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/'
55
# Since version 4.0 the util_rb_tld also accepts Unicode IDN labels (encoded as UTF-8), e.g.:
56
#  wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -q -O - | grep -i '^xn--' | idn -u | tr '\n' ' ' | fold -w 80 -s | perl -pe 'chomp; s/.*/util_rb_tld \L$_\n/'
55
57
56
if (can(Mail::SpamAssassin::Conf::feature_registryboundaries))
58
if (can(Mail::SpamAssassin::Conf::feature_registryboundaries))
57
util_rb_tld xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c
59
util_rb_tld xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c

Return to bug 7215