# <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME Mail::SpamAssassin::Conf - SpamAssassin configuration file =head1 SYNOPSIS # a comment rewrite_header Subject *****SPAM***** full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618 header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters score A_HREF_TO_REMOVE 2.0 lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com lang pt_BR report O programa detetor de Spam ZOE [...] =head1 DESCRIPTION SpamAssassin is configured using traditional UNIX-style configuration files, loaded from the C and C directories. The following web page lists the most important configuration settings used to configure SpamAssassin; novices are encouraged to read it first: http://wiki.apache.org/spamassassin/ImportantInitialConfigItems =head1 FILE FORMAT The C<#> character starts a comment, which continues until end of line. B if the C<#> character is to be used as part of a rule or configuration option, it must be escaped with a backslash. 
i.e.: C<\#> Whitespace in the files is not significant, but please note that starting a line with whitespace is deprecated, as we reserve its use for multi-line rule definitions, at some point in the future. Currently, each rule or configuration setting must fit on one line; multi-line settings are not supported yet. File and directory paths can use C<~> to refer to the user's home directory, but no other shell-style path extensions such as globbing or C<~user/> are supported. Where appropriate below, default values are listed in parentheses. =head1 USER PREFERENCES The following options can be used in both site-wide (C<local.cf>) and user-specific (C<user_prefs>) configuration files to customize how SpamAssassin handles incoming email messages. =cut package Mail::SpamAssassin::Conf; use strict; use warnings; use bytes; use re 'taint'; use Mail::SpamAssassin::Util; use Mail::SpamAssassin::NetSet; use Mail::SpamAssassin::Constants qw(:sa :ip); use Mail::SpamAssassin::Conf::Parser; use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Util::TieOneStringHash; use Mail::SpamAssassin::Util qw(untaint_var); use File::Spec; use vars qw{ @ISA $CONF_TYPE_STRING $CONF_TYPE_BOOL $CONF_TYPE_NUMERIC $CONF_TYPE_HASH_KEY_VALUE $CONF_TYPE_ADDRLIST $CONF_TYPE_TEMPLATE $CONF_TYPE_STRINGLIST $CONF_TYPE_IPADDRLIST $CONF_TYPE_DURATION $CONF_TYPE_NOARGS $MISSING_REQUIRED_VALUE $INVALID_VALUE $INVALID_HEADER_FIELD_NAME @MIGRATED_SETTINGS $COLLECT_REGRESSION_TESTS $TYPE_HEAD_TESTS $TYPE_HEAD_EVALS $TYPE_BODY_TESTS $TYPE_BODY_EVALS $TYPE_FULL_TESTS $TYPE_FULL_EVALS $TYPE_RAWBODY_TESTS $TYPE_RAWBODY_EVALS $TYPE_URI_TESTS $TYPE_URI_EVALS $TYPE_META_TESTS $TYPE_RBL_EVALS $TYPE_EMPTY_TESTS }; @ISA = qw(); # odd => eval test. Not constants so they can be shared with Parser # TODO: move to Constants.pm? 
$TYPE_HEAD_TESTS = 0x0008; $TYPE_HEAD_EVALS = 0x0009; $TYPE_BODY_TESTS = 0x000a; $TYPE_BODY_EVALS = 0x000b; $TYPE_FULL_TESTS = 0x000c; $TYPE_FULL_EVALS = 0x000d; $TYPE_RAWBODY_TESTS = 0x000e; $TYPE_RAWBODY_EVALS = 0x000f; $TYPE_URI_TESTS = 0x0010; $TYPE_URI_EVALS = 0x0011; $TYPE_META_TESTS = 0x0012; $TYPE_RBL_EVALS = 0x0013; $TYPE_EMPTY_TESTS = 0x0014; my @rule_types = ("body_tests", "uri_tests", "uri_evals", "head_tests", "head_evals", "body_evals", "full_tests", "full_evals", "rawbody_tests", "rawbody_evals", "rbl_evals", "meta_tests"); #Removed $VERSION per BUG 6422 #$VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later # these are variables instead of constants so that other classes can # access them; if they're constants, they'd have to go in Constants.pm # TODO: move to Constants.pm? $CONF_TYPE_STRING = 1; $CONF_TYPE_BOOL = 2; $CONF_TYPE_NUMERIC = 3; $CONF_TYPE_HASH_KEY_VALUE = 4; $CONF_TYPE_ADDRLIST = 5; $CONF_TYPE_TEMPLATE = 6; $CONF_TYPE_NOARGS = 7; $CONF_TYPE_STRINGLIST = 8; $CONF_TYPE_IPADDRLIST = 9; $CONF_TYPE_DURATION = 10; $MISSING_REQUIRED_VALUE = '-99999999999999'; # string expected by parser $INVALID_VALUE = '-99999999999998'; $INVALID_HEADER_FIELD_NAME = '-99999999999997'; # set to "1" by the test suite code, to record regression tests # $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1; # search for "sub new {" to find the start of the code ########################################################################### sub set_default_commands { my($self) = @_; # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt. # push each config item like this, to avoid a POD bug; it can't just accept # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies. my @cmds; =head2 SCORING OPTIONS =over 4 =item required_score n.nn (default: 5) Set the score required before a mail is considered spam. C can be an integer or a real number. 
5.0 is the default setting, and is quite aggressive; it would be suitable for a single-user setup, but if you're an ISP installing SpamAssassin, you should probably set the default to be more conservative, like 8.0 or 10.0. It is not recommended to automatically delete or discard messages marked as spam, as your users B<will> complain, but if you choose to do so, only delete messages with an exceptionally high score such as 15.0 or higher. This option was previously known as C<required_hits> and that name is still accepted, but is deprecated. =cut push (@cmds, { setting => 'required_score', aliases => ['required_hits'], # backward compatible default => 5, type => $CONF_TYPE_NUMERIC, }); =item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ] Assign scores (the number of points for a hit) to a given test. Scores can be positive or negative real numbers or integers. C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for that test; for example, 'FROM_ENDS_IN_NUMS'. If only one valid score is listed, then that score is always used for a test. If four valid scores are listed, then the score that is used depends on how SpamAssassin is being used. The first score is used when both Bayes and network tests are disabled (score set 0). The second score is used when Bayes is disabled, but network tests are enabled (score set 1). The third score is used when Bayes is enabled and network tests are disabled (score set 2). The fourth score is used when Bayes is enabled and network tests are enabled (score set 3). Setting a rule's score to 0 will disable that rule from running. If any of the score values are surrounded by parentheses '()', then all of the scores in the line are considered to be relative to the already set score. i.e.: '(3)' means increase the score for this rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase the score for this rule by 3 in score sets 0 and 2 only. 
If no score is given for a test by the end of the configuration, a default score is assigned: a score of 1.0 is used for all tests, except those whose names begin with 'T_' (this is used to indicate a rule in testing) which receive 0.01. Note that test names which begin with '__' are indirect rules used to compose meta-match rules and can also act as prerequisites to other rules. They are not scored or listed in the 'tests hit' reports, but assigning a score of 0 to an indirect rule will disable it from running. =cut push (@cmds, { setting => 'score', is_frequent => 1, code => sub { my ($self, $key, $value, $line) = @_; my($rule, @scores) = split(/\s+/, $value); unless (defined $value && $value !~ /^$/ && (scalar @scores == 1 || scalar @scores == 4)) { info("config: score: requires a symbolic rule name and 1 or 4 scores"); return $MISSING_REQUIRED_VALUE; } # Figure out if we're doing relative scores, remove the parens if we are my $relative = 0; foreach (@scores) { local ($1); if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) { $relative = 1; } unless (/^-?\d+(?:\.\d+)?$/) { info("config: score: the non-numeric score ($_) is not valid, " . "a numeric score is required"); return $INVALID_VALUE; } } if ($relative && !exists $self->{scoreset}->[0]->{$rule}) { info("config: score: relative score without previous setting in " . "configuration"); return $INVALID_VALUE; } # If we're only passed 1 score, copy it to the other scoresets if (@scores) { if (@scores != 4) { @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] ); } # Set the actual scoreset values appropriately for my $index (0..3) { my $score = $relative ? $self->{scoreset}->[$index]->{$rule} + $scores[$index] : $scores[$index]; $self->{scoreset}->[$index]->{$rule} = $score + 0.0; } } } }); =back =head2 WHITELIST AND BLACKLIST OPTIONS =over 4 =item whitelist_from user@example.com Used to whitelist sender addresses which send mail that is often tagged (incorrectly) as spam. 
Use of this setting is not recommended, since it blindly trusts the message, which is routinely and easily forged by spammers and phish senders. The recommended solution is to instead use C or other authenticated whitelisting methods, or C. Whitelist and blacklist addresses are now file-glob-style patterns, so C, C<*@isp.com>, or C<*.domain.net> will all work. Specifically, C<*> and C are allowed, but all other metacharacters are not. Regular expressions are not used for security reasons. Matching is case-insensitive. Multiple addresses per line, separated by spaces, is OK. Multiple C lines are also OK. The headers checked for whitelist addresses are as follows: if C is set, use that; otherwise check all addresses taken from the following set of headers: Envelope-Sender Resent-Sender X-Envelope-From From In addition, the "envelope sender" data, taken from the SMTP envelope data where this is available, is looked up. See C. e.g. whitelist_from joe@example.com fred@example.com whitelist_from *@example.com =cut push (@cmds, { setting => 'whitelist_from', type => $CONF_TYPE_ADDRLIST, }); =item unwhitelist_from user@example.com Used to override a default whitelist_from entry, so for example a distribution whitelist_from can be overridden in a local.cf file, or an individual user can override a whitelist_from entry in their own C file. The specified email address has to match exactly (although case-insensitively) the address previously used in a whitelist_from line, which implies that a wildcard only matches literally the same wildcard (not 'any' address). e.g. 
unwhitelist_from joe@example.com fred@example.com unwhitelist_from *@example.com =cut push (@cmds, { command => 'unwhitelist_from', setting => 'whitelist_from', type => $CONF_TYPE_ADDRLIST, code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net Works similarly to whitelist_from, except that in addition to matching a sender address, a relay's rDNS name or its IP address must match too for the whitelisting rule to fire. The first parameter is a sender's e-mail address to whitelist, and the second is a string to match the relay's rDNS, or its IP address. Matching is case-insensitive. This second parameter is matched against the TCP-info information field as provided in a FROM clause of a trace information (i.e. the Received header field, see RFC 5321). Only the Received header fields inserted by trusted hosts are considered. This parameter can either be a full hostname, or the domain component of that hostname, or an IP address in square brackets. The reverse DNS lookup is done by a MTA, not by SpamAssassin. In case of an IPv4 address in brackets, it may be truncated on classful boundaries to cover whole subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>. CIDR notation is currently not supported, nor is IPv6. The matching on IP address is mainly provided to cover rare cases where whitelisting of a sending MTA is desired which does not have a correct reverse DNS configured. In other words, if the host that connected to your MX had an IP address 192.0.2.123 that mapped to 'sendinghost.example.org', you should specify C, or C, or C<[192.0.2.123]> or C<[192.0.2]> here. Note that this requires that C be correct. For simple cases, it will be, but for a complex network you may get better results by setting that parameter. 
It also requires that your mail exchangers be configured to perform DNS reverse lookups on the connecting host's IP address, and to record the result in the generated Received header field according to RFC 5321. e.g. whitelist_from_rcvd joe@example.com example.com whitelist_from_rcvd *@axkit.org sergeant.org whitelist_from_rcvd *@axkit.org [192.0.2.123] =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net Same as C, but used for the default whitelist entries in the SpamAssassin distribution. The whitelist score is lower, because these are often targets for spammer spoofing. =cut push (@cmds, { setting => 'whitelist_from_rcvd', type => $CONF_TYPE_ADDRLIST, code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $MISSING_REQUIRED_VALUE; } unless ($value =~ /^\S+\s+\S+$/) { return $INVALID_VALUE; } $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd', split(/\s+/, $value)); } }); push (@cmds, { setting => 'def_whitelist_from_rcvd', type => $CONF_TYPE_ADDRLIST, code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $MISSING_REQUIRED_VALUE; } unless ($value =~ /^\S+\s+\S+$/) { return $INVALID_VALUE; } $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd', split(/\s+/, $value)); } }); =item whitelist_allows_relays user@example.com Specify addresses which are in C that sometimes send through a mail relay other than the listed ones. By default mail with a From address that is in C that does not match the relay will trigger a forgery rule. Including the address in C prevents that. Whitelist and blacklist addresses are now file-glob-style patterns, so C, C<*@isp.com>, or C<*.domain.net> will all work. Specifically, C<*> and C are allowed, but all other metacharacters are not. Regular expressions are not used for security reasons. Matching is case-insensitive. Multiple addresses per line, separated by spaces, is OK. 
Multiple C lines are also OK. The specified email address does not have to match exactly the address previously used in a whitelist_from_rcvd line as it is compared to the address in the header. e.g. whitelist_allows_relays joe@example.com fred@example.com whitelist_allows_relays *@example.com =cut push (@cmds, { setting => 'whitelist_allows_relays', type => $CONF_TYPE_ADDRLIST, }); =item unwhitelist_from_rcvd user@example.com Used to override a default whitelist_from_rcvd entry, so for example a distribution whitelist_from_rcvd can be overridden in a local.cf file, or an individual user can override a whitelist_from_rcvd entry in their own C file. The specified email address has to match exactly the address previously used in a whitelist_from_rcvd line. e.g. unwhitelist_from_rcvd joe@example.com fred@example.com unwhitelist_from_rcvd *@axkit.org =cut push (@cmds, { setting => 'unwhitelist_from_rcvd', type => $CONF_TYPE_ADDRLIST, code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $MISSING_REQUIRED_VALUE; } unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) { return $INVALID_VALUE; } $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd', split (/\s+/, $value)); $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', split (/\s+/, $value)); } }); =item blacklist_from user@example.com Used to specify addresses which send mail that is often tagged (incorrectly) as non-spam, but which the user doesn't want. Same format as C. =cut push (@cmds, { setting => 'blacklist_from', type => $CONF_TYPE_ADDRLIST, }); =item unblacklist_from user@example.com Used to override a default blacklist_from entry, so for example a distribution blacklist_from can be overridden in a local.cf file, or an individual user can override a blacklist_from entry in their own C file. The specified email address has to match exactly the address previously used in a blacklist_from line. e.g. 
unblacklist_from joe@example.com fred@example.com unblacklist_from *@spammer.com =cut push (@cmds, { command => 'unblacklist_from', setting => 'blacklist_from', type => $CONF_TYPE_ADDRLIST, code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =item whitelist_to user@example.com If the given address appears as a recipient in the message headers (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will be whitelisted. Useful if you're deploying SpamAssassin system-wide, and don't want some users to have their mail filtered. Same format as C. There are three levels of To-whitelisting, C, C and C. Users in the first level may still get some spammish mails blocked, but users in C should never get mail blocked. The headers checked for whitelist addresses are as follows: if C or C are set, use those; otherwise check all addresses taken from the following set of headers: To Cc Apparently-To Delivered-To Envelope-Recipients Apparently-Resent-To X-Envelope-To Envelope-To X-Delivered-To X-Original-To X-Rcpt-To X-Real-To =item more_spam_to user@example.com See above. =item all_spam_to user@example.com See above. =cut push (@cmds, { setting => 'whitelist_to', type => $CONF_TYPE_ADDRLIST, }); push (@cmds, { setting => 'more_spam_to', type => $CONF_TYPE_ADDRLIST, }); push (@cmds, { setting => 'all_spam_to', type => $CONF_TYPE_ADDRLIST, }); =item blacklist_to user@example.com If the given address appears as a recipient in the message headers (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will be blacklisted. Same format as C. =cut push (@cmds, { setting => 'blacklist_to', type => $CONF_TYPE_ADDRLIST, }); =item whitelist_auth user@example.com Used to specify addresses which send mail that is often tagged (incorrectly) as spam. This is different from C and C in that it first verifies that the message was sent by an authorized sender for the address, before whitelisting. 
Authorization is performed using one of the installed sender-authorization schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active, and working, for this to operate. Using C<whitelist_auth> is roughly equivalent to specifying duplicate C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines for each of the addresses specified. e.g. whitelist_auth joe@example.com fred@example.com whitelist_auth *@example.com =item def_whitelist_auth user@example.com Same as C<whitelist_auth>, but used for the default whitelist entries in the SpamAssassin distribution. The whitelist score is lower, because these are often targets for spammer spoofing. =cut push (@cmds, { setting => 'whitelist_auth', type => $CONF_TYPE_ADDRLIST, }); push (@cmds, { setting => 'def_whitelist_auth', type => $CONF_TYPE_ADDRLIST, }); =item unwhitelist_auth user@example.com Used to override a C<whitelist_auth> entry. The specified email address has to match exactly the address previously used in a C<whitelist_auth> line. e.g. unwhitelist_auth joe@example.com fred@example.com unwhitelist_auth *@example.com =cut push (@cmds, { command => 'unwhitelist_auth', setting => 'whitelist_auth', type => $CONF_TYPE_ADDRLIST, code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =item enlist_uri_host (listname) host ... Adds one or more host names or domain names to a named list of URI domains. The named list can then be consulted through a check_uri_host_listed() eval rule implemented by the WLBLEval plugin, which takes the list name as an argument. Parentheses around a list name are literal - a required syntax. Host names may optionally be prefixed by an exclamation mark '!', which produces false as a result if this entry matches. This makes it easier to exclude some subdomains when their superdomain is listed, for example: enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com No wildcards are supported, but subdomains do match implicitly. Lists are independent. 
Search for each named list starts by looking up the full hostname first, then leading fields are progressively stripped off (e.g.: sub.example.com, example.com, com) until a match is found or we run out of fields. The first matching entry (the most specific) determines if a lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result. If an URL found in a message contains an IP address in place of a host name, the given list must specify the exact same IP address (instead of a host name) in order to match. Use the delist_uri_host directive to neutralize previous enlist_uri_host settings. Enlisting to lists named 'BLACK' and 'WHITE' have their shorthand directives blacklist_uri_host and whitelist_uri_host and corresponding default rules, but the names 'BLACK' and 'WHITE' are otherwise not special or reserved. =cut push (@cmds, { command => 'enlist_uri_host', setting => 'uri_host_lists', type => $CONF_TYPE_ADDRLIST, code => sub { my($conf, $key, $value, $line) = @_; local($1,$2); if ($value !~ /^ \( (.*?) \) \s+ (.*) \z/sx) { return $MISSING_REQUIRED_VALUE; } my $listname = $1; # corresponds to arg in check_uri_host_in_wblist() # note: must not factor out dereferencing, as otherwise # subhashes would spring up in a copy and be lost foreach my $host ( split(' ', lc $2) ) { my $v = $host =~ s/^!// ? 0 : 1; $conf->{uri_host_lists}{$listname}{$host} = $v; } } }); =item delist_uri_host [ (listname) ] host ... Removes one or more specified host names from a named list of URI domains. Removing an unlisted name is ignored (is not an error). Listname is optional, if specified then just the named list is affected, otherwise hosts are removed from all URI host lists created so far. Parenthesis around a list name are a required syntax. Note that directives in configuration files are processed in sequence, the delist_uri_host only applies to previously listed entries and has no effect on enlisted entries in yet-to-be-processed directives. 
For convenience (similarity to the enlist_uri_host directive) hostnames may be prefixed by an exclamation mark, which is stripped off from each name and has no meaning here. =cut push (@cmds, { command => 'delist_uri_host', setting => 'uri_host_lists', type => $CONF_TYPE_ADDRLIST, code => sub { my($conf, $key, $value, $line) = @_; local($1,$2); if ($value !~ /^ (?: \( (.*?) \) \s+ )? (.*) \z/sx) { return $MISSING_REQUIRED_VALUE; } my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}}; my @args = split(' ', lc $2); foreach my $listname (@listnames) { foreach my $host (@args) { my $v = $host =~ s/^!// ? 0 : 1; delete $conf->{uri_host_lists}{$listname}{$host}; } } } }); =item blacklist_uri_host host-or-domain ... Is a shorthand for a directive: enlist_uri_host (BLACK) host ... Please see directives enlist_uri_host and delist_uri_host for details. =cut push (@cmds, { command => 'blacklist_uri_host', setting => 'uri_host_lists', type => $CONF_TYPE_ADDRLIST, code => sub { my($conf, $key, $value, $line) = @_; foreach my $host ( split(' ', lc $value) ) { my $v = $host =~ s/^!// ? 0 : 1; $conf->{uri_host_lists}{'BLACK'}{$host} = $v; } } }); =item whitelist_uri_host host-or-domain ... Is a shorthand for a directive: enlist_uri_host (WHITE) host ... Please see directives enlist_uri_host and delist_uri_host for details. =cut push (@cmds, { command => 'whitelist_uri_host', setting => 'uri_host_lists', type => $CONF_TYPE_ADDRLIST, code => sub { my($conf, $key, $value, $line) = @_; foreach my $host ( split(' ', lc $value) ) { my $v = $host =~ s/^!// ? 0 : 1; $conf->{uri_host_lists}{'WHITE'}{$host} = $v; } } }); =back =head2 BASIC MESSAGE TAGGING OPTIONS =over 4 =item rewrite_header { subject | from | to } STRING By default, suspected spam messages will not have the C<Subject>, C<From> or C<To> lines tagged to indicate spam. By setting this option, the header will be tagged with C<STRING> to indicate that a message is spam. 
For the From or To headers, this will take the form of an RFC 2822 comment following the address in parentheses. For the Subject header, this will be prepended to the original subject. Note that you should only use the _REQD_ and _SCORE_ tags when rewriting the Subject header if C<report_safe> is 0. Otherwise, you may not be able to remove the SpamAssassin markup via the normal methods. More information about tags is explained below in the B