diff options
| author | Barry Warsaw | 2008-01-26 18:35:18 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2008-01-26 18:35:18 -0500 |
| commit | 3952c95a23a74b8686b55a3a4f1873238e6d6610 (patch) | |
| tree | c994bb588b94da2eac9d851a7da4400861dee25a | |
| parent | df637148d8fa2d5c101a990ee6766ea8547f000a (diff) | |
| download | mailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.tar.gz mailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.tar.zst mailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.zip | |
| -rw-r--r-- | Mailman/Defaults.py | 23 | ||||
| -rw-r--r-- | Mailman/Handlers/SpamDetect.py | 8 | ||||
| -rw-r--r-- | Mailman/app/chains.py | 191 | ||||
| -rw-r--r-- | Mailman/docs/antispam.txt | 75 | ||||
| -rw-r--r-- | Mailman/docs/chains.txt | 12 | ||||
| -rw-r--r-- | Mailman/interfaces/chain.py | 10 | ||||
| -rw-r--r-- | Mailman/rules/docs/emergency.txt | 4 | ||||
| -rw-r--r-- | Mailman/rules/docs/header-matching.txt | 89 | ||||
| -rw-r--r-- | Mailman/tests/test_documentation.py | 1 |
9 files changed, 281 insertions, 132 deletions
diff --git a/Mailman/Defaults.py b/Mailman/Defaults.py index 75e1c768f..82fb6886f 100644 --- a/Mailman/Defaults.py +++ b/Mailman/Defaults.py @@ -164,16 +164,21 @@ DEFAULT_URL_HOST = '@URLHOST@' # Spam avoidance defaults ##### -# This variable contains a list of 2-tuple of the format (header, regex) which -# the Mailman/Handlers/SpamDetect.py module uses to match against the current -# message. If the regex matches the given header in the current message, then -# it is flagged as spam. header is case-insensitive and should not include -# the trailing colon. regex is always matched with re.IGNORECASE. +# This variable contains a list of tuple of the format: # -# Note that the more searching done, the slower the whole process gets. Spam -# detection is run against all messages coming to either the list, or the -# -owners address, unless the message is explicitly approved. -KNOWN_SPAMMERS = [] +# (header, pattern[, chain]) +# +# which is used to match against the current message's headers. If the +# pattern matches the given header in the current message, then the named +# chain is jumped to. header is case-insensitive and should not include the +# trailing colon. pattern is always matched with re.IGNORECASE. chain is +# optional; if not given the 'hold' chain is used, but if given it may be any +# existing chain, such as 'discard', 'reject', or 'accept'. +# +# Note that the more searching done, the slower the whole process gets. +# Header matching is run against all messages coming to either the list, or +# the -owners address, unless the message is explicitly approved. 
+HEADER_MATCHES = [] diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py index b7e687284..f45b52a32 100644 --- a/Mailman/Handlers/SpamDetect.py +++ b/Mailman/Handlers/SpamDetect.py @@ -88,14 +88,6 @@ class HeaderGenerator(Generator): def process(mlist, msg, msgdata): if msgdata.get('approved'): return - # First do site hard coded header spam checks - for header, regex in config.KNOWN_SPAMMERS: - cre = re.compile(regex, re.IGNORECASE) - for value in msg.get_all(header, []): - mo = cre.search(value) - if mo: - # we've detected spam, so throw the message away - raise SpamDetected # Now do header_filter_rules # TK: Collect headers in sub-parts because attachment filename # extension may be a clue to possible virus/spam. diff --git a/Mailman/app/chains.py b/Mailman/app/chains.py index 38c7325ab..c327f8234 100644 --- a/Mailman/app/chains.py +++ b/Mailman/app/chains.py @@ -33,6 +33,7 @@ __metaclass__ = type __i18n_templates__ = True +import re import logging from email.mime.message import MIMEMessage @@ -49,7 +50,7 @@ from Mailman.app.replybot import autorespond_to_sender, can_acknowledge from Mailman.configuration import config from Mailman.i18n import _ from Mailman.interfaces import ( - IChain, IChainLink, IMutableChain, IPendable, LinkAction) + IChain, IChainLink, IMutableChain, IPendable, IRule, LinkAction) from Mailman.queue import Switchboard log = logging.getLogger('mailman.vette') @@ -91,8 +92,19 @@ class TerminalChainBase: yield Link('truth', LinkAction.stop) def process(self, mlist, msg, msgdata): + """Process the message for the given mailing list. + + This must be overridden by subclasses. + """ raise NotImplementedError + def get_rule(self, name): + """See `IChain`. + + This always returns the globally registered named rule. 
+ """ + return config.rules[name] + class DiscardChain(TerminalChainBase): """Discard a message.""" @@ -283,7 +295,7 @@ class AcceptChain(TerminalChainBase): class Chain: - """Default built-in moderation chain.""" + """Generic chain base class.""" implements(IMutableChain) def __init__(self, name, description): @@ -307,9 +319,124 @@ class Chain: for link in self._links: yield link + def get_rule(self, name): + """See `IChain`. + + This always returns the globally registered named rule. + """ + return config.rules[name] + + + +class BuiltInChain(Chain): + """Default built-in chain.""" + + def __init__(self): + super(BuiltInChain, self).__init__( + 'built-in', _('The built-in moderation chain.')) + self.append_link(Link('approved', LinkAction.jump, 'accept')) + self.append_link(Link('emergency', LinkAction.jump, 'hold')) + self.append_link(Link('loop', LinkAction.jump, 'discard')) + # Do all of the following before deciding whether to hold the message + # for moderation. + self.append_link(Link('administrivia', LinkAction.defer)) + self.append_link(Link('implicit-dest', LinkAction.defer)) + self.append_link(Link('max-recipients', LinkAction.defer)) + self.append_link(Link('max-size', LinkAction.defer)) + self.append_link(Link('news-moderation', LinkAction.defer)) + self.append_link(Link('no-subject', LinkAction.defer)) + self.append_link(Link('suspicious-header', LinkAction.defer)) + # Now if any of the above hit, jump to the hold chain. + self.append_link(Link('any', LinkAction.jump, 'hold')) + # Take a detour through the default header matching chain, which we'll + # create later. + self.append_link(Link('truth', LinkAction.detour, 'header-match')) + # Finally, the builtin chain defaults to acceptance. + self.append_link(Link('truth', LinkAction.jump, 'accept')) + + + +class HeaderMatchRule: + """Header matching rule used by header-match chain.""" + implements(IRule) + + # Sequential rule counter. 
+ _count = 1 + + def __init__(self, header, pattern): + self._header = header + self._pattern = pattern + self.name = 'header-match-%002d' % HeaderMatchRule._count + HeaderMatchRule._count += 1 + self.description = u'%s: %s' % (header, pattern) + # XXX I think we should do better here, somehow recording that a + # particular header matched a particular pattern, but that gets ugly + # with RFC 2822 headers. It also doesn't match well with the rule + # name concept. For now, we just record the rather useless numeric + # rule name. I suppose we could do the better hit recording in the + # check() method, and set self.record = False. + self.record = True + + def check(self, mlist, msg, msgdata): + """See `IRule`.""" + for value in msg.get_all(self._header, []): + if re.search(self._pattern, value, re.IGNORECASE): + return True + return False + + +class HeaderMatchChain(Chain): + """Default header matching chain. + + This could be extended by header match rules in the database. + """ + + def __init__(self): + super(HeaderMatchChain, self).__init__( + 'header-match', _('The built-in header matching chain')) + # The header match rules are not global, so don't register them. + # These are the only rules that the header match chain can execute. + self._links = [] + self._rules = {} + # Initialize header check rules with those from the global + # HEADER_MATCHES variable. + for entry in config.HEADER_MATCHES: + if len(entry) == 2: + header, pattern = entry + chain = 'hold' + elif len(entry) == 3: + header, pattern, chain = entry + # We don't assert that the chain exists here because the jump + # chain may not yet have been created. + else: + raise AssertionError( + 'Bad entry for HEADER_MATCHES: %s' % entry) + self.extend(header, pattern, chain) + + def extend(self, header, pattern, chain='hold'): + """Extend the existing header matches. + + :param header: The case-insensitive header field name. + :param pattern: The pattern to match the header's value against. 
The + match is not anchored and is done case-insensitively. + :param chain: Optional chain to jump to if the pattern matches any of + the named header values. If not given, the 'hold' chain is used. + """ + rule = HeaderMatchRule(header, pattern) + self._rules[rule.name] = rule + link = Link(rule.name, LinkAction.jump, chain) + self._links.append(link) + + def get_rule(self, name): + """See `IChain`. + + Only local rules are findable by this chain. + """ + return self._rules[name] + -def process(start_chain, mlist, msg, msgdata): +def process(mlist, msg, msgdata, start_chain='built-in'): """Process the message through a chain. :param start_chain: The name of the chain to start the processing with. @@ -317,34 +444,45 @@ def process(start_chain, mlist, msg, msgdata): :param msg: The Message object. :param msgdata: The message metadata dictionary. """ - # Find the starting chain. - current_chain = iter(config.chains[start_chain]) + # Set up some bookkeeping. chain_stack = [] msgdata['rule_hits'] = hits = [] msgdata['rule_misses'] = misses = [] - while current_chain: + # Find the starting chain and begin iterating through its links. + chain = config.chains[start_chain] + chain_iter = iter(chain) + # Loop until we've reached the end of all processing chains. + while chain: + # Iterate over all links in the chain. Do this outside a for-loop so + # we can capture a chain's link iterator in mid-flight. This supports + # the 'detour' link action. try: - link = current_chain.next() + link = chain_iter.next() except StopIteration: # This chain is exhausted. Pop the last chain on the stack and - # continue. + # continue iterating through it. If there's nothing left on the + # chain stack then we're completely finished processing. if len(chain_stack) == 0: return - current_chain = chain_stack.pop() + chain, chain_iter = chain_stack.pop() continue # Process this link. 
- rule = config.rules[link.rule] + rule = chain.get_rule(link.rule) if rule.check(mlist, msg, msgdata): if rule.record: hits.append(link.rule) # The rule matched so run its action. if link.action is LinkAction.jump: - current_chain = iter(config.chains[link.chain]) + chain = config.chains[link.chain] + chain_iter = iter(chain) + continue elif link.action is LinkAction.detour: - # Push the current chain so that we can return to it when the - # next chain is finished. - chain_stack.append(current_chain) - current_chain = iter(config.chains[link.chain]) + # Push the current chain so that we can return to it when + # the next chain is finished. + chain_stack.append((chain, chain_iter)) + chain = config.chains[link.chain] + chain_iter = iter(chain) + continue elif link.action is LinkAction.stop: # Stop all processing. return @@ -354,7 +492,7 @@ def process(start_chain, mlist, msg, msgdata): elif link.action is LinkAction.run: link.function(mlist, msg, msgdata) else: - raise AssertionError('Unknown link action: %s' % link.action) + raise AssertionError('Bad link action: %s' % link.action) else: # The rule did not match; keep going. if rule.record: @@ -370,21 +508,10 @@ def initialize(): 'Duplicate chain name: %s' % chain.name) config.chains[chain.name] = chain # Set up a couple of other default chains. - default = Chain('built-in', _('The built-in moderation chain.')) - default.append_link(Link('approved', LinkAction.jump, 'accept')) - default.append_link(Link('emergency', LinkAction.jump, 'hold')) - default.append_link(Link('loop', LinkAction.jump, 'discard')) - # Do all these before deciding whether to hold the message for moderation. 
- default.append_link(Link('administrivia', LinkAction.defer)) - default.append_link(Link('implicit-dest', LinkAction.defer)) - default.append_link(Link('max-recipients', LinkAction.defer)) - default.append_link(Link('max-size', LinkAction.defer)) - default.append_link(Link('news-moderation', LinkAction.defer)) - default.append_link(Link('no-subject', LinkAction.defer)) - default.append_link(Link('suspicious-header', LinkAction.defer)) - # Now if any of the above hit, jump to the hold chain. - default.append_link(Link('any', LinkAction.jump, 'hold')) - # Finally, the builtin chain defaults to acceptance. - default.append_link(Link('truth', LinkAction.jump, 'accept')) + chain = BuiltInChain() + config.chains[chain.name] = chain + # Create and initialize the header matching chain. + chain = HeaderMatchChain() + config.chains[chain.name] = chain # XXX Read chains from the database and initialize them. pass diff --git a/Mailman/docs/antispam.txt b/Mailman/docs/antispam.txt deleted file mode 100644 index 3ad5e982e..000000000 --- a/Mailman/docs/antispam.txt +++ /dev/null @@ -1,75 +0,0 @@ -Anti-spam defences -================== - -By design, Mailman does not have very sophisticated anti-spam measures because -this type of filtering is done much more efficiently at the MTA level. For -example, if Mailman were to do spam detection, it could not reject the message -at SMTP time. - -Still, Mailman does employ a small number of rather ham-handed anti-spam -measures. - - >>> from Mailman.Handlers.SpamDetect import process - >>> from Mailman.queue import Switchboard - >>> from Mailman.configuration import config - >>> mlist = config.db.list_manager.create(u'_xtest@example.com') - - -Short circuiting ----------------- - -If a message is pre-approved, this handler does nothing. - - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... - ... An important message. - ... 
""") - >>> msgdata = {'approved': True} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - From: aperson@example.com - <BLANKLINE> - An important message. - <BLANKLINE> - >>> msgdata - {'approved': True} - - -Header matching ---------------- - -There is a global configuration variable that can be set to a list of header -matches. Each item in that list is a 2-tuple of the header to match and a -regular expression. For example, if we wanted to block all message that come -from 'aperson' regardless of the domain, we'd do something like the following -in our mailman.cfg file: - - >>> old_value = config.KNOWN_SPAMMERS[:] - >>> config.KNOWN_SPAMMERS.append(('from', 'aperson')) - -Now if the same message is posted to the mailing list, and that message is not -pre-approved. The handler will throw an exception that signals the message is -spam. - - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - Traceback (most recent call last): - ... - SpamDetected - >>> print msg.as_string() - From: aperson@example.com - <BLANKLINE> - An important message. - <BLANKLINE> - >>> msgdata - {} - - # Restore global state - config.KNOWN_SPAMMERS = old_value - - -Header filter rules -------------------- - -XXX Need tests. diff --git a/Mailman/docs/chains.txt b/Mailman/docs/chains.txt index 0a93683f7..e676957d8 100644 --- a/Mailman/docs/chains.txt +++ b/Mailman/docs/chains.txt @@ -36,12 +36,14 @@ The Discard chain simply throws the message away. ... An important message. ... """) + >>> from Mailman.app.chains import process + # XXX This checks the vette log file because there is no other evidence # that this chain has done anything. >>> import os >>> fp = open(os.path.join(config.LOG_DIR, 'vette')) >>> file_pos = fp.tell() - >>> chain.process(mlist, msg, {}) + >>> process(mlist, msg, {}, 'discard') >>> fp.seek(file_pos) >>> print 'LOG:', fp.read() LOG: ... DISCARD: <first> @@ -62,7 +64,7 @@ this action. >>> chain.description u'Reject/bounce a message and stop processing.' 
>>> file_pos = fp.tell() - >>> chain.process(mlist, msg, {}) + >>> process(mlist, msg, {}, 'reject') >>> fp.seek(file_pos) >>> print 'LOG:', fp.read() LOG: ... REJECT: <first> @@ -111,7 +113,7 @@ sender and the list moderators. u'Hold a message and stop processing.' >>> file_pos = fp.tell() - >>> chain.process(mlist, msg, {}) + >>> process(mlist, msg, {}, 'hold') >>> fp.seek(file_pos) >>> print 'LOG:', fp.read() LOG: ... HOLD: _xtest@example.com post from aperson@example.com held, @@ -262,7 +264,7 @@ processed and sent on to the list membership. >>> chain.description u'Accept a message.' >>> file_pos = fp.tell() - >>> chain.process(mlist, msg, {}) + >>> process(mlist, msg, {}, 'accept') >>> fp.seek(file_pos) >>> print 'LOG:', fp.read() LOG: ... ACCEPT: <first> @@ -311,7 +313,7 @@ all default rules. This message will end up in the prep queue. >>> file_pos = fp.tell() >>> from Mailman.app.chains import process - >>> process('built-in', mlist, msg, {}) + >>> process(mlist, msg, {}) >>> fp.seek(file_pos) >>> print 'LOG:', fp.read() LOG: ... ACCEPT: <first> diff --git a/Mailman/interfaces/chain.py b/Mailman/interfaces/chain.py index 8c0837820..eca663b30 100644 --- a/Mailman/interfaces/chain.py +++ b/Mailman/interfaces/chain.py @@ -69,6 +69,16 @@ class IChain(Interface): :return: an IChainLink. """ + def get_rule(name): + """Lookup and return the named rule. + + :param name: The name of the rule to return. This may be a globally + registered rule name, in which case it must be unique, or it may + be a rule defined locally to the chain. + :return: The named `IRule`. + :raises: KeyError if the named rule cannot be found. + """ + class IMutableChain(IChain): diff --git a/Mailman/rules/docs/emergency.txt b/Mailman/rules/docs/emergency.txt index 1375c3bc9..aecbcb90d 100644 --- a/Mailman/rules/docs/emergency.txt +++ b/Mailman/rules/docs/emergency.txt @@ -21,7 +21,7 @@ rule matches if the flag is set on the mailing list. 
>>> from Mailman.app.chains import process >>> mlist.emergency = True - >>> process('built-in', mlist, msg, {}) + >>> process(mlist, msg, {}, 'built-in') There are two messages in the virgin queue. The one addressed to the original sender will contain a token we can use to grab the held message out of the @@ -69,6 +69,6 @@ However, if the message metadata has a 'moderator_approved' key set, then even if the mailing list has its emergency flag set, the message still goes through to the membership. - >>> process('built-in', mlist, msg, dict(moderator_approved=True)) + >>> process(mlist, msg, dict(moderator_approved=True), 'built-in') >>> len(virginq.files) 0 diff --git a/Mailman/rules/docs/header-matching.txt b/Mailman/rules/docs/header-matching.txt new file mode 100644 index 000000000..b32feabe5 --- /dev/null +++ b/Mailman/rules/docs/header-matching.txt @@ -0,0 +1,89 @@ +Header matching +=============== + +Mailman can do pattern based header matching during its normal rule +processing. There is a set of site-wide default header matches specified in +the configuration file under the HEADER_MATCHES variable. + + >>> from Mailman.app.lifecycle import create_list + >>> mlist = create_list(u'_xtest@example.com') + +Because the default HEADER_MATCHES variable is empty when the configuration +file is read, we'll just extend the current header matching chain with a +pattern that matches 4 or more stars, discarding the message if it hits. + + >>> from Mailman.configuration import config + >>> chain = config.chains['header-match'] + >>> chain.extend('x-spam-score', '[*]{4,}', 'discard') + +First, if the message has no X-Spam-Score header, the message passes through +the chain untouched (i.e. no disposition). + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Subject: Not spam + ... Message-ID: <one> + ... + ... This is a message. + ... 
""") + + >>> from Mailman.app.chains import process + +Pass through is seen as nothing being in the log file after processing. + + # XXX This checks the vette log file because there is no other evidence + # that this chain has done anything. + >>> import os + >>> fp = open(os.path.join(config.LOG_DIR, 'vette')) + >>> fp.seek(0, 2) + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: + <BLANKLINE> + +Now, if the header exists but does not match, then it also passes through +untouched. + + >>> msg['X-Spam-Score'] = '***' + >>> del msg['subject'] + >>> msg['Subject'] = 'This is almost spam' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<two>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: + <BLANKLINE> + +But now if the header matches, then the message gets discarded. + + >>> msg['X-Spam-Score'] = '****' + >>> del msg['subject'] + >>> msg['Subject'] = 'This is spam, but barely' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<three>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... DISCARD: <three> + <BLANKLINE> + +For kicks, let's show a message that's really spammy. + + >>> msg['X-Spam-Score'] = '**********' + >>> del msg['subject'] + >>> msg['Subject'] = 'This is really spammy' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<four>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... 
DISCARD: <four> + <BLANKLINE> + diff --git a/Mailman/tests/test_documentation.py b/Mailman/tests/test_documentation.py index 9faf1d588..ad00ba19c 100644 --- a/Mailman/tests/test_documentation.py +++ b/Mailman/tests/test_documentation.py @@ -18,7 +18,6 @@ """Harness for testing Mailman's documentation.""" import os -import pdb import doctest import unittest |
