diff options
| author | Barry Warsaw | 2008-02-01 22:21:05 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2008-02-01 22:21:05 -0500 |
| commit | b6f3ba4c9ebe821dd2c4676d7397fe5312b72a36 (patch) | |
| tree | 5803f890642e5272b856e30869abd630b43271bc | |
| parent | c1cc921b691eb60445cf28bc66a59b02b3cd09a4 (diff) | |
| download | mailman-b6f3ba4c9ebe821dd2c4676d7397fe5312b72a36.tar.gz mailman-b6f3ba4c9ebe821dd2c4676d7397fe5312b72a36.tar.zst mailman-b6f3ba4c9ebe821dd2c4676d7397fe5312b72a36.zip | |
| -rw-r--r-- | Mailman/Handlers/SpamDetect.py | 130 | ||||
| -rw-r--r-- | Mailman/app/chains.py | 15 | ||||
| -rw-r--r-- | Mailman/app/rules.py | 21 | ||||
| -rw-r--r-- | Mailman/app/styles.py | 2 | ||||
| -rw-r--r-- | Mailman/chains/base.py | 19 | ||||
| -rw-r--r-- | Mailman/chains/builtin.py | 63 | ||||
| -rw-r--r-- | Mailman/chains/headers.py | 81 | ||||
| -rw-r--r-- | Mailman/database/mailinglist.py | 2 | ||||
| -rw-r--r-- | Mailman/database/mailman.sql | 2 | ||||
| -rw-r--r-- | Mailman/docs/chains.txt | 3 | ||||
| -rw-r--r-- | Mailman/interfaces/chain.py | 10 | ||||
| -rw-r--r-- | Mailman/rules/docs/header-matching.txt | 56 | ||||
| -rw-r--r-- | Mailman/rules/docs/truth.txt | 10 | ||||
| -rw-r--r-- | Mailman/rules/truth.py | 41 |
14 files changed, 223 insertions, 232 deletions
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py deleted file mode 100644 index f45b52a32..000000000 --- a/Mailman/Handlers/SpamDetect.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (C) 1998-2007 by the Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. - -"""Do more detailed spam detection. - -This module hard codes site wide spam detection. By hacking the -KNOWN_SPAMMERS variable, you can set up more regular expression matches -against message headers. If spam is detected the message is discarded -immediately. - -TBD: This needs to be made more configurable and robust. -""" - -import re - -from cStringIO import StringIO -from email.Generator import Generator - -from Mailman import Errors -from Mailman import i18n -from Mailman.Handlers.Hold import hold_for_approval -from Mailman.configuration import config - -# First, play footsie with _ so that the following are marked as translated, -# but aren't actually translated until we need the text later on. 
-def _(s): - return s - - - -class SpamDetected(Errors.DiscardMessage): - """The message contains known spam""" - -class HeaderMatchHold(Errors.HoldMessage): - reason = _('The message headers matched a filter rule') - - -# And reset the translator -_ = i18n._ - - - -class Tee: - def __init__(self, outfp_a, outfp_b): - self._outfp_a = outfp_a - self._outfp_b = outfp_b - - def write(self, s): - self._outfp_a.write(s) - self._outfp_b.write(s) - - -# Class to capture the headers separate from the message body -class HeaderGenerator(Generator): - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): - Generator.__init__(self, outfp, mangle_from_, maxheaderlen) - self._headertxt = '' - - def _write_headers(self, msg): - sfp = StringIO() - oldfp = self._fp - self._fp = Tee(oldfp, sfp) - try: - Generator._write_headers(self, msg) - finally: - self._fp = oldfp - self._headertxt = sfp.getvalue() - - def header_text(self): - return self._headertxt - - - -def process(mlist, msg, msgdata): - if msgdata.get('approved'): - return - # Now do header_filter_rules - # TK: Collect headers in sub-parts because attachment filename - # extension may be a clue to possible virus/spam. - headers = '' - for p in msg.walk(): - g = HeaderGenerator(StringIO()) - g.flatten(p) - headers += g.header_text() - # Now reshape headers (remove extra CR and connect multiline). 
- headers = re.sub('\n+', '\n', headers) - headers = re.sub('\n\s', ' ', headers) - for patterns, action, empty in mlist.header_filter_rules: - if action == config.DEFER: - continue - for pattern in patterns.splitlines(): - if pattern.startswith('#'): - continue - # ignore 'empty' patterns - if not pattern.strip(): - continue - if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE): - if action == config.DISCARD: - raise Errors.DiscardMessage - if action == config.REJECT: - if msgdata.get('toowner'): - # Don't send rejection notice if addressed to '-owner' - # because it may trigger a loop of notices if the - # sender address is forged. We just discard it here. - raise Errors.DiscardMessage - raise Errors.RejectMessage( - _('Message rejected by filter rule match')) - if action == config.HOLD: - if msgdata.get('toowner'): - # Don't hold '-owner' addressed message. We just - # pass it here but list-owner can set this to be - # discarded on the GUI if he wants. - return - hold_for_approval(mlist, msg, msgdata, HeaderMatchHold) - if action == config.ACCEPT: - return diff --git a/Mailman/app/chains.py b/Mailman/app/chains.py index fc7899cbe..2bf0b9ae1 100644 --- a/Mailman/app/chains.py +++ b/Mailman/app/chains.py @@ -66,20 +66,19 @@ def process(mlist, msg, msgdata, start_chain='built-in'): chain, chain_iter = chain_stack.pop() continue # Process this link. - rule = chain.get_rule(link.rule) - if rule.check(mlist, msg, msgdata): - if rule.record: - hits.append(link.rule) + if link.rule.check(mlist, msg, msgdata): + if link.rule.record: + hits.append(link.rule.name) # The rule matched so run its action. if link.action is LinkAction.jump: - chain = config.chains[link.chain] + chain = link.chain chain_iter = chain.get_links(mlist, msg, msgdata) continue elif link.action is LinkAction.detour: # Push the current chain so that we can return to it when # the next chain is finished. 
chain_stack.append((chain, chain_iter)) - chain = config.chains[link.chain] + chain = link.chain chain_iter = chain.get_links(mlist, msg, msgdata) continue elif link.action is LinkAction.stop: @@ -94,8 +93,8 @@ def process(mlist, msg, msgdata, start_chain='built-in'): raise AssertionError('Bad link action: %s' % link.action) else: # The rule did not match; keep going. - if rule.record: - misses.append(link.rule) + if link.rule.record: + misses.append(link.rule.name) diff --git a/Mailman/app/rules.py b/Mailman/app/rules.py index f0209c767..50bb8d281 100644 --- a/Mailman/app/rules.py +++ b/Mailman/app/rules.py @@ -17,10 +17,7 @@ """Various rule helpers""" -__all__ = [ - 'TruthRule', - 'initialize', - ] +__all__ = ['initialize'] __metaclass__ = type @@ -33,24 +30,8 @@ from Mailman.interfaces import IRule -class TruthRule: - """A rule that always matches.""" - implements(IRule) - - name = 'truth' - description = 'A rule which always matches.' - record = False - - def check(self, mlist, msg, msgdata): - """See `IRule`.""" - return True - - - def initialize(): """Find and register all rules in all plugins.""" - # Register built in rules. - config.rules[TruthRule.name] = TruthRule() # Find rules in plugins. 
for rule_finder in get_plugins('mailman.rules'): for rule_class in rule_finder(): diff --git a/Mailman/app/styles.py b/Mailman/app/styles.py index 4ca3f1b01..3edd88abf 100644 --- a/Mailman/app/styles.py +++ b/Mailman/app/styles.py @@ -70,7 +70,7 @@ class DefaultStyle: mlist.send_goodbye_msg = config.DEFAULT_SEND_GOODBYE_MSG mlist.bounce_matching_headers = ( config.DEFAULT_BOUNCE_MATCHING_HEADERS) - mlist.header_filter_rules = [] + mlist.header_matches = [] mlist.anonymous_list = config.DEFAULT_ANONYMOUS_LIST mlist.description = u'' mlist.info = u'' diff --git a/Mailman/chains/base.py b/Mailman/chains/base.py index 30b66b1cf..16aa63176 100644 --- a/Mailman/chains/base.py +++ b/Mailman/chains/base.py @@ -58,23 +58,17 @@ class TerminalChainBase: """ raise NotImplementedError - def get_rule(self, name): - """See `IChain`. - - This always returns the globally registered named rule. - """ - return config.rules[name] - def get_links(self, mlist, msg, msgdata): """See `IChain`.""" return iter(self) def __iter__(self): """See `IChainIterator`.""" + truth = config.rules['truth'] # First, yield a link that always runs the process method. - yield Link('truth', LinkAction.run, function=self._process) + yield Link(truth, LinkAction.run, function=self._process) # Now yield a rule that stops all processing. - yield Link('truth', LinkAction.stop) + yield Link(truth, LinkAction.stop) @@ -98,13 +92,6 @@ class Chain: """See `IMutableChain`.""" self._links = [] - def get_rule(self, name): - """See `IChain`. - - This always returns the globally registered named rule. 
- """ - return config.rules[name] - def get_links(self, mlist, msg, msgdata): """See `IChain`.""" return iter(ChainIterator(self)) diff --git a/Mailman/chains/builtin.py b/Mailman/chains/builtin.py index e5e97c3fb..d702b48b9 100644 --- a/Mailman/chains/builtin.py +++ b/Mailman/chains/builtin.py @@ -23,37 +23,62 @@ __metaclass__ = type import logging -from Mailman.interfaces import LinkAction -from Mailman.chains.base import Chain, Link +from zope.interface import implements + +from Mailman.chains.base import Link +from Mailman.configuration import config from Mailman.i18n import _ +from Mailman.interfaces import IChain, LinkAction log = logging.getLogger('mailman.vette') -class BuiltInChain(Chain): +class BuiltInChain: """Default built-in chain.""" - def __init__(self): - super(BuiltInChain, self).__init__( - 'built-in', _('The built-in moderation chain.')) - self.append_link(Link('approved', LinkAction.jump, 'accept')) - self.append_link(Link('emergency', LinkAction.jump, 'hold')) - self.append_link(Link('loop', LinkAction.jump, 'discard')) + implements(IChain) + + name = 'built-in' + description = _('The built-in moderation chain.') + + _link_descriptions = ( + ('approved', LinkAction.jump, 'accept'), + ('emergency', LinkAction.jump, 'hold'), + ('loop', LinkAction.jump, 'discard'), # Do all of the following before deciding whether to hold the message # for moderation. 
- self.append_link(Link('administrivia', LinkAction.defer)) - self.append_link(Link('implicit-dest', LinkAction.defer)) - self.append_link(Link('max-recipients', LinkAction.defer)) - self.append_link(Link('max-size', LinkAction.defer)) - self.append_link(Link('news-moderation', LinkAction.defer)) - self.append_link(Link('no-subject', LinkAction.defer)) - self.append_link(Link('suspicious-header', LinkAction.defer)) + ('administrivia', LinkAction.defer, None), + ('implicit-dest', LinkAction.defer, None), + ('max-recipients', LinkAction.defer, None), + ('max-size', LinkAction.defer, None), + ('news-moderation', LinkAction.defer, None), + ('no-subject', LinkAction.defer, None), + ('suspicious-header', LinkAction.defer, None), # Now if any of the above hit, jump to the hold chain. - self.append_link(Link('any', LinkAction.jump, 'hold')) + ('any', LinkAction.jump, 'hold'), # Take a detour through the default header matching chain, which we'll # create later. - self.append_link(Link('truth', LinkAction.detour, 'header-match')) + ('truth', LinkAction.detour, 'header-match'), # Finally, the builtin chain defaults to acceptance. - self.append_link(Link('truth', LinkAction.jump, 'accept')) + ('truth', LinkAction.jump, 'accept'), + ) + + def __init__(self): + self._cached_links = None + + def get_links(self, mlist, msg, msgdata): + """See `IChain`.""" + if self._cached_links is None: + self._cached_links = links = [] + for rule_name, action, chain_name in self._link_descriptions: + # Get the named rule. + rule = config.rules[rule_name] + # Get the chain, if one is defined. 
+ if chain_name is None: + chain = None + else: + chain = config.chains[chain_name] + links.append(Link(rule, action, chain)) + return iter(self._cached_links) diff --git a/Mailman/chains/headers.py b/Mailman/chains/headers.py index a802eaab4..06dfafeda 100644 --- a/Mailman/chains/headers.py +++ b/Mailman/chains/headers.py @@ -23,10 +23,11 @@ __metaclass__ = type import re import logging +import itertools from zope.interface import implements -from Mailman.interfaces import IRule, LinkAction +from Mailman.interfaces import IChainIterator, IRule, LinkAction from Mailman.chains.base import Chain, Link from Mailman.i18n import _ from Mailman.configuration import config @@ -36,6 +37,29 @@ log = logging.getLogger('mailman.vette') +def make_link(entry): + """Create a Link object. + + :param entry: a 2- or 3-tuple describing a link. If a 2-tuple, it is a + header and a pattern, and a default chain of 'hold' will be used. If + a 3-tuple, the third item is the chain name to use. + :return: an ILink. + """ + if len(entry) == 2: + header, pattern = entry + chain_name = 'hold' + elif len(entry) == 3: + header, pattern, chain_name = entry + # We don't assert that the chain exists here because the jump + # chain may not yet have been created. + else: + raise AssertionError('Bad link description: %s' % entry) + rule = HeaderMatchRule(header, pattern) + chain = config.chains[chain_name] + return Link(rule, LinkAction.jump, chain) + + + class HeaderMatchRule: """Header matching rule used by header-match chain.""" implements(IRule) @@ -78,23 +102,16 @@ class HeaderMatchChain(Chain): # The header match rules are not global, so don't register them. # These are the only rules that the header match chain can execute. self._links = [] - self._rules = {} # Initialize header check rules with those from the global # HEADER_MATCHES variable. 
for entry in config.HEADER_MATCHES: - if len(entry) == 2: - header, pattern = entry - chain = 'hold' - elif len(entry) == 3: - header, pattern, chain = entry - # We don't assert that the chain exists here because the jump - # chain may not yet have been created. - else: - raise AssertionError( - 'Bad entry for HEADER_MATCHES: %s' % entry) - self.extend(header, pattern, chain) + self._links.append(make_link(entry)) + # Keep track of how many global header matching rules we've seen. + # This is so the flush() method will only delete those that were added + # via extend() or append_link(). + self._permanent_link_count = len(self._links) - def extend(self, header, pattern, chain='hold'): + def extend(self, header, pattern, chain_name='hold'): """Extend the existing header matches. :param header: The case-insensitive header field name. @@ -103,14 +120,32 @@ class HeaderMatchChain(Chain): :param chain: Option chain to jump to if the pattern matches any of the named header values. If not given, the 'hold' chain is used. """ - rule = HeaderMatchRule(header, pattern) - self._rules[rule.name] = rule - link = Link(rule.name, LinkAction.jump, chain) - self._links.append(link) + self._links.append(make_link((header, pattern, chain_name))) - def get_rule(self, name): - """See `IChain`. + def flush(self): + """See `IMutableChain`.""" + del self._links[self._permanent_link_count:] - Only local rules are findable by this chain. 
- """ - return self._rules[name] + def get_links(self, mlist, msg, msgdata): + """See `IChain`.""" + list_iterator = HeaderMatchIterator(mlist) + return itertools.chain(iter(self._links), iter(list_iterator)) + + def __iter__(self): + for link in self._links: + yield link + + + +class HeaderMatchIterator: + """An iterator of both the global and list-specific chain links.""" + + implements(IChainIterator) + + def __init__(self, mlist): + self._mlist = mlist + + def __iter__(self): + """See `IChainIterator`.""" + for entry in self._mlist.header_matches: + yield make_link(entry) diff --git a/Mailman/database/mailinglist.py b/Mailman/database/mailinglist.py index 04c872aab..3230308eb 100644 --- a/Mailman/database/mailinglist.py +++ b/Mailman/database/mailinglist.py @@ -113,7 +113,7 @@ class MailingList(Model): gateway_to_news = Bool() generic_nonmember_action = Int() goodbye_msg = Unicode() - header_filter_rules = Pickle() + header_matches = Pickle() hold_these_nonmembers = Pickle() include_list_post_header = Bool() include_rfc2369_headers = Bool() diff --git a/Mailman/database/mailman.sql b/Mailman/database/mailman.sql index c511e6180..0af3401dd 100644 --- a/Mailman/database/mailman.sql +++ b/Mailman/database/mailman.sql @@ -93,7 +93,7 @@ CREATE TABLE mailinglist ( gateway_to_news BOOLEAN, generic_nonmember_action INTEGER, goodbye_msg TEXT, - header_filter_rules BLOB, + header_matches BLOB, hold_these_nonmembers BLOB, include_list_post_header BOOLEAN, include_rfc2369_headers BOOLEAN, diff --git a/Mailman/docs/chains.txt b/Mailman/docs/chains.txt index e676957d8..433ee8e8e 100644 --- a/Mailman/docs/chains.txt +++ b/Mailman/docs/chains.txt @@ -300,9 +300,6 @@ the Hold handler from previous versions of Mailman. 
>>> chain = config.chains['built-in'] >>> verifyObject(IChain, chain) True - >>> from Mailman.interfaces import IMutableChain - >>> verifyObject(IMutableChain, chain) - True >>> chain.name 'built-in' >>> chain.description diff --git a/Mailman/interfaces/chain.py b/Mailman/interfaces/chain.py index 63d7eb7f7..04c0260c2 100644 --- a/Mailman/interfaces/chain.py +++ b/Mailman/interfaces/chain.py @@ -63,16 +63,6 @@ class IChain(Interface): name = Attribute('Chain name; must be unique.') description = Attribute('A brief description of the chain.') - def get_rule(name): - """Lookup and return the named rule. - - :param name: The name of the rule to return. This may be a globally - registered rule name, in which case it must be unique, or it may - be a rule defined locally to the chain. - :return: The named `IRule`. - :raises: KeyError if the named rule cannot be found. - """ - def get_links(mlist, msg, msgdata): """Get an `IChainIterator` for processing. diff --git a/Mailman/rules/docs/header-matching.txt b/Mailman/rules/docs/header-matching.txt index b32feabe5..fbd0ff65f 100644 --- a/Mailman/rules/docs/header-matching.txt +++ b/Mailman/rules/docs/header-matching.txt @@ -61,6 +61,7 @@ untouched. But now if the header matches, then the message gets discarded. + >>> del msg['x-spam-score'] >>> msg['X-Spam-Score'] = '****' >>> del msg['subject'] >>> msg['Subject'] = 'This is spam, but barely' @@ -75,6 +76,7 @@ But now if the header matches, then the message gets discarded. For kicks, let's show a message that's really spammy. + >>> del msg['x-spam-score'] >>> msg['X-Spam-Score'] = '**********' >>> del msg['subject'] >>> msg['Subject'] = 'This is really spammy' @@ -87,3 +89,57 @@ For kicks, let's show a message that's really spammy. LOG: ... DISCARD: <four> <BLANKLINE> +Flush out the extended header matching rules. 
+ + >>> chain.flush() + + +List-specific header matching +----------------------------- + +Each mailing list can also be configured with a set of header matching regular +expression rules. These are used to impose list-specific header filtering +with the same semantics as the global `HEADER_MATCHES` variable. + +The list administrator wants to match not on four stars, but on three plus +signs, but only for the current mailing list. + + >>> mlist.header_matches = [('x-spam-score', '[+]{3,}', 'discard')] + +A message with a spam score of two pluses does not match. + + >>> del msg['x-spam-score'] + >>> msg['X-Spam-Score'] = '++' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<five>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: + +A message with a spam score of three pluses does match. + + >>> del msg['x-spam-score'] + >>> msg['X-Spam-Score'] = '+++' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<six>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... DISCARD: <six> + <BLANKLINE> + +As does a message with a spam score of four pluses. + + >>> del msg['x-spam-score'] + >>> msg['X-Spam-Score'] = '++++' + >>> del msg['message-id'] + >>> msg['Message-ID'] = '<seven>' + >>> file_pos = fp.tell() + >>> process(mlist, msg, {}, 'header-match') + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... DISCARD: <seven> + <BLANKLINE> diff --git a/Mailman/rules/docs/truth.txt b/Mailman/rules/docs/truth.txt new file mode 100644 index 000000000..baa40772a --- /dev/null +++ b/Mailman/rules/docs/truth.txt @@ -0,0 +1,10 @@ +Truth +===== + +The 'truth' rule always matches. This makes it useful as a terminus rule for +unconditionally jumping to another chain. 
+ + >>> from Mailman.configuration import config + >>> rule = config.rules['truth'] + >>> rule.check(False, False, False) + True diff --git a/Mailman/rules/truth.py b/Mailman/rules/truth.py new file mode 100644 index 000000000..d3cfc30f3 --- /dev/null +++ b/Mailman/rules/truth.py @@ -0,0 +1,41 @@ +# Copyright (C) 2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""A rule which always matches.""" + +__all__ = ['Truth'] +__metaclass__ = type + + +from zope.interface import implements + +from Mailman.i18n import _ +from Mailman.interfaces import IRule + + + +class Truth: + """A rule which always matches.""" + implements(IRule) + + name = 'truth' + description = _('A rule which always matches.') + record = False + + def check(self, mlist, msg, msgdata): + """See `IRule`.""" + return True |
