summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBarry Warsaw2008-01-26 18:35:18 -0500
committerBarry Warsaw2008-01-26 18:35:18 -0500
commit3952c95a23a74b8686b55a3a4f1873238e6d6610 (patch)
treec994bb588b94da2eac9d851a7da4400861dee25a
parentdf637148d8fa2d5c101a990ee6766ea8547f000a (diff)
downloadmailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.tar.gz
mailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.tar.zst
mailman-3952c95a23a74b8686b55a3a4f1873238e6d6610.zip
-rw-r--r--Mailman/Defaults.py23
-rw-r--r--Mailman/Handlers/SpamDetect.py8
-rw-r--r--Mailman/app/chains.py191
-rw-r--r--Mailman/docs/antispam.txt75
-rw-r--r--Mailman/docs/chains.txt12
-rw-r--r--Mailman/interfaces/chain.py10
-rw-r--r--Mailman/rules/docs/emergency.txt4
-rw-r--r--Mailman/rules/docs/header-matching.txt89
-rw-r--r--Mailman/tests/test_documentation.py1
9 files changed, 281 insertions, 132 deletions
diff --git a/Mailman/Defaults.py b/Mailman/Defaults.py
index 75e1c768f..82fb6886f 100644
--- a/Mailman/Defaults.py
+++ b/Mailman/Defaults.py
@@ -164,16 +164,21 @@ DEFAULT_URL_HOST = '@URLHOST@'
# Spam avoidance defaults
#####
-# This variable contains a list of 2-tuple of the format (header, regex) which
-# the Mailman/Handlers/SpamDetect.py module uses to match against the current
-# message. If the regex matches the given header in the current message, then
-# it is flagged as spam. header is case-insensitive and should not include
-# the trailing colon. regex is always matched with re.IGNORECASE.
+# This variable contains a list of tuples of the format:
#
-# Note that the more searching done, the slower the whole process gets. Spam
-# detection is run against all messages coming to either the list, or the
-# -owners address, unless the message is explicitly approved.
-KNOWN_SPAMMERS = []
+# (header, pattern[, chain])
+#
+# which is used to match against the current message's headers. If the
+# pattern matches the given header in the current message, then the named
+# chain is jumped to. header is case-insensitive and should not include the
+# trailing colon. pattern is always matched with re.IGNORECASE. chain is
+# optional; if not given the 'hold' chain is used, but if given it may be any
+# existing chain, such as 'discard', 'reject', or 'accept'.
+#
+# Note that the more searching done, the slower the whole process gets.
+# Header matching is run against all messages coming to either the list, or
+# the -owners address, unless the message is explicitly approved.
+HEADER_MATCHES = []
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py
index b7e687284..f45b52a32 100644
--- a/Mailman/Handlers/SpamDetect.py
+++ b/Mailman/Handlers/SpamDetect.py
@@ -88,14 +88,6 @@ class HeaderGenerator(Generator):
def process(mlist, msg, msgdata):
if msgdata.get('approved'):
return
- # First do site hard coded header spam checks
- for header, regex in config.KNOWN_SPAMMERS:
- cre = re.compile(regex, re.IGNORECASE)
- for value in msg.get_all(header, []):
- mo = cre.search(value)
- if mo:
- # we've detected spam, so throw the message away
- raise SpamDetected
# Now do header_filter_rules
# TK: Collect headers in sub-parts because attachment filename
# extension may be a clue to possible virus/spam.
diff --git a/Mailman/app/chains.py b/Mailman/app/chains.py
index 38c7325ab..c327f8234 100644
--- a/Mailman/app/chains.py
+++ b/Mailman/app/chains.py
@@ -33,6 +33,7 @@ __metaclass__ = type
__i18n_templates__ = True
+import re
import logging
from email.mime.message import MIMEMessage
@@ -49,7 +50,7 @@ from Mailman.app.replybot import autorespond_to_sender, can_acknowledge
from Mailman.configuration import config
from Mailman.i18n import _
from Mailman.interfaces import (
- IChain, IChainLink, IMutableChain, IPendable, LinkAction)
+ IChain, IChainLink, IMutableChain, IPendable, IRule, LinkAction)
from Mailman.queue import Switchboard
log = logging.getLogger('mailman.vette')
@@ -91,8 +92,19 @@ class TerminalChainBase:
yield Link('truth', LinkAction.stop)
def process(self, mlist, msg, msgdata):
+ """Process the message for the given mailing list.
+
+ This must be overridden by subclasses.
+ """
raise NotImplementedError
+ def get_rule(self, name):
+ """See `IChain`.
+
+ This always returns the globally registered named rule.
+ """
+ return config.rules[name]
+
class DiscardChain(TerminalChainBase):
"""Discard a message."""
@@ -283,7 +295,7 @@ class AcceptChain(TerminalChainBase):
class Chain:
- """Default built-in moderation chain."""
+ """Generic chain base class."""
implements(IMutableChain)
def __init__(self, name, description):
@@ -307,9 +319,124 @@ class Chain:
for link in self._links:
yield link
+ def get_rule(self, name):
+ """See `IChain`.
+
+ This always returns the globally registered named rule.
+ """
+ return config.rules[name]
+
+
+
+class BuiltInChain(Chain):
+ """Default built-in chain."""
+
+ def __init__(self):
+ super(BuiltInChain, self).__init__(
+ 'built-in', _('The built-in moderation chain.'))
+ self.append_link(Link('approved', LinkAction.jump, 'accept'))
+ self.append_link(Link('emergency', LinkAction.jump, 'hold'))
+ self.append_link(Link('loop', LinkAction.jump, 'discard'))
+ # Do all of the following before deciding whether to hold the message
+ # for moderation.
+ self.append_link(Link('administrivia', LinkAction.defer))
+ self.append_link(Link('implicit-dest', LinkAction.defer))
+ self.append_link(Link('max-recipients', LinkAction.defer))
+ self.append_link(Link('max-size', LinkAction.defer))
+ self.append_link(Link('news-moderation', LinkAction.defer))
+ self.append_link(Link('no-subject', LinkAction.defer))
+ self.append_link(Link('suspicious-header', LinkAction.defer))
+ # Now if any of the above hit, jump to the hold chain.
+ self.append_link(Link('any', LinkAction.jump, 'hold'))
+# Take a detour through the default header matching chain, which we'll
+ # create later.
+ self.append_link(Link('truth', LinkAction.detour, 'header-match'))
+# Finally, the builtin chain defaults to acceptance.
+ self.append_link(Link('truth', LinkAction.jump, 'accept'))
+
+
+
+class HeaderMatchRule:
+ """Header matching rule used by header-match chain."""
+ implements(IRule)
+
+ # Sequential rule counter.
+ _count = 1
+
+ def __init__(self, header, pattern):
+ self._header = header
+ self._pattern = pattern
+ self.name = 'header-match-%002d' % HeaderMatchRule._count
+ HeaderMatchRule._count += 1
+ self.description = u'%s: %s' % (header, pattern)
+ # XXX I think we should do better here, somehow recording that a
+ # particular header matched a particular pattern, but that gets ugly
+ # with RFC 2822 headers. It also doesn't match well with the rule
+ # name concept. For now, we just record the rather useless numeric
+ # rule name. I suppose we could do the better hit recording in the
+ # check() method, and set self.record = False.
+ self.record = True
+
+ def check(self, mlist, msg, msgdata):
+ """See `IRule`."""
+ for value in msg.get_all(self._header, []):
+ if re.search(self._pattern, value, re.IGNORECASE):
+ return True
+ return False
+
+
+class HeaderMatchChain(Chain):
+ """Default header matching chain.
+
+ This could be extended by header match rules in the database.
+ """
+
+ def __init__(self):
+ super(HeaderMatchChain, self).__init__(
+ 'header-match', _('The built-in header matching chain'))
+ # The header match rules are not global, so don't register them.
+ # These are the only rules that the header match chain can execute.
+ self._links = []
+ self._rules = {}
+ # Initialize header check rules with those from the global
+ # HEADER_MATCHES variable.
+ for entry in config.HEADER_MATCHES:
+ if len(entry) == 2:
+ header, pattern = entry
+ chain = 'hold'
+ elif len(entry) == 3:
+ header, pattern, chain = entry
+ # We don't assert that the chain exists here because the jump
+ # chain may not yet have been created.
+ else:
+ raise AssertionError(
+ 'Bad entry for HEADER_MATCHES: %s' % entry)
+ self.extend(header, pattern, chain)
+
+ def extend(self, header, pattern, chain='hold'):
+ """Extend the existing header matches.
+
+ :param header: The case-insensitive header field name.
+:param pattern: The pattern to match the header's value against. The
+ match is not anchored and is done case-insensitively.
+:param chain: Optional chain to jump to if the pattern matches any of
+ the named header values. If not given, the 'hold' chain is used.
+ """
+ rule = HeaderMatchRule(header, pattern)
+ self._rules[rule.name] = rule
+ link = Link(rule.name, LinkAction.jump, chain)
+ self._links.append(link)
+
+ def get_rule(self, name):
+ """See `IChain`.
+
+ Only local rules are findable by this chain.
+ """
+ return self._rules[name]
+
-def process(start_chain, mlist, msg, msgdata):
+def process(mlist, msg, msgdata, start_chain='built-in'):
"""Process the message through a chain.
:param start_chain: The name of the chain to start the processing with.
@@ -317,34 +444,45 @@ def process(start_chain, mlist, msg, msgdata):
:param msg: The Message object.
:param msgdata: The message metadata dictionary.
"""
- # Find the starting chain.
- current_chain = iter(config.chains[start_chain])
+ # Set up some bookkeeping.
chain_stack = []
msgdata['rule_hits'] = hits = []
msgdata['rule_misses'] = misses = []
- while current_chain:
+ # Find the starting chain and begin iterating through its links.
+ chain = config.chains[start_chain]
+ chain_iter = iter(chain)
+ # Loop until we've reached the end of all processing chains.
+ while chain:
+ # Iterate over all links in the chain. Do this outside a for-loop so
+ # we can capture a chain's link iterator in mid-flight. This supports
+# the 'detour' link action.
try:
- link = current_chain.next()
+ link = chain_iter.next()
except StopIteration:
# This chain is exhausted. Pop the last chain on the stack and
- # continue.
+ # continue iterating through it. If there's nothing left on the
+ # chain stack then we're completely finished processing.
if len(chain_stack) == 0:
return
- current_chain = chain_stack.pop()
+ chain, chain_iter = chain_stack.pop()
continue
# Process this link.
- rule = config.rules[link.rule]
+ rule = chain.get_rule(link.rule)
if rule.check(mlist, msg, msgdata):
if rule.record:
hits.append(link.rule)
# The rule matched so run its action.
if link.action is LinkAction.jump:
- current_chain = iter(config.chains[link.chain])
+ chain = config.chains[link.chain]
+ chain_iter = iter(chain)
+ continue
elif link.action is LinkAction.detour:
- # Push the current chain so that we can return to it when the
- # next chain is finished.
- chain_stack.append(current_chain)
- current_chain = iter(config.chains[link.chain])
+ # Push the current chain so that we can return to it when
+ # the next chain is finished.
+ chain_stack.append((chain, chain_iter))
+ chain = config.chains[link.chain]
+ chain_iter = iter(chain)
+ continue
elif link.action is LinkAction.stop:
# Stop all processing.
return
@@ -354,7 +492,7 @@ def process(start_chain, mlist, msg, msgdata):
elif link.action is LinkAction.run:
link.function(mlist, msg, msgdata)
else:
- raise AssertionError('Unknown link action: %s' % link.action)
+ raise AssertionError('Bad link action: %s' % link.action)
else:
# The rule did not match; keep going.
if rule.record:
@@ -370,21 +508,10 @@ def initialize():
'Duplicate chain name: %s' % chain.name)
config.chains[chain.name] = chain
# Set up a couple of other default chains.
- default = Chain('built-in', _('The built-in moderation chain.'))
- default.append_link(Link('approved', LinkAction.jump, 'accept'))
- default.append_link(Link('emergency', LinkAction.jump, 'hold'))
- default.append_link(Link('loop', LinkAction.jump, 'discard'))
- # Do all these before deciding whether to hold the message for moderation.
- default.append_link(Link('administrivia', LinkAction.defer))
- default.append_link(Link('implicit-dest', LinkAction.defer))
- default.append_link(Link('max-recipients', LinkAction.defer))
- default.append_link(Link('max-size', LinkAction.defer))
- default.append_link(Link('news-moderation', LinkAction.defer))
- default.append_link(Link('no-subject', LinkAction.defer))
- default.append_link(Link('suspicious-header', LinkAction.defer))
- # Now if any of the above hit, jump to the hold chain.
- default.append_link(Link('any', LinkAction.jump, 'hold'))
- # Finally, the builtin chain defaults to acceptance.
- default.append_link(Link('truth', LinkAction.jump, 'accept'))
+ chain = BuiltInChain()
+ config.chains[chain.name] = chain
+ # Create and initialize the header matching chain.
+ chain = HeaderMatchChain()
+ config.chains[chain.name] = chain
# XXX Read chains from the database and initialize them.
pass
diff --git a/Mailman/docs/antispam.txt b/Mailman/docs/antispam.txt
deleted file mode 100644
index 3ad5e982e..000000000
--- a/Mailman/docs/antispam.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-Anti-spam defences
-==================
-
-By design, Mailman does not have very sophisticated anti-spam measures because
-this type of filtering is done much more efficiently at the MTA level. For
-example, if Mailman were to do spam detection, it could not reject the message
-at SMTP time.
-
-Still, Mailman does employ a small number of rather ham-handed anti-spam
-measures.
-
- >>> from Mailman.Handlers.SpamDetect import process
- >>> from Mailman.queue import Switchboard
- >>> from Mailman.configuration import config
- >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
-
-
-Short circuiting
-----------------
-
-If a message is pre-approved, this handler does nothing.
-
- >>> msg = message_from_string("""\
- ... From: aperson@example.com
- ...
- ... An important message.
- ... """)
- >>> msgdata = {'approved': True}
- >>> process(mlist, msg, msgdata)
- >>> print msg.as_string()
- From: aperson@example.com
- <BLANKLINE>
- An important message.
- <BLANKLINE>
- >>> msgdata
- {'approved': True}
-
-
-Header matching
----------------
-
-There is a global configuration variable that can be set to a list of header
-matches. Each item in that list is a 2-tuple of the header to match and a
-regular expression. For example, if we wanted to block all message that come
-from 'aperson' regardless of the domain, we'd do something like the following
-in our mailman.cfg file:
-
- >>> old_value = config.KNOWN_SPAMMERS[:]
- >>> config.KNOWN_SPAMMERS.append(('from', 'aperson'))
-
-Now if the same message is posted to the mailing list, and that message is not
-pre-approved. The handler will throw an exception that signals the message is
-spam.
-
- >>> msgdata = {}
- >>> process(mlist, msg, msgdata)
- Traceback (most recent call last):
- ...
- SpamDetected
- >>> print msg.as_string()
- From: aperson@example.com
- <BLANKLINE>
- An important message.
- <BLANKLINE>
- >>> msgdata
- {}
-
- # Restore global state
- config.KNOWN_SPAMMERS = old_value
-
-
-Header filter rules
--------------------
-
-XXX Need tests.
diff --git a/Mailman/docs/chains.txt b/Mailman/docs/chains.txt
index 0a93683f7..e676957d8 100644
--- a/Mailman/docs/chains.txt
+++ b/Mailman/docs/chains.txt
@@ -36,12 +36,14 @@ The Discard chain simply throws the message away.
... An important message.
... """)
+ >>> from Mailman.app.chains import process
+
# XXX This checks the vette log file because there is no other evidence
# that this chain has done anything.
>>> import os
>>> fp = open(os.path.join(config.LOG_DIR, 'vette'))
>>> file_pos = fp.tell()
- >>> chain.process(mlist, msg, {})
+ >>> process(mlist, msg, {}, 'discard')
>>> fp.seek(file_pos)
>>> print 'LOG:', fp.read()
LOG: ... DISCARD: <first>
@@ -62,7 +64,7 @@ this action.
>>> chain.description
u'Reject/bounce a message and stop processing.'
>>> file_pos = fp.tell()
- >>> chain.process(mlist, msg, {})
+ >>> process(mlist, msg, {}, 'reject')
>>> fp.seek(file_pos)
>>> print 'LOG:', fp.read()
LOG: ... REJECT: <first>
@@ -111,7 +113,7 @@ sender and the list moderators.
u'Hold a message and stop processing.'
>>> file_pos = fp.tell()
- >>> chain.process(mlist, msg, {})
+ >>> process(mlist, msg, {}, 'hold')
>>> fp.seek(file_pos)
>>> print 'LOG:', fp.read()
LOG: ... HOLD: _xtest@example.com post from aperson@example.com held,
@@ -262,7 +264,7 @@ processed and sent on to the list membership.
>>> chain.description
u'Accept a message.'
>>> file_pos = fp.tell()
- >>> chain.process(mlist, msg, {})
+ >>> process(mlist, msg, {}, 'accept')
>>> fp.seek(file_pos)
>>> print 'LOG:', fp.read()
LOG: ... ACCEPT: <first>
@@ -311,7 +313,7 @@ all default rules. This message will end up in the prep queue.
>>> file_pos = fp.tell()
>>> from Mailman.app.chains import process
- >>> process('built-in', mlist, msg, {})
+ >>> process(mlist, msg, {})
>>> fp.seek(file_pos)
>>> print 'LOG:', fp.read()
LOG: ... ACCEPT: <first>
diff --git a/Mailman/interfaces/chain.py b/Mailman/interfaces/chain.py
index 8c0837820..eca663b30 100644
--- a/Mailman/interfaces/chain.py
+++ b/Mailman/interfaces/chain.py
@@ -69,6 +69,16 @@ class IChain(Interface):
:return: an IChainLink.
"""
+ def get_rule(name):
+ """Lookup and return the named rule.
+
+ :param name: The name of the rule to return. This may be a globally
+ registered rule name, in which case it must be unique, or it may
+ be a rule defined locally to the chain.
+ :return: The named `IRule`.
+ :raises: KeyError if the named rule cannot be found.
+ """
+
class IMutableChain(IChain):
diff --git a/Mailman/rules/docs/emergency.txt b/Mailman/rules/docs/emergency.txt
index 1375c3bc9..aecbcb90d 100644
--- a/Mailman/rules/docs/emergency.txt
+++ b/Mailman/rules/docs/emergency.txt
@@ -21,7 +21,7 @@ rule matches if the flag is set on the mailing list.
>>> from Mailman.app.chains import process
>>> mlist.emergency = True
- >>> process('built-in', mlist, msg, {})
+ >>> process(mlist, msg, {}, 'built-in')
There are two messages in the virgin queue. The one addressed to the original
sender will contain a token we can use to grab the held message out of the
@@ -69,6 +69,6 @@ However, if the message metadata has a 'moderator_approved' key set, then even
if the mailing list has its emergency flag set, the message still goes through
to the membership.
- >>> process('built-in', mlist, msg, dict(moderator_approved=True))
+ >>> process(mlist, msg, dict(moderator_approved=True), 'built-in')
>>> len(virginq.files)
0
diff --git a/Mailman/rules/docs/header-matching.txt b/Mailman/rules/docs/header-matching.txt
new file mode 100644
index 000000000..b32feabe5
--- /dev/null
+++ b/Mailman/rules/docs/header-matching.txt
@@ -0,0 +1,89 @@
+Header matching
+===============
+
+Mailman can do pattern based header matching during its normal rule
+processing. There is a set of site-wide default header matches specified in
+the configuration file under the HEADER_MATCHES variable.
+
+ >>> from Mailman.app.lifecycle import create_list
+ >>> mlist = create_list(u'_xtest@example.com')
+
+Because the default HEADER_MATCHES variable is empty when the configuration
+file is read, we'll just extend the current header matching chain with a
+pattern that matches 4 or more stars, discarding the message if it hits.
+
+ >>> from Mailman.configuration import config
+ >>> chain = config.chains['header-match']
+ >>> chain.extend('x-spam-score', '[*]{4,}', 'discard')
+
+First, if the message has no X-Spam-Score header, the message passes through
+the chain untouched (i.e. no disposition).
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest@example.com
+ ... Subject: Not spam
+ ... Message-ID: <one>
+ ...
+ ... This is a message.
+ ... """)
+
+ >>> from Mailman.app.chains import process
+
+Pass through is seen as nothing being in the log file after processing.
+
+ # XXX This checks the vette log file because there is no other evidence
+ # that this chain has done anything.
+ >>> import os
+ >>> fp = open(os.path.join(config.LOG_DIR, 'vette'))
+ >>> fp.seek(0, 2)
+ >>> file_pos = fp.tell()
+ >>> process(mlist, msg, {}, 'header-match')
+ >>> fp.seek(file_pos)
+ >>> print 'LOG:', fp.read()
+ LOG:
+ <BLANKLINE>
+
+Now, if the header exists but does not match, then it also passes through
+untouched.
+
+ >>> msg['X-Spam-Score'] = '***'
+ >>> del msg['subject']
+ >>> msg['Subject'] = 'This is almost spam'
+ >>> del msg['message-id']
+ >>> msg['Message-ID'] = '<two>'
+ >>> file_pos = fp.tell()
+ >>> process(mlist, msg, {}, 'header-match')
+ >>> fp.seek(file_pos)
+ >>> print 'LOG:', fp.read()
+ LOG:
+ <BLANKLINE>
+
+But now if the header matches, then the message gets discarded.
+
+ >>> msg['X-Spam-Score'] = '****'
+ >>> del msg['subject']
+ >>> msg['Subject'] = 'This is spam, but barely'
+ >>> del msg['message-id']
+ >>> msg['Message-ID'] = '<three>'
+ >>> file_pos = fp.tell()
+ >>> process(mlist, msg, {}, 'header-match')
+ >>> fp.seek(file_pos)
+ >>> print 'LOG:', fp.read()
+ LOG: ... DISCARD: <three>
+ <BLANKLINE>
+
+For kicks, let's show a message that's really spammy.
+
+ >>> msg['X-Spam-Score'] = '**********'
+ >>> del msg['subject']
+ >>> msg['Subject'] = 'This is really spammy'
+ >>> del msg['message-id']
+ >>> msg['Message-ID'] = '<four>'
+ >>> file_pos = fp.tell()
+ >>> process(mlist, msg, {}, 'header-match')
+ >>> fp.seek(file_pos)
+ >>> print 'LOG:', fp.read()
+ LOG: ... DISCARD: <four>
+ <BLANKLINE>
+
diff --git a/Mailman/tests/test_documentation.py b/Mailman/tests/test_documentation.py
index 9faf1d588..ad00ba19c 100644
--- a/Mailman/tests/test_documentation.py
+++ b/Mailman/tests/test_documentation.py
@@ -18,7 +18,6 @@
"""Harness for testing Mailman's documentation."""
import os
-import pdb
import doctest
import unittest