diff options
| author | Barry Warsaw | 2010-08-08 10:49:55 -0400 |
|---|---|---|
| committer | Barry Warsaw | 2010-08-08 10:49:55 -0400 |
| commit | 79cc1bc34c9386058a9f0734ab9ad7fad3b637b5 (patch) | |
| tree | 0252af62b19a0edbd311693b303b12bc1f2e9868 /src | |
| parent | 83f78ec26541ab0c7b91794ab6b3bc1d8285f9a9 (diff) | |
| download | mailman-79cc1bc34c9386058a9f0734ab9ad7fad3b637b5.tar.gz mailman-79cc1bc34c9386058a9f0734ab9ad7fad3b637b5.tar.zst mailman-79cc1bc34c9386058a9f0734ab9ad7fad3b637b5.zip | |
Diffstat (limited to 'src')
30 files changed, 1002 insertions, 705 deletions
diff --git a/src/mailman/Bouncers/Caiwireless.py b/src/mailman/Bouncers/Caiwireless.py deleted file mode 100644 index 7a0b698a6..000000000 --- a/src/mailman/Bouncers/Caiwireless.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Parse mystery style generated by MTA at caiwireless.net.""" - -import re -import email - -tcre = re.compile(r'the following recipients did not receive this message:', - re.IGNORECASE) -acre = re.compile(r'<(?P<addr>[^>]*)>') - - - -def process(msg): - if msg.get_content_type() <> 'multipart/mixed': - return None - # simple state machine - # 0 == nothing seen - # 1 == tag line seen - state = 0 - # This format thinks it's a MIME, but it really isn't - for line in email.Iterators.body_line_iterator(msg): - line = line.strip() - if state == 0 and tcre.match(line): - state = 1 - elif state == 1 and line: - mo = acre.match(line) - if not mo: - return None - return [mo.group('addr')] diff --git a/src/mailman/Bouncers/Microsoft.py b/src/mailman/Bouncers/Microsoft.py deleted file mode 100644 index 540748f05..000000000 --- a/src/mailman/Bouncers/Microsoft.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Microsoft's `SMTPSVC' nears I kin tell.""" - -import re -from cStringIO import StringIO - -scre = re.compile(r'transcript of session follows', re.IGNORECASE) - - - -def process(msg): - if msg.get_content_type() <> 'multipart/mixed': - return None - # Find the first subpart, which has no MIME type - try: - subpart = msg.get_payload(0) - except IndexError: - # The message *looked* like a multipart but wasn't - return None - data = subpart.get_payload() - if isinstance(data, list): - # The message is a multi-multipart, so not a matching bounce - return None - body = StringIO(data) - state = 0 - addrs = [] - while 1: - line = body.readline() - if not line: - break - if state == 0: - if scre.search(line): - state = 1 - if state == 1: - if '@' in line: - addrs.append(line) - return addrs diff --git a/src/mailman/Bouncers/Netscape.py b/src/mailman/Bouncers/Netscape.py deleted file mode 100644 index ae3125e68..000000000 --- a/src/mailman/Bouncers/Netscape.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Netscape Messaging Server bounce formats. - -I've seen at least one NMS server version 3.6 (envy.gmp.usyd.edu.au) bounce -messages of this format. Bounces come in DSN MIME format, but don't include -any -Recipient: headers. Gotta just parse the text :( - -NMS 4.1 (dfw-smtpin1.email.verio.net) seems even worse, but we'll try to -decipher the format here too. - -""" - -import re -from cStringIO import StringIO - -pcre = re.compile( - r'This Message was undeliverable due to the following reason:', - re.IGNORECASE) - -acre = re.compile( - r'(?P<reply>please reply to)?.*<(?P<addr>[^>]*)>', - re.IGNORECASE) - - - -def flatten(msg, leaves): - # give us all the leaf (non-multipart) subparts - if msg.is_multipart(): - for part in msg.get_payload(): - flatten(part, leaves) - else: - leaves.append(msg) - - - -def process(msg): - # Sigh. Some show NMS 3.6's show - # multipart/report; report-type=delivery-status - # and some show - # multipart/mixed; - if not msg.is_multipart(): - return None - # We're looking for a text/plain subpart occuring before a - # message/delivery-status subpart. - plainmsg = None - leaves = [] - flatten(msg, leaves) - for i, subpart in zip(range(len(leaves)-1), leaves): - if subpart.get_content_type() == 'text/plain': - plainmsg = subpart - break - if not plainmsg: - return None - # Total guesswork, based on captured examples... - body = StringIO(plainmsg.get_payload()) - addrs = [] - while 1: - line = body.readline() - if not line: - break - mo = pcre.search(line) - if mo: - # We found a bounce section, but I have no idea what the official - # format inside here is. :( We'll just search for <addr> - # strings. - while 1: - line = body.readline() - if not line: - break - mo = acre.search(line) - if mo and not mo.group('reply'): - addrs.append(mo.group('addr')) - return addrs diff --git a/src/mailman/Bouncers/Postfix.py b/src/mailman/Bouncers/Postfix.py deleted file mode 100644 index 3f78fbe88..000000000 --- a/src/mailman/Bouncers/Postfix.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Parse bounce messages generated by Postfix. - -This also matches something called `Keftamail' which looks just like Postfix -bounces with the word Postfix scratched out and the word `Keftamail' written -in in crayon. - -It also matches something claiming to be `The BNS Postfix program', and -`SMTP_Gateway'. Everybody's gotta be different, huh? -""" - -import re -from cStringIO import StringIO - - - -def flatten(msg, leaves): - # give us all the leaf (non-multipart) subparts - if msg.is_multipart(): - for part in msg.get_payload(): - flatten(part, leaves) - else: - leaves.append(msg) - - - -# are these heuristics correct or guaranteed? -pcre = re.compile(r'[ \t]*the\s*(bns)?\s*(postfix|keftamail|smtp_gateway)', - re.IGNORECASE) -rcre = re.compile(r'failure reason:$', re.IGNORECASE) -acre = re.compile(r'<(?P<addr>[^>]*)>:') - -def findaddr(msg): - addrs = [] - body = StringIO(msg.get_payload()) - # simple state machine - # 0 == nothing found - # 1 == salutation found - state = 0 - while 1: - line = body.readline() - if not line: - break - # preserve leading whitespace - line = line.rstrip() - # yes use match to match at beginning of string - if state == 0 and (pcre.match(line) or rcre.match(line)): - state = 1 - elif state == 1 and line: - mo = acre.search(line) - if mo: - addrs.append(mo.group('addr')) - # probably a continuation line - return addrs - - - -def process(msg): - if msg.get_content_type() not in ('multipart/mixed', 'multipart/report'): - return None - # We're looking for the plain/text subpart with a Content-Description: of - # `notification'. - leaves = [] - flatten(msg, leaves) - for subpart in leaves: - if subpart.get_content_type() == 'text/plain' and \ - subpart.get('content-description', '').lower() == 'notification': - # then... - return findaddr(subpart) - return None diff --git a/src/mailman/Bouncers/Qmail.py b/src/mailman/Bouncers/Qmail.py deleted file mode 100644 index 499571e47..000000000 --- a/src/mailman/Bouncers/Qmail.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Parse bounce messages generated by qmail. - -Qmail actually has a standard, called QSBMF (qmail-send bounce message -format), as described in - - http://cr.yp.to/proto/qsbmf.txt - -This module should be conformant. - -""" - -import re -import email.Iterators - -# Other (non-standard?) intros have been observed in the wild. -introtags = [ - 'Hi. This is the', - "We're sorry. There's a problem", - 'Check your send e-mail address.', - 'This is the mail delivery agent at', - 'Unfortunately, your mail was not delivered' - ] -acre = re.compile(r'<(?P<addr>[^>]*)>:') - - - -def process(msg): - addrs = [] - # simple state machine - # 0 = nothing seen yet - # 1 = intro paragraph seen - # 2 = recip paragraphs seen - state = 0 - for line in email.Iterators.body_line_iterator(msg): - line = line.strip() - if state == 0: - for introtag in introtags: - if line.startswith(introtag): - state = 1 - break - elif state == 1 and not line: - # Looking for the end of the intro paragraph - state = 2 - elif state == 2: - if line.startswith('-'): - # We're looking at the break paragraph, so we're done - break - # At this point we know we must be looking at a recipient - # paragraph - mo = acre.match(line) - if mo: - addrs.append(mo.group('addr')) - # Otherwise, it must be a continuation line, so just ignore it - # Not looking at anything in particular - return addrs diff --git a/src/mailman/Bouncers/Sina.py b/src/mailman/Bouncers/Sina.py deleted file mode 100644 index 15386abd3..000000000 --- a/src/mailman/Bouncers/Sina.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2002-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""sina.com bounces""" - -import re -from email import Iterators - -acre = re.compile(r'<(?P<addr>[^>]*)>') - - - -def process(msg): - if msg.get('from', '').lower() <> 'mailer-daemon@sina.com': - print 'out 1' - return [] - if not msg.is_multipart(): - print 'out 2' - return [] - # The interesting bits are in the first text/plain multipart - part = None - try: - part = msg.get_payload(0) - except IndexError: - pass - if not part: - print 'out 3' - return [] - addrs = {} - for line in Iterators.body_line_iterator(part): - mo = acre.match(line) - if mo: - addrs[mo.group('addr')] = 1 - return addrs.keys() diff --git a/src/mailman/Bouncers/Yahoo.py b/src/mailman/Bouncers/Yahoo.py deleted file mode 100644 index 26c6183a0..000000000 --- a/src/mailman/Bouncers/Yahoo.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Yahoo! has its own weird format for bounces.""" - -import re -import email -from email.Utils import parseaddr - -tcre = re.compile(r'message\s+from\s+yahoo\.\S+', re.IGNORECASE) -acre = re.compile(r'<(?P<addr>[^>]*)>:') -ecre = re.compile(r'--- Original message follows') - - - -def process(msg): - # Yahoo! bounces seem to have a known subject value and something called - # an x-uidl: header, the value of which seems unimportant. - sender = parseaddr(msg.get('from', '').lower())[1] or '' - if not sender.startswith('mailer-daemon@yahoo'): - return None - addrs = [] - # simple state machine - # 0 == nothing seen - # 1 == tag line seen - state = 0 - for line in email.Iterators.body_line_iterator(msg): - line = line.strip() - if state == 0 and tcre.match(line): - state = 1 - elif state == 1: - mo = acre.match(line) - if mo: - addrs.append(mo.group('addr')) - continue - mo = ecre.match(line) - if mo: - # we're at the end of the error response - break - return addrs diff --git a/src/mailman/Bouncers/Yale.py b/src/mailman/Bouncers/Yale.py deleted file mode 100644 index b8a5c053e..000000000 --- a/src/mailman/Bouncers/Yale.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (C) 2000-2010 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Yale's mail server is pretty dumb. - -Its reports include the end user's name, but not the full domain. I think we -can usually guess it right anyway. This is completely based on examination of -the corpse, and is subject to failure whenever Yale even slightly changes -their MTA. :( - -""" - -import re -from cStringIO import StringIO -from email.Utils import getaddresses - -scre = re.compile(r'Message not delivered to the following', re.IGNORECASE) -ecre = re.compile(r'Error Detail', re.IGNORECASE) -acre = re.compile(r'\s+(?P<addr>\S+)\s+') - - - -def process(msg): - if msg.is_multipart(): - return None - try: - whofrom = getaddresses([msg.get('from', '')])[0][1] - if not whofrom: - return None - username, domain = whofrom.split('@', 1) - except (IndexError, ValueError): - return None - if username.lower() <> 'mailer-daemon': - return None - parts = domain.split('.') - parts.reverse() - for part1, part2 in zip(parts, ('edu', 'yale')): - if part1 <> part2: - return None - # Okay, we've established that the bounce came from the mailer-daemon at - # yale.edu. Let's look for a name, and then guess the relevant domains. - names = {} - body = StringIO(msg.get_payload()) - state = 0 - # simple state machine - # 0 == init - # 1 == intro found - while 1: - line = body.readline() - if not line: - break - if state == 0 and scre.search(line): - state = 1 - elif state == 1 and ecre.search(line): - break - elif state == 1: - mo = acre.search(line) - if mo: - names[mo.group('addr')] = 1 - # Now we have a bunch of names, these are either @yale.edu or - # @cs.yale.edu. Add them both. - addrs = [] - for name in names.keys(): - addrs.append(name + '@yale.edu') - addrs.append(name + '@cs.yale.edu') - return addrs diff --git a/src/mailman/Bouncers/Compuserve.py b/src/mailman/bouncers/Compuserve.py index 13052b68e..13052b68e 100644 --- a/src/mailman/Bouncers/Compuserve.py +++ b/src/mailman/bouncers/Compuserve.py diff --git a/src/mailman/Bouncers/__init__.py b/src/mailman/bouncers/__init__.py index e69de29bb..e69de29bb 100644 --- a/src/mailman/Bouncers/__init__.py +++ b/src/mailman/bouncers/__init__.py diff --git a/src/mailman/bouncers/caiwireless.py b/src/mailman/bouncers/caiwireless.py new file mode 100644 index 000000000..69b9dc753 --- /dev/null +++ b/src/mailman/bouncers/caiwireless.py @@ -0,0 +1,64 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Parse mystery style generated by MTA at caiwireless.net.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Caiwireless', + ] + + +import re + +from email.iterators import body_line_iterator +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +tcre = re.compile(r'the following recipients did not receive this message:', + re.IGNORECASE) +acre = re.compile(r'<(?P<addr>[^>]*)>') + + +class ParseState(Enum): + start = 0 + tag_seen = 1 + + + +class Caiwireless: + """Parse mystery style generated by MTA at caiwireless.net.""" + + def process(self, msg): + if msg.get_content_type() != 'multipart/mixed': + return None + state = ParseState.start + # This format thinks it's a MIME, but it really isn't. + for line in body_line_iterator(msg): + line = line.strip() + if state is ParseState.start and tcre.match(line): + state = ParseState.tag_seen + elif state is ParseState.tag_seen and line: + mo = acre.match(line) + if not mo: + return None + return [mo.group('addr')] diff --git a/src/mailman/Bouncers/DSN.py b/src/mailman/bouncers/dsn.py index ce53df28e..f9d15bbd2 100644 --- a/src/mailman/Bouncers/DSN.py +++ b/src/mailman/bouncers/dsn.py @@ -21,16 +21,25 @@ RFC 3464 obsoletes 1894 which was the old DSN standard. This module has not been audited for differences between the two. """ -from email.Iterators import typed_subpart_iterator -from email.Utils import parseaddr +from __future__ import absolute_import, unicode_literals -from mailman.Bouncers.BouncerAPI import Stop +__metaclass__ = type +__all__ = [ + 'DSN', + ] + + +from email.iterators import typed_subpart_iterator +from email.utils import parseaddr +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector, NonFatal def check(msg): - # Iterate over each message/delivery-status subpart - addrs = [] + # Iterate over each message/delivery-status subpart. + addresses = [] for part in typed_subpart_iterator(msg, 'message', 'delivery-status'): if not part.is_multipart(): # Huh? @@ -48,9 +57,9 @@ def check(msg): action = msgblock.get('action', '').lower() # Some MTAs have been observed that put comments on the action. if action.startswith('delayed'): - return Stop + return NonFatal if not action.startswith('fail'): - # Some non-permanent failure, so ignore this block + # Some non-permanent failure, so ignore this block. continue params = [] foundp = False @@ -62,7 +71,7 @@ def check(msg): params.append(k) if foundp: # Note that params should already be unquoted. - addrs.extend(params) + addresses.extend(params) break else: # MAS: This is a kludge, but SMTP-GATEWAY01.intra.home.dk @@ -70,30 +79,30 @@ def check(msg): # address-type parameter at all. Non-compliant, but ... for param in params: if param.startswith('<') and param.endswith('>'): - addrs.append(param[1:-1]) - # Uniquify - rtnaddrs = {} - for a in addrs: - if a is not None: - realname, a = parseaddr(a) - rtnaddrs[a] = True - return rtnaddrs.keys() + addresses.append(param[1:-1]) + return set(parseaddr(address)[1] for address in addresses + if address is not None) -def process(msg): - # A DSN has been seen wrapped with a "legal disclaimer" by an outgoing MTA - # in a multipart/mixed outer part. - if msg.is_multipart() and msg.get_content_subtype() == 'mixed': - msg = msg.get_payload()[0] - # The above will suffice if the original message 'parts' were wrapped with - # the disclaimer added, but the original DSN can be wrapped as a - # message/rfc822 part. We need to test that too. - if msg.is_multipart() and msg.get_content_type() == 'message/rfc822': - msg = msg.get_payload()[0] - # The report-type parameter should be "delivery-status", but it seems that - # some DSN generating MTAs don't include this on the Content-Type: header, - # so let's relax the test a bit. - if not msg.is_multipart() or msg.get_content_subtype() <> 'report': - return None - return check(msg) +class DSN: + """Parse RFC 3464 (i.e. DSN) bounce formats.""" + + implements(IBounceDetector) + + def process(self, msg): + # A DSN has been seen wrapped with a "legal disclaimer" by an outgoing + # MTA in a multipart/mixed outer part. + if msg.is_multipart() and msg.get_content_subtype() == 'mixed': + msg = msg.get_payload()[0] + # The above will suffice if the original message 'parts' were wrapped + # with the disclaimer added, but the original DSN can be wrapped as a + # message/rfc822 part. We need to test that too. + if msg.is_multipart() and msg.get_content_type() == 'message/rfc822': + msg = msg.get_payload()[0] + # The report-type parameter should be "delivery-status", but it seems + # that some DSN generating MTAs don't include this on the + # Content-Type: header, so let's relax the test a bit. + if not msg.is_multipart() or msg.get_content_subtype() <> 'report': + return None + return check(msg) diff --git a/src/mailman/Bouncers/Exchange.py b/src/mailman/bouncers/exchange.py index f2fbb2f58..94a181d88 100644 --- a/src/mailman/Bouncers/Exchange.py +++ b/src/mailman/bouncers/exchange.py @@ -17,8 +17,21 @@ """Recognizes (some) Microsoft Exchange formats.""" +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Exchange', + ] + + import re -import email.Iterators + +from email.iterators import body_line_iterator +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + scre = re.compile('did not reach the following recipient') ecre = re.compile('MSEXCH:') @@ -27,22 +40,28 @@ a2cre = re.compile('(?P<addr>[^ ]+) on ') -def process(msg): - addrs = {} - it = email.Iterators.body_line_iterator(msg) - # Find the start line - for line in it: - if scre.search(line): - break - else: - return [] - # Search each line until we hit the end line - for line in it: - if ecre.search(line): - break - mo = a1cre.search(line) - if not mo: - mo = a2cre.search(line) - if mo: - addrs[mo.group('addr')] = 1 - return addrs.keys() +class Exchange: + """Recognizes (some) Microsoft Exchange formats.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + addresses = set() + it = body_line_iterator(msg) + # Find the start line. + for line in it: + if scre.search(line): + break + else: + return [] + # Search each line until we hit the end line. + for line in it: + if ecre.search(line): + break + mo = a1cre.search(line) + if not mo: + mo = a2cre.search(line) + if mo: + addresses.add(mo.group('addr')) + return list(addresses) diff --git a/src/mailman/Bouncers/Exim.py b/src/mailman/bouncers/exim.py index 1a5133eed..84fc3b4d0 100644 --- a/src/mailman/Bouncers/Exim.py +++ b/src/mailman/bouncers/exim.py @@ -22,10 +22,27 @@ an `addresslist' of failed addresses. """ -from email.Utils import getaddresses +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Exim', + ] + + +from email.utils import getaddresses +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector -def process(msg): - all = msg.get_all('x-failed-recipients', []) - return [a for n, a in getaddresses(all)] +class Exim: + """Parse bounce messages generated by Exim.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + all = msg.get_all('x-failed-recipients', []) + return [address for name, address in getaddresses(all)] diff --git a/src/mailman/Bouncers/GroupWise.py b/src/mailman/bouncers/groupwise.py index d7d7d4a20..d37a6183e 100644 --- a/src/mailman/Bouncers/GroupWise.py +++ b/src/mailman/bouncers/groupwise.py @@ -22,9 +22,22 @@ X-Mailer: NTMail v4.30.0012 X-Mailer: Internet Mail Service (5.5.2653.19) """ +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'GroupWise', + ] + + import re + from email.Message import Message from cStringIO import StringIO +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + acre = re.compile(r'<(?P<addr>[^>]*)>') @@ -44,28 +57,31 @@ def find_textplain(msg): -def process(msg): - if msg.get_content_type() <> 'multipart/mixed' or not msg['x-mailer']: - return None - addrs = {} - # find the first text/plain part in the message - textplain = find_textplain(msg) - if not textplain: - return None - body = StringIO(textplain.get_payload()) - while 1: - line = body.readline() - if not line: - break - mo = acre.search(line) - if mo: - addrs[mo.group('addr')] = 1 - elif '@' in line: - i = line.find(' ') - if i == 0: - continue - if i < 0: - addrs[line] = 1 - else: - addrs[line[:i]] = 1 - return addrs.keys() +class GroupWise: + """Parse Novell GroupWise and NTMail bounces.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + if msg.get_content_type() != 'multipart/mixed' or not msg['x-mailer']: + return None + addresses = set() + # Find the first text/plain part in the message. + text_plain = find_textplain(msg) + if text_plain is None: + return None + body = StringIO(text_plain.get_payload()) + for line in body: + mo = acre.search(line) + if mo: + addresses.add(mo.group('addr')) + elif '@' in line: + i = line.find(' ') + if i == 0: + continue + if i < 0: + addresses.add(line) + else: + addresses.add(line[:i]) + return list(addresses) diff --git a/src/mailman/Bouncers/LLNL.py b/src/mailman/bouncers/llnl.py index d3fe282cc..d62a56591 100644 --- a/src/mailman/Bouncers/LLNL.py +++ b/src/mailman/bouncers/llnl.py @@ -17,16 +17,36 @@ """LLNL's custom Sendmail bounce message.""" +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'LLNL', + ] + + import re -import email + +from email.iterators import body_line_iterator +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + acre = re.compile(r',\s*(?P<addr>\S+@[^,]+),', re.IGNORECASE) -def process(msg): - for line in email.Iterators.body_line_iterator(msg): - mo = acre.search(line) - if mo: - return [mo.group('addr')] - return [] +class LLNL: + """LLNL's custom Sendmail bounce message.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + + for line in body_line_iterator(msg): + mo = acre.search(line) + if mo: + return [mo.group('addr')] + return [] diff --git a/src/mailman/bouncers/microsoft.py b/src/mailman/bouncers/microsoft.py new file mode 100644 index 000000000..bc17bcaf9 --- /dev/null +++ b/src/mailman/bouncers/microsoft.py @@ -0,0 +1,72 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Microsoft's `SMTPSVC' nears I kin tell.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Microsoft', + ] + + +import re + +from cStringIO import StringIO +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +scre = re.compile(r'transcript of session follows', re.IGNORECASE) + + +class ParseState(Enum): + start = 0 + tag_seen = 1 + + + +class Microsoft: + """Microsoft's `SMTPSVC' nears I kin tell.""" + + def process(self, msg): + if msg.get_content_type() != 'multipart/mixed': + return None + # Find the first subpart, which has no MIME type. + try: + subpart = msg.get_payload(0) + except IndexError: + # The message *looked* like a multipart but wasn't. + return None + data = subpart.get_payload() + if isinstance(data, list): + # The message is a multi-multipart, so not a matching bounce. + return None + body = StringIO(data) + state = ParseState.start + addresses = set() + for line in body: + if state is ParseState.start: + if scre.search(line): + state = ParseState.tag_seen + elif state is ParseState.tag_seen: + if '@' in line: + addresses.add(line.strip()) + return list(addresses) diff --git a/src/mailman/bouncers/netscape.py b/src/mailman/bouncers/netscape.py new file mode 100644 index 000000000..a7a23901e --- /dev/null +++ b/src/mailman/bouncers/netscape.py @@ -0,0 +1,103 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Netscape Messaging Server bounce formats. + +I've seen at least one NMS server version 3.6 (envy.gmp.usyd.edu.au) bounce +messages of this format. Bounces come in DSN MIME format, but don't include +any -Recipient: headers. Gotta just parse the text :( + +NMS 4.1 (dfw-smtpin1.email.verio.net) seems even worse, but we'll try to +decipher the format here too. + +""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Netscape', + ] + + +import re + +from cStringIO import StringIO +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +pcre = re.compile( + r'This Message was undeliverable due to the following reason:', + re.IGNORECASE) + +acre = re.compile( + r'(?P<reply>please reply to)?.*<(?P<addr>[^>]*)>', + re.IGNORECASE) + + + +def flatten(msg, leaves): + # Give us all the leaf (non-multipart) subparts. + if msg.is_multipart(): + for part in msg.get_payload(): + flatten(part, leaves) + else: + leaves.append(msg) + + + +class Netscape: + """Netscape Messaging Server bounce formats.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + + # Sigh. Some NMS 3.6's show + # multipart/report; report-type=delivery-status + # and some show + # multipart/mixed; + if not msg.is_multipart(): + return None + # We're looking for a text/plain subpart occuring before a + # message/delivery-status subpart. + plainmsg = None + leaves = [] + flatten(msg, leaves) + for i, subpart in zip(range(len(leaves)-1), leaves): + if subpart.get_content_type() == 'text/plain': + plainmsg = subpart + break + if not plainmsg: + return None + # Total guesswork, based on captured examples... + body = StringIO(plainmsg.get_payload()) + addresses = set() + for line in body: + mo = pcre.search(line) + if mo: + # We found a bounce section, but I have no idea what the + # official format inside here is. :( We'll just search for + # <addr> strings. + for line in body: + mo = acre.search(line) + if mo and not mo.group('reply'): + addresses.add(mo.group('addr')) + return list(addresses) diff --git a/src/mailman/bouncers/postfix.py b/src/mailman/bouncers/postfix.py new file mode 100644 index 000000000..c178f48c0 --- /dev/null +++ b/src/mailman/bouncers/postfix.py @@ -0,0 +1,109 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Parse bounce messages generated by Postfix. + +This also matches something called 'Keftamail' which looks just like Postfix +bounces with the word Postfix scratched out and the word 'Keftamail' written +in in crayon. + +It also matches something claiming to be 'The BNS Postfix program', and +'SMTP_Gateway'. Everybody's gotta be different, huh? +""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Postfix', + ] + + +import re + +from cStringIO import StringIO +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +# Are these heuristics correct or guaranteed? +pcre = re.compile(r'[ \t]*the\s*(bns)?\s*(postfix|keftamail|smtp_gateway)', + re.IGNORECASE) +rcre = re.compile(r'failure reason:$', re.IGNORECASE) +acre = re.compile(r'<(?P<addr>[^>]*)>:') + +REPORT_TYPES = ('multipart/mixed', 'multipart/report') + + +class ParseState(Enum): + start = 0 + salutation_found = 1 + + + +def flatten(msg, leaves): + # Give us all the leaf (non-multipart) subparts. + if msg.is_multipart(): + for part in msg.get_payload(): + flatten(part, leaves) + else: + leaves.append(msg) + + + +def findaddr(msg): + addresses = set() + body = StringIO(msg.get_payload()) + state = ParseState.start + for line in body: + # Preserve leading whitespace. + line = line.rstrip() + # Yes, use match() to match at beginning of string. + if state is ParseState.start and ( + pcre.match(line) or rcre.match(line)): + # Then... + state = ParseState.salutation_found + elif state is ParseState.salutation_found and line: + mo = acre.search(line) + if mo: + addresses.add(mo.group('addr')) + # Probably a continuation line. + return addresses + + + +class Postfix: + """Parse bounce messages generated by Postfix.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + if msg.get_content_type() not in REPORT_TYPES: + return None + # We're looking for the plain/text subpart with a Content-Description: + # of 'notification'. + leaves = [] + flatten(msg, leaves) + for subpart in leaves: + content_type = subpart.get_content_type() + content_desc = subpart.get('content-description', '').lower() + if content_type == 'text/plain' and content_desc == 'notification': + return set(findaddr(subpart)) + return None diff --git a/src/mailman/bouncers/qmail.py b/src/mailman/bouncers/qmail.py new file mode 100644 index 000000000..71967b516 --- /dev/null +++ b/src/mailman/bouncers/qmail.py @@ -0,0 +1,96 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Parse bounce messages generated by qmail. + +Qmail actually has a standard, called QSBMF (qmail-send bounce message +format), as described in + + http://cr.yp.to/proto/qsbmf.txt + +This module should be conformant. + +""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Qmail', + ] + + +import re + +from email.iterators import body_line_iterator +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +# Other (non-standard?) intros have been observed in the wild. +introtags = [ + 'Hi. This is the', + "We're sorry. There's a problem", + 'Check your send e-mail address.', + 'This is the mail delivery agent at', + 'Unfortunately, your mail was not delivered' + ] +acre = re.compile(r'<(?P<addr>[^>]*)>:') + + +class ParseState(Enum): + start = 0 + intro_paragraph_seen = 1 + recip_paragraph_seen = 2 + + + +class Qmail: + """Parse QSBMF format bounces.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + addresses = set() + state = ParseState.start + for line in body_line_iterator(msg): + line = line.strip() + if state is ParseState.start: + for introtag in introtags: + if line.startswith(introtag): + state = ParseState.intro_paragraph_seen + break + elif state is ParseState.intro_paragraph_seen and not line: + # Looking for the end of the intro paragraph. + state = ParseState.recip_paragraph_seen + elif state is ParseState.recip_paragraph_seen: + if line.startswith('-'): + # We're looking at the break paragraph, so we're done. + break + # At this point we know we must be looking at a recipient + # paragraph. + mo = acre.match(line) + if mo: + addresses.add(mo.group('addr')) + # Otherwise, it must be a continuation line, so just ignore it. + else: + # We're not looking at anything in particular. + pass + return list(addresses) diff --git a/src/mailman/Bouncers/SimpleMatch.py b/src/mailman/bouncers/simplematch.py index 29fc92ee0..91913e786 100644 --- a/src/mailman/Bouncers/SimpleMatch.py +++ b/src/mailman/bouncers/simplematch.py @@ -17,22 +17,41 @@ """Recognizes simple heuristically delimited bounces.""" +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'SimpleMatch', + ] + + import re -import email.Iterators + +from email.iterators import body_line_iterator +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +class ParseState(Enum): + start = 0 + tag_seen = 1 def _c(pattern): return re.compile(pattern, re.IGNORECASE) + # This is a list of tuples of the form # # (start cre, end cre, address cre) # -# where `cre' means compiled regular expression, start is the line just before +# where 'cre' means compiled regular expression, start is the line just before # the bouncing address block, end is the line just after the bouncing address # block, and address cre is the regexp that will recognize the addresses. It -# must have a group called `addr' which will contain exactly and only the +# must have a group called 'addr' which will contain exactly and only the # address that bounced. PATTERNS = [ # sdm.de @@ -171,34 +190,37 @@ PATTERNS = [ -def process(msg, patterns=None): - if patterns is None: - patterns = PATTERNS - # simple state machine - # 0 = nothing seen yet - # 1 = intro seen - addrs = {} - # MAS: This is a mess. The outer loop used to be over the message - # so we only looped through the message once. Looping through the - # message for each set of patterns is obviously way more work, but - # if we don't do it, problems arise because scre from the wrong - # pattern set matches first and then acre doesn't match. The - # alternative is to split things into separate modules, but then - # we process the message multiple times anyway. - for scre, ecre, acre in patterns: - state = 0 - for line in email.Iterators.body_line_iterator(msg): - if state == 0: - if scre.search(line): - state = 1 - if state == 1: - mo = acre.search(line) - if mo: - addr = mo.group('addr') - if addr: - addrs[mo.group('addr')] = 1 - elif ecre.search(line): - break - if addrs: - break - return addrs.keys() +class SimpleMatch: + """Recognizes simple heuristically delimited bounces.""" + + implements(IBounceDetector) + + PATTERNS = PATTERNS + + def process(self, msg): + """See `IBounceDetector`.""" + addresses = set() + # MAS: This is a mess. The outer loop used to be over the message + # so we only looped through the message once. Looping through the + # message for each set of patterns is obviously way more work, but + # if we don't do it, problems arise because scre from the wrong + # pattern set matches first and then acre doesn't match. The + # alternative is to split things into separate modules, but then + # we process the message multiple times anyway. + for scre, ecre, acre in self.PATTERNS: + state = ParseState.start + for line in body_line_iterator(msg): + if state is ParseState.start: + if scre.search(line): + state = ParseState.tag_seen + if state is ParseState.tag_seen: + mo = acre.search(line) + if mo: + address = mo.group('addr') + if address: + addresses.add(address) + elif ecre.search(line): + break + if len(addresses) > 0: + break + return list(addresses) diff --git a/src/mailman/Bouncers/SimpleWarning.py b/src/mailman/bouncers/simplewarning.py index c20375a91..aeebb6b76 100644 --- a/src/mailman/Bouncers/SimpleWarning.py +++ b/src/mailman/bouncers/simplewarning.py @@ -17,9 +17,17 @@ """Recognizes simple heuristically delimited warnings.""" -from mailman.Bouncers.BouncerAPI import Stop -from mailman.Bouncers.SimpleMatch import _c -from mailman.Bouncers.SimpleMatch import process as _process +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'SimpleWarning', + ] + + +from mailman.bouncers.simplematch import _c +from mailman.bouncers.simplematch import SimpleMatch +from mailman.interfaces.bounce import NonFatal @@ -27,12 +35,12 @@ from mailman.Bouncers.SimpleMatch import process as _process # # (start cre, end cre, address cre) # -# where `cre' means compiled regular expression, start is the line just before +# where 'cre' means compiled regular expression, start is the line just before # the bouncing address block, end is the line just after the bouncing address # block, and address cre is the regexp that will recognize the addresses. It -# must have a group called `addr' which will contain exactly and only the +# must have a group called 'addr' which will contain exactly and only the # address that bounced. -patterns = [ +PATTERNS = [ # pop3.pta.lia.net (_c('The address to which the message has not yet been delivered is'), _c('No action is required on your part'), @@ -54,9 +62,15 @@ patterns = [ -def process(msg): - if _process(msg, patterns): - # It's a recognized warning so stop now - return Stop - else: - return [] +class SimpleWarning(SimpleMatch): + """Recognizes simple heuristically delimited warnings.""" + + PATTERNS = PATTERNS + + def process(self, msg): + """See `SimpleMatch`.""" + if super(SimpleWarning, self).process(msg): + # It's a recognized warning so stop now. + return NonFatal + else: + return None diff --git a/src/mailman/bouncers/sina.py b/src/mailman/bouncers/sina.py new file mode 100644 index 000000000..7bf42483c --- /dev/null +++ b/src/mailman/bouncers/sina.py @@ -0,0 +1,64 @@ +# Copyright (C) 2002-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""sina.com bounces""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Sina', + ] + + +import re + +from email.iterators import body_line_iterator +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +acre = re.compile(r'<(?P<addr>[^>]*)>') + + + +class Sina: + """sina.com bounces""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + if msg.get('from', '').lower() != 'mailer-daemon@sina.com': + return [] + if not msg.is_multipart(): + return [] + # The interesting bits are in the first text/plain multipart. + part = None + try: + part = msg.get_payload(0) + except IndexError: + pass + if not part: + return [] + addresses = set() + for line in body_line_iterator(part): + mo = acre.match(line) + if mo: + addresses.add(mo.group('addr')) + return list(addresses) diff --git a/src/mailman/Bouncers/SMTP32.py b/src/mailman/bouncers/smtp32.py index 6cace9c24..5abf8053f 100644 --- a/src/mailman/Bouncers/SMTP32.py +++ b/src/mailman/bouncers/smtp32.py @@ -28,8 +28,21 @@ Escape character is '^]'. """ +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'SMTP32', + ] + + import re -import email + +from email.iterators import body_line_iterator +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + ecre = re.compile('original message follows', re.IGNORECASE) acre = re.compile(r''' @@ -46,15 +59,23 @@ acre = re.compile(r''' -def process(msg): - mailer = msg.get('x-mailer', '') - if not mailer.startswith('<SMTP32 v'): - return - addrs = {} - for line in email.Iterators.body_line_iterator(msg): - if ecre.search(line): - break - mo = acre.search(line) - if mo: - addrs[mo.group('addr')] = 1 - return addrs.keys() +class SMTP32: + """Something which claims + + X-Mailer: <SMTP32 vXXXXXX> + """ + + implements(IBounceDetector) + + def process(self, msg): + mailer = msg.get('x-mailer', '') + if not mailer.startswith('<SMTP32 v'): + return None + addrs = set() + for line in body_line_iterator(msg): + if ecre.search(line): + break + mo = acre.search(line) + if mo: + addrs.add(mo.group('addr')) + return list(addrs) diff --git a/src/mailman/bouncers/yahoo.py b/src/mailman/bouncers/yahoo.py new file mode 100644 index 000000000..39d743d46 --- /dev/null +++ b/src/mailman/bouncers/yahoo.py @@ -0,0 +1,76 @@ +# Copyright (C) 1998-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Yahoo! has its own weird format for bounces.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Yahoo', + ] + + +import re +import email + +from email.utils import parseaddr +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +tcre = re.compile(r'message\s+from\s+yahoo\.\S+', re.IGNORECASE) +acre = re.compile(r'<(?P<addr>[^>]*)>:') +ecre = re.compile(r'--- Original message follows') + + +class ParseState(Enum): + start = 0 + tag_seen = 1 + + + +class Yahoo: + """Yahoo! bounce detection.""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + # Yahoo! bounces seem to have a known subject value and something + # called an x-uidl: header, the value of which seems unimportant. + sender = parseaddr(msg.get('from', '').lower())[1] or '' + if not sender.startswith('mailer-daemon@yahoo'): + return None + addresses = set() + state = ParseState.start + for line in email.Iterators.body_line_iterator(msg): + line = line.strip() + if state is ParseState.start and tcre.match(line): + state = ParseState.tag_seen + elif state is ParseState.tag_seen: + mo = acre.match(line) + if mo: + addresses.add(mo.group('addr')) + continue + mo = ecre.match(line) + if mo: + # We're at the end of the error response. + break + return list(addresses) diff --git a/src/mailman/bouncers/yale.py b/src/mailman/bouncers/yale.py new file mode 100644 index 000000000..8b11077dd --- /dev/null +++ b/src/mailman/bouncers/yale.py @@ -0,0 +1,100 @@ +# Copyright (C) 2000-2010 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Yale's mail server is pretty dumb. + +Its reports include the end user's name, but not the full domain. I think we +can usually guess it right anyway. This is completely based on examination of +the corpse, and is subject to failure whenever Yale even slightly changes +their MTA. :( + +""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Yale', + ] + + +import re + +from cStringIO import StringIO +from email.utils import getaddresses +from flufl.enum import Enum +from zope.interface import implements + +from mailman.interfaces.bounce import IBounceDetector + + +scre = re.compile(r'Message not delivered to the following', re.IGNORECASE) +ecre = re.compile(r'Error Detail', re.IGNORECASE) +acre = re.compile(r'\s+(?P<addr>\S+)\s+') + + +class ParseState(Enum): + start = 0 + intro_found = 1 + + + +class Yale: + """Parse Yale's bounces (or what used to be).""" + + implements(IBounceDetector) + + def process(self, msg): + """See `IBounceDetector`.""" + if msg.is_multipart(): + return None + try: + whofrom = getaddresses([msg.get('from', '')])[0][1] + if not whofrom: + return None + username, domain = whofrom.split('@', 1) + except (IndexError, ValueError): + return None + if username.lower() != 'mailer-daemon': + return None + parts = domain.split('.') + parts.reverse() + for part1, part2 in zip(parts, ('edu', 'yale')): + if part1 != part2: + return None + # Okay, we've established that the bounce came from the mailer-daemon + # at yale.edu. Let's look for a name, and then guess the relevant + # domains. + names = set() + body = StringIO(msg.get_payload()) + state = ParseState.start + for line in body: + if state is ParseState.start and scre.search(line): + state = ParseState.intro_found + elif state is ParseState.intro_found and ecre.search(line): + break + elif state is ParseState.intro_found: + mo = acre.search(line) + if mo: + names.add(mo.group('addr')) + # Now we have a bunch of names, these are either @yale.edu or + # @cs.yale.edu. Add them both. + addresses = [] + for name in names: + addresses.append(name + '@yale.edu') + addresses.append(name + '@cs.yale.edu') + return addresses diff --git a/src/mailman/bounces/__init__.py b/src/mailman/bounces/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/src/mailman/bounces/__init__.py +++ /dev/null diff --git a/src/mailman/interfaces/action.py b/src/mailman/interfaces/action.py index 0e4ba7d35..18bf5fab1 100644 --- a/src/mailman/interfaces/action.py +++ b/src/mailman/interfaces/action.py @@ -15,6 +15,8 @@ # You should have received a copy of the GNU General Public License along with # GNU Mailman. If not, see <http://www.gnu.org/licenses/>. +"""Message actions.""" + __metaclass__ = type __all__ = [ 'Action', diff --git a/src/mailman/queue/bounce.py b/src/mailman/queue/bounce.py index 2a7af6d1a..9f51a3545 100644 --- a/src/mailman/queue/bounce.py +++ b/src/mailman/queue/bounce.py @@ -30,7 +30,7 @@ from mailman.app.bounce import scan_message from mailman.config import config from mailman.core.i18n import _ from mailman.email.utils import split_email -from mailman.interfaces.bounce import Stop +from mailman.interfaces.bounce import NonFatal from mailman.queue import Runner @@ -193,7 +193,7 @@ class BounceRunner(Runner, BounceMixin): addrs = verp_bounce(mlist, msg) if addrs: # We have an address, but check if the message is non-fatal. - if scan_messages(mlist, msg) is Stop: + if scan_messages(mlist, msg) is NonFatal: return else: # See if this was a probe message. @@ -204,7 +204,7 @@ class BounceRunner(Runner, BounceMixin): # That didn't give us anything useful, so try the old fashion # bounce matching modules. addrs = scan_messages(mlist, msg) - if addrs is Stop: + if addrs is NonFatal: # This is a recognized, non-fatal notice. Ignore it. return # If that still didn't return us any useful addresses, then send it on diff --git a/src/mailman/tests/test_bounces.py b/src/mailman/tests/test_bounces.py index ac52d077c..2267b6af7 100644 --- a/src/mailman/tests/test_bounces.py +++ b/src/mailman/tests/test_bounces.py @@ -33,7 +33,11 @@ import unittest from contextlib import closing from pkg_resources import resource_stream -from mailman.Bouncers.BouncerAPI import Stop +from mailman.app.finder import find_components +from mailman.bouncers.caiwireless import Caiwireless +from mailman.bouncers.microsoft import Microsoft +from mailman.bouncers.smtp32 import SMTP32 +from mailman.interfaces.bounce import IBounceDetector, NonFatal @@ -80,9 +84,9 @@ class BounceTest(unittest.TestCase): ('SimpleMatch', 'bounce_02.txt', ['acinsp1@midsouth.rr.com']), ('SimpleMatch', 'bounce_03.txt', ['james@jeborall.demon.co.uk']), # SimpleWarning - ('SimpleWarning', 'simple_03.txt', Stop), - ('SimpleWarning', 'simple_21.txt', Stop), - ('SimpleWarning', 'simple_22.txt', Stop), + ('SimpleWarning', 'simple_03.txt', NonFatal), + ('SimpleWarning', 'simple_21.txt', NonFatal), + ('SimpleWarning', 'simple_22.txt', NonFatal), # GroupWise ('GroupWise', 'groupwise_01.txt', ['thoff@MAINEX1.ASU.EDU']), # This one really sucks 'cause it's text/html. Just make sure it @@ -99,10 +103,10 @@ class BounceTest(unittest.TestCase): ('DSN', 'dsn_02.txt', ['zzzzz@zeus.hud.ac.uk']), ('DSN', 'dsn_03.txt', ['ddd.kkk@advalvas.be']), ('DSN', 'dsn_04.txt', ['max.haas@unibas.ch']), - ('DSN', 'dsn_05.txt', Stop), - ('DSN', 'dsn_06.txt', Stop), - ('DSN', 'dsn_07.txt', Stop), - ('DSN', 'dsn_08.txt', Stop), + ('DSN', 'dsn_05.txt', NonFatal), + ('DSN', 'dsn_06.txt', NonFatal), + ('DSN', 'dsn_07.txt', NonFatal), + ('DSN', 'dsn_08.txt', NonFatal), ('DSN', 'dsn_09.txt', ['pr@allen-heath.com']), ('DSN', 'dsn_10.txt', ['anne.person@dom.ain']), ('DSN', 'dsn_11.txt', ['joem@example.com']), @@ -172,31 +176,28 @@ class BounceTest(unittest.TestCase): return email.message_from_file(fp) def test_bounce(self): - for modname, filename, addrs in self.DATA: - module = 'mailman.bouncers.' + modname - __import__(module) + detectors = {} + for detector in find_components('mailman.bouncers', IBounceDetector): + detectors[detector.__name__] = detector() + for detector_name, filename, expected_addresses in self.DATA: msg = self._getmsg(filename) - foundaddrs = sys.modules[module].process(msg) - # Some modules return None instead of [] for failure - if foundaddrs is None: - foundaddrs = [] - if foundaddrs is not Stop: - # MAS: The following strip() is only because of my - # hybrid test environment. It is not otherwise needed. - foundaddrs = [found.strip() for found in foundaddrs] - addrs.sort() - foundaddrs.sort() - self.assertEqual(addrs, foundaddrs) + found_addresses = detectors[detector_name].process(msg) + # Some modules return None instead of the empty sequence. + if found_addresses is None: + found_addresses = set() + elif found_addresses is not NonFatal: + found_addresses = set(found_addresses) + if expected_addresses is not NonFatal: + expected_addresses = set(expected_addresses) + self.assertEqual(found_addresses, expected_addresses) def test_SMTP32_failure(self): - from mailman.Bouncers import SMTP32 # This file has no X-Mailer: header msg = self._getmsg('postfix_01.txt') self.failIf(msg['x-mailer'] is not None) - self.failIf(SMTP32.process(msg)) + self.failIf(SMTP32().process(msg)) def test_caiwireless(self): - from mailman.Bouncers import Caiwireless # BAW: this is a mostly bogus test; I lost the samples. :( msg = email.message_from_string("""\ Content-Type: multipart/report; boundary=BOUNDARY @@ -206,10 +207,9 @@ Content-Type: multipart/report; boundary=BOUNDARY --BOUNDARY-- """) - self.assertEqual(None, Caiwireless.process(msg)) + self.assertEqual(None, Caiwireless().process(msg)) def test_microsoft(self): - from mailman.Bouncers import Microsoft # BAW: similarly as above, I lost the samples. :( msg = email.message_from_string("""\ Content-Type: multipart/report; boundary=BOUNDARY @@ -219,7 +219,7 @@ Content-Type: multipart/report; boundary=BOUNDARY --BOUNDARY-- """) - self.assertEqual(None, Microsoft.process(msg)) + self.assertEqual(None, Microsoft().process(msg)) |
