diff options
Diffstat (limited to 'src/mailman/Bouncers')
| -rw-r--r-- | src/mailman/Bouncers/BouncerAPI.py | 64 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Caiwireless.py | 45 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Compuserve.py | 46 | ||||
| -rw-r--r-- | src/mailman/Bouncers/DSN.py | 99 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Exchange.py | 48 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Exim.py | 31 | ||||
| -rw-r--r-- | src/mailman/Bouncers/GroupWise.py | 71 | ||||
| -rw-r--r-- | src/mailman/Bouncers/LLNL.py | 32 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Microsoft.py | 53 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Netscape.py | 89 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Postfix.py | 86 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Qmail.py | 72 | ||||
| -rw-r--r-- | src/mailman/Bouncers/SMTP32.py | 60 | ||||
| -rw-r--r-- | src/mailman/Bouncers/SimpleMatch.py | 204 | ||||
| -rw-r--r-- | src/mailman/Bouncers/SimpleWarning.py | 62 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Sina.py | 48 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Yahoo.py | 54 | ||||
| -rw-r--r-- | src/mailman/Bouncers/Yale.py | 80 | ||||
| -rw-r--r-- | src/mailman/Bouncers/__init__.py | 0 |
19 files changed, 1244 insertions, 0 deletions
# Sentinel a detector may return to say "just discard this message".  The
# typical case is a warning about a temporary delivery problem: it should not
# count as a bounce, and it should not be forwarded to the list administrator.
Stop = object()


# Names of the detector modules inside mailman.Bouncers, in the order in which
# they are tried.
# NOTE(review): the change set also adds a Sina detector that is not listed
# here -- confirm whether that omission is intentional.
BOUNCE_PIPELINE = [
    'DSN',
    'Qmail',
    'Postfix',
    'Yahoo',
    'Caiwireless',
    'Exchange',
    'Exim',
    'Netscape',
    'Compuserve',
    'Microsoft',
    'GroupWise',
    'SMTP32',
    'SimpleMatch',
    'SimpleWarning',
    'Yale',
    'LLNL',
    ]


def ScanMessages(mlist, msg):
    """Run msg through the bounce detectors and return the first hit.

    Each module named in BOUNCE_PIPELINE is imported on demand and its
    process(msg) function is called.  The first truthy result is returned
    unchanged; this includes the Stop sentinel, which the caller needs to
    see.  If no detector reports anything, an empty list is returned.

    mlist is accepted for interface compatibility but is not used here.
    """
    for name in BOUNCE_PIPELINE:
        qualified = 'mailman.Bouncers.' + name
        __import__(qualified)
        detector = sys.modules[qualified]
        result = detector.process(msg)
        if not result:
            continue
        # Hand back whatever the detector found, Stop included.
        return result
    return []
# Caiwireless.py: parse the bounce style generated by the MTA at
# caiwireless.net.
tcre = re.compile(r'the following recipients did not receive this message:',
                  re.IGNORECASE)
acre = re.compile(r'<(?P<addr>[^>]*)>')


def process(msg):
    """Return the bounced address from a caiwireless.net style bounce.

    Only multipart/mixed messages are considered.  The body is scanned for
    the tag line; the first non-blank line after it must start with an
    <address>.

    Returns a one-element list with that address, or None when the message
    does not match the format.
    """
    # Imported here so the block does not depend on the legacy
    # `email.Iterators` module name.
    from email.iterators import body_line_iterator

    if msg.get_content_type() != 'multipart/mixed':
        return None
    # Simple state machine:
    #   0 == nothing seen
    #   1 == tag line seen
    state = 0
    # This format thinks it's MIME, but it really isn't.
    for line in body_line_iterator(msg):
        line = line.strip()
        if state == 0 and tcre.match(line):
            state = 1
        elif state == 1 and line:
            mo = acre.match(line)
            if not mo:
                return None
            return [mo.group('addr')]
    # Tag line never seen, or nothing followed it.
    return None
# Compuserve.py: Compuserve has its own format for bounces.
dcre = re.compile(r'your message could not be delivered', re.IGNORECASE)
acre = re.compile(r'Invalid receiver address: (?P<addr>.*)')


def process(msg):
    """Return the addresses listed in a Compuserve bounce.

    Scanning starts at the first line containing the intro phrase and stops
    if that phrase is seen a second time.  Every "Invalid receiver address:"
    line in between contributes one address.

    Returns a (possibly empty) list of addresses.
    """
    from email.iterators import body_line_iterator

    # Simple state machine:
    #   0 = nothing seen yet
    #   1 = intro line seen
    state = 0
    addrs = []
    for line in body_line_iterator(msg):
        if state == 0:
            if dcre.search(line):
                state = 1
        elif state == 1:
            if dcre.search(line):
                break
            mo = acre.search(line)
            if mo:
                # The pattern captures the rest of the line, so drop any
                # trailing blanks or carriage return.
                addrs.append(mo.group('addr').strip())
    return addrs
# DSN.py: parse RFC 3464 (i.e. DSN) bounce formats.
def check(msg):
    """Collect failed recipients from the delivery-status parts of msg.

    Returns the Stop sentinel as soon as a block with a "delayed" action is
    seen; otherwise returns a list of unique addresses taken from the
    recipient headers of every block whose action starts with "fail".
    """
    # Iterate over each message/delivery-status subpart.
    addrs = []
    for part in typed_subpart_iterator(msg, 'message', 'delivery-status'):
        if not part.is_multipart():
            # Huh?
            continue
        # Each message/delivery-status contains a list of Message objects
        # which are the header blocks.  Iterate over those too.
        for msgblock in part.get_payload():
            # We try to dig out the Original-Recipient (which is optional)
            # and Final-Recipient (which is mandatory, but may not exactly
            # match an address on our list).  Some MTAs also use
            # X-Actual-Recipient as a synonym for Original-Recipient, but
            # some apparently use that for other purposes :(
            #
            # Also grok out Action so we can do something with that too.
            action = msgblock.get('action', '').lower()
            # Some MTAs have been observed that put comments on the action.
            if action.startswith('delayed'):
                return Stop
            if not action.startswith('fail'):
                # Some non-permanent failure, so ignore this block.
                continue
            params = []
            foundp = False
            for header in ('original-recipient', 'final-recipient'):
                for param in msgblock.get_params([], header):
                    key = param[0]
                    if key.lower() == 'rfc822':
                        foundp = True
                    else:
                        params.append(key)
                if foundp:
                    # Note that params should already be unquoted.
                    addrs.extend(params)
                    break
            else:
                # MAS: This is a kludge, but SMTP-GATEWAY01.intra.home.dk
                # has a final-recipient with an angle-addr and no
                # address-type parameter at all.  Non-compliant, but ...
                for param in params:
                    if param.startswith('<') and param.endswith('>'):
                        addrs.append(param[1:-1])
    # Uniquify.
    rtnaddrs = {}
    for a in addrs:
        if a is not None:
            rtnaddrs[parseaddr(a)[1]] = True
    # A real list on every Python version, not a dict view.
    return list(rtnaddrs)


def process(msg):
    """Return the failed recipients of a DSN, Stop, or None.

    None means msg is not a multipart/report (after unwrapping a
    multipart/mixed and/or message/rfc822 wrapper); otherwise the result of
    check() is returned.
    """
    # A DSN has been seen wrapped with a "legal disclaimer" by an outgoing
    # MTA in a multipart/mixed outer part.
    if msg.is_multipart() and msg.get_content_subtype() == 'mixed':
        msg = msg.get_payload()[0]
    # The above will suffice if the original message 'parts' were wrapped
    # with the disclaimer added, but the original DSN can be wrapped as a
    # message/rfc822 part.  We need to test that too.
    if msg.is_multipart() and msg.get_content_type() == 'message/rfc822':
        msg = msg.get_payload()[0]
    # The report-type parameter should be "delivery-status", but it seems
    # that some DSN generating MTAs don't include this on the Content-Type:
    # header, so let's relax the test a bit.
    if not msg.is_multipart() or msg.get_content_subtype() != 'report':
        return None
    return check(msg)
# Exchange.py: recognizes (some) Microsoft Exchange formats.
scre = re.compile('did not reach the following recipient')
ecre = re.compile('MSEXCH:')
a1cre = re.compile('SMTP=(?P<addr>[^;]+); on ')
a2cre = re.compile('(?P<addr>[^ ]+) on ')


def process(msg):
    """Return the unique addresses named in an Exchange style bounce.

    Addresses are taken from the lines between the start phrase and the
    first "MSEXCH:" line.  Returns an empty list when the start phrase is
    not found.
    """
    from email.iterators import body_line_iterator

    addrs = {}
    it = body_line_iterator(msg)
    # Find the start line.
    for line in it:
        if scre.search(line):
            break
    else:
        return []
    # Search each line until we hit the end line.  The same iterator is
    # reused on purpose so that scanning resumes after the start line.
    for line in it:
        if ecre.search(line):
            break
        mo = a1cre.search(line) or a2cre.search(line)
        if mo:
            addrs[mo.group('addr')] = 1
    # A real list on every Python version, not a dict view.
    return list(addrs)
# Exim.py: Exim adds an X-Failed-Recipients: header to bounce messages
# containing an address list of failed addresses.
def process(msg):
    """Return every address found in the X-Failed-Recipients: headers.

    Returns an empty list when the header is absent.
    """
    # Named `headers` rather than `all`, which would shadow the builtin.
    headers = msg.get_all('x-failed-recipients', [])
    return [address for realname, address in getaddresses(headers)]
# GroupWise.py: this appears to be the format for Novell GroupWise and NTMail.
#
# X-Mailer: Novell GroupWise Internet Agent 5.5.3.1
# X-Mailer: NTMail v4.30.0012
# X-Mailer: Internet Mail Service (5.5.2653.19)
acre = re.compile(r'<(?P<addr>[^>]*)>')


def find_textplain(msg):
    """Return the first text/plain part of msg (depth first), or None."""
    if msg.get_content_type() == 'text/plain':
        return msg
    # is_multipart is a method; testing the bound method itself is always
    # true, so it has to be called.
    if msg.is_multipart():
        for part in msg.get_payload():
            if not isinstance(part, Message):
                continue
            ret = find_textplain(part)
            # Compare against None: a Message with no headers has length
            # zero and would otherwise be treated as "not found".
            if ret is not None:
                return ret
    return None


def process(msg):
    """Return the unique addresses found in a GroupWise/NTMail bounce.

    Only multipart/mixed messages carrying an X-Mailer: header are
    considered.  Within the first text/plain part, each line contributes
    either the first <address> on it or, failing that, the leading word of
    a line containing '@'.

    Returns a list of addresses, or None if the message does not match.
    """
    if msg.get_content_type() != 'multipart/mixed' or not msg['x-mailer']:
        return None
    addrs = {}
    # Find the first text/plain part in the message.
    textplain = find_textplain(msg)
    if textplain is None:
        return None
    for line in StringIO(textplain.get_payload()):
        mo = acre.search(line)
        if mo:
            addrs[mo.group('addr')] = 1
        elif '@' in line:
            # Drop the line ending so a bare address is not stored with a
            # trailing newline.
            token = line.rstrip()
            i = token.find(' ')
            if i == 0:
                # Lines starting with a blank are skipped.
                continue
            if i < 0:
                addrs[token] = 1
            else:
                addrs[token[:i]] = 1
    # A real list on every Python version, not a dict view.
    return list(addrs)
# LLNL.py: LLNL's custom Sendmail bounce message.
acre = re.compile(r',\s*(?P<addr>\S+@[^,]+),', re.IGNORECASE)


def process(msg):
    """Return the first comma-delimited address found in the body.

    Returns a one-element list, or an empty list if no line matches.
    """
    from email.iterators import body_line_iterator

    for line in body_line_iterator(msg):
        mo = acre.search(line)
        if mo:
            return [mo.group('addr')]
    return []
# Microsoft.py: Microsoft's `SMTPSVC' nears I kin tell.
scre = re.compile(r'transcript of session follows', re.IGNORECASE)


def process(msg):
    """Return the address lines from a Microsoft SMTPSVC style bounce.

    Only multipart/mixed messages are considered.  In the first subpart,
    every line containing '@' that comes after the "transcript of session
    follows" line is reported, with surrounding whitespace removed.

    Returns a list of such lines, or None if the message does not match.
    """
    if msg.get_content_type() != 'multipart/mixed':
        return None
    # Find the first subpart, which has no MIME type.
    try:
        subpart = msg.get_payload(0)
    except (IndexError, TypeError):
        # The message *looked* like a multipart but wasn't: either it has
        # no parts at all or its payload is a plain string.
        return None
    data = subpart.get_payload()
    if isinstance(data, list):
        # The message is a multi-multipart, so not a matching bounce.
        return None
    state = 0
    addrs = []
    for line in StringIO(data):
        if state == 0:
            if scre.search(line):
                state = 1
        # Deliberately not `elif`: the tag line itself is checked too.
        if state == 1:
            if '@' in line:
                # Strip the line ending so the result is usable as an
                # address rather than a raw body line.
                addrs.append(line.strip())
    return addrs
# Netscape.py: Netscape Messaging Server bounce formats.  Bounces come in DSN
# MIME format but don't include any -Recipient: headers, so the text has to be
# parsed.
pcre = re.compile(
    r'This Message was undeliverable due to the following reason:',
    re.IGNORECASE)

acre = re.compile(
    r'(?P<reply>please reply to)?.*<(?P<addr>[^>]*)>',
    re.IGNORECASE)


def flatten(msg, leaves):
    """Append all the leaf (non-multipart) subparts of msg to leaves."""
    if msg.is_multipart():
        for part in msg.get_payload():
            flatten(part, leaves)
    else:
        leaves.append(msg)


def process(msg):
    """Return the addresses found in a Netscape Messaging Server bounce.

    Returns None if msg is not multipart or has no text/plain leaf before
    its last leaf; otherwise a (possibly empty) list of addresses.
    """
    # Sigh.  Some NMS 3.6's show
    #     multipart/report; report-type=delivery-status
    # and some show
    #     multipart/mixed;
    if not msg.is_multipart():
        return None
    # We're looking for a text/plain subpart occurring before a
    # message/delivery-status subpart, so the last leaf is never a
    # candidate.
    plainmsg = None
    leaves = []
    flatten(msg, leaves)
    for subpart in leaves[:-1]:
        if subpart.get_content_type() == 'text/plain':
            plainmsg = subpart
            break
    # Compare against None: a Message with no headers has length zero and
    # would otherwise be treated as "not found".
    if plainmsg is None:
        return None
    # Total guesswork, based on captured examples...
    addrs = []
    in_section = False
    for line in StringIO(plainmsg.get_payload()):
        if not in_section:
            if pcre.search(line):
                # We found a bounce section, but I have no idea what the
                # official format inside here is. :(  We'll just search
                # for <addr> strings on every remaining line.
                in_section = True
            continue
        mo = acre.search(line)
        if mo and not mo.group('reply'):
            addrs.append(mo.group('addr'))
    return addrs
# Postfix.py: parse bounce messages generated by Postfix (and look-alikes).
def flatten(msg, leaves):
    """Append every leaf (non-multipart) part of msg to leaves, in order."""
    leaves.extend(part for part in msg.walk() if not part.is_multipart())
# Heuristic patterns; it is an open question whether they are correct or
# guaranteed.
pcre = re.compile(r'[ \t]*the\s*(bns)?\s*(postfix|keftamail|smtp_gateway)',
                  re.IGNORECASE)
rcre = re.compile(r'failure reason:$', re.IGNORECASE)
acre = re.compile(r'<(?P<addr>[^>]*)>:')


def findaddr(msg):
    """Return the "<addr>:" addresses that follow the salutation in msg."""
    found = []
    # False until the salutation (or "failure reason:") line has been seen.
    seen_salutation = False
    for raw in StringIO(msg.get_payload()):
        # Keep leading whitespace; only the line ending is removed.
        text = raw.rstrip()
        if not seen_salutation:
            # match() on purpose: the pattern must start the line.
            if pcre.match(text) or rcre.match(text):
                seen_salutation = True
            continue
        if not text:
            continue
        hit = acre.search(text)
        if hit:
            found.append(hit.group('addr'))
        # Anything else is probably a continuation line.
    return found


def process(msg):
    """Return the addresses from a Postfix style bounce, or None.

    Only multipart/mixed and multipart/report messages are considered.  The
    first text/plain leaf whose Content-Description: is `notification' is
    handed to findaddr(); if there is no such part, None is returned.
    """
    wanted = ('multipart/mixed', 'multipart/report')
    if msg.get_content_type() not in wanted:
        return None
    for part in msg.walk():
        # Only leaf parts are candidates.
        if part.is_multipart():
            continue
        if part.get_content_type() != 'text/plain':
            continue
        if part.get('content-description', '').lower() == 'notification':
            return findaddr(part)
    return None
# Qmail.py: parse bounce messages generated by qmail.  Qmail has a standard,
# QSBMF (qmail-send bounce message format), described in
# http://cr.yp.to/proto/qsbmf.txt

# Other (non-standard?) intros have been observed in the wild.
introtags = [
    'Hi. This is the',
    "We're sorry. There's a problem",
    'Check your send e-mail address.',
    'This is the mail delivery agent at',
    'Unfortunately, your mail was not delivered'
    ]
acre = re.compile(r'<(?P<addr>[^>]*)>:')


def process(msg):
    """Return the recipient addresses listed in a qmail style bounce.

    After an intro paragraph starting with one of the introtags, every line
    that begins with "<addr>:" contributes an address, until a line starting
    with '-' is reached.

    Returns a (possibly empty) list of addresses.
    """
    from email.iterators import body_line_iterator

    addrs = []
    intros = tuple(introtags)
    # Simple state machine:
    #   0 = nothing seen yet
    #   1 = intro paragraph seen
    #   2 = recip paragraphs seen
    state = 0
    for line in body_line_iterator(msg):
        line = line.strip()
        if state == 0:
            if line.startswith(intros):
                state = 1
        elif state == 1 and not line:
            # Looking for the end of the intro paragraph.
            state = 2
        elif state == 2:
            if line.startswith('-'):
                # We're looking at the break paragraph, so we're done.
                break
            # At this point we know we must be looking at a recipient
            # paragraph.
            mo = acre.match(line)
            if mo:
                addrs.append(mo.group('addr'))
            # Otherwise, it must be a continuation line, so just ignore it.
        # Not looking at anything in particular.
    return addrs
# SMTP32.py: something which claims "X-Mailer: <SMTP32 vXXXXXX>".
ecre = re.compile('original message follows', re.IGNORECASE)
acre = re.compile(r'''
    (                           # several different prefixes
    user\ mailbox[^:]*:         # have been spotted in the
    |delivery\ failed[^:]*:     # wild...
    |unknown\ user[^:]*:
    |undeliverable\ +to
    |delivery\ userid[^:]*:
    )
    \s*                         # space separator
    (?P<addr>[^\s]*)            # and finally, the address
    ''', re.IGNORECASE | re.VERBOSE)


def process(msg):
    """Return the unique addresses found in an SMTP32 style bounce.

    Only messages whose X-Mailer: header starts with '<SMTP32 v' are
    considered.  Lines are scanned until "original message follows" is seen.

    Returns a list of addresses, or None if the X-Mailer: does not match.
    """
    from email.iterators import body_line_iterator

    mailer = msg.get('x-mailer', '')
    if not mailer.startswith('<SMTP32 v'):
        return None
    addrs = {}
    for line in body_line_iterator(msg):
        if ecre.search(line):
            break
        mo = acre.search(line)
        # The address group may match the empty string (a prefix with
        # nothing after it); that is not an address.
        if mo and mo.group('addr'):
            addrs[mo.group('addr')] = 1
    # A real list on every Python version, not a dict view.
    return list(addrs)
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.

"""Recognizes simple heuristically delimited bounces."""

import re
import email.iterators



def _c(pattern):
    """Return `pattern' compiled as a case-insensitive regular expression."""
    return re.compile(pattern, re.IGNORECASE)

# This is a list of tuples of the form
#
#     (start cre, end cre, address cre)
#
# where `cre' means compiled regular expression, start is the line just before
# the bouncing address block, end is the line just after the bouncing address
# block, and address cre is the regexp that will recognize the addresses. It
# must have a group called `addr' which will contain exactly and only the
# address that bounced.
#
# Note that process() also applies the address cre to the start line itself,
# which is what lets formats such as usa.net and hotpop.com carry the address
# on the very line that opens the block.
PATTERNS = [
    # sdm.de
    (_c('here is your list of failed recipients'),
     _c('here is your returned mail'),
     _c(r'<(?P<addr>[^>]*)>')),
    # sz-sb.de, corridor.com, nfg.nl
    (_c('the following addresses had'),
     _c('transcript of session follows'),
     _c(r'<(?P<fulladdr>[^>]*)>|\(expanded from: <?(?P<addr>[^>)]*)>?\)')),
    # robanal.demon.co.uk
    (_c('this message was created automatically by mail delivery software'),
     _c('original message follows'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # s1.com (InterScan E-Mail VirusWall NT ???)
    (_c('message from interscan e-mail viruswall nt'),
     _c('end of message'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # Smail
    (_c('failed addresses follow:'),
     _c('message text follows:'),
     _c(r'\s*(?P<addr>\S+@\S+)')),
    # newmail.ru
    (_c('This is the machine generated message from mail service.'),
     _c('--- Below the next line is a copy of the message.'),
     _c('<(?P<addr>[^>]*)>')),
    # turbosport.com runs something called `MDaemon 3.5.2' ???
    (_c('The following addresses did NOT receive a copy of your message:'),
     _c('--- Session Transcript ---'),
     _c(r'[>]\s*(?P<addr>.*)$')),
    # usa.net
    (_c(r'Intended recipient:\s*(?P<addr>.*)$'),
     _c('--------RETURNED MAIL FOLLOWS--------'),
     _c(r'Intended recipient:\s*(?P<addr>.*)$')),
    # hotpop.com
    (_c(r'Undeliverable Address:\s*(?P<addr>.*)$'),
     _c('Original message attached'),
     _c(r'Undeliverable Address:\s*(?P<addr>.*)$')),
    # Another demon.co.uk format
    (_c('This message was created automatically by mail delivery'),
     _c('^---- START OF RETURNED MESSAGE ----'),
     _c("addressed to '(?P<addr>[^']*)'")),
    # Prodigy.net full mailbox
    (_c("User's mailbox is full:"),
     _c('Unable to deliver mail.'),
     _c(r"User's mailbox is full:\s*<(?P<addr>[^>]*)>")),
    # Microsoft SMTPSVC
    (_c('The email below could not be delivered to the following user:'),
     _c('Old message:'),
     _c('<(?P<addr>[^>]*)>')),
    # Yahoo on behalf of other domains like sbcglobal.net
    (_c(r'Unable to deliver message to the following address\(es\)\.'),
     _c(r'--- Original message follows\.'),
     _c('<(?P<addr>[^>]*)>:')),
    # googlemail.com
    (_c('Delivery to the following recipient failed'),
     _c('----- Original message -----'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c('<(?P<addr>[^>]*)>')),
    # another kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@:]+):')),
    # thehartford.com
    (_c('Delivery to the following recipients failed'),
     # this one may or may not have the original message, but there's nothing
     # unique to stop on, so stop on the first line of at least 3 characters
     # that doesn't start with 'D' (to not stop immediately) and has no '@'.
     _c('^[^D][^@]{2,}$'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # and another thehartfod.com/hartfordlife.com
    (_c(r'^Your message\s*$'),
     _c('^because:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # kviv.be (InterScan NT)
    (_c('^Unable to deliver message to'),
     _c(r'\*+\s+End of message\s+\*+'),
     _c('<(?P<addr>[^>]*)>')),
    # earthlink.net supported domains
    (_c('^Sorry, unable to deliver your message to'),
     _c('^A copy of the original message'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s+')),
    # ademe.fr
    (_c('^A message could not be delivered to:'),
     _c('^Subject:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # andrew.ac.jp
    (_c('^Invalid final delivery userid:'),
     _c('^Original message follows.'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # E500_SMTP_Mail_Service@lerctr.org
    (_c('------ Failed Recipients ------'),
     _c('-------- Returned Mail --------'),
     _c('<(?P<addr>[^>]*)>')),
    # cynergycom.net
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'(?P<addr>[^\s@]+@[^\s@)]+)')),
    # LSMTP for Windows
    (_c(r'^--> Error description:\s*$'),
     _c('^Error-End:'),
     _c(r'^Error-for:\s+(?P<addr>[^\s@]+@[^\s@]+)')),
    # Qmail with a tri-language intro beginning in spanish
    (_c('Your message could not be delivered'),
     _c('^-'),
     _c('<(?P<addr>[^>]*)>:')),
    # socgen.com
    (_c('Your message could not be delivered to'),
     _c(r'^\s*$'),
     _c(r'(?P<addr>[^\s@]+@[^\s@]+)')),
    # dadoservice.it
    (_c('Your message has encountered delivery problems'),
     _c('Your message reads'),
     _c(r'addressed to\s*(?P<addr>[^\s@]+@[^\s@)]+)')),
    # gomaps.com
    (_c('Did not reach the following recipient'),
     _c(r'^\s*$'),
     _c(r'\s(?P<addr>[^\s@]+@[^\s@]+)')),
    # EYOU MTA SYSTEM
    (_c('This is the deliver program at'),
     _c('^-'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@<>]+)')),
    # A non-standard qmail at ieo.it
    (_c('this is the email server at'),
     _c('^-'),
     _c(r'\s(?P<addr>[^\s@]+@[^\s@]+)[\s,]')),
    # pla.net.py (MDaemon.PRO ?)
    (_c('- no such user here'),
     _c('There is no user'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@]+)\s')),
    # Next one goes here...
    ]



def process(msg, patterns=None):
    """Return the bounced addresses recognized in the body of `msg'.

    `patterns' is a sequence of (start cre, end cre, address cre) tuples; when
    it is None the module-level PATTERNS list is used.  Each tuple is tried in
    order against every body line of the message.  Once the start cre has
    matched, that line and every following line is searched with the address
    cre until a line that has no address match matches the end cre.  The first
    tuple that yields at least one address wins and later tuples are not
    tried.

    The result is a list of the distinct non-empty `addr' group values, in the
    order they were first seen.  It is empty when no tuple produced an
    address.
    """
    if patterns is None:
        patterns = PATTERNS
    addrs = []
    # MAS: This is a mess. The outer loop used to be over the message
    # so we only looped through the message once. Looping through the
    # message for each set of patterns is obviously way more work, but
    # if we don't do it, problems arise because scre from the wrong
    # pattern set matches first and then acre doesn't match. The
    # alternative is to split things into separate modules, but then
    # we process the message multiple times anyway.
    for scre, ecre, acre in patterns:
        intro_seen = False
        for line in email.iterators.body_line_iterator(msg):
            if not intro_seen:
                if not scre.search(line):
                    continue
                intro_seen = True
                # Deliberately fall through: the start line itself may
                # contain the address.
            mo = acre.search(line)
            if mo:
                # The group can be None when a different alternative of the
                # address cre matched (e.g. `fulladdr'); skip those.
                addr = mo.group('addr')
                if addr and addr not in addrs:
                    addrs.append(addr)
            elif ecre.search(line):
                break
        if addrs:
            break
    return addrs

# diff --git a/src/mailman/Bouncers/SimpleWarning.py b/src/mailman/Bouncers/SimpleWarning.py
# new file mode 100644
# index 000000000..ab18d2530
# --- /dev/null
# +++ b/src/mailman/Bouncers/SimpleWarning.py
# @@ -0,0 +1,62 @@
# Copyright (C) 2001-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.

"""Recognizes simple heuristically delimited warnings."""

from mailman.Bouncers.BouncerAPI import Stop
from mailman.Bouncers.SimpleMatch import _c
from mailman.Bouncers.SimpleMatch import process as _process



# This is a list of tuples of the form
#
#     (start cre, end cre, address cre)
#
# where `cre' means compiled regular expression, start is the line just before
# the bouncing address block, end is the line just after the bouncing address
# block, and address cre is the regexp that will recognize the addresses. It
# must have a group called `addr' which will contain exactly and only the
# address that bounced.
patterns = [
    # pop3.pta.lia.net
    (_c('The address to which the message has not yet been delivered is'),
     _c('No action is required on your part'),
     _c(r'\s*(?P<addr>\S+@\S+)\s*')),
    # This is from MessageSwitch. It is a kludge because the text that
    # identifies it as a warning only comes after the address. We can't
    # use ecre, because it really isn't significant, so we fake it. Once
    # we see the start, we know it's a warning, and we're going to return
    # Stop anyway, so we match anything for the address and end.
    (_c('This is just a warning, you do not need to take any action'),
     _c('.+'),
     _c('(?P<addr>.+)')),
    # Symantec_AntiVirus_for_SMTP_Gateways - see comments for MessageSwitch
    (_c('Delivery attempts will continue to be made'),
     _c('.+'),
     _c('(?P<addr>.+)')),
    # Next one goes here...
    ]



def process(msg):
    """Return Stop if `msg' is a recognized delivery warning, else [].

    The message is scanned with the SimpleMatch state machine using the
    warning `patterns' above.  The addresses it finds are not returned; any
    match at all just means the message is a warning.
    """
    if _process(msg, patterns):
        # It's a recognized warning so stop now
        return Stop
    return []

# diff --git a/src/mailman/Bouncers/Sina.py b/src/mailman/Bouncers/Sina.py
# new file mode 100644
# index 000000000..a6b8e0911
# --- /dev/null
# +++ b/src/mailman/Bouncers/Sina.py
# @@ -0,0 +1,48 @@
# Copyright (C) 2002-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.

"""sina.com bounces"""

import re
from email.iterators import body_line_iterator

# An address in angle brackets; process() anchors it at the start of a line.
acre = re.compile(r'<(?P<addr>[^>]*)>')



def process(msg):
    """Return the addresses reported in a sina.com bounce.

    Only multipart messages whose From: header is exactly
    mailer-daemon@sina.com (compared case-insensitively) are examined; for
    anything else the result is an empty list.  Every line of the first
    subpart that begins with <address> contributes that address.  The result
    is a list of distinct addresses in the order they were first seen.
    """
    if msg.get('from', '').lower() != 'mailer-daemon@sina.com':
        return []
    if not msg.is_multipart():
        return []
    # The interesting bits are in the first text/plain multipart
    try:
        part = msg.get_payload(0)
    except IndexError:
        # A multipart container with no subparts at all.
        return []
    # The part is deliberately not truth-tested: the length of a Message is
    # its number of headers, so a subpart without headers would look false
    # and be skipped even though its body may list the addresses.
    addrs = []
    for line in body_line_iterator(part):
        mo = acre.match(line)
        if mo:
            addr = mo.group('addr')
            if addr not in addrs:
                addrs.append(addr)
    return addrs

# diff --git a/src/mailman/Bouncers/Yahoo.py b/src/mailman/Bouncers/Yahoo.py
# new file mode 100644
# index 000000000..b0480b818
# --- /dev/null
# +++ b/src/mailman/Bouncers/Yahoo.py
# @@ -0,0 +1,54 @@
# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.

"""Yahoo! has its own weird format for bounces."""

import re
import email.iterators
from email.utils import parseaddr

# Tag line that opens the error report, e.g. "Message from yahoo.com."
tcre = re.compile(r'message\s+from\s+yahoo\.\S+', re.IGNORECASE)
# A failed recipient: "<address>:" at the start of a (stripped) line.
acre = re.compile(r'<(?P<addr>[^>]*)>:')
# Marks the end of the error report and the start of the returned message.
ecre = re.compile(r'--- Original message follows')



def process(msg):
    """Return the addresses reported in a Yahoo! bounce.

    Returns None when the From: address does not start with
    mailer-daemon@yahoo (compared in lower case).  Otherwise returns a list,
    possibly empty, of the addresses found on "<address>:" lines between the
    "Message from yahoo..." tag line and the "--- Original message follows"
    line.  Addresses are returned in body order and are not de-duplicated.
    """
    # Yahoo! bounces seem to have a known subject value and something called
    # an x-uidl: header, the value of which seems unimportant.
    sender = parseaddr(msg.get('from', '').lower())[1] or ''
    if not sender.startswith('mailer-daemon@yahoo'):
        return None
    addrs = []
    # Simple two-state machine: nothing is collected until the tag line has
    # been seen.
    tag_seen = False
    for line in email.iterators.body_line_iterator(msg):
        line = line.strip()
        if not tag_seen:
            # The tag line itself is never searched for addresses.
            tag_seen = tcre.match(line) is not None
            continue
        mo = acre.match(line)
        if mo:
            addrs.append(mo.group('addr'))
        elif ecre.match(line):
            # we're at the end of the error response
            break
    return addrs

# diff --git a/src/mailman/Bouncers/Yale.py b/src/mailman/Bouncers/Yale.py
# new file mode 100644
# index 000000000..956dfb838
# --- /dev/null
# +++ b/src/mailman/Bouncers/Yale.py
# @@ -0,0 +1,80 @@
# Copyright (C) 2000-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.

"""Yale's mail server is pretty dumb.

Its reports include the end user's name, but not the full domain. I think we
can usually guess it right anyway. This is completely based on examination of
the corpse, and is subject to failure whenever Yale even slightly changes
their MTA. :(

"""

import re
try:
    from cStringIO import StringIO
except ImportError:
    # cStringIO is not available on every interpreter; io.StringIO offers the
    # same readline() interface used below.
    from io import StringIO
from email.utils import getaddresses

# Line that introduces the list of undelivered names.
scre = re.compile(r'Message not delivered to the following', re.IGNORECASE)
# Line that follows the list of undelivered names.
ecre = re.compile(r'Error Detail', re.IGNORECASE)
# A name: the first run of non-blank characters with whitespace on both sides.
acre = re.compile(r'\s+(?P<addr>\S+)\s+')



def process(msg):
    """Return guessed addresses for the names in a yale.edu bounce.

    Returns None unless `msg' is a non-multipart message whose From: address
    is mailer-daemon at yale.edu or one of its subdomains (user name and
    domain are both compared case-insensitively).  Otherwise the body is
    scanned between the "Message not delivered to the following" line and the
    "Error Detail" line, and for every distinct name found there, in the
    order first seen, both name@yale.edu and name@cs.yale.edu are returned in
    a list.
    """
    if msg.is_multipart():
        return None
    try:
        whofrom = getaddresses([msg.get('from', '')])[0][1]
        if not whofrom:
            return None
        username, domain = whofrom.split('@', 1)
    except (IndexError, ValueError):
        return None
    if username.lower() != 'mailer-daemon':
        return None
    # Require the last two labels to be yale.edu.  Comparing the slice
    # rejects truncated domains such as a bare `edu', and lower-casing makes
    # the test case-insensitive like the user name test above.
    if domain.lower().split('.')[-2:] != ['yale', 'edu']:
        return None
    # Okay, we've established that the bounce came from the mailer-daemon at
    # yale.edu. Let's look for a name, and then guess the relevant domains.
    names = []
    body = StringIO(msg.get_payload() or '')
    # Simple two-state machine: names are only collected once the intro line
    # has been seen, and the intro line itself is not searched.
    intro_seen = False
    for line in iter(body.readline, ''):
        if not intro_seen:
            intro_seen = scre.search(line) is not None
        elif ecre.search(line):
            break
        else:
            mo = acre.search(line)
            if mo and mo.group('addr') not in names:
                names.append(mo.group('addr'))
    # Now we have a bunch of names, these are either @yale.edu or
    # @cs.yale.edu. Add them both.
    addrs = []
    for name in names:
        addrs.append(name + '@yale.edu')
        addrs.append(name + '@cs.yale.edu')
    return addrs

# diff --git a/src/mailman/Bouncers/__init__.py b/src/mailman/Bouncers/__init__.py
# new file mode 100644
# index 000000000..e69de29bb
# --- /dev/null
# +++ b/src/mailman/Bouncers/__init__.py
