This is essentially a port of the old bounce detection code to the new bounce handling architecture. It should still detect all the bounces that the old code detected, but it should be placed at the end of the bounce detection pipeline. This code should be split apart and rewritten, at the least to get rid of regex and regsub usage!
author: bwarsaw 1999-12-09 22:20:25 +0000
committer: bwarsaw 1999-12-09 22:20:25 +0000
commit: fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60 (patch)
tree: ab268839a7b9f547180ca39431cacf20e8194e70 /Mailman/Bouncers/Catchall.py
parent: 06f1dfed8bef672356e300afb7ee284f0f565f20 (diff)
download: mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.tar.gz
mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.tar.zst
mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.zip
1 files changed, 194 insertions, 0 deletions
diff --git a/Mailman/Bouncers/Catchall.py b/Mailman/Bouncers/Catchall.py
new file mode 100644
index 000000000..2bd81f48d
--- /dev/null
+++ b/Mailman/Bouncers/Catchall.py
@@ -0,0 +1,194 @@
+# Copyright (C) 1998 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software 
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# TBD: this is cruft and should eventually just go away.  It contains the old
+# implementation of Bouncer.ScanMessage().  We keep it because I don't feel
+# like splitting it up and porting it.  It should at the very least be ported
+# to use mimetools and re. :(
+
+import re
+import string
+import regsub
+import regex
+from types import StringType
+
+
+
+# Returns list of addrs found; None if no sender, 0 if sender isn't a daemon.
+def process(mlist, msg):
+    candidates = []
+    # See Mailman.Message.GetSender :(
+    sender = msg.get('sender')
+    if sender:
+        name, addr = msg.getaddr('sender')
+    else:
+        name, addr = msg.getaddr('from')
+    if addr and type(addr) == StringType:
+        who_info = string.lower(addr)
+    elif msg.unixfrom:
+        who_info = string.lower(string.split(msg.unixfrom)[1])
+    else:
+        return None
+    at_index = string.find(who_info, '@')
+    if at_index != -1:
+        who_from = who_info[:at_index]
+        remote_host = who_info[at_index+1:]
+    else:
+        who_from = who_info
+        remote_host = mlist.host_name
+    if not who_from in ['mailer-daemon', 'postmaster', 'orphanage',
+                        'postoffice', 'ucx_smtp', 'a2']:
+        return 0
+    mime_info = msg.getheader('content-type')
+    boundry = None
+    if mime_info:
+        mime_info_parts = regsub.splitx(
+            mime_info, '[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"')
+        if len(mime_info_parts) > 1:
+            boundry = regsub.splitx(mime_info_parts[1],
+                                    '"[^"]+"')[1][1:-1]
+
+    # snag out the message body
+    msg.rewindbody()
+    msgbody = msg.fp.read()
+    if boundry:
+        relevant_text = string.split(msgbody, '--%s' % boundry)
+        # Invalid MIME messages shouldn't cause exceptions
+        if len(relevant_text) >= 2:
+            relevant_text = relevant_text[1]
+        else:
+            relevant_text = relevant_text[0]
+    else:
+        # Keep text before each known marker; at least 2 splits will be no-ops.
+        relevant_text = regsub.split(msgbody,
+                                     '^.*Message header follows.*$')[0]
+        relevant_text = regsub.split(relevant_text,
+                                     '^The text you sent follows:.*$')[0]
+        relevant_text = regsub.split(
+            relevant_text, '^Additional Message Information:.*$')[0]
+        relevant_text = regsub.split(relevant_text,
+                                     '^-+Your original message-+.*$')[0]
+
+    BOUNCE = 1
+    REMOVE = 2
+
+    # Bounce patterns where it's simple to figure out the email addr.
+    email_regexp = '<?\([^ \t@|<>]+@[^ \t@<>]+\.[^ \t<>.]+\)>?'
+    simple_bounce_pats = (
+        (regex.compile('.*451 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('.*554 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('.*552 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('.*501 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('.*553 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('.*550 %s.*' % email_regexp), BOUNCE),
+        (regex.compile('%s .bounced.*' % email_regexp), BOUNCE),
+        (regex.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE),
+        (regex.compile('.*User %s not known.*' % email_regexp), REMOVE),
+        (regex.compile('.*%s: User unknown.*' % email_regexp), REMOVE),
+        (regex.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE))
+    # patterns we can't directly extract the email (special case these)
+    messy_pattern_1 = regex.compile('^Recipient .*$')
+    messy_pattern_2 = regex.compile('^Addressee: .*$')
+    messy_pattern_3 = regex.compile('^User .* not listed.*$')
+    messy_pattern_4 = regex.compile('^550 [^ ]+\.\.\. User unknown.*$')
+    messy_pattern_5 = regex.compile('^User [^ ]+ is not defined.*$')
+    messy_pattern_6 = regex.compile('^[ \t]*[^ ]+: User unknown.*$')
+    messy_pattern_7 = regex.compile('^[^ ]+ - User currently disabled.*$')
+
+    # Patterns for cases where email addr is separate from error cue.
+    separate_cue_1 = re.compile(
+        '^554 [^ ]+\.\.\. unknown mailer error.*$', re.I)
+    separate_addr_1 = regex.compile('expanded from: %s' % email_regexp)
+
+    message_grokked = 0
+    use_prospects = 0
+    prospects = []                  # Used only if separate_cue_1 matched.
+
+    for line in string.split(relevant_text, '\n'):
+        for pattern, action in simple_bounce_pats:
+            if pattern.match(line) <> -1:
+                email = extract(line)
+                candidates.append((string.split(email,',')[0], action))
+                message_grokked = 1
+
+        # Now for the special case messages that are harder to parse...
+        if (messy_pattern_1.match(line) <> -1
+            or messy_pattern_2.match(line) <> -1):
+            username = string.split(line)[1]
+            candidates.append(('%s@%s' % (username, remote_host),
+                               BOUNCE))
+            message_grokked = 1
+            continue
+        if (messy_pattern_3.match(line) <> -1
+            or messy_pattern_4.match(line) <> -1
+            or messy_pattern_5.match(line) <> -1):
+            username = string.split(line)[1]
+            candidates.append(('%s@%s' % (username, remote_host),
+                               REMOVE))
+            message_grokked = 1
+            continue
+        if messy_pattern_6.match(line) <> -1:
+            username = string.split(string.strip(line))[0][:-1]
+            candidates.append(('%s@%s' % (username, remote_host),
+                               REMOVE))
+            message_grokked = 1
+            continue
+        if messy_pattern_7.match(line) <> -1:
+            username = string.split(string.strip(line))[0]
+            candidates.append(('%s@%s' % (username, remote_host),
+                               REMOVE))
+            message_grokked = 1
+            continue
+
+        if separate_cue_1.match(line):  # compiled with re, not regex
+            # Here's an error message that doesn't contain the addr.
+            # Set a flag to use prospects found on separate lines.
+            use_prospects = 1
+        if separate_addr_1.search(line) != -1:
+            # Found an addr that *might* be part of an error message.
+            # Register it on prospects, where it will only be used if a
+            # separate check identifies this message as an error message.
+            prospects.append((separate_addr_1.group(1), BOUNCE))
+
+    if use_prospects and prospects:
+        candidates = candidates + prospects
+
+    did = []
+    for who, action in candidates:
+        # First clean up some cruft around the addrs.
+        el = string.find(who, "...")
+        if el != -1:
+            who = who[:el]
+        if len(who) > 1 and who[0] == '<':
+            # Use stuff after open angle and before (optional) close:
+            who = regsub.splitx(who[1:], ">")[0]
+        if who not in did:
+##            if action == REMOVE:
+##                mlist.HandleBouncingAddress(who, msg)
+##            else:
+##                mlist.RegisterBounce(who, msg)
+            did.append(who)
+##    return message_grokked
+    return did
+
+
+
+def extract(line):  # Return the first addr-like match found in line.
+    email = regsub.splitx(line, '[^ \t@<>]+@[^ \t@<>]+\.[^ \t<>.]+')[1]
+    if email[0] == '<':
+        return regsub.splitx(email[1:], ">")[0]
+    else:
+        return email
author	bwarsaw	1999-12-09 22:20:25 +0000
committer	bwarsaw	1999-12-09 22:20:25 +0000
commit	fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60 (patch)
tree	ab268839a7b9f547180ca39431cacf20e8194e70 /Mailman/Bouncers/Catchall.py
parent	06f1dfed8bef672356e300afb7ee284f0f565f20 (diff)
download	mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.tar.gz mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.tar.zst mailman-fbe62c5d1828a580ddc4f4d36800d3b4c3da2b60.zip