diff options
| -rw-r--r-- | Mailman/Bouncers/Catchall.py | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/Mailman/Bouncers/Catchall.py b/Mailman/Bouncers/Catchall.py new file mode 100644 index 000000000..2bd81f48d --- /dev/null +++ b/Mailman/Bouncers/Catchall.py @@ -0,0 +1,194 @@ +# Copyright (C) 1998 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# TBD: this is cruft and should eventually just go away. It contains the old +# implementation of Bouncer.ScanMessage(). We keep it because I don't feel +# like splitting it up and porting it. It should at the very least be ported +# to use mimetools and re. :( + +import re +import string +import regsub +import regex +from types import StringType + + + +# Return 0 if we couldn't make any sense of it, 1 if we handled it. +def process(mlist, msg): + candidates = [] + # See Mailman.Message.GetSender :( + sender = msg.get('sender') + if sender: + name, addr = msg.getaddr('sender') + else: + name, addr = msg.getaddr('from') + if addr and type(addr) == StringType: + who_info = string.lower(addr) + elif msg.unixfrom: + who_info = string.lower(string.split(msg.unixfrom)[1]) + else: + return None + at_index = string.find(who_info, '@') + if at_index != -1: + who_from = who_info[:at_index] + remote_host = who_info[at_index+1:] + else: + who_from = who_info + remote_host = mlist.host_name + if not who_from in ['mailer-daemon', 'postmaster', 'orphanage', + 'postoffice', 'ucx_smtp', 'a2']: + return 0 + mime_info = msg.getheader('content-type') + boundry = None + if mime_info: + mime_info_parts = regsub.splitx( + mime_info, '[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"') + if len(mime_info_parts) > 1: + boundry = regsub.splitx(mime_info_parts[1], + '"[^"]+"')[1][1:-1] + + # snag out the message body + msg.rewindbody() + msgbody = msg.fp.read() + if boundry: + relevant_text = string.split(msgbody, '--%s' % boundry) + # Invalid MIME messages shouldn't cause exceptions + if len(relevant_text) >= 2: + relevant_text = relevant_text[1] + else: + relevant_text = relevant_text[0] + else: + # This looks strange, but at least 2 are going to be no-ops. + relevant_text = regsub.split(msgbody, + '^.*Message header follows.*$')[0] + relevant_text = regsub.split(relevant_text, + '^The text you sent follows:.*$')[0] + relevant_text = regsub.split( + relevant_text, '^Additional Message Information:.*$')[0] + relevant_text = regsub.split(relevant_text, + '^-+Your original message-+.*$')[0] + + BOUNCE = 1 + REMOVE = 2 + + # Bounce patterns where it's simple to figure out the email addr. + email_regexp = '<?\([^ \t@|<>]+@[^ \t@<>]+\.[^ \t<>.]+\)>?' + simple_bounce_pats = ( + (regex.compile('.*451 %s.*' % email_regexp), BOUNCE), + (regex.compile('.*554 %s.*' % email_regexp), BOUNCE), + (regex.compile('.*552 %s.*' % email_regexp), BOUNCE), + (regex.compile('.*501 %s.*' % email_regexp), BOUNCE), + (regex.compile('.*553 %s.*' % email_regexp), BOUNCE), + (regex.compile('.*550 %s.*' % email_regexp), BOUNCE), + (regex.compile('%s .bounced.*' % email_regexp), BOUNCE), + (regex.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE), + (regex.compile('.*User %s not known.*' % email_regexp), REMOVE), + (regex.compile('.*%s: User unknown.*' % email_regexp), REMOVE), + (regex.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE)) + # patterns we can't directly extract the email (special case these) + messy_pattern_1 = regex.compile('^Recipient .*$') + messy_pattern_2 = regex.compile('^Addressee: .*$') + messy_pattern_3 = regex.compile('^User .* not listed.*$') + messy_pattern_4 = regex.compile('^550 [^ ]+\.\.\. User unknown.*$') + messy_pattern_5 = regex.compile('^User [^ ]+ is not defined.*$') + messy_pattern_6 = regex.compile('^[ \t]*[^ ]+: User unknown.*$') + messy_pattern_7 = regex.compile('^[^ ]+ - User currently disabled.*$') + + # Patterns for cases where email addr is separate from error cue. + separate_cue_1 = re.compile( + '^554 [^ ]+\.\.\. unknown mailer error.*$', re.I) + separate_addr_1 = regex.compile('expanded from: %s' % email_regexp) + + message_grokked = 0 + use_prospects = 0 + prospects = [] # If bad but no candidates found. + + for line in string.split(relevant_text, '\n'): + for pattern, action in simple_bounce_pats: + if pattern.match(line) <> -1: + email = extract(line) + candidates.append((string.split(email,',')[0], action)) + message_grokked = 1 + + # Now for the special case messages that are harder to parse... + if (messy_pattern_1.match(line) <> -1 + or messy_pattern_2.match(line) <> -1): + username = string.split(line)[1] + candidates.append(('%s@%s' % (username, remote_host), + BOUNCE)) + message_grokked = 1 + continue + if (messy_pattern_3.match(line) <> -1 + or messy_pattern_4.match(line) <> -1 + or messy_pattern_5.match(line) <> -1): + username = string.split(line)[1] + candidates.append(('%s@%s' % (username, remote_host), + REMOVE)) + message_grokked = 1 + continue + if messy_pattern_6.match(line) <> -1: + username = string.split(string.strip(line))[0][:-1] + candidates.append(('%s@%s' % (username, remote_host), + REMOVE)) + message_grokked = 1 + continue + if messy_pattern_7.match(line) <> -1: + username = string.split(string.strip(line))[0] + candidates.append(('%s@%s' % (username, remote_host), + REMOVE)) + message_grokked = 1 + continue + + if separate_cue_1.match(line): + # Here's an error message that doesn't contain the addr. + # Set a flag to use prospects found on separate lines. + use_prospects = 1 + if separate_addr_1.search(line) != -1: + # Found an addr that *might* be part of an error message. + # Register it on prospects, where it will only be used if a + # separate check identifies this message as an error message. + prospects.append((separate_addr_1.group(1), BOUNCE)) + + if use_prospects and prospects: + candidates = candidates + prospects + + did = [] + for who, action in candidates: + # First clean up some cruft around the addrs. + el = string.find(who, "...") + if el != -1: + who = who[:el] + if len(who) > 1 and who[0] == '<': + # Use stuff after open angle and before (optional) close: + who = regsub.splitx(who[1:], ">")[0] + if who not in did: +## if action == REMOVE: +## mlist.HandleBouncingAddress(who, msg) +## else: +## mlist.RegisterBounce(who, msg) + did.append(who) +## return message_grokked + return did + + + +def extract(line): + email = regsub.splitx(line, '[^ \t@<>]+@[^ \t@<>]+\.[^ \t<>.]+')[1] + if email[0] == '<': + return regsub.splitx(email[1:], ">")[0] + else: + return email |
