diff options
| author | Barry Warsaw | 2008-02-27 01:26:18 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2008-02-27 01:26:18 -0500 |
| commit | a1c73f6c305c7f74987d99855ba59d8fa823c253 (patch) | |
| tree | 65696889450862357c9e05c8e9a589f1bdc074ac /Mailman/Bouncers/SimpleMatch.py | |
| parent | 3f31f8cce369529d177cfb5a7c66346ec1e12130 (diff) | |
| download | mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.tar.gz mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.tar.zst mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.zip | |
Diffstat (limited to 'Mailman/Bouncers/SimpleMatch.py')
| -rw-r--r-- | Mailman/Bouncers/SimpleMatch.py | 204 |
1 files changed, 0 insertions, 204 deletions
# Recovered from the deletion diff of Mailman/Bouncers/SimpleMatch.py.
#
# Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Recognizes simple heuristically delimited bounces."""

import re
# Use the lowercase module name: the capitalised `email.Iterators` alias is
# a legacy spelling that is not available on Python 3.
import email.iterators



def _c(pattern):
    """Compile `pattern` as a case-insensitive regular expression."""
    return re.compile(pattern, re.IGNORECASE)

# This is a list of tuples of the form
#
#     (start cre, end cre, address cre)
#
# where `cre' means compiled regular expression, start is the line just before
# the bouncing address block, end is the line just after the bouncing address
# block, and address cre is the regexp that will recognize the addresses.  It
# must have a group called `addr' which will contain exactly and only the
# address that bounced.
#
# All patterns containing backslashes are raw strings so that sequences such
# as `\s' reach the regexp engine untouched instead of being (invalid) string
# escapes.
PATTERNS = [
    # sdm.de
    (_c('here is your list of failed recipients'),
     _c('here is your returned mail'),
     _c(r'<(?P<addr>[^>]*)>')),
    # sz-sb.de, corridor.com, nfg.nl
    (_c('the following addresses had'),
     _c('transcript of session follows'),
     _c(r'<(?P<fulladdr>[^>]*)>|\(expanded from: <?(?P<addr>[^>)]*)>?\)')),
    # robanal.demon.co.uk
    (_c('this message was created automatically by mail delivery software'),
     _c('original message follows'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # s1.com (InterScan E-Mail VirusWall NT ???)
    (_c('message from interscan e-mail viruswall nt'),
     _c('end of message'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # Smail
    (_c('failed addresses follow:'),
     _c('message text follows:'),
     _c(r'\s*(?P<addr>\S+@\S+)')),
    # newmail.ru
    (_c('This is the machine generated message from mail service.'),
     _c('--- Below the next line is a copy of the message.'),
     _c('<(?P<addr>[^>]*)>')),
    # turbosport.com runs something called `MDaemon 3.5.2' ???
    (_c('The following addresses did NOT receive a copy of your message:'),
     _c('--- Session Transcript ---'),
     _c(r'[>]\s*(?P<addr>.*)$')),
    # usa.net
    (_c(r'Intended recipient:\s*(?P<addr>.*)$'),
     _c('--------RETURNED MAIL FOLLOWS--------'),
     _c(r'Intended recipient:\s*(?P<addr>.*)$')),
    # hotpop.com
    (_c(r'Undeliverable Address:\s*(?P<addr>.*)$'),
     _c('Original message attached'),
     _c(r'Undeliverable Address:\s*(?P<addr>.*)$')),
    # Another demon.co.uk format
    (_c('This message was created automatically by mail delivery'),
     _c('^---- START OF RETURNED MESSAGE ----'),
     _c("addressed to '(?P<addr>[^']*)'")),
    # Prodigy.net full mailbox
    (_c("User's mailbox is full:"),
     _c('Unable to deliver mail.'),
     _c(r"User's mailbox is full:\s*<(?P<addr>[^>]*)>")),
    # Microsoft SMTPSVC
    (_c('The email below could not be delivered to the following user:'),
     _c('Old message:'),
     _c('<(?P<addr>[^>]*)>')),
    # Yahoo on behalf of other domains like sbcglobal.net
    (_c(r'Unable to deliver message to the following address\(es\)\.'),
     _c(r'--- Original message follows\.'),
     _c('<(?P<addr>[^>]*)>:')),
    # googlemail.com
    (_c('Delivery to the following recipient failed'),
     _c('----- Original message -----'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c('<(?P<addr>[^>]*)>')),
    # another kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@:]+):')),
    # thehartford.com
    (_c('Delivery to the following recipients failed'),
     # this one may or may not have the original message, but there's nothing
     # unique to stop on, so stop on the first line of at least 3 characters
     # that doesn't start with 'D' (to not stop immediately) and has no '@'.
     _c('^[^D][^@]{2,}$'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # and another thehartfod.com/hartfordlife.com
    (_c(r'^Your message\s*$'),
     _c('^because:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # kviv.be (InterScan NT)
    (_c('^Unable to deliver message to'),
     _c(r'\*+\s+End of message\s+\*+'),
     _c('<(?P<addr>[^>]*)>')),
    # earthlink.net supported domains
    (_c('^Sorry, unable to deliver your message to'),
     _c('^A copy of the original message'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s+')),
    # ademe.fr
    (_c('^A message could not be delivered to:'),
     _c('^Subject:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # andrew.ac.jp
    (_c('^Invalid final delivery userid:'),
     _c('^Original message follows.'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # E500_SMTP_Mail_Service@lerctr.org
    (_c('------ Failed Recipients ------'),
     _c('-------- Returned Mail --------'),
     _c('<(?P<addr>[^>]*)>')),
    # cynergycom.net
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'(?P<addr>[^\s@]+@[^\s@)]+)')),
    # LSMTP for Windows
    (_c(r'^--> Error description:\s*$'),
     _c('^Error-End:'),
     _c(r'^Error-for:\s+(?P<addr>[^\s@]+@[^\s@]+)')),
    # Qmail with a tri-language intro beginning in spanish
    (_c('Your message could not be delivered'),
     _c('^-'),
     _c('<(?P<addr>[^>]*)>:')),
    # socgen.com
    (_c('Your message could not be delivered to'),
     _c(r'^\s*$'),
     _c(r'(?P<addr>[^\s@]+@[^\s@]+)')),
    # dadoservice.it
    (_c('Your message has encountered delivery problems'),
     _c('Your message reads'),
     _c(r'addressed to\s*(?P<addr>[^\s@]+@[^\s@)]+)')),
    # gomaps.com
    (_c('Did not reach the following recipient'),
     _c(r'^\s*$'),
     _c(r'\s(?P<addr>[^\s@]+@[^\s@]+)')),
    # EYOU MTA SYSTEM
    (_c('This is the deliver program at'),
     _c('^-'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@<>]+)')),
    # A non-standard qmail at ieo.it
    (_c('this is the email server at'),
     _c('^-'),
     _c(r'\s(?P<addr>[^\s@]+@[^\s@]+)[\s,]')),
    # pla.net.py (MDaemon.PRO ?)
    (_c('- no such user here'),
     _c('There is no user'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@]+)\s')),
    # Next one goes here...
    ]



def process(msg, patterns=None):
    """Return the list of bouncing addresses found in the body of `msg`.

    `msg` is an email message object whose body lines are scanned.
    `patterns` is an optional list of (start cre, end cre, address cre)
    tuples in the format described above PATTERNS; it defaults to PATTERNS.

    Pattern sets are tried in order and the first set that yields at least
    one address wins.  The result is a list of unique address strings, empty
    when nothing matched.
    """
    if patterns is None:
        patterns = PATTERNS
    # Extract the body lines once; every pattern set scans the same lines, so
    # there is no need to walk the message again for each set.
    body_lines = list(email.iterators.body_line_iterator(msg))
    # Keys are the addresses found; a dict keeps them unique.
    addrs = {}
    # MAS: This is a mess.  The outer loop used to be over the message
    # so we only looped through the message once.  Looping through the
    # message for each set of patterns is obviously way more work, but
    # if we don't do it, problems arise because scre from the wrong
    # pattern set matches first and then acre doesn't match.  The
    # alternative is to split things into separate modules, but then
    # we process the message multiple times anyway.
    for scre, ecre, acre in patterns:
        # simple state machine
        #     0 = nothing seen yet
        #     1 = intro seen
        state = 0
        for line in body_lines:
            if state == 0 and scre.search(line):
                state = 1
            # Deliberately not an `elif': some formats (e.g. usa.net) carry
            # the address on the very line that matches the start pattern.
            if state == 1:
                mo = acre.search(line)
                if mo:
                    # `addr' may be None/empty when another alternative of
                    # the regexp (e.g. `fulladdr') produced the match.
                    addr = mo.group('addr')
                    if addr:
                        addrs[addr] = True
                elif ecre.search(line):
                    break
        if addrs:
            break
    # Return a real list so callers get the same type on every Python version.
    return list(addrs)
