src/mailman/bouncers/yale.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

# Copyright (C) 2000-2011 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Yale's mail server is pretty dumb.

Its reports include the end user's name, but not the full domain.  I think we
can usually guess it right anyway.  This is completely based on examination of
the corpse, and is subject to failure whenever Yale even slightly changes
their MTA. :(

"""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'Yale',
    ]


import re

from cStringIO import StringIO
from email.utils import getaddresses
from flufl.enum import Enum
from zope.interface import implements

from mailman.interfaces.bounce import IBounceDetector


# Start marker: the line that introduces the list of failed recipients.
# Matched case-insensitively, anywhere in a body line.
scre = re.compile(r'Message not delivered to the following', re.IGNORECASE)
# End marker: the line that closes the recipient list.  Matched
# case-insensitively, anywhere in a body line.
ecre = re.compile(r'Error Detail', re.IGNORECASE)
# A recipient entry: the first run of non-whitespace characters that has
# whitespace on both sides (a trailing newline counts as whitespace).  The
# token is captured in the `addr` group.
acre = re.compile(r'\s+(?P<addr>\S+)\s+')


class ParseState(Enum):
    # States of the line-by-line scan over the bounce message body.
    #
    # The introductory "Message not delivered to the following" line has not
    # been seen yet; lines are ignored until it appears.
    start = 0
    # The introductory line has been seen; following lines are searched for
    # recipient names until the "Error Detail" line ends the scan.
    intro_found = 1


class Yale:
    """Parse Yale's bounces (or what used to be)."""

    implements(IBounceDetector)

    def process(self, msg):
        """See `IBounceDetector`.

        The message is treated as a Yale bounce only when it is not
        multipart and its From: address is `mailer-daemon` at `yale.edu` or
        one of its subdomains (both compared case-insensitively).

        :param msg: The bounce message to examine.
        :return: None when the message is not recognized as a Yale bounce.
            Otherwise a list of candidate addresses, two for every user name
            found in the body (`name@yale.edu` and `name@cs.yale.edu`),
            ordered by user name.  The list is empty when the sender matches
            but no names could be extracted.
        """
        if msg.is_multipart():
            return None
        try:
            whofrom = getaddresses([msg.get('from', '')])[0][1]
            if not whofrom:
                return None
            username, domain = whofrom.split('@', 1)
        except (IndexError, ValueError):
            # No parseable From: address, or an address without an '@'.
            return None
        if username.lower() != 'mailer-daemon':
            return None
        # The domain must end in the two labels `yale.edu`.  Compare the last
        # two labels explicitly so that a short domain such as a bare `edu`
        # is rejected, and lowercase first because domain names are not case
        # sensitive.
        if domain.lower().split('.')[-2:] != ['yale', 'edu']:
            return None
        # Okay, we've established that the bounce came from the mailer-daemon
        # at yale.edu.  Let's look for a name, and then guess the relevant
        # domains.
        names = set()
        body = StringIO(msg.get_payload())
        state = ParseState.start
        for line in body:
            if state is ParseState.start and scre.search(line):
                # Found the introductory line; names follow.
                state = ParseState.intro_found
            elif state is ParseState.intro_found and ecre.search(line):
                # The error detail section ends the list of names.
                break
            elif state is ParseState.intro_found:
                mo = acre.search(line)
                if mo:
                    names.add(mo.group('addr'))
        # Now we have a bunch of names, these are either @yale.edu or
        # @cs.yale.edu.  Add them both.  Sort the names so the result does
        # not depend on set iteration order.
        addresses = []
        for name in sorted(names):
            addresses.append(name + '@yale.edu')
            addresses.append(name + '@cs.yale.edu')
        return addresses