summaryrefslogtreecommitdiff
path: root/src/mailman/handlers/dmarc.py
blob: 21b12299098928db65c912aee1123cfec15b2ebd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# Copyright (C) 2016 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Do DMARC Munge From and Wrap Message actions.

This does the work of modifying the message's From: and Cc: or Reply-To:, or
wrapping the message in an outer message with From: and Cc: or Reply-To:
as appropriate, to avoid issues caused by the original From: domain's DMARC
policy.  It does this either to selected messages flagged by the DMARC
moderation rule, based on list settings and the original From: domain's DMARC
policy, or to all messages, based on list settings."""

import re
import copy
import logging

from email.header import Header, decode_header
from email.mime.message import MIMEMessage
from email.mime.text import MIMEText
from email.utils import formataddr, getaddresses, make_msgid
from mailman.core.i18n import _
from mailman.interfaces.handler import IHandler
from mailman.interfaces.mailinglist import (
    DMARCModerationAction, FromIsList, ReplyToMunging)
from mailman.utilities.string import wrap
from public import public
from zope.interface import implementer


log = logging.getLogger('mailman.error')

COMMASPACE = ', '
MAXLINELEN = 78
# Matches any character that is neither whitespace nor in the printable
# ASCII range '!' through '~'.  This must be a raw string; otherwise the
# backslash-s is an invalid string escape, which Python warns about.
NONASCII = re.compile(r'[^\s!-~]')
# Headers from the original that we want to keep in the wrapper.  These are
# actually regexps matched with re.match so they match anything that starts
# with the given string unless they end with '$'.
KEEPERS = (
    'archived-at',
    'date',
    'in-reply-to',
    'list-',
    'precedence',
    'references',
    'subject',
    'to',
    'x-mailman-',
    )


def _decode_realname(realname):
    """Return `realname` as a plain string with RFC 2047 encoding removed.

    A display name taken from From: should be ASCII or RFC 2047 encoded; one
    taken from the membership or from the email address is already a plain
    string.  Malformed input never raises: a name whose encoded words cannot
    be parsed is returned unchanged, and bytes labelled with a character set
    Python does not know are decoded as iso-8859-1.
    """
    from email.errors import HeaderParseError
    try:
        fragments = decode_header(realname)
    except HeaderParseError:
        # E.g. a broken base64 encoded word.  Showing the raw text is better
        # than failing to process the message.
        return realname
    bits = []
    for fragment, charset in fragments:
        if isinstance(fragment, str):
            bits.append(fragment)
            continue
        # Character set should be ascii, but use iso-8859-1 anyway.
        charset = charset or 'iso-8859-1'
        try:
            bits.append(str(fragment, charset, errors='replace'))
        except LookupError:
            # The sender named a character set Python has no codec for.
            # iso-8859-1 maps every byte value, so this decode cannot fail.
            bits.append(str(fragment, 'iso-8859-1', errors='replace'))
    return ''.join(bits)


def munged_headers(mlist, msg, msgdata):
    """Return the headers to add or replace for DMARC mitigation.

    :param mlist: The mailing list the message was posted to.
    :param msg: The message whose From: is to be munged.  It is only read.
    :param msgdata: The message metadata.  Its 'original_sender' entry is
        used when From: does not hold exactly one usable address, so it
        must be present in that case.
    :return: A list of (header, content) tuples: first the new From:, then
        either Reply-To: or Cc: with the original From: address appended
        unless it was already listed there.
    """
    # The new From: is the string
    # 'original From: display name' via 'list name' <list posting address>
    # and the original From: goes to Reply-To: or Cc: per the following.
    # Our goals for this process are not completely compatible, so we do
    # the best we can.  Our goals are:
    # 1) as long as the list is not anonymous, the original From: address
    #    should be obviously exposed, i.e. not just in a header that MUAs
    #    don't display.
    # 2) the original From: address should not be in a comment or display
    #    name in the new From: because it is claimed that multiple domains
    #    in any fields in From: are indicative of spamminess.  This means
    #    it should be in Reply-To: or Cc:.
    # 3) the behavior of an MUA doing a 'reply' or 'reply all' should be
    #    consistent regardless of whether or not the From: is munged.
    # Goal 3) implies sometimes the original From: should be in Reply-To:
    # and sometimes in Cc:, and even so, this goal won't be achieved in
    # all cases with all MUAs.  In cases of conflict, the above ordering of
    # goals is priority order.
    #
    # Be as robust as possible here.
    faddrs = getaddresses(msg.get_all('from', []))
    # Strip the nulls and bad emails.
    faddrs = [x for x in faddrs if x[1].find('@') > 0]
    if len(faddrs) == 1:
        realname, email = o_from = faddrs[0]
    else:
        # No From: or multiple addresses.  Just punt and take
        # the get_sender result.
        realname = ''
        email = msgdata['original_sender']
        o_from = (realname, email)
    if not realname:
        # No display name in From:, so prefer the one the member registered
        # with the list and fall back to the bare address.
        member = mlist.members.get_member(email)
        if member:
            realname = member.display_name or email
        else:
            realname = email
    # Remove domain from realname if it looks like an email address.
    realname = re.sub(r'@([^ .]+\.)+[^ .]+$', '---', realname)
    # Make a display name and RFC 2047 encode it if necessary.  Whatever its
    # origin, first ensure it's an unencoded string.
    realname = _decode_realname(realname)
    # The list's real_name is a string.
    # NOTE(review): `realname` and `lrn` are referenced by name from the
    # '$realname via $lrn' template below; presumably `_` substitutes from
    # this function's local variables, so both must stay locals here even
    # though `lrn` looks unused to a linter.
    lrn = mlist.display_name      # noqa: F841
    # Ensure the i18n context is the list's preferred_language.
    with _.using(mlist.preferred_language.code):
        via = _('$realname via $lrn')
    # Get an RFC 2047 encoded header string.
    dn = str(Header(via, mlist.preferred_language.charset))
    retn = [('From', formataddr((dn, mlist.posting_address)))]
    # We've made the munged From:.  Now put the original in Reply-To: or Cc:
    if mlist.reply_goes_to_list is ReplyToMunging.no_munging:
        # Add original from to Reply-To:
        add_to = 'Reply-To'
    else:
        # Add original from to Cc:
        add_to = 'Cc'
    orig = getaddresses(msg.get_all(add_to, []))
    if o_from[1] not in [x[1] for x in orig]:
        orig.append(o_from)
    retn.append((add_to, COMMASPACE.join(formataddr(x) for x in orig)))
    return retn


def munge_from(mlist, msg, msgdata):
    """Apply the DMARC-munged headers to `msg` in place.

    Every header returned by `munged_headers()` replaces the header of the
    same name in `msg`.  Nothing is returned.
    """
    replacements = munged_headers(mlist, msg, msgdata)
    for header, content in replacements:
        # Delete before assigning so that any existing occurrences are
        # dropped and the header is replaced instead of (presumably)
        # being duplicated.
        del msg[header]
        msg[header] = content


def wrap_message(mlist, msg, msgdata, dmarc_wrap=False):
    """Turn `msg`, in place, into a wrapper around a copy of itself.

    A deep copy of the original message becomes the payload.  On the
    wrapper, only the headers matching `KEEPERS` survive; MIME-Version:, a
    fresh Message-ID: and the headers from `munged_headers()` are added.

    :param mlist: The mailing list the message was posted to.
    :param msg: The message to convert into the wrapper.
    :param msgdata: The message metadata, passed on to `munged_headers()`.
    :param dmarc_wrap: True when wrapping because of the list's
        dmarc_moderation_action.  Only then, and only if the list has a
        non-empty dmarc_wrapped_message_text, is that text placed in front
        of the original message as a separate part.
    """
    original = copy.deepcopy(msg)
    # Work out which headers go before deleting any of them.
    unwanted = [
        name for name in msg.keys()
        if not any(re.match(pattern, name, re.I) for pattern in KEEPERS)
        ]
    for name in unwanted:
        del msg[name]
    msg['MIME-Version'] = '1.0'
    msg['Message-ID'] = make_msgid()
    # The munged headers are computed from the untouched copy, since the
    # wrapper has already lost its From:, Cc: and Reply-To: headers.
    for name, value in munged_headers(mlist, original, msgdata):
        msg[name] = value
    # Are we including dmarc_wrapped_message_text?  I.e., do we have text and
    # are we wrapping because of dmarc_moderation_action?
    notice = mlist.dmarc_wrapped_message_text
    if not (len(notice) > 0 and dmarc_wrap):
        # Plain wrapper: the original message is the only content.
        msg['Content-Type'] = 'message/rfc822'
        msg['Content-Disposition'] = 'inline'
        msg.set_payload([original])
        return
    # Two-part wrapper: the explanatory text, then the original message.
    charset = mlist.preferred_language.charset
    text_part = MIMEText(wrap(notice), 'plain', charset)
    text_part['Content-Disposition'] = 'inline'
    message_part = MIMEMessage(original)
    message_part['Content-Disposition'] = 'inline'
    msg['Content-Type'] = 'multipart/mixed'
    msg.set_payload([text_part, message_part])


def process(mlist, msg, msgdata):
    """Apply the list's DMARC mitigation, if any, to `msg` in place.

    The list's dmarc_moderation_action is used when it is set and the
    message metadata has a true 'dmarc' entry; otherwise the list's
    from_is_list setting is used.  Nothing is done when neither applies,
    nor for anonymous lists.

    :raises AssertionError: if the setting being applied is neither
        munge_from nor wrap_message.
    """
    action = mlist.dmarc_moderation_action
    from_is_list = mlist.from_is_list
    # True when this particular message calls for the DMARC moderation
    # action rather than the list-wide from_is_list behavior.
    dmarc_applies = (bool(msgdata.get('dmarc')) and
                     action is not DMARCModerationAction.none)
    if not dmarc_applies and from_is_list is FromIsList.none:
        return
    if mlist.anonymous_list:
        # DMARC mitigation is not required for anonymous lists.
        return
    if dmarc_applies:
        if action is DMARCModerationAction.munge_from:
            munge_from(mlist, msg, msgdata)
            return
        if action is DMARCModerationAction.wrap_message:
            wrap_message(mlist, msg, msgdata, dmarc_wrap=True)
            return
        raise AssertionError(
            'handlers/dmarc.py: dmarc_moderation_action = {}'.format(
                action))
    if from_is_list is FromIsList.munge_from:
        munge_from(mlist, msg, msgdata)
        return
    if from_is_list is FromIsList.wrap_message:
        wrap_message(mlist, msg, msgdata)
        return
    raise AssertionError(
        'handlers/dmarc.py: from_is_list = {}'.format(
            from_is_list))


@public
@implementer(IHandler)
class DMARC:
    """Apply DMARC mitigations.

    This class is declared to implement `IHandler`; its `process()` method
    hands the work to this module's `process()` function.
    """

    # Short identifier for this handler.
    name = 'dmarc'
    # Human-readable summary, passed through the i18n `_` function.
    description = _('Apply DMARC mitigations.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        The bare name `process` below is the module-level function, not
        this method, so this is a delegation and not a recursive call.
        """
        process(mlist, msg, msgdata)