diff options
| author | Barry Warsaw | 2017-01-04 15:10:55 +0000 |
|---|---|---|
| committer | Barry Warsaw | 2017-01-04 15:10:55 +0000 |
| commit | 9ecb0d462b79bec42e82b6aecea8853262e15753 (patch) | |
| tree | dc422ff2eec850387fcdb7a3c326f8d3a267a16a /src/mailman/rules/dmarc.py | |
| parent | 8cb4546b9a2bf87a3e2d465651686ca26a5404db (diff) | |
| parent | ae3eeb642b452ee844ba7a2999361f31c3c327c7 (diff) | |
| download | mailman-9ecb0d462b79bec42e82b6aecea8853262e15753.tar.gz mailman-9ecb0d462b79bec42e82b6aecea8853262e15753.tar.zst mailman-9ecb0d462b79bec42e82b6aecea8853262e15753.zip | |
Diffstat (limited to 'src/mailman/rules/dmarc.py')
| -rw-r--r-- | src/mailman/rules/dmarc.py | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/src/mailman/rules/dmarc.py b/src/mailman/rules/dmarc.py new file mode 100644 index 000000000..928914a29 --- /dev/null +++ b/src/mailman/rules/dmarc.py @@ -0,0 +1,326 @@ +# Copyright (C) 2016 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""DMARC mitigation rule.""" + +import os +import re +import logging +import dns.resolver + +from dns.exception import DNSException +from email.utils import parseaddr +from lazr.config import as_timedelta +from mailman.config import config +from mailman.core.i18n import _ +from mailman.interfaces.mailinglist import DMARCMitigateAction +from mailman.interfaces.rules import IRule +from mailman.utilities.datetime import now +from mailman.utilities.protocols import get +from mailman.utilities.string import wrap +from pkg_resources import resource_string as resource_bytes +from public import public +from requests.exceptions import HTTPError +from urllib.error import URLError +from zope.interface import implementer + + +elog = logging.getLogger('mailman.error') +vlog = logging.getLogger('mailman.vette') + +DOT = '.' +EMPTYSTRING = '' +KEEP_LOOKING = object() +LOCAL_FILE_NAME = 'public_suffix_list.dat' + +# Map organizational domain suffix rules to a boolean indicating whether the +# rule is an exception or not. +suffix_cache = dict() + + +def ensure_current_suffix_list(): + # Read and parse the organizational domain suffix list. First look in the + # cached directory to see if we already have a valid copy of it. + cached_copy_path = os.path.join(config.VAR_DIR, LOCAL_FILE_NAME) + lifetime = as_timedelta(config.dmarc.cache_lifetime) + download = False + try: + mtime = os.stat(cached_copy_path).st_mtime + except FileNotFoundError: + vlog.info('No cached copy of the public suffix list found') + download = True + cache_found = False + else: + cache_found = True + # Is the cached copy out-of-date? Note that when we write a new cache + # version we explicitly set its mtime to the time in the future when + # the cache will expire. + if mtime < now().timestamp(): + download = True + vlog.info('Cached copy of public suffix list is out of date') + if download: + try: + content = get(config.dmarc.org_domain_data_url) + except (URLError, HTTPError) as error: + elog.error('Unable to retrieve public suffix list from %s: %s', + config.dmarc.org_domain_data_url, + getattr(error, 'reason', str(error))) + if cache_found: + vlog.info('Using out of date public suffix list') + content = None + else: + # We couldn't access the URL and didn't even have an out of + # date suffix list cached. Use the shipped version. + content = resource_bytes('mailman.rules.data', LOCAL_FILE_NAME) + if content is not None: + # Content is either a string or UTF-8 encoded bytes. + if isinstance(content, bytes): + content = content.decode('utf-8') + # Write the cache atomically. + new_path = cached_copy_path + '.new' + with open(new_path, 'w', encoding='utf-8') as fp: + fp.write(content) + # Set the expiry time to the future. + mtime = (now() + lifetime).timestamp() + os.utime(new_path, (mtime, mtime)) + # Flip the new file into the cached location. This does not + # modify the mtime. + os.rename(new_path, cached_copy_path) + return cached_copy_path + + +def parse_suffix_list(filename=None): + # Parse the suffix list into a per process cache. + if filename is None: + filename = ensure_current_suffix_list() + # At this point the cached copy must exist and is as valid as possible. + # Read and return the contents as a UTF-8 string. + with open(filename, 'r', encoding='utf-8') as fp: + for line in fp: + if not line.strip() or line.startswith('//'): + continue + line = re.sub('\s.*', '', line) + if not line: + continue + parts = line.lower().split('.') + if parts[0].startswith('!'): + exception = True + parts = [parts[0][1:]] + parts[1:] + else: + exception = False + parts.reverse() + key = DOT.join(parts) + suffix_cache[key] = exception + + +def get_domain(parts, label): + # A helper to get a domain name consisting of the first label+1 labels in + # parts. + domain = parts[:min(label+1, len(parts))] + domain.reverse() + return DOT.join(domain) + + +def get_organizational_domain(domain): + # Given a domain name, this returns the corresponding Organizational + # Domain which may be the same as the input. + if len(suffix_cache) == 0: + parse_suffix_list() + hits = [] + parts = domain.lower().split('.') + parts.reverse() + for key in suffix_cache: + key_parts = key.split('.') + if len(parts) >= len(key_parts): + for i in range(len(key_parts) - 1): + if parts[i] != key_parts[i] and key_parts[i] != '*': + break + else: + if (parts[len(key_parts) - 1] == key_parts[-1] or + key_parts[-1] == '*'): + hits.append(key) + if not hits: + return get_domain(parts, 1) + label = 0 + for key in hits: + key_parts = key.split('.') + if suffix_cache[key]: + # It's an exception. + return get_domain(parts, len(key_parts) - 1) + if len(key_parts) > label: + label = len(key_parts) + return get_domain(parts, label) + + +def is_reject_or_quarantine(mlist, email, dmarc_domain, org=False): + # This takes a mailing list, an email address as in the From: header, the + # _dmarc host name for the domain in question, and a flag stating whether + # we should check the organizational domains. It returns one of three + # values: + # * True if the DMARC policy is reject or quarantine; + # * False if is not; + # * A special sentinel if we should continue looking + resolver = dns.resolver.Resolver() + resolver.timeout = as_timedelta( + config.dmarc.resolver_timeout).total_seconds() + resolver.lifetime = as_timedelta( + config.dmarc.resolver_lifetime).total_seconds() + try: + txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT) + except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): + return KEEP_LOOKING + except DNSException as error: + elog.error( + 'DNSException: Unable to query DMARC policy for %s (%s). %s', + email, dmarc_domain, error.__doc__) + return KEEP_LOOKING + # Be as robust as possible in parsing the result. + results_by_name = {} + cnames = {} + want_names = set([dmarc_domain + '.']) + # Check all the TXT records returned by DNS. Keep track of the CNAMEs for + # checking later on. Ignore any other non-TXT records. + for txt_rec in txt_recs.response.answer: + if txt_rec.rdtype == dns.rdatatype.CNAME: + cnames[txt_rec.name.to_text()] = ( + txt_rec.items[0].target.to_text()) + if txt_rec.rdtype != dns.rdatatype.TXT: + continue + result = EMPTYSTRING.join( + str(record, encoding='utf-8') + for record in txt_rec.items[0].strings) + name = txt_rec.name.to_text() + results_by_name.setdefault(name, []).append(result) + expands = list(want_names) + seen = set(expands) + while expands: + item = expands.pop(0) + if item in cnames: + if cnames[item] in seen: + # CNAME loop. + continue + expands.append(cnames[item]) + seen.add(cnames[item]) + want_names.add(cnames[item]) + want_names.discard(item) + assert len(want_names) == 1, ( + 'Error in CNAME processing for {}; want_names != 1.'.format( + dmarc_domain)) + for name in want_names: + if name not in results_by_name: + continue + dmarcs = [ + record for record in results_by_name[name] + if record.startswith('v=DMARC1;') + ] + if len(dmarcs) == 0: + return KEEP_LOOKING + if len(dmarcs) > 1: + elog.error( + 'RRset of TXT records for %s has %d v=DMARC1 entries; ' + 'testing them all', + dmarc_domain, len(dmarcs)) + for entry in dmarcs: + mo = re.search(r'\bsp=(\w*)\b', entry, re.IGNORECASE) + if org and mo: + policy = mo.group(1).lower() + else: + mo = re.search(r'\bp=(\w*)\b', entry, re.IGNORECASE) + if mo: + policy = mo.group(1).lower() + else: + # This continue does actually get covered by + # TestDMARCRules.test_domain_with_subdomain_policy() and + # TestDMARCRules.test_no_policy() but because of + # Coverage BitBucket issue #198 and + # http://bugs.python.org/issue2506 coverage cannot report + # it as such, so just pragma it away. + continue # pragma: no cover + if policy in ('reject', 'quarantine'): + vlog.info( + '%s: DMARC lookup for %s (%s) found p=%s in %s = %s', + mlist.list_name, + email, + dmarc_domain, + policy, + name, + entry) + return True + return False + + +def maybe_mitigate(mlist, email): + # This takes an email address, and returns True if DMARC policy is + # p=reject or p=quarantine. + email = email.lower() + # Scan from the right in case quoted local part has an '@'. + local, at, from_domain = email.rpartition('@') + if at != '@': + return False + answer = is_reject_or_quarantine( + mlist, email, '_dmarc.{}'.format(from_domain)) + if answer is not KEEP_LOOKING: + return answer + org_dom = get_organizational_domain(from_domain) + if org_dom != from_domain: + answer = is_reject_or_quarantine( + mlist, email, '_dmarc.{}'.format(org_dom), org=True) + if answer is not KEEP_LOOKING: + return answer + return False + + +@public +@implementer(IRule) +class DMARCMitigation: + """The DMARC mitigation rule.""" + + name = 'dmarc-mitigation' + description = _('Find DMARC policy of From: domain.') + record = True + + def check(self, mlist, msg, msgdata): + """See `IRule`.""" + if mlist.dmarc_mitigate_action is DMARCMitigateAction.no_mitigation: + # Don't bother to check if we're not going to do anything. + return False + dn, addr = parseaddr(msg.get('from')) + if maybe_mitigate(mlist, addr): + # If dmarc_mitigate_action is discard or reject, this rule fires + # and jumps to the 'moderation' chain to do the actual discard. + # Otherwise, the rule misses but sets a flag for the dmarc handler + # to do the appropriate action. + msgdata['dmarc'] = True + if mlist.dmarc_mitigate_action is DMARCMitigateAction.discard: + msgdata['moderation_action'] = 'discard' + msgdata['moderation_reasons'] = [_('DMARC moderation')] + elif mlist.dmarc_mitigate_action is DMARCMitigateAction.reject: + listowner = mlist.owner_address # noqa F841 + reason = (mlist.dmarc_moderation_notice or + _('You are not allowed to post to this mailing ' + 'list From: a domain which publishes a DMARC ' + 'policy of reject or quarantine, and your message' + ' has been automatically rejected. If you think ' + 'that your messages are being rejected in error, ' + 'contact the mailing list owner at ${listowner}.')) + msgdata['moderation_reasons'] = [wrap(reason)] + msgdata['moderation_action'] = 'reject' + else: + return False + msgdata['moderation_sender'] = addr + return True + return False |
