summaryrefslogtreecommitdiff
path: root/src/mailman/rules/dmarc.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mailman/rules/dmarc.py')
-rw-r--r--src/mailman/rules/dmarc.py326
1 files changed, 326 insertions, 0 deletions
diff --git a/src/mailman/rules/dmarc.py b/src/mailman/rules/dmarc.py
new file mode 100644
index 000000000..928914a29
--- /dev/null
+++ b/src/mailman/rules/dmarc.py
@@ -0,0 +1,326 @@
+# Copyright (C) 2016 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""DMARC mitigation rule."""
+
+import os
+import re
+import logging
+import dns.resolver
+
+from dns.exception import DNSException
+from email.utils import parseaddr
+from lazr.config import as_timedelta
+from mailman.config import config
+from mailman.core.i18n import _
+from mailman.interfaces.mailinglist import DMARCMitigateAction
+from mailman.interfaces.rules import IRule
+from mailman.utilities.datetime import now
+from mailman.utilities.protocols import get
+from mailman.utilities.string import wrap
+from pkg_resources import resource_string as resource_bytes
+from public import public
+from requests.exceptions import HTTPError
+from urllib.error import URLError
+from zope.interface import implementer
+
+
# Loggers named 'mailman.error' and 'mailman.vette'.
elog = logging.getLogger('mailman.error')
vlog = logging.getLogger('mailman.vette')

# String constants used for joining labels and text fragments.
DOT = '.'
EMPTYSTRING = ''
# Unique sentinel object; compared by identity, so it can never be confused
# with a real True/False answer.
KEEP_LOOKING = object()
# Base name of the public suffix list file.
LOCAL_FILE_NAME = 'public_suffix_list.dat'

# Per-process cache of organizational domain suffix rules.  Each key is a
# rule and each value is a boolean telling whether that rule is an exception
# rule or not.
suffix_cache = {}
+
+
def ensure_current_suffix_list():
    # Make sure a usable copy of the organizational domain suffix list is
    # cached under config.VAR_DIR, and return the path to that cached copy.
    #
    # The mtime of the cached file is not its modification time but its
    # expiry time: whenever the cache is (re)written, the mtime is set
    # config.dmarc.cache_lifetime into the future.  A missing or expired cache
    # triggers a download from config.dmarc.org_domain_data_url.  When the
    # download fails, an expired cache is left in place and reused; when
    # there is no cache at all, the copy shipped in mailman.rules.data is
    # installed instead.
    #
    # Imported locally because only this function needs it.  Connection
    # failures and timeouts raised by requests derive from RequestException
    # but not from HTTPError, so catching only the latter would let them
    # escape the fallback logic below.
    # NOTE(review): this presumes get() fetches http(s) URLs via requests;
    # confirm against mailman.utilities.protocols.
    from requests.exceptions import RequestException

    cached_copy_path = os.path.join(config.VAR_DIR, LOCAL_FILE_NAME)
    lifetime = as_timedelta(config.dmarc.cache_lifetime)
    download = False
    try:
        mtime = os.stat(cached_copy_path).st_mtime
    except FileNotFoundError:
        vlog.info('No cached copy of the public suffix list found')
        download = True
        cache_found = False
    else:
        cache_found = True
        # Is the cached copy out-of-date?  Note that when we write a new
        # cache version we explicitly set its mtime to the time in the future
        # when the cache will expire.
        if mtime < now().timestamp():
            download = True
            vlog.info('Cached copy of public suffix list is out of date')
    if download:
        try:
            content = get(config.dmarc.org_domain_data_url)
        except (URLError, HTTPError, RequestException) as error:
            elog.error('Unable to retrieve public suffix list from %s: %s',
                       config.dmarc.org_domain_data_url,
                       getattr(error, 'reason', str(error)))
            if cache_found:
                # Nothing new to write; keep serving the stale cache.
                vlog.info('Using out of date public suffix list')
                content = None
            else:
                # We couldn't access the URL and didn't even have an out of
                # date suffix list cached.  Use the shipped version.
                content = resource_bytes('mailman.rules.data', LOCAL_FILE_NAME)
        if content is not None:
            # Content is either a string or UTF-8 encoded bytes.
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            # Write the cache atomically: fill in a sibling file first, then
            # rename it over the cached location.
            new_path = cached_copy_path + '.new'
            with open(new_path, 'w', encoding='utf-8') as fp:
                fp.write(content)
            # Set the expiry time to the future.
            mtime = (now() + lifetime).timestamp()
            os.utime(new_path, (mtime, mtime))
            # Flip the new file into the cached location.  This does not
            # modify the mtime.
            os.rename(new_path, cached_copy_path)
    return cached_copy_path
+
+
def parse_suffix_list(filename=None):
    # Parse the suffix list into the per process suffix_cache.
    #
    # filename is the path of a UTF-8 encoded suffix list file.  When it is
    # None, ensure_current_suffix_list() is asked for the path of the cached
    # copy.  Nothing is returned; suffix_cache is updated in place.
    #
    # Every rule is stored with its labels lower-cased and in reverse order
    # (e.g. a rule 'co.uk' is stored under the key 'uk.co'), mapped to True
    # when the rule is an exception rule (written with a leading '!') and to
    # False otherwise.
    if filename is None:
        filename = ensure_current_suffix_list()
    # Everything from the first whitespace character on is not part of the
    # rule.  A raw string keeps '\s' from being treated as an (invalid)
    # string escape sequence.
    after_rule = re.compile(r'\s.*')
    with open(filename, 'r', encoding='utf-8') as fp:
        for line in fp:
            # Skip blank lines and comment lines.
            if not line.strip() or line.startswith('//'):
                continue
            line = after_rule.sub('', line)
            if not line:
                # The line started with whitespace, so it carries no rule.
                continue
            parts = line.lower().split('.')
            if parts[0].startswith('!'):
                exception = True
                # Drop the '!' marker; it is not part of the domain.
                parts = [parts[0][1:]] + parts[1:]
            else:
                exception = False
            parts.reverse()
            key = DOT.join(parts)
            suffix_cache[key] = exception
+
+
def get_domain(parts, label):
    # Build a domain name out of the first label+1 entries of parts.  The
    # entries of parts are in reversed order, so the selected ones are put
    # back into normal order before being joined with dots.  Asking for more
    # labels than there are simply yields all of them, because slicing clamps
    # to the length of the list.
    return DOT.join(reversed(parts[:label + 1]))
+
+
def get_organizational_domain(domain):
    # Return the Organizational Domain of the given domain name; this may
    # well be the domain name itself.  The suffix rules are loaded into
    # suffix_cache on first use.
    if not suffix_cache:
        parse_suffix_list()
    # Rules are keyed by their labels in reverse order, so reverse the labels
    # of the domain too before comparing.
    labels = domain.lower().split('.')
    labels.reverse()
    matches = []
    for rule in suffix_cache:
        rule_labels = rule.split('.')
        if len(rule_labels) > len(labels):
            # The rule has more labels than the domain; it cannot match.
            continue
        # A rule matches when each of its labels is either a '*' wildcard or
        # equal to the domain's label at the same position.
        if all(wanted in ('*', actual)
               for actual, wanted in zip(labels, rule_labels)):
            matches.append(rule)
    if not matches:
        # No rule applies; fall back to the last two labels of the domain.
        return get_domain(labels, 1)
    longest = 0
    for rule in matches:
        depth = rule.count('.') + 1
        if suffix_cache[rule]:
            # It's an exception rule; it decides the answer immediately.
            return get_domain(labels, depth - 1)
        longest = max(longest, depth)
    # One label more than the longest matching rule.
    return get_domain(labels, longest)
+
+
def is_reject_or_quarantine(mlist, email, dmarc_domain, org=False):
    # This takes a mailing list, an email address as in the From: header, the
    # _dmarc host name for the domain in question, and a flag stating whether
    # we should check the organizational domains.  It returns one of three
    # values:
    # * True if the DMARC policy is reject or quarantine;
    # * False if is not;
    # * A special sentinel (KEEP_LOOKING) if we should continue looking.
    #
    # mlist and email are only used for logging.  When org is true, an sp=
    # tag, if present, takes precedence over the p= tag.
    resolver = dns.resolver.Resolver()
    resolver.timeout = as_timedelta(
        config.dmarc.resolver_timeout).total_seconds()
    resolver.lifetime = as_timedelta(
        config.dmarc.resolver_lifetime).total_seconds()
    try:
        txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        # Nothing is published at this name.
        return KEEP_LOOKING
    except DNSException as error:
        elog.error(
            'DNSException: Unable to query DMARC policy for %s (%s). %s',
            email, dmarc_domain, error.__doc__)
        return KEEP_LOOKING
    # Be as robust as possible in parsing the result.
    results_by_name = {}
    cnames = {}
    want_names = {dmarc_domain + '.'}
    # Check all the TXT records returned by DNS.  Keep track of the CNAMEs for
    # checking later on.  Ignore any other non-TXT records.
    for txt_rec in txt_recs.response.answer:
        if txt_rec.rdtype == dns.rdatatype.CNAME:
            cnames[txt_rec.name.to_text()] = (
                txt_rec.items[0].target.to_text())
        if txt_rec.rdtype != dns.rdatatype.TXT:
            continue
        name = txt_rec.name.to_text()
        # Look at every record of the RRset, not just the first one;
        # otherwise a DMARC record that is accompanied by other TXT records
        # at the same name is found only when it happens to come first.
        for rdata in txt_rec.items:
            # The data comes from a third party's DNS zone.  Undecodable
            # bytes are replaced instead of raising UnicodeDecodeError.
            result = EMPTYSTRING.join(
                str(record, encoding='utf-8', errors='replace')
                for record in rdata.strings)
            results_by_name.setdefault(name, []).append(result)
    # Follow the CNAME chain, so that want_names ends up holding the name the
    # chain finally points at.
    expands = list(want_names)
    seen = set(expands)
    while expands:
        item = expands.pop(0)
        if item in cnames:
            if cnames[item] in seen:
                # CNAME loop.
                continue
            expands.append(cnames[item])
            seen.add(cnames[item])
            want_names.add(cnames[item])
            want_names.discard(item)
    assert len(want_names) == 1, (
        'Error in CNAME processing for {}; want_names != 1.'.format(
            dmarc_domain))
    for name in want_names:
        if name not in results_by_name:
            continue
        dmarcs = [
            record for record in results_by_name[name]
            if record.startswith('v=DMARC1;')
            ]
        if len(dmarcs) == 0:
            return KEEP_LOOKING
        if len(dmarcs) > 1:
            elog.error(
                'RRset of TXT records for %s has %d v=DMARC1 entries; '
                'testing them all',
                dmarc_domain, len(dmarcs))
        for entry in dmarcs:
            mo = re.search(r'\bsp=(\w*)\b', entry, re.IGNORECASE)
            if org and mo:
                # The subdomain policy applies.
                policy = mo.group(1).lower()
            else:
                mo = re.search(r'\bp=(\w*)\b', entry, re.IGNORECASE)
                if mo:
                    policy = mo.group(1).lower()
                else:
                    # This continue does actually get covered by
                    # TestDMARCRules.test_domain_with_subdomain_policy() and
                    # TestDMARCRules.test_no_policy() but because of
                    # Coverage BitBucket issue #198 and
                    # http://bugs.python.org/issue2506 coverage cannot report
                    # it as such, so just pragma it away.
                    continue                        # pragma: no cover
            if policy in ('reject', 'quarantine'):
                vlog.info(
                    '%s: DMARC lookup for %s (%s) found p=%s in %s = %s',
                    mlist.list_name,
                    email,
                    dmarc_domain,
                    policy,
                    name,
                    entry)
                return True
    return False
+
+
def maybe_mitigate(mlist, email):
    # Given a mailing list and an email address, return True if the DMARC
    # policy that applies to the address is p=reject or p=quarantine, and
    # False otherwise.  The domain of the address is consulted first; only if
    # that lookup is inconclusive, and the organizational domain is a
    # different one, is the organizational domain consulted as well.
    address = email.lower()
    # Split at the last '@', in case a quoted local part contains one.
    _local, separator, domain = address.rpartition('@')
    if separator != '@':
        # Not an address with a domain; there is nothing to look up.
        return False
    verdict = is_reject_or_quarantine(
        mlist, address, '_dmarc.{}'.format(domain))
    if verdict is KEEP_LOOKING:
        organizational = get_organizational_domain(domain)
        if organizational != domain:
            verdict = is_reject_or_quarantine(
                mlist, address, '_dmarc.{}'.format(organizational), org=True)
    # A lookup that is still inconclusive means there is nothing to mitigate.
    return False if verdict is KEEP_LOOKING else verdict
+
+
@public
@implementer(IRule)
class DMARCMitigation:
    """The DMARC mitigation rule."""

    name = 'dmarc-mitigation'
    description = _('Find DMARC policy of From: domain.')
    record = True

    def check(self, mlist, msg, msgdata):
        """See `IRule`.

        Return True, after filling in the moderation keys of `msgdata`, when
        the From: domain publishes a DMARC policy of reject or quarantine and
        the list's mitigation action is discard or reject.  In every other
        case return False; `msgdata['dmarc']` is still set to True whenever
        such a policy was found.
        """
        if mlist.dmarc_mitigate_action is DMARCMitigateAction.no_mitigation:
            # Don't bother to check if we're not going to do anything.
            return False
        # A message without a From: header yields the '' default rather than
        # None, and str() turns a non-string header value into text, so that
        # parseaddr() is always handed a string.
        dn, addr = parseaddr(str(msg.get('from', '')))
        if maybe_mitigate(mlist, addr):
            # If dmarc_mitigate_action is discard or reject, this rule fires
            # and jumps to the 'moderation' chain to do the actual discard.
            # Otherwise, the rule misses but sets a flag for the dmarc handler
            # to do the appropriate action.
            msgdata['dmarc'] = True
            if mlist.dmarc_mitigate_action is DMARCMitigateAction.discard:
                msgdata['moderation_action'] = 'discard'
                msgdata['moderation_reasons'] = [_('DMARC moderation')]
            elif mlist.dmarc_mitigate_action is DMARCMitigateAction.reject:
                # NOTE(review): looks unused, but is presumably what _()
                # substitutes for ${listowner} below -- do not remove or
                # rename without checking.
                listowner = mlist.owner_address       # noqa F841
                reason = (mlist.dmarc_moderation_notice or
                          _('You are not allowed to post to this mailing '
                            'list From: a domain which publishes a DMARC '
                            'policy of reject or quarantine, and your message'
                            ' has been automatically rejected. If you think '
                            'that your messages are being rejected in error, '
                            'contact the mailing list owner at ${listowner}.'))
                msgdata['moderation_reasons'] = [wrap(reason)]
                msgdata['moderation_action'] = 'reject'
            else:
                return False
            msgdata['moderation_sender'] = addr
            return True
        return False