Diffstat (limited to 'src/mailman/rules/dmarc.py')
| Mode | Path | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | src/mailman/rules/dmarc.py | 120 |
1 file changed, 87 insertions, 33 deletions
```diff
diff --git a/src/mailman/rules/dmarc.py b/src/mailman/rules/dmarc.py
index 5a1073970..122aefe7b 100644
--- a/src/mailman/rules/dmarc.py
+++ b/src/mailman/rules/dmarc.py
@@ -17,6 +17,7 @@
 
 """DMARC mitigation rule."""
 
+import os
 import re
 import logging
 import dns.resolver
@@ -28,46 +29,100 @@ from mailman.config import config
 from mailman.core.i18n import _
 from mailman.interfaces.mailinglist import DMARCMitigateAction
 from mailman.interfaces.rules import IRule
+from mailman.utilities.datetime import now
+from mailman.utilities.protocols import get
 from mailman.utilities.string import wrap
+from pkg_resources import resource_string as resource_bytes
 from public import public
-from urllib import error, request
+from requests.exceptions import HTTPError
+from urllib.error import URLError
 from zope.interface import implementer
 
 
 elog = logging.getLogger('mailman.error')
 vlog = logging.getLogger('mailman.vette')
 
-s_dict = dict()
+DOT = '.'
 KEEP_LOOKING = object()
+LOCAL_FILE_NAME = 'public_suffix_list.dat'
 
+# Map organizational domain suffix rules to a boolean indicating whether the
+# rule is an exception or not.
+suffix_cache = dict()
 
-def _get_suffixes(url):
-    # This loads and parses the data from the url argument into s_dict for
-    # use by _get_org_dom.
-    global s_dict
-    if not url:
-        return
+
+def ensure_current_suffix_list():
+    # Read and parse the organizational domain suffix list. First look in the
+    # cached directory to see if we already have a valid copy of it.
+    cached_copy_path = os.path.join(config.VAR_DIR, LOCAL_FILE_NAME)
+    lifetime = as_timedelta(config.dmarc.cache_lifetime)
+    download = False
     try:
-        d = request.urlopen(url)
-    except error.URLError as e:
-        elog.error('Unable to retrieve data from %s: %s', url, e.reason)
-        return
-    for line in d.readlines():
-        line = str(line, encoding='utf-8')
-        if not line.strip() or line.startswith('//'):
-            continue
-        line = re.sub('\s.*', '', line)
-        if not line:
-            continue
-        parts = line.lower().split('.')
-        if parts[0].startswith('!'):
-            exc = True
-            parts = [parts[0][1:]] + parts[1:]
-        else:
-            exc = False
-        parts.reverse()
-        k = '.'.join(parts)
-        s_dict[k] = exc
+        mtime = os.stat(cached_copy_path).st_mtime
+    except FileNotFoundError:
+        vlog.info('No cached copy of the public suffix list found')
+        download = True
+        cache_found = False
+    else:
+        cache_found = True
+        # Is the cached copy out-of-date? Note that when we write a new cache
+        # version we explicitly set its mtime to the time in the future when
+        # the cache will expire.
+        if mtime < now().timestamp():
+            download = True
+            vlog.info('Cached copy of public suffix list is out of date')
+    if download:
+        try:
+            content = get(config.dmarc.org_domain_data_url)
+        except (URLError, HTTPError) as error:
+            elog.error('Unable to retrieve public suffix list from %s: %s',
+                       config.dmarc.org_domain_data_url,
+                       getattr(error, 'reason', str(error)))
+            if cache_found:
+                vlog.info('Using out of date public suffix list')
+                content = None
+            else:
+                # We couldn't access the URL and didn't even have an out of
+                # date suffix list cached. Use the shipped version.
+                content = resource_bytes('mailman.rules.data', LOCAL_FILE_NAME)
+        if content is not None:
+            # Content is either a string or UTF-8 encoded bytes.
+            if isinstance(content, bytes):
+                content = content.decode('utf-8')
+            # Write the cache atomically.
+            new_path = cached_copy_path + '.new'
+            with open(new_path, 'w', encoding='utf-8') as fp:
+                fp.write(content)
+            # Set the expiry time to the future.
+            mtime = (now() + lifetime).timestamp()
+            os.utime(new_path, (mtime, mtime))
+            # Flip the new file into the cached location. This does not
+            # modify the mtime.
+            os.rename(new_path, cached_copy_path)
+    return cached_copy_path
+
+
+def parse_suffix_list():
+    # Parse the suffix list into a per process cache.
+    cached_copy_path = ensure_current_suffix_list()
+    # At this point the cached copy must exist and is as valid as possible.
+    # Read and return the contents as a UTF-8 string.
+    with open(cached_copy_path, 'r', encoding='utf-8') as fp:
+        for line in fp:
+            if not line.strip() or line.startswith('//'):
+                continue
+            line = re.sub('\s.*', '', line)
+            if not line:
+                continue
+            parts = line.lower().split('.')
+            if parts[0].startswith('!'):
+                exception = True
+                parts = [parts[0][1:]] + parts[1:]
+            else:
+                exception = False
+            parts.reverse()
+            k = DOT.join(parts)
+            suffix_cache[k] = exception
 
 
 def _get_dom(d, l):
@@ -81,13 +136,12 @@ def _get_dom(d, l):
 def _get_org_dom(domain):
     # Given a domain name, this returns the corresponding Organizational
     # Domain which may be the same as the input.
-    global s_dict
-    if not s_dict:
-        _get_suffixes(config.dmarc.org_domain_data_url)
+    if len(suffix_cache) == 0:
+        parse_suffix_list()
     hits = []
     d = domain.lower().split('.')
     d.reverse()
-    for k in s_dict.keys():
+    for k in suffix_cache:
         ks = k.split('.')
         if len(d) >= len(ks):
            for i in range(len(ks)-1):
@@ -100,7 +154,7 @@ def _get_org_dom(domain):
             return _get_dom(d, 1)
     l = 0
     for k in hits:
-        if s_dict[k]:
+        if suffix_cache[k]:
             # It's an exception
             return _get_dom(d, len(k.split('.'))-1)
         if len(k.split('.')) > l:
```
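The heart of the change is `ensure_current_suffix_list()`, which caches the downloaded public suffix list on disk and encodes the cache's *expiry* time directly in the file's mtime, so a freshness check is a single `stat()` plus a comparison, and the cache is replaced atomically via `os.rename()`. Below is a minimal standalone sketch of that same pattern using only the standard library; the path, URL, and lifetime are illustrative placeholders rather than Mailman's configuration, and stdlib calls stand in for Mailman's `get()`, `now()`, and `as_timedelta()` helpers:

```python
import os
import time
import urllib.request

# Illustrative placeholders -- the real rule uses config.VAR_DIR and
# config.dmarc.org_domain_data_url from Mailman's configuration.
CACHE_PATH = '/tmp/public_suffix_list.dat'
SOURCE_URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
LIFETIME = 7 * 24 * 3600    # cache lifetime in seconds (one week)


def cache_is_fresh(path):
    # The writer stores the *expiry* time in the file's mtime, so the
    # cache is fresh exactly while its mtime is still in the future.
    try:
        return os.stat(path).st_mtime >= time.time()
    except FileNotFoundError:
        return False


def refresh_cache(path, url, lifetime):
    with urllib.request.urlopen(url) as response:
        content = response.read().decode('utf-8')
    # Write a sibling file first, then rename it into place. rename() is
    # atomic on POSIX, so readers never see a partially written cache.
    new_path = path + '.new'
    with open(new_path, 'w', encoding='utf-8') as fp:
        fp.write(content)
    expiry = time.time() + lifetime
    os.utime(new_path, (expiry, expiry))    # encode expiry in the mtime
    os.rename(new_path, path)


if not cache_is_fresh(CACHE_PATH):
    refresh_cache(CACHE_PATH, SOURCE_URL, LIFETIME)
```

Because `rename()` replaces the destination in one step on the same filesystem, a concurrent reader sees either the old cache or the new one, never a half-written file; that is why the patch writes to `cached_copy_path + '.new'` first and notes that the rename does not modify the carefully chosen mtime.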

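On the lookup side, `parse_suffix_list()` stores each rule under a key of reversed, dot-joined labels (`co.uk` becomes `uk.co`), with the value recording whether the rule is a `!` exception. That lets `_get_org_dom()` match a candidate domain label by label from the TLD inward: exception rules name the organizational domain outright, and otherwise the longest matching rule is the public suffix and the organizational domain adds one more label. The following is a simplified re-derivation of that matching logic for illustration only; `org_domain()` and the sample rules are hypothetical, and the real `_get_org_dom()` differs in its iteration details:

```python
DOT = '.'

# A handful of parsed rules, keyed the way the patch keys suffix_cache:
# labels reversed and dot-joined, value True for '!' exception rules.
rules = {
    'com': False,              # rule: com
    'uk.co': False,            # rule: co.uk
    'jp.kawasaki.*': False,    # rule: *.kawasaki.jp
    'jp.kawasaki.city': True,  # rule: !city.kawasaki.jp (exception)
}


def org_domain(domain, rules):
    # Reverse the labels so the reversed rule keys can be matched label
    # by label starting from the TLD.
    labels = list(reversed(domain.lower().split(DOT)))
    hits = []
    for key, is_exception in rules.items():
        rule = key.split(DOT)
        if len(rule) <= len(labels) and all(
                part in ('*', label) for part, label in zip(rule, labels)):
            hits.append((rule, is_exception))
    if not hits:
        # No rule matched; the list's implied default rule is '*', so the
        # organizational domain is the last two labels.
        return DOT.join(reversed(labels[:2]))
    # An exception rule wins outright and names the org domain itself.
    for rule, is_exception in hits:
        if is_exception:
            return DOT.join(reversed(labels[:len(rule)]))
    # Otherwise the longest match is the public suffix; the org domain
    # is that suffix plus one more label.
    longest = max(len(rule) for rule, _ in hits)
    return DOT.join(reversed(labels[:longest + 1]))


assert org_domain('mail.example.co.uk', rules) == 'example.co.uk'
assert org_domain('www.city.kawasaki.jp', rules) == 'city.kawasaki.jp'
```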