diff options
| author | Barry Warsaw | 2012-03-26 08:04:00 -0400 |
|---|---|---|
| committer | Barry Warsaw | 2012-03-26 08:04:00 -0400 |
| commit | 5cb68db131db32c643382f6fd1418a3659dc6f8e (patch) | |
| tree | 13a2e02a48303804c2cae37c656937711bd37fa5 /src/mailman/runners | |
| parent | cfb7138579ddb8a4adb10956ceb39089181271b4 (diff) | |
| download | mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.tar.gz mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.tar.zst mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.zip | |
Architecture
------------
* Internally, all datetimes are kept in the UTC timezone; however, because of
LP: #280708, they are stored in the database in naive format.
* `received_time` is now added to the message metadata by the LMTP runner
instead of by `Switchboard.enqueue()`. The latter no longer depends on
`received_time` in the metadata.
* The `ArchiveRunner` no longer acquires a lock before it calls the
individual archiver implementations, since not all of them need a lock. If
they do, the implementations must acquire said lock themselves.
Configuration
-------------
* New configuration variables `clobber_date` and `clobber_skew` are supported in
every `[archiver.<name>]` section. These are used to determine under what
circumstances a message destined for a specific archiver should have its
`Date:` header clobbered.
Diffstat (limited to 'src/mailman/runners')
| -rw-r--r-- | src/mailman/runners/archive.py | 113 | ||||
| -rw-r--r-- | src/mailman/runners/lmtp.py | 6 | ||||
| -rw-r--r-- | src/mailman/runners/tests/test_archiver.py | 146 | ||||
| -rw-r--r-- | src/mailman/runners/tests/test_lmtp.py | 16 |
4 files changed, 225 insertions, 56 deletions
diff --git a/src/mailman/runners/archive.py b/src/mailman/runners/archive.py index 1c0a24785..7295a5c57 100644 --- a/src/mailman/runners/archive.py +++ b/src/mailman/runners/archive.py @@ -25,68 +25,83 @@ __all__ = [ ] -import os +import copy import logging +from email.utils import parsedate_tz, mktime_tz from datetime import datetime -from email.utils import parsedate_tz, mktime_tz, formatdate -from flufl.lock import Lock from lazr.config import as_timedelta from mailman.config import config from mailman.core.runner import Runner +from mailman.interfaces.archiver import ClobberDate +from mailman.utilities.datetime import RFC822_DATE_FMT, now + log = logging.getLogger('mailman.error') +def _should_clobber(msg, msgdata, archiver): + """Should the Date header in the original message get clobbered?""" + # Calculate the Date header of the message as a datetime. What if there + # are multiple Date headers, even in violation of the RFC? For now, take + # the first one. If there are no Date headers, then definitely clobber. + original_date = msg.get('date') + if original_date is None: + return True + section = getattr(config.archiver, archiver, None) + if section is None: + log.error('No archiver config section found: {0}'.format(archiver)) + return False + try: + clobber = ClobberDate[section.clobber_date] + except ValueError: + log.error('Invalid clobber_date for "{0}": {1}'.format( + archiver, section.clobber_date)) + return False + if clobber is ClobberDate.always: + return True + elif clobber is ClobberDate.never: + return False + # Maybe we'll clobber the date. Let's see if it's farther off from now + # than the skew period. + skew = as_timedelta(section.clobber_skew) + try: + time_tuple = parsedate_tz(original_date) + except (ValueError, OverflowError): + # The likely cause of this is that the year in the Date: field is + # horribly incorrect, e.g. (from SF bug # 571634): + # + # Date: Tue, 18 Jun 0102 05:12:09 +0500 + # + # Obviously clobber such dates. 
+ return True + if time_tuple is None: + # There was some other bogosity in the Date header. + return True + claimed_date = datetime.fromtimestamp(mktime_tz(time_tuple)) + return (abs(now() - claimed_date) > skew) + + + class ArchiveRunner(Runner): """The archive runner.""" def _dispose(self, mlist, msg, msgdata): - # Support clobber_date, i.e. setting the date in the archive to the - # received date, not the (potentially bogus) Date: header of the - # original message. - clobber = False - original_date = msg.get('date') - received_time = formatdate(msgdata['received_time']) - # FIXME 2012-03-23 BAW: LP: #963612 - ## if not original_date: - ## clobber = True - ## elif int(config.archiver.pipermail.clobber_date_policy) == 1: - ## clobber = True - ## elif int(config.archiver.pipermail.clobber_date_policy) == 2: - ## # What's the timestamp on the original message? - ## timetup = parsedate_tz(original_date) - ## now = datetime.now() - ## try: - ## if not timetup: - ## clobber = True - ## else: - ## utc_timestamp = datetime.fromtimestamp(mktime_tz(timetup)) - ## date_skew = as_timedelta( - ## config.archiver.pipermail.allowable_sane_date_skew) - ## clobber = (abs(now - utc_timestamp) > date_skew) - ## except (ValueError, OverflowError): - ## # The likely cause of this is that the year in the Date: field - ## # is horribly incorrect, e.g. (from SF bug # 571634): - ## # Date: Tue, 18 Jun 0102 05:12:09 +0500 - ## # Obviously clobber such dates. - ## clobber = True - ## if clobber: - ## del msg['date'] - ## del msg['x-original-date'] - ## msg['Date'] = received_time - ## if original_date: - ## msg['X-Original-Date'] = original_date - # Always put an indication of when we received the message. - msg['X-List-Received-Date'] = received_time - # While a list archiving lock is acquired, archive the message. 
- with Lock(os.path.join(mlist.data_path, 'archive.lck')): - for archiver in config.archivers: - # A problem in one archiver should not prevent other archivers - # from running. - try: - archiver.archive_message(mlist, msg) - except Exception: - log.exception('Broken archiver: %s' % archiver.name) + received_time = msgdata.get('received_time', now(strip_tzinfo=False)) + for archiver in config.archivers: + msg_copy = copy.deepcopy(msg) + if _should_clobber(msg, msgdata, archiver.name): + original_date = msg_copy['date'] + del msg_copy['date'] + del msg_copy['x-original-date'] + msg_copy['Date'] = received_time.strftime(RFC822_DATE_FMT) + if original_date: + msg_copy['X-Original-Date'] = original_date + # A problem in one archiver should not prevent other archivers + # from running. + try: + archiver.archive_message(mlist, msg_copy) + except Exception: + log.exception('Broken archiver: %s' % archiver.name) diff --git a/src/mailman/runners/lmtp.py b/src/mailman/runners/lmtp.py index bee111ad1..45fa5a783 100644 --- a/src/mailman/runners/lmtp.py +++ b/src/mailman/runners/lmtp.py @@ -44,8 +44,10 @@ from mailman.core.runner import Runner from mailman.database.transaction import txn from mailman.email.message import Message from mailman.interfaces.listmanager import IListManager +from mailman.utilities.datetime import now from mailman.utilities.email import add_message_hash + elog = logging.getLogger('mailman.error') qlog = logging.getLogger('mailman.runner') slog = logging.getLogger('mailman.smtp') @@ -181,6 +183,7 @@ class LMTPRunner(Runner, smtpd.SMTPServer): # see if it's destined for a valid mailing list. If so, then queue # the message to the appropriate place and record a 250 status for # that recipient. If not, record a failure status for that recipient. + received_time = now() for to in rcpttos: try: to = parseaddr(to)[1].lower() @@ -196,7 +199,8 @@ class LMTPRunner(Runner, smtpd.SMTPServer): # queue. 
queue = None msgdata = dict(listname=listname, - original_size=msg.original_size) + original_size=msg.original_size, + received_time=received_time) canonical_subaddress = SUBADDRESS_NAMES.get(subaddress) queue = SUBADDRESS_QUEUES.get(canonical_subaddress) if subaddress is None: diff --git a/src/mailman/runners/tests/test_archiver.py b/src/mailman/runners/tests/test_archiver.py index 865a2be67..ca09de9fa 100644 --- a/src/mailman/runners/tests/test_archiver.py +++ b/src/mailman/runners/tests/test_archiver.py @@ -39,6 +39,31 @@ from mailman.testing.helpers import ( make_testable_runner, specialized_message_from_string as mfs) from mailman.testing.layers import ConfigLayer +from mailman.utilities.datetime import RFC822_DATE_FMT, factory, now + + + +# This helper will set up a specific archiver as appropriate for a specific +# test. It assumes the setUp() will just disable all archivers. +def archiver(name, enable=False, clobber=None, skew=None): + def decorator(func): + def wrapper(*args, **kws): + config_name = 'archiver {0}'.format(name) + section = """ + [archiver.{0}] + enable: {1} + clobber_date: {2} + clobber_skew: {3} + """.format(name, + 'yes' if enable else 'no', + clobber, skew) + config.push(config_name, section) + try: + return func(*args, **kws) + finally: + config.pop(config_name) + return wrapper + return decorator @@ -54,7 +79,7 @@ class DummyArchiver: def permalink(mlist, msg): filename = msg['x-message-id-hash'] return 'http://archive.example.com/' + filename - + @staticmethod def archive_message(mlist, msg): filename = msg['x-message-id-hash'] @@ -73,11 +98,12 @@ class TestArchiveRunner(unittest.TestCase): def setUp(self): self._mlist = create_list('test@example.com') + self._now = now() # Enable just the dummy archiver. config.push('dummy', """ [archiver.dummy] class: mailman.runners.tests.test_archiver.DummyArchiver - enable: yes + enable: no [archiver.prototype] enable: no [archiver.mhonarc] @@ -100,10 +126,13 @@ First post! 
def tearDown(self): config.pop('dummy') + @archiver('dummy', enable=True) def test_archive_runner(self): # Ensure that the archive runner ends up archiving the message. self._archiveq.enqueue( - self._msg, {}, listname=self._mlist.fqdn_listname) + self._msg, {}, + listname=self._mlist.fqdn_listname, + received_time=now()) self._runner.run() # There should now be a copy of the message in the file system. filename = os.path.join( @@ -112,11 +141,114 @@ First post! archived = message_from_file(fp) self.assertEqual(archived['message-id'], '<first>') + @archiver('dummy', enable=True) def test_archive_runner_with_dated_message(self): - # LP: #963612 FIXME - self._msg['Date'] = 'Sat, 11 Mar 2011 03:19:38 -0500' + # Date headers don't throw off the archiver runner. + self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT) + self._archiveq.enqueue( + self._msg, {}, + listname=self._mlist.fqdn_listname, + received_time=now()) + self._runner.run() + # There should now be a copy of the message in the file system. + filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000') + + @archiver('dummy', enable=True, clobber='never') + def test_clobber_date_never(self): + # Even if the Date header is insanely off from the received time of + # the message, if clobber_date is 'never', the header is not clobbered. + self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT) + self._archiveq.enqueue( + self._msg, {}, + listname=self._mlist.fqdn_listname, + received_time=now()) + self._runner.run() + # There should now be a copy of the message in the file system. 
+ filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000') + + @archiver('dummy', enable=True) + def test_clobber_dateless(self): + # A message with no Date header will always get clobbered. + self.assertEqual(self._msg['date'], None) + # Now, before enqueuing the message (well, really, calling 'now()' + # again), fast forward a few days. + self._archiveq.enqueue( + self._msg, {}, + listname=self._mlist.fqdn_listname, + received_time=now(strip_tzinfo=False)) + self._runner.run() + # There should now be a copy of the message in the file system. + filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000') + + @archiver('dummy', enable=True, clobber='always') + def test_clobber_date_always(self): + # The date always gets clobbered with the current received time. + self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT) + # Now, before enqueuing the message (well, really, calling 'now()' + # again as will happen in the runner), fast forward a few days. + self._archiveq.enqueue( + self._msg, {}, + listname=self._mlist.fqdn_listname) + factory.fast_forward(days=4) + self._runner.run() + # There should now be a copy of the message in the file system. 
+ filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Fri, 05 Aug 2005 07:49:23 +0000') + self.assertEqual(archived['x-original-date'], + 'Mon, 01 Aug 2005 07:49:23 +0000') + + @archiver('dummy', enable=True, clobber='maybe', skew='1d') + def test_clobber_date_maybe_when_insane(self): + # The date is clobbered if it's farther off from now than its skew + # period. + self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT) + # Now, before enqueuing the message (well, really, calling 'now()' + # again as will happen in the runner), fast forward a few days. + self._archiveq.enqueue( + self._msg, {}, + listname=self._mlist.fqdn_listname) + factory.fast_forward(days=4) + self._runner.run() + # There should now be a copy of the message in the file system. + filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Fri, 05 Aug 2005 07:49:23 +0000') + self.assertEqual(archived['x-original-date'], + 'Mon, 01 Aug 2005 07:49:23 +0000') + + @archiver('dummy', enable=True, clobber='maybe', skew='10d') + def test_clobber_date_maybe_when_sane(self): + # The date is not clobbered if it's nearer to now than its skew + # period. + self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT) + # Now, before enqueuing the message (well, really, calling 'now()' + # again as will happen in the runner), fast forward a few days. self._archiveq.enqueue( - self._msg, {}, listname=self._mlist.fqdn_listname) + self._msg, {}, + listname=self._mlist.fqdn_listname) + factory.fast_forward(days=4) self._runner.run() # There should now be a copy of the message in the file system. 
filename = os.path.join( @@ -124,3 +256,5 @@ First post! with open(filename) as fp: archived = message_from_file(fp) self.assertEqual(archived['message-id'], '<first>') + self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000') + self.assertEqual(archived['x-original-date'], None) diff --git a/src/mailman/runners/tests/test_lmtp.py b/src/mailman/runners/tests/test_lmtp.py index 2c4defe59..87b69c7e4 100644 --- a/src/mailman/runners/tests/test_lmtp.py +++ b/src/mailman/runners/tests/test_lmtp.py @@ -28,6 +28,8 @@ __all__ = [ import smtplib import unittest +from datetime import datetime + from mailman.app.lifecycle import create_list from mailman.config import config from mailman.testing.helpers import get_lmtp_client, get_queue_messages @@ -96,3 +98,17 @@ Subject: This has a Message-ID but no X-Message-ID-Hash self.assertEqual(len(all_headers), 1) self.assertEqual(messages[0].msg['x-message-id-hash'], 'MS6QLWERIJLGCRF44J7USBFDELMNT2BW') + + def test_received_time(self): + # The LMTP runner adds a `received_time` key to the metadata. + self._lmtp.sendmail('anne@example.com', ['test@example.com'], """\ +From: anne@example.com +To: test@example.com +Subject: This has no Message-ID header +Message-ID: <ant> + +""") + messages = get_queue_messages('in') + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0].msgdata['received_time'], + datetime(2005, 8, 1, 7, 49, 23)) |
