summaryrefslogtreecommitdiff
path: root/src/mailman/runners
diff options
context:
space:
mode:
author: Barry Warsaw, 2012-03-26 08:04:00 -0400
committer: Barry Warsaw, 2012-03-26 08:04:00 -0400
commit: 5cb68db131db32c643382f6fd1418a3659dc6f8e (patch)
tree: 13a2e02a48303804c2cae37c656937711bd37fa5 /src/mailman/runners
parent: cfb7138579ddb8a4adb10956ceb39089181271b4 (diff)
download: mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.tar.gz
mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.tar.zst
mailman-5cb68db131db32c643382f6fd1418a3659dc6f8e.zip
Architecture
------------
 * Internally, all datetimes are kept in the UTC timezone; however, because
   of LP: #280708, they are stored in the database in naive format.
 * `received_time` is now added to the message metadata by the LMTP runner
   instead of by `Switchboard.enqueue()`. The latter no longer depends on
   `received_time` in the metadata.
 * The `ArchiveRunner` no longer acquires a lock before it calls the
   individual archiver implementations, since not all of them need a lock.
   If they do, the implementations must acquire said lock themselves.

Configuration
-------------
 * New configuration variables `clobber_date` and `clobber_skew` are
   supported in every `[archiver.<name>]` section. These are used to
   determine under what circumstances a message destined for a specific
   archiver should have its `Date:` header clobbered.
Diffstat (limited to 'src/mailman/runners')
-rw-r--r-- src/mailman/runners/archive.py 113
-rw-r--r-- src/mailman/runners/lmtp.py 6
-rw-r--r-- src/mailman/runners/tests/test_archiver.py 146
-rw-r--r-- src/mailman/runners/tests/test_lmtp.py 16
4 files changed, 225 insertions, 56 deletions
diff --git a/src/mailman/runners/archive.py b/src/mailman/runners/archive.py
index 1c0a24785..7295a5c57 100644
--- a/src/mailman/runners/archive.py
+++ b/src/mailman/runners/archive.py
@@ -25,68 +25,83 @@ __all__ = [
]
-import os
+import copy
import logging
+from email.utils import parsedate_tz, mktime_tz
from datetime import datetime
-from email.utils import parsedate_tz, mktime_tz, formatdate
-from flufl.lock import Lock
from lazr.config import as_timedelta
from mailman.config import config
from mailman.core.runner import Runner
+from mailman.interfaces.archiver import ClobberDate
+from mailman.utilities.datetime import RFC822_DATE_FMT, now
+
log = logging.getLogger('mailman.error')
+def _should_clobber(msg, msgdata, archiver):
+ """Should the Date header in the original message get clobbered?"""
+ # Calculate the Date header of the message as a datetime. What if there
+ # are multiple Date headers, even in violation of the RFC? For now, take
+ # the first one. If there are no Date headers, then definitely clobber.
+ original_date = msg.get('date')
+ if original_date is None:
+ return True
+ section = getattr(config.archiver, archiver, None)
+ if section is None:
+ log.error('No archiver config section found: {0}'.format(archiver))
+ return False
+ try:
+ clobber = ClobberDate[section.clobber_date]
+ except ValueError:
+ log.error('Invalid clobber_date for "{0}": {1}'.format(
+ archiver, section.clobber_date))
+ return False
+ if clobber is ClobberDate.always:
+ return True
+ elif clobber is ClobberDate.never:
+ return False
+ # Maybe we'll clobber the date. Let's see if it's farther off from now
+ # than the skew period.
+ skew = as_timedelta(section.clobber_skew)
+ try:
+ time_tuple = parsedate_tz(original_date)
+ except (ValueError, OverflowError):
+ # The likely cause of this is that the year in the Date: field is
+ # horribly incorrect, e.g. (from SF bug # 571634):
+ #
+ # Date: Tue, 18 Jun 0102 05:12:09 +0500
+ #
+ # Obviously clobber such dates.
+ return True
+ if time_tuple is None:
+ # There was some other bogosity in the Date header.
+ return True
+ claimed_date = datetime.fromtimestamp(mktime_tz(time_tuple))
+ return (abs(now() - claimed_date) > skew)
+
+
+
class ArchiveRunner(Runner):
"""The archive runner."""
def _dispose(self, mlist, msg, msgdata):
- # Support clobber_date, i.e. setting the date in the archive to the
- # received date, not the (potentially bogus) Date: header of the
- # original message.
- clobber = False
- original_date = msg.get('date')
- received_time = formatdate(msgdata['received_time'])
- # FIXME 2012-03-23 BAW: LP: #963612
- ## if not original_date:
- ## clobber = True
- ## elif int(config.archiver.pipermail.clobber_date_policy) == 1:
- ## clobber = True
- ## elif int(config.archiver.pipermail.clobber_date_policy) == 2:
- ## # What's the timestamp on the original message?
- ## timetup = parsedate_tz(original_date)
- ## now = datetime.now()
- ## try:
- ## if not timetup:
- ## clobber = True
- ## else:
- ## utc_timestamp = datetime.fromtimestamp(mktime_tz(timetup))
- ## date_skew = as_timedelta(
- ## config.archiver.pipermail.allowable_sane_date_skew)
- ## clobber = (abs(now - utc_timestamp) > date_skew)
- ## except (ValueError, OverflowError):
- ## # The likely cause of this is that the year in the Date: field
- ## # is horribly incorrect, e.g. (from SF bug # 571634):
- ## # Date: Tue, 18 Jun 0102 05:12:09 +0500
- ## # Obviously clobber such dates.
- ## clobber = True
- ## if clobber:
- ## del msg['date']
- ## del msg['x-original-date']
- ## msg['Date'] = received_time
- ## if original_date:
- ## msg['X-Original-Date'] = original_date
- # Always put an indication of when we received the message.
- msg['X-List-Received-Date'] = received_time
- # While a list archiving lock is acquired, archive the message.
- with Lock(os.path.join(mlist.data_path, 'archive.lck')):
- for archiver in config.archivers:
- # A problem in one archiver should not prevent other archivers
- # from running.
- try:
- archiver.archive_message(mlist, msg)
- except Exception:
- log.exception('Broken archiver: %s' % archiver.name)
+ received_time = msgdata.get('received_time', now(strip_tzinfo=False))
+ for archiver in config.archivers:
+ msg_copy = copy.deepcopy(msg)
+ if _should_clobber(msg, msgdata, archiver.name):
+ original_date = msg_copy['date']
+ del msg_copy['date']
+ del msg_copy['x-original-date']
+ msg_copy['Date'] = received_time.strftime(RFC822_DATE_FMT)
+ if original_date:
+ msg_copy['X-Original-Date'] = original_date
+ # A problem in one archiver should not prevent other archivers
+ # from running.
+ try:
+ archiver.archive_message(mlist, msg_copy)
+ except Exception:
+ log.exception('Broken archiver: %s' % archiver.name)
diff --git a/src/mailman/runners/lmtp.py b/src/mailman/runners/lmtp.py
index bee111ad1..45fa5a783 100644
--- a/src/mailman/runners/lmtp.py
+++ b/src/mailman/runners/lmtp.py
@@ -44,8 +44,10 @@ from mailman.core.runner import Runner
from mailman.database.transaction import txn
from mailman.email.message import Message
from mailman.interfaces.listmanager import IListManager
+from mailman.utilities.datetime import now
from mailman.utilities.email import add_message_hash
+
elog = logging.getLogger('mailman.error')
qlog = logging.getLogger('mailman.runner')
slog = logging.getLogger('mailman.smtp')
@@ -181,6 +183,7 @@ class LMTPRunner(Runner, smtpd.SMTPServer):
# see if it's destined for a valid mailing list. If so, then queue
# the message to the appropriate place and record a 250 status for
# that recipient. If not, record a failure status for that recipient.
+ received_time = now()
for to in rcpttos:
try:
to = parseaddr(to)[1].lower()
@@ -196,7 +199,8 @@ class LMTPRunner(Runner, smtpd.SMTPServer):
# queue.
queue = None
msgdata = dict(listname=listname,
- original_size=msg.original_size)
+ original_size=msg.original_size,
+ received_time=received_time)
canonical_subaddress = SUBADDRESS_NAMES.get(subaddress)
queue = SUBADDRESS_QUEUES.get(canonical_subaddress)
if subaddress is None:
diff --git a/src/mailman/runners/tests/test_archiver.py b/src/mailman/runners/tests/test_archiver.py
index 865a2be67..ca09de9fa 100644
--- a/src/mailman/runners/tests/test_archiver.py
+++ b/src/mailman/runners/tests/test_archiver.py
@@ -39,6 +39,31 @@ from mailman.testing.helpers import (
make_testable_runner,
specialized_message_from_string as mfs)
from mailman.testing.layers import ConfigLayer
+from mailman.utilities.datetime import RFC822_DATE_FMT, factory, now
+
+
+
+# This helper will set up a specific archiver as appropriate for a specific
+# test. It assumes the setUp() will just disable all archivers.
+def archiver(name, enable=False, clobber=None, skew=None):
+ def decorator(func):
+ def wrapper(*args, **kws):
+ config_name = 'archiver {0}'.format(name)
+ section = """
+ [archiver.{0}]
+ enable: {1}
+ clobber_date: {2}
+ clobber_skew: {3}
+ """.format(name,
+ 'yes' if enable else 'no',
+ clobber, skew)
+ config.push(config_name, section)
+ try:
+ return func(*args, **kws)
+ finally:
+ config.pop(config_name)
+ return wrapper
+ return decorator
@@ -54,7 +79,7 @@ class DummyArchiver:
def permalink(mlist, msg):
filename = msg['x-message-id-hash']
return 'http://archive.example.com/' + filename
-
+
@staticmethod
def archive_message(mlist, msg):
filename = msg['x-message-id-hash']
@@ -73,11 +98,12 @@ class TestArchiveRunner(unittest.TestCase):
def setUp(self):
self._mlist = create_list('test@example.com')
+ self._now = now()
# Enable just the dummy archiver.
config.push('dummy', """
[archiver.dummy]
class: mailman.runners.tests.test_archiver.DummyArchiver
- enable: yes
+ enable: no
[archiver.prototype]
enable: no
[archiver.mhonarc]
@@ -100,10 +126,13 @@ First post!
def tearDown(self):
config.pop('dummy')
+ @archiver('dummy', enable=True)
def test_archive_runner(self):
# Ensure that the archive runner ends up archiving the message.
self._archiveq.enqueue(
- self._msg, {}, listname=self._mlist.fqdn_listname)
+ self._msg, {},
+ listname=self._mlist.fqdn_listname,
+ received_time=now())
self._runner.run()
# There should now be a copy of the message in the file system.
filename = os.path.join(
@@ -112,11 +141,114 @@ First post!
archived = message_from_file(fp)
self.assertEqual(archived['message-id'], '<first>')
+ @archiver('dummy', enable=True)
def test_archive_runner_with_dated_message(self):
- # LP: #963612 FIXME
- self._msg['Date'] = 'Sat, 11 Mar 2011 03:19:38 -0500'
+ # Date headers don't throw off the archiver runner.
+ self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT)
+ self._archiveq.enqueue(
+ self._msg, {},
+ listname=self._mlist.fqdn_listname,
+ received_time=now())
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000')
+
+ @archiver('dummy', enable=True, clobber='never')
+ def test_clobber_date_never(self):
+ # Even if the Date header is insanely off from the received time of
+ # the message, if clobber_date is 'never', the header is not clobbered.
+ self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT)
+ self._archiveq.enqueue(
+ self._msg, {},
+ listname=self._mlist.fqdn_listname,
+ received_time=now())
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000')
+
+ @archiver('dummy', enable=True)
+ def test_clobber_dateless(self):
+ # A message with no Date header will always get clobbered.
+ self.assertEqual(self._msg['date'], None)
+ # Now, before enqueuing the message (well, really, calling 'now()'
+ # again), fast forward a few days.
+ self._archiveq.enqueue(
+ self._msg, {},
+ listname=self._mlist.fqdn_listname,
+ received_time=now(strip_tzinfo=False))
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000')
+
+ @archiver('dummy', enable=True, clobber='always')
+ def test_clobber_date_always(self):
+ # The date always gets clobbered with the current received time.
+ self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT)
+ # Now, before enqueuing the message (well, really, calling 'now()'
+ # again as will happen in the runner), fast forward a few days.
+ self._archiveq.enqueue(
+ self._msg, {},
+ listname=self._mlist.fqdn_listname)
+ factory.fast_forward(days=4)
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Fri, 05 Aug 2005 07:49:23 +0000')
+ self.assertEqual(archived['x-original-date'],
+ 'Mon, 01 Aug 2005 07:49:23 +0000')
+
+ @archiver('dummy', enable=True, clobber='maybe', skew='1d')
+ def test_clobber_date_maybe_when_insane(self):
+ # The date is clobbered if it's farther off from now than its skew
+ # period.
+ self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT)
+ # Now, before enqueuing the message (well, really, calling 'now()'
+ # again as will happen in the runner), fast forward a few days.
+ self._archiveq.enqueue(
+ self._msg, {},
+ listname=self._mlist.fqdn_listname)
+ factory.fast_forward(days=4)
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Fri, 05 Aug 2005 07:49:23 +0000')
+ self.assertEqual(archived['x-original-date'],
+ 'Mon, 01 Aug 2005 07:49:23 +0000')
+
+ @archiver('dummy', enable=True, clobber='maybe', skew='10d')
+ def test_clobber_date_maybe_when_sane(self):
+ # The date is not clobbered if it's nearer to now than its skew
+ # period.
+ self._msg['Date'] = now(strip_tzinfo=False).strftime(RFC822_DATE_FMT)
+ # Now, before enqueuing the message (well, really, calling 'now()'
+ # again as will happen in the runner), fast forward a few days.
self._archiveq.enqueue(
- self._msg, {}, listname=self._mlist.fqdn_listname)
+ self._msg, {},
+ listname=self._mlist.fqdn_listname)
+ factory.fast_forward(days=4)
self._runner.run()
# There should now be a copy of the message in the file system.
filename = os.path.join(
@@ -124,3 +256,5 @@ First post!
with open(filename) as fp:
archived = message_from_file(fp)
self.assertEqual(archived['message-id'], '<first>')
+ self.assertEqual(archived['date'], 'Mon, 01 Aug 2005 07:49:23 +0000')
+ self.assertEqual(archived['x-original-date'], None)
diff --git a/src/mailman/runners/tests/test_lmtp.py b/src/mailman/runners/tests/test_lmtp.py
index 2c4defe59..87b69c7e4 100644
--- a/src/mailman/runners/tests/test_lmtp.py
+++ b/src/mailman/runners/tests/test_lmtp.py
@@ -28,6 +28,8 @@ __all__ = [
import smtplib
import unittest
+from datetime import datetime
+
from mailman.app.lifecycle import create_list
from mailman.config import config
from mailman.testing.helpers import get_lmtp_client, get_queue_messages
@@ -96,3 +98,17 @@ Subject: This has a Message-ID but no X-Message-ID-Hash
self.assertEqual(len(all_headers), 1)
self.assertEqual(messages[0].msg['x-message-id-hash'],
'MS6QLWERIJLGCRF44J7USBFDELMNT2BW')
+
+ def test_received_time(self):
+ # The LMTP runner adds a `received_time` key to the metadata.
+ self._lmtp.sendmail('anne@example.com', ['test@example.com'], """\
+From: anne@example.com
+To: test@example.com
+Subject: This has no Message-ID header
+Message-ID: <ant>
+
+""")
+ messages = get_queue_messages('in')
+ self.assertEqual(len(messages), 1)
+ self.assertEqual(messages[0].msgdata['received_time'],
+ datetime(2005, 8, 1, 7, 49, 23))