Diffstat (limited to 'mailman/queue')
-rw-r--r--  mailman/queue/__init__.py           | 415
-rw-r--r--  mailman/queue/archive.py            |  72
-rw-r--r--  mailman/queue/bounce.py             | 316
-rw-r--r--  mailman/queue/command.py            | 230
-rw-r--r--  mailman/queue/docs/OVERVIEW.txt     |  78
-rw-r--r--  mailman/queue/docs/incoming.txt     | 198
-rw-r--r--  mailman/queue/docs/news.txt         | 158
-rw-r--r--  mailman/queue/docs/outgoing.txt     | 155
-rw-r--r--  mailman/queue/docs/runner.txt       |  70
-rw-r--r--  mailman/queue/docs/switchboard.txt  | 149
-rw-r--r--  mailman/queue/http.py               |  73
-rw-r--r--  mailman/queue/incoming.py           |  44
-rw-r--r--  mailman/queue/lmtp.py               | 193
-rw-r--r--  mailman/queue/maildir.py            | 189
-rw-r--r--  mailman/queue/news.py               | 166
-rw-r--r--  mailman/queue/outgoing.py           | 130
-rw-r--r--  mailman/queue/pipeline.py           |  38
-rw-r--r--  mailman/queue/retry.py              |  40
-rw-r--r--  mailman/queue/virgin.py             |  44
19 files changed, 2758 insertions, 0 deletions
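
The mailman/queue/__init__.py added below describes each queue entry as a single .pck file containing two pickles -- the message first, then the metadata dictionary -- under a file base name of the form <received_time>+<sha hexdigest>. As a rough, self-contained sketch of that layout (illustrative only; these helper names are not part of the diff, and the real Switchboard.enqueue() additionally stamps a schema version, fsyncs, and renames a .tmp file into place):

    # Python 2, using the same stdlib modules as the diff (cPickle, sha).
    import os
    import sha
    import time
    import cPickle

    def write_entry(queue_dir, msg_text, metadata, listname='--nolist--'):
        # File base name: received time, '+', SHA-1 hexdigest of the pickled
        # message mixed with the list name and timestamp, as in
        # Switchboard.enqueue().
        now = time.time()
        msgsave = cPickle.dumps(msg_text, 1)
        digest = sha.new(msgsave + listname + repr(now)).hexdigest()
        filebase = repr(now) + '+' + digest
        fp = open(os.path.join(queue_dir, filebase + '.pck'), 'w')
        try:
            fp.write(msgsave)              # first pickle: the message
            cPickle.dump(metadata, fp, 1)  # second pickle: the metadata dict
        finally:
            fp.close()
        return filebase

    def read_entry(queue_dir, filebase):
        fp = open(os.path.join(queue_dir, filebase + '.pck'))
        try:
            msg_text = cPickle.load(fp)    # unpickled in the same order
            metadata = cPickle.load(fp)
        finally:
            fp.close()
        return msg_text, metadata

The '+' separator is what lets get_files() split the name back into a timestamp (for FIFO ordering) and a digest (for assigning the entry to one of the parallel qrunner hash slices).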
diff --git a/mailman/queue/__init__.py b/mailman/queue/__init__.py new file mode 100644 index 000000000..fb6b07479 --- /dev/null +++ b/mailman/queue/__init__.py @@ -0,0 +1,415 @@ +# Copyright (C) 2001-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Queing and dequeuing message/metadata pickle files. + +Messages are represented as email.message.Message objects (or an instance ofa +subclass). Metadata is represented as a Python dictionary. For every +message/metadata pair in a queue, a single file containing two pickles is +written. First, the message is written to the pickle, then the metadata +dictionary is written. +""" + +from __future__ import with_statement + +__metaclass__ = type +__all__ = [ + 'Runner', + 'Switchboard', + ] + + +import os +import sha +import time +import email +import errno +import cPickle +import logging +import marshal +import traceback + +from cStringIO import StringIO +from zope.interface import implements + +from mailman import i18n +from mailman import Message +from mailman import Utils +from mailman.configuration import config +from mailman.interfaces import IRunner, ISwitchboard + +# 20 bytes of all bits set, maximum sha.digest() value +shamax = 0xffffffffffffffffffffffffffffffffffffffffL + +# Small increment to add to time in case two entries have the same time. This +# prevents skipping one of two entries with the same time until the next pass. +DELTA = .0001 + +log = logging.getLogger('mailman.error') + + + +class Switchboard: + implements(ISwitchboard) + + def __init__(self, whichq, slice=None, numslices=1, recover=False): + self._whichq = whichq + # Create the directory if it doesn't yet exist. + Utils.makedirs(self._whichq, 0770) + # Fast track for no slices + self._lower = None + self._upper = None + # BAW: test performance and end-cases of this algorithm + if numslices <> 1: + self._lower = ((shamax + 1) * slice) / numslices + self._upper = (((shamax + 1) * (slice + 1)) / numslices) - 1 + if recover: + self.recover_backup_files() + + @property + def queue_directory(self): + return self._whichq + + def enqueue(self, _msg, _metadata=None, **_kws): + if _metadata is None: + _metadata = {} + # Calculate the SHA hexdigest of the message to get a unique base + # filename. We're also going to use the digest as a hash into the set + # of parallel qrunner processes. + data = _metadata.copy() + data.update(_kws) + listname = data.get('listname', '--nolist--') + # Get some data for the input to the sha hash. + now = time.time() + if not data.get('_plaintext'): + protocol = 1 + msgsave = cPickle.dumps(_msg, protocol) + else: + protocol = 0 + msgsave = cPickle.dumps(str(_msg), protocol) + # listname is unicode but the input to the hash function must be an + # 8-bit string (eventually, a bytes object). 
+ hashfood = msgsave + listname.encode('utf-8') + repr(now) + # Encode the current time into the file name for FIFO sorting. The + # file name consists of two parts separated by a '+': the received + # time for this message (i.e. when it first showed up on this system) + # and the sha hex digest. + rcvtime = data.setdefault('received_time', now) + filebase = repr(rcvtime) + '+' + sha.new(hashfood).hexdigest() + filename = os.path.join(self._whichq, filebase + '.pck') + tmpfile = filename + '.tmp' + # Always add the metadata schema version number + data['version'] = config.QFILE_SCHEMA_VERSION + # Filter out volatile entries + for k in data.keys(): + if k.startswith('_'): + del data[k] + # We have to tell the dequeue() method whether to parse the message + # object or not. + data['_parsemsg'] = (protocol == 0) + # Write to the pickle file the message object and metadata. + with open(tmpfile, 'w') as fp: + fp.write(msgsave) + cPickle.dump(data, fp, protocol) + fp.flush() + os.fsync(fp.fileno()) + os.rename(tmpfile, filename) + return filebase + + def dequeue(self, filebase): + # Calculate the filename from the given filebase. + filename = os.path.join(self._whichq, filebase + '.pck') + backfile = os.path.join(self._whichq, filebase + '.bak') + # Read the message object and metadata. + with open(filename) as fp: + # Move the file to the backup file name for processing. If this + # process crashes uncleanly the .bak file will be used to + # re-instate the .pck file in order to try again. XXX what if + # something caused Python to constantly crash? Is it possible + # that we'd end up mail bombing recipients or crushing the + # archiver? How would we defend against that? + os.rename(filename, backfile) + msg = cPickle.load(fp) + data = cPickle.load(fp) + if data.get('_parsemsg'): + # Calculate the original size of the text now so that we won't + # have to generate the message later when we do size restriction + # checking. + original_size = len(msg) + msg = email.message_from_string(msg, Message.Message) + msg.original_size = original_size + data['original_size'] = original_size + return msg, data + + def finish(self, filebase, preserve=False): + bakfile = os.path.join(self._whichq, filebase + '.bak') + try: + if preserve: + psvfile = os.path.join(config.SHUNTQUEUE_DIR, + filebase + '.psv') + # Create the directory if it doesn't yet exist. + Utils.makedirs(config.SHUNTQUEUE_DIR, 0770) + os.rename(bakfile, psvfile) + else: + os.unlink(bakfile) + except EnvironmentError, e: + log.exception('Failed to unlink/preserve backup file: %s', bakfile) + + @property + def files(self): + return self.get_files() + + def get_files(self, extension='.pck'): + times = {} + lower = self._lower + upper = self._upper + for f in os.listdir(self._whichq): + # By ignoring anything that doesn't end in .pck, we ignore + # tempfiles and avoid a race condition. + filebase, ext = os.path.splitext(f) + if ext <> extension: + continue + when, digest = filebase.split('+', 1) + # Throw out any files which don't match our bitrange. BAW: test + # performance and end-cases of this algorithm. MAS: both + # comparisons need to be <= to get complete range. + if lower is None or (lower <= long(digest, 16) <= upper): + key = float(when) + while key in times: + key += DELTA + times[key] = filebase + # FIFO sort + return [times[key] for key in sorted(times)] + + def recover_backup_files(self): + # Move all .bak files in our slice to .pck. 
It's impossible for both + # to exist at the same time, so the move is enough to ensure that our + # normal dequeuing process will handle them. + for filebase in self.get_files('.bak'): + src = os.path.join(self._whichq, filebase + '.bak') + dst = os.path.join(self._whichq, filebase + '.pck') + os.rename(src, dst) + + + +class Runner: + implements(IRunner) + + QDIR = None + SLEEPTIME = config.QRUNNER_SLEEP_TIME + + def __init__(self, slice=None, numslices=1): + self._kids = {} + # Create our own switchboard. Don't use the switchboard cache because + # we want to provide slice and numslice arguments. + self._switchboard = Switchboard(self.QDIR, slice, numslices, True) + # Create the shunt switchboard + self._shunt = Switchboard(config.SHUNTQUEUE_DIR) + self._stop = False + + def __repr__(self): + return '<%s at %s>' % (self.__class__.__name__, id(self)) + + def stop(self): + self._stop = True + + def run(self): + # Start the main loop for this queue runner. + try: + try: + while True: + # Once through the loop that processes all the files in + # the queue directory. + filecnt = self._oneloop() + # Do the periodic work for the subclass. BAW: this + # shouldn't be called here. There should be one more + # _doperiodic() call at the end of the _oneloop() loop. + self._doperiodic() + # If the stop flag is set, we're done. + if self._stop: + break + # Give the runner an opportunity to snooze for a while, + # but pass it the file count so it can decide whether to + # do more work now or not. + self._snooze(filecnt) + except KeyboardInterrupt: + pass + finally: + # We've broken out of our main loop, so we want to reap all the + # subprocesses we've created and do any other necessary cleanups. + self._cleanup() + + def _oneloop(self): + # First, list all the files in our queue directory. + # Switchboard.files() is guaranteed to hand us the files in FIFO + # order. Return an integer count of the number of files that were + # available for this qrunner to process. + files = self._switchboard.files + for filebase in files: + try: + # Ask the switchboard for the message and metadata objects + # associated with this filebase. + msg, msgdata = self._switchboard.dequeue(filebase) + except Exception, e: + # This used to just catch email.Errors.MessageParseError, + # but other problems can occur in message parsing, e.g. + # ValueError, and exceptions can occur in unpickling too. + # We don't want the runner to die, so we just log and skip + # this entry, but preserve it for analysis. + self._log(e) + log.error('Skipping and preserving unparseable message: %s', + filebase) + self._switchboard.finish(filebase, preserve=True) + continue + try: + self._onefile(msg, msgdata) + self._switchboard.finish(filebase) + except Exception, e: + # All runners that implement _dispose() must guarantee that + # exceptions are caught and dealt with properly. Still, there + # may be a bug in the infrastructure, and we do not want those + # to cause messages to be lost. Any uncaught exceptions will + # cause the message to be stored in the shunt queue for human + # intervention. + self._log(e) + # Put a marker in the metadata for unshunting + msgdata['whichq'] = self._switchboard.queue_directory + # It is possible that shunting can throw an exception, e.g. a + # permissions problem or a MemoryError due to a really large + # message. Try to be graceful. 
+ try: + new_filebase = self._shunt.enqueue(msg, msgdata) + log.error('SHUNTING: %s', new_filebase) + self._switchboard.finish(filebase) + except Exception, e: + # The message wasn't successfully shunted. Log the + # exception and try to preserve the original queue entry + # for possible analysis. + self._log(e) + log.error('SHUNTING FAILED, preserving original entry: %s', + filebase) + self._switchboard.finish(filebase, preserve=True) + # Other work we want to do each time through the loop + Utils.reap(self._kids, once=True) + self._doperiodic() + if self._shortcircuit(): + break + return len(files) + + def _onefile(self, msg, msgdata): + # Do some common sanity checking on the message metadata. It's got to + # be destined for a particular mailing list. This switchboard is used + # to shunt off badly formatted messages. We don't want to just trash + # them because they may be fixable with human intervention. Just get + # them out of our site though. + # + # Find out which mailing list this message is destined for. + listname = msgdata.get('listname') + mlist = config.db.list_manager.get(listname) + if not mlist: + log.error('Dequeuing message destined for missing list: %s', + listname) + self._shunt.enqueue(msg, msgdata) + return + # Now process this message, keeping track of any subprocesses that may + # have been spawned. We'll reap those later. + # + # We also want to set up the language context for this message. The + # context will be the preferred language for the user if a member of + # the list, or the list's preferred language. However, we must take + # special care to reset the defaults, otherwise subsequent messages + # may be translated incorrectly. BAW: I'm not sure I like this + # approach, but I can't think of anything better right now. + sender = msg.get_sender() + member = mlist.members.get_member(sender) + if member: + lang = member.preferred_language + else: + lang = mlist.preferred_language + with i18n.using_language(lang): + msgdata['lang'] = lang + keepqueued = self._dispose(mlist, msg, msgdata) + # Keep tabs on any child processes that got spawned. + kids = msgdata.get('_kids') + if kids: + self._kids.update(kids) + if keepqueued: + self._switchboard.enqueue(msg, msgdata) + + def _log(self, exc): + log.error('Uncaught runner exception: %s', exc) + s = StringIO() + traceback.print_exc(file=s) + log.error('%s', s.getvalue()) + + # + # Subclasses can override these methods. + # + def _cleanup(self): + """Clean up upon exit from the main processing loop. + + Called when the Runner's main loop is stopped, this should perform + any necessary resource deallocation. Its return value is irrelevant. + """ + Utils.reap(self._kids) + + def _dispose(self, mlist, msg, msgdata): + """Dispose of a single message destined for a mailing list. + + Called for each message that the Runner is responsible for, this is + the primary overridable method for processing each message. + Subclasses, must provide implementation for this method. + + mlist is the IMailingList instance this message is destined for. + + msg is the Message object representing the message. + + msgdata is a dictionary of message metadata. + """ + raise NotImplementedError + + def _doperiodic(self): + """Do some processing `every once in a while'. + + Called every once in a while both from the Runner's main loop, and + from the Runner's hash slice processing loop. You can do whatever + special periodic processing you want here, and the return value is + irrelevant. 
+ """ + pass + + def _snooze(self, filecnt): + """Sleep for a little while. + + filecnt is the number of messages in the queue the last time through. + Sub-runners can decide to continue to do work, or sleep for a while + based on this value. By default, we only snooze if there was nothing + to do last time around. + """ + if filecnt or float(self.SLEEPTIME) <= 0: + return + time.sleep(float(self.SLEEPTIME)) + + def _shortcircuit(self): + """Return a true value if the individual file processing loop should + exit before it's finished processing each message in the current slice + of hash space. A false value tells _oneloop() to continue processing + until the current snapshot of hash space is exhausted. + + You could, for example, implement a throttling algorithm here. + """ + return self._stop diff --git a/mailman/queue/archive.py b/mailman/queue/archive.py new file mode 100644 index 000000000..f17e6b751 --- /dev/null +++ b/mailman/queue/archive.py @@ -0,0 +1,72 @@ +# Copyright (C) 2000-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Archive queue runner.""" + +from __future__ import with_statement + +import time + +from email.Utils import parsedate_tz, mktime_tz, formatdate +from locknix.lockfile import Lock + +from mailman.configuration import config +from mailman.queue import Runner + + + +class ArchiveRunner(Runner): + QDIR = config.ARCHQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + # Support clobber_date, i.e. setting the date in the archive to the + # received date, not the (potentially bogus) Date: header of the + # original message. + clobber = 0 + originaldate = msg.get('date') + receivedtime = formatdate(msgdata['received_time']) + if not originaldate: + clobber = 1 + elif config.ARCHIVER_CLOBBER_DATE_POLICY == 1: + clobber = 1 + elif config.ARCHIVER_CLOBBER_DATE_POLICY == 2: + # what's the timestamp on the original message? + tup = parsedate_tz(originaldate) + now = time.time() + try: + if not tup: + clobber = 1 + elif abs(now - mktime_tz(tup)) > \ + config.ARCHIVER_ALLOWABLE_SANE_DATE_SKEW: + clobber = 1 + except (ValueError, OverflowError): + # The likely cause of this is that the year in the Date: field + # is horribly incorrect, e.g. (from SF bug # 571634): + # Date: Tue, 18 Jun 0102 05:12:09 +0500 + # Obviously clobber such dates. + clobber = 1 + if clobber: + del msg['date'] + del msg['x-original-date'] + msg['Date'] = receivedtime + if originaldate: + msg['X-Original-Date'] = originaldate + # Always put an indication of when we received the message. + msg['X-List-Received-Date'] = receivedtime + # While a list archiving lock is acquired, archive the message. 
+ with Lock(os.path.join(mlist.full_path, 'archive.lck')): + mlist.ArchiveMail(msg) diff --git a/mailman/queue/bounce.py b/mailman/queue/bounce.py new file mode 100644 index 000000000..e5bebe3eb --- /dev/null +++ b/mailman/queue/bounce.py @@ -0,0 +1,316 @@ +# Copyright (C) 2001-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Bounce queue runner.""" + +import os +import re +import cPickle +import logging +import datetime + +from email.MIMEMessage import MIMEMessage +from email.MIMEText import MIMEText +from email.Utils import parseaddr + +from mailman import Utils +from mailman.Bouncers import BouncerAPI +from mailman.Message import UserNotification +from mailman.configuration import config +from mailman.i18n import _ +from mailman.queue import Runner, Switchboard + +COMMASPACE = ', ' + +log = logging.getLogger('mailman.bounce') +elog = logging.getLogger('mailman.error') + + + +class BounceMixin: + def __init__(self): + # Registering a bounce means acquiring the list lock, and it would be + # too expensive to do this for each message. Instead, each bounce + # runner maintains an event log which is essentially a file with + # multiple pickles. Each bounce we receive gets appended to this file + # as a 4-tuple record: (listname, addr, today, msg) + # + # today is itself a 3-tuple of (year, month, day) + # + # Every once in a while (see _doperiodic()), the bounce runner cracks + # open the file, reads all the records and registers all the bounces. + # Then it truncates the file and continues on. We don't need to lock + # the bounce event file because bounce qrunners are single threaded + # and each creates a uniquely named file to contain the events. + # + # XXX When Python 2.3 is minimal require, we can use the new + # tempfile.TemporaryFile() function. + # + # XXX We used to classify bounces to the site list as bounce events + # for every list, but this caused severe problems. Here's the + # scenario: aperson@example.com is a member of 4 lists, and a list + # owner of the foo list. example.com has an aggressive spam filter + # which rejects any message that is spam or contains spam as an + # attachment. Now, a spambot sends a piece of spam to the foo list, + # but since that spambot is not a member, the list holds the message + # for approval, and sends a notification to aperson@example.com as + # list owner. That notification contains a copy of the spam. Now + # example.com rejects the message, causing a bounce to be sent to the + # site list's bounce address. The bounce runner would then dutifully + # register a bounce for all 4 lists that aperson@example.com was a + # member of, and eventually that person would get disabled on all + # their lists. So now we ignore site list bounces. Ce La Vie for + # password reminder bounces. 
+ self._bounce_events_file = os.path.join( + config.DATA_DIR, 'bounce-events-%05d.pck' % os.getpid()) + self._bounce_events_fp = None + self._bouncecnt = 0 + self._nextaction = (datetime.datetime.now() + + config.REGISTER_BOUNCES_EVERY) + + def _queue_bounces(self, listname, addrs, msg): + today = datetime.date.today() + if self._bounce_events_fp is None: + self._bounce_events_fp = open(self._bounce_events_file, 'a+b') + for addr in addrs: + cPickle.dump((listname, addr, today, msg), + self._bounce_events_fp, 1) + self._bounce_events_fp.flush() + os.fsync(self._bounce_events_fp.fileno()) + self._bouncecnt += len(addrs) + + def _register_bounces(self): + log.info('%s processing %s queued bounces', self, self._bouncecnt) + # Read all the records from the bounce file, then unlink it. Sort the + # records by listname for more efficient processing. + events = {} + self._bounce_events_fp.seek(0) + while True: + try: + listname, addr, day, msg = cPickle.load(self._bounce_events_fp) + except ValueError, e: + log.error('Error reading bounce events: %s', e) + except EOFError: + break + events.setdefault(listname, []).append((addr, day, msg)) + # Now register all events sorted by list + for listname in events.keys(): + mlist = self._open_list(listname) + mlist.Lock() + try: + for addr, day, msg in events[listname]: + mlist.registerBounce(addr, msg, day=day) + mlist.Save() + finally: + mlist.Unlock() + # Reset and free all the cached memory + self._bounce_events_fp.close() + self._bounce_events_fp = None + os.unlink(self._bounce_events_file) + self._bouncecnt = 0 + + def _cleanup(self): + if self._bouncecnt > 0: + self._register_bounces() + + def _doperiodic(self): + now = datetime.datetime.now() + if self._nextaction > now or self._bouncecnt == 0: + return + # Let's go ahead and register the bounces we've got stored up + self._nextaction = now + config.REGISTER_BOUNCES_EVERY + self._register_bounces() + + def _probe_bounce(self, mlist, token): + locked = mlist.Locked() + if not locked: + mlist.Lock() + try: + op, addr, bmsg = mlist.pend_confirm(token) + info = mlist.getBounceInfo(addr) + mlist.disableBouncingMember(addr, info, bmsg) + # Only save the list if we're unlocking it + if not locked: + mlist.Save() + finally: + if not locked: + mlist.Unlock() + + + +class BounceRunner(Runner, BounceMixin): + QDIR = config.BOUNCEQUEUE_DIR + + def __init__(self, slice=None, numslices=1): + Runner.__init__(self, slice, numslices) + BounceMixin.__init__(self) + + def _dispose(self, mlist, msg, msgdata): + # Make sure we have the most up-to-date state + mlist.Load() + outq = Switchboard(config.OUTQUEUE_DIR) + # There are a few possibilities here: + # + # - the message could have been VERP'd in which case, we know exactly + # who the message was destined for. That make our job easy. + # - the message could have been originally destined for a list owner, + # but a list owner address itself bounced. That's bad, and for now + # we'll simply log the problem and attempt to deliver the message to + # the site owner. + # + # All messages sent to list owners have their sender set to the site + # owner address. That way, if a list owner address bounces, at least + # some human has a chance to deal with it. Is this a bounce for a + # message to a list owner, coming to the site owner? + if msg.get('to', '') == config.SITE_OWNER_ADDRESS: + # Send it on to the site owners, but craft the envelope sender to + # be the noreply address, so if the site owner bounce, we won't + # get stuck in a bounce loop. 
+ outq.enqueue(msg, msgdata, + recips=[config.SITE_OWNER_ADDRESS], + envsender=config.NO_REPLY_ADDRESS, + ) + # List isn't doing bounce processing? + if not mlist.bounce_processing: + return + # Try VERP detection first, since it's quick and easy + addrs = verp_bounce(mlist, msg) + if addrs: + # We have an address, but check if the message is non-fatal. + if BouncerAPI.ScanMessages(mlist, msg) is BouncerAPI.Stop: + return + else: + # See if this was a probe message. + token = verp_probe(mlist, msg) + if token: + self._probe_bounce(mlist, token) + return + # That didn't give us anything useful, so try the old fashion + # bounce matching modules. + addrs = BouncerAPI.ScanMessages(mlist, msg) + if addrs is BouncerAPI.Stop: + # This is a recognized, non-fatal notice. Ignore it. + return + # If that still didn't return us any useful addresses, then send it on + # or discard it. + if not addrs: + log.info('bounce message w/no discernable addresses: %s', + msg.get('message-id')) + maybe_forward(mlist, msg) + return + # BAW: It's possible that there are None's in the list of addresses, + # although I'm unsure how that could happen. Possibly ScanMessages() + # can let None's sneak through. In any event, this will kill them. + addrs = filter(None, addrs) + self._queue_bounces(mlist.fqdn_listname, addrs, msg) + + _doperiodic = BounceMixin._doperiodic + + def _cleanup(self): + BounceMixin._cleanup(self) + Runner._cleanup(self) + + + +def verp_bounce(mlist, msg): + bmailbox, bdomain = Utils.ParseEmail(mlist.GetBouncesEmail()) + # Sadly not every MTA bounces VERP messages correctly, or consistently. + # Fall back to Delivered-To: (Postfix), Envelope-To: (Exim) and + # Apparently-To:, and then short-circuit if we still don't have anything + # to work with. Note that there can be multiple Delivered-To: headers so + # we need to search them all (and we don't worry about false positives for + # forwarded email, because only one should match VERP_REGEXP). + vals = [] + for header in ('to', 'delivered-to', 'envelope-to', 'apparently-to'): + vals.extend(msg.get_all(header, [])) + for field in vals: + to = parseaddr(field)[1] + if not to: + continue # empty header + mo = re.search(config.VERP_REGEXP, to) + if not mo: + continue # no match of regexp + try: + if bmailbox <> mo.group('bounces'): + continue # not a bounce to our list + # All is good + addr = '%s@%s' % mo.group('mailbox', 'host') + except IndexError: + elog.error("VERP_REGEXP doesn't yield the right match groups: %s", + config.VERP_REGEXP) + return [] + return [addr] + + + +def verp_probe(mlist, msg): + bmailbox, bdomain = Utils.ParseEmail(mlist.GetBouncesEmail()) + # Sadly not every MTA bounces VERP messages correctly, or consistently. + # Fall back to Delivered-To: (Postfix), Envelope-To: (Exim) and + # Apparently-To:, and then short-circuit if we still don't have anything + # to work with. Note that there can be multiple Delivered-To: headers so + # we need to search them all (and we don't worry about false positives for + # forwarded email, because only one should match VERP_REGEXP). 
+ vals = [] + for header in ('to', 'delivered-to', 'envelope-to', 'apparently-to'): + vals.extend(msg.get_all(header, [])) + for field in vals: + to = parseaddr(field)[1] + if not to: + continue # empty header + mo = re.search(config.VERP_PROBE_REGEXP, to) + if not mo: + continue # no match of regexp + try: + if bmailbox <> mo.group('bounces'): + continue # not a bounce to our list + # Extract the token and see if there's an entry + token = mo.group('token') + data = mlist.pend_confirm(token, expunge=False) + if data is not None: + return token + except IndexError: + elog.error( + "VERP_PROBE_REGEXP doesn't yield the right match groups: %s", + config.VERP_PROBE_REGEXP) + return None + + + +def maybe_forward(mlist, msg): + # Does the list owner want to get non-matching bounce messages? + # If not, simply discard it. + if mlist.bounce_unrecognized_goes_to_list_owner: + adminurl = mlist.GetScriptURL('admin', absolute=1) + '/bounce' + mlist.ForwardMessage(msg, + text=_("""\ +The attached message was received as a bounce, but either the bounce format +was not recognized, or no member addresses could be extracted from it. This +mailing list has been configured to send all unrecognized bounce messages to +the list administrator(s). + +For more information see: +%(adminurl)s + +"""), + subject=_('Uncaught bounce notification'), + tomoderators=0) + log.error('forwarding unrecognized, message-id: %s', + msg.get('message-id', 'n/a')) + else: + log.error('discarding unrecognized, message-id: %s', + msg.get('message-id', 'n/a')) diff --git a/mailman/queue/command.py b/mailman/queue/command.py new file mode 100644 index 000000000..c95504401 --- /dev/null +++ b/mailman/queue/command.py @@ -0,0 +1,230 @@ +# Copyright (C) 1998-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +"""-request robot command queue runner.""" + +# See the delivery diagram in IncomingRunner.py. This module handles all +# email destined for mylist-request, -join, and -leave. It no longer handles +# bounce messages (i.e. -admin or -bounces), nor does it handle mail to +# -owner. 
+ +import re +import sys +import logging + +from email.Errors import HeaderParseError +from email.Header import decode_header, make_header, Header +from email.Iterators import typed_subpart_iterator +from email.MIMEMessage import MIMEMessage +from email.MIMEText import MIMEText + +from mailman import Message +from mailman import Utils +from mailman.app.replybot import autorespond_to_sender +from mailman.configuration import config +from mailman.i18n import _ +from mailman.queue import Runner + +NL = '\n' + +log = logging.getLogger('mailman.vette') + + + +class Results: + def __init__(self, mlist, msg, msgdata): + self.mlist = mlist + self.msg = msg + self.msgdata = msgdata + # Only set returnaddr if the response is to go to someone other than + # the address specified in the From: header (e.g. for the password + # command). + self.returnaddr = None + self.commands = [] + self.results = [] + self.ignored = [] + self.lineno = 0 + self.subjcmdretried = 0 + self.respond = True + # Extract the subject header and do RFC 2047 decoding. Note that + # Python 2.1's unicode() builtin doesn't call obj.__unicode__(). + subj = msg.get('subject', '') + try: + subj = make_header(decode_header(subj)).__unicode__() + # TK: Currently we don't allow 8bit or multibyte in mail command. + subj = subj.encode('us-ascii') + # Always process the Subject: header first + self.commands.append(subj) + except (HeaderParseError, UnicodeError, LookupError): + # We couldn't parse it so ignore the Subject header + pass + # Find the first text/plain part + part = None + for part in typed_subpart_iterator(msg, 'text', 'plain'): + break + if part is None or part is not msg: + # Either there was no text/plain part or we ignored some + # non-text/plain parts. + self.results.append(_('Ignoring non-text/plain MIME parts')) + if part is None: + # E.g the outer Content-Type: was text/html + return + body = part.get_payload(decode=True) + # text/plain parts better have string payloads + assert isinstance(body, basestring) + lines = body.splitlines() + # Use no more lines than specified + self.commands.extend(lines[:config.EMAIL_COMMANDS_MAX_LINES]) + self.ignored.extend(lines[config.EMAIL_COMMANDS_MAX_LINES:]) + + def process(self): + # Now, process each line until we find an error. The first + # non-command line found stops processing. + stop = False + for line in self.commands: + if line and line.strip(): + args = line.split() + cmd = args.pop(0).lower() + stop = self.do_command(cmd, args) + self.lineno += 1 + if stop: + break + + def do_command(self, cmd, args=None): + if args is None: + args = () + # Try to import a command handler module for this command + modname = 'mailman.Commands.cmd_' + cmd + try: + __import__(modname) + handler = sys.modules[modname] + # ValueError can be raised if cmd has dots in it. + except (ImportError, ValueError): + # If we're on line zero, it was the Subject: header that didn't + # contain a command. It's possible there's a Re: prefix (or + # localized version thereof) on the Subject: line that's messing + # things up. Pop the prefix off and try again... once. + # + # If that still didn't work it isn't enough to stop processing. + # BAW: should we include a message that the Subject: was ignored? 
+ if not self.subjcmdretried and args: + self.subjcmdretried += 1 + cmd = args.pop(0) + return self.do_command(cmd, args) + return self.lineno <> 0 + return handler.process(self, args) + + def send_response(self): + # Helper + def indent(lines): + return [' ' + line for line in lines] + # Quick exit for some commands which don't need a response + if not self.respond: + return + resp = [Utils.wrap(_("""\ +The results of your email command are provided below. +Attached is your original message. +"""))] + if self.results: + resp.append(_('- Results:')) + resp.extend(indent(self.results)) + # Ignore empty lines + unprocessed = [line for line in self.commands[self.lineno:] + if line and line.strip()] + if unprocessed: + resp.append(_('\n- Unprocessed:')) + resp.extend(indent(unprocessed)) + if not unprocessed and not self.results: + # The user sent an empty message; return a helpful one. + resp.append(Utils.wrap(_("""\ +No commands were found in this message. +To obtain instructions, send a message containing just the word "help". +"""))) + if self.ignored: + resp.append(_('\n- Ignored:')) + resp.extend(indent(self.ignored)) + resp.append(_('\n- Done.\n\n')) + # Encode any unicode strings into the list charset, so we don't try to + # join unicode strings and invalid ASCII. + charset = Utils.GetCharSet(self.msgdata['lang']) + encoded_resp = [] + for item in resp: + if isinstance(item, unicode): + item = item.encode(charset, 'replace') + encoded_resp.append(item) + results = MIMEText(NL.join(encoded_resp), _charset=charset) + # Safety valve for mail loops with misconfigured email 'bots. We + # don't respond to commands sent with "Precedence: bulk|junk|list" + # unless they explicitly "X-Ack: yes", but not all mail 'bots are + # correctly configured, so we max out the number of responses we'll + # give to an address in a single day. + # + # BAW: We wait until now to make this decision since our sender may + # not be self.msg.get_sender(), but I'm not sure this is right. + recip = self.returnaddr or self.msg.get_sender() + if not autorespond_to_sender(self.mlist, recip, self.msgdata['lang']): + return + msg = Message.UserNotification( + recip, + self.mlist.GetBouncesEmail(), + _('The results of your email commands'), + lang=self.msgdata['lang']) + msg.set_type('multipart/mixed') + msg.attach(results) + orig = MIMEMessage(self.msg) + msg.attach(orig) + msg.send(self.mlist) + + + +class CommandRunner(Runner): + QDIR = config.CMDQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + # The policy here is similar to the Replybot policy. If a message has + # "Precedence: bulk|junk|list" and no "X-Ack: yes" header, we discard + # it to prevent replybot response storms. + precedence = msg.get('precedence', '').lower() + ack = msg.get('x-ack', '').lower() + if ack <> 'yes' and precedence in ('bulk', 'junk', 'list'): + log.info('Precedence: %s message discarded by: %s', + precedence, mlist.GetRequestEmail()) + return False + # Do replybot for commands + mlist.Load() + replybot = config.handlers['replybot'] + replybot.process(mlist, msg, msgdata) + if mlist.autorespond_requests == 1: + log.info('replied and discard') + # w/discard + return False + # Now craft the response + res = Results(mlist, msg, msgdata) + # This message will have been delivered to one of mylist-request, + # mylist-join, or mylist-leave, and the message metadata will contain + # a key to which one was used. 
+ if msgdata.get('torequest'): + res.process() + elif msgdata.get('tojoin'): + res.do_command('join') + elif msgdata.get('toleave'): + res.do_command('leave') + elif msgdata.get('toconfirm'): + mo = re.match(config.VERP_CONFIRM_REGEXP, msg.get('to', '')) + if mo: + res.do_command('confirm', (mo.group('cookie'),)) + res.send_response() + config.db.commit() diff --git a/mailman/queue/docs/OVERVIEW.txt b/mailman/queue/docs/OVERVIEW.txt new file mode 100644 index 000000000..643fa8a5c --- /dev/null +++ b/mailman/queue/docs/OVERVIEW.txt @@ -0,0 +1,78 @@ +Alias overview +============== + +A typical Mailman list exposes nine aliases which point to seven different +wrapped scripts. E.g. for a list named `mylist', you'd have: + + mylist-bounces -> bounces + mylist-confirm -> confirm + mylist-join -> join (-subscribe is an alias) + mylist-leave -> leave (-unsubscribe is an alias) + mylist-owner -> owner + mylist -> post + mylist-request -> request + +-request, -join, and -leave are a robot addresses; their sole purpose is to +process emailed commands, although the latter two are hardcoded to +subscription and unsubscription requests. -bounces is the automated bounce +processor, and all messages to list members have their return address set to +-bounces. If the bounce processor fails to extract a bouncing member address, +it can optionally forward the message on to the list owners. + +-owner is for reaching a human operator with minimal list interaction (i.e. no +bounce processing). -confirm is another robot address which processes replies +to VERP-like confirmation notices. + +So delivery flow of messages look like this: + + joerandom ---> mylist ---> list members + | | + | |[bounces] + | mylist-bounces <---+ <-------------------------------+ + | | | + | +--->[internal bounce processing] | + | ^ | | + | | | [bounce found] | + | [bounces *] +--->[register and discard] | + | | | | | + | | | |[*] | + | [list owners] |[no bounce found] | | + | ^ | | | + | | | | | + +-------> mylist-owner <--------+ | | + | | | + | data/owner-bounces.mbox <--[site list] <---+ | + | | + +-------> mylist-join--+ | + | | | + +------> mylist-leave--+ | + | | | + | v | + +-------> mylist-request | + | | | + | +---> [command processor] | + | | | + +-----> mylist-confirm ----> +---> joerandom | + | | + |[bounces] | + +----------------------+ + +A person can send an email to the list address (for posting), the -owner +address (to reach the human operator), or the -confirm, -join, -leave, and +-request mailbots. Message to the list address are then forwarded on to the +list membership, with bounces directed to the -bounces address. + +[*] Messages sent to the -owner address are forwarded on to the list +owner/moderators. All -owner destined messages have their bounces directed to +the site list -bounces address, regardless of whether a human sent the message +or the message was crafted internally. The intention here is that the site +owners want to be notified when one of their list owners' addresses starts +bouncing (yes, the will be automated in a future release). + +Any messages to site owners has their bounces directed to a special +"loop-killer" address, which just dumps the message into +data/owners-bounces.mbox. + +Finally, message to any of the mailbots causes the requested action to be +performed. Results notifications are sent to the author of the message, which +all bounces pointing back to the -bounces address. 
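
The command runner added in command.py above dispatches each command word by importing mailman.Commands.cmd_<word> and calling that module's process() with the Results object and the remaining arguments; a true return value stops processing of later lines. The Commands modules themselves are not part of this changeset, so the following is only a guessed-at sketch of that plug-in convention (the 'echo' command is invented for illustration):

    # Hypothetical mailman/Commands/cmd_echo.py -- not part of this diff.

    def process(res, args):
        # 'res' is the Results instance built by CommandRunner; lines appended
        # to res.results end up in the emailed command response.
        res.results.append('echo: ' + ' '.join(args))
        # A false return lets the runner continue with subsequent lines.
        return False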
diff --git a/mailman/queue/docs/incoming.txt b/mailman/queue/docs/incoming.txt new file mode 100644 index 000000000..48fb308c7 --- /dev/null +++ b/mailman/queue/docs/incoming.txt @@ -0,0 +1,198 @@ +The incoming queue runner +========================= + +This runner's sole purpose in life is to decide the disposition of the +message. It can either be accepted for delivery, rejected (i.e. bounced), +held for moderator approval, or discarded. + +The runner operates by processing chains on a message/metadata pair in the +context of a mailing list. Each mailing list may have a 'start chain' where +processing begins, with a global default. This chain is processed with the +message eventually ending up in one of the four disposition states described +above. + + >>> from mailman.app.lifecycle import create_list + >>> mlist = create_list(u'_xtest@example.com') + >>> mlist.start_chain + u'built-in' + + +Accepted messages +----------------- + +We have a message that is going to be sent to the mailing list. This message +is so perfectly fine for posting that it will be accepted and forward to the +pipeline queue. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Subject: My first post + ... Message-ID: <first> + ... + ... First post! + ... """) + +Normally, the upstream mail server would drop the message in the incoming +queue, but this is an effective simulation. + + >>> from mailman.inject import inject + >>> inject(u'_xtest@example.com', msg) + +The incoming queue runner runs until it is empty. + + >>> from mailman.queue.incoming import IncomingRunner + >>> from mailman.tests.helpers import make_testable_runner + >>> incoming = make_testable_runner(IncomingRunner) + >>> incoming.run() + +And now the message is in the pipeline queue. + + >>> from mailman.configuration import config + >>> from mailman.queue import Switchboard + >>> pipeline_queue = Switchboard(config.PIPELINEQUEUE_DIR) + >>> len(pipeline_queue.files) + 1 + >>> incoming_queue = Switchboard(config.INQUEUE_DIR) + >>> len(incoming_queue.files) + 0 + >>> from mailman.tests.helpers import get_queue_messages + >>> item = get_queue_messages(pipeline_queue)[0] + >>> print item.msg.as_string() + From: aperson@example.com + To: _xtest@example.com + Subject: My first post + Message-ID: <first> + X-Mailman-Rule-Misses: approved; emergency; loop; administrivia; + implicit-dest; + max-recipients; max-size; news-moderation; no-subject; + suspicious-header + <BLANKLINE> + First post! + <BLANKLINE> + >>> sorted(item.msgdata.items()) + [...('envsender', u'noreply@example.com')...('tolist', True)...] + + +Held messages +------------- + +The list moderator sets the emergency flag on the mailing list. The built-in +chain will now hold all posted messages, so nothing will show up in the +pipeline queue. + + # XXX This checks the vette log file because there is no other evidence + # that this chain has done anything. + >>> import os + >>> fp = open(os.path.join(config.LOG_DIR, 'vette')) + >>> fp.seek(0, 2) + + >>> mlist.emergency = True + >>> mlist.web_page_url = u'http://archives.example.com/' + >>> inject(u'_xtest@example.com', msg) + >>> file_pos = fp.tell() + >>> incoming.run() + >>> len(pipeline_queue.files) + 0 + >>> len(incoming_queue.files) + 0 + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... 
HOLD: _xtest@example.com post from aperson@example.com held, + message-id=<first>: n/a + <BLANKLINE> + + >>> mlist.emergency = False + + +Discarded messages +------------------ + +Another possibility is that the message would get immediately discarded. The +built-in chain does not have such a disposition by default, so let's craft a +new chain and set it as the mailing list's start chain. + + >>> from mailman.chains.base import Chain, Link + >>> from mailman.interfaces import LinkAction + >>> truth_rule = config.rules['truth'] + >>> discard_chain = config.chains['discard'] + >>> test_chain = Chain('always-discard', u'Testing discards') + >>> link = Link(truth_rule, LinkAction.jump, discard_chain) + >>> test_chain.append_link(link) + >>> mlist.start_chain = u'always-discard' + + >>> inject(u'_xtest@example.com', msg) + >>> file_pos = fp.tell() + >>> incoming.run() + >>> len(pipeline_queue.files) + 0 + >>> len(incoming_queue.files) + 0 + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... DISCARD: <first> + <BLANKLINE> + + >>> del config.chains['always-discard'] + + +Rejected messages +----------------- + +Similar to discarded messages, a message can be rejected, or bounced back to +the original sender. Again, the built-in chain doesn't support this so we'll +just create a new chain that does. + + >>> reject_chain = config.chains['reject'] + >>> test_chain = Chain('always-reject', u'Testing rejections') + >>> link = Link(truth_rule, LinkAction.jump, reject_chain) + >>> test_chain.append_link(link) + >>> mlist.start_chain = u'always-reject' + +The virgin queue needs to be cleared out due to artifacts from the previous +tests above. + + >>> virgin_queue = Switchboard(config.VIRGINQUEUE_DIR) + >>> ignore = get_queue_messages(virgin_queue) + + >>> inject(u'_xtest@example.com', msg) + >>> file_pos = fp.tell() + >>> incoming.run() + >>> len(pipeline_queue.files) + 0 + >>> len(incoming_queue.files) + 0 + + >>> len(virgin_queue.files) + 1 + >>> item = get_queue_messages(virgin_queue)[0] + >>> print item.msg.as_string() + Subject: My first post + From: _xtest-owner@example.com + To: aperson@example.com + ... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + <BLANKLINE> + [No bounce details are available] + ... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: My first post + Message-ID: <first> + <BLANKLINE> + First post! + <BLANKLINE> + ... + >>> sorted(item.msgdata.items()) + [...('recips', [u'aperson@example.com'])...] + >>> fp.seek(file_pos) + >>> print 'LOG:', fp.read() + LOG: ... REJECT: <first> + <BLANKLINE> + + >>> del config.chains['always-reject'] diff --git a/mailman/queue/docs/news.txt b/mailman/queue/docs/news.txt new file mode 100644 index 000000000..0b89de2bc --- /dev/null +++ b/mailman/queue/docs/news.txt @@ -0,0 +1,158 @@ +The news runner +=============== + +The news runner is the queue runner that gateways mailing list messages to an +NNTP newsgroup. One of the most important things this runner does is prepare +the message for Usenet (yes, I know that NNTP is not Usenet, but this runner +was originally written to gate to Usenet, which has its own rules). 
+ + >>> from mailman.configuration import config + >>> from mailman.queue.news import prepare_message + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.linked_newsgroup = u'comp.lang.python' + +Some NNTP servers such as INN reject messages containing a set of prohibited +headers, so one of the things that the news runner does is remove these +prohibited headers. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... NNTP-Posting-Host: news.example.com + ... NNTP-Posting-Date: today + ... X-Trace: blah blah + ... X-Complaints-To: abuse@dom.ain + ... Xref: blah blah + ... Xref: blah blah + ... Date-Received: yesterday + ... Posted: tomorrow + ... Posting-Version: 99.99 + ... Relay-Version: 88.88 + ... Received: blah blah + ... + ... A message + ... """) + >>> msgdata = {} + >>> prepare_message(mlist, msg, msgdata) + >>> msgdata['prepped'] + True + >>> print msg.as_string() + From: aperson@example.com + To: _xtest@example.com + Newsgroups: comp.lang.python + Message-ID: ... + Lines: 1 + <BLANKLINE> + A message + <BLANKLINE> + +Some NNTP servers will reject messages where certain headers are duplicated, +so the news runner must collapse or move these duplicate headers to an +X-Original-* header that the news server doesn't care about. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... To: two@example.com + ... Cc: three@example.com + ... Cc: four@example.com + ... Cc: five@example.com + ... Content-Transfer-Encoding: yes + ... Content-Transfer-Encoding: no + ... Content-Transfer-Encoding: maybe + ... + ... A message + ... """) + >>> msgdata = {} + >>> prepare_message(mlist, msg, msgdata) + >>> msgdata['prepped'] + True + >>> print msg.as_string() + From: aperson@example.com + Newsgroups: comp.lang.python + Message-ID: ... + Lines: 1 + To: _xtest@example.com + X-Original-To: two@example.com + CC: three@example.com + X-Original-CC: four@example.com + X-Original-CC: five@example.com + Content-Transfer-Encoding: yes + X-Original-Content-Transfer-Encoding: no + X-Original-Content-Transfer-Encoding: maybe + <BLANKLINE> + A message + <BLANKLINE> + +But if no headers are duplicated, then the news runner doesn't need to modify +the message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Cc: someother@example.com + ... Content-Transfer-Encoding: yes + ... + ... A message + ... """) + >>> msgdata = {} + >>> prepare_message(mlist, msg, msgdata) + >>> msgdata['prepped'] + True + >>> print msg.as_string() + From: aperson@example.com + To: _xtest@example.com + Cc: someother@example.com + Content-Transfer-Encoding: yes + Newsgroups: comp.lang.python + Message-ID: ... + Lines: 1 + <BLANKLINE> + A message + <BLANKLINE> + + +Newsgroup moderation +-------------------- + +When the newsgroup is moderated, an Approved: header with the list's posting +address is added for the benefit of the Usenet system. + + >>> from mailman.interfaces import NewsModeration + >>> mlist.news_moderation = NewsModeration.open_moderated + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Approved: this gets deleted + ... + ... """) + >>> prepare_message(mlist, msg, {}) + >>> msg['approved'] + u'_xtest@example.com' + + >>> mlist.news_moderation = NewsModeration.moderated + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... 
Approved: this gets deleted + ... + ... """) + >>> prepare_message(mlist, msg, {}) + >>> msg['approved'] + u'_xtest@example.com' + +But if the newsgroup is not moderated, the Approved: header is not chnaged. + + >>> mlist.news_moderation = NewsModeration.none + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Approved: this doesn't get deleted + ... + ... """) + >>> prepare_message(mlist, msg, {}) + >>> msg['approved'] + u"this doesn't get deleted" + + +XXX More of the NewsRunner should be tested. diff --git a/mailman/queue/docs/outgoing.txt b/mailman/queue/docs/outgoing.txt new file mode 100644 index 000000000..3840b71ee --- /dev/null +++ b/mailman/queue/docs/outgoing.txt @@ -0,0 +1,155 @@ +The outgoing handler +==================== + +Mailman's outgoing queue is used as the wrapper around SMTP delivery to the +upstream mail server. The ToOutgoing handler does little more than drop the +message into the outgoing queue, after calculating whether the message should +be VERP'd or not. VERP means Variable Envelope Return Path; we're using that +term somewhat incorrectly, but within the spirit of the standard, which +basically describes how to encode the recipient's address in the originator +headers for unambigous bounce processing. + + >>> from mailman.queue import Switchboard + >>> from mailman.configuration import config + >>> handler = config.handlers['to-outgoing'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> switchboard = Switchboard(config.OUTQUEUE_DIR) + + >>> def queue_size(): + ... size = len(switchboard.files) + ... for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... return size + +Craft a message destined for the outgoing queue. Include some random metadata +as if this message had passed through some other handlers. + + >>> msg = message_from_string("""\ + ... Subject: Here is a message + ... + ... Something of great import. + ... """) + +When certain conditions are met, the message will be VERP'd. For example, if +the message metadata already has a VERP key, this message will be VERP'd. + + >>> msgdata = dict(foo=1, bar=2, verp=True) + >>> handler.process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Here is a message + <BLANKLINE> + Something of great import. + >>> msgdata['verp'] + True + +While the queued message will not be changed, the queued metadata will have an +additional key set: the mailing list name. + + >>> filebase = switchboard.files[0] + >>> qmsg, qmsgdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: Here is a message + <BLANKLINE> + Something of great import. + >>> sorted(qmsgdata.items()) + [('_parsemsg', False), + ('bar', 2), ('foo', 1), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('verp', True), ('version', 3)] + >>> queue_size() + 0 + +If the list is set to personalize deliveries, and the global configuration +option to VERP personalized deliveries is set, then the message will be +VERP'd. + + # Save the original value for clean up. 
+ >>> verp_personalized_delivieries = config.VERP_PERSONALIZED_DELIVERIES + >>> config.VERP_PERSONALIZED_DELIVERIES = True + >>> from mailman.interfaces import Personalization + >>> mlist.personalize = Personalization.individual + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> msgdata['verp'] + True + >>> queue_size() + 1 + +However, if the global configuration variable prohibits VERP'ing, even +personalized lists will not VERP. + + >>> config.VERP_PERSONALIZED_DELIVERIES = False + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> print msgdata.get('verp') + None + >>> queue_size() + 1 + +If the list is not personalized, then the message may still be VERP'd based on +the global configuration variable VERP_DELIVERY_INTERVAL. This variable tells +Mailman how often to VERP even non-personalized mailing lists. It can be set +to zero, which means non-personalized messages will never be VERP'd. + + # Save the original value for clean up. + >>> verp_delivery_interval = config.VERP_DELIVERY_INTERVAL + >>> config.VERP_DELIVERY_INTERVAL = 0 + >>> mlist.personalize = Personalization.none + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> print msgdata.get('verp') + None + >>> queue_size() + 1 + +If the interval is set to 1, then every message will be VERP'd. + + >>> config.VERP_DELIVERY_INTERVAL = 1 + >>> for i in range(10): + ... msgdata = dict(foo=1, bar=2) + ... handler.process(mlist, msg, msgdata) + ... print i, msgdata['verp'] + 0 True + 1 True + 2 True + 3 True + 4 True + 5 True + 6 True + 7 True + 8 True + 9 True + >>> queue_size() + 10 + +If the interval is set to some other number, then one out of that many posts +will be VERP'd. + + >>> config.VERP_DELIVERY_INTERVAL = 3 + >>> for i in range(10): + ... mlist.post_id = i + ... msgdata = dict(foo=1, bar=2) + ... handler.process(mlist, msg, msgdata) + ... print i, msgdata.get('verp', False) + 0 True + 1 False + 2 False + 3 True + 4 False + 5 False + 6 True + 7 False + 8 False + 9 True + >>> queue_size() + 10 + + +Clean up +======== + + >>> config.VERP_PERSONALIZED_DELIVERIES = verp_personalized_delivieries + >>> config.VERP_DELIVERY_INTERVAL = verp_delivery_interval diff --git a/mailman/queue/docs/runner.txt b/mailman/queue/docs/runner.txt new file mode 100644 index 000000000..e95e20ecd --- /dev/null +++ b/mailman/queue/docs/runner.txt @@ -0,0 +1,70 @@ +Queue runners +============= + +The queue runners (qrunner) are the processes that move messages around the +Mailman system. Each qrunner is responsible for a slice of the hash space in +a queue directory. It processes all the files in its slice, sleeps a little +while, then wakes up and runs through its queue files again. + + +Basic architecture +------------------ + +The basic architecture of qrunner is implemented in the base class that all +runners inherit from. This base class implements a .run() method that runs +continuously in a loop until the .stop() method is called. + + >>> import os + >>> from mailman.queue import Runner, Switchboard + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Here is a very simple derived qrunner class. The class attribute QDIR tells +the qrunner which queue directory it is responsible for. Derived classes +should also implement various methods to provide the special functionality. +This is about as simple as a qrunner can be. 
+
+    >>> queue_directory = os.path.join(config.QUEUE_DIR, 'test')
+    >>> class TestableRunner(Runner):
+    ...     QDIR = queue_directory
+    ...
+    ...     def _dispose(self, mlist, msg, msgdata):
+    ...         self.msg = msg
+    ...         self.msgdata = msgdata
+    ...         return False
+    ...
+    ...     def _doperiodic(self):
+    ...         self.stop()
+    ...
+    ...     def _snooze(self, filecnt):
+    ...         return
+
+    >>> runner = TestableRunner()
+    >>> switchboard = Switchboard(queue_directory)
+
+This qrunner doesn't do much except run once, storing the message and metadata
+on instance variables.
+
+    >>> msg = message_from_string("""\
+    ... From: aperson@example.com
+    ... To: _xtest@example.com
+    ...
+    ... A test message.
+    ... """)
+    >>> filebase = switchboard.enqueue(msg, listname=mlist.fqdn_listname,
+    ...                                foo='yes', bar='no')
+    >>> runner.run()
+    >>> print runner.msg.as_string()
+    From: aperson@example.com
+    To: _xtest@example.com
+    <BLANKLINE>
+    A test message.
+    <BLANKLINE>
+    >>> sorted(runner.msgdata.items())
+    [('_parsemsg', False),
+     ('bar', 'no'), ('foo', 'yes'),
+     ('lang', u'en'), ('listname', u'_xtest@example.com'),
+     ('received_time', ...), ('version', 3)]
+
+XXX More of the Runner API should be tested.
diff --git a/mailman/queue/docs/switchboard.txt b/mailman/queue/docs/switchboard.txt
new file mode 100644
index 000000000..633bdabe6
--- /dev/null
+++ b/mailman/queue/docs/switchboard.txt
@@ -0,0 +1,149 @@
+The switchboard
+===============
+
+The switchboard is the subsystem that moves messages between queues.  Each
+instance of a switchboard is responsible for one queue directory.
+
+    >>> msg = message_from_string("""\
+    ... From: aperson@example.com
+    ... To: _xtest@example.com
+    ...
+    ... A test message.
+    ... """)
+
+Create a switchboard by giving its queue directory.
+
+    >>> import os
+    >>> from mailman.configuration import config
+    >>> queue_directory = os.path.join(config.QUEUE_DIR, 'test')
+    >>> from mailman.queue import Switchboard
+    >>> switchboard = Switchboard(queue_directory)
+    >>> switchboard.queue_directory == queue_directory
+    True
+
+Here's a helper function for ensuring things work correctly.
+
+    >>> def check_qfiles():
+    ...     files = {}
+    ...     for qfile in os.listdir(queue_directory):
+    ...         root, ext = os.path.splitext(qfile)
+    ...         files[ext] = files.get(ext, 0) + 1
+    ...     return sorted(files.items())
+
+
+Enqueuing and dequeuing
+-----------------------
+
+The message can be enqueued with metadata specified in the passed-in
+dictionary.
+
+    >>> filebase = switchboard.enqueue(msg)
+    >>> check_qfiles()
+    [('.pck', 1)]
+
+To read the contents of a queue file, dequeue it.
+
+    >>> msg, msgdata = switchboard.dequeue(filebase)
+    >>> print msg.as_string()
+    From: aperson@example.com
+    To: _xtest@example.com
+    <BLANKLINE>
+    A test message.
+    <BLANKLINE>
+    >>> sorted(msgdata.items())
+    [('_parsemsg', False), ('received_time', ...), ('version', 3)]
+    >>> check_qfiles()
+    [('.bak', 1)]
+
+To complete the dequeuing process, removing all traces of the message file,
+finish it (without preservation).
+
+    >>> switchboard.finish(filebase)
+    >>> check_qfiles()
+    []
+
+When enqueuing a file, you can provide additional metadata keys by using
+keyword arguments.
+
+    >>> filebase = switchboard.enqueue(msg, {'foo': 1}, bar=2)
+    >>> msg, msgdata = switchboard.dequeue(filebase)
+    >>> switchboard.finish(filebase)
+    >>> sorted(msgdata.items())
+    [('_parsemsg', False),
+     ('bar', 2), ('foo', 1),
+     ('received_time', ...), ('version', 3)]
+
+Keyword arguments override keys from the metadata dictionary.
+ + >>> filebase = switchboard.enqueue(msg, {'foo': 1}, foo=2) + >>> msg, msgdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> sorted(msgdata.items()) + [('_parsemsg', False), + ('foo', 2), + ('received_time', ...), ('version', 3)] + + +Iterating over files +-------------------- + +There are two ways to iterate over all the files in a switchboard's queue. +Normally, queue files end in .pck (for 'pickle') and the easiest way to +iterate over just these files is to use the .files attribute. + + >>> filebase_1 = switchboard.enqueue(msg, foo=1) + >>> filebase_2 = switchboard.enqueue(msg, foo=2) + >>> filebase_3 = switchboard.enqueue(msg, foo=3) + >>> filebases = sorted((filebase_1, filebase_2, filebase_3)) + >>> sorted(switchboard.files) == filebases + True + >>> check_qfiles() + [('.pck', 3)] + +You can also use the .get_files() method if you want to iterate over all the +file bases for some other extension. + + >>> for filebase in switchboard.get_files(): + ... msg, msgdata = switchboard.dequeue(filebase) + >>> bakfiles = sorted(switchboard.get_files('.bak')) + >>> bakfiles == filebases + True + >>> check_qfiles() + [('.bak', 3)] + >>> for filebase in switchboard.get_files('.bak'): + ... switchboard.finish(filebase) + >>> check_qfiles() + [] + + +Recovering files +---------------- + +Calling .dequeue() without calling .finish() leaves .bak backup files in +place. These can be recovered when the switchboard is instantiated. + + >>> filebase_1 = switchboard.enqueue(msg, foo=1) + >>> filebase_2 = switchboard.enqueue(msg, foo=2) + >>> filebase_3 = switchboard.enqueue(msg, foo=3) + >>> for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... # Don't call .finish() + >>> check_qfiles() + [('.bak', 3)] + >>> switchboard_2 = Switchboard(queue_directory, recover=True) + >>> check_qfiles() + [('.pck', 3)] + +Clean up + + >>> for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + >>> check_qfiles() + [] + + +Queue slices +------------ + +XXX Add tests for queue slices. diff --git a/mailman/queue/http.py b/mailman/queue/http.py new file mode 100644 index 000000000..3ecf709ec --- /dev/null +++ b/mailman/queue/http.py @@ -0,0 +1,73 @@ +# Copyright (C) 2006-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. 
+ +"""Mailman HTTP runner (server).""" + +import sys +import signal +import logging + +from cStringIO import StringIO +from wsgiref.simple_server import make_server, WSGIRequestHandler + +from mailman.Cgi.wsgi_app import mailman_app +from mailman.configuration import config +from mailman.queue import Runner + +hlog = logging.getLogger('mailman.http') +qlog = logging.getLogger('mailman.qrunner') + + + +class HTTPRunner(Runner): + def __init__(self, slice=None, numslices=1): + pass + + def _cleanup(self): + pass + + + +class MailmanWSGIRequestHandler(WSGIRequestHandler): + def handle(self): + """Handle a single HTTP request with error output to elog""" + stderr = StringIO() + saved_stderr = sys.stderr + sys.stderr = stderr + try: + WSGIRequestHandler.handle(self) + finally: + sys.stderr = saved_stderr + hlog.info(stderr.getvalue().strip()) + + + +server = make_server(config.HTTP_HOST, config.HTTP_PORT, + mailman_app, + handler_class=MailmanWSGIRequestHandler) + + +qlog.info('HTTPRunner qrunner started.') +hlog.info('HTTPRunner listening on %s:%s', config.HTTP_HOST, config.HTTP_PORT) +try: + server.serve_forever() +except KeyboardInterrupt: + qlog.exception('HTTPRunner qrunner exiting.') + sys.exit(signal.SIGTERM) +except: + qlog.exception('HTTPRunner qrunner exiting.') + raise diff --git a/mailman/queue/incoming.py b/mailman/queue/incoming.py new file mode 100644 index 000000000..e1f151446 --- /dev/null +++ b/mailman/queue/incoming.py @@ -0,0 +1,44 @@ +# Copyright (C) 1998-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Incoming queue runner. + +This runner's sole purpose in life is to decide the disposition of the +message. It can either be accepted for delivery, rejected (i.e. bounced), +held for moderator approval, or discarded. + +When accepted, the message is forwarded on to the `prep queue` where it is +prepared for delivery. Rejections, discards, and holds are processed +immediately. +""" + +from mailman.app.chains import process +from mailman.configuration import config +from mailman.queue import Runner + + + +class IncomingRunner(Runner): + QDIR = config.INQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + if msgdata.get('envsender') is None: + msgdata['envsender'] = mlist.no_reply_address + # Process the message through the mailing list's start chain. + process(mlist, msg, msgdata, mlist.start_chain) + # Do not keep this message queued. + return False diff --git a/mailman/queue/lmtp.py b/mailman/queue/lmtp.py new file mode 100644 index 000000000..23aa9b360 --- /dev/null +++ b/mailman/queue/lmtp.py @@ -0,0 +1,193 @@ +# Copyright (C) 2006-2008 by the Free Software Foundation, Inc. 
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""Mailman LMTP runner (server).
+
+Most mail servers can be configured to deliver local messages via 'LMTP'[1].
+This module is actually an LMTP server rather than a standard queue runner.
+Once it enters its main asyncore loop, it does not respond to mailmanctl
+signals the same way as other runners do.  All signals will kill this process,
+but the normal mailmanctl watchdog will restart it upon exit.
+
+The LMTP runner opens a local TCP port and waits for the mail server to
+connect to it.  The messages it receives over LMTP are very minimally parsed
+for sanity, and if they look okay, they are accepted and injected into
+Mailman's incoming queue for processing through the normal pipeline.  If they
+don't look good, or are destined for a bogus sub-queue address, they are
+rejected right away, hopefully so that the peer mail server can provide better
+diagnostics.
+
+[1] RFC 2033 Local Mail Transport Protocol
+    http://www.faqs.org/rfcs/rfc2033.html
+
+See the variable USE_LMTP in Defaults.py.in for enabling this delivery
+mechanism.
+"""
+
+# NOTE: LMTP delivery is experimental in Mailman 2.2.
+
+import os
+import email
+import smtpd
+import logging
+import asyncore
+
+from email.utils import parseaddr
+
+from mailman.Message import Message
+from mailman.configuration import config
+from mailman.queue import Runner, Switchboard
+
+elog = logging.getLogger('mailman.error')
+qlog = logging.getLogger('mailman.qrunner')
+
+# We only care about the listname and the subq as in listname@ or
+# listname-request@
+subqnames = (
+    'bounces', 'confirm', 'join', 'leave',
+    'owner', 'request', 'subscribe', 'unsubscribe',
+    )
+
+DASH = '-'
+CRLF = '\r\n'
+ERR_451 = '451 Requested action aborted: error in processing'
+ERR_502 = '502 Error: command HELO not implemented'
+ERR_550 = config.LMTP_ERR_550
+
+# XXX Blech
+smtpd.__version__ = 'Python LMTP queue runner 1.0'
+
+
+def getlistq(address):
+    localpart, domain = address.split('@', 1)
+    localpart = localpart.split(config.VERP_DELIMITER, 1)[0]
+    l = localpart.split(DASH)
+    if l[-1] in subqnames:
+        listname = DASH.join(l[:-1])
+        subq = l[-1]
+    else:
+        listname = localpart
+        subq = None
+    return listname, subq, domain
+
+
+class SMTPChannel(smtpd.SMTPChannel):
+    # Subclass smtpd.SMTPChannel, but keep the same class name so that we
+    # don't have to reverse engineer Python's name mangling scheme.
+    #
+    # The LMTP greeting is LHLO; there is no HELO/EHLO.
+
+    def smtp_LHLO(self, arg):
+        smtpd.SMTPChannel.smtp_HELO(self, arg)
+
+    def smtp_HELO(self, arg):
+        self.push(ERR_502)
+
+
+class LMTPRunner(Runner, smtpd.SMTPServer):
+    # Only __init__ is called on startup.  Asyncore is responsible for later
+    # connections from the MTA.  slice and numslices are ignored and are
+    # necessary only to satisfy the API.
+ def __init__(self, slice=None, numslices=1): + localaddr = config.LMTP_HOST, config.LMTP_PORT + # Do not call Runner's constructor because there's no QDIR to create + smtpd.SMTPServer.__init__(self, localaddr, remoteaddr=None) + + def handle_accept(self): + conn, addr = self.accept() + channel = SMTPChannel(self, conn, addr) + + def process_message(self, peer, mailfrom, rcpttos, data): + try: + # Refresh the list of list names every time we process a message + # since the set of mailing lists could have changed. However, on + # a big site this could be fairly expensive, so we may need to + # cache this in some way. + listnames = set(config.list_manager.names) + # Parse the message data. XXX Should we reject the message + # immediately if it has defects? Usually only spam has defects. + msg = email.message_from_string(data, Message) + msg['X-MailFrom'] = mailfrom + except Exception, e: + elog.error('%s', e) + return CRLF.join([ERR_451 for to in rcpttos]) + # RFC 2033 requires us to return a status code for every recipient. + status = [] + # Now for each address in the recipients, parse the address to first + # see if it's destined for a valid mailing list. If so, then queue + # the message to the appropriate place and record a 250 status for + # that recipient. If not, record a failure status for that recipient. + for to in rcpttos: + try: + to = parseaddr(to)[1].lower() + listname, subq, domain = getlistq(to) + listname += '@' + domain + if listname not in listnames: + status.append(ERR_550) + continue + # The recipient is a valid mailing list; see if it's a valid + # sub-queue, and if so, enqueue it. + msgdata = dict(listname=listname) + if subq in ('bounces', 'admin'): + queue = Switchboard(config.BOUNCEQUEUE_DIR) + elif subq == 'confirm': + msgdata['toconfirm'] = True + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq in ('join', 'subscribe'): + msgdata['tojoin'] = True + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq in ('leave', 'unsubscribe'): + msgdata['toleave'] = True + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq == 'owner': + msgdata.update({ + 'toowner' : True, + 'envsender' : config.SITE_OWNER_ADDRESS, + 'pipeline' : config.OWNER_PIPELINE, + }) + queue = Switchboard(config.INQUEUE_DIR) + elif subq is None: + msgdata['tolist'] = True + queue = Switchboard(config.INQUEUE_DIR) + elif subq == 'request': + msgdata['torequest'] = True + queue = Switchboard(config.CMDQUEUE_DIR) + else: + elog.error('Unknown sub-queue: %s', subq) + status.append(ERR_550) + continue + queue.enqueue(msg, msgdata) + status.append('250 Ok') + except Exception, e: + elog.error('%s', e) + status.append(ERR_550) + # All done; returning this big status string should give the expected + # response to the LMTP client. + return CRLF.join(status) + + def _cleanup(self): + pass + + +server = LMTPRunner() +qlog.info('LMTPRunner qrunner started.') +asyncore.loop() +# We'll never get here, but just in case... +qlog.info('LMTPRunner qrunner exiting.') diff --git a/mailman/queue/maildir.py b/mailman/queue/maildir.py new file mode 100644 index 000000000..9a1bef2e3 --- /dev/null +++ b/mailman/queue/maildir.py @@ -0,0 +1,189 @@ +# Copyright (C) 2002-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""Maildir pre-queue runner.
+
+Most MTAs can be configured to deliver messages to a `Maildir'[1].  This
+runner will read messages from a maildir's new/ directory and inject them into
+Mailman's qfiles/in directory for processing in the normal pipeline.  This
+delivery mechanism contrasts with mail program delivery, where incoming
+messages end up in qfiles/in via the MTA executing the scripts/post script
+(and likewise for the other -aliases for each mailing list).
+
+The advantage to Maildir delivery is that it is more efficient; there's no
+need to fork an intervening program just to take the message from the MTA's
+standard output to the qfiles/in directory.
+
+[1] http://cr.yp.to/proto/maildir.html
+
+We're going to use the :info flag == 1, experimental status flag for our own
+purposes.  The :1 can be followed by one of these letters:
+
+- P means that MaildirRunner's in the process of parsing and enqueuing the
+  message.  If successful, it will delete the file.
+
+- X means something failed during the parse/enqueue phase.  An error message
+  will be logged to log/error and the file will be renamed <filename>:1,X.
+  MaildirRunner will never automatically return to this file, but once the
+  problem is fixed, you can manually move the file back to the new/ directory
+  and MaildirRunner will attempt to re-process it.  At some point we may do
+  this automatically.
+
+See the variable USE_MAILDIR in Defaults.py.in for enabling this delivery
+mechanism.
+"""
+
+# NOTE: Maildir delivery is experimental in Mailman 2.1.
+
+import os
+import errno
+import logging
+
+from email.Parser import Parser
+from email.Utils import parseaddr
+
+from mailman.Message import Message
+from mailman.configuration import config
+from mailman.queue import Runner, Switchboard
+
+log = logging.getLogger('mailman.error')
+
+# We only care about the listname and the subq as in listname@ or
+# listname-request@
+subqnames = ('admin', 'bounces', 'confirm', 'join', 'leave',
+             'owner', 'request', 'subscribe', 'unsubscribe')
+
+def getlistq(address):
+    localpart, domain = address.split('@', 1)
+    # TK: FIXME I only know configs of Postfix.
+    if config.POSTFIX_STYLE_VIRTUAL_DOMAINS:
+        p = localpart.split(config.POSTFIX_VIRTUAL_SEPARATOR, 1)
+        if len(p) == 2:
+            localpart, domain = p
+    l = localpart.split('-')
+    if l[-1] in subqnames:
+        listname = '-'.join(l[:-1])
+        subq = l[-1]
+    else:
+        listname = localpart
+        subq = None
+    return listname, subq, domain
+
+
+class MaildirRunner(Runner):
+    # This class is much different from most runners because it pulls files
+    # of a different format than what scripts/post and friends leave.  The
+    # files this runner reads are just single message files as dropped into
+    # the directory by the MTA.  This runner will read the file, and enqueue
+    # it in the expected qfiles directory for normal processing.
+    def __init__(self, slice=None, numslices=1):
+        # Don't call the base class constructor, but build enough of the
+        # underlying attributes to use the base class's implementation.
+ self._stop = 0 + self._dir = os.path.join(config.MAILDIR_DIR, 'new') + self._cur = os.path.join(config.MAILDIR_DIR, 'cur') + self._parser = Parser(Message) + + def _oneloop(self): + # Refresh this each time through the list. + listnames = list(config.list_manager.names) + # Cruise through all the files currently in the new/ directory + try: + files = os.listdir(self._dir) + except OSError, e: + if e.errno <> errno.ENOENT: raise + # Nothing's been delivered yet + return 0 + for file in files: + srcname = os.path.join(self._dir, file) + dstname = os.path.join(self._cur, file + ':1,P') + xdstname = os.path.join(self._cur, file + ':1,X') + try: + os.rename(srcname, dstname) + except OSError, e: + if e.errno == errno.ENOENT: + # Some other MaildirRunner beat us to it + continue + log.error('Could not rename maildir file: %s', srcname) + raise + # Now open, read, parse, and enqueue this message + try: + fp = open(dstname) + try: + msg = self._parser.parse(fp) + finally: + fp.close() + # Now we need to figure out which queue of which list this + # message was destined for. See verp_bounce() in + # BounceRunner.py for why we do things this way. + vals = [] + for header in ('delivered-to', 'envelope-to', 'apparently-to'): + vals.extend(msg.get_all(header, [])) + for field in vals: + to = parseaddr(field)[1].lower() + if not to: + continue + listname, subq, domain = getlistq(to) + listname = listname + '@' + domain + if listname in listnames: + break + else: + # As far as we can tell, this message isn't destined for + # any list on the system. What to do? + log.error('Message apparently not for any list: %s', + xdstname) + os.rename(dstname, xdstname) + continue + # BAW: blech, hardcoded + msgdata = {'listname': listname} + # -admin is deprecated + if subq in ('bounces', 'admin'): + queue = Switchboard(config.BOUNCEQUEUE_DIR) + elif subq == 'confirm': + msgdata['toconfirm'] = 1 + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq in ('join', 'subscribe'): + msgdata['tojoin'] = 1 + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq in ('leave', 'unsubscribe'): + msgdata['toleave'] = 1 + queue = Switchboard(config.CMDQUEUE_DIR) + elif subq == 'owner': + msgdata.update({ + 'toowner': True, + 'envsender': config.SITE_OWNER_ADDRESS, + 'pipeline': config.OWNER_PIPELINE, + }) + queue = Switchboard(config.INQUEUE_DIR) + elif subq is None: + msgdata['tolist'] = 1 + queue = Switchboard(config.INQUEUE_DIR) + elif subq == 'request': + msgdata['torequest'] = 1 + queue = Switchboard(config.CMDQUEUE_DIR) + else: + log.error('Unknown sub-queue: %s', subq) + os.rename(dstname, xdstname) + continue + queue.enqueue(msg, msgdata) + os.unlink(dstname) + except Exception, e: + os.rename(dstname, xdstname) + log.error('%s', e) + + def _cleanup(self): + pass diff --git a/mailman/queue/news.py b/mailman/queue/news.py new file mode 100644 index 000000000..ba2a81f98 --- /dev/null +++ b/mailman/queue/news.py @@ -0,0 +1,166 @@ +# Copyright (C) 2000-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""NNTP queue runner.""" + +import re +import email +import socket +import logging +import nntplib + +from cStringIO import StringIO +from email.utils import getaddresses, make_msgid + +COMMASPACE = ', ' + +from mailman import Utils +from mailman.configuration import config +from mailman.interfaces import NewsModeration +from mailman.queue import Runner + +log = logging.getLogger('mailman.error') + +# Matches our Mailman crafted Message-IDs. See Utils.unique_message_id() +# XXX The move to email.utils.make_msgid() breaks this. +mcre = re.compile(r""" + <mailman. # match the prefix + \d+. # serial number + \d+. # time in seconds since epoch + \d+. # pid + (?P<listname>[^@]+) # list's internal_name() + @ # localpart@dom.ain + (?P<hostname>[^>]+) # list's host_name + > # trailer + """, re.VERBOSE) + + + +class NewsRunner(Runner): + QDIR = config.NEWSQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + # Make sure we have the most up-to-date state + mlist.Load() + if not msgdata.get('prepped'): + prepare_message(mlist, msg, msgdata) + try: + # Flatten the message object, sticking it in a StringIO object + fp = StringIO(msg.as_string()) + conn = None + try: + try: + nntp_host, nntp_port = Utils.nntpsplit(mlist.nntp_host) + conn = nntplib.NNTP(nntp_host, nntp_port, + readermode=True, + user=config.NNTP_USERNAME, + password=config.NNTP_PASSWORD) + conn.post(fp) + except nntplib.error_temp, e: + log.error('(NNTPDirect) NNTP error for list "%s": %s', + mlist.internal_name(), e) + except socket.error, e: + log.error('(NNTPDirect) socket error for list "%s": %s', + mlist.internal_name(), e) + finally: + if conn: + conn.quit() + except Exception, e: + # Some other exception occurred, which we definitely did not + # expect, so set this message up for requeuing. + self._log(e) + return True + return False + + + +def prepare_message(mlist, msg, msgdata): + # If the newsgroup is moderated, we need to add this header for the Usenet + # software to accept the posting, and not forward it on to the n.g.'s + # moderation address. The posting would not have gotten here if it hadn't + # already been approved. 1 == open list, mod n.g., 2 == moderated + if mlist.news_moderation in (NewsModeration.open_moderated, + NewsModeration.moderated): + del msg['approved'] + msg['Approved'] = mlist.posting_address + # Should we restore the original, non-prefixed subject for gatewayed + # messages? TK: We use stripped_subject (prefix stripped) which was + # crafted in CookHeaders.py to ensure prefix was stripped from the subject + # came from mailing list user. + stripped_subject = msgdata.get('stripped_subject') \ + or msgdata.get('origsubj') + if not mlist.news_prefix_subject_too and stripped_subject is not None: + del msg['subject'] + msg['subject'] = stripped_subject + # Add the appropriate Newsgroups: header + ngheader = msg['newsgroups'] + if ngheader is not None: + # See if the Newsgroups: header already contains our linked_newsgroup. + # If so, don't add it again. If not, append our linked_newsgroup to + # the end of the header list + ngroups = [s.strip() for s in ngheader.split(',')] + if mlist.linked_newsgroup not in ngroups: + ngroups.append(mlist.linked_newsgroup) + # Subtitute our new header for the old one. 
+ del msg['newsgroups'] + msg['Newsgroups'] = COMMASPACE.join(ngroups) + else: + # Newsgroups: isn't in the message + msg['Newsgroups'] = mlist.linked_newsgroup + # Note: We need to be sure two messages aren't ever sent to the same list + # in the same process, since message ids need to be unique. Further, if + # messages are crossposted to two Usenet-gated mailing lists, they each + # need to have unique message ids or the nntpd will only accept one of + # them. The solution here is to substitute any existing message-id that + # isn't ours with one of ours, so we need to parse it to be sure we're not + # looping. + # + # Our Message-ID format is <mailman.secs.pid.listname@hostname> + msgid = msg['message-id'] + hackmsgid = True + if msgid: + mo = mcre.search(msgid) + if mo: + lname, hname = mo.group('listname', 'hostname') + if lname == mlist.internal_name() and hname == mlist.host_name: + hackmsgid = False + if hackmsgid: + del msg['message-id'] + msg['Message-ID'] = email.utils.make_msgid() + # Lines: is useful + if msg['Lines'] is None: + # BAW: is there a better way? + count = len(list(email.Iterators.body_line_iterator(msg))) + msg['Lines'] = str(count) + # Massage the message headers by remove some and rewriting others. This + # woon't completely sanitize the message, but it will eliminate the bulk + # of the rejections based on message headers. The NNTP server may still + # reject the message because of other problems. + for header in config.NNTP_REMOVE_HEADERS: + del msg[header] + for header, rewrite in config.NNTP_REWRITE_DUPLICATE_HEADERS: + values = msg.get_all(header, []) + if len(values) < 2: + # We only care about duplicates + continue + del msg[header] + # But keep the first one... + msg[header] = values[0] + for v in values[1:]: + msg[rewrite] = v + # Mark this message as prepared in case it has to be requeued + msgdata['prepped'] = True diff --git a/mailman/queue/outgoing.py b/mailman/queue/outgoing.py new file mode 100644 index 000000000..31599e5ee --- /dev/null +++ b/mailman/queue/outgoing.py @@ -0,0 +1,130 @@ +# Copyright (C) 2000-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Outgoing queue runner.""" + +import os +import sys +import copy +import time +import email +import socket +import logging + +from mailman import Errors +from mailman import Message +from mailman.configuration import config +from mailman.queue import Runner, Switchboard +from mailman.queue.bounce import BounceMixin + +# This controls how often _doperiodic() will try to deal with deferred +# permanent failures. 
It is a count of calls to _doperiodic().
+DEAL_WITH_PERMFAILURES_EVERY = 10
+
+log = logging.getLogger('mailman.error')
+
+
+class OutgoingRunner(Runner, BounceMixin):
+    QDIR = config.OUTQUEUE_DIR
+
+    def __init__(self, slice=None, numslices=1):
+        Runner.__init__(self, slice, numslices)
+        BounceMixin.__init__(self)
+        # We look this function up only at startup time
+        handler = config.handlers[config.DELIVERY_MODULE]
+        self._func = handler.process
+        # This prevents smtp server connection problems from filling up the
+        # error log.  It gets reset if the message was successfully sent, and
+        # set if there was a socket.error.
+        self._logged = False
+        self._retryq = Switchboard(config.RETRYQUEUE_DIR)
+
+    def _dispose(self, mlist, msg, msgdata):
+        # See if we should retry delivery of this message again.
+        deliver_after = msgdata.get('deliver_after', 0)
+        if time.time() < deliver_after:
+            return True
+        # Make sure we have the most up-to-date state
+        mlist.Load()
+        try:
+            pid = os.getpid()
+            self._func(mlist, msg, msgdata)
+            # Failsafe -- a child may have leaked through.
+            if pid <> os.getpid():
+                log.error('child process leaked thru: %s',
+                          config.DELIVERY_MODULE)
+                os._exit(1)
+            self._logged = False
+        except socket.error:
+            # There was a problem connecting to the SMTP server.  Log this
+            # once, but crank up our sleep time so we don't fill the error
+            # log.
+            port = config.SMTPPORT
+            if port == 0:
+                port = 'smtp'
+            # Log this just once.
+            if not self._logged:
+                log.error('Cannot connect to SMTP server %s on port %s',
+                          config.SMTPHOST, port)
+                self._logged = True
+            return True
+        except Errors.SomeRecipientsFailed, e:
+            # Handle local rejects of probe messages differently.
+            if msgdata.get('probe_token') and e.permfailures:
+                self._probe_bounce(mlist, msgdata['probe_token'])
+            else:
+                # Delivery failed at SMTP time for some or all of the
+                # recipients.  Permanent failures are registered as bounces,
+                # but temporary failures are retried for later.
+                #
+                # BAW: msg is going to be the original message that failed
+                # delivery, not a bounce message.  This may be confusing if
+                # this is what's sent to the user in the probe message.  Maybe
+                # we should craft a bounce-like message containing information
+                # about the permanent SMTP failure?
+                if e.permfailures:
+                    self._queue_bounces(mlist.fqdn_listname, e.permfailures,
+                                        msg)
+                # Move temporary failures to the qfiles/retry queue which will
+                # occasionally move them back here for another shot at
+                # delivery.
+                if e.tempfailures:
+                    now = time.time()
+                    recips = e.tempfailures
+                    last_recip_count = msgdata.get('last_recip_count', 0)
+                    deliver_until = msgdata.get('deliver_until', now)
+                    if len(recips) == last_recip_count:
+                        # We didn't make any progress, so don't attempt
+                        # delivery any longer.  BAW: is this the best
+                        # disposition?
+                        if now > deliver_until:
+                            return False
+                    else:
+                        # Keep trying to deliver this message for a while
+                        deliver_until = now + config.DELIVERY_RETRY_PERIOD
+                    msgdata['last_recip_count'] = len(recips)
+                    msgdata['deliver_until'] = deliver_until
+                    msgdata['recips'] = recips
+                    self._retryq.enqueue(msg, msgdata)
+        # We've successfully completed handling of this message
+        return False
+
+    _doperiodic = BounceMixin._doperiodic
+
+    def _cleanup(self):
+        BounceMixin._cleanup(self)
+        Runner._cleanup(self)
diff --git a/mailman/queue/pipeline.py b/mailman/queue/pipeline.py
new file mode 100644
index 000000000..ac311864f
--- /dev/null
+++ b/mailman/queue/pipeline.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2008 by the Free Software Foundation, Inc.
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""The pipeline queue runner. + +This runner's purpose is to take messages that have been approved for posting +through the 'preparation pipeline'. This pipeline adds, deletes and modifies +headers, calculates message recipients, and more. +""" + +from mailman.app.pipelines import process +from mailman.configuration import config +from mailman.queue import Runner + + + +class PipelineRunner(Runner): + QDIR = config.PIPELINEQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + # Process the message through the mailing list's pipeline. + process(mlist, msg, msgdata, mlist.pipeline) + # Do not keep this message queued. + return False diff --git a/mailman/queue/retry.py b/mailman/queue/retry.py new file mode 100644 index 000000000..e90b73990 --- /dev/null +++ b/mailman/queue/retry.py @@ -0,0 +1,40 @@ +# Copyright (C) 2003-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +import time + +from mailman.configuration import config +from mailman.queue import Runner, Switchboard + + + +class RetryRunner(Runner): + QDIR = config.RETRYQUEUE_DIR + SLEEPTIME = config.minutes(15) + + def __init__(self, slice=None, numslices=1): + Runner.__init__(self, slice, numslices) + self.__outq = Switchboard(config.OUTQUEUE_DIR) + + def _dispose(self, mlist, msg, msgdata): + # Move it to the out queue for another retry + self.__outq.enqueue(msg, msgdata) + return False + + def _snooze(self, filecnt): + # We always want to snooze + time.sleep(float(self.SLEEPTIME)) diff --git a/mailman/queue/virgin.py b/mailman/queue/virgin.py new file mode 100644 index 000000000..5534c95f0 --- /dev/null +++ b/mailman/queue/virgin.py @@ -0,0 +1,44 @@ +# Copyright (C) 1998-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Virgin message queue runner. + +This qrunner handles messages that the Mailman system gives virgin birth to. +E.g. acknowledgement responses to user posts or Replybot messages. They need +to go through some minimal processing before they can be sent out to the +recipient. +""" + +from mailman.configuration import config +from mailman.queue import Runner +from mailman.queue.incoming import IncomingRunner + + + +class VirginRunner(IncomingRunner): + QDIR = config.VIRGINQUEUE_DIR + + def _dispose(self, mlist, msg, msgdata): + # We need to fasttrack this message through any handlers that touch + # it. E.g. especially CookHeaders. + msgdata['_fasttrack'] = 1 + return IncomingRunner._dispose(self, mlist, msg, msgdata) + + def _get_pipeline(self, mlist, msg, msgdata): + # It's okay to hardcode this, since it'll be the same for all + # internally crafted messages. + return ['CookHeaders', 'ToOutgoing'] |
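
Two small illustrations may help when reading the files above.  First, outgoing.txt shows several situations in which ToOutgoing marks a message for VERP, but it never states the rule in one place.  The helper below is a rough paraphrase of the behaviour those examples exercise; the function name and its flat argument list are invented for this sketch, since the real decision lives in the to-outgoing handler and reads the mailing list and config objects directly.  When a message is VERP'd, each recipient eventually receives an individual copy whose envelope sender encodes that recipient's address, conventionally something of the form _xtest-bounces+aperson=example.org@example.com (the exact format is a configuration option).

    # Illustrative paraphrase of the VERP decision; not the handler's code.
    def should_verp(already_verped, personalized,
                    verp_personalized_deliveries, verp_delivery_interval,
                    post_id):
        if already_verped:
            # An earlier handler already asked for VERP.
            return True
        if personalized:
            # Personalized lists VERP only if the site allows it.
            return verp_personalized_deliveries
        if verp_delivery_interval <= 0:
            # Zero means never VERP non-personalized posts.
            return False
        # Every interval-th post is VERP'd; an interval of 1 means every post.
        return post_id % verp_delivery_interval == 0

    # Mirrors the interval == 3 example above: posts 0, 3, 6 and 9 are VERP'd.
    print [i for i in range(10) if should_verp(False, False, False, 3, i)]
    # -> [0, 3, 6, 9]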
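
Second, both lmtp.py and maildir.py split a recipient address into a list name, an optional sub-queue and a domain.  The standalone paraphrase below follows lmtp.getlistq(); the helper name split_recipient and the '+' VERP delimiter default are assumptions made for this sketch, and it deliberately avoids importing mailman.queue.lmtp, which would start the LMTP server at import time.

    # Illustrative paraphrase of lmtp.getlistq(); not Mailman code.
    SUBQUEUES = ('bounces', 'confirm', 'join', 'leave',
                 'owner', 'request', 'subscribe', 'unsubscribe')

    def split_recipient(address, verp_delimiter='+'):
        """Return (listname, subqueue, domain) for an LMTP recipient."""
        localpart, domain = address.split('@', 1)
        # Drop any VERP-encoded remote address, e.g. '+user=remote.example.org'.
        localpart = localpart.split(verp_delimiter, 1)[0]
        parts = localpart.split('-')
        if parts[-1] in SUBQUEUES:
            return '-'.join(parts[:-1]), parts[-1], domain
        return localpart, None, domain

    print split_recipient('mylist@example.com')
    # -> ('mylist', None, 'example.com')
    print split_recipient('mylist-bounces@example.com')
    # -> ('mylist', 'bounces', 'example.com')
    print split_recipient('dev-team@example.com')
    # -> ('dev-team', None, 'example.com'); dashes in list names are safe
    print split_recipient('mylist-bounces+user=remote.example.org@example.com')
    # -> ('mylist', 'bounces', 'example.com')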
