From eefd06f1b88b8ecbb23a9013cd223b72ca85c20d Mon Sep 17 00:00:00 2001
From: Barry Warsaw
Date: Sun, 25 Jan 2009 13:01:41 -0500
Subject: Push the source directory into a 'src' subdirectory so that
zc.buildout works correctly regardless of how it's used.
---
src/mailman/pipeline/__init__.py | 54 +++
src/mailman/pipeline/acknowledge.py | 80 ++++
src/mailman/pipeline/after_delivery.py | 48 +++
src/mailman/pipeline/avoid_duplicates.py | 116 ++++++
src/mailman/pipeline/calculate_recipients.py | 148 +++++++
src/mailman/pipeline/cleanse.py | 75 ++++
src/mailman/pipeline/cleanse_dkim.py | 58 +++
src/mailman/pipeline/cook_headers.py | 357 +++++++++++++++++
src/mailman/pipeline/decorate.py | 231 +++++++++++
src/mailman/pipeline/docs/ack-headers.txt | 40 ++
src/mailman/pipeline/docs/acknowledge.txt | 159 ++++++++
src/mailman/pipeline/docs/after-delivery.txt | 27 ++
src/mailman/pipeline/docs/archives.txt | 133 ++++++
src/mailman/pipeline/docs/avoid-duplicates.txt | 168 ++++++++
src/mailman/pipeline/docs/calc-recips.txt | 100 +++++
src/mailman/pipeline/docs/cleanse.txt | 94 +++++
src/mailman/pipeline/docs/cook-headers.txt | 326 +++++++++++++++
src/mailman/pipeline/docs/decorate.txt | 317 +++++++++++++++
src/mailman/pipeline/docs/digests.txt | 535 +++++++++++++++++++++++++
src/mailman/pipeline/docs/file-recips.txt | 96 +++++
src/mailman/pipeline/docs/filtering.txt | 340 ++++++++++++++++
src/mailman/pipeline/docs/nntp.txt | 65 +++
src/mailman/pipeline/docs/reply-to.txt | 127 ++++++
src/mailman/pipeline/docs/replybot.txt | 213 ++++++++++
src/mailman/pipeline/docs/scrubber.txt | 225 +++++++++++
src/mailman/pipeline/docs/subject-munging.txt | 244 +++++++++++
src/mailman/pipeline/docs/tagger.txt | 235 +++++++++++
src/mailman/pipeline/docs/to-outgoing.txt | 173 ++++++++
src/mailman/pipeline/file_recipients.py | 65 +++
src/mailman/pipeline/mime_delete.py | 285 +++++++++++++
src/mailman/pipeline/moderate.py | 175 ++++++++
src/mailman/pipeline/owner_recipients.py | 34 ++
src/mailman/pipeline/replybot.py | 134 +++++++
src/mailman/pipeline/scrubber.py | 509 +++++++++++++++++++++++
src/mailman/pipeline/tagger.py | 187 +++++++++
src/mailman/pipeline/to_archive.py | 55 +++
src/mailman/pipeline/to_digest.py | 440 ++++++++++++++++++++
src/mailman/pipeline/to_outgoing.py | 78 ++++
src/mailman/pipeline/to_usenet.py | 69 ++++
39 files changed, 6815 insertions(+)
create mode 100644 src/mailman/pipeline/__init__.py
create mode 100644 src/mailman/pipeline/acknowledge.py
create mode 100644 src/mailman/pipeline/after_delivery.py
create mode 100644 src/mailman/pipeline/avoid_duplicates.py
create mode 100644 src/mailman/pipeline/calculate_recipients.py
create mode 100644 src/mailman/pipeline/cleanse.py
create mode 100644 src/mailman/pipeline/cleanse_dkim.py
create mode 100644 src/mailman/pipeline/cook_headers.py
create mode 100644 src/mailman/pipeline/decorate.py
create mode 100644 src/mailman/pipeline/docs/ack-headers.txt
create mode 100644 src/mailman/pipeline/docs/acknowledge.txt
create mode 100644 src/mailman/pipeline/docs/after-delivery.txt
create mode 100644 src/mailman/pipeline/docs/archives.txt
create mode 100644 src/mailman/pipeline/docs/avoid-duplicates.txt
create mode 100644 src/mailman/pipeline/docs/calc-recips.txt
create mode 100644 src/mailman/pipeline/docs/cleanse.txt
create mode 100644 src/mailman/pipeline/docs/cook-headers.txt
create mode 100644 src/mailman/pipeline/docs/decorate.txt
create mode 100644 src/mailman/pipeline/docs/digests.txt
create mode 100644 src/mailman/pipeline/docs/file-recips.txt
create mode 100644 src/mailman/pipeline/docs/filtering.txt
create mode 100644 src/mailman/pipeline/docs/nntp.txt
create mode 100644 src/mailman/pipeline/docs/reply-to.txt
create mode 100644 src/mailman/pipeline/docs/replybot.txt
create mode 100644 src/mailman/pipeline/docs/scrubber.txt
create mode 100644 src/mailman/pipeline/docs/subject-munging.txt
create mode 100644 src/mailman/pipeline/docs/tagger.txt
create mode 100644 src/mailman/pipeline/docs/to-outgoing.txt
create mode 100644 src/mailman/pipeline/file_recipients.py
create mode 100644 src/mailman/pipeline/mime_delete.py
create mode 100644 src/mailman/pipeline/moderate.py
create mode 100644 src/mailman/pipeline/owner_recipients.py
create mode 100644 src/mailman/pipeline/replybot.py
create mode 100644 src/mailman/pipeline/scrubber.py
create mode 100644 src/mailman/pipeline/tagger.py
create mode 100644 src/mailman/pipeline/to_archive.py
create mode 100644 src/mailman/pipeline/to_digest.py
create mode 100644 src/mailman/pipeline/to_outgoing.py
create mode 100644 src/mailman/pipeline/to_usenet.py
(limited to 'src/mailman/pipeline')
diff --git a/src/mailman/pipeline/__init__.py b/src/mailman/pipeline/__init__.py
new file mode 100644
index 000000000..f73061874
--- /dev/null
+++ b/src/mailman/pipeline/__init__.py
@@ -0,0 +1,54 @@
+# Copyright (C) 2008-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""The built in set of pipeline handlers."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'initialize',
+ ]
+
+
+import os
+import sys
+
+from mailman.interfaces.handler import IHandler
+
+
+
+ def initialize():
+ """Initialize the built-in handlers.
+
+ Handlers are auto-discovered by searching for IHandler implementations in
+ all importable modules in this subpackage.
+
+ This is a generator: it yields each object that is exported through a
+ module's `__all__` and for which `IHandler.implementedBy()` is true.
+ """
+ # Find all handlers found in all modules inside our package.
+ import mailman.pipeline
+ here = os.path.dirname(mailman.pipeline.__file__)
+ for filename in os.listdir(here):
+ basename, extension = os.path.splitext(filename)
+ # Only Python source files are considered; everything else is skipped.
+ if extension <> '.py':
+ continue
+ module_name = 'mailman.pipeline.' + basename
+ # Import the module, then fetch the module object by its full dotted
+ # name from sys.modules.
+ __import__(module_name, fromlist='*')
+ module = sys.modules[module_name]
+ # Only names listed in the module's __all__ are inspected.
+ for name in getattr(module, '__all__', ()):
+ handler = getattr(module, name)
+ if IHandler.implementedBy(handler):
+ yield handler
diff --git a/src/mailman/pipeline/acknowledge.py b/src/mailman/pipeline/acknowledge.py
new file mode 100644
index 000000000..de520df65
--- /dev/null
+++ b/src/mailman/pipeline/acknowledge.py
@@ -0,0 +1,80 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Send an acknowledgment of the successful post to the sender.
+
+This only happens if the sender has set their AcknowledgePosts attribute.
+"""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'Acknowledge',
+ ]
+
+
+from zope.interface import implements
+
+from mailman import Message
+from mailman import Utils
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+
+
+
+ class Acknowledge:
+ """Send an acknowledgment of a successful post to its sender.
+
+ Nothing is sent unless the sender is a list member whose
+ `acknowledge_posts` attribute is true.
+ """
+ implements(IHandler)
+
+ name = 'acknowledge'
+ description = _("""Send an acknowledgment of a posting.""")
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ # Extract the sender's address and find them in the user database.
+ # An 'original_sender' entry in msgdata takes precedence over the
+ # sender found in the message itself.
+ sender = msgdata.get('original_sender', msg.get_sender())
+ member = mlist.members.get_member(sender)
+ if member is None or not member.acknowledge_posts:
+ # Either the sender is not a member, in which case we can't know
+ # whether they want an acknowledgment or not, or they are a member
+ # who definitely does not want an acknowledgment.
+ return
+ # Okay, they are a member that wants an acknowledgment of their post.
+ # Give them their original subject. BAW: do we want to use the
+ # decoded header?
+ original_subject = msgdata.get(
+ 'origsubj', msg.get('subject', _('(no subject)')))
+ # Get the user's preferred language; a 'lang' entry in msgdata
+ # overrides the member's own preference.
+ lang = msgdata.get('lang', member.preferred_language)
+ # Now get the acknowledgement template.
+ realname = mlist.real_name
+ text = Utils.maketext(
+ 'postack.txt',
+ {'subject' : Utils.oneline(original_subject,
+ Utils.GetCharSet(lang)),
+ 'listname' : realname,
+ 'listinfo_url': mlist.script_url('listinfo'),
+ 'optionsurl' : member.options_url,
+ }, lang=lang, mlist=mlist, raw=True)
+ # Craft the outgoing message, with all headers and attributes
+ # necessary for general delivery. Then enqueue it to the outgoing
+ # queue.
+ # NOTE(review): presumably _() interpolates $realname from the local
+ # variable assigned above -- confirm against mailman.i18n.
+ subject = _('$realname post acknowledgment')
+ # NOTE(review): presumably the first two arguments are the recipient
+ # and the envelope sender -- confirm against Message.UserNotification.
+ usermsg = Message.UserNotification(sender, mlist.bounces_address,
+ subject, text, lang)
+ usermsg.send(mlist)
diff --git a/src/mailman/pipeline/after_delivery.py b/src/mailman/pipeline/after_delivery.py
new file mode 100644
index 000000000..4626ba292
--- /dev/null
+++ b/src/mailman/pipeline/after_delivery.py
@@ -0,0 +1,48 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Perform some bookkeeping after a successful post."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'AfterDelivery',
+ ]
+
+
+import datetime
+
+from zope.interface import implements
+
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+
+
+
+ class AfterDelivery:
+ """Perform some bookkeeping after a successful post."""
+
+ implements(IHandler)
+
+ name = 'after-delivery'
+ description = _('Perform some bookkeeping after a successful post.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ # Record the current (naive, local) time as the list's last post time
+ # and increment the list's post_id. Neither msg nor msgdata is used.
+ mlist.last_post_time = datetime.datetime.now()
+ mlist.post_id += 1
diff --git a/src/mailman/pipeline/avoid_duplicates.py b/src/mailman/pipeline/avoid_duplicates.py
new file mode 100644
index 000000000..0458e117c
--- /dev/null
+++ b/src/mailman/pipeline/avoid_duplicates.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2002-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""If the user wishes it, do not send duplicates of the same message.
+
+This module keeps an in-memory dictionary of Message-ID: and recipient pairs.
+If a message with an identical Message-ID: is about to be sent to someone who
+has already received a copy, we either drop the message, add a duplicate
+warning header, or pass it through, depending on the user's preferences.
+"""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'AvoidDuplicates',
+ ]
+
+
+from email.Utils import getaddresses, formataddr
+from zope.interface import implements
+
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+
+
+COMMASPACE = ', '
+
+
+
+ class AvoidDuplicates:
+ """If the user wishes it, do not send duplicates of the same message.
+
+ Recipients who are already named in the To, CC, Resent-To or Resent-CC
+ headers may be removed from msgdata['recips'], depending on their
+ `receive_list_copy` preference.
+ """
+
+ implements(IHandler)
+
+ name = 'avoid-duplicates'
+ description = _('Suppress some duplicates of the same message.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ recips = msgdata.get('recips')
+ # Short circuit: nothing to do without a non-empty recipient list.
+ if not recips:
+ return
+ # Seed this set with addresses we don't care about dup avoiding.
+ listaddrs = set((mlist.posting_address,
+ mlist.bounces_address,
+ mlist.owner_address,
+ mlist.request_address))
+ explicit_recips = listaddrs.copy()
+ # Figure out the set of explicit recipients. cc_addresses maps each
+ # CC'd address to its formatted "Real Name <address>" form so that the
+ # CC header can be rebuilt at the end.
+ cc_addresses = {}
+ for header in ('to', 'cc', 'resent-to', 'resent-cc'):
+ addrs = getaddresses(msg.get_all(header, []))
+ header_addresses = dict((addr, formataddr((name, addr)))
+ for name, addr in addrs
+ if addr)
+ if header == 'cc':
+ # Yes, it's possible that an address is mentioned in multiple
+ # CC headers using different names. In that case, the last
+ # real name will win, but that doesn't seem like such a big
+ # deal. Besides, how else would you choose?
+ cc_addresses.update(header_addresses)
+ # Add every address named in this header to the explicit recipients.
+ explicit_recips |= set(header_addresses)
+ # Now strip out the list addresses.
+ explicit_recips -= listaddrs
+ if not explicit_recips:
+ # No one was explicitly addressed, so we can't do any dup
+ # collapsing
+ return
+ newrecips = set()
+ for r in recips:
+ # If this recipient is explicitly addressed...
+ if r in explicit_recips:
+ send_duplicate = True
+ # If the member wants to receive duplicates, or if the
+ # recipient is not a member at all, they will get a copy.
+ member = mlist.members.get_member(r)
+ if member and not member.receive_list_copy:
+ send_duplicate = False
+ # We'll send a duplicate unless the user doesn't wish it. The
+ # recipient is recorded in the 'add-dup-header' set in msgdata; if
+ # personalization is enabled, that flag will add a
+ # X-Mailman-Duplicate: yes header for this user's message.
+ if send_duplicate:
+ msgdata.setdefault('add-dup-header', set()).add(r)
+ newrecips.add(r)
+ elif r in cc_addresses:
+ # Drop the address from the mapping used to rebuild the CC header.
+ del cc_addresses[r]
+ else:
+ # Otherwise keep the recipient: add them to the newrecips set.
+ newrecips.add(r)
+ # Set the new list of recipients. XXX recips should always be a set.
+ msgdata['recips'] = list(newrecips)
+ # RFC 2822 specifies zero or one CC header, so any existing CC headers
+ # are replaced by a single one built from the remaining CC addresses.
+ if cc_addresses:
+ del msg['cc']
+ msg['CC'] = COMMASPACE.join(cc_addresses.values())
diff --git a/src/mailman/pipeline/calculate_recipients.py b/src/mailman/pipeline/calculate_recipients.py
new file mode 100644
index 000000000..9837c1e6b
--- /dev/null
+++ b/src/mailman/pipeline/calculate_recipients.py
@@ -0,0 +1,148 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Calculate the regular (i.e. non-digest) recipients of the message.
+
+This module calculates the non-digest recipients for the message based on the
+list's membership and configuration options. It places the list of recipients
+on the `recips' attribute of the message. This attribute is used by the
+SendmailDeliver and BulkDeliver modules.
+"""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'CalculateRecipients',
+ ]
+
+from zope.interface import implements
+
+from mailman import Utils
+from mailman.config import config
+from mailman.core import errors
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+from mailman.interfaces.member import DeliveryStatus
+
+
+
+ class CalculateRecipients:
+ """Calculate the regular (i.e. non-digest) recipients of the message."""
+
+ implements(IHandler)
+
+ name = 'calculate-recipients'
+ description = _('Calculate the regular recipients of the message.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`.
+
+ Stores the computed recipients under msgdata['recips'], unless that
+ key is already present. Raises `errors.RejectMessage` when an
+ Urgent: header carries a password that does not authenticate.
+ """
+ # Short circuit if we've already calculated the recipients list,
+ # regardless of whether the list is empty or not.
+ if 'recips' in msgdata:
+ return
+ # Should the original sender be included in the recipients list?
+ include_sender = True
+ sender = msg.get_sender()
+ member = mlist.members.get_member(sender)
+ if member and not member.receive_own_postings:
+ include_sender = False
+ # Support for urgent messages, which bypasses digests and disabled
+ # delivery and forces an immediate delivery to all members Right Now.
+ # We are specifically /not/ allowing the site admins password to work
+ # here because we want to discourage the practice of sending the site
+ # admin password through email in the clear. (see also Approve.py)
+ #
+ # XXX This is broken.
+ # A private sentinel distinguishes "no Urgent: header" from a header
+ # with an empty value.
+ missing = object()
+ password = msg.get('urgent', missing)
+ if password is not missing:
+ if mlist.Authenticate((config.AuthListModerator,
+ config.AuthListAdmin),
+ password):
+ recips = mlist.getMemberCPAddresses(
+ mlist.getRegularMemberKeys() +
+ mlist.getDigestMemberKeys())
+ msgdata['recips'] = recips
+ return
+ else:
+ # Bad Urgent: password, so reject it instead of passing it on.
+ # I think it's better that the sender know they screwed up
+ # than to deliver it normally.
+ realname = mlist.real_name
+ # NOTE(review): this text uses a %(realname)s placeholder, while the
+ # acknowledge handler passes '$realname' to _() -- confirm which
+ # style _() actually interpolates.
+ text = _("""\
+Your urgent message to the %(realname)s mailing list was not authorized for
+delivery. The original message as received by Mailman is attached.
+""")
+ raise errors.RejectMessage(Utils.wrap(text))
+ # Calculate the regular recipients of the message: the addresses of
+ # all regular members whose delivery is enabled.
+ recips = set(member.address.address
+ for member in mlist.regular_members.members
+ if member.delivery_status == DeliveryStatus.enabled)
+ # Remove the sender if they don't want to receive their own posts.
+ # The generator expression above has its own scope, so `member` here is
+ # still the sender's membership looked up earlier.
+ if not include_sender and member.address.address in recips:
+ recips.remove(member.address.address)
+ # Handle topic classifications
+ do_topic_filters(mlist, msg, msgdata, recips)
+ # Bookkeeping
+ msgdata['recips'] = recips
+
+
+
+ def do_topic_filters(mlist, msg, msgdata, recips):
+ """Remove from `recips` the users whose topic choices exclude this message.
+
+ Does nothing when `mlist.topics_enabled` is false. Otherwise `recips` is
+ modified in place through `recips.remove()` and nothing is returned. The
+ topics the message matched are read from msgdata['topichits']; `msg`
+ itself is not consulted here.
+ """
+ if not mlist.topics_enabled:
+ # MAS: if topics are currently disabled for the list, send to all
+ # regardless of ReceiveNonmatchingTopics
+ return
+ hits = msgdata.get('topichits')
+ # Users to drop are collected first and removed at the end, so `recips`
+ # is never modified while it is being iterated.
+ zaprecips = []
+ if hits:
+ # The message hit some topics, so only deliver this message to those
+ # who are interested in one of the hit topics.
+ for user in recips:
+ utopics = mlist.getMemberTopics(user)
+ if not utopics:
+ # This user is not interested in any topics, so they get all
+ # postings.
+ continue
+ # BAW: Slow, first-match, set intersection!
+ for topic in utopics:
+ if topic in hits:
+ # The user wants this message
+ break
+ else:
+ # The user was interested in topics, but not any of the ones
+ # this message matched, so zap him.
+ zaprecips.append(user)
+ else:
+ # The semantics for a message that did not hit any of the pre-canned
+ # topics is to troll through the membership list, looking for users
+ # who selected at least one topic of interest, but turned on
+ # ReceiveNonmatchingTopics.
+ for user in recips:
+ if not mlist.getMemberTopics(user):
+ # The user did not select any topics of interest, so he gets
+ # this message by default.
+ continue
+ if not mlist.getMemberOption(
+ user, config.ReceiveNonmatchingTopics):
+ # The user has interest in some topics, but elects not to
+ # receive message that match no topics, so zap him.
+ zaprecips.append(user)
+ # Otherwise, the user wants non-matching messages.
+ # Prune out the non-receiving users
+ for user in zaprecips:
+ recips.remove(user)
diff --git a/src/mailman/pipeline/cleanse.py b/src/mailman/pipeline/cleanse.py
new file mode 100644
index 000000000..330f415c2
--- /dev/null
+++ b/src/mailman/pipeline/cleanse.py
@@ -0,0 +1,75 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Cleanse certain headers from all messages."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'Cleanse',
+ ]
+
+
+import logging
+
+from email.Utils import formataddr
+from zope.interface import implements
+
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+from mailman.pipeline.cook_headers import uheader
+
+
+log = logging.getLogger('mailman.post')
+
+
+
+ class Cleanse:
+ """Cleanse certain headers from all messages.
+
+ Headers that could carry passwords, request read receipts, or spoof the
+ archive location are deleted; for anonymous lists the author-identifying
+ headers are replaced with the list's own identity.
+ """
+
+ implements(IHandler)
+
+ name = 'cleanse'
+ description = _('Cleanse certain headers from all messages.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ # Remove headers that could contain passwords.
+ del msg['approved']
+ del msg['approve']
+ del msg['urgent']
+ # We remove other headers from anonymous lists.
+ if mlist.anonymous_list:
+ # Log the original From: value before it is deleted.
+ log.info('post to %s from %s anonymized',
+ mlist.fqdn_listname, msg.get('from'))
+ del msg['from']
+ del msg['reply-to']
+ del msg['sender']
+ # Hotmail sets this one
+ del msg['x-originating-email']
+ # Substitute the list's description and posting address for the
+ # original author, and direct replies to the list.
+ i18ndesc = str(uheader(mlist, mlist.description, 'From'))
+ msg['From'] = formataddr((i18ndesc, mlist.posting_address))
+ msg['Reply-To'] = mlist.posting_address
+ # Some headers can be used to fish for membership.
+ del msg['return-receipt-to']
+ del msg['disposition-notification-to']
+ del msg['x-confirm-reading-to']
+ # Pegasus mail uses this one... sigh.
+ del msg['x-pmrqc']
+ # Don't let this header be spoofed. See RFC 5064.
+ del msg['archived-at']
diff --git a/src/mailman/pipeline/cleanse_dkim.py b/src/mailman/pipeline/cleanse_dkim.py
new file mode 100644
index 000000000..38623079c
--- /dev/null
+++ b/src/mailman/pipeline/cleanse_dkim.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2006-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Remove any 'DomainKeys' (or similar) headers.
+
+The values contained in these header lines are intended to be used by the
+recipient to detect forgery or tampering in transit, and the modifications
+made by Mailman to the headers and body of the message will cause these keys
+to appear invalid. Removing them will at least avoid this misleading result,
+and it will also give the MTA the opportunity to regenerate valid keys
+originating at the Mailman server for the outgoing message.
+"""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'CleanseDKIM',
+ ]
+
+
+from lazr.config import as_boolean
+from zope.interface import implements
+
+from mailman.config import config
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+
+
+
+ class CleanseDKIM:
+ """Remove DomainKeys headers.
+
+ The headers are only removed when the `remove_dkim_headers` setting in the
+ `mta` configuration section is true.
+ """
+
+ implements(IHandler)
+
+ name = 'cleanse-dkim'
+ description = _('Remove DomainKeys headers.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ # The configuration value is converted with as_boolean() before testing.
+ if as_boolean(config.mta.remove_dkim_headers):
+ del msg['domainkey-signature']
+ del msg['dkim-signature']
+ del msg['authentication-results']
diff --git a/src/mailman/pipeline/cook_headers.py b/src/mailman/pipeline/cook_headers.py
new file mode 100644
index 000000000..529d7ce5d
--- /dev/null
+++ b/src/mailman/pipeline/cook_headers.py
@@ -0,0 +1,357 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Cook a message's headers."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'CookHeaders',
+ ]
+
+
+import re
+
+from email.errors import HeaderParseError
+from email.header import Header, decode_header, make_header
+from email.utils import parseaddr, formataddr, getaddresses
+from zope.interface import implements
+
+from mailman import Utils
+from mailman.config import config
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+from mailman.interfaces.mailinglist import Personalization, ReplyToMunging
+from mailman.version import VERSION
+
+
+CONTINUATION = ',\n\t'
+COMMASPACE = ', '
+MAXLINELEN = 78
+
+nonascii = re.compile('[^\s!-~]')
+
+
+
+def uheader(mlist, s, header_name=None, continuation_ws='\t', maxlinelen=None):
+ # Return `s` as an email Header. Start from the charset of the list's
+ # preferred language. If `s` contains a non-ascii character and that
+ # charset is us-ascii, use iso-8859-1 instead. If `s` is ascii only, use
+ # 'us-ascii' even if another charset is specified.
+ charset = Utils.GetCharSet(mlist.preferred_language)
+ if nonascii.search(s):
+ # use the list charset, but never us-ascii for non-ascii text
+ if charset == 'us-ascii':
+ charset = 'iso-8859-1'
+ else:
+ # nothing non-ascii was found, so us-ascii is enough
+ charset = 'us-ascii'
+ return Header(s, charset, maxlinelen, header_name, continuation_ws)
+
+
+
+def process(mlist, msg, msgdata):
+    """Cook the headers of `msg` in place for delivery through `mlist`."""
+    # Set the "X-Ack: no" header if noack flag is set.
+    if msgdata.get('noack'):
+        del msg['x-ack']
+        msg['X-Ack'] = 'no'
+    # We are about to modify various important headers, so save what is
+    # needed later in the msgdata dictionary. Specifically, the sender header
+    # will get waxed, but we need it for the Acknowledge module later.
+    msgdata['original_sender'] = msg.get_sender()
+    # VirginRunner sets _fasttrack for internally crafted messages.
+    fasttrack = msgdata.get('_fasttrack')
+    if not msgdata.get('isdigest') and not fasttrack:
+        try:
+            prefix_subject(mlist, msg, msgdata)
+        except (UnicodeError, ValueError):
+            # TK: Sometimes the subject header is not MIME encoded for 8bit;
+            # simply abort prefixing.
+            pass
+    # Mark message so we know we've been here, but leave any existing
+    # X-BeenThere's intact.
+    msg['X-BeenThere'] = mlist.posting_address
+    # Add Precedence: and other useful headers. None of these are standard
+    # and finding information on some of them is fairly difficult. Some are
+    # just common practice, and we'll add more here as they become necessary.
+    # Good places to look are:
+    #
+    # http://www.dsv.su.se/~jpalme/ietf/jp-ietf-home.html
+    # http://www.faqs.org/rfcs/rfc2076.html
+    #
+    # None of these headers are added if they already exist. BAW: some
+    # consider the advertising of this a security breach. I.e. if there are
+    # known exploits in a particular version of Mailman and we know a site is
+    # using such an old version, they may be vulnerable. It's too easy to
+    # edit the code to add a configuration variable to handle this.
+    if 'x-mailman-version' not in msg:
+        msg['X-Mailman-Version'] = VERSION
+    # We set "Precedence: list" because this is the recommendation from the
+    # sendmail docs, the most authoritative source of this header's semantics.
+    if 'precedence' not in msg:
+        msg['Precedence'] = 'list'
+    # Reply-To: munging. Do not do this if the message is "fast tracked",
+    # meaning it is internally crafted and delivered to a specific user. BAW:
+    # Yuck, I really hate this feature but I've caved under the sheer pressure
+    # of the (very vocal) folks who want it. OTOH, RFC 2822 allows Reply-To:
+    # to be a list of addresses, so instead of replacing the original, simply
+    # augment it. RFC 2822 allows max one Reply-To: header so collapse them
+    # if we're adding a value, otherwise don't touch it. (Should we collapse
+    # in all cases?)
+    if not fasttrack:
+        # add() appends a (name, addr) pair to `new` unless addr is a dup.
+        new = []
+        d = {}
+        def add(pair):
+            lcaddr = pair[1].lower()
+            if lcaddr in d:
+                return
+            d[lcaddr] = pair
+            new.append(pair)
+        # List admin wants an explicit Reply-To: added
+        if mlist.reply_goes_to_list == ReplyToMunging.explicit_header:
+            add(parseaddr(mlist.reply_to_address))
+        # If we're not first stripping existing Reply-To: then we need to add
+        # the original Reply-To:'s to the list we're building up. In both
+        # cases we'll zap the existing field because RFC 2822 says max one is
+        # allowed.
+        if not mlist.first_strip_reply_to:
+            orig = msg.get_all('reply-to', [])
+            for pair in getaddresses(orig):
+                add(pair)
+        # Set Reply-To: header to point back to this list. Add this last
+        # because some folks think that some MUAs make it easier to delete
+        # addresses from the right than from the left.
+        if mlist.reply_goes_to_list == ReplyToMunging.point_to_list:
+            i18ndesc = uheader(mlist, mlist.description, 'Reply-To')
+            add((str(i18ndesc), mlist.posting_address))
+        del msg['reply-to']
+        # Don't put Reply-To: back if there's nothing to add!
+        if new:
+            # Preserve order
+            msg['Reply-To'] = COMMASPACE.join(
+                [formataddr(pair) for pair in new])
+        # The To field normally contains the list posting address. However
+        # when messages are fully personalized, that header will get
+        # overwritten with the address of the recipient. We need to get the
+        # posting address in one of the recipient headers or they won't be
+        # able to reply back to the list. It's possible the posting address
+        # was munged into the Reply-To header, but if not, we'll add it to a
+        # Cc header. BAW: should we force it into a Reply-To header in the
+        # above code?
+        # Also skip Cc if this is an anonymous list as list posting address
+        # is already in From and Reply-To in this case.
+        if (mlist.personalize == Personalization.full and
+            mlist.reply_goes_to_list != ReplyToMunging.point_to_list and
+            not mlist.anonymous_list):
+            # Watch out for existing Cc headers, merge, and remove dups. Note
+            # that RFC 2822 says only zero or one Cc header is allowed.
+            new = []
+            d = {}
+            for pair in getaddresses(msg.get_all('cc', [])):
+                add(pair)
+            i18ndesc = uheader(mlist, mlist.description, 'Cc')
+            add((str(i18ndesc), mlist.posting_address))
+            del msg['Cc']
+            msg['Cc'] = COMMASPACE.join([formataddr(pair) for pair in new])
+    # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only
+    # if the message is being crafted for a specific list (e.g. not for the
+    # password reminders).
+    #
+    # BAW: Some people really hate the List-* headers. It seems that the free
+    # version of Eudora (possibly only on some platforms) does not hide these
+    # headers by default, pissing off their users. Too bad. Fix the MUAs.
+    if msgdata.get('_nolist') or not mlist.include_rfc2369_headers:
+        return
+    # This will act like an email address for purposes of formataddr()
+    listid = '{0}.{1}'.format(mlist.list_name, mlist.host_name)
+    cset = Utils.GetCharSet(mlist.preferred_language)
+    if mlist.description:
+        # Don't wrap the header since here we just want to get it properly RFC
+        # 2047 encoded.
+        i18ndesc = uheader(mlist, mlist.description, 'List-Id', maxlinelen=998)
+        listid_h = formataddr((str(i18ndesc), listid))
+    else:
+        # without desc we need to ensure the MUST brackets
+        listid_h = '<{0}>'.format(listid)
+    # We always add a List-ID: header.
+    del msg['list-id']
+    msg['List-Id'] = listid_h
+    # For internally crafted messages, we also add a (nonstandard),
+    # "X-List-Administrivia: yes" header. For all others (i.e. those coming
+    # from list posts), we add a bunch of other RFC 2369 headers.
+    requestaddr = mlist.request_address
+    subfieldfmt = '<{0}>, <mailto:{1}>'
+    listinfo = mlist.script_url('listinfo')
+    headers = {}
+    # XXX reduced_list_headers used to suppress List-Help, List-Subscribe, and
+    # List-Unsubscribe from UserNotification. That doesn't seem to make sense
+    # any more, so always add those three headers (others will still be
+    # suppressed).
+    headers.update({
+        'List-Help': '<mailto:{0}?subject=help>'.format(requestaddr),
+        'List-Unsubscribe': subfieldfmt.format(listinfo, mlist.leave_address),
+        'List-Subscribe': subfieldfmt.format(listinfo, mlist.join_address),
+        })
+    if msgdata.get('reduced_list_headers'):
+        headers['X-List-Administrivia'] = 'yes'
+    else:
+        # List-Post: is controlled by a separate attribute
+        if mlist.include_list_post_header:
+            headers['List-Post'] = '<mailto:{0}>'.format(mlist.posting_address)
+        # Add RFC 2369 and 5064 archiving headers, if archiving is enabled.
+        if mlist.archive:
+            for archiver in config.archivers:
+                headers['List-Archive'] = '<{0}>'.format(
+                    archiver.list_url(mlist))
+                permalink = archiver.permalink(mlist, msg)
+                if permalink is not None:
+                    headers['Archived-At'] = permalink
+    # XXX RFC 2369 also defines a List-Owner header which we are not currently
+    # supporting, but should.
+    for h, v in headers.items():
+        # First we delete any pre-existing headers because the RFC permits
+        # only one copy of each, and we want to be sure it's ours.
+        del msg[h]
+        # Wrap these lines if they are too long. The MAXLINELEN limit of 78
+        # characters is at least text-MUA friendly. The adding of 2 is for
+        # the colon-space separator.
+        if len(h) + 2 + len(v) > MAXLINELEN:
+            v = CONTINUATION.join(v.split(', '))
+        msg[h] = v
+
+
+
+def prefix_subject(mlist, msg, msgdata):
+ # Add the list's subject prefix to the Subject header. The caller skips
+ # this for digests and fast tracked messages (e.g. internally crafted,
+ # delivered to a single user such as the list admin).
+ if not mlist.subject_prefix.strip():
+ return
+ prefix = mlist.subject_prefix
+ subject = msg.get('subject', '')
+ # Try to figure out what the continuation_ws is for the header
+ if isinstance(subject, Header):
+ lines = str(subject).splitlines()
+ else:
+ lines = subject.splitlines()
+ ws = '\t'
+ if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
+ ws = lines[1][0]
+ msgdata['origsubj'] = subject
+ # The subject may be multilingual. ch_oneline() decodes it to one line in
+ # a single charset (utf-8 if several charsets are used) and returns that
+ # charset as cset. If decoding fails, the subject is returned undecoded on
+ # one line and cset is set to us-ascii. See ch_oneline() (CookHeaders one
+ # line function) for details.
+ subject, cset = ch_oneline(subject)
+ # TK: Python interpreter has evolved to be strict on ascii charset code
+ # range. It is safe to use unicode string when manipulating header
+ # contents with re module. It would be best to return unicode in
+ # ch_oneline() but here is a temporary solution.
+ subject = unicode(subject, cset)
+ # If the subject_prefix contains '%d', it is replaced with the
+ # mailing list sequential number. Sequential number format allows
+ # '%d' or '%05d' like pattern.
+ prefix_pattern = re.escape(prefix)
+ # unescape '%' :-<
+ prefix_pattern = '%'.join(prefix_pattern.split(r'\%'))
+ p = re.compile('%\d*d')
+ if p.search(prefix, 1):
+ # prefix has a number, so we should search prefix w/number in subject.
+ # Also, force new style.
+ prefix_pattern = p.sub(r'\s*\d+\s*', prefix_pattern)
+ subject = re.sub(prefix_pattern, '', subject)
+ rematch = re.match('((RE|AW|SV|VS)(\[\d+\])?:\s*)+', subject, re.I)
+ if rematch:
+ subject = subject[rematch.end():]
+ recolon = 'Re:'
+ else:
+ recolon = ''
+ # At this point, subject may become null if someone posts mail with
+ # subject: [subject prefix]
+ if subject.strip() == '':
+ subject = _('(no subject)')
+ cset = Utils.GetCharSet(mlist.preferred_language)
+ # and substitute %d in prefix with post_id
+ try:
+ prefix = prefix % mlist.post_id
+ except TypeError:
+ pass
+ # Get the prefix as a Header instance; 'Re:' is appended only if one was found
+ if not recolon:
+ h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
+ else:
+ h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
+ h.append(recolon)
+ # TK: Subject is a concatenated unicode string; encode it back to cset.
+ subject = subject.encode(cset, 'replace')
+ h.append(subject, cset)
+ del msg['subject']
+ msg['Subject'] = h
+ ss = uheader(mlist, recolon, 'Subject', continuation_ws=ws)
+ ss.append(subject, cset)
+ msgdata['stripped_subject'] = ss
+
+
+
+def ch_oneline(headerstr):
+ # Decode a header string onto one line, encoded in a single charset.
+ # Copied and modified from ToDigest.py and Utils.py.
+ # Returns a (string, cset) tuple; cset is 'us-ascii' when decoding fails.
+ try:
+ d = decode_header(headerstr)
+ # At this point, we should rstrip() every string because some
+ # MUAs deliberately add trailing spaces when composing a return
+ # message.
+ d = [(s.rstrip(), c) for (s, c) in d]
+ # Find all charsets in the original header. We use 'utf-8' rather
+ # than using the first charset (in mailman 2.1.x) if multiple
+ # charsets are used.
+ csets = []
+ for (s, c) in d:
+ if c and c not in csets:
+ csets.append(c)
+ if len(csets) == 0:
+ cset = 'us-ascii'
+ elif len(csets) == 1:
+ cset = csets[0]
+ else:
+ cset = 'utf-8'
+ h = make_header(d)
+ ustr = unicode(h)
+ oneline = ''.join(ustr.splitlines())
+ return oneline.encode(cset, 'replace'), cset
+ except (LookupError, UnicodeError, ValueError, HeaderParseError):
+ # Possibly a charset problem. Return the undecoded string on one line.
+ return ''.join(headerstr.splitlines()), 'us-ascii'
+
+
+
+class CookHeaders:
+ """Handler that modifies message headers via this module's process()."""
+
+ implements(IHandler)
+
+ name = 'cook-headers'
+ description = _('Modify message headers.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ process(mlist, msg, msgdata)
diff --git a/src/mailman/pipeline/decorate.py b/src/mailman/pipeline/decorate.py
new file mode 100644
index 000000000..e1fa0c155
--- /dev/null
+++ b/src/mailman/pipeline/decorate.py
@@ -0,0 +1,231 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Decorate a message by sticking the header and footer around it."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'Decorate',
+ ]
+
+
+import re
+import logging
+
+from email.MIMEText import MIMEText
+from zope.interface import implements
+
+from mailman import Utils
+from mailman.Message import Message
+from mailman.config import config
+from mailman.i18n import _
+from mailman.interfaces.handler import IHandler
+from mailman.utilities.string import expand
+
+
+log = logging.getLogger('mailman.error')
+
+
+
+def process(mlist, msg, msgdata):
+ # Digests and messages flagged 'nodecorate' are never decorated.
+ if msgdata.get('isdigest') or msgdata.get('nodecorate'):
+ return
+ d = {}
+ if msgdata.get('personalize'):
+ # Calculate the extra personalization dictionary. Note that the
+ # length of the recips list better be exactly 1.
+ recips = msgdata.get('recips', [])
+ assert len(recips) == 1, (
+ 'The number of intended recipients must be exactly 1')
+ recipient = recips[0].lower()
+ user = config.db.user_manager.get_user(recipient)
+ member = mlist.members.get_member(recipient)
+ d['user_address'] = recipient
+ if user is not None and member is not None:
+ d['user_delivered_to'] = member.address.original_address
+ # BAW: Hmm, should we allow this?
+ d['user_password'] = user.password
+ d['user_language'] = member.preferred_language
+ d['user_name'] = (user.real_name if user.real_name
+ else member.address.original_address)
+ d['user_optionsurl'] = member.options_url
+ # Entries in msgdata['decoration-data'] extend or override the above.
+ d.update(msgdata.get('decoration-data', {}))
+ header = decorate(mlist, mlist.msg_header, d)
+ footer = decorate(mlist, mlist.msg_footer, d)
+ # Escape hatch if both the footer and header are empty
+ if not header and not footer:
+ return
+ # Be MIME smart here. We only attach the header and footer by
+ # concatenation when the message is a non-multipart of type text/plain.
+ # Otherwise, if it is not a multipart, we make it a multipart, and then we
+ # add the header and footer as text/plain parts.
+ #
+ # BJG: In addition, only add the footer if the message's character set
+ # matches the charset of the list's preferred language. This is a
+ # suboptimal solution, and should be solved by allowing a list to have
+ # multiple headers/footers, for each language the list supports.
+ #
+ # Also, if the list's preferred charset is us-ascii, we can always
+ # safely add the header/footer to a plain text message since all
+ # charsets Mailman supports are strict supersets of us-ascii --
+ # no, UTF-16 emails are not supported yet.
+ #
+ # TK: A message with 'charset=' causes trouble. So, instead of
+ # msg.get_content_charset('us-ascii') ...
+ mcset = msg.get_content_charset() or 'us-ascii'
+ lcset = Utils.GetCharSet(mlist.preferred_language)
+ msgtype = msg.get_content_type()
+ # BAW: If the charsets don't match, should we add the header and footer by
+ # MIME multipart chroming the message?
+ wrap = True
+ if not msg.is_multipart() and msgtype == 'text/plain':
+ # Save the RFC-3676 format parameters.
+ format = msg.get_param('format')
+ delsp = msg.get_param('delsp')
+ # Save 'Content-Transfer-Encoding' header in case decoration fails.
+ cte = msg.get('content-transfer-encoding')
+ # header/footer is now in unicode (2.2)
+ try:
+ oldpayload = unicode(msg.get_payload(decode=True), mcset)
+ del msg['content-transfer-encoding']
+ frontsep = endsep = ''
+ if header and not header.endswith('\n'):
+ frontsep = '\n'
+ if footer and not oldpayload.endswith('\n'):
+ endsep = '\n'
+ payload = header + frontsep + oldpayload + endsep + footer
+ # When setting the payload for the message, try various charset
+ # encodings until one does not produce a UnicodeError. We'll try
+ # charsets in this order: the list's charset, the message's
+ # charset, then utf-8. It's okay if some of these are duplicates.
+ for cset in (lcset, mcset, 'utf-8'):
+ try:
+ msg.set_payload(payload.encode(cset), cset)
+ except UnicodeError:
+ pass
+ else:
+ if format:
+ msg.set_param('format', format)
+ if delsp:
+ msg.set_param('delsp', delsp)
+ wrap = False
+ break
+ except (LookupError, UnicodeError):
+ if cte:
+ # Restore the original c-t-e.
+ del msg['content-transfer-encoding']
+ msg['Content-Transfer-Encoding'] = cte
+ elif msg.get_content_type() == 'multipart/mixed':
+ # The next easiest thing to do is just prepend the header and append
+ # the footer as additional subparts
+ payload = msg.get_payload()
+ if not isinstance(payload, list):
+ payload = [payload]
+ if footer:
+ mimeftr = MIMEText(footer.encode(lcset), 'plain', lcset)
+ mimeftr['Content-Disposition'] = 'inline'
+ payload.append(mimeftr)
+ if header:
+ mimehdr = MIMEText(header.encode(lcset), 'plain', lcset)
+ mimehdr['Content-Disposition'] = 'inline'
+ payload.insert(0, mimehdr)
+ msg.set_payload(payload)
+ wrap = False
+ # If we couldn't add the header or footer in a less intrusive way, we can
+ # at least do it by MIME encapsulation. We want to keep as much of the
+ # outer chrome as possible.
+ if not wrap:
+ return
+ # Because of the way Message objects are passed around to process(), we
+ # need to play tricks with the outer message -- i.e. the outer one must
+ # remain the same instance. So we're going to create a clone of the outer
+ # message, with all the header chrome intact, then copy the payload to it.
+ # This will give us a clone of the original message, and it will form the
+ # basis of the interior, wrapped Message.
+ inner = Message()
+ # Which headers to copy? Let's just do the Content-* headers
+ for h, v in msg.items():
+ if h.lower().startswith('content-'):
+ inner[h] = v
+ inner.set_payload(msg.get_payload())
+ # For completeness
+ inner.set_unixfrom(msg.get_unixfrom())
+ inner.preamble = msg.preamble
+ inner.epilogue = msg.epilogue
+ # Don't copy get_charset, as this might be None, even if
+ # get_content_charset isn't. However, do make sure there is a default
+ # content-type, even if the original message was not MIME.
+ inner.set_default_type(msg.get_default_type())
+ # BAW: HACK ALERT.
+ if hasattr(msg, '__version__'):
+ inner.__version__ = msg.__version__
+ # Now, play games with the outer message to make it contain three
+ # subparts: the header (if any), the wrapped message, and the footer (if
+ # any).
+ payload = [inner]
+ if header:
+ mimehdr = MIMEText(header.encode(lcset), 'plain', lcset)
+ mimehdr['Content-Disposition'] = 'inline'
+ payload.insert(0, mimehdr)
+ if footer:
+ mimeftr = MIMEText(footer.encode(lcset), 'plain', lcset)
+ mimeftr['Content-Disposition'] = 'inline'
+ payload.append(mimeftr)
+ msg.set_payload(payload)
+ del msg['content-type']
+ del msg['content-transfer-encoding']
+ del msg['content-disposition']
+ msg['Content-Type'] = 'multipart/mixed'
+
+
+
+def decorate(mlist, template, extradict=None):
+ # Expand `template` using the default set of interpolation variables
+ # allowed in headers and footers. These are augmented (and can be
+ # overridden) by any key/value pairs in the extradict.
+ substitutions = dict(
+ real_name = mlist.real_name,
+ list_name = mlist.list_name,
+ fqdn_listname = mlist.fqdn_listname,
+ host_name = mlist.host_name,
+ listinfo_page = mlist.script_url('listinfo'),
+ description = mlist.description,
+ info = mlist.info,
+ )
+ if extradict is not None:
+ substitutions.update(extradict)
+ text = expand(template, substitutions)
+ # Strip spaces before each newline and turn any \r\n into just \n
+ return re.sub(r' *\r?\n', r'\n', text)
+
+
+
+class Decorate:
+    """Handler that decorates a message with the list's header and footer."""
+
+    implements(IHandler)
+
+    name = 'decorate'
+    description = _('Decorate a message with headers and footers.')
+
+    def process(self, mlist, msg, msgdata):
+        """See `IHandler`."""
+        process(mlist, msg, msgdata)
diff --git a/src/mailman/pipeline/docs/ack-headers.txt b/src/mailman/pipeline/docs/ack-headers.txt
new file mode 100644
index 000000000..ca41df03e
--- /dev/null
+++ b/src/mailman/pipeline/docs/ack-headers.txt
@@ -0,0 +1,40 @@
+Acknowledgment headers
+======================
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+When the message's metadata has a 'noack' key set, an 'X-Ack: no' header is
+added.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(noack=True))
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-Ack: no
+ ...
+
+Any existing X-Ack header in the original message is removed.
+
+ >>> msg = message_from_string("""\
+ ... X-Ack: yes
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(noack=True))
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-Ack: no
+ ...
diff --git a/src/mailman/pipeline/docs/acknowledge.txt b/src/mailman/pipeline/docs/acknowledge.txt
new file mode 100644
index 000000000..a4c68f900
--- /dev/null
+++ b/src/mailman/pipeline/docs/acknowledge.txt
@@ -0,0 +1,159 @@
+Message acknowledgment
+======================
+
+When a user posts a message to a mailing list, and that user has chosen to
+receive acknowledgments of their postings, Mailman will send them such an
+acknowledgment.
+
+ >>> handler = config.handlers['acknowledge']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.real_name = u'XTest'
+ >>> mlist.preferred_language = u'en'
+ >>> # XXX This will almost certainly change once we've worked out the web
+ >>> # space layout for mailing lists now.
+
+ >>> # Ensure that the virgin queue is empty, since we'll be checking this
+ >>> # for new auto-response messages.
+ >>> virginq = config.switchboards['virgin']
+ >>> virginq.files
+ []
+
+Subscribe a user to the mailing list.
+
+ >>> usermgr = config.db.user_manager
+ >>> from mailman.interfaces.member import MemberRole
+ >>> user_1 = usermgr.create_user(u'aperson@example.com')
+ >>> address_1 = list(user_1.addresses)[0]
+ >>> address_1.subscribe(mlist, MemberRole.member)
+
+
+
+Non-member posts
+----------------
+
+Non-members can't get acknowledgments of their posts to the mailing list.
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> virginq.files
+ []
+
+We can also specify the original sender in the message's metadata. If that
+person is also not a member, no acknowledgment will be sent either.
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg,
+ ... dict(original_sender=u'cperson@example.com'))
+ >>> virginq.files
+ []
+
+
+No acknowledgment requested
+---------------------------
+
+Unless the user has requested acknowledgments, they will not get one.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> virginq.files
+ []
+
+Similarly if the original sender is specified in the message metadata, and
+that sender is a member but not one who has requested acknowledgments, none
+will be sent.
+
+ >>> user_2 = usermgr.create_user(u'dperson@example.com')
+ >>> address_2 = list(user_2.addresses)[0]
+ >>> address_2.subscribe(mlist, MemberRole.member)
+
+
+ >>> handler.process(mlist, msg,
+ ... dict(original_sender=u'dperson@example.com'))
+ >>> virginq.files
+ []
+
+
+Requested acknowledgments
+-------------------------
+
+If the member requests acknowledgments, Mailman will send them one when they
+post to the mailing list.
+
+ >>> user_1.preferences.acknowledge_posts = True
+
+The receipt will include the original message's subject in the response body,
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something witty and insightful
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> virginq.files
+ []
+ >>> sorted(qdata.items())
+ [..., ('recips', [u'aperson@example.com']), ...]
+ >>> print qmsg.as_string()
+ ...
+ MIME-Version: 1.0
+ ...
+ Subject: XTest post acknowledgment
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ ...
+ Precedence: bulk
+
+ Your message entitled
+
+ Something witty and insightful
+
+ was successfully received by the XTest mailing list.
+
+ List info page: http://lists.example.com/listinfo/_xtest@example.com
+ Your preferences: http://example.com/aperson@example.com
+
+
+If there is no subject, then the receipt will use a generic message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> virginq.files
+ []
+ >>> sorted(qdata.items())
+ [..., ('recips', [u'aperson@example.com']), ...]
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ ...
+ Subject: XTest post acknowledgment
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ ...
+ Precedence: bulk
+
+ Your message entitled
+
+ (no subject)
+
+ was successfully received by the XTest mailing list.
+
+ List info page: http://lists.example.com/listinfo/_xtest@example.com
+ Your preferences: http://example.com/aperson@example.com
+
diff --git a/src/mailman/pipeline/docs/after-delivery.txt b/src/mailman/pipeline/docs/after-delivery.txt
new file mode 100644
index 000000000..b910e89a6
--- /dev/null
+++ b/src/mailman/pipeline/docs/after-delivery.txt
@@ -0,0 +1,27 @@
+After delivery
+==============
+
+After a message is delivered, or more correctly, after it has been processed
+by the rest of the handlers in the incoming queue pipeline, a couple of
+bookkeeping pieces of information are updated.
+
+ >>> import datetime
+ >>> handler = config.handlers['after-delivery']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> post_time = datetime.datetime.now() - datetime.timedelta(minutes=10)
+ >>> mlist.last_post_time = post_time
+ >>> mlist.post_id = 10
+
+Processing a message with this handler updates the last_post_time and post_id
+attributes.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... Something interesting.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> mlist.last_post_time > post_time
+ True
+ >>> mlist.post_id
+ 11
diff --git a/src/mailman/pipeline/docs/archives.txt b/src/mailman/pipeline/docs/archives.txt
new file mode 100644
index 000000000..d90228525
--- /dev/null
+++ b/src/mailman/pipeline/docs/archives.txt
@@ -0,0 +1,133 @@
+Archives
+========
+
+Updating the archives with posted messages is handled by a separate queue,
+which allows for better memory management and prevents blocking the main
+delivery processes while messages are archived. This also allows external
+archivers to work in a separate process from the main Mailman delivery
+processes.
+
+ >>> from mailman.app.lifecycle import create_list
+ >>> handler = config.handlers['to-archive']
+ >>> mlist = create_list(u'_xtest@example.com')
+ >>> switchboard = config.switchboards['archive']
+
+A helper function.
+
+ >>> def clear():
+ ... for filebase in switchboard.files:
+ ... msg, msgdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+
+The purpose of the ToArchive handler is to make a simple decision as to
+whether the message should get archived and if so, to drop the message in the
+archiving queue. Really, the most important thing is to determine when a
+message should /not/ get archived.
+
+For example, no digests should ever get archived.
+
+ >>> mlist.archive = True
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+If the mailing list is not configured to archive, then even regular deliveries
+won't be archived.
+
+ >>> mlist.archive = False
+ >>> handler.process(mlist, msg, {})
+ >>> switchboard.files
+ []
+
+There are two de-facto standards for a message to indicate that it does not
+want to be archived. We've seen both in the wild so both are supported. The
+X-No-Archive: header can be used to indicate that the message should not be
+archived. Confusingly, this header's value is actually ignored.
+
+ >>> mlist.archive = True
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-No-Archive: YES
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+Even a 'no' value will stop the archiving of the message.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-No-Archive: No
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+Another header that's been observed is the X-Archive: header. Here, the
+header's case folded value must be 'no' in order to prevent archiving.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-Archive: No
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+But if the value is 'yes', then the message will be archived.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-Archive: Yes
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> qmsg, qdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print qmsg.as_string()
+ Subject: A sample message
+ X-Archive: Yes
+
+ A message of great import.
+
+ >>> dump_msgdata(qdata)
+ _parsemsg: False
+ version : 3
+
+Without either archiving header, and all other things being the same, the
+message will get archived.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> qmsg, qdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print qmsg.as_string()
+ Subject: A sample message
+
+ A message of great import.
+
+ >>> dump_msgdata(qdata)
+ _parsemsg: False
+ version : 3
diff --git a/src/mailman/pipeline/docs/avoid-duplicates.txt b/src/mailman/pipeline/docs/avoid-duplicates.txt
new file mode 100644
index 000000000..fe91a9a71
--- /dev/null
+++ b/src/mailman/pipeline/docs/avoid-duplicates.txt
@@ -0,0 +1,168 @@
+Avoid duplicates
+================
+
+The AvoidDuplicates handler module implements several strategies to try to
+reduce the reception of duplicate messages. It does this by removing certain
+recipients from the list of recipients that earlier handler modules
+(e.g. CalcRecips) calculate.
+
+ >>> handler = config.handlers['avoid-duplicates']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Create some members we're going to use.
+
+ >>> from mailman.interfaces.member import MemberRole
+ >>> address_a = config.db.user_manager.create_address(
+ ... u'aperson@example.com')
+ >>> address_b = config.db.user_manager.create_address(
+ ... u'bperson@example.com')
+ >>> member_a = address_a.subscribe(mlist, MemberRole.member)
+ >>> member_b = address_b.subscribe(mlist, MemberRole.member)
+ >>> # This is the message metadata dictionary as it would be produced by
+ >>> # the CalcRecips handler.
+ >>> recips = dict(recips=[u'aperson@example.com', u'bperson@example.com'])
+
+
+Short circuiting
+----------------
+
+The module short-circuits if there are no recipients.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: A message of great import
+ ...
+ ... Something
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> msgdata
+ {}
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: A message of great import
+
+ Something
+
+
+
+Suppressing the list copy
+-------------------------
+
+Members can elect not to receive a list copy of any message on which they are
+explicitly named as a recipient. This is done by setting their
+receive_list_copy preference to False. However, if they aren't mentioned in
+one of the recipient headers (i.e. To, CC, Resent-To, or Resent-CC), then they
+will get a list copy.
+
+ >>> member_a.preferences.receive_list_copy = False
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+
+ Something of great import.
+
+
+If they're mentioned on the CC line, they won't get a list copy.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ... CC: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+ CC: aperson@example.com
+
+ Something of great import.
+
+
+But if they're mentioned on the CC line and have receive_list_copy set to True
+(the default), then they still get a list copy.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ... CC: bperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+ CC: bperson@example.com
+
+ Something of great import.
+
+
+Other headers checked for recipients include the To...
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ... To: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+ To: aperson@example.com
+
+ Something of great import.
+
+
+...Resent-To...
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ... Resent-To: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+ Resent-To: aperson@example.com
+
+ Something of great import.
+
+
+...and Resent-CC headers.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person
+ ... Resent-Cc: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person
+ Resent-Cc: aperson@example.com
+
+ Something of great import.
+
diff --git a/src/mailman/pipeline/docs/calc-recips.txt b/src/mailman/pipeline/docs/calc-recips.txt
new file mode 100644
index 000000000..adfbeabbf
--- /dev/null
+++ b/src/mailman/pipeline/docs/calc-recips.txt
@@ -0,0 +1,100 @@
+Calculating recipients
+======================
+
+Every message that makes it through to the list membership gets sent to a set
+of recipient addresses. These addresses are calculated by one of the handler
+modules and depend on a host of factors.
+
+ >>> handler = config.handlers['calculate-recipients']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Recipients are calculated from the list members, so add a bunch of members to
+start out with. First, create a bunch of addresses...
+
+ >>> usermgr = config.db.user_manager
+ >>> address_a = usermgr.create_address(u'aperson@example.com')
+ >>> address_b = usermgr.create_address(u'bperson@example.com')
+ >>> address_c = usermgr.create_address(u'cperson@example.com')
+ >>> address_d = usermgr.create_address(u'dperson@example.com')
+ >>> address_e = usermgr.create_address(u'eperson@example.com')
+ >>> address_f = usermgr.create_address(u'fperson@example.com')
+
+...then subscribe these addresses to the mailing list as members...
+
+ >>> from mailman.interfaces.member import MemberRole
+ >>> member_a = address_a.subscribe(mlist, MemberRole.member)
+ >>> member_b = address_b.subscribe(mlist, MemberRole.member)
+ >>> member_c = address_c.subscribe(mlist, MemberRole.member)
+ >>> member_d = address_d.subscribe(mlist, MemberRole.member)
+ >>> member_e = address_e.subscribe(mlist, MemberRole.member)
+ >>> member_f = address_f.subscribe(mlist, MemberRole.member)
+
+...then make some of the members digest members.
+
+ >>> from mailman.constants import DeliveryMode
+ >>> member_d.preferences.delivery_mode = DeliveryMode.plaintext_digests
+ >>> member_e.preferences.delivery_mode = DeliveryMode.mime_digests
+ >>> member_f.preferences.delivery_mode = DeliveryMode.summary_digests
+
+
+Short-circuiting
+----------------
+
+Sometimes, the list of recipients already exists in the message metadata.
+This can happen, for example, when a message was previously delivered to some
+but not all of the recipients.
+
+ >>> msg = message_from_string("""\
+ ... From: Xavier Person
+ ...
+ ... Something of great import.
+ ... """)
+ >>> recips = set((u'qperson@example.com', u'zperson@example.com'))
+ >>> msgdata = dict(recips=recips)
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'qperson@example.com', u'zperson@example.com']
+
+
+Regular delivery recipients
+---------------------------
+
+Regular delivery recipients are those people who get messages from the list as
+soon as they are posted. In other words, these folks are not digest members.
+
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com', u'cperson@example.com']
+
+Members can elect not to receive a list copy of their own postings.
+
+ >>> member_c.preferences.receive_own_postings = False
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+
+Members can also elect not to receive a list copy of any message on which they
+are explicitly named as a recipient. However, see the AvoidDuplicates handler
+for details.
+
+
+Digest recipients
+-----------------
+
+XXX Test various digest deliveries.
+
+
+Urgent messages
+---------------
+
+XXX Test various urgent deliveries:
+ * test_urgent_moderator()
+ * test_urgent_admin()
+ * test_urgent_reject()
diff --git a/src/mailman/pipeline/docs/cleanse.txt b/src/mailman/pipeline/docs/cleanse.txt
new file mode 100644
index 000000000..0940cdb4b
--- /dev/null
+++ b/src/mailman/pipeline/docs/cleanse.txt
@@ -0,0 +1,94 @@
+Cleansing headers
+=================
+
+All messages posted to a list get their headers cleansed. Some headers are
+related to additional permissions that can be granted to the message and other
+headers can be used to fish for membership.
+
+ >>> handler = config.handlers['cleanse']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Headers such as Approved, Approve, and Urgent are used to grant special
+permissions to individual messages. All may contain a password; the first two
+headers are used by list administrators to pre-approve a message normally held
+for approval. The latter header is used to send a regular message to all
+members, regardless of whether they get digests or not. Because all three
+headers contain passwords, they must be removed from any posted message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Approved: foobar
+ ... Approve: barfoo
+ ... Urgent: notreally
+ ... Subject: A message of great import
+ ...
+ ... Blah blah blah
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: A message of great import
+
+ Blah blah blah
+
+
+Other headers can be used by list members to fish the list for membership, so
+we don't let them go through. These are a mix of standard headers and custom
+headers supported by some mail readers. For example, X-PMRQC is supported by
+Pegasus Mail. I don't remember what program uses X-Confirm-Reading-To though
+(Some Microsoft product perhaps?).
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ... Reply-To: bperson@example.org
+ ... Sender: asystem@example.net
+ ... Return-Receipt-To: another@example.com
+ ... Disposition-Notification-To: athird@example.com
+ ... X-Confirm-Reading-To: afourth@example.com
+ ... X-PMRQC: afifth@example.com
+ ... Subject: a message to you
+ ...
+ ... How are you doing?
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: bperson@example.com
+ Reply-To: bperson@example.org
+ Sender: asystem@example.net
+ Subject: a message to you
+
+ How are you doing?
+
+
+
+Anonymous lists
+---------------
+
+Anonymous mailing lists also try to cleanse certain identifying headers from
+the original posting, so that it is at least a bit more difficult to determine
+who sent the message. This isn't perfect though; for example, the body of the
+message is never scrubbed (though that might not be a bad idea). The From
+and Reply-To headers in the posted message are taken from list attributes.
+
+Hotmail apparently sets X-Originating-Email.
+
+ >>> mlist.anonymous_list = True
+ >>> mlist.description = u'A Test Mailing List'
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ... Reply-To: bperson@example.org
+ ... Sender: asystem@example.net
+ ... X-Originating-Email: cperson@example.com
+ ... Subject: a message to you
+ ...
+ ... How are you doing?
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ Subject: a message to you
+ From: A Test Mailing List <_xtest@example.com>
+ Reply-To: _xtest@example.com
+
+ How are you doing?
+
diff --git a/src/mailman/pipeline/docs/cook-headers.txt b/src/mailman/pipeline/docs/cook-headers.txt
new file mode 100644
index 000000000..ce13a45b6
--- /dev/null
+++ b/src/mailman/pipeline/docs/cook-headers.txt
@@ -0,0 +1,326 @@
+Cooking headers
+===============
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+ >>> mlist.include_list_post_header = False
+ >>> mlist.archive = True
+
+
+Saving the original sender
+--------------------------
+
+Because the original sender headers may get deleted or changed, CookHeaders
+will place the sender in the message metadata for safe keeping.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['original_sender']
+ u'aperson@example.com'
+
+But if there was no original sender, then the empty string will be saved.
+
+ >>> msg = message_from_string("""\
+ ... Subject: No original sender
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['original_sender']
+ ''
+
+
+X-BeenThere header
+------------------
+
+The X-BeenThere header is what Mailman uses to recognize messages that have
+already been processed by this mailing list. It's one small measure against
+mail loops.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['x-beenthere']
+ u'_xtest@example.com'
+
+Mailman appends X-BeenThere headers, so if there already is one in the
+original message, the posted message will contain two such headers.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-BeenThere: another@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> sorted(msg.get_all('x-beenthere'))
+ [u'_xtest@example.com', u'another@example.com']
+
+
+Mailman version header
+----------------------
+
+Mailman will also insert an X-Mailman-Version header...
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> from mailman.version import VERSION
+ >>> msg['x-mailman-version'] == VERSION
+ True
+
+...but only if one doesn't already exist.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-Mailman-Version: 3000
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['x-mailman-version']
+ u'3000'
+
+
+Precedence header
+-----------------
+
+Mailman will insert a Precedence header, which is a de-facto standard for
+telling automatic reply software (e.g. vacation(1)) not to respond to this
+message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['precedence']
+ u'list'
+
+But Mailman will only add that header if the original message doesn't already
+have one of them.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Precedence: junk
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['precedence']
+ u'junk'
+
+
+RFC 2919 and 2369 headers
+-------------------------
+
+This is a helper function for the following section.
+
+ >>> def list_headers(msg):
+ ... print '---start---'
+ ... # Sort the List-* headers found in the message. We need to do
+ ... # this because CookHeaders puts them in a dictionary which does
+ ... # not have a guaranteed sort order.
+ ... for header in sorted(msg.keys()):
+ ... parts = header.lower().split('-')
+ ... if 'list' not in parts:
+ ... continue
+ ... for value in msg.get_all(header):
+ ... print '%s: %s' % (header, value)
+ ... print '---end---'
+
+These RFCs define headers for mailing list actions. A mailing list should
+generally add these headers, but not for messages that aren't crafted for a
+specific list (e.g. password reminders in Mailman 2.x).
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, dict(_nolist=True))
+ >>> list_headers(msg)
+ ---start---
+ ---end---
+
+Some people don't like these headers because their mail readers aren't good
+about hiding them. A list owner can turn these headers off.
+
+ >>> mlist.include_rfc2369_headers = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ ---end---
+
+But normally, a list will include these headers.
+
+ >>> mlist.include_rfc2369_headers = True
+ >>> mlist.include_list_post_header = True
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Message-ID: <12345>
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive:
+ List-Help:
+ List-Id: <_xtest.example.com>
+ List-Post:
+ List-Subscribe: ,
+
+ List-Unsubscribe: ,
+
+ ---end---
+
+If the mailing list has a description, then it is included in the List-Id
+header.
+
+ >>> mlist.description = u'My test mailing list'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive:
+ List-Help:
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Post:
+ List-Subscribe: ,
+
+ List-Unsubscribe: ,
+
+ ---end---
+
+Administrative messages crafted by Mailman will have a reduced set of headers.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, dict(reduced_list_headers=True))
+ >>> list_headers(msg)
+ ---start---
+ List-Help:
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Subscribe: ,
+
+ List-Unsubscribe: ,
+
+ X-List-Administrivia: yes
+ ---end---
+
+With the normal set of List-* headers, it's still possible to suppress the
+List-Post header, which is reasonable for an announce only mailing list.
+
+ >>> mlist.include_list_post_header = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive:
+ List-Help:
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Subscribe: ,
+
+ List-Unsubscribe: ,
+
+ ---end---
+
+And if the list isn't being archived, it makes no sense to add the
+List-Archive header either.
+
+ >>> mlist.include_list_post_header = True
+ >>> mlist.archive = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Help:
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Post:
+ List-Subscribe: ,
+
+ List-Unsubscribe: ,
+
+ ---end---
+
+
+Archived-At
+-----------
+
+RFC 5064 (draft) defines a new Archived-At header which contains the url to
+the individual message in the archives. The stock Pipermail archiver doesn't
+support this because the url can't be calculated until after the message is
+archived. Because this is done by the archive runner, this information isn't
+available to us now.
+
+ >>> print msg['archived-at']
+ None
+
+
+Personalization
+---------------
+
+The To field normally contains the list posting address. However when
+messages are fully personalized, that header will get overwritten with the
+address of the recipient. The list's posting address will be added to one of
+the recipient headers so that users will be able to reply back to the list.
+
+ >>> from mailman.interfaces.mailinglist import (
+ ... Personalization, ReplyToMunging)
+ >>> mlist.personalize = Personalization.full
+ >>> mlist.reply_goes_to_list = ReplyToMunging.no_munging
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-BeenThere: _xtest@example.com
+ X-Mailman-Version: ...
+ Precedence: list
+ Cc: My test mailing list <_xtest@example.com>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Unsubscribe: ,
+
+ List-Post:
+ List-Help:
+ List-Subscribe: ,
+
+
+
diff --git a/src/mailman/pipeline/docs/decorate.txt b/src/mailman/pipeline/docs/decorate.txt
new file mode 100644
index 000000000..b805e23cf
--- /dev/null
+++ b/src/mailman/pipeline/docs/decorate.txt
@@ -0,0 +1,317 @@
+Message decoration
+==================
+
+Message decoration is the process of adding headers and footers to the
+original message. A handler module takes care of this based on the settings
+of the mailing list and the type of message being processed.
+
+ >>> from mailman.pipeline.decorate import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> msg_text = """\
+ ... From: aperson@example.org
+ ...
+ ... Here is a message.
+ ... """
+ >>> msg = message_from_string(msg_text)
+
+
+Short circuiting
+----------------
+
+Digest messages get decorated during the digest creation phase so no extra
+decorations are added for digest messages.
+
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> print msg.as_string()
+ From: aperson@example.org
+
+ Here is a message.
+
+ >>> process(mlist, msg, dict(nodecorate=True))
+ >>> print msg.as_string()
+ From: aperson@example.org
+
+ Here is a message.
+
+
+Decorating simple text messages
+-------------------------------
+
+Text messages that have no declared content type character set are, by default,
+encoded in us-ascii. When the mailing list's preferred language is 'en'
+(i.e. English), the character set of the mailing list and of the message will
+match. In this case, and when the header and footer have no interpolation
+placeholder variables, the message's payload will be prepended by the verbatim
+header, and appended with the verbatim footer.
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'header\n'
+ >>> mlist.msg_footer = u'footer'
+ >>> mlist.preferred_language = u'en'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+
+ header
+ Here is a message.
+ footer
+
+Mailman supports a number of interpolation variables, placeholders in the
+header and footer for information to be filled in with mailing list specific
+data. An example of such information is the mailing list's "real name" (a
+short descriptive name for the mailing list).
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'$real_name header\n'
+ >>> mlist.msg_footer = u'$real_name footer'
+ >>> mlist.real_name = u'XTest'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ XTest header
+ Here is a message.
+ XTest footer
+
+You can't just pick any interpolation variable though; if you do, the variable
+will remain in the header or footer unchanged.
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'$dummy header\n'
+ >>> mlist.msg_footer = u'$dummy footer'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ $dummy header
+ Here is a message.
+ $dummy footer
+
+
+Handling RFC 3676 'format=flowed' parameters
+--------------------------------------------
+
+RFC 3676 describes a standard by which text/plain messages can be marked by
+generating MUAs for better readability in compatible receiving MUAs. The
+'format' parameter on the text/plain Content-Type header gives hints as to how
+the receiving MUA may flow and delete trailing whitespace for better display
+in a proportional font.
+
+When Mailman sees text/plain messages with such RFC 3676 parameters, it
+preserves these parameters when it concatenates headers and footers to the
+message payload.
+
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: text/plain; format=flowed; delsp=no
+ ...
+ ... Here is a message\x20
+ ... with soft line breaks.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> # Don't use 'print' here as above because it won't be obvious from the
+ >>> # output that the soft-line break space at the end of the 'Here is a
+ >>> # message' line will be retained in the output.
+ >>> msg['content-type']
+ u'text/plain; format="flowed"; delsp="no"; charset="us-ascii"'
+ >>> [line for line in msg.get_payload().splitlines()]
+ ['header', 'Here is a message ', 'with soft line breaks.', 'footer']
+
+
+Decorating mixed-charset messages
+---------------------------------
+
+When a message has no explicit character set, it is assumed to be us-ascii.
+However, if the mailing list's preferred language has a different character
+set, Mailman will still try to concatenate the header and footer, but it will
+convert the text to utf-8 and base-64 encode the message payload.
+
+ # 'ja' = Japanese; charset = 'euc-jp'
+ >>> mlist.preferred_language = u'ja'
+ >>> mlist.msg_header = u'$description header'
+ >>> mlist.msg_footer = u'$description footer'
+ >>> mlist.description = u'\u65e5\u672c\u8a9e'
+
+ >>> from email.message import Message
+ >>> msg = Message()
+ >>> msg.set_payload('Fran\xe7aise', 'iso-8859-1')
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="iso-8859-1"
+ Content-Transfer-Encoding: quoted-printable
+
+ Fran=E7aise
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+
+ 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI=
+
+
+Sometimes the message even has an unknown character set. In this case,
+Mailman has no choice but to decorate the original message with MIME
+attachments.
+
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: text/plain; charset=unknown
+ ... Content-Transfer-Encoding: 7bit
+ ...
+ ... Here is a message.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg.set_boundary('BOUNDARY')
+ >>> print msg.as_string()
+ From: aperson@example.org
+ Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ header
+ --BOUNDARY
+ Content-Type: text/plain; charset=unknown
+ Content-Transfer-Encoding: 7bit
+
+ Here is a message.
+
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ footer
+ --BOUNDARY--
+
+
+Decorating multipart messages
+-----------------------------
+
+Multipart messages have to be decorated differently. The header and footer
+cannot be simply concatenated into the payload because that will break the
+MIME structure of the message. Instead, the header and footer are attached as
+separate MIME subparts.
+
+When the outer part is multipart/mixed, the header and footer can have a
+Content-Disposition of 'inline' so that MUAs can display them as if they
+were simply concatenated.
+
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> part_1 = message_from_string("""\
+ ... From: aperson@example.org
+ ...
+ ... Here is the first message.
+ ... """)
+ >>> part_2 = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... Here is the second message.
+ ... """)
+ >>> from email.mime.multipart import MIMEMultipart
+ >>> msg = MIMEMultipart('mixed', boundary='BOUNDARY',
+ ... _subparts=(part_1, part_2))
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ Content-Type: multipart/mixed; boundary="BOUNDARY"
+ MIME-Version: 1.0
+
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ header
+ --BOUNDARY
+ From: aperson@example.org
+
+ Here is the first message.
+
+ --BOUNDARY
+ From: bperson@example.com
+
+ Here is the second message.
+
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ footer
+ --BOUNDARY--
+
+
+Decorating other content types
+------------------------------
+
+Non-multipart non-text content types will get wrapped in a multipart/mixed so
+that the header and footer can be added as attachments.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: image/x-beautiful
+ ...
+ ... IMAGEDATAIMAGEDATAIMAGEDATA
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg.set_boundary('BOUNDARY')
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ header
+ --BOUNDARY
+ Content-Type: image/x-beautiful
+
+ IMAGEDATAIMAGEDATAIMAGEDATA
+
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+
+ footer
+ --BOUNDARY--
+
+
+Personalization
+---------------
+
+A mailing list can be 'personalized', meaning that each message is unique for
+each recipient. When the list is personalized, additional interpolation
+variables are available; however, the list of intended recipients must be
+provided in the message data, otherwise an exception occurs.
+
+ >>> process(mlist, None, dict(personalize=True))
+ Traceback (most recent call last):
+ ...
+ AssertionError: The number of intended recipients must be exactly 1
+
+And the number of intended recipients must be exactly 1.
+
+ >>> process(mlist, None, dict(personalize=True, recips=[1, 2, 3]))
+ Traceback (most recent call last):
+ ...
+ AssertionError: The number of intended recipients must be exactly 1
diff --git a/src/mailman/pipeline/docs/digests.txt b/src/mailman/pipeline/docs/digests.txt
new file mode 100644
index 000000000..cb939f7ca
--- /dev/null
+++ b/src/mailman/pipeline/docs/digests.txt
@@ -0,0 +1,535 @@
+Digests
+=======
+
+Digests are a way for a user to receive list traffic in collections instead of
+as individual messages when immediately posted. There are several forms of
+digests, although only two are currently supported: MIME digests and RFC 1153
+(a.k.a. plain text) digests.
+
+ >>> from mailman.pipeline.to_digest import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.real_name = u'XTest'
+ >>> mlist.subject_prefix = u'[_XTest] '
+ >>> mlist.one_last_digest = set()
+ >>> switchboard = config.switchboards['virgin']
+
+digest_mbox() is a helper used to iterate through all the accumulated digest
+messages, in the order in which they were posted. This makes it easier to
+update the tests when we switch to a different mailbox format.
+
+ >>> from mailman.testing.helpers import digest_mbox
+ >>> from itertools import count
+ >>> from string import Template
+ >>> def makemsg():
+ ... for i in count(1):
+ ... text = Template("""\
+ ... From: aperson@example.com
+ ... To: _xtest@example.com
+ ... Subject: Test message $i
+ ...
+ ... Here is message $i
+ ... """).substitute(i=i)
+ ... yield message_from_string(text)
+
+
+Short circuiting
+----------------
+
+When a message is posted to the mailing list, it is generally added to a
+running collection of messages. For now, this is a Unix mailbox file,
+although in the future this may end up being converted to a maildir style
+mailbox. In any event, there are several factors that would bypass the
+storing of posted messages to the mailbox. For example, the mailing list may
+not allow digests...
+
+ >>> mlist.digestable = False
+ >>> msg = makemsg().next()
+ >>> process(mlist, msg, {})
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> switchboard.files
+ []
+
+...or they may allow digests but the message is already a digest.
+
+ >>> mlist.digestable = True
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> switchboard.files
+ []
+
+
+Sending a digest
+----------------
+
+For messages which are not digests, but which are posted to a digestable
+mailing list, the messages will be stored until they meet a criterion
+triggering the sending of the digest. If none of those criteria are met, then
+the message will just sit in the mailbox for a while.
+
+ >>> mlist.digest_size_threshold = 10000
+ >>> process(mlist, msg, {})
+ >>> switchboard.files
+ []
+ >>> digest = digest_mbox(mlist)
+ >>> sum(1 for mboxmsg in digest)
+ 1
+ >>> import os
+ >>> os.remove(digest._path)
+
+When the size of the digest mbox reaches the maximum size threshold, a digest
+is crafted and sent out. This puts two messages in the virgin queue, a MIME
+digest and an RFC 1153 plain text digest. The size threshold is in KB.
+
+ >>> mlist.digest_size_threshold = 1
+ >>> mlist.volume = 2
+ >>> mlist.next_digest_number = 10
+ >>> size = 0
+ >>> for msg in makemsg():
+ ... process(mlist, msg, {})
+ ... size += len(str(msg))
+ ... if size > mlist.digest_size_threshold * 1024:
+ ... break
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> len(switchboard.files)
+ 2
+ >>> for filebase in switchboard.files:
+ ... qmsg, qdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+ ... if qmsg.is_multipart():
+ ... mimemsg = qmsg
+ ... mimedata = qdata
+ ... else:
+ ... rfc1153msg = qmsg
+ ... rfc1153data = qdata
+ >>> print mimemsg.as_string()
+ Content-Type: multipart/mixed; boundary="..."
+ MIME-Version: 1.0
+ From: _xtest-request@example.com
+ Subject: XTest Digest, Vol 2, Issue 10
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+
+ --...
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Description: XTest Digest, Vol 2, Issue 10
+
+ Send XTest mailing list submissions to
+ _xtest@example.com
+
+ To subscribe or unsubscribe via the World Wide Web, visit
+ http://lists.example.com/listinfo/_xtest@example.com
+ or, via email, send a message with subject or body 'help' to
+ _xtest-request@example.com
+
+ You can reach the person managing the list at
+ _xtest-owner@example.com
+
+ When replying, please edit your Subject line so it is more specific
+ than "Re: Contents of XTest digest..."
+
+ --...
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Description: Today's Topics (8 messages)
+
+ Today's Topics:
+
+ 1. Test message 1 (aperson@example.com)
+ 2. Test message 2 (aperson@example.com)
+ 3. Test message 3 (aperson@example.com)
+ 4. Test message 4 (aperson@example.com)
+ 5. Test message 5 (aperson@example.com)
+ 6. Test message 6 (aperson@example.com)
+ 7. Test message 7 (aperson@example.com)
+ 8. Test message 8 (aperson@example.com)
+
+ --...
+ Content-Type: multipart/digest; boundary="..."
+ MIME-Version: 1.0
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 1
+ Message: 1
+
+ Here is message 1
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 2
+ Message: 2
+
+ Here is message 2
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 3
+ Message: 3
+
+ Here is message 3
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 4
+ Message: 4
+
+ Here is message 4
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 5
+ Message: 5
+
+ Here is message 5
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 6
+ Message: 6
+
+ Here is message 6
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 7
+ Message: 7
+
+ Here is message 7
+
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 8
+ Message: 8
+
+ Here is message 8
+
+
+ --...
+ --...
+ >>> dump_msgdata(mimedata)
+ _parsemsg: False
+ isdigest : True
+ listname : _xtest@example.com
+ recips : set([])
+ version : 3
+
+
+ >>> print rfc1153msg.as_string()
+ From: _xtest-request@example.com
+ Subject: XTest Digest, Vol 2, Issue 10
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+
+ Send XTest mailing list submissions to
+ _xtest@example.com
+
+ To subscribe or unsubscribe via the World Wide Web, visit
+ http://lists.example.com/listinfo/_xtest@example.com
+ or, via email, send a message with subject or body 'help' to
+ _xtest-request@example.com
+
+ You can reach the person managing the list at
+ _xtest-owner@example.com
+
+ When replying, please edit your Subject line so it is more specific
+ than "Re: Contents of XTest digest..."
+
+
+ Today's Topics:
+
+ 1. Test message 1 (aperson@example.com)
+ 2. Test message 2 (aperson@example.com)
+ 3. Test message 3 (aperson@example.com)
+ 4. Test message 4 (aperson@example.com)
+ 5. Test message 5 (aperson@example.com)
+ 6. Test message 6 (aperson@example.com)
+ 7. Test message 7 (aperson@example.com)
+ 8. Test message 8 (aperson@example.com)
+
+
+ ----------------------------------------------------------------------
+
+ Message: 1
+ From: aperson@example.com
+ Subject: Test message 1
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 1
+
+
+ ------------------------------
+
+ Message: 2
+ From: aperson@example.com
+ Subject: Test message 2
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 2
+
+
+ ------------------------------
+
+ Message: 3
+ From: aperson@example.com
+ Subject: Test message 3
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 3
+
+
+ ------------------------------
+
+ Message: 4
+ From: aperson@example.com
+ Subject: Test message 4
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 4
+
+
+ ------------------------------
+
+ Message: 5
+ From: aperson@example.com
+ Subject: Test message 5
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 5
+
+
+ ------------------------------
+
+ Message: 6
+ From: aperson@example.com
+ Subject: Test message 6
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 6
+
+
+ ------------------------------
+
+ Message: 7
+ From: aperson@example.com
+ Subject: Test message 7
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 7
+
+
+ ------------------------------
+
+ Message: 8
+ From: aperson@example.com
+ Subject: Test message 8
+ To: _xtest@example.com
+ Message-ID: ...
+
+ Here is message 8
+
+
+ End of XTest Digest, Vol 2, Issue 10
+ ************************************
+
+ >>> dump_msgdata(rfc1153data)
+ _parsemsg: False
+ isdigest : True
+ listname : _xtest@example.com
+ recips : set([])
+ version : 3
+
+
+Internationalized digests
+-------------------------
+
+When messages come in with a content-type character set different than that of
+the list's preferred language, recipients will get an internationalized
+digest. French is not enabled by default site-wide, so enable that now.
+
+ >>> config.languages.enable_language('fr')
+
+ # Simulate the site administrator setting the default server language to
+ # French in the configuration file. Without this, the English template
+ # will be found and the masthead won't be translated.
+ >>> config.push('french', """
+ ... [mailman]
+ ... default_language: fr
+ ... """)
+
+ >>> mlist.preferred_language = u'fr'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... To: _xtest@example.com
+ ... Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?=
+ ... MIME-Version: 1.0
+ ... Content-Type: text/plain; charset=iso-2022-jp
+ ... Content-Transfer-Encoding: 7bit
+ ...
+ ... \x1b$B0lHV\x1b(B
+ ... """)
+
+Set the digest threshold to zero so that the digests will be sent immediately.
+
+ >>> mlist.digest_size_threshold = 0
+ >>> process(mlist, msg, {})
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> len(switchboard.files)
+ 2
+ >>> for filebase in switchboard.files:
+ ... qmsg, qdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+ ... if qmsg.is_multipart():
+ ... mimemsg = qmsg
+ ... mimedata = qdata
+ ... else:
+ ... rfc1153msg = qmsg
+ ... rfc1153data = qdata
+ >>> print mimemsg.as_string()
+ Content-Type: multipart/mixed; boundary="..."
+ MIME-Version: 1.0
+ From: _xtest-request@example.com
+ Subject: Groupe XTest, Vol. 2, Parution 11
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+
+ --...
+ Content-Type: text/plain; charset="iso-8859-1"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: quoted-printable
+ Content-Description: Groupe XTest, Vol. 2, Parution 11
+
+ Envoyez vos messages pour la liste XTest =E0
+ _xtest@example.com
+
+ Pour vous (d=E9s)abonner par le web, consultez
+ http://lists.example.com/listinfo/_xtest@example.com
+
+ ou, par courriel, envoyez un message avec =AB=A0help=A0=BB dans le corps ou
+ dans le sujet =E0
+ _xtest-request@example.com
+
+ Vous pouvez contacter l'administrateur de la liste =E0 l'adresse
+ _xtest-owner@example.com
+
+ Si vous r=E9pondez, n'oubliez pas de changer l'objet du message afin
+ qu'il soit plus sp=E9cifique que =AB=A0Re: Contenu du groupe de XTest...=A0=
+ =BB
+
+ --...
+ Content-Type: text/plain; charset="utf-8"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: base64
+ Content-Description: Today's Topics (1 messages)
+
+ VGjDqG1lcyBkdSBqb3VyIDoKCiAgIDEuIOS4gOeVqiAoYXBlcnNvbkBleGFtcGxlLm9yZykK
+
+ --...
+ Content-Type: multipart/digest; boundary="..."
+ MIME-Version: 1.0
+
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+
+ Content-Transfer-Encoding: 7bit
+ From: aperson@example.org
+ MIME-Version: 1.0
+ To: _xtest@example.com
+ Content-Type: text/plain; charset=iso-2022-jp
+ Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?=
+ Message: 1
+
+ $B0lHV(B
+
+
+ --...
+ --...
+ >>> dump_msgdata(mimedata)
+ _parsemsg: False
+ isdigest : True
+ listname : _xtest@example.com
+ recips : set([])
+ version : 3
+
+ >>> print rfc1153msg.as_string()
+ From: _xtest-request@example.com
+ Subject: Groupe XTest, Vol. 2, Parution 11
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+
+ ...
+
+ >>> dump_msgdata(rfc1153data)
+ _parsemsg: False
+ isdigest : True
+ listname : _xtest@example.com
+ recips : set([])
+ version : 3
diff --git a/src/mailman/pipeline/docs/file-recips.txt b/src/mailman/pipeline/docs/file-recips.txt
new file mode 100644
index 000000000..81510b6e7
--- /dev/null
+++ b/src/mailman/pipeline/docs/file-recips.txt
@@ -0,0 +1,96 @@
+File recipients
+===============
+
+Mailman can calculate the recipients for a message from a Sendmail-style
+include file. This file must be called members.txt and it must live in the
+list's data directory.
+
+ >>> handler = config.handlers['file-recipients']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+
+Short circuiting
+----------------
+
+If the message's metadata already has recipients, this handler immediately
+returns.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message.
+ ... """)
+ >>> msgdata = {'recips': 7}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+
+ A message.
+
+ >>> msgdata
+ {'recips': 7}
+
+
+Missing file
+------------
+
+The include file must live inside the list's data directory, under the name
+members.txt. If the file doesn't exist, the list of recipients will be
+empty.
+
+ >>> import os
+ >>> file_path = os.path.join(mlist.data_path, 'members.txt')
+ >>> open(file_path)
+ Traceback (most recent call last):
+ ...
+ IOError: [Errno ...]
+ No such file or directory: u'.../_xtest@example.com/members.txt'
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ []
+
+
+Existing file
+-------------
+
+If the file exists, it contains a list of addresses, one per line. These
+addresses are returned as the set of recipients.
+
+ >>> fp = open(file_path, 'w')
+ >>> try:
+ ... print >> fp, 'bperson@example.com'
+ ... print >> fp, 'cperson@example.com'
+ ... print >> fp, 'dperson@example.com'
+ ... print >> fp, 'eperson@example.com'
+ ... print >> fp, 'fperson@example.com'
+ ... print >> fp, 'gperson@example.com'
+ ... finally:
+ ... fp.close()
+
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ ['bperson@example.com', 'cperson@example.com', 'dperson@example.com',
+ 'eperson@example.com', 'fperson@example.com', 'gperson@example.com']
+
+However, if the sender of the original message is a member of the list and
+their address is in the include file, the sender's address is /not/ included
+in the recipients list.
+
+ >>> from mailman.interfaces.member import MemberRole
+ >>> address_1 = config.db.user_manager.create_address(
+ ... u'cperson@example.com')
+ >>> address_1.subscribe(mlist, MemberRole.member)
+
+
+ >>> msg = message_from_string("""\
+ ... From: cperson@example.com
+ ...
+ ... A message.
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ ['bperson@example.com', 'dperson@example.com',
+ 'eperson@example.com', 'fperson@example.com', 'gperson@example.com']
diff --git a/src/mailman/pipeline/docs/filtering.txt b/src/mailman/pipeline/docs/filtering.txt
new file mode 100644
index 000000000..70ca3098d
--- /dev/null
+++ b/src/mailman/pipeline/docs/filtering.txt
@@ -0,0 +1,340 @@
+Content filtering
+=================
+
+Mailman can filter the content of messages posted to a mailing list by
+stripping MIME subparts, and possibly reorganizing the MIME structure of a
+message. It does this with the MimeDel handler module, although other
+handlers can potentially do other kinds of finer level content filtering.
+
+ >>> from mailman.pipeline.mime_delete import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+
+Several mailing list options control content filtering. First, the feature
+must be enabled, then there are two options that control which MIME types get
+filtered and which get passed. Finally, there is an option to control whether
+text/html parts will get converted to plain text. Let's set up some defaults
+for these variables, then we'll explain them in more detail below.
+
+ >>> mlist.filter_content = True
+ >>> mlist.filter_mime_types = []
+ >>> mlist.pass_mime_types = []
+ >>> mlist.convert_html_to_plaintext = False
+
+
+Filtering the outer content type
+--------------------------------
+
+A simple filtering setting will just search the content types of the message's
+parts, discarding all parts with a matching MIME type. If the message's outer
+content type matches the filter, the entire message will be discarded.
+
+ >>> mlist.filter_mime_types = ['image/jpeg']
+ >>> # XXX Change this to an enum
+ >>> mlist.filter_action = 0 # Discard
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxxxx
+ ... """)
+ >>> process(mlist, msg, {})
+ Traceback (most recent call last):
+ ...
+ DiscardMessage
+
+However, if we turn off content filtering altogether, then the handler
+short-circuits.
+
+ >>> mlist.filter_content = False
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: image/jpeg
+ MIME-Version: 1.0
+
+ xxxxx
+ >>> msgdata
+ {}
+
+Similarly, no content filtering is performed on digest messages, which are
+crafted internally by Mailman.
+
+ >>> mlist.filter_content = True
+ >>> msgdata = {'isdigest': True}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: image/jpeg
+ MIME-Version: 1.0
+
+ xxxxx
+ >>> msgdata
+ {'isdigest': True}
+
+
+Simple multipart filtering
+--------------------------
+
+If one of the subparts in a multipart message matches the filter type, then
+just that subpart will be stripped.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=BOUNDARY
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUNDARY
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxx
+ ...
+ ... --BOUNDARY
+ ... Content-Type: image/gif
+ ... MIME-Version: 1.0
+ ...
+ ... yyy
+ ... --BOUNDARY--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=BOUNDARY
+ MIME-Version: 1.0
+ X-Content-Filtered-By: Mailman/MimeDel ...
+
+ --BOUNDARY
+ Content-Type: image/gif
+ MIME-Version: 1.0
+
+ yyy
+ --BOUNDARY--
+
+
+
+Collapsing multipart/alternative messages
+-----------------------------------------
+
+When content filtering encounters a multipart/alternative part, and the
+results of filtering leave only one of the subparts, then the
+multipart/alternative may be collapsed. For example, in the following
+message, the outer content type is a multipart/mixed. Inside this part is
+just a single subpart that has a content type of multipart/alternative. This
+inner multipart has two subparts, a jpeg and a gif.
+
+Content filtering will remove the jpeg part, leaving the multipart/alternative
+with only a single gif subpart. Because there's only one subpart left, the
+MIME structure of the message will be reorganized, removing the inner
+multipart/alternative so that the outer multipart/mixed has just a single gif
+subpart.
+
+ >>> mlist.collapse_alternatives = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=BOUNDARY
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUNDARY
+ ... Content-Type: multipart/alternative; boundary=BOUND2
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUND2
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxx
+ ...
+ ... --BOUND2
+ ... Content-Type: image/gif
+ ... MIME-Version: 1.0
+ ...
+ ... yyy
+ ... --BOUND2--
+ ...
+ ... --BOUNDARY--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=BOUNDARY
+ MIME-Version: 1.0
+ X-Content-Filtered-By: Mailman/MimeDel ...
+
+ --BOUNDARY
+ Content-Type: image/gif
+ MIME-Version: 1.0
+
+ yyy
+ --BOUNDARY--
+
+
+When the outer part is a multipart/alternative and filtering leaves this outer
+part with just one subpart, the entire message is converted to the left over
+part's content type. In other words, the left over inner part is promoted to
+being the outer part.
+
+ >>> mlist.filter_mime_types.append('text/html')
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/alternative; boundary=AAA
+ ...
+ ... --AAA
+ ... Content-Type: text/html
+ ...
+ ... This is some html
+ ... --AAA
+ ... Content-Type: text/plain
+ ...
+ ... This is plain text
+ ... --AAA--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: text/plain
+ X-Content-Filtered-By: Mailman/MimeDel ...
+
+ This is plain text
+
+Clean up.
+
+ >>> ignore = mlist.filter_mime_types.pop()
+
+
+Conversion to plain text
+------------------------
+
+Many mailing lists prohibit HTML email, and in fact, such email can be a
+phishing or spam vector. However, many mail readers will send HTML email by
+default because users think it looks pretty. One approach to handling this
+would be to filter out text/html parts and rely on multipart/alternative
+collapsing to leave just a plain text part. This works because many mail
+readers that send HTML email actually send a plain text part in the second
+subpart of such multipart/alternatives.
+
+While this is a good suggestion for plain text-only mailing lists, often a
+mail reader will send only a text/html part with no plain text alternative.
+In this case, the site administrator can enable text/html to text/plain
+conversion by defining a conversion command. A list administrator still needs
+to enable such conversion for their list though.
+
+ >>> mlist.convert_html_to_plaintext = True
+
+By default, Mailman sends the message through lynx, but since this program is
+not guaranteed to exist, we'll craft a simple, but stupid script to simulate
+the conversion process. The script expects a single argument, which is the
+name of the file containing the message payload to filter.
+
+ >>> import os, sys
+ >>> script_path = os.path.join(config.DATA_DIR, 'filter.py')
+ >>> fp = open(script_path, 'w')
+ >>> try:
+ ... print >> fp, """\
+ ... import sys
+ ... print 'Converted text/html to text/plain'
+ ... print 'Filename:', sys.argv[1]
+ ... """
+ ... finally:
+ ... fp.close()
+ >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % (
+ ... sys.executable, script_path)
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: text/html
+ ... MIME-Version: 1.0
+ ...
+ ...
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ MIME-Version: 1.0
+ Content-Type: text/plain
+ X-Content-Filtered-By: Mailman/MimeDel ...
+
+ Converted text/html to text/plain
+ Filename: ...
+
+
+
+Discarding empty parts
+----------------------
+
+Similarly, if after filtering a multipart section ends up empty, then the
+entire multipart is discarded. For example, here's a message where an inner
+multipart/mixed contains two jpeg subparts. Both jpegs are filtered out, so
+the entire inner multipart/mixed is discarded.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=AAA
+ ...
+ ... --AAA
+ ... Content-Type: multipart/mixed; boundary=BBB
+ ...
+ ... --BBB
+ ... Content-Type: image/jpeg
+ ...
+ ... xxx
+ ... --BBB
+ ... Content-Type: image/jpeg
+ ...
+ ... yyy
+ ... --BBB---
+ ... --AAA
+ ... Content-Type: multipart/alternative; boundary=CCC
+ ...
+ ... --CCC
+ ... Content-Type: text/html
+ ...
+ ... This is a header
+ ...
+ ... --CCC
+ ... Content-Type: text/plain
+ ...
+ ... A different message
+ ... --CCC--
+ ... --AAA
+ ... Content-Type: image/gif
+ ...
+ ... zzz
+ ... --AAA
+ ... Content-Type: image/gif
+ ...
+ ... aaa
+ ... --AAA--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=AAA
+ X-Content-Filtered-By: Mailman/MimeDel ...
+
+ --AAA
+ MIME-Version: 1.0
+ Content-Type: text/plain
+
+ Converted text/html to text/plain
+ Filename: ...
+
+ --AAA
+ Content-Type: image/gif
+
+ zzz
+ --AAA
+ Content-Type: image/gif
+
+ aaa
+ --AAA--
+
+
+
+Passing MIME types
+------------------
+
+XXX Describe the pass_mime_types setting and how it interacts with
+filter_mime_types.
diff --git a/src/mailman/pipeline/docs/nntp.txt b/src/mailman/pipeline/docs/nntp.txt
new file mode 100644
index 000000000..3f48be1da
--- /dev/null
+++ b/src/mailman/pipeline/docs/nntp.txt
@@ -0,0 +1,65 @@
+NNTP (i.e. Usenet) Gateway
+==========================
+
+Mailman has an NNTP gateway, whereby messages posted to the mailing list can
+be forwarded onto an NNTP newsgroup. Typically this means Usenet, but since
+NNTP is to Usenet as IP is to the web, it's more general than that.
+
+ >>> handler = config.handlers['to-usenet']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+ >>> switchboard = config.switchboards['news']
+
+Gatewaying from the mailing list to the newsgroup happens through a separate
+'nntp' queue and happens immediately when the message is posted through to the
+list. Note that gatewaying from the newsgroup to the list happens via a
+cronjob (currently not shown).
+
+There are several situations which prevent a message from being gatewayed to
+the newsgroup. The feature could be disabled, as is the default.
+
+ >>> mlist.gateway_to_news = False
+ >>> msg = message_from_string("""\
+ ... Subject: An important message
+ ...
+ ... Something of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> switchboard.files
+ []
+
+Even if enabled, messages that came from the newsgroup are never gated back to
+the newsgroup.
+
+ >>> mlist.gateway_to_news = True
+ >>> handler.process(mlist, msg, {'fromusenet': True})
+ >>> switchboard.files
+ []
+
+Neither are digests ever gated to the newsgroup.
+
+ >>> handler.process(mlist, msg, {'isdigest': True})
+ >>> switchboard.files
+ []
+
+However, other posted messages get gated to the newsgroup via the nntp queue.
+The list owner can set the linked newsgroup and the nntp host that its
+messages are gated to.
+
+ >>> mlist.linked_newsgroup = u'comp.lang.thing'
+ >>> mlist.nntp_host = u'news.example.com'
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> msg, msgdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print msg.as_string()
+ Subject: An important message
+
+ Something of great import.
+
+ >>> dump_msgdata(msgdata)
+ _parsemsg: False
+ listname : _xtest@example.com
+ version : 3
diff --git a/src/mailman/pipeline/docs/reply-to.txt b/src/mailman/pipeline/docs/reply-to.txt
new file mode 100644
index 000000000..e57b97e5d
--- /dev/null
+++ b/src/mailman/pipeline/docs/reply-to.txt
@@ -0,0 +1,127 @@
+Reply-to munging
+================
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+Reply-to munging refers to the behavior where a mailing list can be configured
+to change or augment an existing Reply-To header in a message posted to the
+list. Reply-to munging is fairly controversial, with arguments made either
+for or against munging.
+
+The Mailman developers' (and, I believe, the majority) consensus is to do no
+Reply-to munging, under several principles. Primarily, most reply-to munging
+is requested by people who do not have both a Reply and Reply All button on
+their mail reader. If you do not munge Reply-To, then these buttons will work
+properly, but if you munge the header, it is impossible for these buttons to
+work right, because both will reply to the list. This leads to unfortunate
+accidents where a private message is accidentally posted to the entire list.
+
+However, Mailman gives list owners the option to do Reply-To munging anyway,
+mostly as a way to shut up the really vocal minority who seem to insist on
+this mis-feature.
+
+
+Reply to list
+-------------
+
+A list can be configured to add a Reply-To header pointing back to the mailing
+list's posting address. If there's no Reply-To header in the original
+message, the list's posting address simply gets inserted.
+
+ >>> from mailman.interfaces.mailinglist import ReplyToMunging
+ >>> mlist.reply_goes_to_list = ReplyToMunging.point_to_list
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.description = u''
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'_xtest@example.com'
+
+It's also possible to strip any existing Reply-To header first, before adding
+the list's posting address.
+
+ >>> mlist.first_strip_reply_to = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'_xtest@example.com'
+
+If you don't first strip the header, then the list's posting address will just
+get appended to whatever the original version was.
+
+ >>> mlist.first_strip_reply_to = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'bperson@example.com, _xtest@example.com'
+
+
+Explicit Reply-To
+-----------------
+
+The list can also be configured to have an explicit Reply-To header.
+
+ >>> mlist.reply_goes_to_list = ReplyToMunging.explicit_header
+ >>> mlist.reply_to_address = u'my-list@example.com'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com'
+
+And as before, it's possible to either strip any existing Reply-To header...
+
+ >>> mlist.first_strip_reply_to = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com'
+
+...or not.
+
+ >>> mlist.first_strip_reply_to = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com, bperson@example.com'
diff --git a/src/mailman/pipeline/docs/replybot.txt b/src/mailman/pipeline/docs/replybot.txt
new file mode 100644
index 000000000..f3c3281b3
--- /dev/null
+++ b/src/mailman/pipeline/docs/replybot.txt
@@ -0,0 +1,213 @@
+Auto-reply handler
+==================
+
+Mailman has an auto-reply handler that sends automatic responses to messages
+it receives on its posting address, or special robot addresses. Automatic
+responses are subject to various conditions, such as headers in the original
+message or the amount of time since the last auto-response.
+
+ >>> from mailman.pipeline.replybot import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.real_name = u'XTest'
+
+ >>> # Ensure that the virgin queue is empty, since we'll be checking this
+ >>> # for new auto-response messages.
+ >>> virginq = config.switchboards['virgin']
+ >>> virginq.files
+ []
+
+
+Basic autoresponding
+--------------------
+
+Basic autoresponding occurs when the list is set up to respond to either its
+-owner address, its -request address, or to the posting address, and a message
+is sent to one of these addresses. A mailing list also has an autoresponse
+grace period which describes how much time must pass before a second response
+will be sent, with 0 meaning "there is no grace period".
+
+ >>> import datetime
+ >>> mlist.autorespond_admin = True
+ >>> mlist.autoresponse_graceperiod = datetime.timedelta()
+ >>> mlist.autoresponse_admin_text = u'admin autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest-owner@example.com
+ ...
+ ... help
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> # Print only some of the meta data. The rest is uninteresting.
+ >>> qdata['listname']
+ u'_xtest@example.com'
+ >>> sorted(qdata['recips'])
+ [u'aperson@example.com']
+ >>> # Delete data that is time dependent or random
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+
+ admin autoresponse text
+ >>> virginq.files
+ []
+
+
+Short circuiting
+----------------
+
+Several headers in the original message determine whether an autoresponse
+should even be sent. For example, if the message has an "X-Ack: No" header,
+no auto-response is sent.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-Ack: No
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+Mailman itself can suppress autoresponses for certain types of internally
+crafted messages, by setting the 'noack' metadata key.
+
+ >>> msg = message_from_string("""\
+ ... From: mailman@example.com
+ ...
+ ... help for you
+ ... """)
+ >>> process(mlist, msg, dict(noack=True, toowner=True))
+ >>> virginq.files
+ []
+
+If there is a Precedence: header with any of the values 'bulk', 'junk', or
+'list', then the autoresponse is also suppressed.
+
+ >>> msg = message_from_string("""\
+ ... From: asystem@example.com
+ ... Precedence: bulk
+ ...
+ ... hey!
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+ >>> msg.replace_header('precedence', 'junk')
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+ >>> msg.replace_header('precedence', 'list')
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+Unless the X-Ack: header has a value of "yes", in which case, the Precedence
+header is ignored.
+
+ >>> msg['X-Ack'] = 'yes'
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: asystem@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+
+ admin autoresponse text
+
+
+Available auto-responses
+------------------------
+
+As shown above, a message sent to the -owner address will get an auto-response
+with the text set for owner responses. Two other types of email will get
+auto-responses: those sent to the -request address...
+
+ >>> mlist.autorespond_requests = True
+ >>> mlist.autoresponse_request_text = u'robot autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest-request@example.com
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, dict(torequest=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+
+ robot autoresponse text
+
+...and those sent to the posting address.
+
+ >>> mlist.autorespond_postings = True
+ >>> mlist.autoresponse_postings_text = u'postings autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest@example.com
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+
+ postings autoresponse text
+
+
+Grace periods
+-------------
+
+Auto-responses have a grace period, during which no additional responses will
+be sent. This is so as not to bombard the sender with responses. The grace
+period is measured in days.
+
+XXX Add grace period tests.
diff --git a/src/mailman/pipeline/docs/scrubber.txt b/src/mailman/pipeline/docs/scrubber.txt
new file mode 100644
index 000000000..dec1c1f64
--- /dev/null
+++ b/src/mailman/pipeline/docs/scrubber.txt
@@ -0,0 +1,225 @@
+The scrubber
+============
+
+The scrubber is an integral part of Mailman, both in the normal delivery of
+messages and in components such as the archiver. Its primary purpose is to
+scrub attachments from messages so that binary goop doesn't end up in an
+archive message.
+
+ >>> from mailman.pipeline.scrubber import process, save_attachment
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+
+Helper functions for getting the attachment data.
+
+ >>> import os, re
+ >>> def read_attachment(filename, remove=True):
+ ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR,
+ ... mlist.fqdn_listname, filename)
+ ... fp = open(path)
+ ... try:
+ ... data = fp.read()
+ ... finally:
+ ... fp.close()
+ ... if remove:
+ ... os.unlink(path)
+ ... return data
+
+ >>> from urlparse import urlparse
+ >>> def read_url_from_message(msg):
+ ... url = None
+ ... for line in msg.get_payload().splitlines():
+ ... mo = re.match('URL: <(?P<url>[^>]+)>', line)
+ ... if mo:
+ ... url = mo.group('url')
+ ... break
+ ... path = '/'.join(urlparse(url).path.split('/')[3:])
+ ... return read_attachment(path)
+
+
+Saving attachments
+------------------
+
+The Scrubber handler exposes a function called save_attachment() which can be
+used to strip various types of attachments and store them in the archive
+directory. This is a public interface used by components outside the normal
+processing pipeline.
+
+Site administrators can decide whether the scrubber should use the attachment
+filename suggested in the message's Content-Disposition: header or not. If
+enabled, the filename will be used when this header attribute is present (yes,
+this is an unfortunate double negative).
+
+ >>> config.push('test config', """
+ ... [scrubber]
+ ... use_attachment_filename: yes
+ ... """)
+ >>> msg = message_from_string("""\
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... """)
+ >>> save_attachment(mlist, msg, 'dir')
+ u''
+ >>> data = read_attachment('dir/xtest.gif')
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+Saving the attachment does not alter the original message.
+
+ >>> print msg.as_string()
+ Content-Type: image/gif; name="xtest.gif"
+ Content-Transfer-Encoding: base64
+ Content-Disposition: attachment; filename="xtest.gif"
+
+ R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+
+The site administrator can also configure Mailman to ignore the
+Content-Disposition: filename. This is the default.
+
+ >>> config.pop('test config')
+ >>> config.push('test config', """
+ ... [scrubber]
+ ... use_attachment_filename: no
+ ... """)
+ >>> msg = message_from_string("""\
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... """)
+ >>> save_attachment(mlist, msg, 'dir')
+ u''
+ >>> data = read_attachment('dir/xtest.gif')
+ Traceback (most recent call last):
+ IOError: [Errno ...] No such file or directory:
+ u'.../archives/private/_xtest@example.com/dir/xtest.gif'
+ >>> data = read_attachment('dir/attachment.gif')
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+
+Scrubbing image attachments
+---------------------------
+
+When scrubbing image attachments, the original message is modified to include
+a reference to the attachment file as available through the on-line archive.
+
+ >>> msg = message_from_string("""\
+ ... MIME-Version: 1.0
+ ... Content-Type: multipart/mixed; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... Content-type: text/plain; charset=us-ascii
+ ...
+ ... This is a message.
+ ... --BOUNDARY
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... --BOUNDARY--
+ ... """)
+ >>> msgdata = {}
+
+The Scrubber.process() function is different than other handler process
+functions in that it returns the scrubbed message.
+
+ >>> scrubbed_msg = process(mlist, msg, msgdata)
+ >>> scrubbed_msg is msg
+ True
+ >>> print scrubbed_msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+
+ This is a message.
+ -------------- next part --------------
+ A non-text attachment was scrubbed...
+ Name: xtest.gif
+ Type: image/gif
+ Size: 34 bytes
+ Desc: not available
+ URL:
+
+
+This is the same as the transformed message originally passed in.
+
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+
+ This is a message.
+ -------------- next part --------------
+ A non-text attachment was scrubbed...
+ Name: xtest.gif
+ Type: image/gif
+ Size: 34 bytes
+ Desc: not available
+ URL:
+
+ >>> msgdata
+ {}
+
+The URL will point to the attachment sitting in the archive.
+
+ >>> data = read_url_from_message(msg)
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+
+Scrubbing text attachments
+--------------------------
+
+Similar to image attachments, text attachments will also be scrubbed, but the
+placeholder will be slightly different.
+
+ >>> msg = message_from_string("""\
+ ... MIME-Version: 1.0
+ ... Content-Type: multipart/mixed; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no
+ ...
+ ... This is a message.
+ ... --BOUNDARY
+ ... Content-type: text/plain; name="xtext.txt"
+ ... Content-Disposition: attachment; filename="xtext.txt"
+ ...
+ ... This is a text attachment.
+ ... --BOUNDARY--
+ ... """)
+ >>> scrubbed_msg = process(mlist, msg, {})
+ >>> print scrubbed_msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Transfer-Encoding: 7bit
+ Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no"
+
+ This is a message.
+ -------------- next part --------------
+ An embedded and charset-unspecified text was scrubbed...
+ Name: xtext.txt
+ URL:
+
+ >>> read_url_from_message(msg)
+ 'This is a text attachment.'
+
+
+Clean up
+--------
+
+ >>> config.pop('test config')
diff --git a/src/mailman/pipeline/docs/subject-munging.txt b/src/mailman/pipeline/docs/subject-munging.txt
new file mode 100644
index 000000000..b2972683b
--- /dev/null
+++ b/src/mailman/pipeline/docs/subject-munging.txt
@@ -0,0 +1,244 @@
+Subject munging
+===============
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+
+Inserting a prefix
+------------------
+
+Another thing CookHeaders does is 'munge' the Subject header by inserting the
+subject prefix for the list at the front. If there's no subject header in the
+original message, Mailman uses a canned default. In order to do subject
+munging, a mailing list must have a preferred language.
+
+ >>> mlist.subject_prefix = u'[XTest] '
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+
+The original subject header is stored in the message metadata. We must print
+the new Subject header because it gets converted from a string to an
+email.header.Header instance which has an unhelpful repr.
+
+ >>> msgdata['origsubj']
+ u''
+ >>> print msg['subject']
+ [XTest] (no subject)
+
+If the original message had a Subject header, then the prefix is inserted at
+the beginning of the header's value.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['origsubj']
+ u'Something important'
+ >>> print msg['subject']
+ [XTest] Something important
+
+Subject headers are not munged for digest messages.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> msg['subject']
+ u'Something important'
+
+Nor are they munged for 'fast tracked' messages, which are generally defined
+as messages that Mailman crafts internally.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(_fasttrack=True))
+ >>> msg['subject']
+ u'Something important'
+
+If a Subject header already has a prefix, usually following a Re: marker,
+another one will not be added but the prefix will be moved to the front of the
+header text.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Re: [XTest] Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Re: Something important
+
+If the Subject header has a prefix at the front of the header text, that's
+where it will stay. This is called 'new style' prefixing and is the only
+option available in Mailman 3.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: [XTest] Re: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Re: Something important
+
+
+Internationalized headers
+-------------------------
+
+Internationalization adds some interesting twists to the handling of subject
+prefixes. Part of what makes this interesting is the encoding of i18n headers
+using RFC 2047, and lists whose preferred language is in a different character
+set than the encoded header.
+
+ >>> msg = message_from_string("""\
+ ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ >>> unicode(msg['subject'])
+ u'[XTest] \u30e1\u30fc\u30eb\u30de\u30f3'
+
+
+Prefix numbers
+--------------
+
+Subject prefixes support a placeholder for the numeric post id. Every time a
+message is posted to the mailing list, a 'post id' gets incremented. This is
+a purely sequential integer that increases monotonically. By adding a '%d'
+placeholder to the subject prefix, this post id can be included in the prefix.
+
+ >>> mlist.subject_prefix = u'[XTest %d] '
+ >>> mlist.post_id = 456
+ >>> msg = message_from_string("""\
+ ... Subject: Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Something important
+
+This works even when the message is a reply, except that in this case, the
+numeric post id in the generated subject prefix is updated with the new post
+id.
+
+ >>> msg = message_from_string("""\
+ ... Subject: [XTest 123] Re: Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: Something important
+
+If the Subject header had old style prefixing, the prefix is moved to the
+front of the header text.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Re: [XTest 123] Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: Something important
+
+
+And of course, the proper thing is done when posting id numbers are included
+in the subject prefix, and the subject is encoded non-ascii.
+
+ >>> msg = message_from_string("""\
+ ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ >>> unicode(msg['subject'])
+ u'[XTest 456] \u30e1\u30fc\u30eb\u30de\u30f3'
+
+Even more fun is when the i18n Subject header already has a prefix, possibly
+with a different posting number.
+
+ >>> msg = message_from_string("""\
+ ... Subject: [XTest 123] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+
+# XXX This requires Python email patch #1681333 to succeed.
+# >>> unicode(msg['subject'])
+# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3'
+
+As before, old style subject prefixes are re-ordered.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Re: [XTest 123] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re:
+ =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+
+# XXX This requires Python email patch #1681333 to succeed.
+# >>> unicode(msg['subject'])
+# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3'
+
+
+In this test case, we get an extra space between the prefix and the original
+subject. It's because the original is 'crooked'. Note that a Subject
+starting with '\n ' is generated by some version of Eudora Japanese edition.
+
+ >>> mlist.subject_prefix = u'[XTest] '
+ >>> msg = message_from_string("""\
+ ... Subject:
+ ... Important message
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Important message
+
+And again, with an RFC 2047 encoded header.
+
+ >>> msg = message_from_string("""\
+ ... Subject:
+ ... =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+
+# XXX This one does not appear to work the same way as
+# test_subject_munging_prefix_crooked() in the old Python-based tests. I need
+# to get Tokio to look at this.
+# >>> print msg['subject']
+# [XTest] =?iso-2022-jp?b?IBskQiVhITwlayVeJXMbKEI=?=
diff --git a/src/mailman/pipeline/docs/tagger.txt b/src/mailman/pipeline/docs/tagger.txt
new file mode 100644
index 000000000..9f0bcd4b2
--- /dev/null
+++ b/src/mailman/pipeline/docs/tagger.txt
@@ -0,0 +1,235 @@
+Message tagger
+==============
+
+Mailman has a topics system which works like this: a mailing list
+administrator sets up one or more topics, which is essentially a named regular
+expression. The topic name can be any arbitrary string, and the name serves
+double duty as the 'topic tag'. Each message that flows through the mailing
+list has its Subject: and Keywords: headers compared against these regular
+expressions. The message then gets tagged with the topic names of each hit.
+
+ >>> from mailman.pipeline.tagger import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Topics must be enabled for Mailman to do any topic matching, even if topics
+are defined.
+
+ >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)]
+ >>> mlist.topics_enabled = False
+ >>> mlist.topics_bodylines_limit = 0
+
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+
+
+ >>> msgdata
+ {}
+
+However, once topics are enabled, messages will be tagged. There are two
+artifacts of tagging; an X-Topics: header is added with the topic name, and
+the message metadata gets a key with a list of matching topic names.
+
+ >>> mlist.topics_enabled = True
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+ X-Topics: bar fight
+