summaryrefslogtreecommitdiff
path: root/mailman/pipeline/docs
diff options
context:
space:
mode:
Diffstat (limited to 'mailman/pipeline/docs')
-rw-r--r--mailman/pipeline/docs/ack-headers.txt41
-rw-r--r--mailman/pipeline/docs/acknowledge.txt162
-rw-r--r--mailman/pipeline/docs/after-delivery.txt28
-rw-r--r--mailman/pipeline/docs/archives.txt133
-rw-r--r--mailman/pipeline/docs/avoid-duplicates.txt169
-rw-r--r--mailman/pipeline/docs/calc-recips.txt101
-rw-r--r--mailman/pipeline/docs/cleanse.txt95
-rw-r--r--mailman/pipeline/docs/cook-headers.txt328
-rw-r--r--mailman/pipeline/docs/decorate.txt318
-rw-r--r--mailman/pipeline/docs/digests.txt536
-rw-r--r--mailman/pipeline/docs/file-recips.txt97
-rw-r--r--mailman/pipeline/docs/filtering.txt341
-rw-r--r--mailman/pipeline/docs/nntp.txt68
-rw-r--r--mailman/pipeline/docs/reply-to.txt128
-rw-r--r--mailman/pipeline/docs/replybot.txt216
-rw-r--r--mailman/pipeline/docs/scrubber.txt214
-rw-r--r--mailman/pipeline/docs/subject-munging.txt245
-rw-r--r--mailman/pipeline/docs/tagger.txt237
18 files changed, 3457 insertions, 0 deletions
diff --git a/mailman/pipeline/docs/ack-headers.txt b/mailman/pipeline/docs/ack-headers.txt
new file mode 100644
index 000000000..28a8eed9e
--- /dev/null
+++ b/mailman/pipeline/docs/ack-headers.txt
@@ -0,0 +1,41 @@
+Acknowledgment headers
+======================
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.configuration import config
+ >>> from mailman.pipeline.cook_headers import process
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+When the message's metadata has a 'noack' key set, an 'X-Ack: no' header is
+added.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(noack=True))
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-Ack: no
+ ...
+
+Any existing X-Ack header in the original message is removed.
+
+ >>> msg = message_from_string("""\
+ ... X-Ack: yes
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(noack=True))
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-Ack: no
+ ...
diff --git a/mailman/pipeline/docs/acknowledge.txt b/mailman/pipeline/docs/acknowledge.txt
new file mode 100644
index 000000000..76c8fdf21
--- /dev/null
+++ b/mailman/pipeline/docs/acknowledge.txt
@@ -0,0 +1,162 @@
+Message acknowledgment
+======================
+
+When a user posts a message to a mailing list, and that user has chosen to
+receive acknowledgments of their postings, Mailman will send them such an
+acknowledgment.
+
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['acknowledge']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.real_name = u'XTest'
+ >>> mlist.preferred_language = u'en'
+ >>> # XXX This will almost certainly change once we've worked out the web
+ >>> # space layout for mailing lists now.
+ >>> mlist.web_page_url = u'http://lists.example.com/'
+
+ >>> # Ensure that the virgin queue is empty, since we'll be checking this
+ >>> # for new auto-response messages.
+ >>> from mailman.queue import Switchboard
+ >>> virginq = Switchboard(config.VIRGINQUEUE_DIR)
+ >>> virginq.files
+ []
+
+Subscribe a user to the mailing list.
+
+ >>> usermgr = config.db.user_manager
+ >>> from mailman.interfaces import MemberRole
+ >>> user_1 = usermgr.create_user(u'aperson@example.com')
+ >>> address_1 = list(user_1.addresses)[0]
+ >>> address_1.subscribe(mlist, MemberRole.member)
+ <Member: aperson@example.com on _xtest@example.com as MemberRole.member>
+
+
+Non-member posts
+----------------
+
+Non-members can't get acknowledgments of their posts to the mailing list.
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> virginq.files
+ []
+
+We can also specify the original sender in the message's metadata. If that
+person is also not a member, no acknowledgment will be sent either.
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg,
+ ... dict(original_sender=u'cperson@example.com'))
+ >>> virginq.files
+ []
+
+
+No acknowledgment requested
+---------------------------
+
+Unless the user has requested acknowledgments, they will not get one.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> virginq.files
+ []
+
+Similarly if the original sender is specified in the message metadata, and
+that sender is a member but not one who has requested acknowledgments, none
+will be sent.
+
+ >>> user_2 = usermgr.create_user(u'dperson@example.com')
+ >>> address_2 = list(user_2.addresses)[0]
+ >>> address_2.subscribe(mlist, MemberRole.member)
+ <Member: dperson@example.com on _xtest@example.com as MemberRole.member>
+
+ >>> handler.process(mlist, msg,
+ ... dict(original_sender=u'dperson@example.com'))
+ >>> virginq.files
+ []
+
+
+Requested acknowledgments
+-------------------------
+
+If the member requests acknowledgments, Mailman will send them one when they
+post to the mailing list.
+
+ >>> user_1.preferences.acknowledge_posts = True
+
+The receipt will include the original message's subject in the response body.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something witty and insightful
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> virginq.files
+ []
+ >>> sorted(qdata.items())
+ [..., ('recips', [u'aperson@example.com']), ...]
+ >>> print qmsg.as_string()
+ ...
+ MIME-Version: 1.0
+ ...
+ Subject: XTest post acknowledgment
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ ...
+ Precedence: bulk
+ <BLANKLINE>
+ Your message entitled
+ <BLANKLINE>
+ Something witty and insightful
+ <BLANKLINE>
+ was successfully received by the XTest mailing list.
+ <BLANKLINE>
+ List info page: http://lists.example.com/listinfo/_xtest@example.com
+ Your preferences: http://example.com/aperson@example.com
+ <BLANKLINE>
+
+If there is no subject, then the receipt will use a generic message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> virginq.files
+ []
+ >>> sorted(qdata.items())
+ [..., ('recips', [u'aperson@example.com']), ...]
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ ...
+ Subject: XTest post acknowledgment
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ ...
+ Precedence: bulk
+ <BLANKLINE>
+ Your message entitled
+ <BLANKLINE>
+ (no subject)
+ <BLANKLINE>
+ was successfully received by the XTest mailing list.
+ <BLANKLINE>
+ List info page: http://lists.example.com/listinfo/_xtest@example.com
+ Your preferences: http://example.com/aperson@example.com
+ <BLANKLINE>
diff --git a/mailman/pipeline/docs/after-delivery.txt b/mailman/pipeline/docs/after-delivery.txt
new file mode 100644
index 000000000..5bc9b5936
--- /dev/null
+++ b/mailman/pipeline/docs/after-delivery.txt
@@ -0,0 +1,28 @@
+After delivery
+==============
+
+After a message is delivered, or more correctly, after it has been processed
+by the rest of the handlers in the incoming queue pipeline, a couple of
+bookkeeping pieces of information are updated.
+
+ >>> import datetime
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['after-delivery']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> post_time = datetime.datetime.now() - datetime.timedelta(minutes=10)
+ >>> mlist.last_post_time = post_time
+ >>> mlist.post_id = 10
+
+Processing a message with this handler updates the last_post_time and post_id
+attributes.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... Something interesting.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> mlist.last_post_time > post_time
+ True
+ >>> mlist.post_id
+ 11
diff --git a/mailman/pipeline/docs/archives.txt b/mailman/pipeline/docs/archives.txt
new file mode 100644
index 000000000..b7b54f17f
--- /dev/null
+++ b/mailman/pipeline/docs/archives.txt
@@ -0,0 +1,133 @@
+Archives
+========
+
+Updating the archives with posted messages is handled by a separate queue,
+which allows for better memory management and prevents blocking the main
+delivery processes while messages are archived. This also allows external
+archivers to work in a separate process from the main Mailman delivery
+processes.
+
+ >>> from mailman.queue import Switchboard
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['to-archive']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+ >>> switchboard = Switchboard(config.ARCHQUEUE_DIR)
+
+A helper function.
+
+ >>> def clear():
+ ... for filebase in switchboard.files:
+ ... msg, msgdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+
+The purpose of the ToArchive handler is to make a simple decision as to
+whether the message should get archived and if so, to drop the message in the
+archiving queue. Really the most important thing is to determine when a
+message should /not/ get archived.
+
+For example, no digests should ever get archived.
+
+ >>> mlist.archive = True
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+If the mailing list is not configured to archive, then even regular deliveries
+won't be archived.
+
+ >>> mlist.archive = False
+ >>> handler.process(mlist, msg, {})
+ >>> switchboard.files
+ []
+
+There are two de-facto standards for a message to indicate that it does not
+want to be archived. We've seen both in the wild so both are supported. The
+X-No-Archive: header can be used to indicate that the message should not be
+archived. Confusingly, this header's value is actually ignored.
+
+ >>> mlist.archive = True
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-No-Archive: YES
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+Even a 'no' value will stop the archiving of the message.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-No-Archive: No
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+Another header that's been observed is the X-Archive: header. Here, the
+header's case folded value must be 'no' in order to prevent archiving.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-Archive: No
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, dict(isdigest=True))
+ >>> switchboard.files
+ []
+
+But if the value is 'yes', then the message will be archived.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ... X-Archive: Yes
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> qmsg, qdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print qmsg.as_string()
+ Subject: A sample message
+ X-Archive: Yes
+ <BLANKLINE>
+ A message of great import.
+ <BLANKLINE>
+ >>> sorted(qdata.items())
+ [('_parsemsg', False), ('received_time', ...), ('version', 3)]
+
+Without either archiving header, and all other things being the same, the
+message will get archived.
+
+ >>> msg = message_from_string("""\
+ ... Subject: A sample message
+ ...
+ ... A message of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> qmsg, qdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print qmsg.as_string()
+ Subject: A sample message
+ <BLANKLINE>
+ A message of great import.
+ <BLANKLINE>
+ >>> sorted(qdata.items())
+ [('_parsemsg', False), ('received_time', ...), ('version', 3)]
diff --git a/mailman/pipeline/docs/avoid-duplicates.txt b/mailman/pipeline/docs/avoid-duplicates.txt
new file mode 100644
index 000000000..9fd332d1b
--- /dev/null
+++ b/mailman/pipeline/docs/avoid-duplicates.txt
@@ -0,0 +1,169 @@
+Avoid duplicates
+================
+
+The AvoidDuplicates handler module implements several strategies to try to
+reduce the reception of duplicate messages. It does this by removing certain
+recipients from the list of recipients that earlier handler modules
+(e.g. CalcRecips) calculates.
+
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['avoid-duplicates']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Create some members we're going to use.
+
+ >>> from mailman.interfaces import MemberRole
+ >>> address_a = config.db.user_manager.create_address(
+ ... u'aperson@example.com')
+ >>> address_b = config.db.user_manager.create_address(
+ ... u'bperson@example.com')
+ >>> member_a = address_a.subscribe(mlist, MemberRole.member)
+ >>> member_b = address_b.subscribe(mlist, MemberRole.member)
+ >>> # This is the message metadata dictionary as it would be produced by
+ >>> # the CalcRecips handler.
+ >>> recips = dict(recips=[u'aperson@example.com', u'bperson@example.com'])
+
+
+Short circuiting
+----------------
+
+The module short-circuits if there are no recipients.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: A message of great import
+ ...
+ ... Something
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> msgdata
+ {}
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: A message of great import
+ <BLANKLINE>
+ Something
+ <BLANKLINE>
+
+
+Suppressing the list copy
+-------------------------
+
+Members can elect not to receive a list copy of any message on which they are
+explicitly named as a recipient. This is done by setting their
+receive_list_copy preference to False. However, if they aren't mentioned in
+one of the recipient headers (i.e. To, CC, Resent-To, or Resent-CC), then they
+will get a list copy.
+
+ >>> member_a.preferences.receive_list_copy = False
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+
+If they're mentioned on the CC line, they won't get a list copy.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ... CC: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ CC: aperson@example.com
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+
+But if they're mentioned on the CC line and have receive_list_copy set to True
+(the default), then they still get a list copy.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ... CC: bperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ CC: bperson@example.com
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+
+Other headers checked for recipients include the To...
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ... To: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ To: aperson@example.com
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+
+...Resent-To...
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ... Resent-To: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ Resent-To: aperson@example.com
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+
+...and Resent-CC headers.
+
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ... Resent-Cc: aperson@example.com
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = recips.copy()
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'bperson@example.com']
+ >>> print msg.as_string()
+ From: Claire Person <cperson@example.com>
+ Resent-Cc: aperson@example.com
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
diff --git a/mailman/pipeline/docs/calc-recips.txt b/mailman/pipeline/docs/calc-recips.txt
new file mode 100644
index 000000000..057351873
--- /dev/null
+++ b/mailman/pipeline/docs/calc-recips.txt
@@ -0,0 +1,101 @@
+Calculating recipients
+======================
+
+Every message that makes it through to the list membership gets sent to a set
+of recipient addresses. These addresses are calculated by one of the handler
+modules and depend on a host of factors.
+
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['calculate-recipients']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Recipients are calculated from the list members, so add a bunch of members to
+start out with. First, create a bunch of addresses...
+
+ >>> usermgr = config.db.user_manager
+ >>> address_a = usermgr.create_address(u'aperson@example.com')
+ >>> address_b = usermgr.create_address(u'bperson@example.com')
+ >>> address_c = usermgr.create_address(u'cperson@example.com')
+ >>> address_d = usermgr.create_address(u'dperson@example.com')
+ >>> address_e = usermgr.create_address(u'eperson@example.com')
+ >>> address_f = usermgr.create_address(u'fperson@example.com')
+
+...then subscribe these addresses to the mailing list as members...
+
+ >>> from mailman.interfaces import MemberRole
+ >>> member_a = address_a.subscribe(mlist, MemberRole.member)
+ >>> member_b = address_b.subscribe(mlist, MemberRole.member)
+ >>> member_c = address_c.subscribe(mlist, MemberRole.member)
+ >>> member_d = address_d.subscribe(mlist, MemberRole.member)
+ >>> member_e = address_e.subscribe(mlist, MemberRole.member)
+ >>> member_f = address_f.subscribe(mlist, MemberRole.member)
+
+...then make some of the members digest members.
+
+ >>> from mailman.constants import DeliveryMode
+ >>> member_d.preferences.delivery_mode = DeliveryMode.plaintext_digests
+ >>> member_e.preferences.delivery_mode = DeliveryMode.mime_digests
+ >>> member_f.preferences.delivery_mode = DeliveryMode.summary_digests
+
+
+Short-circuiting
+----------------
+
+Sometimes, the list of recipients already exists in the message metadata.
+This can happen, for example, when a message was previously delivered to some
+but not all of the recipients.
+
+ >>> msg = message_from_string("""\
+ ... From: Xavier Person <xperson@example.com>
+ ...
+ ... Something of great import.
+ ... """)
+ >>> recips = set((u'qperson@example.com', u'zperson@example.com'))
+ >>> msgdata = dict(recips=recips)
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'qperson@example.com', u'zperson@example.com']
+
+
+Regular delivery recipients
+---------------------------
+
+Regular delivery recipients are those people who get messages from the list as
+soon as they are posted. In other words, these folks are not digest members.
+
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com', u'cperson@example.com']
+
+Members can elect not to receive a list copy of their own postings.
+
+ >>> member_c.preferences.receive_own_postings = False
+ >>> msg = message_from_string("""\
+ ... From: Claire Person <cperson@example.com>
+ ...
+ ... Something of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ [u'aperson@example.com', u'bperson@example.com']
+
+Members can also elect not to receive a list copy of any message on which they
+are explicitly named as a recipient. However, see the AvoidDuplicates handler
+for details.
+
+
+Digest recipients
+-----------------
+
+XXX Test various digest deliveries.
+
+
+Urgent messages
+---------------
+
+XXX Test various urgent deliveries:
+ * test_urgent_moderator()
+ * test_urgent_admin()
+ * test_urgent_reject()
diff --git a/mailman/pipeline/docs/cleanse.txt b/mailman/pipeline/docs/cleanse.txt
new file mode 100644
index 000000000..1597095b3
--- /dev/null
+++ b/mailman/pipeline/docs/cleanse.txt
@@ -0,0 +1,95 @@
+Cleansing headers
+=================
+
+All messages posted to a list get their headers cleansed. Some headers are
+related to additional permissions that can be granted to the message and other
+headers can be used to fish for membership.
+
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['cleanse']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Headers such as Approved, Approve, and Urgent are used to grant special
+permissions to individual messages. All may contain a password; the first two
+headers are used by list administrators to pre-approve a message normally held
+for approval. The latter header is used to send a regular message to all
+members, regardless of whether they get digests or not. Because all three
+headers contain passwords, they must be removed from any posted message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Approved: foobar
+ ... Approve: barfoo
+ ... Urgent: notreally
+ ... Subject: A message of great import
+ ...
+ ... Blah blah blah
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: A message of great import
+ <BLANKLINE>
+ Blah blah blah
+ <BLANKLINE>
+
+Other headers can be used by list members to fish the list for membership, so
+we don't let them go through. These are a mix of standard headers and custom
+headers supported by some mail readers. For example, X-PMRQC is supported by
+Pegasus mail. I don't remember what program uses X-Confirm-Reading-To though
+(Some Microsoft product perhaps?).
+
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ... Reply-To: bperson@example.org
+ ... Sender: asystem@example.net
+ ... Return-Receipt-To: another@example.com
+ ... Disposition-Notification-To: athird@example.com
+ ... X-Confirm-Reading-To: afourth@example.com
+ ... X-PMRQC: afifth@example.com
+ ... Subject: a message to you
+ ...
+ ... How are you doing?
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: bperson@example.com
+ Reply-To: bperson@example.org
+ Sender: asystem@example.net
+ Subject: a message to you
+ <BLANKLINE>
+ How are you doing?
+ <BLANKLINE>
+
+
+Anonymous lists
+---------------
+
+Anonymous mailing lists also try to cleanse certain identifying headers from
+the original posting, so that it is at least a bit more difficult to determine
+who sent the message. This isn't perfect though; for example, the bodies of
+messages are never scrubbed (though that might not be a bad idea). The From
+and Reply-To headers in the posted message are taken from list attributes.
+
+Hotmail apparently sets X-Originating-Email.
+
+ >>> mlist.anonymous_list = True
+ >>> mlist.description = u'A Test Mailing List'
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: bperson@example.com
+ ... Reply-To: bperson@example.org
+ ... Sender: asystem@example.net
+ ... X-Originating-Email: cperson@example.com
+ ... Subject: a message to you
+ ...
+ ... How are you doing?
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> print msg.as_string()
+ Subject: a message to you
+ From: A Test Mailing List <_xtest@example.com>
+ Reply-To: _xtest@example.com
+ <BLANKLINE>
+ How are you doing?
+ <BLANKLINE>
diff --git a/mailman/pipeline/docs/cook-headers.txt b/mailman/pipeline/docs/cook-headers.txt
new file mode 100644
index 000000000..b1aae6774
--- /dev/null
+++ b/mailman/pipeline/docs/cook-headers.txt
@@ -0,0 +1,328 @@
+Cooking headers
+===============
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+ >>> mlist.include_list_post_header = False
+ >>> mlist.archive = True
+ >>> # XXX This will almost certainly change once we've worked out the web
+ >>> # space layout for mailing lists now.
+ >>> mlist.web_page_url = u'http://lists.example.com/'
+
+
+Saving the original sender
+--------------------------
+
+Because the original sender headers may get deleted or changed, CookHeaders
+will place the sender in the message metadata for safe keeping.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['original_sender']
+ u'aperson@example.com'
+
+But if there was no original sender, then the empty string will be saved.
+
+ >>> msg = message_from_string("""\
+ ... Subject: No original sender
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['original_sender']
+ ''
+
+
+X-BeenThere header
+------------------
+
+The X-BeenThere header is what Mailman uses to recognize messages that have
+already been processed by this mailing list. It's one small measure against
+mail loops.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['x-beenthere']
+ u'_xtest@example.com'
+
+Mailman appends X-BeenThere headers, so if there already is one in the
+original message, the posted message will contain two such headers.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-BeenThere: another@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> sorted(msg.get_all('x-beenthere'))
+ [u'_xtest@example.com', u'another@example.com']
+
+
+Mailman version header
+----------------------
+
+Mailman will also insert an X-Mailman-Version header...
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> from mailman.Version import VERSION
+ >>> msg['x-mailman-version'] == VERSION
+ True
+
+...but only if one doesn't already exist.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-Mailman-Version: 3000
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['x-mailman-version']
+ u'3000'
+
+
+Precedence header
+-----------------
+
+Mailman will insert a Precedence header, which is a de-facto standard for
+telling automatic reply software (e.g. vacation(1)) not to respond to this
+message.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['precedence']
+ u'list'
+
+But Mailman will only add that header if the original message doesn't already
+have one of them.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Precedence: junk
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg['precedence']
+ u'junk'
+
+
+RFC 2919 and 2369 headers
+-------------------------
+
+This is a helper function for the following section.
+
+ >>> def list_headers(msg):
+ ... print '---start---'
+ ... # Sort the List-* headers found in the message. We need to do
+ ... # this because CookHeaders puts them in a dictionary which does
+ ... # not have a guaranteed sort order.
+ ... for header in sorted(msg.keys()):
+ ... parts = header.lower().split('-')
+ ... if 'list' not in parts:
+ ... continue
+ ... for value in msg.get_all(header):
+ ... print '%s: %s' % (header, value)
+ ... print '---end---'
+
+These RFCs define headers for mailing list actions. A mailing list should
+generally add these headers, but not for messages that aren't crafted for a
+specific list (e.g. password reminders in Mailman 2.x).
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, dict(_nolist=True))
+ >>> list_headers(msg)
+ ---start---
+ ---end---
+
+Some people don't like these headers because their mail readers aren't good
+about hiding them. A list owner can turn these headers off.
+
+ >>> mlist.include_rfc2369_headers = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ ---end---
+
+But normally, a list will include these headers.
+
+ >>> mlist.include_rfc2369_headers = True
+ >>> mlist.include_list_post_header = True
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive: <http://www.example.com/pipermail/_xtest@example.com>
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Id: <_xtest.example.com>
+ List-Post: <mailto:_xtest@example.com>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ ---end---
+
+If the mailing list has a description, then it is included in the List-Id
+header.
+
+ >>> mlist.description = u'My test mailing list'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive: <http://www.example.com/pipermail/_xtest@example.com>
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Post: <mailto:_xtest@example.com>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ ---end---
+
+Administrative messages crafted by Mailman will have a reduced set of headers.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, dict(reduced_list_headers=True))
+ >>> list_headers(msg)
+ ---start---
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ X-List-Administrivia: yes
+ ---end---
+
+With the normal set of List-* headers, it's still possible to suppress the
+List-Post header, which is reasonable for an announce-only mailing list.
+
+ >>> mlist.include_list_post_header = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Archive: <http://www.example.com/pipermail/_xtest@example.com>
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ ---end---
+
+And if the list isn't being archived, it makes no sense to add the
+List-Archive header either.
+
+ >>> mlist.include_list_post_header = True
+ >>> mlist.archive = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> list_headers(msg)
+ ---start---
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Post: <mailto:_xtest@example.com>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ ---end---
+
+
+Archived-At
+-----------
+
+RFC 5064 (draft) defines a new Archived-At header which contains the url to
+the individual message in the archives. The stock Pipermail archiver doesn't
+support this because the url can't be calculated until after the message is
+archived. Because this is done by the archive runner, this information isn't
+available to us now.
+
+ >>> print msg['archived-at']
+ None
+
+
+Personalization
+---------------
+
+The To field normally contains the list posting address. However when
+messages are fully personalized, that header will get overwritten with the
+address of the recipient. The list's posting address will be added to one of
+the recipient headers so that users will be able to reply back to the list.
+
+ >>> from mailman.interfaces import Personalization, ReplyToMunging
+ >>> mlist.personalize = Personalization.full
+ >>> mlist.reply_goes_to_list = ReplyToMunging.no_munging
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ X-BeenThere: _xtest@example.com
+ X-Mailman-Version: ...
+ Precedence: list
+ Cc: My test mailing list <_xtest@example.com>
+ List-Id: My test mailing list <_xtest.example.com>
+ List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-leave@example.com>
+ List-Post: <mailto:_xtest@example.com>
+ List-Help: <mailto:_xtest-request@example.com?subject=help>
+ List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>,
+ <mailto:_xtest-join@example.com>
+ <BLANKLINE>
+ <BLANKLINE>
diff --git a/mailman/pipeline/docs/decorate.txt b/mailman/pipeline/docs/decorate.txt
new file mode 100644
index 000000000..60afb0170
--- /dev/null
+++ b/mailman/pipeline/docs/decorate.txt
@@ -0,0 +1,318 @@
+Message decoration
+==================
+
+Message decoration is the process of adding headers and footers to the
+original message. A handler module takes care of this based on the settings
+of the mailing list and the type of message being processed.
+
+ >>> from mailman.pipeline.decorate import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> msg_text = """\
+ ... From: aperson@example.org
+ ...
+ ... Here is a message.
+ ... """
+ >>> msg = message_from_string(msg_text)
+
+
+Short circuiting
+----------------
+
+Digest messages get decorated during the digest creation phase so no extra
+decorations are added for digest messages.
+
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> print msg.as_string()
+ From: aperson@example.org
+ <BLANKLINE>
+ Here is a message.
+
+ >>> process(mlist, msg, dict(nodecorate=True))
+ >>> print msg.as_string()
+ From: aperson@example.org
+ <BLANKLINE>
+ Here is a message.
+
+
+Decorating simple text messages
+-------------------------------
+
+Text messages that have no declared content type character set are by default
+encoded in us-ascii. When the mailing list's preferred language is 'en'
+(i.e. English), the character set of the mailing list and of the message will
+match. In this case, and when the header and footer have no interpolation
+placeholder variables, the message's payload will be prepended by the verbatim
+header, and appended with the verbatim footer.
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'header\n'
+ >>> mlist.msg_footer = u'footer'
+ >>> mlist.preferred_language = u'en'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ <BLANKLINE>
+ header
+ Here is a message.
+ footer
+
+Mailman supports a number of interpolation variables, placeholders in the
+header and footer for information to be filled in with mailing list specific
+data. An example of such information is the mailing list's "real name" (a
+short descriptive name for the mailing list).
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'$real_name header\n'
+ >>> mlist.msg_footer = u'$real_name footer'
+ >>> mlist.real_name = u'XTest'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ XTest header
+ Here is a message.
+ XTest footer
+
+You can't just pick any interpolation variable though; if you do, the variable
+will remain in the header or footer unchanged.
+
+ >>> msg = message_from_string(msg_text)
+ >>> mlist.msg_header = u'$dummy header\n'
+ >>> mlist.msg_footer = u'$dummy footer'
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ $dummy header
+ Here is a message.
+ $dummy footer
+
+
+Handling RFC 3676 'format=flowed' parameters
+--------------------------------------------
+
+RFC 3676 describes a standard by which text/plain messages can be marked by
+generating MUAs for better readability in compatible receiving MUAs. The
+'format' parameter on the text/plain Content-Type header gives hints as to how
+the receiving MUA may flow and delete trailing whitespace for better display
+in a proportional font.
+
+When Mailman sees text/plain messages with such RFC 3676 parameters, it
+preserves these parameters when it concatenates headers and footers to the
+message payload.
+
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: text/plain; format=flowed; delsp=no
+ ...
+ ... Here is a message\x20
+ ... with soft line breaks.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> # Don't use 'print' here as above because it won't be obvious from the
+ >>> # output that the soft-line break space at the end of the 'Here is a
+ >>> # message' line will be retained in the output.
+ >>> msg['content-type']
+ u'text/plain; format="flowed"; delsp="no"; charset="us-ascii"'
+ >>> [line for line in msg.get_payload().splitlines()]
+ ['header', 'Here is a message ', 'with soft line breaks.', 'footer']
+
+
+Decorating mixed-charset messages
+---------------------------------
+
+When a message has no explicit character set, it is assumed to be us-ascii.
+However, if the mailing list's preferred language has a different character
+set, Mailman will still try to concatenate the header and footer, but it will
+convert the text to utf-8 and base-64 encode the message payload.
+
+ # 'ja' = Japanese; charset = 'euc-jp'
+ >>> mlist.preferred_language = u'ja'
+ >>> mlist.msg_header = u'$description header'
+ >>> mlist.msg_footer = u'$description footer'
+ >>> mlist.description = u'\u65e5\u672c\u8a9e'
+
+ >>> from email.message import Message
+ >>> msg = Message()
+ >>> msg.set_payload('Fran\xe7aise', 'iso-8859-1')
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="iso-8859-1"
+ Content-Transfer-Encoding: quoted-printable
+ <BLANKLINE>
+ Fran=E7aise
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+ <BLANKLINE>
+ 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI=
+
+
+Sometimes the message even has an unknown character set. In this case,
+Mailman has no choice but to decorate the original message with MIME
+attachments.
+
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: text/plain; charset=unknown
+ ... Content-Transfer-Encoding: 7bit
+ ...
+ ... Here is a message.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg.set_boundary('BOUNDARY')
+ >>> print msg.as_string()
+ From: aperson@example.org
+ Content-Type: multipart/mixed; boundary="BOUNDARY"
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ header
+ --BOUNDARY
+ Content-Type: text/plain; charset=unknown
+ Content-Transfer-Encoding: 7bit
+ <BLANKLINE>
+ Here is a message.
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ footer
+ --BOUNDARY--
+
+
+Decorating multipart messages
+-----------------------------
+
+Multipart messages have to be decorated differently. The header and footer
+cannot be simply concatenated into the payload because that will break the
+MIME structure of the message. Instead, the header and footer are attached as
+separate MIME subparts.
+
+When the outer part is multipart/mixed, the header and footer can have a
+Content-Disposition of 'inline' so that MUAs can display them as if they were
+simply concatenated to the message.
+
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.msg_header = u'header'
+ >>> mlist.msg_footer = u'footer'
+ >>> part_1 = message_from_string("""\
+ ... From: aperson@example.org
+ ...
+ ... Here is the first message.
+ ... """)
+ >>> part_2 = message_from_string("""\
+ ... From: bperson@example.com
+ ...
+ ... Here is the second message.
+ ... """)
+ >>> from email.mime.multipart import MIMEMultipart
+ >>> msg = MIMEMultipart('mixed', boundary='BOUNDARY',
+ ... _subparts=(part_1, part_2))
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ Content-Type: multipart/mixed; boundary="BOUNDARY"
+ MIME-Version: 1.0
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ header
+ --BOUNDARY
+ From: aperson@example.org
+ <BLANKLINE>
+ Here is the first message.
+ <BLANKLINE>
+ --BOUNDARY
+ From: bperson@example.com
+ <BLANKLINE>
+ Here is the second message.
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ footer
+ --BOUNDARY--
+
+
+Decorating other content types
+------------------------------
+
+Non-multipart non-text content types will get wrapped in a multipart/mixed so
+that the header and footer can be added as attachments.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... Content-Type: image/x-beautiful
+ ...
+ ... IMAGEDATAIMAGEDATAIMAGEDATA
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> msg.set_boundary('BOUNDARY')
+ >>> print msg.as_string()
+ From: aperson@example.org
+ ...
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ header
+ --BOUNDARY
+ Content-Type: image/x-beautiful
+ <BLANKLINE>
+ IMAGEDATAIMAGEDATAIMAGEDATA
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: inline
+ <BLANKLINE>
+ footer
+ --BOUNDARY--
+
+
+Personalization
+---------------
+
+A mailing list can be 'personalized', meaning that each message is unique for
+each recipient. When the list is personalized, additional interpolation
+variables are available; however, the list of intended recipients must be
+provided in the message data, otherwise an exception occurs.
+
+ >>> process(mlist, None, dict(personalize=True))
+ Traceback (most recent call last):
+ ...
+ AssertionError: The number of intended recipients must be exactly 1
+
+And the number of intended recipients must be exactly 1.
+
+ >>> process(mlist, None, dict(personalize=True, recips=[1, 2, 3]))
+ Traceback (most recent call last):
+ ...
+ AssertionError: The number of intended recipients must be exactly 1
diff --git a/mailman/pipeline/docs/digests.txt b/mailman/pipeline/docs/digests.txt
new file mode 100644
index 000000000..d81e173f8
--- /dev/null
+++ b/mailman/pipeline/docs/digests.txt
@@ -0,0 +1,536 @@
+Digests
+=======
+
+Digests are a way for a user to receive list traffic in collections instead of
+as individual messages when immediately posted. There are several forms of
+digests, although only two are currently supported: MIME digests and RFC 1153
+(a.k.a. plain text) digests.
+
+ >>> from mailman.pipeline.to_digest import process
+ >>> from mailman.queue import Switchboard
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.web_page_url = u'http://www.example.com/'
+ >>> mlist.real_name = u'XTest'
+ >>> mlist.subject_prefix = u'[_XTest] '
+ >>> mlist.one_last_digest = set()
+ >>> switchboard = Switchboard(config.VIRGINQUEUE_DIR)
+
+The digest_mbox() helper is used to iterate through all the accumulated digest
+messages, in the order in which they were posted. This makes it easier to
+update the tests when we switch to a different mailbox format.
+
+ >>> from mailman.tests.helpers import digest_mbox
+ >>> from itertools import count
+ >>> from string import Template
+ >>> def makemsg():
+ ... for i in count(1):
+ ... text = Template("""\
+ ... From: aperson@example.com
+ ... To: _xtest@example.com
+ ... Subject: Test message $i
+ ...
+ ... Here is message $i
+ ... """).substitute(i=i)
+ ... yield message_from_string(text)
+
+
+Short circuiting
+----------------
+
+When a message is posted to the mailing list, it is generally added to a
+running collection of messages. For now, this is a Unix mailbox file,
+although in the future this may end up being converted to a maildir style
+mailbox. In any event, there are several factors that would bypass the
+storing of posted messages to the mailbox. For example, the mailing list may
+not allow digests...
+
+ >>> mlist.digestable = False
+ >>> msg = makemsg().next()
+ >>> process(mlist, msg, {})
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> switchboard.files
+ []
+
+...or they may allow digests but the message is already a digest.
+
+ >>> mlist.digestable = True
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> switchboard.files
+ []
+
+
+Sending a digest
+----------------
+
+For messages which are not digests, but which are posted to a digestable
+mailing list, the messages will be stored until they meet one of the criteria
+triggering the sending of the digest. If none of those criteria are met, then
+the message will just sit in the mailbox for a while.
+
+ >>> mlist.digest_size_threshold = 10000
+ >>> process(mlist, msg, {})
+ >>> switchboard.files
+ []
+ >>> digest = digest_mbox(mlist)
+ >>> sum(1 for mboxmsg in digest)
+ 1
+ >>> import os
+ >>> os.remove(digest._path)
+
+When the size of the digest mbox reaches the maximum size threshold, a digest
+is crafted and sent out. This puts two messages in the virgin queue, a MIME
+digest and an RFC 1153 plain text digest. The size threshold is in KB.
+
+ >>> mlist.digest_size_threshold = 1
+ >>> mlist.volume = 2
+ >>> mlist.next_digest_number = 10
+ >>> size = 0
+ >>> for msg in makemsg():
+ ... process(mlist, msg, {})
+ ... size += len(str(msg))
+ ... if size > mlist.digest_size_threshold * 1024:
+ ... break
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> len(switchboard.files)
+ 2
+ >>> for filebase in switchboard.files:
+ ... qmsg, qdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+ ... if qmsg.is_multipart():
+ ... mimemsg = qmsg
+ ... mimedata = qdata
+ ... else:
+ ... rfc1153msg = qmsg
+ ... rfc1153data = qdata
+ >>> print mimemsg.as_string()
+ Content-Type: multipart/mixed; boundary="..."
+ MIME-Version: 1.0
+ From: _xtest-request@example.com
+ Subject: XTest Digest, Vol 2, Issue 10
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ <BLANKLINE>
+ --...
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Description: XTest Digest, Vol 2, Issue 10
+ <BLANKLINE>
+ Send XTest mailing list submissions to
+ _xtest@example.com
+ <BLANKLINE>
+ To subscribe or unsubscribe via the World Wide Web, visit
+ http://www.example.com/listinfo/_xtest@example.com
+ or, via email, send a message with subject or body 'help' to
+ _xtest-request@example.com
+ <BLANKLINE>
+ You can reach the person managing the list at
+ _xtest-owner@example.com
+ <BLANKLINE>
+ When replying, please edit your Subject line so it is more specific
+ than "Re: Contents of XTest digest..."
+ <BLANKLINE>
+ --...
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Description: Today's Topics (8 messages)
+ <BLANKLINE>
+ Today's Topics:
+ <BLANKLINE>
+ 1. Test message 1 (aperson@example.com)
+ 2. Test message 2 (aperson@example.com)
+ 3. Test message 3 (aperson@example.com)
+ 4. Test message 4 (aperson@example.com)
+ 5. Test message 5 (aperson@example.com)
+ 6. Test message 6 (aperson@example.com)
+ 7. Test message 7 (aperson@example.com)
+ 8. Test message 8 (aperson@example.com)
+ <BLANKLINE>
+ --...
+ Content-Type: multipart/digest; boundary="..."
+ MIME-Version: 1.0
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 1
+ Message: 1
+ <BLANKLINE>
+ Here is message 1
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 2
+ Message: 2
+ <BLANKLINE>
+ Here is message 2
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 3
+ Message: 3
+ <BLANKLINE>
+ Here is message 3
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 4
+ Message: 4
+ <BLANKLINE>
+ Here is message 4
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 5
+ Message: 5
+ <BLANKLINE>
+ Here is message 5
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 6
+ Message: 6
+ <BLANKLINE>
+ Here is message 6
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 7
+ Message: 7
+ <BLANKLINE>
+ Here is message 7
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ From: aperson@example.com
+ To: _xtest@example.com
+ Subject: Test message 8
+ Message: 8
+ <BLANKLINE>
+ Here is message 8
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ --...
+ >>> sorted(mimedata.items())
+ [('_parsemsg', False),
+ ('isdigest', True),
+ ('listname', u'_xtest@example.com'),
+ ('received_time', ...),
+ ('recips', set([])), ('version', 3)]
+ >>> print rfc1153msg.as_string()
+ From: _xtest-request@example.com
+ Subject: XTest Digest, Vol 2, Issue 10
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ <BLANKLINE>
+ Send XTest mailing list submissions to
+ _xtest@example.com
+ <BLANKLINE>
+ To subscribe or unsubscribe via the World Wide Web, visit
+ http://www.example.com/listinfo/_xtest@example.com
+ or, via email, send a message with subject or body 'help' to
+ _xtest-request@example.com
+ <BLANKLINE>
+ You can reach the person managing the list at
+ _xtest-owner@example.com
+ <BLANKLINE>
+ When replying, please edit your Subject line so it is more specific
+ than "Re: Contents of XTest digest..."
+ <BLANKLINE>
+ <BLANKLINE>
+ Today's Topics:
+ <BLANKLINE>
+ 1. Test message 1 (aperson@example.com)
+ 2. Test message 2 (aperson@example.com)
+ 3. Test message 3 (aperson@example.com)
+ 4. Test message 4 (aperson@example.com)
+ 5. Test message 5 (aperson@example.com)
+ 6. Test message 6 (aperson@example.com)
+ 7. Test message 7 (aperson@example.com)
+ 8. Test message 8 (aperson@example.com)
+ <BLANKLINE>
+ <BLANKLINE>
+ ----------------------------------------------------------------------
+ <BLANKLINE>
+ Message: 1
+ From: aperson@example.com
+ Subject: Test message 1
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 1
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 2
+ From: aperson@example.com
+ Subject: Test message 2
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 2
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 3
+ From: aperson@example.com
+ Subject: Test message 3
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 3
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 4
+ From: aperson@example.com
+ Subject: Test message 4
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 4
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 5
+ From: aperson@example.com
+ Subject: Test message 5
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 5
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 6
+ From: aperson@example.com
+ Subject: Test message 6
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 6
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 7
+ From: aperson@example.com
+ Subject: Test message 7
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 7
+ <BLANKLINE>
+ <BLANKLINE>
+ ------------------------------
+ <BLANKLINE>
+ Message: 8
+ From: aperson@example.com
+ Subject: Test message 8
+ To: _xtest@example.com
+ Message-ID: ...
+ <BLANKLINE>
+ Here is message 8
+ <BLANKLINE>
+ <BLANKLINE>
+ End of XTest Digest, Vol 2, Issue 10
+ ************************************
+ <BLANKLINE>
+ >>> sorted(rfc1153data.items())
+ [('_parsemsg', False),
+ ('isdigest', True),
+ ('listname', u'_xtest@example.com'),
+ ('received_time', ...),
+ ('recips', set([])), ('version', 3)]
+
+
+Internationalized digests
+-------------------------
+
+When messages come in with a content-type character set different than that of
+the list's preferred language, recipients will get an internationalized
+digest. French is not enabled by default site-wide, so enable that now.
+
+XXX We also have to set the default server language to French, otherwise the
+English template will be found and the masthead won't be translated.
+
+ >>> config.languages.enable_language('fr')
+ >>> config.DEFAULT_SERVER_LANGUAGE = u'fr'
+ >>> mlist.preferred_language = u'fr'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.org
+ ... To: _xtest@example.com
+ ... Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?=
+ ... MIME-Version: 1.0
+ ... Content-Type: text/plain; charset=iso-2022-jp
+ ... Content-Transfer-Encoding: 7bit
+ ...
+ ... \x1b$B0lHV\x1b(B
+ ... """)
+
+Set the digest threshold to zero so that the digests will be sent immediately.
+
+ >>> mlist.digest_size_threshold = 0
+ >>> process(mlist, msg, {})
+ >>> sum(1 for mboxmsg in digest_mbox(mlist))
+ 0
+ >>> len(switchboard.files)
+ 2
+ >>> for filebase in switchboard.files:
+ ... qmsg, qdata = switchboard.dequeue(filebase)
+ ... switchboard.finish(filebase)
+ ... if qmsg.is_multipart():
+ ... mimemsg = qmsg
+ ... mimedata = qdata
+ ... else:
+ ... rfc1153msg = qmsg
+ ... rfc1153data = qdata
+ >>> print mimemsg.as_string()
+ Content-Type: multipart/mixed; boundary="..."
+ MIME-Version: 1.0
+ From: _xtest-request@example.com
+ Subject: Groupe XTest, Vol. 2, Parution 11
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ <BLANKLINE>
+ --...
+ Content-Type: text/plain; charset="iso-8859-1"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: quoted-printable
+ Content-Description: Groupe XTest, Vol. 2, Parution 11
+ <BLANKLINE>
+ Envoyez vos messages pour la liste XTest =E0
+ _xtest@example.com
+ <BLANKLINE>
+ Pour vous (d=E9s)abonner par le web, consultez
+ http://www.example.com/listinfo/_xtest@example.com
+ <BLANKLINE>
+ ou, par courriel, envoyez un message avec =AB=A0help=A0=BB dans le corps ou
+ dans le sujet =E0
+ _xtest-request@example.com
+ <BLANKLINE>
+ Vous pouvez contacter l'administrateur de la liste =E0 l'adresse
+ _xtest-owner@example.com
+ <BLANKLINE>
+ Si vous r=E9pondez, n'oubliez pas de changer l'objet du message afin
+ qu'il soit plus sp=E9cifique que =AB=A0Re: Contenu du groupe de XTest...=A0=
+ =BB
+ <BLANKLINE>
+ --...
+ Content-Type: text/plain; charset="utf-8"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: base64
+ Content-Description: Today's Topics (1 messages)
+ <BLANKLINE>
+ VGjDqG1lcyBkdSBqb3VyIDoKCiAgIDEuIOS4gOeVqiAoYXBlcnNvbkBleGFtcGxlLm9yZykK
+ <BLANKLINE>
+ --...
+ Content-Type: multipart/digest; boundary="..."
+ MIME-Version: 1.0
+ <BLANKLINE>
+ --...
+ Content-Type: message/rfc822
+ MIME-Version: 1.0
+ <BLANKLINE>
+ Content-Transfer-Encoding: 7bit
+ From: aperson@example.org
+ MIME-Version: 1.0
+ To: _xtest@example.com
+ Content-Type: text/plain; charset=iso-2022-jp
+ Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?=
+ Message: 1
+ <BLANKLINE>
+ $B0lHV(B
+ <BLANKLINE>
+ <BLANKLINE>
+ --...
+ --...
+ >>> sorted(mimedata.items())
+ [('_parsemsg', False),
+ ('isdigest', True),
+ ('listname', u'_xtest@example.com'),
+ ('received_time', ...),
+ ('recips', set([])), ('version', 3)]
+ >>> print rfc1153msg.as_string()
+ From: _xtest-request@example.com
+ Subject: Groupe XTest, Vol. 2, Parution 11
+ To: _xtest@example.com
+ Reply-To: _xtest@example.com
+ Date: ...
+ Message-ID: ...
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+ <BLANKLINE>
+ ...
+ <BLANKLINE>
+ >>> sorted(rfc1153data.items())
+ [('_parsemsg', False),
+ ('isdigest', True),
+ ('listname', u'_xtest@example.com'),
+ ('received_time', ...),
+ ('recips', set([])), ('version', 3)]
+
+
+Clean up
+--------
+
+ >>> config.DEFAULT_SERVER_LANGUAGE = u'en'
diff --git a/mailman/pipeline/docs/file-recips.txt b/mailman/pipeline/docs/file-recips.txt
new file mode 100644
index 000000000..03328f97e
--- /dev/null
+++ b/mailman/pipeline/docs/file-recips.txt
@@ -0,0 +1,97 @@
+File recipients
+===============
+
+Mailman can calculate the recipients for a message from a Sendmail-style
+include file. This file must be called members.txt and it must live in the
+list's data directory.
+
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['file-recipients']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+
+Short circuiting
+----------------
+
+If the message's metadata already has recipients, this handler immediately
+returns.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message.
+ ... """)
+ >>> msgdata = {'recips': 7}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ <BLANKLINE>
+ A message.
+ <BLANKLINE>
+ >>> msgdata
+ {'recips': 7}
+
+
+Missing file
+------------
+
+The include file must live inside the list's data directory, under the name
+members.txt. If the file doesn't exist, the list of recipients will be
+empty.
+
+ >>> import os
+ >>> file_path = os.path.join(mlist.full_path, 'members.txt')
+ >>> open(file_path)
+ Traceback (most recent call last):
+ ...
+ IOError: [Errno ...]
+ No such file or directory: u'.../_xtest@example.com/members.txt'
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ []
+
+
+Existing file
+-------------
+
+If the file exists, it contains a list of addresses, one per line. These
+addresses are returned as the set of recipients.
+
+ >>> fp = open(file_path, 'w')
+ >>> try:
+ ... print >> fp, 'bperson@example.com'
+ ... print >> fp, 'cperson@example.com'
+ ... print >> fp, 'dperson@example.com'
+ ... print >> fp, 'eperson@example.com'
+ ... print >> fp, 'fperson@example.com'
+ ... print >> fp, 'gperson@example.com'
+ ... finally:
+ ... fp.close()
+
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ ['bperson@example.com', 'cperson@example.com', 'dperson@example.com',
+ 'eperson@example.com', 'fperson@example.com', 'gperson@example.com']
+
+However, if the sender of the original message is a member of the list and
+their address is in the include file, the sender's address is /not/ included
+in the recipients list.
+
+ >>> from mailman.interfaces import MemberRole
+ >>> address_1 = config.db.user_manager.create_address(
+ ... u'cperson@example.com')
+ >>> address_1.subscribe(mlist, MemberRole.member)
+ <Member: cperson@example.com on _xtest@example.com as MemberRole.member>
+
+ >>> msg = message_from_string("""\
+ ... From: cperson@example.com
+ ...
+ ... A message.
+ ... """)
+ >>> msgdata = {}
+ >>> handler.process(mlist, msg, msgdata)
+ >>> sorted(msgdata['recips'])
+ ['bperson@example.com', 'dperson@example.com',
+ 'eperson@example.com', 'fperson@example.com', 'gperson@example.com']
diff --git a/mailman/pipeline/docs/filtering.txt b/mailman/pipeline/docs/filtering.txt
new file mode 100644
index 000000000..c5dca1531
--- /dev/null
+++ b/mailman/pipeline/docs/filtering.txt
@@ -0,0 +1,341 @@
+Content filtering
+=================
+
+Mailman can filter the content of messages posted to a mailing list by
+stripping MIME subparts, and possibly reorganizing the MIME structure of a
+message. It does this with the MimeDel handler module, although other
+handlers can potentially do other kinds of finer level content filtering.
+
+ >>> from mailman.pipeline.mime_delete import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+
+Several mailing list options control content filtering. First, the feature
+must be enabled, then there are two options that control which MIME types get
+filtered and which get passed. Finally, there is an option to control whether
+text/html parts will get converted to plain text. Let's set up some defaults
+for these variables, then we'll explain them in more detail below.
+
+ >>> mlist.filter_content = True
+ >>> mlist.filter_mime_types = []
+ >>> mlist.pass_mime_types = []
+ >>> mlist.convert_html_to_plaintext = False
+
+
+Filtering the outer content type
+--------------------------------
+
+A simple filtering setting will just search the content types of the message's
+parts, discarding all parts with a matching MIME type. If the message's outer
+content type matches the filter, the entire message will be discarded.
+
+ >>> mlist.filter_mime_types = ['image/jpeg']
+ >>> # XXX Change this to an enum
+ >>> mlist.filter_action = 0 # Discard
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxxxx
+ ... """)
+ >>> process(mlist, msg, {})
+ Traceback (most recent call last):
+ ...
+ DiscardMessage
+
+However, if we turn off content filtering altogether, then the handler
+short-circuits.
+
+ >>> mlist.filter_content = False
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: image/jpeg
+ MIME-Version: 1.0
+ <BLANKLINE>
+ xxxxx
+ >>> msgdata
+ {}
+
+Similarly, no content filtering is performed on digest messages, which are
+crafted internally by Mailman.
+
+ >>> mlist.filter_content = True
+ >>> msgdata = {'isdigest': True}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: image/jpeg
+ MIME-Version: 1.0
+ <BLANKLINE>
+ xxxxx
+ >>> msgdata
+ {'isdigest': True}
+
+
+Simple multipart filtering
+--------------------------
+
+If one of the subparts in a multipart message matches the filter type, then
+just that subpart will be stripped.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=BOUNDARY
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUNDARY
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxx
+ ...
+ ... --BOUNDARY
+ ... Content-Type: image/gif
+ ... MIME-Version: 1.0
+ ...
+ ... yyy
+ ... --BOUNDARY--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=BOUNDARY
+ MIME-Version: 1.0
+ X-Content-Filtered-By: Mailman/MimeDel ...
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: image/gif
+ MIME-Version: 1.0
+ <BLANKLINE>
+ yyy
+ --BOUNDARY--
+ <BLANKLINE>
+
+
+Collapsing multipart/alternative messages
+-----------------------------------------
+
+When content filtering encounters a multipart/alternative part, and the
+results of filtering leave only one of the subparts, then the
+multipart/alternative may be collapsed. For example, in the following
+message, the outer content type is a multipart/mixed. Inside this part is
+just a single subpart that has a content type of multipart/alternative. This
+inner multipart has two subparts, a jpeg and a gif.
+
+Content filtering will remove the jpeg part, leaving the multipart/alternative
+with only a single gif subpart. Because there's only one subpart left, the
+MIME structure of the message will be reorganized, removing the inner
+multipart/alternative so that the outer multipart/mixed has just a single gif
+subpart.
+
+ >>> mlist.collapse_alternatives = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=BOUNDARY
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUNDARY
+ ... Content-Type: multipart/alternative; boundary=BOUND2
+ ... MIME-Version: 1.0
+ ...
+ ... --BOUND2
+ ... Content-Type: image/jpeg
+ ... MIME-Version: 1.0
+ ...
+ ... xxx
+ ...
+ ... --BOUND2
+ ... Content-Type: image/gif
+ ... MIME-Version: 1.0
+ ...
+ ... yyy
+ ... --BOUND2--
+ ...
+ ... --BOUNDARY--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=BOUNDARY
+ MIME-Version: 1.0
+ X-Content-Filtered-By: Mailman/MimeDel ...
+ <BLANKLINE>
+ --BOUNDARY
+ Content-Type: image/gif
+ MIME-Version: 1.0
+ <BLANKLINE>
+ yyy
+ --BOUNDARY--
+ <BLANKLINE>
+
+When the outer part is a multipart/alternative and filtering leaves this outer
+part with just one subpart, the entire message is converted to the left over
+part's content type. In other words, the left over inner part is promoted to
+being the outer part.
+
+ >>> mlist.filter_mime_types.append('text/html')
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/alternative; boundary=AAA
+ ...
+ ... --AAA
+ ... Content-Type: text/html
+ ...
+ ... <b>This is some html</b>
+ ... --AAA
+ ... Content-Type: text/plain
+ ...
+ ... This is plain text
+ ... --AAA--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: text/plain
+ X-Content-Filtered-By: Mailman/MimeDel ...
+ <BLANKLINE>
+ This is plain text
+
+Clean up.
+
+ >>> ignore = mlist.filter_mime_types.pop()
+
+
+Conversion to plain text
+------------------------
+
+Many mailing lists prohibit HTML email, and in fact, such email can be a
+phishing or spam vector. However, many mail readers will send HTML email by
+default because users think it looks pretty. One approach to handling this
+would be to filter out text/html parts and rely on multipart/alternative
+collapsing to leave just a plain text part. This works because many mail
+readers that send HTML email actually send a plain text part in the second
+subpart of such multipart/alternatives.
+
+While this is a good suggestion for plain text-only mailing lists, often a
+mail reader will send only a text/html part with no plain text alternative.
+In this case, the site administrator can enable text/html to text/plain
+conversion by defining a conversion command. A list administrator still needs
+to enable such conversion for their list though.
+
+ >>> mlist.convert_html_to_plaintext = True
+
+By default, Mailman sends the message through lynx, but since this program is
+not guaranteed to exist, we'll craft a simple, but stupid script to simulate
+the conversion process. The script expects a single argument, which is the
+name of the file containing the message payload to filter.
+
+ >>> import os, sys
+ >>> script_path = os.path.join(config.DATA_DIR, 'filter.py')
+ >>> fp = open(script_path, 'w')
+ >>> try:
+ ... print >> fp, """\
+ ... import sys
+ ... print 'Converted text/html to text/plain'
+ ... print 'Filename:', sys.argv[1]
+ ... """
+ ... finally:
+ ... fp.close()
+ >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % (
+ ... sys.executable, script_path)
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: text/html
+ ... MIME-Version: 1.0
+ ...
+ ... <html><head></head>
+ ... <body></body></html>
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ MIME-Version: 1.0
+ Content-Type: text/plain
+ X-Content-Filtered-By: Mailman/MimeDel ...
+ <BLANKLINE>
+ Converted text/html to text/plain
+ Filename: ...
+ <BLANKLINE>
+
+
+Discarding empty parts
+----------------------
+
+Similarly, if after filtering a multipart section ends up empty, then the
+entire multipart is discarded. For example, here's a message where an inner
+multipart/mixed contains two jpeg subparts. Both jpegs are filtered out, so
+the entire inner multipart/mixed is discarded.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Content-Type: multipart/mixed; boundary=AAA
+ ...
+ ... --AAA
+ ... Content-Type: multipart/mixed; boundary=BBB
+ ...
+ ... --BBB
+ ... Content-Type: image/jpeg
+ ...
+ ... xxx
+ ... --BBB
+ ... Content-Type: image/jpeg
+ ...
+ ... yyy
+ ... --BBB---
+ ... --AAA
+ ... Content-Type: multipart/alternative; boundary=CCC
+ ...
+ ... --CCC
+ ... Content-Type: text/html
+ ...
+ ... <h2>This is a header</h2>
+ ...
+ ... --CCC
+ ... Content-Type: text/plain
+ ...
+ ... A different message
+ ... --CCC--
+ ... --AAA
+ ... Content-Type: image/gif
+ ...
+ ... zzz
+ ... --AAA
+ ... Content-Type: image/gif
+ ...
+ ... aaa
+ ... --AAA--
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Content-Type: multipart/mixed; boundary=AAA
+ X-Content-Filtered-By: Mailman/MimeDel ...
+ <BLANKLINE>
+ --AAA
+ MIME-Version: 1.0
+ Content-Type: text/plain
+ <BLANKLINE>
+ Converted text/html to text/plain
+ Filename: ...
+ <BLANKLINE>
+ --AAA
+ Content-Type: image/gif
+ <BLANKLINE>
+ zzz
+ --AAA
+ Content-Type: image/gif
+ <BLANKLINE>
+ aaa
+ --AAA--
+ <BLANKLINE>
+
+
+Passing MIME types
+------------------
+
+XXX Describe the pass_mime_types setting and how it interacts with
+filter_mime_types.
diff --git a/mailman/pipeline/docs/nntp.txt b/mailman/pipeline/docs/nntp.txt
new file mode 100644
index 000000000..5652d7924
--- /dev/null
+++ b/mailman/pipeline/docs/nntp.txt
@@ -0,0 +1,68 @@
+NNTP (i.e. Usenet) Gateway
+==========================
+
+Mailman has an NNTP gateway, whereby messages posted to the mailing list can
+be forwarded onto an NNTP newsgroup. Typically this means Usenet, but since
+NNTP is to Usenet as IP is to the web, it's more general than that.
+
+ >>> from mailman.queue import Switchboard
+ >>> from mailman.configuration import config
+ >>> handler = config.handlers['to-usenet']
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+ >>> switchboard = Switchboard(config.NEWSQUEUE_DIR)
+
+Gatewaying from the mailing list to the newsgroup happens through a separate
+'nntp' queue and happens immediately when the message is posted through to the
+list. Note that gatewaying from the newsgroup to the list happens via a
+cronjob (currently not shown).
+
+There are several situations which prevent a message from being gatewayed to
+the newsgroup. The feature could be disabled, as is the default.
+
+ >>> mlist.gateway_to_news = False
+ >>> msg = message_from_string("""\
+ ... Subject: An important message
+ ...
+ ... Something of great import.
+ ... """)
+ >>> handler.process(mlist, msg, {})
+ >>> switchboard.files
+ []
+
+Even if enabled, messages that came from the newsgroup are never gated back to
+the newsgroup.
+
+ >>> mlist.gateway_to_news = True
+ >>> handler.process(mlist, msg, {'fromusenet': True})
+ >>> switchboard.files
+ []
+
+Neither are digests ever gated to the newsgroup.
+
+ >>> handler.process(mlist, msg, {'isdigest': True})
+ >>> switchboard.files
+ []
+
+However, other posted messages get gated to the newsgroup via the nntp queue.
+The list owner can set the linked newsgroup and the nntp host that its
+messages are gated to.
+
+ >>> mlist.linked_newsgroup = u'comp.lang.thing'
+ >>> mlist.nntp_host = u'news.example.com'
+ >>> handler.process(mlist, msg, {})
+ >>> len(switchboard.files)
+ 1
+ >>> filebase = switchboard.files[0]
+ >>> msg, msgdata = switchboard.dequeue(filebase)
+ >>> switchboard.finish(filebase)
+ >>> print msg.as_string()
+ Subject: An important message
+ <BLANKLINE>
+ Something of great import.
+ <BLANKLINE>
+ >>> sorted(msgdata.items())
+ [('_parsemsg', False),
+ ('listname', u'_xtest@example.com'),
+ ('received_time', ...),
+ ('version', 3)]
diff --git a/mailman/pipeline/docs/reply-to.txt b/mailman/pipeline/docs/reply-to.txt
new file mode 100644
index 000000000..ad9100ce1
--- /dev/null
+++ b/mailman/pipeline/docs/reply-to.txt
@@ -0,0 +1,128 @@
+Reply-to munging
+================
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+Reply-to munging refers to the behavior where a mailing list can be configured
+to change or augment an existing Reply-To header in a message posted to the
+list. Reply-to munging is fairly controversial, with arguments made either
+for or against munging.
+
+The Mailman developers' (and, I believe, the majority) consensus is to do no
+Reply-to munging, under several principles. Primarily, most reply-to munging
+is requested by people who do not have both a Reply and Reply All button on
+their mail reader. If you do not munge Reply-To, then these buttons will work
+properly, but if you munge the header, it is impossible for these buttons to
+work right, because both will reply to the list. This leads to unfortunate
+accidents where a private message is accidentally posted to the entire list.
+
+However, Mailman gives list owners the option to do Reply-To munging anyway,
+mostly as a way to shut up the really vocal minority who seem to insist on
+this mis-feature.
+
+
+Reply to list
+-------------
+
+A list can be configured to add a Reply-To header pointing back to the mailing
+list's posting address. If there's no Reply-To header in the original
+message, the list's posting address simply gets inserted.
+
+ >>> from mailman.interfaces import ReplyToMunging
+ >>> mlist.reply_goes_to_list = ReplyToMunging.point_to_list
+ >>> mlist.preferred_language = u'en'
+ >>> mlist.description = u''
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'_xtest@example.com'
+
+It's also possible to strip any existing Reply-To header first, before adding
+the list's posting address.
+
+ >>> mlist.first_strip_reply_to = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'_xtest@example.com'
+
+If you don't first strip the header, then the list's posting address will just
+get appended to whatever the original version was.
+
+ >>> mlist.first_strip_reply_to = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'bperson@example.com, _xtest@example.com'
+
+
+Explicit Reply-To
+-----------------
+
+The list can also be configured to have an explicit Reply-To header.
+
+ >>> mlist.reply_goes_to_list = ReplyToMunging.explicit_header
+ >>> mlist.reply_to_address = u'my-list@example.com'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com'
+
+And as before, it's possible to either strip any existing Reply-To header...
+
+ >>> mlist.first_strip_reply_to = True
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com'
+
+...or not.
+
+ >>> mlist.first_strip_reply_to = False
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Reply-To: bperson@example.com
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(msg.get_all('reply-to'))
+ 1
+ >>> msg['reply-to']
+ u'my-list@example.com, bperson@example.com'
diff --git a/mailman/pipeline/docs/replybot.txt b/mailman/pipeline/docs/replybot.txt
new file mode 100644
index 000000000..2e3765cab
--- /dev/null
+++ b/mailman/pipeline/docs/replybot.txt
@@ -0,0 +1,216 @@
+Auto-reply handler
+==================
+
+Mailman has an auto-reply handler that sends automatic responses to messages
+it receives on its posting address, or special robot addresses. Automatic
+responses are subject to various conditions, such as headers in the original
+message or the amount of time since the last auto-response.
+
+ >>> from mailman.pipeline.replybot import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.real_name = u'XTest'
+ >>> mlist.web_page_url = u'http://www.example.com/'
+
+ >>> # Ensure that the virgin queue is empty, since we'll be checking this
+ >>> # for new auto-response messages.
+ >>> from mailman.queue import Switchboard
+ >>> virginq = Switchboard(config.VIRGINQUEUE_DIR)
+ >>> virginq.files
+ []
+
+
+Basic autoresponding
+--------------------
+
+Basic autoresponding occurs when the list is set up to respond to either its
+-owner address, its -request address, or to the posting address, and a message
+is sent to one of these addresses. A mailing list also has an autoresponse
+grace period which describes how much time must pass before a second response
+will be sent, with 0 meaning "there is no grace period".
+
+ >>> import datetime
+ >>> mlist.autorespond_admin = True
+ >>> mlist.autoresponse_graceperiod = datetime.timedelta()
+ >>> mlist.autoresponse_admin_text = u'admin autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest-owner@example.com
+ ...
+ ... help
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> # Print only some of the meta data. The rest is uninteresting.
+ >>> qdata['listname']
+ u'_xtest@example.com'
+ >>> sorted(qdata['recips'])
+ [u'aperson@example.com']
+ >>> # Delete data that is time dependent or random
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+ <BLANKLINE>
+ admin autoresponse text
+ >>> virginq.files
+ []
+
+
+Short circuiting
+----------------
+
+Several headers in the original message determine whether an autoresponse
+should even be sent. For example, if the message has an "X-Ack: No" header,
+no auto-response is sent.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... X-Ack: No
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+Mailman itself can suppress autoresponses for certain types of internally
+crafted messages, by setting the 'noack' metadata key.
+
+ >>> msg = message_from_string("""\
+ ... From: mailman@example.com
+ ...
+ ... help for you
+ ... """)
+ >>> process(mlist, msg, dict(noack=True, toowner=True))
+ >>> virginq.files
+ []
+
+If there is a Precedence: header with any of the values 'bulk', 'junk', or
+'list', then the autoresponse is also suppressed.
+
+ >>> msg = message_from_string("""\
+ ... From: asystem@example.com
+ ... Precedence: bulk
+ ...
+ ... hey!
+ ... """)
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+ >>> msg.replace_header('precedence', 'junk')
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+ >>> msg.replace_header('precedence', 'list')
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> virginq.files
+ []
+
+Unless the X-Ack: header has a value of "yes", in which case, the Precedence
+header is ignored.
+
+ >>> msg['X-Ack'] = 'yes'
+ >>> process(mlist, msg, dict(toowner=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: asystem@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+ <BLANKLINE>
+ admin autoresponse text
+
+
+Available auto-responses
+------------------------
+
+As shown above, a message sent to the -owner address will get an auto-response
+with the text set for owner responses. Two other types of email will get
+auto-responses: those sent to the -request address...
+
+ >>> mlist.autorespond_requests = True
+ >>> mlist.autoresponse_request_text = u'robot autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest-request@example.com
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, dict(torequest=True))
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+ <BLANKLINE>
+ robot autoresponse text
+
+...and those sent to the posting address.
+
+ >>> mlist.autorespond_postings = True
+ >>> mlist.autoresponse_postings_text = u'postings autoresponse text'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... To: _xtest@example.com
+ ...
+ ... help me
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> len(virginq.files)
+ 1
+ >>> qmsg, qdata = virginq.dequeue(virginq.files[0])
+ >>> del qmsg['message-id']
+ >>> del qmsg['date']
+ >>> print qmsg.as_string()
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ Subject: Auto-response for your message to the "XTest" mailing list
+ From: _xtest-bounces@example.com
+ To: aperson@example.com
+ X-Mailer: The Mailman Replybot
+ X-Ack: No
+ Precedence: bulk
+ <BLANKLINE>
+ postings autoresponse text
+
+
+Grace periods
+-------------
+
+Auto-responses have a grace period, during which no additional responses will
+be sent. This is so as not to bombard the sender with responses. The grace
+period is measured in days.
+
+XXX Add grace period tests.
diff --git a/mailman/pipeline/docs/scrubber.txt b/mailman/pipeline/docs/scrubber.txt
new file mode 100644
index 000000000..744925f34
--- /dev/null
+++ b/mailman/pipeline/docs/scrubber.txt
@@ -0,0 +1,214 @@
+The scrubber
+============
+
+The scrubber is an integral part of Mailman, both in the normal delivery of
+messages and in components such as the archiver. Its primary purpose is to
+scrub attachments from messages so that binary goop doesn't end up in an
+archive message.
+
+ >>> from mailman.pipeline.scrubber import process, save_attachment
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.preferred_language = u'en'
+
+Helper functions for getting the attachment data.
+
+ >>> import os, re
+ >>> def read_attachment(filename, remove=True):
+ ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR,
+ ... mlist.fqdn_listname, filename)
+ ... fp = open(path)
+ ... try:
+ ... data = fp.read()
+ ... finally:
+ ... fp.close()
+ ... if remove:
+ ... os.unlink(path)
+ ... return data
+
+ >>> from urlparse import urlparse
+ >>> def read_url_from_message(msg):
+ ... url = None
+ ... for line in msg.get_payload().splitlines():
+ ... mo = re.match('URL: <(?P<url>[^>]+)>', line)
+ ... if mo:
+ ... url = mo.group('url')
+ ... break
+ ... path = '/'.join(urlparse(url).path.split('/')[3:])
+ ... return read_attachment(path)
+
+
+Saving attachments
+------------------
+
+The Scrubber handler exposes a function called save_attachment() which can be
+used to strip various types of attachments and store them in the archive
+directory. This is a public interface used by components outside the normal
+processing pipeline.
+
+Site administrators can decide whether the scrubber should use the attachment
+filename suggested in the message's Content-Disposition: header or not. If
+enabled, the filename will be used when this header attribute is present (yes,
+this is an unfortunate double negative).
+
+ >>> config.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME = False
+ >>> msg = message_from_string("""\
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... """)
+ >>> save_attachment(mlist, msg, 'dir')
+ u'<http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif>'
+ >>> data = read_attachment('dir/xtest.gif')
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+Saving the attachment does not alter the original message.
+
+ >>> print msg.as_string()
+ Content-Type: image/gif; name="xtest.gif"
+ Content-Transfer-Encoding: base64
+ Content-Disposition: attachment; filename="xtest.gif"
+ <BLANKLINE>
+ R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+
+The site administrator can also configure Mailman to ignore the
+Content-Disposition: filename. This is the default for reasons described in
+the Defaults.py.in file.
+
+ >>> config.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME = True
+ >>> msg = message_from_string("""\
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... """)
+ >>> save_attachment(mlist, msg, 'dir')
+ u'<http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif>'
+ >>> data = read_attachment('dir/xtest.gif')
+ Traceback (most recent call last):
+ IOError: [Errno ...] No such file or directory:
+ u'.../archives/private/_xtest@example.com/dir/xtest.gif'
+ >>> data = read_attachment('dir/attachment.gif')
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+
+Scrubbing image attachments
+---------------------------
+
+When scrubbing image attachments, the original message is modified to include
+a reference to the attachment file as available through the on-line archive.
+
+ >>> msg = message_from_string("""\
+ ... MIME-Version: 1.0
+ ... Content-Type: multipart/mixed; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... Content-type: text/plain; charset=us-ascii
+ ...
+ ... This is a message.
+ ... --BOUNDARY
+ ... Content-Type: image/gif; name="xtest.gif"
+ ... Content-Transfer-Encoding: base64
+ ... Content-Disposition: attachment; filename="xtest.gif"
+ ...
+ ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+ ... --BOUNDARY--
+ ... """)
+ >>> msgdata = {}
+
+The Scrubber.process() function is different than other handler process
+functions in that it returns the scrubbed message.
+
+ >>> scrubbed_msg = process(mlist, msg, msgdata)
+ >>> scrubbed_msg is msg
+ True
+ >>> print scrubbed_msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ <BLANKLINE>
+ This is a message.
+ -------------- next part --------------
+ A non-text attachment was scrubbed...
+ Name: xtest.gif
+ Type: image/gif
+ Size: 34 bytes
+ Desc: not available
+ URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif>
+ <BLANKLINE>
+
+This is the same as the transformed message originally passed in.
+
+ >>> print msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 7bit
+ <BLANKLINE>
+ This is a message.
+ -------------- next part --------------
+ A non-text attachment was scrubbed...
+ Name: xtest.gif
+ Type: image/gif
+ Size: 34 bytes
+ Desc: not available
+ URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif>
+ <BLANKLINE>
+ >>> msgdata
+ {}
+
+The URL will point to the attachment sitting in the archive.
+
+ >>> data = read_url_from_message(msg)
+ >>> data[:6]
+ 'GIF87a'
+ >>> len(data)
+ 34
+
+
+Scrubbing text attachments
+--------------------------
+
+Similar to image attachments, text attachments will also be scrubbed, but the
+placeholder will be slightly different.
+
+ >>> msg = message_from_string("""\
+ ... MIME-Version: 1.0
+ ... Content-Type: multipart/mixed; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no
+ ...
+ ... This is a message.
+ ... --BOUNDARY
+ ... Content-type: text/plain; name="xtext.txt"
+ ... Content-Disposition: attachment; filename="xtext.txt"
+ ...
+ ... This is a text attachment.
+ ... --BOUNDARY--
+ ... """)
+ >>> scrubbed_msg = process(mlist, msg, {})
+ >>> print scrubbed_msg.as_string()
+ MIME-Version: 1.0
+ Message-ID: ...
+ Content-Transfer-Encoding: 7bit
+ Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no"
+ <BLANKLINE>
+ This is a message.
+ -------------- next part --------------
+ An embedded and charset-unspecified text was scrubbed...
+ Name: xtext.txt
+ URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt>
+ <BLANKLINE>
+ >>> read_url_from_message(msg)
+ 'This is a text attachment.'
diff --git a/mailman/pipeline/docs/subject-munging.txt b/mailman/pipeline/docs/subject-munging.txt
new file mode 100644
index 000000000..02677d6e2
--- /dev/null
+++ b/mailman/pipeline/docs/subject-munging.txt
@@ -0,0 +1,245 @@
+Subject munging
+===============
+
+Messages that flow through the global pipeline get their headers 'cooked',
+which basically means that their headers go through several mostly unrelated
+transformations. Some headers get added, others get changed. Some of these
+changes depend on mailing list settings and others depend on how the message
+is getting sent through the system. We'll take things one-by-one.
+
+ >>> from mailman.pipeline.cook_headers import process
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+ >>> mlist.subject_prefix = u''
+
+
+Inserting a prefix
+------------------
+
+Another thing CookHeaders does is 'munge' the Subject header by inserting the
+subject prefix for the list at the front. If there's no subject header in the
+original message, Mailman uses a canned default. In order to do subject
+munging, a mailing list must have a preferred language.
+
+ >>> mlist.subject_prefix = u'[XTest] '
+ >>> mlist.preferred_language = u'en'
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+
+The original subject header is stored in the message metadata. We must print
+the new Subject header because it gets converted from a string to an
+email.header.Header instance which has an unhelpful repr.
+
+ >>> msgdata['origsubj']
+ u''
+ >>> print msg['subject']
+ [XTest] (no subject)
+
+If the original message had a Subject header, then the prefix is inserted at
+the beginning of the header's value.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> msgdata['origsubj']
+ u'Something important'
+ >>> print msg['subject']
+ [XTest] Something important
+
+Subject headers are not munged for digest messages.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(isdigest=True))
+ >>> msg['subject']
+ u'Something important'
+
+Nor are they munged for 'fast tracked' messages, which are generally defined
+as messages that Mailman crafts internally.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, dict(_fasttrack=True))
+ >>> msg['subject']
+ u'Something important'
+
+If a Subject header already has a prefix, usually following a Re: marker,
+another one will not be added but the prefix will be moved to the front of the
+header text.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: Re: [XTest] Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Re: Something important
+
+If the Subject header has a prefix at the front of the header text, that's
+where it will stay. This is called 'new style' prefixing and is the only
+option available in Mailman 3.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: [XTest] Re: Something important
+ ...
+ ... A message of great import.
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Re: Something important
+
+
+Internationalized headers
+-------------------------
+
+Internationalization adds some interesting twists to the handling of subject
+prefixes. Part of what makes this interesting is the encoding of i18n headers
+using RFC 2047, and lists whose preferred language is in a different character
+set than the encoded header.
+
+ >>> msg = message_from_string("""\
+ ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ >>> unicode(msg['subject'])
+ u'[XTest] \u30e1\u30fc\u30eb\u30de\u30f3'
+
+
+Prefix numbers
+--------------
+
+Subject prefixes support a placeholder for the numeric post id. Every time a
+message is posted to the mailing list, a 'post id' gets incremented. This is
+a purely sequential integer that increases monotonically. By adding a '%d'
+placeholder to the subject prefix, this post id can be included in the prefix.
+
+ >>> mlist.subject_prefix = u'[XTest %d] '
+ >>> mlist.post_id = 456
+ >>> msg = message_from_string("""\
+ ... Subject: Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Something important
+
+This works even when the message is a reply, except that in this case, the
+numeric post id in the generated subject prefix is updated with the new post
+id.
+
+ >>> msg = message_from_string("""\
+ ... Subject: [XTest 123] Re: Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: Something important
+
+If the Subject header had old style prefixing, the prefix is moved to the
+front of the header text.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Re: [XTest 123] Something important
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: Something important
+
+
+And of course, the proper thing is done when posting id numbers are included
+in the subject prefix, and the subject is encoded non-ascii.
+
+ >>> msg = message_from_string("""\
+ ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ >>> unicode(msg['subject'])
+ u'[XTest 456] \u30e1\u30fc\u30eb\u30de\u30f3'
+
+Even more fun is when the i18n Subject header already has a prefix, possibly
+with a different posting number.
+
+ >>> msg = message_from_string("""\
+ ... Subject: [XTest 123] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+
+# XXX This requires Python email patch #1681333 to succeed.
+# >>> unicode(msg['subject'])
+# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3'
+
+As before, old style subject prefixes are re-ordered.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Re: [XTest 123] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest 456] Re:
+ =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+
+# XXX This requires Python email patch #1681333 to succeed.
+# >>> unicode(msg['subject'])
+# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3'
+
+
+In this test case, we get an extra space between the prefix and the original
+subject. It's because the original is 'crooked'. Note that a Subject
+starting with '\n ' is generated by some versions of Eudora Japanese edition.
+
+ >>> mlist.subject_prefix = u'[XTest] '
+ >>> msg = message_from_string("""\
+ ... Subject:
+ ... Important message
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+ >>> print msg['subject']
+ [XTest] Important message
+
+And again, with an RFC 2047 encoded header.
+
+ >>> msg = message_from_string("""\
+ ... Subject:
+ ... =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?=
+ ...
+ ... """)
+ >>> process(mlist, msg, {})
+
+# XXX This one does not appear to work the same way as
+# test_subject_munging_prefix_crooked() in the old Python-based tests. I need
+# to get Tokio to look at this.
+# >>> print msg['subject']
+# [XTest] =?iso-2022-jp?b?IBskQiVhITwlayVeJXMbKEI=?=
diff --git a/mailman/pipeline/docs/tagger.txt b/mailman/pipeline/docs/tagger.txt
new file mode 100644
index 000000000..778f7cc73
--- /dev/null
+++ b/mailman/pipeline/docs/tagger.txt
@@ -0,0 +1,237 @@
+Message tagger
+==============
+
+Mailman has a topics system which works like this: a mailing list
+administrator sets up one or more topics, which is essentially a named regular
+expression. The topic name can be any arbitrary string, and the name serves
+double duty as the 'topic tag'. Each message that flows through the mailing
+list has its Subject: and Keywords: headers compared against these regular
+expressions. The message then gets tagged with the topic names of each hit.
+
+ >>> from mailman.pipeline.tagger import process
+ >>> from mailman.queue import Switchboard
+ >>> from mailman.configuration import config
+ >>> mlist = config.db.list_manager.create(u'_xtest@example.com')
+
+Topics must be enabled for Mailman to do any topic matching, even if topics
+are defined.
+
+ >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)]
+ >>> mlist.topics_enabled = False
+ >>> mlist.topics_bodylines_limit = 0
+
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ <BLANKLINE>
+ >>> msgdata
+ {}
+
+However, once topics are enabled, messages will be tagged. There are two
+artifacts of tagging: an X-Topics: header is added with the topic name, and
+the message metadata gets a key with a list of matching topic names.
+
+ >>> mlist.topics_enabled = True
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+ X-Topics: bar fight
+ <BLANKLINE>
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+
+Scanning body lines
+-------------------
+
+The tagger can also look at a certain number of body lines, but only for
+Subject: and Keywords: header-like lines. When set to zero, no body lines are
+scanned.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... X-Ignore: something else
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ <BLANKLINE>
+ X-Ignore: something else
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ >>> msgdata
+ {}
+
+But let the tagger scan a few body lines and the matching headers will be
+found.
+
+ >>> mlist.topics_bodylines_limit = 5
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... X-Ignore: something else
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ X-Topics: bar fight
+ <BLANKLINE>
+ X-Ignore: something else
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+However, scanning stops at the first body line that doesn't look like a
+header.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... This is not a header
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ <BLANKLINE>
+ This is not a header
+ Subject: foobar
+ Keywords: barbaz
+ >>> msgdata
+ {}
+
+When set to a negative number, all body lines will be scanned.
+
+ >>> mlist.topics_bodylines_limit = -1
+ >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100)
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... %s
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """ % lots_of_headers)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> # Rather than print out 100 X-Ignore: headers, let's just prove that
+ >>> # the X-Topics: header exists, meaning that the tagger did its job.
+ >>> msg['x-topics']
+ u'bar fight'
+ >>> msgdata['topichits']
+ ['bar fight']
+
+
+Scanning sub-parts
+------------------
+
+The tagger will also scan the body lines of text subparts in a multipart
+message, using the same rules as if all those body lines lived in a single
+text payload.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Was
+ ... Keywords: Raw
+ ... Content-Type: multipart/alternative; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY--
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: Was
+ Keywords: Raw
+ Content-Type: multipart/alternative; boundary="BOUNDARY"
+ X-Topics: bar fight
+ <BLANKLINE>
+ --BOUNDARY
+ From: sabo
+ To: obas
+ <BLANKLINE>
+ Subject: farbaw
+ Keywords: barbaz
+ <BLANKLINE>
+ --BOUNDARY--
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+But the tagger will not descend into non-text parts.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Was
+ ... Keywords: Raw
+ ... Content-Type: multipart/alternative; boundary=BOUNDARY
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ... Content-Type: message/rfc822
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ... Content-Type: message/rfc822
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY--
+ ... """)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg['x-topics']
+ None
+ >>> msgdata
+ {}