diff options
Diffstat (limited to 'mailman/pipeline/docs')
| -rw-r--r-- | mailman/pipeline/docs/ack-headers.txt | 41 | ||||
| -rw-r--r-- | mailman/pipeline/docs/acknowledge.txt | 162 | ||||
| -rw-r--r-- | mailman/pipeline/docs/after-delivery.txt | 28 | ||||
| -rw-r--r-- | mailman/pipeline/docs/archives.txt | 133 | ||||
| -rw-r--r-- | mailman/pipeline/docs/avoid-duplicates.txt | 169 | ||||
| -rw-r--r-- | mailman/pipeline/docs/calc-recips.txt | 101 | ||||
| -rw-r--r-- | mailman/pipeline/docs/cleanse.txt | 95 | ||||
| -rw-r--r-- | mailman/pipeline/docs/cook-headers.txt | 328 | ||||
| -rw-r--r-- | mailman/pipeline/docs/decorate.txt | 318 | ||||
| -rw-r--r-- | mailman/pipeline/docs/digests.txt | 536 | ||||
| -rw-r--r-- | mailman/pipeline/docs/file-recips.txt | 97 | ||||
| -rw-r--r-- | mailman/pipeline/docs/filtering.txt | 341 | ||||
| -rw-r--r-- | mailman/pipeline/docs/nntp.txt | 68 | ||||
| -rw-r--r-- | mailman/pipeline/docs/reply-to.txt | 128 | ||||
| -rw-r--r-- | mailman/pipeline/docs/replybot.txt | 216 | ||||
| -rw-r--r-- | mailman/pipeline/docs/scrubber.txt | 214 | ||||
| -rw-r--r-- | mailman/pipeline/docs/subject-munging.txt | 245 | ||||
| -rw-r--r-- | mailman/pipeline/docs/tagger.txt | 237 |
18 files changed, 3457 insertions, 0 deletions
diff --git a/mailman/pipeline/docs/ack-headers.txt b/mailman/pipeline/docs/ack-headers.txt new file mode 100644 index 000000000..28a8eed9e --- /dev/null +++ b/mailman/pipeline/docs/ack-headers.txt @@ -0,0 +1,41 @@ +Acknowledgment headers +====================== + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. + + >>> from mailman.configuration import config + >>> from mailman.pipeline.cook_headers import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + +When the message's metadata has a 'noack' key set, an 'X-Ack: no' header is +added. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(noack=True)) + >>> print msg.as_string() + From: aperson@example.com + X-Ack: no + ... + +Any existing X-Ack header in the original message is removed. + + >>> msg = message_from_string("""\ + ... X-Ack: yes + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(noack=True)) + >>> print msg.as_string() + From: aperson@example.com + X-Ack: no + ... diff --git a/mailman/pipeline/docs/acknowledge.txt b/mailman/pipeline/docs/acknowledge.txt new file mode 100644 index 000000000..76c8fdf21 --- /dev/null +++ b/mailman/pipeline/docs/acknowledge.txt @@ -0,0 +1,162 @@ +Message acknowledgment +====================== + +When a user posts a message to a mailing list, and that user has chosen to +receive acknowledgments of their postings, Mailman will send them such an +acknowledgment. 
+ + >>> from mailman.configuration import config + >>> handler = config.handlers['acknowledge'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.real_name = u'XTest' + >>> mlist.preferred_language = u'en' + >>> # XXX This will almost certainly change once we've worked out the web + >>> # space layout for mailing lists now. + >>> mlist.web_page_url = u'http://lists.example.com/' + + >>> # Ensure that the virgin queue is empty, since we'll be checking this + >>> # for new auto-response messages. + >>> from mailman.queue import Switchboard + >>> virginq = Switchboard(config.VIRGINQUEUE_DIR) + >>> virginq.files + [] + +Subscribe a user to the mailing list. + + >>> usermgr = config.db.user_manager + >>> from mailman.interfaces import MemberRole + >>> user_1 = usermgr.create_user(u'aperson@example.com') + >>> address_1 = list(user_1.addresses)[0] + >>> address_1.subscribe(mlist, MemberRole.member) + <Member: aperson@example.com on _xtest@example.com as MemberRole.member> + + +Non-member posts +---------------- + +Non-members can't get acknowledgments of their posts to the mailing list. + + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> virginq.files + [] + +We can also specify the original sender in the message's metadata. If that +person is also not a member, no acknowledgment will be sent either. + + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ... + ... """) + >>> handler.process(mlist, msg, + ... dict(original_sender=u'cperson@example.com')) + >>> virginq.files + [] + + +No acknowledgment requested +--------------------------- + +Unless the user has requested acknowledgments, they will not get one. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> virginq.files + [] + +Similarly if the original sender is specified in the message metadata, and +that sender is a member but not one who has requested acknowledgments, none +will be sent. + + >>> user_2 = usermgr.create_user(u'dperson@example.com') + >>> address_2 = list(user_2.addresses)[0] + >>> address_2.subscribe(mlist, MemberRole.member) + <Member: dperson@example.com on _xtest@example.com as MemberRole.member> + + >>> handler.process(mlist, msg, + ... dict(original_sender=u'dperson@example.com')) + >>> virginq.files + [] + + +Requested acknowledgments +------------------------- + +If the member requests acknowledgments, Mailman will send them one when they +post to the mailing list. + + >>> user_1.preferences.acknowledge_posts = True + +The receipt will include the original message's subject in the response body, + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something witty and insightful + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> virginq.files + [] + >>> sorted(qdata.items()) + [..., ('recips', [u'aperson@example.com']), ...] + >>> print qmsg.as_string() + ... + MIME-Version: 1.0 + ... + Subject: XTest post acknowledgment + From: _xtest-bounces@example.com + To: aperson@example.com + ... + Precedence: bulk + <BLANKLINE> + Your message entitled + <BLANKLINE> + Something witty and insightful + <BLANKLINE> + was successfully received by the XTest mailing list. + <BLANKLINE> + List info page: http://lists.example.com/listinfo/_xtest@example.com + Your preferences: http://example.com/aperson@example.com + <BLANKLINE> + +If there is no subject, then the receipt will use a generic message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> virginq.files + [] + >>> sorted(qdata.items()) + [..., ('recips', [u'aperson@example.com']), ...] + >>> print qmsg.as_string() + MIME-Version: 1.0 + ... + Subject: XTest post acknowledgment + From: _xtest-bounces@example.com + To: aperson@example.com + ... + Precedence: bulk + <BLANKLINE> + Your message entitled + <BLANKLINE> + (no subject) + <BLANKLINE> + was successfully received by the XTest mailing list. + <BLANKLINE> + List info page: http://lists.example.com/listinfo/_xtest@example.com + Your preferences: http://example.com/aperson@example.com + <BLANKLINE> diff --git a/mailman/pipeline/docs/after-delivery.txt b/mailman/pipeline/docs/after-delivery.txt new file mode 100644 index 000000000..5bc9b5936 --- /dev/null +++ b/mailman/pipeline/docs/after-delivery.txt @@ -0,0 +1,28 @@ +After delivery +============== + +After a message is delivered, or more correctly, after it has been processed +by the rest of the handlers in the incoming queue pipeline, a couple of +bookkeeping pieces of information are updated. + + >>> import datetime + >>> from mailman.configuration import config + >>> handler = config.handlers['after-delivery'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> post_time = datetime.datetime.now() - datetime.timedelta(minutes=10) + >>> mlist.last_post_time = post_time + >>> mlist.post_id = 10 + +Processing a message with this handler updates the last_post_time and post_id +attributes. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... Something interesting. + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> mlist.last_post_time > post_time + True + >>> mlist.post_id + 11 diff --git a/mailman/pipeline/docs/archives.txt b/mailman/pipeline/docs/archives.txt new file mode 100644 index 000000000..b7b54f17f --- /dev/null +++ b/mailman/pipeline/docs/archives.txt @@ -0,0 +1,133 @@ +Archives +======== + +Updating the archives with posted messages is handled by a separate queue, +which allows for better memory management and prevents blocking the main +delivery processes while messages are archived. This also allows external +archivers to work in a separate process from the main Mailman delivery +processes. + + >>> from mailman.queue import Switchboard + >>> from mailman.configuration import config + >>> handler = config.handlers['to-archive'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + >>> switchboard = Switchboard(config.ARCHQUEUE_DIR) + +A helper function. + + >>> def clear(): + ... for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + +The purpose of the ToArchive handler is to make a simple decision as to +whether the message should get archived and if so, to drop the message in the +archiving queue. Really the most important things are to determine when a +message should /not/ get archived. + +For example, no digests should ever get archived. + + >>> mlist.archive = True + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +If the mailing list is not configured to archive, then even regular deliveries +won't be archived. + + >>> mlist.archive = False + >>> handler.process(mlist, msg, {}) + >>> switchboard.files + [] + +There are two de-facto standards for a message to indicate that it does not +want to be archived. 
We've seen both in the wild so both are supported. The +X-No-Archive: header can be used to indicate that the message should not be +archived. Confusingly, this header's value is actually ignored. + + >>> mlist.archive = True + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-No-Archive: YES + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +Even a 'no' value will stop the archiving of the message. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-No-Archive: No + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +Another header that's been observed is the X-Archive: header. Here, the +header's case folded value must be 'no' in order to prevent archiving. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-Archive: No + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +But if the value is 'yes', then the message will be archived. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-Archive: Yes + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> qmsg, qdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: A sample message + X-Archive: Yes + <BLANKLINE> + A message of great import. + <BLANKLINE> + >>> sorted(qdata.items()) + [('_parsemsg', False), ('received_time', ...), ('version', 3)] + +Without either archiving header, and all other things being the same, the +message will get archived. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... + ... A message of great import. + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> qmsg, qdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: A sample message + <BLANKLINE> + A message of great import. + <BLANKLINE> + >>> sorted(qdata.items()) + [('_parsemsg', False), ('received_time', ...), ('version', 3)] diff --git a/mailman/pipeline/docs/avoid-duplicates.txt b/mailman/pipeline/docs/avoid-duplicates.txt new file mode 100644 index 000000000..9fd332d1b --- /dev/null +++ b/mailman/pipeline/docs/avoid-duplicates.txt @@ -0,0 +1,169 @@ +Avoid duplicates +================ + +The AvoidDuplicates handler module implements several strategies to try to +reduce the reception of duplicate messages. It does this by removing certain +recipients from the list of recipients that earlier handler modules +(e.g. CalcRecips) calculates. + + >>> from mailman.configuration import config + >>> handler = config.handlers['avoid-duplicates'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Create some members we're going to use. + + >>> from mailman.interfaces import MemberRole + >>> address_a = config.db.user_manager.create_address( + ... u'aperson@example.com') + >>> address_b = config.db.user_manager.create_address( + ... u'bperson@example.com') + >>> member_a = address_a.subscribe(mlist, MemberRole.member) + >>> member_b = address_b.subscribe(mlist, MemberRole.member) + >>> # This is the message metadata dictionary as it would be produced by + >>> # the CalcRecips handler. + >>> recips = dict(recips=[u'aperson@example.com', u'bperson@example.com']) + + +Short circuiting +---------------- + +The module short-circuits if there are no recipients. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: A message of great import + ... + ... Something + ... 
""") + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> msgdata + {} + >>> print msg.as_string() + From: aperson@example.com + Subject: A message of great import + <BLANKLINE> + Something + <BLANKLINE> + + +Suppressing the list copy +------------------------- + +Members can elect not to receive a list copy of any message on which they are +explicitly named as a recipient. This is done by setting their +receive_list_copy preference to False. However, if they aren't mentioned in +one of the recipient headers (i.e. To, CC, Resent-To, or Resent-CC), then they +will get a list copy. + + >>> member_a.preferences.receive_list_copy = False + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + <BLANKLINE> + Something of great import. + <BLANKLINE> + +If they're mentioned on the CC line, they won't get a list copy. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... CC: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + CC: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +But if they're mentioned on the CC line and have receive_list_copy set to True +(the default), then they still get a list copy. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... CC: bperson@example.com + ... + ... Something of great import. + ... 
""") + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + CC: bperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +Other headers checked for recipients include the To... + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... To: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + To: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +...Resent-To... + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... Resent-To: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + Resent-To: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +...and Resent-CC headers. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... Resent-Cc: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + Resent-Cc: aperson@example.com + <BLANKLINE> + Something of great import. 
+ <BLANKLINE> diff --git a/mailman/pipeline/docs/calc-recips.txt b/mailman/pipeline/docs/calc-recips.txt new file mode 100644 index 000000000..057351873 --- /dev/null +++ b/mailman/pipeline/docs/calc-recips.txt @@ -0,0 +1,101 @@ +Calculating recipients +====================== + +Every message that makes it through to the list membership gets sent to a set +of recipient addresses. These addresses are calculated by one of the handler +modules and depends on a host of factors. + + >>> from mailman.configuration import config + >>> handler = config.handlers['calculate-recipients'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Recipients are calculate from the list members, so add a bunch of members to +start out with. First, create a bunch of addresses... + + >>> usermgr = config.db.user_manager + >>> address_a = usermgr.create_address(u'aperson@example.com') + >>> address_b = usermgr.create_address(u'bperson@example.com') + >>> address_c = usermgr.create_address(u'cperson@example.com') + >>> address_d = usermgr.create_address(u'dperson@example.com') + >>> address_e = usermgr.create_address(u'eperson@example.com') + >>> address_f = usermgr.create_address(u'fperson@example.com') + +...then subscribe these addresses to the mailing list as members... + + >>> from mailman.interfaces import MemberRole + >>> member_a = address_a.subscribe(mlist, MemberRole.member) + >>> member_b = address_b.subscribe(mlist, MemberRole.member) + >>> member_c = address_c.subscribe(mlist, MemberRole.member) + >>> member_d = address_d.subscribe(mlist, MemberRole.member) + >>> member_e = address_e.subscribe(mlist, MemberRole.member) + >>> member_f = address_f.subscribe(mlist, MemberRole.member) + +...then make some of the members digest members. 
+ + >>> from mailman.constants import DeliveryMode + >>> member_d.preferences.delivery_mode = DeliveryMode.plaintext_digests + >>> member_e.preferences.delivery_mode = DeliveryMode.mime_digests + >>> member_f.preferences.delivery_mode = DeliveryMode.summary_digests + + +Short-circuiting +---------------- + +Sometimes, the list of recipients already exists in the message metadata. +This can happen for example, when a message was previously delivered to some +but not all of the recipients. + + >>> msg = message_from_string("""\ + ... From: Xavier Person <xperson@example.com> + ... + ... Something of great import. + ... """) + >>> recips = set((u'qperson@example.com', u'zperson@example.com')) + >>> msgdata = dict(recips=recips) + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'qperson@example.com', u'zperson@example.com'] + + +Regular delivery recipients +--------------------------- + +Regular delivery recipients are those people who get messages from the list as +soon as they are posted. In other words, these folks are not digest members. + + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com', u'cperson@example.com'] + +Members can elect not to receive a list copy of their own postings. + + >>> member_c.preferences.receive_own_postings = False + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... + ... Something of great import. + ... """) + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + +Members can also elect not to receive a list copy of any message on which they +are explicitly named as a recipient. However, see the AvoidDuplicates handler +for details. + + +Digest recipients +----------------- + +XXX Test various digest deliveries. 
+ + +Urgent messages +--------------- + +XXX Test various urgent deliveries: + * test_urgent_moderator() + * test_urgent_admin() + * test_urgent_reject() diff --git a/mailman/pipeline/docs/cleanse.txt b/mailman/pipeline/docs/cleanse.txt new file mode 100644 index 000000000..1597095b3 --- /dev/null +++ b/mailman/pipeline/docs/cleanse.txt @@ -0,0 +1,95 @@ +Cleansing headers +================= + +All messages posted to a list get their headers cleansed. Some headers are +related to additional permissions that can be granted to the message and other +headers can be used to fish for membership. + + >>> from mailman.configuration import config + >>> handler = config.handlers['cleanse'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Headers such as Approved, Approve, and Urgent are used to grant special +permissions to individual messages. All may contain a password; the first two +headers are used by list administrators to pre-approve a message normally held +for approval. The latter header is used to send a regular message to all +members, regardless of whether they get digests or not. Because all three +headers contain passwords, they must be removed from any posted message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Approved: foobar + ... Approve: barfoo + ... Urgent: notreally + ... Subject: A message of great import + ... + ... Blah blah blah + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Subject: A message of great import + <BLANKLINE> + Blah blah blah + <BLANKLINE> + +Other headers can be used by list members to fish the list for membership, so +we don't let them go through. These are a mix of standard headers and custom +headers supported by some mail readers. For example, X-PMRQC is supported by +Pegasus mail. I don't remember what program uses X-Confirm-Reading-To though +(Some Microsoft product perhaps?). + + >>> msg = message_from_string("""\ + ... 
From: bperson@example.com + ... Reply-To: bperson@example.org + ... Sender: asystem@example.net + ... Return-Receipt-To: another@example.com + ... Disposition-Notification-To: athird@example.com + ... X-Confirm-Reading-To: afourth@example.com + ... X-PMRQC: afifth@example.com + ... Subject: a message to you + ... + ... How are you doing? + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + From: bperson@example.com + Reply-To: bperson@example.org + Sender: asystem@example.net + Subject: a message to you + <BLANKLINE> + How are you doing? + <BLANKLINE> + + +Anonymous lists +--------------- + +Anonymous mailing lists also try to cleanse certain identifying headers from +the original posting, so that it is at least a bit more difficult to determine +who sent the message. This isn't perfect though, for example, the body of the +messages are never scrubbed (though that might not be a bad idea). The From +and Reply-To headers in the posted message are taken from list attributes. + +Hotmail apparently sets X-Originating-Email. + + >>> mlist.anonymous_list = True + >>> mlist.description = u'A Test Mailing List' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ... Reply-To: bperson@example.org + ... Sender: asystem@example.net + ... X-Originating-Email: cperson@example.com + ... Subject: a message to you + ... + ... How are you doing? + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + Subject: a message to you + From: A Test Mailing List <_xtest@example.com> + Reply-To: _xtest@example.com + <BLANKLINE> + How are you doing? 
+ <BLANKLINE> diff --git a/mailman/pipeline/docs/cook-headers.txt b/mailman/pipeline/docs/cook-headers.txt new file mode 100644 index 000000000..b1aae6774 --- /dev/null +++ b/mailman/pipeline/docs/cook-headers.txt @@ -0,0 +1,328 @@ +Cooking headers +=============== + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. + + >>> from mailman.pipeline.cook_headers import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + >>> mlist.include_list_post_header = False + >>> mlist.archive = True + >>> # XXX This will almost certainly change once we've worked out the web + >>> # space layout for mailing lists now. + >>> mlist.web_page_url = u'http://lists.example.com/' + + +Saving the original sender +-------------------------- + +Because the original sender headers may get deleted or changed, CookHeaders +will place the sender in the message metadata for safe keeping. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['original_sender'] + u'aperson@example.com' + +But if there was no original sender, then the empty string will be saved. + + >>> msg = message_from_string("""\ + ... Subject: No original sender + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['original_sender'] + '' + + +X-BeenThere header +------------------ + +The X-BeenThere header is what Mailman uses to recognize messages that have +already been processed by this mailing list. 
It's one small measure against +mail loops. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['x-beenthere'] + u'_xtest@example.com' + +Mailman appends X-BeenThere headers, so if there already is one in the +original message, the posted message will contain two such headers. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-BeenThere: another@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> sorted(msg.get_all('x-beenthere')) + [u'_xtest@example.com', u'another@example.com'] + + +Mailman version header +---------------------- + +Mailman will also insert an X-Mailman-Version header... + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> from mailman.Version import VERSION + >>> msg['x-mailman-version'] == VERSION + True + +...but only if one doesn't already exist. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-Mailman-Version: 3000 + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['x-mailman-version'] + u'3000' + + +Precedence header +----------------- + +Mailman will insert a Precedence header, which is a de-facto standard for +telling automatic reply software (e.g. vacation(1)) not to respond to this +message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['precedence'] + u'list' + +But Mailman will only add that header if the original message doesn't already +have one of them. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Precedence: junk + ... + ... A message of great import. + ... 
""") + >>> process(mlist, msg, {}) + >>> msg['precedence'] + u'junk' + + +RFC 2919 and 2369 headers +------------------------- + +This is a helper function for the following section. + + >>> def list_headers(msg): + ... print '---start---' + ... # Sort the List-* headers found in the message. We need to do + ... # this because CookHeaders puts them in a dictionary which does + ... # not have a guaranteed sort order. + ... for header in sorted(msg.keys()): + ... parts = header.lower().split('-') + ... if 'list' not in parts: + ... continue + ... for value in msg.get_all(header): + ... print '%s: %s' % (header, value) + ... print '---end---' + +These RFCs define headers for mailing list actions. A mailing list should +generally add these headers, but not for messages that aren't crafted for a +specific list (e.g. password reminders in Mailman 2.x). + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, dict(_nolist=True)) + >>> list_headers(msg) + ---start--- + ---end--- + +Some people don't like these headers because their mail readers aren't good +about hiding them. A list owner can turn these headers off. + + >>> mlist.include_rfc2369_headers = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + ---end--- + +But normally, a list will include these headers. + + >>> mlist.include_rfc2369_headers = True + >>> mlist.include_list_post_header = True + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://www.example.com/pipermail/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +If the mailing list has a description, then it is included in the List-Id +header. + + >>> mlist.description = u'My test mailing list' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://www.example.com/pipermail/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +Administrative messages crafted by Mailman will have a reduced set of headers. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> process(mlist, msg, dict(reduced_list_headers=True)) + >>> list_headers(msg) + ---start--- + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + X-List-Administrivia: yes + ---end--- + +With the normal set of List-* headers, it's still possible to suppress the +List-Post header, which is reasonable for an announce only mailing list. + + >>> mlist.include_list_post_header = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://www.example.com/pipermail/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +And if the list isn't being archived, it makes no sense to add the +List-Archive header either. + + >>> mlist.include_list_post_header = True + >>> mlist.archive = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + + +Archived-At +----------- + +RFC 5064 (draft) defines a new Archived-At header which contains the url to +the individual message in the archives. The stock Pipermail archiver doesn't +support this because the url can't be calculated until after the message is +archived. Because this is done by the archive runner, this information isn't +available to us now. + + >>> print msg['archived-at'] + None + + +Personalization +--------------- + +The To field normally contains the list posting address. However when +messages are fully personalized, that header will get overwritten with the +address of the recipient. The list's posting address will be added to one of +the recipient headers so that users will be able to reply back to the list. + + >>> from mailman.interfaces import Personalization, ReplyToMunging + >>> mlist.personalize = Personalization.full + >>> mlist.reply_goes_to_list = ReplyToMunging.no_munging + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + X-BeenThere: _xtest@example.com + X-Mailman-Version: ... 
+ Precedence: list + Cc: My test mailing list <_xtest@example.com> + List-Id: My test mailing list <_xtest.example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + List-Post: <mailto:_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + <BLANKLINE> + <BLANKLINE> diff --git a/mailman/pipeline/docs/decorate.txt b/mailman/pipeline/docs/decorate.txt new file mode 100644 index 000000000..60afb0170 --- /dev/null +++ b/mailman/pipeline/docs/decorate.txt @@ -0,0 +1,318 @@ +Message decoration +================== + +Message decoration is the process of adding headers and footers to the +original message. A handler module takes care of this based on the settings +of the mailing list and the type of message being processed. + + >>> from mailman.pipeline.decorate import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> msg_text = """\ + ... From: aperson@example.org + ... + ... Here is a message. + ... """ + >>> msg = message_from_string(msg_text) + + +Short circuiting +---------------- + +Digest messages get decorated during the digest creation phase so no extra +decorations are added for digest messages. + + >>> process(mlist, msg, dict(isdigest=True)) + >>> print msg.as_string() + From: aperson@example.org + <BLANKLINE> + Here is a message. + + >>> process(mlist, msg, dict(nodecorate=True)) + >>> print msg.as_string() + From: aperson@example.org + <BLANKLINE> + Here is a message. + + +Decorating simple text messages +------------------------------- + +Text messages that have no declared content type character set are by default, +encoded in us-ascii. When the mailing list's preferred language is 'en' +(i.e. English), the character set of the mailing list and of the message will +match. 
In this case, and when the header and footer have no interpolation +placeholder variables, the message's payload will be prepended by the verbatim +header, and appended with the verbatim footer. + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'header\n' + >>> mlist.msg_footer = u'footer' + >>> mlist.preferred_language = u'en' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + <BLANKLINE> + header + Here is a message. + footer + +Mailman supports a number of interpolation variables, placeholders in the +header and footer for information to be filled in with mailing list specific +data. An example of such information is the mailing list's "real name" (a +short descriptive name for the mailing list). + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'$real_name header\n' + >>> mlist.msg_footer = u'$real_name footer' + >>> mlist.real_name = u'XTest' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + XTest header + Here is a message. + XTest footer + +You can't just pick any interpolation variable though; if you do, the variable +will remain in the header or footer unchanged. + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'$dummy header\n' + >>> mlist.msg_footer = u'$dummy footer' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + $dummy header + Here is a message. + $dummy footer + + +Handling RFC 3676 'format=flowed' parameters +-------------------------------------------- + +RFC 3676 describes a standard by which text/plain messages can be marked by +generating MUAs for better readability in compatible receiving MUAs. The +'format' parameter on the text/plain Content-Type header gives hints as to how +the receiving MUA may flow and delete trailing whitespace for better display +in a proportional font. 
+ +When Mailman sees text/plain messages with such RFC 3676 parameters, it +preserves these parameters when it concatenates headers and footers to the +message payload. + + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: text/plain; format=flowed; delsp=no + ... + ... Here is a message\x20 + ... with soft line breaks. + ... """) + >>> process(mlist, msg, {}) + >>> # Don't use 'print' here as above because it won't be obvious from the + >>> # output that the soft-line break space at the end of the 'Here is a + >>> # message' line will be retained in the output. + >>> msg['content-type'] + u'text/plain; format="flowed"; delsp="no"; charset="us-ascii"' + >>> [line for line in msg.get_payload().splitlines()] + ['header', 'Here is a message ', 'with soft line breaks.', 'footer'] + + +Decorating mixed-charset messages +--------------------------------- + +When a message has no explicit character set, it is assumed to be us-ascii. +However, if the mailing list's preferred language has a different character +set, Mailman will still try to concatenate the header and footer, but it will +convert the text to utf-8 and base-64 encode the message payload. 
+ + # 'ja' = Japanese; charset = 'euc-jp' + >>> mlist.preferred_language = u'ja' + >>> mlist.msg_header = u'$description header' + >>> mlist.msg_footer = u'$description footer' + >>> mlist.description = u'\u65e5\u672c\u8a9e' + + >>> from email.message import Message + >>> msg = Message() + >>> msg.set_payload('Fran\xe7aise', 'iso-8859-1') + >>> print msg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="iso-8859-1" + Content-Transfer-Encoding: quoted-printable + <BLANKLINE> + Fran=E7aise + >>> process(mlist, msg, {}) + >>> print msg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + <BLANKLINE> + 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI= + + +Sometimes the message even has an unknown character set. In this case, +Mailman has no choice but to decorate the original message with MIME +attachments. + + >>> mlist.preferred_language = u'en' + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: text/plain; charset=unknown + ... Content-Transfer-Encoding: 7bit + ... + ... Here is a message. + ... """) + >>> process(mlist, msg, {}) + >>> msg.set_boundary('BOUNDARY') + >>> print msg.as_string() + From: aperson@example.org + Content-Type: multipart/mixed; boundary="BOUNDARY" + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + Content-Type: text/plain; charset=unknown + Content-Transfer-Encoding: 7bit + <BLANKLINE> + Here is a message. 
+ <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Decorating multipart messages +----------------------------- + +Multipart messages have to be decorated differently. The header and footer +cannot be simply concatenated into the payload because that will break the +MIME structure of the message. Instead, the header and footer are attached as +separate MIME subparts. + +When the outerpart is multipart/mixed, the header and footer can have a +Content-Disposition of 'inline' so that MUAs can display these headers as if +they were simply concatenated. + + >>> mlist.preferred_language = u'en' + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> part_1 = message_from_string("""\ + ... From: aperson@example.org + ... + ... Here is the first message. + ... """) + >>> part_2 = message_from_string("""\ + ... From: bperson@example.com + ... + ... Here is the second message. + ... """) + >>> from email.mime.multipart import MIMEMultipart + >>> msg = MIMEMultipart('mixed', boundary='BOUNDARY', + ... _subparts=(part_1, part_2)) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + Content-Type: multipart/mixed; boundary="BOUNDARY" + MIME-Version: 1.0 + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + From: aperson@example.org + <BLANKLINE> + Here is the first message. + <BLANKLINE> + --BOUNDARY + From: bperson@example.com + <BLANKLINE> + Here is the second message. 
+ <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Decorating other content types +------------------------------ + +Non-multipart non-text content types will get wrapped in a multipart/mixed so +that the header and footer can be added as attachments. + + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: image/x-beautiful + ... + ... IMAGEDATAIMAGEDATAIMAGEDATA + ... """) + >>> process(mlist, msg, {}) + >>> msg.set_boundary('BOUNDARY') + >>> print msg.as_string() + From: aperson@example.org + ... + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + Content-Type: image/x-beautiful + <BLANKLINE> + IMAGEDATAIMAGEDATAIMAGEDATA + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Personalization +--------------- + +A mailing list can be 'personalized', meaning that each message is unique for +each recipient. When the list is personalized, additional interpolation +variables are available, however the list of intended recipients must be +provided in the message data, otherwise an exception occurs. + + >>> process(mlist, None, dict(personalize=True)) + Traceback (most recent call last): + ... + AssertionError: The number of intended recipients must be exactly 1 + +And the number of intended recipients must be exactly 1. + + >>> process(mlist, None, dict(personalize=True, recips=[1, 2, 3])) + Traceback (most recent call last): + ... 
+ AssertionError: The number of intended recipients must be exactly 1 diff --git a/mailman/pipeline/docs/digests.txt b/mailman/pipeline/docs/digests.txt new file mode 100644 index 000000000..d81e173f8 --- /dev/null +++ b/mailman/pipeline/docs/digests.txt @@ -0,0 +1,536 @@ +Digests +======= + +Digests are a way for a user to receive list traffic in collections instead of +as individual messages when immediately posted. There are several forms of +digests, although only two are currently supported: MIME digests and RFC 1153 +(a.k.a. plain text) digests. + + >>> from mailman.pipeline.to_digest import process + >>> from mailman.queue import Switchboard + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + >>> mlist.web_page_url = u'http://www.example.com/' + >>> mlist.real_name = u'XTest' + >>> mlist.subject_prefix = u'[_XTest] ' + >>> mlist.one_last_digest = set() + >>> switchboard = Switchboard(config.VIRGINQUEUE_DIR) + +This is a helper function used to iterate through all the accumulated digest +messages, in the order in which they were posted. This makes it easier to +update the tests when we switch to a different mailbox format. + + >>> from mailman.tests.helpers import digest_mbox + >>> from itertools import count + >>> from string import Template + >>> def makemsg(): + ... for i in count(1): + ... text = Template("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Subject: Test message $i + ... + ... Here is message $i + ... """).substitute(i=i) + ... yield message_from_string(text) + + +Short circuiting +---------------- + +When a message is posted to the mailing list, it is generally added to a +running collection of messages. For now, this is a Unix mailbox file, +although in the future this may end up being converted to a maildir style +mailbox. 
In any event, there are several factors that would bypass the +storing of posted messages to the mailbox. For example, the mailing list may +not allow digests... + + >>> mlist.digestable = False + >>> msg = makemsg().next() + >>> process(mlist, msg, {}) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> switchboard.files + [] + +...or they may allow digests but the message is already a digest. + + >>> mlist.digestable = True + >>> process(mlist, msg, dict(isdigest=True)) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> switchboard.files + [] + + +Sending a digest +---------------- + +For messages which are not digests, but which are posted to a digestable +mailing list, the messages will be stored until they meet a criterion +triggering the sending of the digest. If none of those criteria are met, then +the message will just sit in the mailbox for a while. + + >>> mlist.digest_size_threshold = 10000 + >>> process(mlist, msg, {}) + >>> switchboard.files + [] + >>> digest = digest_mbox(mlist) + >>> sum(1 for mboxmsg in digest) + 1 + >>> import os + >>> os.remove(digest._path) + +When the size of the digest mbox reaches the maximum size threshold, a digest +is crafted and sent out. This puts two messages in the virgin queue, a MIME +digest and an RFC 1153 plain text digest. The size threshold is in KB. + + >>> mlist.digest_size_threshold = 1 + >>> mlist.volume = 2 + >>> mlist.next_digest_number = 10 + >>> size = 0 + >>> for msg in makemsg(): + ... process(mlist, msg, {}) + ... size += len(str(msg)) + ... if size > mlist.digest_size_threshold * 1024: + ... break + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> len(switchboard.files) + 2 + >>> for filebase in switchboard.files: + ... qmsg, qdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... if qmsg.is_multipart(): + ... mimemsg = qmsg + ... mimedata = qdata + ... else: + ... rfc1153msg = qmsg + ... 
rfc1153data = qdata + >>> print mimemsg.as_string() + Content-Type: multipart/mixed; boundary="..." + MIME-Version: 1.0 + From: _xtest-request@example.com + Subject: XTest Digest, Vol 2, Issue 10 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + <BLANKLINE> + --... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Description: XTest Digest, Vol 2, Issue 10 + <BLANKLINE> + Send XTest mailing list submissions to + _xtest@example.com + <BLANKLINE> + To subscribe or unsubscribe via the World Wide Web, visit + http://www.example.com/listinfo/_xtest@example.com + or, via email, send a message with subject or body 'help' to + _xtest-request@example.com + <BLANKLINE> + You can reach the person managing the list at + _xtest-owner@example.com + <BLANKLINE> + When replying, please edit your Subject line so it is more specific + than "Re: Contents of XTest digest..." + <BLANKLINE> + --... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Description: Today's Topics (8 messages) + <BLANKLINE> + Today's Topics: + <BLANKLINE> + 1. Test message 1 (aperson@example.com) + 2. Test message 2 (aperson@example.com) + 3. Test message 3 (aperson@example.com) + 4. Test message 4 (aperson@example.com) + 5. Test message 5 (aperson@example.com) + 6. Test message 6 (aperson@example.com) + 7. Test message 7 (aperson@example.com) + 8. Test message 8 (aperson@example.com) + <BLANKLINE> + --... + Content-Type: multipart/digest; boundary="..." + MIME-Version: 1.0 + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 1 + Message: 1 + <BLANKLINE> + Here is message 1 + <BLANKLINE> + <BLANKLINE> + --... 
+ Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 2 + Message: 2 + <BLANKLINE> + Here is message 2 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 3 + Message: 3 + <BLANKLINE> + Here is message 3 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 4 + Message: 4 + <BLANKLINE> + Here is message 4 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 5 + Message: 5 + <BLANKLINE> + Here is message 5 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 6 + Message: 6 + <BLANKLINE> + Here is message 6 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 7 + Message: 7 + <BLANKLINE> + Here is message 7 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 8 + Message: 8 + <BLANKLINE> + Here is message 8 + <BLANKLINE> + <BLANKLINE> + --... + --... + >>> sorted(mimedata.items()) + [('_parsemsg', False), + ('isdigest', True), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('recips', set([])), ('version', 3)] + >>> print rfc1153msg.as_string() + From: _xtest-request@example.com + Subject: XTest Digest, Vol 2, Issue 10 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... 
+ MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + Send XTest mailing list submissions to + _xtest@example.com + <BLANKLINE> + To subscribe or unsubscribe via the World Wide Web, visit + http://www.example.com/listinfo/_xtest@example.com + or, via email, send a message with subject or body 'help' to + _xtest-request@example.com + <BLANKLINE> + You can reach the person managing the list at + _xtest-owner@example.com + <BLANKLINE> + When replying, please edit your Subject line so it is more specific + than "Re: Contents of XTest digest..." + <BLANKLINE> + <BLANKLINE> + Today's Topics: + <BLANKLINE> + 1. Test message 1 (aperson@example.com) + 2. Test message 2 (aperson@example.com) + 3. Test message 3 (aperson@example.com) + 4. Test message 4 (aperson@example.com) + 5. Test message 5 (aperson@example.com) + 6. Test message 6 (aperson@example.com) + 7. Test message 7 (aperson@example.com) + 8. Test message 8 (aperson@example.com) + <BLANKLINE> + <BLANKLINE> + ---------------------------------------------------------------------- + <BLANKLINE> + Message: 1 + From: aperson@example.com + Subject: Test message 1 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 1 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 2 + From: aperson@example.com + Subject: Test message 2 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 2 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 3 + From: aperson@example.com + Subject: Test message 3 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 3 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 4 + From: aperson@example.com + Subject: Test message 4 + To: _xtest@example.com + Message-ID: ... 
+ <BLANKLINE> + Here is message 4 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 5 + From: aperson@example.com + Subject: Test message 5 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 5 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 6 + From: aperson@example.com + Subject: Test message 6 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 6 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 7 + From: aperson@example.com + Subject: Test message 7 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 7 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 8 + From: aperson@example.com + Subject: Test message 8 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 8 + <BLANKLINE> + <BLANKLINE> + End of XTest Digest, Vol 2, Issue 10 + ************************************ + <BLANKLINE> + >>> sorted(rfc1153data.items()) + [('_parsemsg', False), + ('isdigest', True), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('recips', set([])), ('version', 3)] + + +Internationalized digests +------------------------- + +When messages come in with a content-type character set different than that of +the list's preferred language, recipients will get an internationalized +digest. French is not enabled by default site-wide, so enable that now. + +XXX We also have to set the default server language to French, otherwise the +English template will be found and the masthead won't be translated. + + >>> config.languages.enable_language('fr') + >>> config.DEFAULT_SERVER_LANGUAGE = u'fr' + >>> mlist.preferred_language = u'fr' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... To: _xtest@example.com + ... Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?= + ... MIME-Version: 1.0 + ... 
Content-Type: text/plain; charset=iso-2022-jp + ... Content-Transfer-Encoding: 7bit + ... + ... \x1b$B0lHV\x1b(B + ... """) + +Set the digest threshold to zero so that the digests will be sent immediately. + + >>> mlist.digest_size_threshold = 0 + >>> process(mlist, msg, {}) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> len(switchboard.files) + 2 + >>> for filebase in switchboard.files: + ... qmsg, qdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... if qmsg.is_multipart(): + ... mimemsg = qmsg + ... mimedata = qdata + ... else: + ... rfc1153msg = qmsg + ... rfc1153data = qdata + >>> print mimemsg.as_string() + Content-Type: multipart/mixed; boundary="..." + MIME-Version: 1.0 + From: _xtest-request@example.com + Subject: Groupe XTest, Vol. 2, Parution 11 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + <BLANKLINE> + --... + Content-Type: text/plain; charset="iso-8859-1" + MIME-Version: 1.0 + Content-Transfer-Encoding: quoted-printable + Content-Description: Groupe XTest, Vol. 2, Parution 11 + <BLANKLINE> + Envoyez vos messages pour la liste XTest =E0 + _xtest@example.com + <BLANKLINE> + Pour vous (d=E9s)abonner par le web, consultez + http://www.example.com/listinfo/_xtest@example.com + <BLANKLINE> + ou, par courriel, envoyez un message avec =AB=A0help=A0=BB dans le corps ou + dans le sujet =E0 + _xtest-request@example.com + <BLANKLINE> + Vous pouvez contacter l'administrateur de la liste =E0 l'adresse + _xtest-owner@example.com + <BLANKLINE> + Si vous r=E9pondez, n'oubliez pas de changer l'objet du message afin + qu'il soit plus sp=E9cifique que =AB=A0Re: Contenu du groupe de XTest...=A0= + =BB + <BLANKLINE> + --... + Content-Type: text/plain; charset="utf-8" + MIME-Version: 1.0 + Content-Transfer-Encoding: base64 + Content-Description: Today's Topics (1 messages) + <BLANKLINE> + VGjDqG1lcyBkdSBqb3VyIDoKCiAgIDEuIOS4gOeVqiAoYXBlcnNvbkBleGFtcGxlLm9yZykK + <BLANKLINE> + --... 
+ Content-Type: multipart/digest; boundary="..." + MIME-Version: 1.0 + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + Content-Transfer-Encoding: 7bit + From: aperson@example.org + MIME-Version: 1.0 + To: _xtest@example.com + Content-Type: text/plain; charset=iso-2022-jp + Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?= + Message: 1 + <BLANKLINE> + $B0lHV(B + <BLANKLINE> + <BLANKLINE> + --... + --... + >>> sorted(mimedata.items()) + [('_parsemsg', False), + ('isdigest', True), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('recips', set([])), ('version', 3)] + >>> print rfc1153msg.as_string() + From: _xtest-request@example.com + Subject: Groupe XTest, Vol. 2, Parution 11 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + <BLANKLINE> + ... + <BLANKLINE> + >>> sorted(rfc1153data.items()) + [('_parsemsg', False), + ('isdigest', True), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('recips', set([])), ('version', 3)] + + +Clean up +-------- + + >>> config.DEFAULT_SERVER_LANGUAGE = u'en' diff --git a/mailman/pipeline/docs/file-recips.txt b/mailman/pipeline/docs/file-recips.txt new file mode 100644 index 000000000..03328f97e --- /dev/null +++ b/mailman/pipeline/docs/file-recips.txt @@ -0,0 +1,97 @@ +File recipients +=============== + +Mailman can calculate the recipients for a message from a Sendmail-style +include file. This file must be called members.txt and it must live in the +list's data directory. + + >>> from mailman.configuration import config + >>> handler = config.handlers['file-recipients'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + + +Short circuiting +---------------- + +If the message's metadata already has recipients, this handler immediately +returns. + + >>> msg = message_from_string("""\ + ... 
From: aperson@example.com + ... + ... A message. + ... """) + >>> msgdata = {'recips': 7} + >>> handler.process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + <BLANKLINE> + A message. + <BLANKLINE> + >>> msgdata + {'recips': 7} + + +Missing file +------------ + +The include file must live inside the list's data directory, under the name +members.txt. If the file doesn't exist, the list of recipients will be +empty. + + >>> import os + >>> file_path = os.path.join(mlist.full_path, 'members.txt') + >>> open(file_path) + Traceback (most recent call last): + ... + IOError: [Errno ...] + No such file or directory: u'.../_xtest@example.com/members.txt' + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [] + + +Existing file +------------- + +If the file exists, it contains a list of addresses, one per line. These +addresses are returned as the set of recipients. + + >>> fp = open(file_path, 'w') + >>> try: + ... print >> fp, 'bperson@example.com' + ... print >> fp, 'cperson@example.com' + ... print >> fp, 'dperson@example.com' + ... print >> fp, 'eperson@example.com' + ... print >> fp, 'fperson@example.com' + ... print >> fp, 'gperson@example.com' + ... finally: + ... fp.close() + + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + ['bperson@example.com', 'cperson@example.com', 'dperson@example.com', + 'eperson@example.com', 'fperson@example.com', 'gperson@example.com'] + +However, if the sender of the original message is a member of the list and +their address is in the include file, the sender's address is /not/ included +in the recipients list. + + >>> from mailman.interfaces import MemberRole + >>> address_1 = config.db.user_manager.create_address( + ... u'cperson@example.com') + >>> address_1.subscribe(mlist, MemberRole.member) + <Member: cperson@example.com on _xtest@example.com as MemberRole.member> + + >>> msg = message_from_string("""\ + ... 
From: cperson@example.com + ... + ... A message. + ... """) + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + ['bperson@example.com', 'dperson@example.com', + 'eperson@example.com', 'fperson@example.com', 'gperson@example.com'] diff --git a/mailman/pipeline/docs/filtering.txt b/mailman/pipeline/docs/filtering.txt new file mode 100644 index 000000000..c5dca1531 --- /dev/null +++ b/mailman/pipeline/docs/filtering.txt @@ -0,0 +1,341 @@ +Content filtering +================= + +Mailman can filter the content of messages posted to a mailing list by +stripping MIME subparts, and possibly reorganizing the MIME structure of a +message. It does this with the MimeDel handler module, although other +handlers can potentially do other kinds of finer level content filtering. + + >>> from mailman.pipeline.mime_delete import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Several mailing list options control content filtering. First, the feature +must be enabled, then there are two options that control which MIME types get +filtered and which get passed. Finally, there is an option to control whether +text/html parts will get converted to plain text. Let's set up some defaults +for these variables, then we'll explain them in more detail below. + + >>> mlist.filter_content = True + >>> mlist.filter_mime_types = [] + >>> mlist.pass_mime_types = [] + >>> mlist.convert_html_to_plaintext = False + + +Filtering the outer content type +-------------------------------- + +A simple filtering setting will just search the content types of the message's +parts, discarding all parts with a matching MIME type. If the message's outer +content type matches the filter, the entire message will be discarded. 
+ + >>> mlist.filter_mime_types = ['image/jpeg'] + >>> # XXX Change this to an enum + >>> mlist.filter_action = 0 # Discard + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxxxx + ... """) + >>> process(mlist, msg, {}) + Traceback (most recent call last): + ... + DiscardMessage + +However, if we turn off content filtering altogether, then the handler +short-circuits. + + >>> mlist.filter_content = False + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {} + +Similarly, no content filtering is performed on digest messages, which are +crafted internally by Mailman. + + >>> mlist.filter_content = True + >>> msgdata = {'isdigest': True} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {'isdigest': True} + + +Simple multipart filtering +-------------------------- + +If one of the subparts in a multipart message matches the filter type, then +just that subpart will be stripped. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUNDARY + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUNDARY-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... 
+ <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + + +Collapsing multipart/alternative messages +----------------------------------------- + +When content filtering encounters a multipart/alternative part, and the +results of filtering leave only one of the subparts, then the +multipart/alternative may be collapsed. For example, in the following +message, the outer content type is a multipart/mixed. Inside this part is +just a single subpart that has a content type of multipart/alternative. This +inner multipart has two subparts, a jpeg and a gif. + +Content filtering will remove the jpeg part, leaving the multipart/alternative +with only a single gif subpart. Because there's only one subpart left, the +MIME structure of the message will be reorganized, removing the inner +multipart/alternative so that the outer multipart/mixed has just a single gif +subpart. + + >>> mlist.collapse_alternatives = True + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: multipart/alternative; boundary=BOUND2 + ... MIME-Version: 1.0 + ... + ... --BOUND2 + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUND2 + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUND2-- + ... + ... --BOUNDARY-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + +When the outer part is a multipart/alternative and filtering leaves this outer +part with just one subpart, the entire message is converted to the left over +part's content type. 
In other words, the left over inner part is promoted to +being the outer part. + + >>> mlist.filter_mime_types.append('text/html') + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/alternative; boundary=AAA + ... + ... --AAA + ... Content-Type: text/html + ... + ... <b>This is some html</b> + ... --AAA + ... Content-Type: text/plain + ... + ... This is plain text + ... --AAA-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + This is plain text + +Clean up. + + >>> ignore = mlist.filter_mime_types.pop() + + +Conversion to plain text +------------------------ + +Many mailing lists prohibit HTML email, and in fact, such email can be a +phishing or spam vector. However, many mail readers will send HTML email by +default because users think it looks pretty. One approach to handling this +would be to filter out text/html parts and rely on multipart/alternative +collapsing to leave just a plain text part. This works because many mail +readers that send HTML email actually send a plain text part in the second +subpart of such multipart/alternatives. + +While this is a good suggestion for plain text-only mailing lists, often a +mail reader will send only a text/html part with no plain text alternative. +In this case, the site administrator can enable text/html to text/plain +conversion by defining a conversion command. A list administrator still needs +to enable such conversion for their list though. + + >>> mlist.convert_html_to_plaintext = True + +By default, Mailman sends the message through lynx, but since this program is +not guaranteed to exist, we'll craft a simple, but stupid script to simulate +the conversion process. The script expects a single argument, which is the +name of the file containing the message payload to filter. 
+ + >>> import os, sys + >>> script_path = os.path.join(config.DATA_DIR, 'filter.py') + >>> fp = open(script_path, 'w') + >>> try: + ... print >> fp, """\ + ... import sys + ... print 'Converted text/html to text/plain' + ... print 'Filename:', sys.argv[1] + ... """ + ... finally: + ... fp.close() + >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % ( + ... sys.executable, script_path) + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: text/html + ... MIME-Version: 1.0 + ... + ... <html><head></head> + ... <body></body></html> + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + MIME-Version: 1.0 + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + Converted text/html to text/plain + Filename: ... + <BLANKLINE> + + +Discarding empty parts +---------------------- + +Similarly, if after filtering a multipart section ends up empty, then the +entire multipart is discarded. For example, here's a message where an inner +multipart/mixed contains two jpeg subparts. Both jpegs are filtered out, so +the entire inner multipart/mixed is discarded. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=AAA + ... + ... --AAA + ... Content-Type: multipart/mixed; boundary=BBB + ... + ... --BBB + ... Content-Type: image/jpeg + ... + ... xxx + ... --BBB + ... Content-Type: image/jpeg + ... + ... yyy + ... --BBB--- + ... --AAA + ... Content-Type: multipart/alternative; boundary=CCC + ... + ... --CCC + ... Content-Type: text/html + ... + ... <h2>This is a header</h2> + ... + ... --CCC + ... Content-Type: text/plain + ... + ... A different message + ... --CCC-- + ... --AAA + ... Content-Type: image/gif + ... + ... zzz + ... --AAA + ... Content-Type: image/gif + ... + ... aaa + ... --AAA-- + ... 
""") + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=AAA + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --AAA + MIME-Version: 1.0 + Content-Type: text/plain + <BLANKLINE> + Converted text/html to text/plain + Filename: ... + <BLANKLINE> + --AAA + Content-Type: image/gif + <BLANKLINE> + zzz + --AAA + Content-Type: image/gif + <BLANKLINE> + aaa + --AAA-- + <BLANKLINE> + + +Passing MIME types +------------------ + +XXX Describe the pass_mime_types setting and how it interacts with +filter_mime_types. diff --git a/mailman/pipeline/docs/nntp.txt b/mailman/pipeline/docs/nntp.txt new file mode 100644 index 000000000..5652d7924 --- /dev/null +++ b/mailman/pipeline/docs/nntp.txt @@ -0,0 +1,68 @@ +NNTP (i.e. Usenet) Gateway +========================== + +Mailman has an NNTP gateway, whereby messages posted to the mailing list can +be forwarded onto an NNTP newsgroup. Typically this means Usenet, but since +NNTP is to Usenet as IP is to the web, it's more general than that. + + >>> from mailman.queue import Switchboard + >>> from mailman.configuration import config + >>> handler = config.handlers['to-usenet'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + >>> switchboard = Switchboard(config.NEWSQUEUE_DIR) + +Gatewaying from the mailing list to the newsgroup happens through a separate +'nntp' queue and happens immediately when the message is posted through to the +list. Note that gatewaying from the newsgroup to the list happens via a +cronjob (currently not shown). + +There are several situations which prevent a message from being gatewayed to +the newsgroup. The feature could be disabled, as is the default. + + >>> mlist.gateway_to_news = False + >>> msg = message_from_string("""\ + ... Subject: An important message + ... + ... Something of great import. + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> switchboard.files + [] + +Even if enabled, messages that came from the newsgroup are never gated back to +the newsgroup. + + >>> mlist.gateway_to_news = True + >>> handler.process(mlist, msg, {'fromusenet': True}) + >>> switchboard.files + [] + +Neither are digests ever gated to the newsgroup. + + >>> handler.process(mlist, msg, {'isdigest': True}) + >>> switchboard.files + [] + +However, other posted messages get gated to the newsgroup via the nntp queue. +The list owner can set the linked newsgroup and the nntp host that its +messages are gated to. + + >>> mlist.linked_newsgroup = u'comp.lang.thing' + >>> mlist.nntp_host = u'news.example.com' + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> msg, msgdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print msg.as_string() + Subject: An important message + <BLANKLINE> + Something of great import. + <BLANKLINE> + >>> sorted(msgdata.items()) + [('_parsemsg', False), + ('listname', u'_xtest@example.com'), + ('received_time', ...), + ('version', 3)] diff --git a/mailman/pipeline/docs/reply-to.txt b/mailman/pipeline/docs/reply-to.txt new file mode 100644 index 000000000..ad9100ce1 --- /dev/null +++ b/mailman/pipeline/docs/reply-to.txt @@ -0,0 +1,128 @@ +Reply-to munging +================ + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. 
+ + >>> from mailman.pipeline.cook_headers import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + +Reply-to munging refers to the behavior where a mailing list can be configured +to change or augment an existing Reply-To header in a message posted to the +list. Reply-to munging is fairly controversial, with arguments made either +for or against munging. + +The Mailman developers, and I believe the majority consensus is to do no +Reply-to munging, under several principles. Primarily, most reply-to munging +is requested by people who do not have both a Reply and Reply All button on +their mail reader. If you do not munge Reply-To, then these buttons will work +properly, but if you munge the header, it is impossible for these buttons to +work right, because both will reply to the list. This leads to unfortunate +accidents where a private message is accidentally posted to the entire list. + +However, Mailman gives list owners the option to do Reply-To munging anyway, +mostly as a way to shut up the really vocal minority who seem to insist on +this mis-feature. + + +Reply to list +------------- + +A list can be configured to add a Reply-To header pointing back to the mailing +list's posting address. If there's no Reply-To header in the original +message, the list's posting address simply gets inserted. + + >>> from mailman.interfaces import ReplyToMunging + >>> mlist.reply_goes_to_list = ReplyToMunging.point_to_list + >>> mlist.preferred_language = u'en' + >>> mlist.description = u'' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'_xtest@example.com' + +It's also possible to strip any existing Reply-To header first, before adding +the list's posting address. 
+ + >>> mlist.first_strip_reply_to = True + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'_xtest@example.com' + +If you don't first strip the header, then the list's posting address will just +get appended to whatever the original version was. + + >>> mlist.first_strip_reply_to = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'bperson@example.com, _xtest@example.com' + + +Explicit Reply-To +----------------- + +The list can also be configured to have an explicit Reply-To header. + + >>> mlist.reply_goes_to_list = ReplyToMunging.explicit_header + >>> mlist.reply_to_address = u'my-list@example.com' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com' + +And as before, it's possible to either strip any existing Reply-To header... + + >>> mlist.first_strip_reply_to = True + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com' + +...or not. + + >>> mlist.first_strip_reply_to = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com, bperson@example.com' diff --git a/mailman/pipeline/docs/replybot.txt b/mailman/pipeline/docs/replybot.txt new file mode 100644 index 000000000..2e3765cab --- /dev/null +++ b/mailman/pipeline/docs/replybot.txt @@ -0,0 +1,216 @@ +Auto-reply handler +================== + +Mailman has an auto-reply handler that sends automatic responses to messages +it receives on its posting address, or special robot addresses. Automatic +responses are subject to various conditions, such as headers in the original +message or the amount of time since the last auto-response. + + >>> from mailman.pipeline.replybot import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.real_name = u'XTest' + >>> mlist.web_page_url = u'http://www.example.com/' + + >>> # Ensure that the virgin queue is empty, since we'll be checking this + >>> # for new auto-response messages. + >>> from mailman.queue import Switchboard + >>> virginq = Switchboard(config.VIRGINQUEUE_DIR) + >>> virginq.files + [] + + +Basic autoresponding +-------------------- + +Basic autoresponding occurs when the list is set up to respond to either its +-owner address, its -request address, or to the posting address, and a message +is sent to one of these addresses. A mailing list also has an autoresponse +grace period which describes how much time must pass before a second response +will be sent, with 0 meaning "there is no grace period". + + >>> import datetime + >>> mlist.autorespond_admin = True + >>> mlist.autoresponse_graceperiod = datetime.timedelta() + >>> mlist.autoresponse_admin_text = u'admin autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest-owner@example.com + ... + ... help + ... 
""") + >>> process(mlist, msg, dict(toowner=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> # Print only some of the meta data. The rest is uninteresting. + >>> qdata['listname'] + u'_xtest@example.com' + >>> sorted(qdata['recips']) + [u'aperson@example.com'] + >>> # Delete data that is time dependent or random + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + admin autoresponse text + >>> virginq.files + [] + + +Short circuiting +---------------- + +Several headers in the original message determine whether an autoresponse +should even be sent. For example, if the message has an "X-Ack: No" header, +no auto-response is sent. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-Ack: No + ... + ... help me + ... """) + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + +Mailman itself can suppress autoresponses for certain types of internally +crafted messages, by setting the 'noack' metadata key. + + >>> msg = message_from_string("""\ + ... From: mailman@example.com + ... + ... help for you + ... """) + >>> process(mlist, msg, dict(noack=True, toowner=True)) + >>> virginq.files + [] + +If there is a Precedence: header with any of the values 'bulk', 'junk', or +'list', then the autoresponse is also suppressed. + + >>> msg = message_from_string("""\ + ... From: asystem@example.com + ... Precedence: bulk + ... + ... hey! + ... 
""") + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + + >>> msg.replace_header('precedence', 'junk') + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + >>> msg.replace_header('precedence', 'list') + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + +Unless the X-Ack: header has a value of "yes", in which case, the Precedence +header is ignored. + + >>> msg['X-Ack'] = 'yes' + >>> process(mlist, msg, dict(toowner=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: asystem@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + admin autoresponse text + + +Available auto-responses +------------------------ + +As shown above, a message sent to the -owner address will get an auto-response +with the text set for owner responses. Two other types of email will get +auto-responses: those sent to the -request address... + + >>> mlist.autorespond_requests = True + >>> mlist.autoresponse_request_text = u'robot autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest-request@example.com + ... + ... help me + ... 
""") + >>> process(mlist, msg, dict(torequest=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + robot autoresponse text + +...and those sent to the posting address. + + >>> mlist.autorespond_postings = True + >>> mlist.autoresponse_postings_text = u'postings autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... + ... help me + ... """) + >>> process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + postings autoresponse text + + +Grace periods +------------- + +Auto-responses have a grace period, during which no additional responses will +be sent. This is so as not to bombard the sender with responses. The grace +period is measured in days. + +XXX Add grace period tests. diff --git a/mailman/pipeline/docs/scrubber.txt b/mailman/pipeline/docs/scrubber.txt new file mode 100644 index 000000000..744925f34 --- /dev/null +++ b/mailman/pipeline/docs/scrubber.txt @@ -0,0 +1,214 @@ +The scrubber +============ + +The scrubber is an integral part of Mailman, both in the normal delivery of +messages and in components such as the archiver. 
Its primary purpose is to +scrub attachments from messages so that binary goop doesn't end up in an +archive message. + + >>> from mailman.pipeline.scrubber import process, save_attachment + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Helper functions for getting the attachment data. + + >>> import os, re + >>> def read_attachment(filename, remove=True): + ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, + ... mlist.fqdn_listname, filename) + ... fp = open(path) + ... try: + ... data = fp.read() + ... finally: + ... fp.close() + ... if remove: + ... os.unlink(path) + ... return data + + >>> from urlparse import urlparse + >>> def read_url_from_message(msg): + ... url = None + ... for line in msg.get_payload().splitlines(): + ... mo = re.match('URL: <(?P<url>[^>]+)>', line) + ... if mo: + ... url = mo.group('url') + ... break + ... path = '/'.join(urlparse(url).path.split('/')[3:]) + ... return read_attachment(path) + + +Saving attachments +------------------ + +The Scrubber handler exposes a function called save_attachment() which can be +used to strip various types of attachments and store them in the archive +directory. This is a public interface used by components outside the normal +processing pipeline. + +Site administrators can decide whether the scrubber should use the attachment +filename suggested in the message's Content-Disposition: header or not. If +enabled, the filename will be used when this header attribute is present (yes, +this is an unfortunate double negative). + + >>> config.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME = False + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... 
""") + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif>' + >>> data = read_attachment('dir/xtest.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + +Saving the attachment does not alter the original message. + + >>> print msg.as_string() + Content-Type: image/gif; name="xtest.gif" + Content-Transfer-Encoding: base64 + Content-Disposition: attachment; filename="xtest.gif" + <BLANKLINE> + R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + +The site administrator can also configure Mailman to ignore the +Content-Disposition: filename. This is the default for reasons described in +the Defaults.py.in file. + + >>> config.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME = True + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... """) + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif>' + >>> data = read_attachment('dir/xtest.gif') + Traceback (most recent call last): + IOError: [Errno ...] No such file or directory: + u'.../archives/private/_xtest@example.com/dir/xtest.gif' + >>> data = read_attachment('dir/attachment.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing image attachments +--------------------------- + +When scrubbing image attachments, the original message is modified to include +a reference to the attachment file as available through the on-line archive. + + >>> msg = message_from_string("""\ + ... MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii + ... + ... This is a message. + ... --BOUNDARY + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... 
Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + +The Scrubber.process() function is different than other handler process +functions in that it returns the scrubbed message. + + >>> scrubbed_msg = process(mlist, msg, msgdata) + >>> scrubbed_msg is msg + True + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... + Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + +This is the same as the transformed message originally passed in. + + >>> print msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... + Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + >>> msgdata + {} + +The URL will point to the attachment sitting in the archive. + + >>> data = read_url_from_message(msg) + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing text attachments +-------------------------- + +Similar to image attachments, text attachments will also be scrubbed, but the +placeholder will be slightly different. + + >>> msg = message_from_string("""\ + ... MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no + ... + ... This is a message. + ... --BOUNDARY + ... 
Content-type: text/plain; name="xtext.txt" + ... Content-Disposition: attachment; filename="xtext.txt" + ... + ... This is a text attachment. + ... --BOUNDARY-- + ... """) + >>> scrubbed_msg = process(mlist, msg, {}) + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Transfer-Encoding: 7bit + Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no" + <BLANKLINE> + This is a message. + -------------- next part -------------- + An embedded and charset-unspecified text was scrubbed... + Name: xtext.txt + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt> + <BLANKLINE> + >>> read_url_from_message(msg) + 'This is a text attachment.' diff --git a/mailman/pipeline/docs/subject-munging.txt b/mailman/pipeline/docs/subject-munging.txt new file mode 100644 index 000000000..02677d6e2 --- /dev/null +++ b/mailman/pipeline/docs/subject-munging.txt @@ -0,0 +1,245 @@ +Subject munging +=============== + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. + + >>> from mailman.pipeline.cook_headers import process + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + + +Inserting a prefix +------------------ + +Another thing CookHeaders does is 'munge' the Subject header by inserting the +subject prefix for the list at the front. If there's no subject header in the +original message, Mailman uses a canned default. In order to do subject +munging, a mailing list must have a preferred language. 
+ + >>> mlist.subject_prefix = u'[XTest] ' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + +The original subject header is stored in the message metadata. We must print +the new Subject header because it gets converted from a string to an +email.header.Header instance which has an unhelpful repr. + + >>> msgdata['origsubj'] + u'' + >>> print msg['subject'] + [XTest] (no subject) + +If the original message had a Subject header, then the prefix is inserted at +the beginning of the header's value. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['origsubj'] + u'Something important' + >>> print msg['subject'] + [XTest] Something important + +Subject headers are not munged for digest messages. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(isdigest=True)) + >>> msg['subject'] + u'Something important' + +Nor are they munged for 'fast tracked' messages, which are generally defined +as messages that Mailman crafts internally. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(_fasttrack=True)) + >>> msg['subject'] + u'Something important' + +If a Subject header already has a prefix, usually following a Re: marker, +another one will not be added but the prefix will be moved to the front of the +header text. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Re: [XTest] Something important + ... + ... A message of great import. + ... 
""") + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Re: Something important + +If the Subject header has a prefix at the front of the header text, that's +where it will stay. This is called 'new style' prefixing and is the only +option available in Mailman 3. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: [XTest] Re: Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Re: Something important + + +Internationalized headers +------------------------- + +Internationalization adds some interesting twists to the handling of subject +prefixes. Part of what makes this interesting is the encoding of i18n headers +using RFC 2047, and lists whose preferred language is in a different character +set than the encoded header. + + >>> msg = message_from_string("""\ + ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + >>> unicode(msg['subject']) + u'[XTest] \u30e1\u30fc\u30eb\u30de\u30f3' + + +Prefix numbers +-------------- + +Subject prefixes support a placeholder for the numeric post id. Every time a +message is posted to the mailing list, a 'post id' gets incremented. This is +a purely sequential integer that increases monotonically. By adding a '%d' +placeholder to the subject prefix, this post id can be included in the prefix. + + >>> mlist.subject_prefix = u'[XTest %d] ' + >>> mlist.post_id = 456 + >>> msg = message_from_string("""\ + ... Subject: Something important + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Something important + +This works even when the message is a reply, except that in this case, the +numeric post id in the generated subject prefix is updated with the new post +id. + + >>> msg = message_from_string("""\ + ... 
Subject: [XTest 123] Re: Something important + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: Something important + +If the Subject header had old style prefixing, the prefix is moved to the +front of the header text. + + >>> msg = message_from_string("""\ + ... Subject: Re: [XTest 123] Something important + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: Something important + + +And of course, the proper thing is done when posting id numbers are included +in the subject prefix, and the subject is encoded non-ascii. + + >>> msg = message_from_string("""\ + ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + >>> unicode(msg['subject']) + u'[XTest 456] \u30e1\u30fc\u30eb\u30de\u30f3' + +Even more fun is when the i18n Subject header already has a prefix, possibly +with a different posting number. + + >>> msg = message_from_string("""\ + ... Subject: [XTest 123] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + +# XXX This requires Python email patch #1681333 to succeed. +# >>> unicode(msg['subject']) +# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3' + +As before, old style subject prefixes are re-ordered. + + >>> msg = message_from_string("""\ + ... Subject: Re: [XTest 123] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: + =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + +# XXX This requires Python email patch #1681333 to succeed. +# >>> unicode(msg['subject']) +# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3' + + +In this test case, we get an extra space between the prefix and the original +subject. 
It's because the original is 'crooked'. Note that a Subject +starting with '\n ' is generated by some version of Eudora Japanese edition. + + >>> mlist.subject_prefix = u'[XTest] ' + >>> msg = message_from_string("""\ + ... Subject: + ... Important message + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Important message + +And again, with an RFC 2047 encoded header. + + >>> msg = message_from_string("""\ + ... Subject: + ... =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + +# XXX This one does not appear to work the same way as +# test_subject_munging_prefix_crooked() in the old Python-based tests. I need +# to get Tokio to look at this. +# >>> print msg['subject'] +# [XTest] =?iso-2022-jp?b?IBskQiVhITwlayVeJXMbKEI=?= diff --git a/mailman/pipeline/docs/tagger.txt b/mailman/pipeline/docs/tagger.txt new file mode 100644 index 000000000..778f7cc73 --- /dev/null +++ b/mailman/pipeline/docs/tagger.txt @@ -0,0 +1,237 @@ +Message tagger +============== + +Mailman has a topics system which works like this: a mailing list +administrator sets up one or more topics, each of which is essentially a named regular +expression. The topic name can be any arbitrary string, and the name serves +double duty as the 'topic tag'. Each message that flows through the mailing list has +its Subject: and Keywords: headers compared against these regular +expressions. The message then gets tagged with the topic names of each hit. + + >>> from mailman.pipeline.tagger import process + >>> from mailman.queue import Switchboard + >>> from mailman.configuration import config + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Topics must be enabled for Mailman to do any topic matching, even if topics +are defined. + + >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)] + >>> mlist.topics_enabled = False + >>> mlist.topics_bodylines_limit = 0 + + >>> msg = message_from_string("""\ + ... 
Subject: foobar + ... Keywords: barbaz + ... + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + <BLANKLINE> + <BLANKLINE> + >>> msgdata + {} + +However, once topics are enabled, messages will be tagged. There are two +artifacts of tagging: an X-Topics: header is added with the topic name, and +the message metadata gets a key with a list of matching topic names. + + >>> mlist.topics_enabled = True + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + X-Topics: bar fight + <BLANKLINE> + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning body lines +------------------- + +The tagger can also look at a certain number of body lines, but only for +Subject: and Keywords: header-like lines. When set to zero, no body lines are +scanned. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata + {} + +But let the tagger scan a few body lines and the matching headers will be +found. + + >>> mlist.topics_bodylines_limit = 5 + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... 
""") + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + X-Topics: bar fight + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +However, scanning stops at the first body line that doesn't look like a +header. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... This is not a header + ... Subject: foobar + ... Keywords: barbaz + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + This is not a header + Subject: foobar + Keywords: barbaz + >>> msgdata + {} + +When set to a negative number, all body lines will be scanned. + + >>> mlist.topics_bodylines_limit = -1 + >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100) + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... %s + ... Subject: foobar + ... Keywords: barbaz + ... """ % lots_of_headers) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> # Rather than print out 100 X-Ignore: headers, let's just prove that + >>> # the X-Topics: header exists, meaning that the tagger did its job. + >>> msg['x-topics'] + u'bar fight' + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning sub-parts +------------------ + +The tagger will also scan the body lines of text subparts in a multipart +message, using the same rules as if all those body lines lived in a single +text payload. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... 
""") + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Was + Keywords: Raw + Content-Type: multipart/alternative; boundary="BOUNDARY" + X-Topics: bar fight + <BLANKLINE> + --BOUNDARY + From: sabo + To: obas + <BLANKLINE> + Subject: farbaw + Keywords: barbaz + <BLANKLINE> + --BOUNDARY-- + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +But the tagger will not descend into non-text parts. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary=BOUNDARY + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg['x-topics'] + None + >>> msgdata + {} |
