diff options
Diffstat (limited to 'src/mailman/pipeline')
39 files changed, 6815 insertions, 0 deletions
diff --git a/src/mailman/pipeline/__init__.py b/src/mailman/pipeline/__init__.py new file mode 100644 index 000000000..f73061874 --- /dev/null +++ b/src/mailman/pipeline/__init__.py @@ -0,0 +1,54 @@ +# Copyright (C) 2008-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""The built in set of pipeline handlers.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'initialize', + ] + + +import os +import sys + +from mailman.interfaces.handler import IHandler + + + +def initialize(): + """Initialize the built-in handlers. + + Rules are auto-discovered by searching for IHandler implementations in all + importable modules in this subpackage. + """ + # Find all rules found in all modules inside our package. + import mailman.pipeline + here = os.path.dirname(mailman.pipeline.__file__) + for filename in os.listdir(here): + basename, extension = os.path.splitext(filename) + if extension <> '.py': + continue + module_name = 'mailman.pipeline.' 
+ basename + __import__(module_name, fromlist='*') + module = sys.modules[module_name] + for name in getattr(module, '__all__', ()): + handler = getattr(module, name) + if IHandler.implementedBy(handler): + yield handler diff --git a/src/mailman/pipeline/acknowledge.py b/src/mailman/pipeline/acknowledge.py new file mode 100644 index 000000000..de520df65 --- /dev/null +++ b/src/mailman/pipeline/acknowledge.py @@ -0,0 +1,80 @@ +# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Send an acknowledgment of the successful post to the sender. + +This only happens if the sender has set their AcknowledgePosts attribute. 
"""Send an acknowledgment of the successful post to the sender.

This only happens if the sender has set their AcknowledgePosts attribute.
"""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'Acknowledge',
    ]


from zope.interface import implements

from mailman import Message
from mailman import Utils
from mailman.i18n import _
from mailman.interfaces.handler import IHandler



class Acknowledge:
    """Send an acknowledgment."""
    implements(IHandler)

    name = 'acknowledge'
    description = _("""Send an acknowledgment of a posting.""")

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Sends a 'postack.txt' notification to the poster when the poster is
        a list member whose `acknowledge_posts` flag is set.  Does nothing
        for non-members or for members who have not asked for it.
        """
        # Prefer the sender recorded in the message metadata; fall back to
        # whatever the message itself reports.
        sender = msgdata.get('original_sender', msg.get_sender())
        member = mlist.members.get_member(sender)
        wants_ack = member is not None and member.acknowledge_posts
        if not wants_ack:
            # A non-member cannot have expressed a preference, and a member
            # without the flag has declined.
            return
        # Quote the subject the message arrived with.  BAW: do we want to
        # use the decoded header?
        fallback_subject = msg.get('subject', _('(no subject)'))
        original_subject = msgdata.get('origsubj', fallback_subject)
        # The acknowledgment is written in the member's language unless the
        # metadata overrides it.
        lang = msgdata.get('lang', member.preferred_language)
        # NOTE(review): this local presumably has to stay named `realname`;
        # the '$realname' placeholder passed to _() below is not given any
        # explicit value, so _() looks like it reads it from this frame --
        # confirm against mailman.i18n before renaming.
        realname = mlist.real_name
        substitutions = {
            'subject': Utils.oneline(original_subject,
                                     Utils.GetCharSet(lang)),
            'listname': realname,
            'listinfo_url': mlist.script_url('listinfo'),
            'optionsurl': member.options_url,
            }
        text = Utils.maketext(
            'postack.txt', substitutions,
            lang=lang, mlist=mlist, raw=True)
        # Build the notification from the list's bounces address to the
        # poster and hand it off for delivery.
        subject = _('$realname post acknowledgment')
        usermsg = Message.UserNotification(
            sender, mlist.bounces_address, subject, text, lang)
        usermsg.send(mlist)
+ +"""Perform some bookkeeping after a successful post.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'AfterDelivery', + ] + + +import datetime + +from zope.interface import implements + +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler + + + +class AfterDelivery: + """Perform some bookkeeping after a successful post.""" + + implements(IHandler) + + name = 'after-delivery' + description = _('Perform some bookkeeping after a successful post.') + + def process(self, mlist, msg, msgdata): + """See `IHander`.""" + mlist.last_post_time = datetime.datetime.now() + mlist.post_id += 1 diff --git a/src/mailman/pipeline/avoid_duplicates.py b/src/mailman/pipeline/avoid_duplicates.py new file mode 100644 index 000000000..0458e117c --- /dev/null +++ b/src/mailman/pipeline/avoid_duplicates.py @@ -0,0 +1,116 @@ +# Copyright (C) 2002-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""If the user wishes it, do not send duplicates of the same message. + +This module keeps an in-memory dictionary of Message-ID: and recipient pairs. 
+If a message with an identical Message-ID: is about to be sent to someone who +has already received a copy, we either drop the message, add a duplicate +warning header, or pass it through, depending on the user's preferences. +""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'AvoidDuplicates', + ] + + +from email.Utils import getaddresses, formataddr +from zope.interface import implements + +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler + + +COMMASPACE = ', ' + + + +class AvoidDuplicates: + """If the user wishes it, do not send duplicates of the same message.""" + + implements(IHandler) + + name = 'avoid-duplicates' + description = _('Suppress some duplicates of the same message.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + recips = msgdata.get('recips') + # Short circuit + if not recips: + return + # Seed this set with addresses we don't care about dup avoiding. + listaddrs = set((mlist.posting_address, + mlist.bounces_address, + mlist.owner_address, + mlist.request_address)) + explicit_recips = listaddrs.copy() + # Figure out the set of explicit recipients. + cc_addresses = {} + for header in ('to', 'cc', 'resent-to', 'resent-cc'): + addrs = getaddresses(msg.get_all(header, [])) + header_addresses = dict((addr, formataddr((name, addr))) + for name, addr in addrs + if addr) + if header == 'cc': + # Yes, it's possible that an address is mentioned in multiple + # CC headers using different names. In that case, the last + # real name will win, but that doesn't seem like such a big + # deal. Besides, how else would you chose? + cc_addresses.update(header_addresses) + # Ignore the list addresses for purposes of dup avoidance. + explicit_recips |= set(header_addresses) + # Now strip out the list addresses. 
+ explicit_recips -= listaddrs + if not explicit_recips: + # No one was explicitly addressed, so we can't do any dup + # collapsing + return + newrecips = set() + for r in recips: + # If this recipient is explicitly addressed... + if r in explicit_recips: + send_duplicate = True + # If the member wants to receive duplicates, or if the + # recipient is not a member at all, they will get a copy. + # header. + member = mlist.members.get_member(r) + if member and not member.receive_list_copy: + send_duplicate = False + # We'll send a duplicate unless the user doesn't wish it. If + # personalization is enabled, the add-dupe-header flag will + # add a X-Mailman-Duplicate: yes header for this user's + # message. + if send_duplicate: + msgdata.setdefault('add-dup-header', set()).add(r) + newrecips.add(r) + elif r in cc_addresses: + del cc_addresses[r] + else: + # Otherwise, this is the first time they've been in the recips + # list. Add them to the newrecips list and flag them as + # having received this message. + newrecips.add(r) + # Set the new list of recipients. XXX recips should always be a set. + msgdata['recips'] = list(newrecips) + # RFC 2822 specifies zero or one CC header + if cc_addresses: + del msg['cc'] + msg['CC'] = COMMASPACE.join(cc_addresses.values()) diff --git a/src/mailman/pipeline/calculate_recipients.py b/src/mailman/pipeline/calculate_recipients.py new file mode 100644 index 000000000..9837c1e6b --- /dev/null +++ b/src/mailman/pipeline/calculate_recipients.py @@ -0,0 +1,148 @@ +# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. 
"""Calculate the regular (i.e. non-digest) recipients of the message.

This module calculates the non-digest recipients for the message based on the
list's membership and configuration options.  It places the list of recipients
on the `recips' attribute of the message.  This attribute is used by the
SendmailDeliver and BulkDeliver modules.
"""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'CalculateRecipients',
    ]

from zope.interface import implements

from mailman import Utils
from mailman.config import config
from mailman.core import errors
from mailman.i18n import _
from mailman.interfaces.handler import IHandler
from mailman.interfaces.member import DeliveryStatus



class CalculateRecipients:
    """Calculate the regular (i.e. non-digest) recipients of the message."""

    implements(IHandler)

    name = 'calculate-recipients'
    description = _('Calculate the regular recipients of the message.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Stores the set of recipient addresses under msgdata['recips'].  Does
        nothing when that key is already present, even if it is empty.
        Raises `errors.RejectMessage` when the message carries an Urgent:
        header whose value does not authenticate.
        """
        if 'recips' in msgdata:
            return
        # Should the original sender be included in the recipients list?
        poster = mlist.members.get_member(msg.get_sender())
        skip_poster = bool(poster and not poster.receive_own_postings)
        # Support for urgent messages, which bypass digests and disabled
        # delivery and force an immediate delivery to all members.  The
        # site admin's password is deliberately /not/ accepted here, to
        # discourage sending it through email in the clear.
        #
        # XXX This is broken.
        missing = object()
        password = msg.get('urgent', missing)
        if password is not missing:
            roles = (config.AuthListModerator, config.AuthListAdmin)
            if not mlist.Authenticate(roles, password):
                # Bad Urgent: password, so reject the message rather than
                # deliver it normally; the sender should learn about it.
                # NOTE(review): other handlers in this package hand _()
                # $-style placeholders; confirm that %(realname)s is really
                # interpolated from the `realname` local below.
                realname = mlist.real_name
                text = _("""\
Your urgent message to the %(realname)s mailing list was not authorized for
delivery. The original message as received by Mailman is attached.
""")
                raise errors.RejectMessage(Utils.wrap(text))
            keys = mlist.getRegularMemberKeys() + mlist.getDigestMemberKeys()
            msgdata['recips'] = mlist.getMemberCPAddresses(keys)
            return
        # The regular recipients: every regular member with delivery
        # enabled.
        recips = set()
        for subscriber in mlist.regular_members.members:
            if subscriber.delivery_status == DeliveryStatus.enabled:
                recips.add(subscriber.address.address)
        # Remove the sender if they don't want to receive their own posts.
        if skip_poster:
            recips.discard(poster.address.address)
        # Handle topic classifications; this prunes `recips` in place.
        do_topic_filters(mlist, msg, msgdata, recips)
        msgdata['recips'] = recips



def do_topic_filters(mlist, msg, msgdata, recips):
    """Remove from `recips`, in place, users filtered out by topics.

    Nothing is removed when topics are disabled for the list, nor for users
    who selected no topics at all.  When msgdata['topichits'] is non-empty,
    a user with topics is kept only if one of them is among the hits.
    Otherwise a user with topics is kept only if their
    ReceiveNonmatchingTopics option is set.
    """
    if not mlist.topics_enabled:
        # MAS: if topics are currently disabled for the list, send to all
        # regardless of ReceiveNonmatchingTopics.
        return
    hits = msgdata.get('topichits')
    zaprecips = []
    for user in recips:
        utopics = mlist.getMemberTopics(user)
        if not utopics:
            # No topic selection means the user gets every posting.
            continue
        if hits:
            # The message hit some topics: keep the user if any of their
            # topics is among them (stops at the first match).
            keep = any(topic in hits for topic in utopics)
        else:
            # The message hit no topic: keep the user only if they asked
            # for non-matching messages.
            keep = mlist.getMemberOption(
                user, config.ReceiveNonmatchingTopics)
        if not keep:
            zaprecips.append(user)
    # Prune afterwards so `recips` is not modified while being iterated.
    for user in zaprecips:
        recips.remove(user)
+# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Cleanse certain headers from all messages.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Cleanse', + ] + + +import logging + +from email.Utils import formataddr +from zope.interface import implements + +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler +from mailman.pipeline.cook_headers import uheader + + +log = logging.getLogger('mailman.post') + + + +class Cleanse: + """Cleanse certain headers from all messages.""" + + implements(IHandler) + + name = 'cleanse' + description = _('Cleanse certain headers from all messages.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + # Remove headers that could contain passwords. + del msg['approved'] + del msg['approve'] + del msg['urgent'] + # We remove other headers from anonymous lists. + if mlist.anonymous_list: + log.info('post to %s from %s anonymized', + mlist.fqdn_listname, msg.get('from')) + del msg['from'] + del msg['reply-to'] + del msg['sender'] + # Hotmail sets this one + del msg['x-originating-email'] + i18ndesc = str(uheader(mlist, mlist.description, 'From')) + msg['From'] = formataddr((i18ndesc, mlist.posting_address)) + msg['Reply-To'] = mlist.posting_address + # Some headers can be used to fish for membership. + del msg['return-receipt-to'] + del msg['disposition-notification-to'] + del msg['x-confirm-reading-to'] + # Pegasus mail uses this one... sigh. + del msg['x-pmrqc'] + # Don't let this header be spoofed. See RFC 5064. + del msg['archived-at'] diff --git a/src/mailman/pipeline/cleanse_dkim.py b/src/mailman/pipeline/cleanse_dkim.py new file mode 100644 index 000000000..38623079c --- /dev/null +++ b/src/mailman/pipeline/cleanse_dkim.py @@ -0,0 +1,58 @@ +# Copyright (C) 2006-2009 by the Free Software Foundation, Inc. 
+# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Remove any 'DomainKeys' (or similar) headers. + +The values contained in these header lines are intended to be used by the +recipient to detect forgery or tampering in transit, and the modifications +made by Mailman to the headers and body of the message will cause these keys +to appear invalid. Removing them will at least avoid this misleading result, +and it will also give the MTA the opportunity to regenerate valid keys +originating at the Mailman server for the outgoing message. 
"""Remove any 'DomainKeys' (or similar) headers.

The values contained in these header lines are intended to be used by the
recipient to detect forgery or tampering in transit, and the modifications
made by Mailman to the headers and body of the message will cause these keys
to appear invalid.  Removing them will at least avoid this misleading result,
and it will also give the MTA the opportunity to regenerate valid keys
originating at the Mailman server for the outgoing message.
"""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'CleanseDKIM',
    ]


from lazr.config import as_boolean
from zope.interface import implements

from mailman.config import config
from mailman.i18n import _
from mailman.interfaces.handler import IHandler



class CleanseDKIM:
    """Remove DomainKeys headers."""

    implements(IHandler)

    name = 'cleanse-dkim'
    description = _('Remove DomainKeys headers.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Deletes the signature headers from `msg`, but only when the
        `remove_dkim_headers` setting of the [mta] configuration section is
        true.  `mlist` and `msgdata` are not used.
        """
        if not as_boolean(config.mta.remove_dkim_headers):
            return
        for header in ('domainkey-signature',
                       'dkim-signature',
                       'authentication-results'):
            del msg[header]
+ +"""Cook a message's headers.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'CookHeaders', + ] + + +import re + +from email.errors import HeaderParseError +from email.header import Header, decode_header, make_header +from email.utils import parseaddr, formataddr, getaddresses +from zope.interface import implements + +from mailman import Utils +from mailman.config import config +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler +from mailman.interfaces.mailinglist import Personalization, ReplyToMunging +from mailman.version import VERSION + + +CONTINUATION = ',\n\t' +COMMASPACE = ', ' +MAXLINELEN = 78 + +nonascii = re.compile('[^\s!-~]') + + + +def uheader(mlist, s, header_name=None, continuation_ws='\t', maxlinelen=None): + # Get the charset to encode the string in. Then search if there is any + # non-ascii character is in the string. If there is and the charset is + # us-ascii then we use iso-8859-1 instead. If the string is ascii only + # we use 'us-ascii' if another charset is specified. + charset = Utils.GetCharSet(mlist.preferred_language) + if nonascii.search(s): + # use list charset but ... + if charset == 'us-ascii': + charset = 'iso-8859-1' + else: + # there is no nonascii so ... + charset = 'us-ascii' + return Header(s, charset, maxlinelen, header_name, continuation_ws) + + + +def process(mlist, msg, msgdata): + # Set the "X-Ack: no" header if noack flag is set. + if msgdata.get('noack'): + del msg['x-ack'] + msg['X-Ack'] = 'no' + # Because we're going to modify various important headers in the email + # message, we want to save some of the information in the msgdata + # dictionary for later. Specifically, the sender header will get waxed, + # but we need it for the Acknowledge module later. + msgdata['original_sender'] = msg.get_sender() + # VirginRunner sets _fasttrack for internally crafted messages. 
+ fasttrack = msgdata.get('_fasttrack') + if not msgdata.get('isdigest') and not fasttrack: + try: + prefix_subject(mlist, msg, msgdata) + except (UnicodeError, ValueError): + # TK: Sometimes subject header is not MIME encoded for 8bit + # simply abort prefixing. + pass + # Mark message so we know we've been here, but leave any existing + # X-BeenThere's intact. + msg['X-BeenThere'] = mlist.posting_address + # Add Precedence: and other useful headers. None of these are standard + # and finding information on some of them are fairly difficult. Some are + # just common practice, and we'll add more here as they become necessary. + # Good places to look are: + # + # http://www.dsv.su.se/~jpalme/ietf/jp-ietf-home.html + # http://www.faqs.org/rfcs/rfc2076.html + # + # None of these headers are added if they already exist. BAW: some + # consider the advertising of this a security breach. I.e. if there are + # known exploits in a particular version of Mailman and we know a site is + # using such an old version, they may be vulnerable. It's too easy to + # edit the code to add a configuration variable to handle this. + if 'x-mailman-version' not in msg: + msg['X-Mailman-Version'] = VERSION + # We set "Precedence: list" because this is the recommendation from the + # sendmail docs, the most authoritative source of this header's semantics. + if 'precedence' not in msg: + msg['Precedence'] = 'list' + # Reply-To: munging. Do not do this if the message is "fast tracked", + # meaning it is internally crafted and delivered to a specific user. BAW: + # Yuck, I really hate this feature but I've caved under the sheer pressure + # of the (very vocal) folks want it. OTOH, RFC 2822 allows Reply-To: to + # be a list of addresses, so instead of replacing the original, simply + # augment it. RFC 2822 allows max one Reply-To: header so collapse them + # if we're adding a value, otherwise don't touch it. (Should we collapse + # in all cases?) 
+ if not fasttrack: + # A convenience function, requires nested scopes. pair is (name, addr) + new = [] + d = {} + def add(pair): + lcaddr = pair[1].lower() + if lcaddr in d: + return + d[lcaddr] = pair + new.append(pair) + # List admin wants an explicit Reply-To: added + if mlist.reply_goes_to_list == ReplyToMunging.explicit_header: + add(parseaddr(mlist.reply_to_address)) + # If we're not first stripping existing Reply-To: then we need to add + # the original Reply-To:'s to the list we're building up. In both + # cases we'll zap the existing field because RFC 2822 says max one is + # allowed. + if not mlist.first_strip_reply_to: + orig = msg.get_all('reply-to', []) + for pair in getaddresses(orig): + add(pair) + # Set Reply-To: header to point back to this list. Add this last + # because some folks think that some MUAs make it easier to delete + # addresses from the right than from the left. + if mlist.reply_goes_to_list == ReplyToMunging.point_to_list: + i18ndesc = uheader(mlist, mlist.description, 'Reply-To') + add((str(i18ndesc), mlist.posting_address)) + del msg['reply-to'] + # Don't put Reply-To: back if there's nothing to add! + if new: + # Preserve order + msg['Reply-To'] = COMMASPACE.join( + [formataddr(pair) for pair in new]) + # The To field normally contains the list posting address. However + # when messages are fully personalized, that header will get + # overwritten with the address of the recipient. We need to get the + # posting address in one of the recipient headers or they won't be + # able to reply back to the list. It's possible the posting address + # was munged into the Reply-To header, but if not, we'll add it to a + # Cc header. BAW: should we force it into a Reply-To header in the + # above code? + # Also skip Cc if this is an anonymous list as list posting address + # is already in From and Reply-To in this case. 
+ if (mlist.personalize == Personalization.full and + mlist.reply_goes_to_list <> ReplyToMunging.point_to_list and + not mlist.anonymous_list): + # Watch out for existing Cc headers, merge, and remove dups. Note + # that RFC 2822 says only zero or one Cc header is allowed. + new = [] + d = {} + for pair in getaddresses(msg.get_all('cc', [])): + add(pair) + i18ndesc = uheader(mlist, mlist.description, 'Cc') + add((str(i18ndesc), mlist.posting_address)) + del msg['Cc'] + msg['Cc'] = COMMASPACE.join([formataddr(pair) for pair in new]) + # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only + # if the message is being crafted for a specific list (e.g. not for the + # password reminders). + # + # BAW: Some people really hate the List-* headers. It seems that the free + # version of Eudora (possibly on for some platforms) does not hide these + # headers by default, pissing off their users. Too bad. Fix the MUAs. + if msgdata.get('_nolist') or not mlist.include_rfc2369_headers: + return + # This will act like an email address for purposes of formataddr() + listid = '{0}.{1}'.format(mlist.list_name, mlist.host_name) + cset = Utils.GetCharSet(mlist.preferred_language) + if mlist.description: + # Don't wrap the header since here we just want to get it properly RFC + # 2047 encoded. + i18ndesc = uheader(mlist, mlist.description, 'List-Id', maxlinelen=998) + listid_h = formataddr((str(i18ndesc), listid)) + else: + # without desc we need to ensure the MUST brackets + listid_h = '<{0}>'.format(listid) + # We always add a List-ID: header. + del msg['list-id'] + msg['List-Id'] = listid_h + # For internally crafted messages, we also add a (nonstandard), + # "X-List-Administrivia: yes" header. For all others (i.e. those coming + # from list posts), we add a bunch of other RFC 2369 headers. 
def prefix_subject(mlist, msg, msgdata):
    """Rewrite the Subject header so that it carries the list's prefix.

    Nothing happens when `mlist.subject_prefix` is empty or only whitespace.
    Otherwise any copy of the prefix already present in the subject is
    removed, a leading run of reply markers (Re:, AW:, SV:, VS:, optionally
    numbered like 'Re[2]:') is collapsed into a single 'Re:', and the header
    is rebuilt as prefix, optional 'Re:', then the remaining subject.

    Two keys are written to `msgdata`:

    origsubj
        The Subject value exactly as it was found on the message.
    stripped_subject
        A header object holding the optional 'Re:' plus the subject, without
        the prefix.

    NOTE(review): nothing here checks for digests or fast-tracked messages;
    presumably the caller decides whether to call this function -- confirm.
    """
    if not mlist.subject_prefix.strip():
        return
    prefix = mlist.subject_prefix
    subject = msg.get('subject', '')
    # Try to figure out what the continuation_ws is for the header.  It is
    # taken from the first character of the second physical line, when that
    # character is a space or a tab; otherwise a tab is used.
    if isinstance(subject, Header):
        lines = str(subject).splitlines()
    else:
        lines = subject.splitlines()
    ws = '\t'
    if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
        ws = lines[1][0]
    msgdata['origsubj'] = subject
    # The subject may be multilingual.  ch_oneline() returns it on a single
    # line together with the charset it is encoded in; when decoding fails
    # it returns the raw header text and 'us-ascii'.
    subject, cset = ch_oneline(subject)
    # TK: Python interpreter has evolved to be strict on ascii charset code
    # range.  It is safe to use unicode string when manipulating header
    # contents with re module.  It would be best to return unicode in
    # ch_oneline() but here is temporary solution.
    subject = unicode(subject, cset)
    # If the subject_prefix contains '%d', it is replaced with the
    # mailing list sequential number.  Sequential number format allows
    # '%d' or '%05d' like pattern.
    prefix_pattern = re.escape(prefix)
    # unescape '%' :-<
    prefix_pattern = '%'.join(prefix_pattern.split(r'\%'))
    p = re.compile(r'%\d*d')
    # The search starts at offset 1, so a placeholder at the very beginning
    # of the prefix is not detected.
    # NOTE(review): unclear whether that is intentional -- confirm.
    if p.search(prefix, 1):
        # The prefix has a number, so match the prefix with any number (and
        # optional surrounding whitespace) in the subject.
        prefix_pattern = p.sub(r'\s*\d+\s*', prefix_pattern)
    subject = re.sub(prefix_pattern, '', subject)
    rematch = re.match(r'((RE|AW|SV|VS)(\[\d+\])?:\s*)+', subject, re.I)
    if rematch:
        subject = subject[rematch.end():]
        recolon = 'Re:'
    else:
        recolon = ''
    # At this point, subject may become null if someone posted mail with
    # subject: [subject prefix]
    if subject.strip() == '':
        subject = _('(no subject)')
        cset = Utils.GetCharSet(mlist.preferred_language)
    # Substitute %d in prefix with post_id.  A prefix without a conversion
    # raises TypeError; a prefix with a stray or malformed '%' (for example
    # '[100%] ') raises ValueError.  In both cases use the prefix verbatim
    # rather than failing the whole message.
    try:
        prefix = prefix % mlist.post_id
    except (TypeError, ValueError):
        pass
    # Get the header as a Header instance, with proper unicode conversion.
    h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
    if recolon:
        h.append(recolon)
    # TK: Subject is concatenated and unicode string.
    subject = subject.encode(cset, 'replace')
    h.append(subject, cset)
    del msg['subject']
    msg['Subject'] = h
    ss = uheader(mlist, recolon, 'Subject', continuation_ws=ws)
    ss.append(subject, cset)
    msgdata['stripped_subject'] = ss


def ch_oneline(headerstr):
    """Decode a header value onto one line in a single charset.

    Returns a `(string, charset)` tuple.  On success the string is the
    decoded header with line breaks removed, encoded in `charset` (with
    unencodable characters replaced).  The charset is 'us-ascii' when no
    chunk of the header declares one, the declared charset when exactly one
    is used, and 'utf-8' when several different charsets appear.

    When decoding fails with LookupError, UnicodeError, ValueError or
    HeaderParseError, the undecoded header with line breaks removed is
    returned together with 'us-ascii'; callers can use that pairing to
    detect the failure.
    """
    # Copied and modified from ToDigest.py and Utils.py.
    try:
        d = decode_header(headerstr)
        # At this point, we should rstrip() every string because some
        # MUA deliberately add trailing spaces when composing return
        # message.
        d = [(s.rstrip(), c) for (s, c) in d]
        # Find all charsets in the original header.  We use 'utf-8' rather
        # than using the first charset (in mailman 2.1.x) if multiple
        # charsets are used.
        csets = []
        for (s, c) in d:
            if c and c not in csets:
                csets.append(c)
        if len(csets) == 0:
            cset = 'us-ascii'
        elif len(csets) == 1:
            cset = csets[0]
        else:
            cset = 'utf-8'
        h = make_header(d)
        ustr = unicode(h)
        oneline = ''.join(ustr.splitlines())
        return oneline.encode(cset, 'replace'), cset
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # Possibly a charset problem.  Return the undecoded string in one
        # line.
        return ''.join(headerstr.splitlines()), 'us-ascii'


class CookHeaders:
    """Modify message headers."""

    implements(IHandler)

    name = 'cook-headers'
    description = _('Modify message headers.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        # Delegates to the module-level process() function.
        process(mlist, msg, msgdata)
"""Decorate a message by sticking the header and footer around it."""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'Decorate',
    ]


import re
import logging

from email.MIMEText import MIMEText
from zope.interface import implements

from mailman import Utils
from mailman.Message import Message
from mailman.config import config
from mailman.i18n import _
from mailman.interfaces.handler import IHandler
from mailman.utilities.string import expand


log = logging.getLogger('mailman.error')


def _inline_text_part(text, charset):
    # Build a text/plain MIME part from `text`, encoded in `charset` and
    # marked for inline display.
    part = MIMEText(text.encode(charset), 'plain', charset)
    part['Content-Disposition'] = 'inline'
    return part


def process(mlist, msg, msgdata):
    """Add the list's message header and footer to `msg`, in place.

    Nothing is done when `msgdata` has a true 'isdigest' or 'nodecorate'
    value, or when both the expanded header and the expanded footer are
    empty.

    When `msgdata` has a true 'personalize' value, `msgdata['recips']` must
    contain exactly one address; per-recipient substitution values are then
    made available to the templates.  Any mapping under the 'decoration-data'
    key of `msgdata` is merged into the substitutions last.

    The decoration is applied in the least intrusive way that works:

    1. A non-multipart text/plain message gets the header and footer
       concatenated around its payload.
    2. A multipart/mixed message gets them as extra inline text/plain
       subparts.
    3. Anything else (or a text/plain message that could not be decoded) is
       wrapped: the original content becomes one subpart of a new
       multipart/mixed payload between the header and footer parts.
    """
    # Digests and Mailman-crafted messages should not get additional headers.
    if msgdata.get('isdigest') or msgdata.get('nodecorate'):
        return
    d = {}
    if msgdata.get('personalize'):
        # Calculate the extra personalization dictionary.  Note that the
        # length of the recips list better be exactly 1.
        recips = msgdata.get('recips', [])
        assert len(recips) == 1, (
            'The number of intended recipients must be exactly 1')
        recipient = recips[0].lower()
        user = config.db.user_manager.get_user(recipient)
        member = mlist.members.get_member(recipient)
        d['user_address'] = recipient
        if user is not None and member is not None:
            d['user_delivered_to'] = member.address.original_address
            # BAW: Hmm, should we allow this?
            d['user_password'] = user.password
            d['user_language'] = member.preferred_language
            d['user_name'] = (user.real_name if user.real_name
                              else member.address.original_address)
            d['user_optionsurl'] = member.options_url
    # Values supplied in the message metadata are merged last, so they can
    # add to or override the ones computed above.
    d.update(msgdata.get('decoration-data', {}))
    header = decorate(mlist, mlist.msg_header, d)
    footer = decorate(mlist, mlist.msg_footer, d)
    # Escape hatch if both the footer and header are empty.
    if not header and not footer:
        return
    # Be MIME smart here.  We only attach the header and footer by
    # concatenation when the message is a non-multipart of type text/plain.
    # Otherwise, if it is not a multipart, we make it a multipart, and then we
    # add the header and footer as text/plain parts.
    #
    # BJG: In addition, only add the footer if the message's character set
    # matches the charset of the list's preferred language.  This is a
    # suboptimal solution, and should be solved by allowing a list to have
    # multiple headers/footers, for each language the list supports.
    #
    # Also, if the list's preferred charset is us-ascii, we can always
    # safely add the header/footer to a plain text message since all
    # charsets Mailman supports are strict supersets of us-ascii --
    # no, UTF-16 emails are not supported yet.
    #
    # TK: Message with 'charset=' cause trouble.  So, instead of
    # msg.get_content_charset('us-ascii') ...
    mcset = msg.get_content_charset() or 'us-ascii'
    lcset = Utils.GetCharSet(mlist.preferred_language)
    msgtype = msg.get_content_type()
    # BAW: If the charsets don't match, should we add the header and footer by
    # MIME multipart chroming the message?
    wrap = True
    if not msg.is_multipart() and msgtype == 'text/plain':
        # Save the RFC-3676 format parameters.
        format_param = msg.get_param('format')
        delsp = msg.get_param('delsp')
        # Save 'Content-Transfer-Encoding' header in case decoration fails.
        cte = msg.get('content-transfer-encoding')
        # header/footer is now in unicode (2.2)
        try:
            oldpayload = unicode(msg.get_payload(decode=True), mcset)
            del msg['content-transfer-encoding']
            frontsep = endsep = ''
            if header and not header.endswith('\n'):
                frontsep = '\n'
            if footer and not oldpayload.endswith('\n'):
                endsep = '\n'
            payload = header + frontsep + oldpayload + endsep + footer
            # When setting the payload for the message, try various charset
            # encodings until one does not produce a UnicodeError.  We'll try
            # charsets in this order: the list's charset, the message's
            # charset, then utf-8.  It's okay if some of these are duplicates.
            for cset in (lcset, mcset, 'utf-8'):
                try:
                    msg.set_payload(payload.encode(cset), cset)
                except UnicodeError:
                    pass
                else:
                    if format_param:
                        msg.set_param('format', format_param)
                    if delsp:
                        msg.set_param('delsp', delsp)
                    wrap = False
                    break
        except (LookupError, UnicodeError):
            if cte:
                # Restore the original c-t-e.
                del msg['content-transfer-encoding']
                msg['Content-Transfer-Encoding'] = cte
    elif msgtype == 'multipart/mixed':
        # The next easiest thing to do is just prepend the header and append
        # the footer as additional subparts.
        payload = msg.get_payload()
        if not isinstance(payload, list):
            payload = [payload]
        if footer:
            payload.append(_inline_text_part(footer, lcset))
        if header:
            payload.insert(0, _inline_text_part(header, lcset))
        msg.set_payload(payload)
        wrap = False
    # If we couldn't add the header or footer in a less intrusive way, we can
    # at least do it by MIME encapsulation.  We want to keep as much of the
    # outer chrome as possible.
    if not wrap:
        return
    # Because of the way Message objects are passed around to process(), we
    # need to play tricks with the outer message -- i.e. the outer one must
    # remain the same instance.  So we're going to create a clone of the outer
    # message, with all the header chrome intact, then copy the payload to it.
    # This will give us a clone of the original message, and it will form the
    # basis of the interior, wrapped Message.
    inner = Message()
    # Which headers to copy?  Let's just do the Content-* headers.
    for h, v in msg.items():
        if h.lower().startswith('content-'):
            inner[h] = v
    inner.set_payload(msg.get_payload())
    # For completeness
    inner.set_unixfrom(msg.get_unixfrom())
    inner.preamble = msg.preamble
    inner.epilogue = msg.epilogue
    # Don't copy get_charset, as this might be None, even if
    # get_content_charset isn't.  However, do make sure there is a default
    # content-type, even if the original message was not MIME.
    inner.set_default_type(msg.get_default_type())
    # BAW: HACK ALERT.
    if hasattr(msg, '__version__'):
        inner.__version__ = msg.__version__
    # Now, play games with the outer message to make it contain three
    # subparts: the header (if any), the wrapped message, and the footer (if
    # any).
    payload = [inner]
    if header:
        payload.insert(0, _inline_text_part(header, lcset))
    if footer:
        payload.append(_inline_text_part(footer, lcset))
    msg.set_payload(payload)
    del msg['content-type']
    del msg['content-transfer-encoding']
    del msg['content-disposition']
    msg['Content-Type'] = 'multipart/mixed'


def decorate(mlist, template, extradict=None):
    """Expand a header or footer `template` for `mlist`.

    The default substitution variables are real_name, list_name,
    fqdn_listname, host_name, listinfo_page, description and info, all taken
    from `mlist`.  Keys in `extradict`, when given, are added to these and
    override any default with the same name.

    Returns the expanded text with every line ending normalized to '\\n' and
    any run of spaces directly before a line ending removed.
    """
    # Create a dictionary which includes the default set of interpolation
    # variables allowed in headers and footers.  These will be augmented by
    # any key/value pairs in the extradict.
    substitutions = dict(
        real_name = mlist.real_name,
        list_name = mlist.list_name,
        fqdn_listname = mlist.fqdn_listname,
        host_name = mlist.host_name,
        listinfo_page = mlist.script_url('listinfo'),
        description = mlist.description,
        info = mlist.info,
        )
    if extradict is not None:
        substitutions.update(extradict)
    text = expand(template, substitutions)
    # Turn any \r\n line endings into just \n, dropping spaces that trail
    # at the end of a line along the way.
    return re.sub(r' *\r?\n', r'\n', text)


class Decorate:
    """Decorate a message with headers and footers."""

    implements(IHandler)

    name = 'decorate'
    description = _('Decorate a message with headers and footers.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        # Delegates to the module-level process() function.
        process(mlist, msg, msgdata)
From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(noack=True)) + >>> print msg.as_string() + From: aperson@example.com + X-Ack: no + ... diff --git a/src/mailman/pipeline/docs/acknowledge.txt b/src/mailman/pipeline/docs/acknowledge.txt new file mode 100644 index 000000000..a4c68f900 --- /dev/null +++ b/src/mailman/pipeline/docs/acknowledge.txt @@ -0,0 +1,159 @@ +Message acknowledgment +====================== + +When a user posts a message to a mailing list, and that user has chosen to +receive acknowledgments of their postings, Mailman will send them such an +acknowledgment. + + >>> handler = config.handlers['acknowledge'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.real_name = u'XTest' + >>> mlist.preferred_language = u'en' + >>> # XXX This will almost certainly change once we've worked out the web + >>> # space layout for mailing lists now. + + >>> # Ensure that the virgin queue is empty, since we'll be checking this + >>> # for new auto-response messages. + >>> virginq = config.switchboards['virgin'] + >>> virginq.files + [] + +Subscribe a user to the mailing list. + + >>> usermgr = config.db.user_manager + >>> from mailman.interfaces.member import MemberRole + >>> user_1 = usermgr.create_user(u'aperson@example.com') + >>> address_1 = list(user_1.addresses)[0] + >>> address_1.subscribe(mlist, MemberRole.member) + <Member: aperson@example.com on _xtest@example.com as MemberRole.member> + + +Non-member posts +---------------- + +Non-members can't get acknowledgments of their posts to the mailing list. + + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> virginq.files + [] + +We can also specify the original sender in the message's metadata. If that +person is also not a member, no acknowledgment will be sent either. + + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ...
+ ... """) + >>> handler.process(mlist, msg, + ... dict(original_sender=u'cperson@example.com')) + >>> virginq.files + [] + + +No acknowledgment requested +--------------------------- + +Unless the user has requested acknowledgments, they will not get one. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> virginq.files + [] + +Similarly if the original sender is specified in the message metadata, and +that sender is a member but not one who has requested acknowledgments, none +will be sent. + + >>> user_2 = usermgr.create_user(u'dperson@example.com') + >>> address_2 = list(user_2.addresses)[0] + >>> address_2.subscribe(mlist, MemberRole.member) + <Member: dperson@example.com on _xtest@example.com as MemberRole.member> + + >>> handler.process(mlist, msg, + ... dict(original_sender=u'dperson@example.com')) + >>> virginq.files + [] + + +Requested acknowledgments +------------------------- + +If the member requests acknowledgments, Mailman will send them one when they +post to the mailing list. + + >>> user_1.preferences.acknowledge_posts = True + +The receipt will include the original message's subject in the response body, + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something witty and insightful + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> virginq.files + [] + >>> sorted(qdata.items()) + [..., ('recips', [u'aperson@example.com']), ...] + >>> print qmsg.as_string() + ... + MIME-Version: 1.0 + ... + Subject: XTest post acknowledgment + From: _xtest-bounces@example.com + To: aperson@example.com + ... + Precedence: bulk + <BLANKLINE> + Your message entitled + <BLANKLINE> + Something witty and insightful + <BLANKLINE> + was successfully received by the XTest mailing list. 
+ <BLANKLINE> + List info page: http://lists.example.com/listinfo/_xtest@example.com + Your preferences: http://example.com/aperson@example.com + <BLANKLINE> + +If there is no subject, then the receipt will use a generic message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> handler.process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> virginq.files + [] + >>> sorted(qdata.items()) + [..., ('recips', [u'aperson@example.com']), ...] + >>> print qmsg.as_string() + MIME-Version: 1.0 + ... + Subject: XTest post acknowledgment + From: _xtest-bounces@example.com + To: aperson@example.com + ... + Precedence: bulk + <BLANKLINE> + Your message entitled + <BLANKLINE> + (no subject) + <BLANKLINE> + was successfully received by the XTest mailing list. + <BLANKLINE> + List info page: http://lists.example.com/listinfo/_xtest@example.com + Your preferences: http://example.com/aperson@example.com + <BLANKLINE> diff --git a/src/mailman/pipeline/docs/after-delivery.txt b/src/mailman/pipeline/docs/after-delivery.txt new file mode 100644 index 000000000..b910e89a6 --- /dev/null +++ b/src/mailman/pipeline/docs/after-delivery.txt @@ -0,0 +1,27 @@ +After delivery +============== + +After a message is delivered, or more correctly, after it has been processed +by the rest of the handlers in the incoming queue pipeline, a couple of +bookkeeping pieces of information are updated. + + >>> import datetime + >>> handler = config.handlers['after-delivery'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> post_time = datetime.datetime.now() - datetime.timedelta(minutes=10) + >>> mlist.last_post_time = post_time + >>> mlist.post_id = 10 + +Processing a message with this handler updates the last_post_time and post_id +attributes. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... Something interesting. + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> mlist.last_post_time > post_time + True + >>> mlist.post_id + 11 diff --git a/src/mailman/pipeline/docs/archives.txt b/src/mailman/pipeline/docs/archives.txt new file mode 100644 index 000000000..d90228525 --- /dev/null +++ b/src/mailman/pipeline/docs/archives.txt @@ -0,0 +1,133 @@ +Archives +======== + +Updating the archives with posted messages is handled by a separate queue, +which allows for better memory management and prevents blocking the main +delivery processes while messages are archived. This also allows external +archivers to work in a separate process from the main Mailman delivery +processes. + + >>> from mailman.app.lifecycle import create_list + >>> handler = config.handlers['to-archive'] + >>> mlist = create_list(u'_xtest@example.com') + >>> switchboard = config.switchboards['archive'] + +A helper function. + + >>> def clear(): + ... for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + +The purpose of the ToArchive handler is to make a simple decision as to +whether the message should get archived and if so, to drop the message in the +archiving queue. Really the most important things are to determine when a +message should /not/ get archived. + +For example, no digests should ever get archived. + + >>> mlist.archive = True + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +If the mailing list is not configured to archive, then even regular deliveries +won't be archived. + + >>> mlist.archive = False + >>> handler.process(mlist, msg, {}) + >>> switchboard.files + [] + +There are two de-facto standards for a message to indicate that it does not +want to be archived. We've seen both in the wild so both are supported. 
The +X-No-Archive: header can be used to indicate that the message should not be +archived. Confusingly, this header's value is actually ignored. + + >>> mlist.archive = True + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-No-Archive: YES + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +Even a 'no' value will stop the archiving of the message. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-No-Archive: No + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +Another header that's been observed is the X-Archive: header. Here, the +header's case folded value must be 'no' in order to prevent archiving. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-Archive: No + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, dict(isdigest=True)) + >>> switchboard.files + [] + +But if the value is 'yes', then the message will be archived. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... X-Archive: Yes + ... + ... A message of great import. + ... """) + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> qmsg, qdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: A sample message + X-Archive: Yes + <BLANKLINE> + A message of great import. + <BLANKLINE> + >>> dump_msgdata(qdata) + _parsemsg: False + version : 3 + +Without either archiving header, and all other things being the same, the +message will get archived. + + >>> msg = message_from_string("""\ + ... Subject: A sample message + ... + ... A message of great import. + ... 
""") + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> qmsg, qdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: A sample message + <BLANKLINE> + A message of great import. + <BLANKLINE> + >>> dump_msgdata(qdata) + _parsemsg: False + version : 3 diff --git a/src/mailman/pipeline/docs/avoid-duplicates.txt b/src/mailman/pipeline/docs/avoid-duplicates.txt new file mode 100644 index 000000000..fe91a9a71 --- /dev/null +++ b/src/mailman/pipeline/docs/avoid-duplicates.txt @@ -0,0 +1,168 @@ +Avoid duplicates +================ + +The AvoidDuplicates handler module implements several strategies to try to +reduce the reception of duplicate messages. It does this by removing certain +recipients from the list of recipients that earlier handler modules +(e.g. CalcRecips) calculates. + + >>> handler = config.handlers['avoid-duplicates'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Create some members we're going to use. + + >>> from mailman.interfaces.member import MemberRole + >>> address_a = config.db.user_manager.create_address( + ... u'aperson@example.com') + >>> address_b = config.db.user_manager.create_address( + ... u'bperson@example.com') + >>> member_a = address_a.subscribe(mlist, MemberRole.member) + >>> member_b = address_b.subscribe(mlist, MemberRole.member) + >>> # This is the message metadata dictionary as it would be produced by + >>> # the CalcRecips handler. + >>> recips = dict(recips=[u'aperson@example.com', u'bperson@example.com']) + + +Short circuiting +---------------- + +The module short-circuits if there are no recipients. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: A message of great import + ... + ... Something + ... 
""") + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> msgdata + {} + >>> print msg.as_string() + From: aperson@example.com + Subject: A message of great import + <BLANKLINE> + Something + <BLANKLINE> + + +Suppressing the list copy +------------------------- + +Members can elect not to receive a list copy of any message on which they are +explicitly named as a recipient. This is done by setting their +receive_list_copy preference to False. However, if they aren't mentioned in +one of the recipient headers (i.e. To, CC, Resent-To, or Resent-CC), then they +will get a list copy. + + >>> member_a.preferences.receive_list_copy = False + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + <BLANKLINE> + Something of great import. + <BLANKLINE> + +If they're mentioned on the CC line, they won't get a list copy. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... CC: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + CC: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +But if they're mentioned on the CC line and have receive_list_copy set to True +(the default), then they still get a list copy. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... CC: bperson@example.com + ... + ... Something of great import. + ... 
""") + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + CC: bperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +Other headers checked for recipients include the To... + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... To: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + To: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +...Resent-To... + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... Resent-To: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + Resent-To: aperson@example.com + <BLANKLINE> + Something of great import. + <BLANKLINE> + +...and Resent-CC headers. + + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... Resent-Cc: aperson@example.com + ... + ... Something of great import. + ... """) + >>> msgdata = recips.copy() + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'bperson@example.com'] + >>> print msg.as_string() + From: Claire Person <cperson@example.com> + Resent-Cc: aperson@example.com + <BLANKLINE> + Something of great import. 
+ <BLANKLINE> diff --git a/src/mailman/pipeline/docs/calc-recips.txt b/src/mailman/pipeline/docs/calc-recips.txt new file mode 100644 index 000000000..adfbeabbf --- /dev/null +++ b/src/mailman/pipeline/docs/calc-recips.txt @@ -0,0 +1,100 @@ +Calculating recipients +====================== + +Every message that makes it through to the list membership gets sent to a set +of recipient addresses. These addresses are calculated by one of the handler +modules and depend on a host of factors. + + >>> handler = config.handlers['calculate-recipients'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Recipients are calculated from the list members, so add a bunch of members to +start out with. First, create a bunch of addresses... + + >>> usermgr = config.db.user_manager + >>> address_a = usermgr.create_address(u'aperson@example.com') + >>> address_b = usermgr.create_address(u'bperson@example.com') + >>> address_c = usermgr.create_address(u'cperson@example.com') + >>> address_d = usermgr.create_address(u'dperson@example.com') + >>> address_e = usermgr.create_address(u'eperson@example.com') + >>> address_f = usermgr.create_address(u'fperson@example.com') + +...then subscribe these addresses to the mailing list as members... + + >>> from mailman.interfaces.member import MemberRole + >>> member_a = address_a.subscribe(mlist, MemberRole.member) + >>> member_b = address_b.subscribe(mlist, MemberRole.member) + >>> member_c = address_c.subscribe(mlist, MemberRole.member) + >>> member_d = address_d.subscribe(mlist, MemberRole.member) + >>> member_e = address_e.subscribe(mlist, MemberRole.member) + >>> member_f = address_f.subscribe(mlist, MemberRole.member) + +...then make some of the members digest members.
+ + >>> from mailman.constants import DeliveryMode + >>> member_d.preferences.delivery_mode = DeliveryMode.plaintext_digests + >>> member_e.preferences.delivery_mode = DeliveryMode.mime_digests + >>> member_f.preferences.delivery_mode = DeliveryMode.summary_digests + + +Short-circuiting +---------------- + +Sometimes, the list of recipients already exists in the message metadata. +This can happen for example, when a message was previously delivered to some +but not all of the recipients. + + >>> msg = message_from_string("""\ + ... From: Xavier Person <xperson@example.com> + ... + ... Something of great import. + ... """) + >>> recips = set((u'qperson@example.com', u'zperson@example.com')) + >>> msgdata = dict(recips=recips) + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'qperson@example.com', u'zperson@example.com'] + + +Regular delivery recipients +--------------------------- + +Regular delivery recipients are those people who get messages from the list as +soon as they are posted. In other words, these folks are not digest members. + + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com', u'cperson@example.com'] + +Members can elect not to receive a list copy of their own postings. + + >>> member_c.preferences.receive_own_postings = False + >>> msg = message_from_string("""\ + ... From: Claire Person <cperson@example.com> + ... + ... Something of great import. + ... """) + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [u'aperson@example.com', u'bperson@example.com'] + +Members can also elect not to receive a list copy of any message on which they +are explicitly named as a recipient. However, see the AvoidDuplicates handler +for details. + + +Digest recipients +----------------- + +XXX Test various digest deliveries. 
+ + +Urgent messages +--------------- + +XXX Test various urgent deliveries: + * test_urgent_moderator() + * test_urgent_admin() + * test_urgent_reject() diff --git a/src/mailman/pipeline/docs/cleanse.txt b/src/mailman/pipeline/docs/cleanse.txt new file mode 100644 index 000000000..0940cdb4b --- /dev/null +++ b/src/mailman/pipeline/docs/cleanse.txt @@ -0,0 +1,94 @@ +Cleansing headers +================= + +All messages posted to a list get their headers cleansed. Some headers are +related to additional permissions that can be granted to the message and other +headers can be used to fish for membership. + + >>> handler = config.handlers['cleanse'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Headers such as Approved, Approve, and Urgent are used to grant special +permissions to individual messages. All may contain a password; the first two +headers are used by list administrators to pre-approve a message normally held +for approval. The last header is used to send a regular message to all +members, regardless of whether they get digests or not. Because all three +headers contain passwords, they must be removed from any posted message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Approved: foobar + ... Approve: barfoo + ... Urgent: notreally + ... Subject: A message of great import + ... + ... Blah blah blah + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Subject: A message of great import + <BLANKLINE> + Blah blah blah + <BLANKLINE> + +Other headers can be used by list members to fish the list for membership, so +we don't let them go through. These are a mix of standard headers and custom +headers supported by some mail readers. For example, X-PMRQC is supported by +Pegasus mail. I don't remember what program uses X-Confirm-Reading-To though +(Some Microsoft product perhaps?). + + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ...
Reply-To: bperson@example.org + ... Sender: asystem@example.net + ... Return-Receipt-To: another@example.com + ... Disposition-Notification-To: athird@example.com + ... X-Confirm-Reading-To: afourth@example.com + ... X-PMRQC: afifth@example.com + ... Subject: a message to you + ... + ... How are you doing? + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + From: bperson@example.com + Reply-To: bperson@example.org + Sender: asystem@example.net + Subject: a message to you + <BLANKLINE> + How are you doing? + <BLANKLINE> + + +Anonymous lists +--------------- + +Anonymous mailing lists also try to cleanse certain identifying headers from +the original posting, so that it is at least a bit more difficult to determine +who sent the message. This isn't perfect though, for example, the body of the +messages are never scrubbed (though that might not be a bad idea). The From +and Reply-To headers in the posted message are taken from list attributes. + +Hotmail apparently sets X-Originating-Email. + + >>> mlist.anonymous_list = True + >>> mlist.description = u'A Test Mailing List' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: bperson@example.com + ... Reply-To: bperson@example.org + ... Sender: asystem@example.net + ... X-Originating-Email: cperson@example.com + ... Subject: a message to you + ... + ... How are you doing? + ... """) + >>> handler.process(mlist, msg, {}) + >>> print msg.as_string() + Subject: a message to you + From: A Test Mailing List <_xtest@example.com> + Reply-To: _xtest@example.com + <BLANKLINE> + How are you doing? 
+ <BLANKLINE> diff --git a/src/mailman/pipeline/docs/cook-headers.txt b/src/mailman/pipeline/docs/cook-headers.txt new file mode 100644 index 000000000..ce13a45b6 --- /dev/null +++ b/src/mailman/pipeline/docs/cook-headers.txt @@ -0,0 +1,326 @@ +Cooking headers +=============== + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. + + >>> from mailman.pipeline.cook_headers import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + >>> mlist.include_list_post_header = False + >>> mlist.archive = True + + +Saving the original sender +-------------------------- + +Because the original sender headers may get deleted or changed, CookHeaders +will place the sender in the message metadata for safe keeping. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['original_sender'] + u'aperson@example.com' + +But if there was no original sender, then the empty string will be saved. + + >>> msg = message_from_string("""\ + ... Subject: No original sender + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['original_sender'] + '' + + +X-BeenThere header +------------------ + +The X-BeenThere header is what Mailman uses to recognize messages that have +already been processed by this mailing list. It's one small measure against +mail loops. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... 
""") + >>> process(mlist, msg, {}) + >>> msg['x-beenthere'] + u'_xtest@example.com' + +Mailman appends X-BeenThere headers, so if there already is one in the +original message, the posted message will contain two such headers. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-BeenThere: another@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> sorted(msg.get_all('x-beenthere')) + [u'_xtest@example.com', u'another@example.com'] + + +Mailman version header +---------------------- + +Mailman will also insert an X-Mailman-Version header... + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> from mailman.version import VERSION + >>> msg['x-mailman-version'] == VERSION + True + +...but only if one doesn't already exist. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-Mailman-Version: 3000 + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['x-mailman-version'] + u'3000' + + +Precedence header +----------------- + +Mailman will insert a Precedence header, which is a de-facto standard for +telling automatic reply software (e.g. vacation(1)) not to respond to this +message. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['precedence'] + u'list' + +But Mailman will only add that header if the original message doesn't already +have one of them. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Precedence: junk + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> msg['precedence'] + u'junk' + + +RFC 2919 and 2369 headers +------------------------- + +This is a helper function for the following section. + + >>> def list_headers(msg): + ... 
print '---start---' + ... # Sort the List-* headers found in the message. We need to do + ... # this because CookHeaders puts them in a dictionary which does + ... # not have a guaranteed sort order. + ... for header in sorted(msg.keys()): + ... parts = header.lower().split('-') + ... if 'list' not in parts: + ... continue + ... for value in msg.get_all(header): + ... print '%s: %s' % (header, value) + ... print '---end---' + +These RFCs define headers for mailing list actions. A mailing list should +generally add these headers, but not for messages that aren't crafted for a +specific list (e.g. password reminders in Mailman 2.x). + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, dict(_nolist=True)) + >>> list_headers(msg) + ---start--- + ---end--- + +Some people don't like these headers because their mail readers aren't good +about hiding them. A list owner can turn these headers off. + + >>> mlist.include_rfc2369_headers = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + ---end--- + +But normally, a list will include these headers. + + >>> mlist.include_rfc2369_headers = True + >>> mlist.include_list_post_header = True + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Message-ID: <12345> + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://lists.example.com/archives/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +If the mailing list has a description, then it is included in the List-Id +header. + + >>> mlist.description = u'My test mailing list' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://lists.example.com/archives/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +Administrative messages crafted by Mailman will have a reduced set of headers. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> process(mlist, msg, dict(reduced_list_headers=True)) + >>> list_headers(msg) + ---start--- + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + X-List-Administrivia: yes + ---end--- + +With the normal set of List-* headers, it's still possible to suppress the +List-Post header, which is reasonable for an announce only mailing list. + + >>> mlist.include_list_post_header = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Archive: <http://lists.example.com/archives/_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + +And if the list isn't being archived, it makes no sense to add the +List-Archive header either. + + >>> mlist.include_list_post_header = True + >>> mlist.archive = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> list_headers(msg) + ---start--- + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Id: My test mailing list <_xtest.example.com> + List-Post: <mailto:_xtest@example.com> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + ---end--- + + +Archived-At +----------- + +RFC 5064 (draft) defines a new Archived-At header which contains the url to +the individual message in the archives. The stock Pipermail archiver doesn't +support this because the url can't be calculated until after the message is +archived. Because this is done by the archive runner, this information isn't +available to us now. + + >>> print msg['archived-at'] + None + + +Personalization +--------------- + +The To field normally contains the list posting address. However when +messages are fully personalized, that header will get overwritten with the +address of the recipient. The list's posting address will be added to one of +the recipient headers so that users will be able to reply back to the list. + + >>> from mailman.interfaces.mailinglist import ( + ... Personalization, ReplyToMunging) + >>> mlist.personalize = Personalization.full + >>> mlist.reply_goes_to_list = ReplyToMunging.no_munging + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + X-BeenThere: _xtest@example.com + X-Mailman-Version: ... 
+ Precedence: list + Cc: My test mailing list <_xtest@example.com> + List-Id: My test mailing list <_xtest.example.com> + List-Unsubscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-leave@example.com> + List-Post: <mailto:_xtest@example.com> + List-Help: <mailto:_xtest-request@example.com?subject=help> + List-Subscribe: <http://lists.example.com/listinfo/_xtest@example.com>, + <mailto:_xtest-join@example.com> + <BLANKLINE> + <BLANKLINE> diff --git a/src/mailman/pipeline/docs/decorate.txt b/src/mailman/pipeline/docs/decorate.txt new file mode 100644 index 000000000..b805e23cf --- /dev/null +++ b/src/mailman/pipeline/docs/decorate.txt @@ -0,0 +1,317 @@ +Message decoration +================== + +Message decoration is the process of adding headers and footers to the +original message. A handler module takes care of this based on the settings +of the mailing list and the type of message being processed. + + >>> from mailman.pipeline.decorate import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> msg_text = """\ + ... From: aperson@example.org + ... + ... Here is a message. + ... """ + >>> msg = message_from_string(msg_text) + + +Short circuiting +---------------- + +Digest messages get decorated during the digest creation phase so no extra +decorations are added for digest messages. + + >>> process(mlist, msg, dict(isdigest=True)) + >>> print msg.as_string() + From: aperson@example.org + <BLANKLINE> + Here is a message. + + >>> process(mlist, msg, dict(nodecorate=True)) + >>> print msg.as_string() + From: aperson@example.org + <BLANKLINE> + Here is a message. + + +Decorating simple text messages +------------------------------- + +Text messages that have no declared content type character set are by default, +encoded in us-ascii. When the mailing list's preferred language is 'en' +(i.e. English), the character set of the mailing list and of the message will +match. 
In this case, and when the header and footer have no interpolation +placeholder variables, the message's payload will be prepended by the verbatim +header, and appended with the verbatim footer. + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'header\n' + >>> mlist.msg_footer = u'footer' + >>> mlist.preferred_language = u'en' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + <BLANKLINE> + header + Here is a message. + footer + +Mailman supports a number of interpolation variables, placeholders in the +header and footer for information to be filled in with mailing list specific +data. An example of such information is the mailing list's "real name" (a +short descriptive name for the mailing list). + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'$real_name header\n' + >>> mlist.msg_footer = u'$real_name footer' + >>> mlist.real_name = u'XTest' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + XTest header + Here is a message. + XTest footer + +You can't just pick any interpolation variable though; if you do, the variable +will remain in the header or footer unchanged. + + >>> msg = message_from_string(msg_text) + >>> mlist.msg_header = u'$dummy header\n' + >>> mlist.msg_footer = u'$dummy footer' + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.org + ... + $dummy header + Here is a message. + $dummy footer + + +Handling RFC 3676 'format=flowed' parameters +-------------------------------------------- + +RFC 3676 describes a standard by which text/plain messages can be marked by +generating MUAs for better readability in compatible receiving MUAs. The +'format' parameter on the text/plain Content-Type header gives hints as to how +the receiving MUA may flow and delete trailing whitespace for better display +in a proportional font.
+ +When Mailman sees text/plain messages with such RFC 3676 parameters, it +preserves these parameters when it concatenates headers and footers to the +message payload. + + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: text/plain; format=flowed; delsp=no + ... + ... Here is a message\x20 + ... with soft line breaks. + ... """) + >>> process(mlist, msg, {}) + >>> # Don't use 'print' here as above because it won't be obvious from the + >>> # output that the soft-line break space at the end of the 'Here is a + >>> # message' line will be retained in the output. + >>> msg['content-type'] + u'text/plain; format="flowed"; delsp="no"; charset="us-ascii"' + >>> [line for line in msg.get_payload().splitlines()] + ['header', 'Here is a message ', 'with soft line breaks.', 'footer'] + + +Decorating mixed-charset messages +--------------------------------- + +When a message has no explicit character set, it is assumed to be us-ascii. +However, if the mailing list's preferred language has a different character +set, Mailman will still try to concatenate the header and footer, but it will +convert the text to utf-8 and base-64 encode the message payload. 
+ + # 'ja' = Japanese; charset = 'euc-jp' + >>> mlist.preferred_language = u'ja' + >>> mlist.msg_header = u'$description header' + >>> mlist.msg_footer = u'$description footer' + >>> mlist.description = u'\u65e5\u672c\u8a9e' + + >>> from email.message import Message + >>> msg = Message() + >>> msg.set_payload('Fran\xe7aise', 'iso-8859-1') + >>> print msg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="iso-8859-1" + Content-Transfer-Encoding: quoted-printable + <BLANKLINE> + Fran=E7aise + >>> process(mlist, msg, {}) + >>> print msg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + <BLANKLINE> + 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI= + + +Sometimes the message even has an unknown character set. In this case, +Mailman has no choice but to decorate the original message with MIME +attachments. + + >>> mlist.preferred_language = u'en' + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: text/plain; charset=unknown + ... Content-Transfer-Encoding: 7bit + ... + ... Here is a message. + ... """) + >>> process(mlist, msg, {}) + >>> msg.set_boundary('BOUNDARY') + >>> print msg.as_string() + From: aperson@example.org + Content-Type: multipart/mixed; boundary="BOUNDARY" + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + Content-Type: text/plain; charset=unknown + Content-Transfer-Encoding: 7bit + <BLANKLINE> + Here is a message. 
+ <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Decorating multipart messages +----------------------------- + +Multipart messages have to be decorated differently. The header and footer +cannot be simply concatenated into the payload because that will break the +MIME structure of the message. Instead, the header and footer are attached as +separate MIME subparts. + +When the outerpart is multipart/mixed, the header and footer can have a +Content-Disposition of 'inline' so that MUAs can display these headers as if +they were simply concatenated. + + >>> mlist.preferred_language = u'en' + >>> mlist.msg_header = u'header' + >>> mlist.msg_footer = u'footer' + >>> part_1 = message_from_string("""\ + ... From: aperson@example.org + ... + ... Here is the first message. + ... """) + >>> part_2 = message_from_string("""\ + ... From: bperson@example.com + ... + ... Here is the second message. + ... """) + >>> from email.mime.multipart import MIMEMultipart + >>> msg = MIMEMultipart('mixed', boundary='BOUNDARY', + ... _subparts=(part_1, part_2)) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + Content-Type: multipart/mixed; boundary="BOUNDARY" + MIME-Version: 1.0 + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + From: aperson@example.org + <BLANKLINE> + Here is the first message. + <BLANKLINE> + --BOUNDARY + From: bperson@example.com + <BLANKLINE> + Here is the second message. 
+ <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Decorating other content types +------------------------------ + +Non-multipart non-text content types will get wrapped in a multipart/mixed so +that the header and footer can be added as attachments. + + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... Content-Type: image/x-beautiful + ... + ... IMAGEDATAIMAGEDATAIMAGEDATA + ... """) + >>> process(mlist, msg, {}) + >>> msg.set_boundary('BOUNDARY') + >>> print msg.as_string() + From: aperson@example.org + ... + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + header + --BOUNDARY + Content-Type: image/x-beautiful + <BLANKLINE> + IMAGEDATAIMAGEDATAIMAGEDATA + <BLANKLINE> + --BOUNDARY + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Disposition: inline + <BLANKLINE> + footer + --BOUNDARY-- + + +Personalization +--------------- + +A mailing list can be 'personalized', meaning that each message is unique for +each recipient. When the list is personalized, additional interpolation +variables are available, however the list of intended recipients must be +provided in the message data, otherwise an exception occurs. + + >>> process(mlist, None, dict(personalize=True)) + Traceback (most recent call last): + ... + AssertionError: The number of intended recipients must be exactly 1 + +And the number of intended recipients must be exactly 1. + + >>> process(mlist, None, dict(personalize=True, recips=[1, 2, 3])) + Traceback (most recent call last): + ... 
+ AssertionError: The number of intended recipients must be exactly 1 diff --git a/src/mailman/pipeline/docs/digests.txt b/src/mailman/pipeline/docs/digests.txt new file mode 100644 index 000000000..cb939f7ca --- /dev/null +++ b/src/mailman/pipeline/docs/digests.txt @@ -0,0 +1,535 @@ +Digests +======= + +Digests are a way for a user to receive list traffic in collections instead of +as individual messages when immediately posted. There are several forms of +digests, although only two are currently supported: MIME digests and RFC 1153 +(a.k.a. plain text) digests. + + >>> from mailman.pipeline.to_digest import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + >>> mlist.real_name = u'XTest' + >>> mlist.subject_prefix = u'[_XTest] ' + >>> mlist.one_last_digest = set() + >>> switchboard = config.switchboards['virgin'] + +This is a helper function used to iterate through all the accumulated digest +messages, in the order in which they were posted. This makes it easier to +update the tests when we switch to a different mailbox format. + + >>> from mailman.testing.helpers import digest_mbox + >>> from itertools import count + >>> from string import Template + >>> def makemsg(): + ... for i in count(1): + ... text = Template("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... Subject: Test message $i + ... + ... Here is message $i + ... """).substitute(i=i) + ... yield message_from_string(text) + + +Short circuiting +---------------- + +When a message is posted to the mailing list, it is generally added to a +running collection of messages. For now, this is a Unix mailbox file, +although in the future this may end up being converted to a maildir style +mailbox. In any event, there are several factors that would bypass the +storing of posted messages to the mailbox. For example, the mailing list may +not allow digests... 
+ + >>> mlist.digestable = False + >>> msg = makemsg().next() + >>> process(mlist, msg, {}) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> switchboard.files + [] + +...or they may allow digests but the message is already a digest. + + >>> mlist.digestable = True + >>> process(mlist, msg, dict(isdigest=True)) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> switchboard.files + [] + + +Sending a digest +---------------- + +For messages which are not digests, but which are posted to a digestable +mailing list, the messages will be stored until they meet one of the criteria +triggering the sending of the digest. If none of those criteria are met, then +the message will just sit in the mailbox for a while. + + >>> mlist.digest_size_threshold = 10000 + >>> process(mlist, msg, {}) + >>> switchboard.files + [] + >>> digest = digest_mbox(mlist) + >>> sum(1 for mboxmsg in digest) + 1 + >>> import os + >>> os.remove(digest._path) + +When the size of the digest mbox reaches the maximum size threshold, a digest +is crafted and sent out. This puts two messages in the virgin queue, a MIME +digest and an RFC 1153 plain text digest. The size threshold is in KB. + + >>> mlist.digest_size_threshold = 1 + >>> mlist.volume = 2 + >>> mlist.next_digest_number = 10 + >>> size = 0 + >>> for msg in makemsg(): + ... process(mlist, msg, {}) + ... size += len(str(msg)) + ... if size > mlist.digest_size_threshold * 1024: + ... break + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> len(switchboard.files) + 2 + >>> for filebase in switchboard.files: + ... qmsg, qdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... if qmsg.is_multipart(): + ... mimemsg = qmsg + ... mimedata = qdata + ... else: + ... rfc1153msg = qmsg + ... rfc1153data = qdata + >>> print mimemsg.as_string() + Content-Type: multipart/mixed; boundary="..."
+ MIME-Version: 1.0 + From: _xtest-request@example.com + Subject: XTest Digest, Vol 2, Issue 10 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + <BLANKLINE> + --... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Description: XTest Digest, Vol 2, Issue 10 + <BLANKLINE> + Send XTest mailing list submissions to + _xtest@example.com + <BLANKLINE> + To subscribe or unsubscribe via the World Wide Web, visit + http://lists.example.com/listinfo/_xtest@example.com + or, via email, send a message with subject or body 'help' to + _xtest-request@example.com + <BLANKLINE> + You can reach the person managing the list at + _xtest-owner@example.com + <BLANKLINE> + When replying, please edit your Subject line so it is more specific + than "Re: Contents of XTest digest..." + <BLANKLINE> + --... + Content-Type: text/plain; charset="us-ascii" + MIME-Version: 1.0 + Content-Transfer-Encoding: 7bit + Content-Description: Today's Topics (8 messages) + <BLANKLINE> + Today's Topics: + <BLANKLINE> + 1. Test message 1 (aperson@example.com) + 2. Test message 2 (aperson@example.com) + 3. Test message 3 (aperson@example.com) + 4. Test message 4 (aperson@example.com) + 5. Test message 5 (aperson@example.com) + 6. Test message 6 (aperson@example.com) + 7. Test message 7 (aperson@example.com) + 8. Test message 8 (aperson@example.com) + <BLANKLINE> + --... + Content-Type: multipart/digest; boundary="..." + MIME-Version: 1.0 + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 1 + Message: 1 + <BLANKLINE> + Here is message 1 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 2 + Message: 2 + <BLANKLINE> + Here is message 2 + <BLANKLINE> + <BLANKLINE> + --... 
+ Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 3 + Message: 3 + <BLANKLINE> + Here is message 3 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 4 + Message: 4 + <BLANKLINE> + Here is message 4 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 5 + Message: 5 + <BLANKLINE> + Here is message 5 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 6 + Message: 6 + <BLANKLINE> + Here is message 6 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 7 + Message: 7 + <BLANKLINE> + Here is message 7 + <BLANKLINE> + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + From: aperson@example.com + To: _xtest@example.com + Subject: Test message 8 + Message: 8 + <BLANKLINE> + Here is message 8 + <BLANKLINE> + <BLANKLINE> + --... + --... + >>> dump_msgdata(mimedata) + _parsemsg: False + isdigest : True + listname : _xtest@example.com + recips : set([]) + version : 3 + + + >>> print rfc1153msg.as_string() + From: _xtest-request@example.com + Subject: XTest Digest, Vol 2, Issue 10 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... 
+ MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + Send XTest mailing list submissions to + _xtest@example.com + <BLANKLINE> + To subscribe or unsubscribe via the World Wide Web, visit + http://lists.example.com/listinfo/_xtest@example.com + or, via email, send a message with subject or body 'help' to + _xtest-request@example.com + <BLANKLINE> + You can reach the person managing the list at + _xtest-owner@example.com + <BLANKLINE> + When replying, please edit your Subject line so it is more specific + than "Re: Contents of XTest digest..." + <BLANKLINE> + <BLANKLINE> + Today's Topics: + <BLANKLINE> + 1. Test message 1 (aperson@example.com) + 2. Test message 2 (aperson@example.com) + 3. Test message 3 (aperson@example.com) + 4. Test message 4 (aperson@example.com) + 5. Test message 5 (aperson@example.com) + 6. Test message 6 (aperson@example.com) + 7. Test message 7 (aperson@example.com) + 8. Test message 8 (aperson@example.com) + <BLANKLINE> + <BLANKLINE> + ---------------------------------------------------------------------- + <BLANKLINE> + Message: 1 + From: aperson@example.com + Subject: Test message 1 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 1 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 2 + From: aperson@example.com + Subject: Test message 2 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 2 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 3 + From: aperson@example.com + Subject: Test message 3 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 3 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 4 + From: aperson@example.com + Subject: Test message 4 + To: _xtest@example.com + Message-ID: ... 
+ <BLANKLINE> + Here is message 4 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 5 + From: aperson@example.com + Subject: Test message 5 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 5 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 6 + From: aperson@example.com + Subject: Test message 6 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 6 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 7 + From: aperson@example.com + Subject: Test message 7 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 7 + <BLANKLINE> + <BLANKLINE> + ------------------------------ + <BLANKLINE> + Message: 8 + From: aperson@example.com + Subject: Test message 8 + To: _xtest@example.com + Message-ID: ... + <BLANKLINE> + Here is message 8 + <BLANKLINE> + <BLANKLINE> + End of XTest Digest, Vol 2, Issue 10 + ************************************ + <BLANKLINE> + >>> dump_msgdata(rfc1153data) + _parsemsg: False + isdigest : True + listname : _xtest@example.com + recips : set([]) + version : 3 + + +Internationalized digests +------------------------- + +When messages come in with a content-type character set different than that of +the list's preferred language, recipients will get an internationalized +digest. French is not enabled by default site-wide, so enable that now. + + >>> config.languages.enable_language('fr') + + # Simulate the site administrator setting the default server language to + # French in the configuration file. Without this, the English template + # will be found and the masthead won't be translated. + >>> config.push('french', """ + ... [mailman] + ... default_language: fr + ... """) + + >>> mlist.preferred_language = u'fr' + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... To: _xtest@example.com + ... Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?= + ... 
MIME-Version: 1.0 + ... Content-Type: text/plain; charset=iso-2022-jp + ... Content-Transfer-Encoding: 7bit + ... + ... \x1b$B0lHV\x1b(B + ... """) + +Set the digest threshold to zero so that the digests will be sent immediately. + + >>> mlist.digest_size_threshold = 0 + >>> process(mlist, msg, {}) + >>> sum(1 for mboxmsg in digest_mbox(mlist)) + 0 + >>> len(switchboard.files) + 2 + >>> for filebase in switchboard.files: + ... qmsg, qdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... if qmsg.is_multipart(): + ... mimemsg = qmsg + ... mimedata = qdata + ... else: + ... rfc1153msg = qmsg + ... rfc1153data = qdata + >>> print mimemsg.as_string() + Content-Type: multipart/mixed; boundary="..." + MIME-Version: 1.0 + From: _xtest-request@example.com + Subject: Groupe XTest, Vol. 2, Parution 11 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + <BLANKLINE> + --... + Content-Type: text/plain; charset="iso-8859-1" + MIME-Version: 1.0 + Content-Transfer-Encoding: quoted-printable + Content-Description: Groupe XTest, Vol. 2, Parution 11 + <BLANKLINE> + Envoyez vos messages pour la liste XTest =E0 + _xtest@example.com + <BLANKLINE> + Pour vous (d=E9s)abonner par le web, consultez + http://lists.example.com/listinfo/_xtest@example.com + <BLANKLINE> + ou, par courriel, envoyez un message avec =AB=A0help=A0=BB dans le corps ou + dans le sujet =E0 + _xtest-request@example.com + <BLANKLINE> + Vous pouvez contacter l'administrateur de la liste =E0 l'adresse + _xtest-owner@example.com + <BLANKLINE> + Si vous r=E9pondez, n'oubliez pas de changer l'objet du message afin + qu'il soit plus sp=E9cifique que =AB=A0Re: Contenu du groupe de XTest...=A0= + =BB + <BLANKLINE> + --... 
+ Content-Type: text/plain; charset="utf-8" + MIME-Version: 1.0 + Content-Transfer-Encoding: base64 + Content-Description: Today's Topics (1 messages) + <BLANKLINE> + VGjDqG1lcyBkdSBqb3VyIDoKCiAgIDEuIOS4gOeVqiAoYXBlcnNvbkBleGFtcGxlLm9yZykK + <BLANKLINE> + --... + Content-Type: multipart/digest; boundary="..." + MIME-Version: 1.0 + <BLANKLINE> + --... + Content-Type: message/rfc822 + MIME-Version: 1.0 + <BLANKLINE> + Content-Transfer-Encoding: 7bit + From: aperson@example.org + MIME-Version: 1.0 + To: _xtest@example.com + Content-Type: text/plain; charset=iso-2022-jp + Subject: =?iso-2022-jp?b?GyRCMGxIVhsoQg==?= + Message: 1 + <BLANKLINE> + $B0lHV(B + <BLANKLINE> + <BLANKLINE> + --... + --... + >>> dump_msgdata(mimedata) + _parsemsg: False + isdigest : True + listname : _xtest@example.com + recips : set([]) + version : 3 + + >>> print rfc1153msg.as_string() + From: _xtest-request@example.com + Subject: Groupe XTest, Vol. 2, Parution 11 + To: _xtest@example.com + Reply-To: _xtest@example.com + Date: ... + Message-ID: ... + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + <BLANKLINE> + ... + <BLANKLINE> + >>> dump_msgdata(rfc1153data) + _parsemsg: False + isdigest : True + listname : _xtest@example.com + recips : set([]) + version : 3 diff --git a/src/mailman/pipeline/docs/file-recips.txt b/src/mailman/pipeline/docs/file-recips.txt new file mode 100644 index 000000000..81510b6e7 --- /dev/null +++ b/src/mailman/pipeline/docs/file-recips.txt @@ -0,0 +1,96 @@ +File recipients +=============== + +Mailman can calculate the recipients for a message from a Sendmail-style +include file. This file must be called members.txt and it must live in the +list's data directory. + + >>> handler = config.handlers['file-recipients'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + + +Short circuiting +---------------- + +If the message's metadata already has recipients, this handler immediately +returns. 
+ + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message. + ... """) + >>> msgdata = {'recips': 7} + >>> handler.process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + <BLANKLINE> + A message. + <BLANKLINE> + >>> msgdata + {'recips': 7} + + +Missing file +------------ + +The include file must live inside the list's data directory, under the name +members.txt. If the file doesn't exist, the list of recipients will be +empty. + + >>> import os + >>> file_path = os.path.join(mlist.data_path, 'members.txt') + >>> open(file_path) + Traceback (most recent call last): + ... + IOError: [Errno ...] + No such file or directory: u'.../_xtest@example.com/members.txt' + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + [] + + +Existing file +------------- + +If the file exists, it contains a list of addresses, one per line. These +addresses are returned as the set of recipients. + + >>> fp = open(file_path, 'w') + >>> try: + ... print >> fp, 'bperson@example.com' + ... print >> fp, 'cperson@example.com' + ... print >> fp, 'dperson@example.com' + ... print >> fp, 'eperson@example.com' + ... print >> fp, 'fperson@example.com' + ... print >> fp, 'gperson@example.com' + ... finally: + ... fp.close() + + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + ['bperson@example.com', 'cperson@example.com', 'dperson@example.com', + 'eperson@example.com', 'fperson@example.com', 'gperson@example.com'] + +However, if the sender of the original message is a member of the list and +their address is in the include file, the sender's address is /not/ included +in the recipients list. + + >>> from mailman.interfaces.member import MemberRole + >>> address_1 = config.db.user_manager.create_address( + ... 
u'cperson@example.com') + >>> address_1.subscribe(mlist, MemberRole.member) + <Member: cperson@example.com on _xtest@example.com as MemberRole.member> + + >>> msg = message_from_string("""\ + ... From: cperson@example.com + ... + ... A message. + ... """) + >>> msgdata = {} + >>> handler.process(mlist, msg, msgdata) + >>> sorted(msgdata['recips']) + ['bperson@example.com', 'dperson@example.com', + 'eperson@example.com', 'fperson@example.com', 'gperson@example.com'] diff --git a/src/mailman/pipeline/docs/filtering.txt b/src/mailman/pipeline/docs/filtering.txt new file mode 100644 index 000000000..70ca3098d --- /dev/null +++ b/src/mailman/pipeline/docs/filtering.txt @@ -0,0 +1,340 @@ +Content filtering +================= + +Mailman can filter the content of messages posted to a mailing list by +stripping MIME subparts, and possibly reorganizing the MIME structure of a +message. It does this with the MimeDel handler module, although other +handlers can potentially do other kinds of finer level content filtering. + + >>> from mailman.pipeline.mime_delete import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Several mailing list options control content filtering. First, the feature +must be enabled, then there are two options that control which MIME types get +filtered and which get passed. Finally, there is an option to control whether +text/html parts will get converted to plain text. Let's set up some defaults +for these variables, then we'll explain them in more detail below. + + >>> mlist.filter_content = True + >>> mlist.filter_mime_types = [] + >>> mlist.pass_mime_types = [] + >>> mlist.convert_html_to_plaintext = False + + +Filtering the outer content type +-------------------------------- + +A simple filtering setting will just search the content types of the message's +parts, discarding all parts with a matching MIME type.
If the message's outer +content type matches the filter, the entire message will be discarded. + + >>> mlist.filter_mime_types = ['image/jpeg'] + >>> # XXX Change this to an enum + >>> mlist.filter_action = 0 # Discard + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxxxx + ... """) + >>> process(mlist, msg, {}) + Traceback (most recent call last): + ... + DiscardMessage + +However, if we turn off content filtering altogether, then the handler +short-circuits. + + >>> mlist.filter_content = False + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {} + +Similarly, no content filtering is performed on digest messages, which are +crafted internally by Mailman. + + >>> mlist.filter_content = True + >>> msgdata = {'isdigest': True} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: image/jpeg + MIME-Version: 1.0 + <BLANKLINE> + xxxxx + >>> msgdata + {'isdigest': True} + + +Simple multipart filtering +-------------------------- + +If one of the subparts in a multipart message matches the filter type, then +just that subpart will be stripped. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUNDARY + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUNDARY-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... 
+ <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + + +Collapsing multipart/alternative messages +----------------------------------------- + +When content filtering encounters a multipart/alternative part, and the +results of filtering leave only one of the subparts, then the +multipart/alternative may be collapsed. For example, in the following +message, the outer content type is a multipart/mixed. Inside this part is +just a single subpart that has a content type of multipart/alternative. This +inner multipart has two subparts, a jpeg and a gif. + +Content filtering will remove the jpeg part, leaving the multipart/alternative +with only a single gif subpart. Because there's only one subpart left, the +MIME structure of the message will be reorganized, removing the inner +multipart/alternative so that the outer multipart/mixed has just a single gif +subpart. + + >>> mlist.collapse_alternatives = True + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=BOUNDARY + ... MIME-Version: 1.0 + ... + ... --BOUNDARY + ... Content-Type: multipart/alternative; boundary=BOUND2 + ... MIME-Version: 1.0 + ... + ... --BOUND2 + ... Content-Type: image/jpeg + ... MIME-Version: 1.0 + ... + ... xxx + ... + ... --BOUND2 + ... Content-Type: image/gif + ... MIME-Version: 1.0 + ... + ... yyy + ... --BOUND2-- + ... + ... --BOUNDARY-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=BOUNDARY + MIME-Version: 1.0 + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --BOUNDARY + Content-Type: image/gif + MIME-Version: 1.0 + <BLANKLINE> + yyy + --BOUNDARY-- + <BLANKLINE> + +When the outer part is a multipart/alternative and filtering leaves this outer +part with just one subpart, the entire message is converted to the left over +part's content type. 
In other words, the left over inner part is promoted to +being the outer part. + + >>> mlist.filter_mime_types.append('text/html') + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/alternative; boundary=AAA + ... + ... --AAA + ... Content-Type: text/html + ... + ... <b>This is some html</b> + ... --AAA + ... Content-Type: text/plain + ... + ... This is plain text + ... --AAA-- + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + This is plain text + +Clean up. + + >>> ignore = mlist.filter_mime_types.pop() + + +Conversion to plain text +------------------------ + +Many mailing lists prohibit HTML email, and in fact, such email can be a +phishing or spam vector. However, many mail readers will send HTML email by +default because users think it looks pretty. One approach to handling this +would be to filter out text/html parts and rely on multipart/alternative +collapsing to leave just a plain text part. This works because many mail +readers that send HTML email actually send a plain text part in the second +subpart of such multipart/alternatives. + +While this is a good suggestion for plain text-only mailing lists, often a +mail reader will send only a text/html part with no plain text alternative. +In this case, the site administrator can enable text/html to text/plain +conversion by defining a conversion command. A list administrator still needs +to enable such conversion for their list though. + + >>> mlist.convert_html_to_plaintext = True + +By default, Mailman sends the message through lynx, but since this program is +not guaranteed to exist, we'll craft a simple, but stupid script to simulate +the conversion process. The script expects a single argument, which is the +name of the file containing the message payload to filter.
+ + >>> import os, sys + >>> script_path = os.path.join(config.DATA_DIR, 'filter.py') + >>> fp = open(script_path, 'w') + >>> try: + ... print >> fp, """\ + ... import sys + ... print 'Converted text/html to text/plain' + ... print 'Filename:', sys.argv[1] + ... """ + ... finally: + ... fp.close() + >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % ( + ... sys.executable, script_path) + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: text/html + ... MIME-Version: 1.0 + ... + ... <html><head></head> + ... <body></body></html> + ... """) + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + MIME-Version: 1.0 + Content-Type: text/plain + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + Converted text/html to text/plain + Filename: ... + <BLANKLINE> + + +Discarding empty parts +---------------------- + +Similarly, if after filtering a multipart section ends up empty, then the +entire multipart is discarded. For example, here's a message where an inner +multipart/mixed contains two jpeg subparts. Both jpegs are filtered out, so +the entire inner multipart/mixed is discarded. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Content-Type: multipart/mixed; boundary=AAA + ... + ... --AAA + ... Content-Type: multipart/mixed; boundary=BBB + ... + ... --BBB + ... Content-Type: image/jpeg + ... + ... xxx + ... --BBB + ... Content-Type: image/jpeg + ... + ... yyy + ... --BBB--- + ... --AAA + ... Content-Type: multipart/alternative; boundary=CCC + ... + ... --CCC + ... Content-Type: text/html + ... + ... <h2>This is a header</h2> + ... + ... --CCC + ... Content-Type: text/plain + ... + ... A different message + ... --CCC-- + ... --AAA + ... Content-Type: image/gif + ... + ... zzz + ... --AAA + ... Content-Type: image/gif + ... + ... aaa + ... --AAA-- + ...
""") + >>> process(mlist, msg, {}) + >>> print msg.as_string() + From: aperson@example.com + Content-Type: multipart/mixed; boundary=AAA + X-Content-Filtered-By: Mailman/MimeDel ... + <BLANKLINE> + --AAA + MIME-Version: 1.0 + Content-Type: text/plain + <BLANKLINE> + Converted text/html to text/plain + Filename: ... + <BLANKLINE> + --AAA + Content-Type: image/gif + <BLANKLINE> + zzz + --AAA + Content-Type: image/gif + <BLANKLINE> + aaa + --AAA-- + <BLANKLINE> + + +Passing MIME types +------------------ + +XXX Describe the pass_mime_types setting and how it interacts with +filter_mime_types. diff --git a/src/mailman/pipeline/docs/nntp.txt b/src/mailman/pipeline/docs/nntp.txt new file mode 100644 index 000000000..3f48be1da --- /dev/null +++ b/src/mailman/pipeline/docs/nntp.txt @@ -0,0 +1,65 @@ +NNTP (i.e. Usenet) Gateway +========================== + +Mailman has an NNTP gateway, whereby messages posted to the mailing list can +be forwarded onto an NNTP newsgroup. Typically this means Usenet, but since +NNTP is to Usenet as IP is to the web, it's more general than that. + + >>> handler = config.handlers['to-usenet'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + >>> switchboard = config.switchboards['news'] + +Gatewaying from the mailing list to the newsgroup happens through a separate +'nntp' queue and happens immediately when the message is posted through to the +list. Note that gatewaying from the newsgroup to the list happens via a +cronjob (currently not shown). + +There are several situations which prevent a message from being gatewayed to +the newsgroup. The feature could be disabled, as is the default. + + >>> mlist.gateway_to_news = False + >>> msg = message_from_string("""\ + ... Subject: An important message + ... + ... Something of great import. + ...
""") + >>> handler.process(mlist, msg, {}) + >>> switchboard.files + [] + +Even if enabled, messages that came from the newsgroup are never gated back to +the newsgroup. + + >>> mlist.gateway_to_news = True + >>> handler.process(mlist, msg, {'fromusenet': True}) + >>> switchboard.files + [] + +Neither are digests ever gated to the newsgroup. + + >>> handler.process(mlist, msg, {'isdigest': True}) + >>> switchboard.files + [] + +However, other posted messages get gated to the newsgroup via the nntp queue. +The list owner can set the linked newsgroup and the nntp host that its +messages are gated to. + + >>> mlist.linked_newsgroup = u'comp.lang.thing' + >>> mlist.nntp_host = u'news.example.com' + >>> handler.process(mlist, msg, {}) + >>> len(switchboard.files) + 1 + >>> filebase = switchboard.files[0] + >>> msg, msgdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print msg.as_string() + Subject: An important message + <BLANKLINE> + Something of great import. + <BLANKLINE> + >>> dump_msgdata(msgdata) + _parsemsg: False + listname : _xtest@example.com + version : 3 diff --git a/src/mailman/pipeline/docs/reply-to.txt b/src/mailman/pipeline/docs/reply-to.txt new file mode 100644 index 000000000..e57b97e5d --- /dev/null +++ b/src/mailman/pipeline/docs/reply-to.txt @@ -0,0 +1,127 @@ +Reply-to munging +================ + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. 
+ + >>> from mailman.pipeline.cook_headers import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + +Reply-to munging refers to the behavior where a mailing list can be configured +to change or augment an existing Reply-To header in a message posted to the +list. Reply-to munging is fairly controversial, with arguments made either +for or against munging. + +The Mailman developers' position, and I believe the majority consensus, is to do no +Reply-to munging, under several principles. Primarily, most reply-to munging +is requested by people who do not have both a Reply and Reply All button on +their mail reader. If you do not munge Reply-To, then these buttons will work +properly, but if you munge the header, it is impossible for these buttons to +work right, because both will reply to the list. This leads to unfortunate +accidents where a private message is accidentally posted to the entire list. + +However, Mailman gives list owners the option to do Reply-To munging anyway, +mostly as a way to shut up the really vocal minority who seem to insist on +this mis-feature. + + +Reply to list +------------- + +A list can be configured to add a Reply-To header pointing back to the mailing +list's posting address. If there's no Reply-To header in the original +message, the list's posting address simply gets inserted. + + >>> from mailman.interfaces.mailinglist import ReplyToMunging + >>> mlist.reply_goes_to_list = ReplyToMunging.point_to_list + >>> mlist.preferred_language = u'en' + >>> mlist.description = u'' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'_xtest@example.com' + +It's also possible to strip any existing Reply-To header first, before adding +the list's posting address. + + >>> mlist.first_strip_reply_to = True + >>> msg = message_from_string("""\ + ...
From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'_xtest@example.com' + +If you don't first strip the header, then the list's posting address will just +get appended to whatever the original version was. + + >>> mlist.first_strip_reply_to = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'bperson@example.com, _xtest@example.com' + + +Explicit Reply-To +----------------- + +The list can also be configured to have an explicit Reply-To header. + + >>> mlist.reply_goes_to_list = ReplyToMunging.explicit_header + >>> mlist.reply_to_address = u'my-list@example.com' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com' + +And as before, it's possible to either strip any existing Reply-To header... + + >>> mlist.first_strip_reply_to = True + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... """) + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com' + +...or not. + + >>> mlist.first_strip_reply_to = False + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Reply-To: bperson@example.com + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> len(msg.get_all('reply-to')) + 1 + >>> msg['reply-to'] + u'my-list@example.com, bperson@example.com' diff --git a/src/mailman/pipeline/docs/replybot.txt b/src/mailman/pipeline/docs/replybot.txt new file mode 100644 index 000000000..f3c3281b3 --- /dev/null +++ b/src/mailman/pipeline/docs/replybot.txt @@ -0,0 +1,213 @@ +Auto-reply handler +================== + +Mailman has an auto-reply handler that sends automatic responses to messages +it receives on its posting address, or special robot addresses. Automatic +responses are subject to various conditions, such as headers in the original +message or the amount of time since the last auto-response. + + >>> from mailman.pipeline.replybot import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.real_name = u'XTest' + + >>> # Ensure that the virgin queue is empty, since we'll be checking this + >>> # for new auto-response messages. + >>> virginq = config.switchboards['virgin'] + >>> virginq.files + [] + + +Basic autoresponding +-------------------- + +Basic autoresponding occurs when the list is set up to respond to either its +-owner address, its -request address, or to the posting address, and a message +is sent to one of these addresses. A mailing list also has an autoresponse +grace period which describes how much time must pass before a second response +will be sent, with 0 meaning "there is no grace period". + + >>> import datetime + >>> mlist.autorespond_admin = True + >>> mlist.autoresponse_graceperiod = datetime.timedelta() + >>> mlist.autoresponse_admin_text = u'admin autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest-owner@example.com + ... + ... help + ... """) + >>> process(mlist, msg, dict(toowner=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> # Print only some of the meta data. The rest is uninteresting. 
+ >>> qdata['listname'] + u'_xtest@example.com' + >>> sorted(qdata['recips']) + [u'aperson@example.com'] + >>> # Delete data that is time dependent or random + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + admin autoresponse text + >>> virginq.files + [] + + +Short circuiting +---------------- + +Several headers in the original message determine whether an autoresponse +should even be sent. For example, if the message has an "X-Ack: No" header, +no auto-response is sent. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... X-Ack: No + ... + ... help me + ... """) + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + +Mailman itself can suppress autoresponses for certain types of internally +crafted messages, by setting the 'noack' metadata key. + + >>> msg = message_from_string("""\ + ... From: mailman@example.com + ... + ... help for you + ... """) + >>> process(mlist, msg, dict(noack=True, toowner=True)) + >>> virginq.files + [] + +If there is a Precedence: header with any of the values 'bulk', 'junk', or +'list', then the autoresponse is also suppressed. + + >>> msg = message_from_string("""\ + ... From: asystem@example.com + ... Precedence: bulk + ... + ... hey! + ... """) + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + + >>> msg.replace_header('precedence', 'junk') + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + >>> msg.replace_header('precedence', 'list') + >>> process(mlist, msg, dict(toowner=True)) + >>> virginq.files + [] + +Unless the X-Ack: header has a value of "yes", in which case, the Precedence +header is ignored. 
+ + >>> msg['X-Ack'] = 'yes' + >>> process(mlist, msg, dict(toowner=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: asystem@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + admin autoresponse text + + +Available auto-responses +------------------------ + +As shown above, a message sent to the -owner address will get an auto-response +with the text set for owner responses. Two other types of email will get +auto-responses: those sent to the -request address... + + >>> mlist.autorespond_requests = True + >>> mlist.autoresponse_request_text = u'robot autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest-request@example.com + ... + ... help me + ... """) + >>> process(mlist, msg, dict(torequest=True)) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + robot autoresponse text + +...and those sent to the posting address. + + >>> mlist.autorespond_postings = True + >>> mlist.autoresponse_postings_text = u'postings autoresponse text' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... To: _xtest@example.com + ... + ... help me + ... 
""") + >>> process(mlist, msg, {}) + >>> len(virginq.files) + 1 + >>> qmsg, qdata = virginq.dequeue(virginq.files[0]) + >>> del qmsg['message-id'] + >>> del qmsg['date'] + >>> print qmsg.as_string() + MIME-Version: 1.0 + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + Subject: Auto-response for your message to the "XTest" mailing list + From: _xtest-bounces@example.com + To: aperson@example.com + X-Mailer: The Mailman Replybot + X-Ack: No + Precedence: bulk + <BLANKLINE> + postings autoresponse text + + +Grace periods +------------- + +Auto-responses have a grace period, during which no additional responses will +be sent. This is so as not to bombard the sender with responses. The grace +period is measured in days. + +XXX Add grace period tests. diff --git a/src/mailman/pipeline/docs/scrubber.txt b/src/mailman/pipeline/docs/scrubber.txt new file mode 100644 index 000000000..dec1c1f64 --- /dev/null +++ b/src/mailman/pipeline/docs/scrubber.txt @@ -0,0 +1,225 @@ +The scrubber +============ + +The scrubber is an integral part of Mailman, both in the normal delivery of +messages and in components such as the archiver. Its primary purpose is to +scrub attachments from messages so that binary goop doesn't end up in an +archive message. + + >>> from mailman.pipeline.scrubber import process, save_attachment + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Helper functions for getting the attachment data. + + >>> import os, re + >>> def read_attachment(filename, remove=True): + ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, + ... mlist.fqdn_listname, filename) + ... fp = open(path) + ... try: + ... data = fp.read() + ... finally: + ... fp.close() + ... if remove: + ... os.unlink(path) + ... return data + + >>> from urlparse import urlparse + >>> def read_url_from_message(msg): + ... url = None + ... for line in msg.get_payload().splitlines(): + ... 
mo = re.match('URL: <(?P<url>[^>]+)>', line) + ... if mo: + ... url = mo.group('url') + ... break + ... path = '/'.join(urlparse(url).path.split('/')[3:]) + ... return read_attachment(path) + + +Saving attachments +------------------ + +The Scrubber handler exposes a function called save_attachment() which can be +used to strip various types of attachments and store them in the archive +directory. This is a public interface used by components outside the normal +processing pipeline. + +Site administrators can decide whether the scrubber should use the attachment +filename suggested in the message's Content-Disposition: header or not. If +enabled, the filename will be used when this header attribute is present (yes, +this is an unfortunate double negative). + + >>> config.push('test config', """ + ... [scrubber] + ... use_attachment_filename: yes + ... """) + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... """) + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif>' + >>> data = read_attachment('dir/xtest.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + +Saving the attachment does not alter the original message. + + >>> print msg.as_string() + Content-Type: image/gif; name="xtest.gif" + Content-Transfer-Encoding: base64 + Content-Disposition: attachment; filename="xtest.gif" + <BLANKLINE> + R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + +The site administrator can also configure Mailman to ignore the +Content-Disposition: filename. This is the default. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [scrubber] + ... use_attachment_filename: no + ... """) + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ...
Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... """) + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif>' + >>> data = read_attachment('dir/xtest.gif') + Traceback (most recent call last): + IOError: [Errno ...] No such file or directory: + u'.../archives/private/_xtest@example.com/dir/xtest.gif' + >>> data = read_attachment('dir/attachment.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing image attachments +--------------------------- + +When scrubbing image attachments, the original message is modified to include +a reference to the attachment file as available through the on-line archive. + + >>> msg = message_from_string("""\ + ... MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii + ... + ... This is a message. + ... --BOUNDARY + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + +The Scrubber.process() function is different than other handler process +functions in that it returns the scrubbed message. + + >>> scrubbed_msg = process(mlist, msg, msgdata) + >>> scrubbed_msg is msg + True + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... 
+ Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + +This is the same as the transformed message originally passed in. + + >>> print msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... + Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + >>> msgdata + {} + +The URL will point to the attachment sitting in the archive. + + >>> data = read_url_from_message(msg) + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing text attachments +-------------------------- + +Similar to image attachments, text attachments will also be scrubbed, but the +placeholder will be slightly different. + + >>> msg = message_from_string("""\ + ... MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no + ... + ... This is a message. + ... --BOUNDARY + ... Content-type: text/plain; name="xtext.txt" + ... Content-Disposition: attachment; filename="xtext.txt" + ... + ... This is a text attachment. + ... --BOUNDARY-- + ... """) + >>> scrubbed_msg = process(mlist, msg, {}) + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Transfer-Encoding: 7bit + Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no" + <BLANKLINE> + This is a message. + -------------- next part -------------- + An embedded and charset-unspecified text was scrubbed... 
+ Name: xtext.txt + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt> + <BLANKLINE> + >>> read_url_from_message(msg) + 'This is a text attachment.' + + +Clean up +-------- + + >>> config.pop('test config') diff --git a/src/mailman/pipeline/docs/subject-munging.txt b/src/mailman/pipeline/docs/subject-munging.txt new file mode 100644 index 000000000..b2972683b --- /dev/null +++ b/src/mailman/pipeline/docs/subject-munging.txt @@ -0,0 +1,244 @@ +Subject munging +=============== + +Messages that flow through the global pipeline get their headers 'cooked', +which basically means that their headers go through several mostly unrelated +transformations. Some headers get added, others get changed. Some of these +changes depend on mailing list settings and others depend on how the message +is getting sent through the system. We'll take things one-by-one. + + >>> from mailman.pipeline.cook_headers import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.subject_prefix = u'' + + +Inserting a prefix +------------------ + +Another thing CookHeaders does is 'munge' the Subject header by inserting the +subject prefix for the list at the front. If there's no subject header in the +original message, Mailman uses a canned default. In order to do subject +munging, a mailing list must have a preferred language. + + >>> mlist.subject_prefix = u'[XTest] ' + >>> mlist.preferred_language = u'en' + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + +The original subject header is stored in the message metadata. We must print +the new Subject header because it gets converted from a string to an +email.header.Header instance which has an unhelpful repr. 
+ + >>> msgdata['origsubj'] + u'' + >>> print msg['subject'] + [XTest] (no subject) + +If the original message had a Subject header, then the prefix is inserted at +the beginning of the header's value. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> msgdata['origsubj'] + u'Something important' + >>> print msg['subject'] + [XTest] Something important + +Subject headers are not munged for digest messages. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(isdigest=True)) + >>> msg['subject'] + u'Something important' + +Nor are they munged for 'fast tracked' messages, which are generally defined +as messages that Mailman crafts internally. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, dict(_fasttrack=True)) + >>> msg['subject'] + u'Something important' + +If a Subject header already has a prefix, usually following a Re: marker, +another one will not be added but the prefix will be moved to the front of the +header text. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: Re: [XTest] Something important + ... + ... A message of great import. + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Re: Something important + +If the Subject header has a prefix at the front of the header text, that's +where it will stay. This is called 'new style' prefixing and is the only +option available in Mailman 3. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: [XTest] Re: Something important + ... + ... A message of great import. + ... 
""") + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Re: Something important + + +Internationalized headers +------------------------- + +Internationalization adds some interesting twists to the handling of subject +prefixes. Part of what makes this interesting is the encoding of i18n headers +using RFC 2047, and lists whose preferred language is in a different character +set than the encoded header. + + >>> msg = message_from_string("""\ + ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + >>> unicode(msg['subject']) + u'[XTest] \u30e1\u30fc\u30eb\u30de\u30f3' + + +Prefix numbers +-------------- + +Subject prefixes support a placeholder for the numeric post id. Every time a +message is posted to the mailing list, a 'post id' gets incremented. This is +a purely sequential integer that increases monotonically. By adding a '%d' +placeholder to the subject prefix, this post id can be included in the prefix. + + >>> mlist.subject_prefix = u'[XTest %d] ' + >>> mlist.post_id = 456 + >>> msg = message_from_string("""\ + ... Subject: Something important + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Something important + +This works even when the message is a reply, except that in this case, the +numeric post id in the generated subject prefix is updated with the new post +id. + + >>> msg = message_from_string("""\ + ... Subject: [XTest 123] Re: Something important + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: Something important + +If the Subject header had old style prefixing, the prefix is moved to the +front of the header text. + + >>> msg = message_from_string("""\ + ... Subject: Re: [XTest 123] Something important + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: Something important + + +And of course, the proper thing is done when posting id numbers are included +in the subject prefix, and the subject is encoded non-ascii. + + >>> msg = message_from_string("""\ + ... Subject: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + >>> unicode(msg['subject']) + u'[XTest 456] \u30e1\u30fc\u30eb\u30de\u30f3' + +Even more fun is when the i18n Subject header already has a prefix, possibly +with a different posting number. + + >>> msg = message_from_string("""\ + ... Subject: [XTest 123] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + +# XXX This requires Python email patch #1681333 to succeed. +# >>> unicode(msg['subject']) +# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3' + +As before, old style subject prefixes are re-ordered. + + >>> msg = message_from_string("""\ + ... Subject: Re: [XTest 123] =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest 456] Re: + =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + +# XXX This requires Python email patch #1681333 to succeed. +# >>> unicode(msg['subject']) +# u'[XTest 456] Re: \u30e1\u30fc\u30eb\u30de\u30f3' + + +In this test case, we get an extra space between the prefix and the original +subject. It's because the original is 'crooked'. Note that a Subject +starting with '\n ' is generated by some version of Eudora Japanese edition. + + >>> mlist.subject_prefix = u'[XTest] ' + >>> msg = message_from_string("""\ + ... Subject: + ... Important message + ... + ... 
""") + >>> process(mlist, msg, {}) + >>> print msg['subject'] + [XTest] Important message + +And again, with an RFC 2047 encoded header. + + >>> msg = message_from_string("""\ + ... Subject: + ... =?iso-2022-jp?b?GyRCJWEhPCVrJV4lcxsoQg==?= + ... + ... """) + >>> process(mlist, msg, {}) + +# XXX This one does not appear to work the same way as +# test_subject_munging_prefix_crooked() in the old Python-based tests. I need +# to get Tokio to look at this. +# >>> print msg['subject'] +# [XTest] =?iso-2022-jp?b?IBskQiVhITwlayVeJXMbKEI=?= diff --git a/src/mailman/pipeline/docs/tagger.txt b/src/mailman/pipeline/docs/tagger.txt new file mode 100644 index 000000000..9f0bcd4b2 --- /dev/null +++ b/src/mailman/pipeline/docs/tagger.txt @@ -0,0 +1,235 @@ +Message tagger +============== + +Mailman has a topics system which works like this: a mailing list +administrator sets up one or more topics, which is essentially a named regular +expression. The topic name can be any arbitrary string, and the name serves +double duty as the 'topic tag'. Each message that flows the mailing list has +its Subject: and Keywords: headers compared against these regular +expressions. The message then gets tagged with the topic names of each hit. + + >>> from mailman.pipeline.tagger import process + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + +Topics must be enabled for Mailman to do any topic matching, even if topics +are defined. + + >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)] + >>> mlist.topics_enabled = False + >>> mlist.topics_bodylines_limit = 0 + + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + <BLANKLINE> + <BLANKLINE> + >>> msgdata + {} + +However, once topics are enabled, message will be tagged. 
There are two +artifacts of tagging; an X-Topics: header is added with the topic name, and +the message metadata gets a key with a list of matching topic names. + + >>> mlist.topics_enabled = True + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + X-Topics: bar fight + <BLANKLINE> + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning body lines +------------------- + +The tagger can also look at a certain number of body lines, but only for +Subject: and Keyword: header-like lines. When set to zero, no body lines are +scanned. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata + {} + +But let the tagger scan a few body lines and the matching headers will be +found. + + >>> mlist.topics_bodylines_limit = 5 + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + X-Topics: bar fight + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +However, scanning stops at the first body line that doesn't look like a +header. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... 
Subject: nothing + ... Keywords: at all + ... + ... This is not a header + ... Subject: foobar + ... Keywords: barbaz + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + This is not a header + Subject: foobar + Keywords: barbaz + >>> msgdata + {} + +When set to a negative number, all body lines will be scanned. + + >>> mlist.topics_bodylines_limit = -1 + >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100) + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... %s + ... Subject: foobar + ... Keywords: barbaz + ... """ % lots_of_headers) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> # Rather than print out 100 X-Ignore: headers, let's just prove that + >>> # the X-Topics: header exists, meaning that the tagger did its job. + >>> msg['x-topics'] + u'bar fight' + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning sub-parts +------------------ + +The tagger will also scan the body lines of text subparts in a multipart +message, using the same rules as if all those body lines lived in a single +text payload. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Was + Keywords: Raw + Content-Type: multipart/alternative; boundary="BOUNDARY" + X-Topics: bar fight + <BLANKLINE> + --BOUNDARY + From: sabo + To: obas + <BLANKLINE> + Subject: farbaw + Keywords: barbaz + <BLANKLINE> + --BOUNDARY-- + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +But the tagger will not descend into non-text parts. 
+ + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary=BOUNDARY + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg['x-topics'] + None + >>> msgdata + {} diff --git a/src/mailman/pipeline/docs/to-outgoing.txt b/src/mailman/pipeline/docs/to-outgoing.txt new file mode 100644 index 000000000..5305db19f --- /dev/null +++ b/src/mailman/pipeline/docs/to-outgoing.txt @@ -0,0 +1,173 @@ +The outgoing handler +==================== + +Mailman's outgoing queue is used as the wrapper around SMTP delivery to the +upstream mail server. The ToOutgoing handler does little more than drop the +message into the outgoing queue, after calculating whether the message should +be VERP'd or not. VERP means Variable Envelope Return Path; we're using that +term somewhat incorrectly, but within the spirit of the standard, which +basically describes how to encode the recipient's address in the originator +headers for unambiguous bounce processing. + + >>> handler = config.handlers['to-outgoing'] + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> switchboard = config.switchboards['out'] + + >>> def queue_size(): + ... size = len(switchboard.files) + ... for filebase in switchboard.files: + ... msg, msgdata = switchboard.dequeue(filebase) + ... switchboard.finish(filebase) + ... return size + +Craft a message destined for the outgoing queue. Include some random metadata +as if this message had passed through some other handlers. + + >>> msg = message_from_string("""\ + ... Subject: Here is a message + ... + ... Something of great import. + ... 
""") + +When certain conditions are met, the message will be VERP'd. For example, if +the message metadata already has a VERP key, this message will be VERP'd. + + >>> msgdata = dict(foo=1, bar=2, verp=True) + >>> handler.process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Here is a message + <BLANKLINE> + Something of great import. + >>> msgdata['verp'] + True + +While the queued message will not be changed, the queued metadata will have an +additional key set: the mailing list name. + + >>> filebase = switchboard.files[0] + >>> qmsg, qmsgdata = switchboard.dequeue(filebase) + >>> switchboard.finish(filebase) + >>> print qmsg.as_string() + Subject: Here is a message + <BLANKLINE> + Something of great import. + >>> dump_msgdata(qmsgdata) + _parsemsg: False + bar : 2 + foo : 1 + listname : _xtest@example.com + verp : True + version : 3 + >>> queue_size() + 0 + +If the list is set to personalize deliveries, and the global configuration +option to VERP personalized deliveries is set, then the message will be +VERP'd. + + >>> config.push('test config', """ + ... [mta] + ... verp_personalized_deliveries: yes + ... """) + + >>> from mailman.interfaces.mailinglist import Personalization + >>> mlist.personalize = Personalization.individual + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> msgdata['verp'] + True + >>> queue_size() + 1 + +However, if the global configuration variable prohibits VERP'ing, even +personalized lists will not VERP. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [mta] + ... verp_personalized_deliveries: no + ... """) + + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> print msgdata.get('verp') + None + >>> queue_size() + 1 + +If the list is not personalized, then the message may still be VERP'd based on +the global configuration variable VERP_DELIVERY_INTERVAL. 
This variable tells +Mailman how often to VERP even non-personalized mailing lists. It can be set +to zero, which means non-personalized messages will never be VERP'd. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [mta] + ... verp_delivery_interval: 0 + ... """) + + >>> mlist.personalize = Personalization.none + >>> msgdata = dict(foo=1, bar=2) + >>> handler.process(mlist, msg, msgdata) + >>> print msgdata.get('verp') + None + >>> queue_size() + 1 + +If the interval is set to 1, then every message will be VERP'd. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [mta] + ... verp_delivery_interval: 1 + ... """) + + >>> for i in range(10): + ... msgdata = dict(foo=1, bar=2) + ... handler.process(mlist, msg, msgdata) + ... print i, msgdata['verp'] + 0 True + 1 True + 2 True + 3 True + 4 True + 5 True + 6 True + 7 True + 8 True + 9 True + >>> queue_size() + 10 + +If the interval is set to some other number, then one out of that many posts +will be VERP'd. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [mta] + ... verp_delivery_interval: 3 + ... """) + + >>> for i in range(10): + ... mlist.post_id = i + ... msgdata = dict(foo=1, bar=2) + ... handler.process(mlist, msg, msgdata) + ... print i, msgdata.get('verp', False) + 0 True + 1 False + 2 False + 3 True + 4 False + 5 False + 6 True + 7 False + 8 False + 9 True + >>> queue_size() + 10 + + +Clean up +======== + + >>> config.pop('test config') diff --git a/src/mailman/pipeline/file_recipients.py b/src/mailman/pipeline/file_recipients.py new file mode 100644 index 000000000..89d10d783 --- /dev/null +++ b/src/mailman/pipeline/file_recipients.py @@ -0,0 +1,65 @@ +# Copyright (C) 2001-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. 
class FileRecipients:
    """Get the normal delivery recipients from an include file.

    The addresses are read, one per line, from the `members.txt` file in the
    mailing list's data directory.
    """

    implements(IHandler)

    name = 'file-recipients'
    description = _('Get the normal delivery recipients from an include file.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Store the set of recipient addresses under `msgdata['recips']`.  If
        that key is already present, the message data is left untouched.  A
        missing include file yields an empty recipient set; any other I/O
        error is propagated to the caller.
        """
        if 'recips' in msgdata:
            return
        filename = os.path.join(mlist.data_path, 'members.txt')
        try:
            with open(filename) as fp:
                stripped = (line.strip() for line in fp)
                # Blank lines must not turn into an empty-string recipient.
                addrs = set(address for address in stripped if address)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # No include file means there is nobody to deliver to.
            msgdata['recips'] = set()
            return
        # If the sender is a member of the list, remove them from the file
        # recipients.
        sender = msg.get_sender()
        member = mlist.members.get_member(sender)
        if member is not None:
            addrs.discard(member.address.address)
        msgdata['recips'] = addrs
+""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'MIMEDelete', + ] + + +import os +import errno +import logging +import tempfile + +from email.Iterators import typed_subpart_iterator +from os.path import splitext +from zope.interface import implements + +from mailman.Utils import oneline +from mailman.config import config +from mailman.core import errors +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler +from mailman.queue import Switchboard +from mailman.version import VERSION + +log = logging.getLogger('mailman.error') + + + +def process(mlist, msg, msgdata): + # Short-circuits + if not mlist.filter_content: + return + if msgdata.get('isdigest'): + return + # We also don't care about our own digests or plaintext + ctype = msg.get_content_type() + mtype = msg.get_content_maintype() + # Check to see if the outer type matches one of the filter types + filtertypes = mlist.filter_mime_types + passtypes = mlist.pass_mime_types + if ctype in filtertypes or mtype in filtertypes: + dispose(mlist, msg, msgdata, + _("The message's content type was explicitly disallowed")) + # Check to see if there is a pass types and the outer type doesn't match + # one of these types + if passtypes and not (ctype in passtypes or mtype in passtypes): + dispose(mlist, msg, msgdata, + _("The message's content type was not explicitly allowed")) + # Filter by file extensions + filterexts = mlist.filter_filename_extensions + passexts = mlist.pass_filename_extensions + fext = get_file_ext(msg) + if fext: + if fext in filterexts: + dispose(mlist, msg, msgdata, + _("The message's file extension was explicitly disallowed")) + if passexts and not (fext in passexts): + dispose(mlist, msg, msgdata, + _("The message's file extension was not explicitly allowed")) + numparts = len([subpart for subpart in msg.walk()]) + # If the message is a multipart, filter out matching subparts + if msg.is_multipart(): + # Recursively 
filter out any subparts that match the filter list + prelen = len(msg.get_payload()) + filter_parts(msg, filtertypes, passtypes, filterexts, passexts) + # If the outer message is now an empty multipart (and it wasn't + # before!) then, again it gets discarded. + postlen = len(msg.get_payload()) + if postlen == 0 and prelen > 0: + dispose(mlist, msg, msgdata, + _("After content filtering, the message was empty")) + # Now replace all multipart/alternatives with just the first non-empty + # alternative. BAW: We have to special case when the outer part is a + # multipart/alternative because we need to retain most of the outer part's + # headers. For now we'll move the subpart's payload into the outer part, + # and then copy over its Content-Type: and Content-Transfer-Encoding: + # headers (any others?). + if mlist.collapse_alternatives: + collapse_multipart_alternatives(msg) + if ctype == 'multipart/alternative': + firstalt = msg.get_payload(0) + reset_payload(msg, firstalt) + # If we removed some parts, make note of this + changedp = 0 + if numparts <> len([subpart for subpart in msg.walk()]): + changedp = 1 + # Now perhaps convert all text/html to text/plain + if mlist.convert_html_to_plaintext and config.HTML_TO_PLAIN_TEXT_COMMAND: + changedp += to_plaintext(msg) + # If we're left with only two parts, an empty body and one attachment, + # recast the message to one of just that part + if msg.is_multipart() and len(msg.get_payload()) == 2: + if msg.get_payload(0).get_payload() == '': + useful = msg.get_payload(1) + reset_payload(msg, useful) + changedp = 1 + if changedp: + msg['X-Content-Filtered-By'] = 'Mailman/MimeDel {0}'.format(VERSION) + + + +def reset_payload(msg, subpart): + # Reset payload of msg to contents of subpart, and fix up content headers + payload = subpart.get_payload() + msg.set_payload(payload) + del msg['content-type'] + del msg['content-transfer-encoding'] + del msg['content-disposition'] + del msg['content-description'] + msg['Content-Type'] = 
subpart.get('content-type', 'text/plain') + cte = subpart.get('content-transfer-encoding') + if cte: + msg['Content-Transfer-Encoding'] = cte + cdisp = subpart.get('content-disposition') + if cdisp: + msg['Content-Disposition'] = cdisp + cdesc = subpart.get('content-description') + if cdesc: + msg['Content-Description'] = cdesc + + + +def filter_parts(msg, filtertypes, passtypes, filterexts, passexts): + # Look at all the message's subparts, and recursively filter + if not msg.is_multipart(): + return True + payload = msg.get_payload() + prelen = len(payload) + newpayload = [] + for subpart in payload: + keep = filter_parts(subpart, filtertypes, passtypes, + filterexts, passexts) + if not keep: + continue + ctype = subpart.get_content_type() + mtype = subpart.get_content_maintype() + if ctype in filtertypes or mtype in filtertypes: + # Throw this subpart away + continue + if passtypes and not (ctype in passtypes or mtype in passtypes): + # Throw this subpart away + continue + # check file extension + fext = get_file_ext(subpart) + if fext: + if fext in filterexts: + continue + if passexts and not (fext in passexts): + continue + newpayload.append(subpart) + # Check to see if we discarded all the subparts + postlen = len(newpayload) + msg.set_payload(newpayload) + if postlen == 0 and prelen > 0: + # We threw away everything + return False + return True + + + +def collapse_multipart_alternatives(msg): + if not msg.is_multipart(): + return + newpayload = [] + for subpart in msg.get_payload(): + if subpart.get_content_type() == 'multipart/alternative': + try: + firstalt = subpart.get_payload(0) + newpayload.append(firstalt) + except IndexError: + pass + else: + newpayload.append(subpart) + msg.set_payload(newpayload) + + + +def to_plaintext(msg): + changedp = False + for subpart in typed_subpart_iterator(msg, 'text', 'html'): + filename = tempfile.mktemp('.html') + fp = open(filename, 'w') + try: + fp.write(subpart.get_payload(decode=True)) + fp.close() + cmd = 
os.popen(config.HTML_TO_PLAIN_TEXT_COMMAND % + {'filename': filename}) + plaintext = cmd.read() + rtn = cmd.close() + if rtn: + log.error('HTML->text/plain error: %s', rtn) + finally: + try: + os.unlink(filename) + except OSError, e: + if e.errno <> errno.ENOENT: + raise + # Now replace the payload of the subpart and twiddle the Content-Type: + del subpart['content-transfer-encoding'] + subpart.set_payload(plaintext) + subpart.set_type('text/plain') + changedp = True + return changedp + + + +def dispose(mlist, msg, msgdata, why): + # filter_action == 0 just discards, see below + if mlist.filter_action == 1: + # Bounce the message to the original author + raise errors.RejectMessage, why + if mlist.filter_action == 2: + # Forward it on to the list owner + listname = mlist.internal_name() + mlist.ForwardMessage( + msg, + text=_("""\ +The attached message matched the %(listname)s mailing list's content filtering +rules and was prevented from being forwarded on to the list membership. You +are receiving the only remaining copy of the discarded message. + +"""), + subject=_('Content filtered message notification')) + if mlist.filter_action == 3 and \ + config.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES: + badq = Switchboard(config.BADQUEUE_DIR) + badq.enqueue(msg, msgdata) + # Most cases also discard the message + raise errors.DiscardMessage + + +def get_file_ext(m): + """ + Get filename extension. Caution: some virus don't put filename + in 'Content-Disposition' header. 
class MIMEDelete:
    """Filter the MIME content of messages."""

    # Declares that this class provides the IHandler interface.
    implements(IHandler)

    # Handler name and translated human-readable description.
    name = 'mime-delete'
    description = _('Filter the MIME content of messages.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        # The bare name `process` is resolved at module scope (a method body
        # does not see class attributes as bare names), so this delegates to
        # the module-level function rather than recursing into this method.
        process(mlist, msg, msgdata)
def process(mlist, msg, msgdata):
    """Apply the list's member and non-member moderation policy to a post.

    Messages whose metadata has a true 'approved' or 'fromusenet' key are
    passed through untouched.  Otherwise the first sender recognised by
    `mlist.isMember()` is treated as the posting member; if there is none the
    post is handled as a non-member post from `msg.get_sender()`.

    :param mlist: The mailing list the message was posted to.
    :param msg: The posted message.
    :param msgdata: The message metadata dictionary; 'sender' is set on it
        before a moderated member's post is held.
    :raises errors.RejectMessage: When the applicable action is "reject".
    :raises errors.DiscardMessage: When the applicable action is "discard".
    """
    # NOTE(review): neither `Hold` nor `ModeratedMemberPost` is imported or
    # defined in this module (the ModeratedMemberPost class above is commented
    # out), so every branch below that refers to them would raise NameError.
    # Confirm where these names are supposed to come from.
    if msgdata.get('approved') or msgdata.get('fromusenet'):
        return
    # First of all, is the poster a member or not?  The for/else leaves
    # `sender` bound to the first member address, or None if no sender is a
    # member.
    for sender in msg.get_senders():
        if mlist.isMember(sender):
            break
    else:
        sender = None
    if sender:
        # If the member's moderation flag is on, then perform the moderation
        # action.
        if mlist.getMemberOption(sender, config.Moderate):
            # Note that for member_moderation_action, 0==Hold, 1=Reject,
            # 2==Discard
            if mlist.member_moderation_action == 0:
                # Hold.  BAW: WIBNI we could add the member_moderation_notice
                # to the notice sent back to the sender?
                msgdata['sender'] = sender
                Hold.hold_for_approval(mlist, msg, msgdata,
                                       ModeratedMemberPost)
            elif mlist.member_moderation_action == 1:
                # Reject
                text = mlist.member_moderation_notice
                if text:
                    text = Utils.wrap(text)
                else:
                    # Use the default RejectMessage notice string
                    text = None
                raise errors.RejectMessage, text
            elif mlist.member_moderation_action == 2:
                # Discard.  BAW: Again, it would be nice if we could send a
                # discard notice to the sender
                raise errors.DiscardMessage
            else:
                assert 0, 'bad member_moderation_action'
        # Should we do anything explicit to mark this message as getting past
        # this point?  No, because further pipeline handlers will need to do
        # their own thing.
        return
    else:
        sender = msg.get_sender()
    # From here on out, we're dealing with non-members.
    if matches_p(sender, mlist.accept_these_nonmembers):
        return
    # The three checks below have no `return` of their own.  do_reject() and
    # do_discard() always raise; hold_for_approval() presumably raises as well
    # (see the "No return" markers) -- TODO confirm.  If it does not, control
    # falls through to the following checks.
    if matches_p(sender, mlist.hold_these_nonmembers):
        Hold.hold_for_approval(mlist, msg, msgdata, Hold.NonMemberPost)
        # No return
    if matches_p(sender, mlist.reject_these_nonmembers):
        do_reject(mlist)
        # No return
    if matches_p(sender, mlist.discard_these_nonmembers):
        do_discard(mlist, msg)
        # No return
    # Okay, so the sender wasn't specified explicitly by any of the non-member
    # moderation configuration variables.  Handle by way of generic non-member
    # action.
    # NOTE(review): the assertion admits the value 4, but only 0..3 are
    # handled below; a value of 4 silently accepts the message.
    assert 0 <= mlist.generic_nonmember_action <= 4
    if mlist.generic_nonmember_action == 0:
        # Accept
        return
    elif mlist.generic_nonmember_action == 1:
        Hold.hold_for_approval(mlist, msg, msgdata, Hold.NonMemberPost)
    elif mlist.generic_nonmember_action == 2:
        do_reject(mlist)
    elif mlist.generic_nonmember_action == 3:
        do_discard(mlist, msg)
+ if mlist.forward_auto_discards: + lang = mlist.preferred_language + varhelp = '%s/?VARHELP=privacy/sender/discard_these_nonmembers' % \ + mlist.GetScriptURL('admin', absolute=1) + nmsg = Message.UserNotification(mlist.GetOwnerEmail(), + mlist.GetBouncesEmail(), + _('Auto-discard notification'), + lang=lang) + nmsg.set_type('multipart/mixed') + text = MIMEText(Utils.wrap(_( + 'The attached message has been automatically discarded.')), + _charset=Utils.GetCharSet(lang)) + nmsg.attach(text) + nmsg.attach(MIMEMessage(msg)) + nmsg.send(mlist) + # Discard this sucker + raise errors.DiscardMessage diff --git a/src/mailman/pipeline/owner_recipients.py b/src/mailman/pipeline/owner_recipients.py new file mode 100644 index 000000000..ceb6ae0a1 --- /dev/null +++ b/src/mailman/pipeline/owner_recipients.py @@ -0,0 +1,34 @@ +# Copyright (C) 2001-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
def process(mlist, msg, msgdata):
    """Address the message to the list's owners and moderators.

    Stores three keys in `msgdata`: 'recips' (the concatenation of
    `mlist.owner` and `mlist.moderator`), 'nodecorate' (True, so no
    header/footer decoration is applied) and 'personalize' (False).

    :param mlist: The mailing list; its `owner` and `moderator` are read.
    :param msg: The message being processed (not inspected).
    :param msgdata: The message metadata dictionary, updated in place.
    """
    everyone_in_charge = mlist.owner + mlist.moderator
    settings = (
        ('recips', everyone_in_charge),
        # Don't decorate these messages with the header/footers
        ('nodecorate', True),
        ('personalize', False),
        )
    for key, value in settings:
        msgdata[key] = value
def process(mlist, msg, msgdata):
    """Send an automatic response to the sender, if the list is set up to.

    No response is sent when the message carries "X-Ack: No", when the
    metadata has a true 'noack' key, when the Precedence header is bulk, junk
    or list (unless "X-Ack: yes" is present), when the list does not
    autorespond to this kind of message, or when the sender is still inside
    the list's grace period.

    :param mlist: The mailing list the message was sent to.
    :param msg: The incoming message.
    :param msgdata: The message metadata dictionary; the 'noack', 'toowner'
        and 'torequest' keys are consulted.
    """
    # Normally, the replybot should get a shot at this message, but there are
    # some important short-circuits, mostly to suppress 'bot storms, at least
    # for well behaved email bots (there are other governors for misbehaving
    # 'bots).  First, if the original message has an "X-Ack: No" header, we
    # skip the replybot.  Then, if the message has a Precedence header with
    # values bulk, junk, or list, and there's no explicit "X-Ack: yes" header,
    # we short-circuit.  Finally, if the message metadata has a true 'noack'
    # key, then we skip the replybot too.
    ack = msg.get('x-ack', '').lower()
    if ack == 'no' or msgdata.get('noack'):
        return
    precedence = msg.get('precedence', '').lower()
    if ack != 'yes' and precedence in ('bulk', 'junk', 'list'):
        return
    # Check to see if the list is even configured to autorespond to this email
    # message.  Note: the mailowner script sets the `toadmin' or `toowner' key
    # (which for replybot purposes are equivalent), and the mailcmd script
    # sets the `torequest' key.
    toadmin = msgdata.get('toowner')
    torequest = msgdata.get('torequest')
    if ((toadmin and not mlist.autorespond_admin) or
        (torequest and not mlist.autorespond_requests) or
        (not toadmin and not torequest and not mlist.autorespond_postings)):
        return
    # Now see if we're in the grace period for this sender.  graceperiod <= 0
    # means always autorespond, as does an "X-Ack: yes" header (useful for
    # debugging).
    sender = msg.get_sender()
    now = time.time()
    graceperiod = mlist.autoresponse_graceperiod
    if graceperiod > NODELTA and ack != 'yes':
        if toadmin:
            quiet_until = mlist.admin_responses.get(sender, 0)
        elif torequest:
            quiet_until = mlist.request_responses.get(sender, 0)
        else:
            quiet_until = mlist.postings_responses.get(sender, 0)
        if quiet_until > now:
            return
    # Okay, we know we're going to auto-respond to this sender, craft the
    # message, send it, and update the database.
    #
    # NOTE(review): presumably _() fills in $realname from this function's
    # local variables, so the local must keep this exact name -- confirm
    # against mailman.i18n.
    realname = mlist.real_name
    subject = _(
        'Auto-response for your message to the "$realname" mailing list')
    # Do string interpolation into the autoresponse text
    d = dict(listname = realname,
             listurl = mlist.script_url('listinfo'),
             requestemail = mlist.request_address,
             owneremail = mlist.owner_address,
             )
    if toadmin:
        rtext = mlist.autoresponse_admin_text
    elif torequest:
        rtext = mlist.autoresponse_request_text
    else:
        rtext = mlist.autoresponse_postings_text
    # Interpolation and Wrap the response text.
    text = Utils.wrap(expand(rtext, d))
    outmsg = Message.UserNotification(sender, mlist.bounces_address,
                                      subject, text, mlist.preferred_language)
    outmsg['X-Mailer'] = _('The Mailman Replybot')
    # prevent recursions and mail loops!
    outmsg['X-Ack'] = 'No'
    outmsg.send(mlist)
    # update the grace period database
    if graceperiod > NODELTA:
        # graceperiod is a timedelta (it is compared against NODELTA above)
        # while `now` is a float of seconds since the epoch.  Adding the two
        # directly raises TypeError, so convert the interval to seconds first.
        grace_seconds = (graceperiod.days * 24 * 60 * 60 +
                         graceperiod.seconds +
                         graceperiod.microseconds / 1000000.0)
        quiet_until = now + grace_seconds
        if toadmin:
            mlist.admin_responses[sender] = quiet_until
        elif torequest:
            mlist.request_responses[sender] = quiet_until
        else:
            mlist.postings_responses[sender] = quiet_until
def guess_extension(ctype, ext):
    """Pick a file extension for content type `ctype`, preferring `ext`.

    mimetypes maps several extensions to one type (e.g. .doc, .dot and .wiz
    are all application/msword), which makes the reverse mapping ambiguous.
    If `ext` is one of the extensions known for `ctype` it is trusted and
    returned; otherwise the first known extension is returned as a guess.

    :param ctype: The MIME content type, e.g. 'text/plain'.
    :param ext: The extension taken from the attachment's file name.
    :return: The chosen extension, or the (false) empty list when no
        extension is known for `ctype`.
    """
    candidates = guess_all_extensions(ctype, strict=False)
    if ext in candidates:
        return ext
    if not candidates:
        # Nothing known for this type; callers only test truthiness, and the
        # empty list itself is what gets handed back.
        return candidates
    return candidates[0]
def calculate_attachments_dir(mlist, msg, msgdata):
    """Return the relative directory in which to store `msg`'s attachments.

    To avoid inode limitations, the scheme is:
        attachments/YYYYMMDD/<msgid-hash>
    The date component is taken, in order of preference, from the Date:
    header (or the 'received_time' metadata when there is no Date: header),
    the 'X-List-Received-Date' metadata, and finally the Unix-From line.  If
    none of these yields a date, '00000000' is used.

    :param mlist: The mailing list (not used by the calculation).
    :param msg: The message; a Message-ID: header is added if it has none.
    :param msgdata: The message metadata dictionary.
    :return: The path 'attachments/<date>/<hash>' relative to the list's
        archive directory.
    """
    fmt = '%Y%m%d'
    datestr = msg.get('Date')
    if datestr:
        # parsedate() returns None for an unparseable header; safe_strftime()
        # then returns None and the fallbacks below take over.
        now = parsedate(datestr)
    else:
        now = time.gmtime(msgdata.get('received_time', time.time()))
    datedir = safe_strftime(fmt, now)
    if not datedir:
        received = msgdata.get('X-List-Received-Date')
        if received:
            # A date string cannot be fed to strftime() directly (that always
            # fails), so parse it into a time tuple first.  Anything that is
            # not a string is passed through unchanged.
            if isinstance(received, basestring):
                received = parsedate(received)
            datedir = safe_strftime(fmt, received)
    if not datedir:
        # What next?  Unixfrom, I guess.  get_unixfrom() returns None when the
        # message has no envelope line; treat that as "no parts" so we fall
        # into the all-zeros default instead of crashing on None.split().
        parts = (msg.get_unixfrom() or '').split()
        try:
            month = {'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6,
                     'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12,
                     }.get(parts[3], 0)
            day = int(parts[4])
            year = int(parts[6])
        except (IndexError, ValueError):
            # Best we can do I think
            month = day = year = 0
        datedir = '%04d%02d%02d' % (year, month, day)
    assert datedir
    # As for the msgid hash, we'll base this part on the Message-ID: so that
    # all attachments for the same message end up in the same directory (we'll
    # uniquify the filenames in that directory as needed).  We use the first
    # 4 and last 4 hex digits (i.e. the first 2 and last 2 bytes) of the SHA1
    # hash of the message id as the basis of the directory name.  Clashes here
    # don't really matter too much, and that still gives us a 32-bit space to
    # work with.
    msgid = msg['message-id']
    if msgid is None:
        msgid = msg['Message-ID'] = make_msgid()
    # We assume that the message id actually /is/ unique!
    digest = hashlib.sha1(msgid).hexdigest()
    return os.path.join('attachments', datedir, digest[:4] + digest[-4:])
It will also work as far as + # scrubbing messages for the archive is concerned, but Pipermail + # doesn't pay any attention to the RFC 3676 parameters. The plain + # format digest is going to be a disaster in any case as some of + # messages will be format="flowed" and some not. ToDigest creates + # its own Content-Type: header for the plain digest which won't + # have RFC 3676 parameters. If the message Content-Type: headers + # are retained for display in the digest, the parameters will be + # there for information, but not for the MUA. This is the best we + # can do without having get_payload() process the parameters. + if charset is None: + charset = part.get_content_charset(lcset) + format = part.get_param('format') + delsp = part.get_param('delsp') + # TK: if part is attached then check charset and scrub if none + if part.get('content-disposition') and \ + not part.get_content_charset(): + url = save_attachment(mlist, part, dir) + filename = part.get_filename(_('not available')) + filename = Utils.oneline(filename, lcset) + replace_payload_by_text(part, _("""\ +An embedded and charset-unspecified text was scrubbed... +Name: $filename +URL: $url +"""), lcset) + elif ctype == 'text/html' and isinstance(sanitize, int): + if sanitize == 0: + if outer: + raise DiscardMessage + replace_payload_by_text(part, + _('HTML attachment scrubbed and removed'), + # Adding charset arg and removing content-type + # sets content-type to text/plain + lcset) + elif sanitize == 2: + # By leaving it alone, Pipermail will automatically escape it + pass + elif sanitize == 3: + # Pull it out as an attachment but leave it unescaped. This + # is dangerous, but perhaps useful for heavily moderated + # lists. + url = save_attachment(mlist, part, dir, filter_html=False) + replace_payload_by_text(part, _("""\ +An HTML attachment was scrubbed... +URL: $url +"""), lcset) + else: + # HTML-escape it and store it as an attachment, but make it + # look a /little/ bit prettier. 
:( + payload = Utils.websafe(part.get_payload(decode=True)) + # For whitespace in the margin, change spaces into + # non-breaking spaces, and tabs into 8 of those. Then use a + # mono-space font. Still looks hideous to me, but then I'd + # just as soon discard them. + def doreplace(s): + return s.replace(' ', ' ').replace('\t', ' '*8) + lines = [doreplace(s) for s in payload.split('\n')] + payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n' + part.set_payload(payload) + # We're replacing the payload with the decoded payload so this + # will just get in the way. + del part['content-transfer-encoding'] + url = save_attachment(mlist, part, dir, filter_html=False) + replace_payload_by_text(part, _("""\ +An HTML attachment was scrubbed... +URL: $url +"""), lcset) + elif ctype == 'message/rfc822': + # This part contains a submessage, so it too needs scrubbing + submsg = part.get_payload(0) + url = save_attachment(mlist, part, dir) + subject = submsg.get('subject', _('no subject')) + date = submsg.get('date', _('no date')) + who = submsg.get('from', _('unknown sender')) + size = len(str(submsg)) + replace_payload_by_text(part, _("""\ +An embedded message was scrubbed... +From: $who +Subject: $subject +Date: $date +Size: $size +URL: $url +"""), lcset) + # If the message isn't a multipart, then we'll strip it out as an + # attachment that would have to be separately downloaded. Pipermail + # will transform the url into a hyperlink. + elif part._payload and not part.is_multipart(): + payload = part.get_payload(decode=True) + ctype = part.get_content_type() + # XXX Under email 2.5, it is possible that payload will be None. + # This can happen when you have a Content-Type: multipart/* with + # only one part and that part has two blank lines between the + # first boundary and the end boundary. In email 3.0 you end up + # with a string in the payload. I think in this case it's safe to + # ignore the part. 
+ if payload is None: + continue + size = len(payload) + url = save_attachment(mlist, part, dir) + desc = part.get('content-description', _('not available')) + desc = Utils.oneline(desc, lcset) + filename = part.get_filename(_('not available')) + filename = Utils.oneline(filename, lcset) + replace_payload_by_text(part, _("""\ +A non-text attachment was scrubbed... +Name: $filename +Type: $ctype +Size: $size bytes +Desc: $desc +URL: $url +"""), lcset) + outer = False + # We still have to sanitize multipart messages to flat text because + # Pipermail can't handle messages with list payloads. This is a kludge; + # def (n) clever hack ;). + if msg.is_multipart() and sanitize <> 2: + # By default we take the charset of the first text/plain part in the + # message, but if there was none, we'll use the list's preferred + # language's charset. + if not charset or charset == 'us-ascii': + charset = lcset_out + else: + # normalize to the output charset if input/output are different + charset = Charset(charset).output_charset or charset + # We now want to concatenate all the parts which have been scrubbed to + # text/plain, into a single text/plain payload. We need to make sure + # all the characters in the concatenated string are in the same + # encoding, so we'll use the 'replace' key in the coercion call. + # BAW: Martin's original patch suggested we might want to try + # generalizing to utf-8, and that's probably a good idea (eventually). + text = [] + charsets = [] + for part in msg.walk(): + # TK: bug-id 1099138 and multipart + # MAS test payload - if part may fail if there are no headers. + if not part._payload or part.is_multipart(): + continue + # All parts should be scrubbed to text/plain by now. + partctype = part.get_content_type() + if partctype <> 'text/plain': + text.append(_('Skipped content of type $partctype\n')) + continue + try: + t = part.get_payload(decode=True) or '' + # MAS: TypeError exception can occur if payload is None. 
def save_attachment(mlist, msg, dir, filter_html=True):
    """Write the payload of `msg` to the list's attachment area.

    The file is stored under
    config.PRIVATE_ARCHIVE_FILE_DIR/<fqdn_listname>/<dir>/ with a name derived
    from the part's file name (or 'attachment') and an extension derived from
    its file name or content type.  A numeric suffix is added when the name is
    already taken.

    :param mlist: The mailing list the message belongs to.
    :param msg: The message part to save.
    :param dir: Directory, relative to the list's archive directory, that
        receives the file.
    :param filter_html: When true, text/html parts are passed through the
        configured HTML sanitizer command and stored as .txt.
    :return: The URL of the stored attachment, wrapped in angle brackets.
    """
    fsdir = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR,
                         mlist.fqdn_listname, dir)
    makedirs(fsdir)
    # Figure out the attachment type and get the decoded data
    decodedpayload = msg.get_payload(decode=True)
    # BAW: mimetypes ought to handle non-standard, but commonly found types,
    # e.g. image/jpg (should be image/jpeg).  For now we just store such
    # things as application/octet-streams since that seems the safest.
    ctype = msg.get_content_type()
    # i18n file name is encoded
    lcset = Utils.GetCharSet(mlist.preferred_language)
    filename = Utils.oneline(msg.get_filename(''), lcset)
    filename, fnext = os.path.splitext(filename)
    # For safety, we should confirm this is valid ext for content-type
    # but we can use fnext if we introduce fnext filtering
    if as_boolean(config.scrubber.use_attachment_filename_extension):
        # HTML message doesn't have filename :-(
        ext = fnext or guess_extension(ctype, fnext)
    else:
        ext = guess_extension(ctype, fnext)
    if not ext:
        # We don't know what it is, so assume it's just a shapeless
        # application/octet-stream, unless the Content-Type: is
        # message/rfc822, in which case we know we'll coerce the type to
        # text/plain below.
        if ctype == 'message/rfc822':
            ext = '.txt'
        else:
            ext = '.bin'
    # Allow only alphanumerics, dash, underscore, and dot
    ext = sre.sub('', ext)
    path = None
    # We need a lock to calculate the next attachment number
    with Lock(os.path.join(fsdir, 'attachments.lock')):
        # Now base the filename on what's in the attachment, uniquifying it if
        # necessary.
        if (not filename or
            not as_boolean(config.scrubber.use_attachment_filename)):
            filebase = 'attachment'
        else:
            # Sanitize the filename given in the message headers
            parts = pre.split(filename)
            filename = parts[-1]
            # Strip off leading dots
            filename = dre.sub('', filename)
            # Allow only alphanumerics, dash, underscore, and dot
            filename = sre.sub('', filename)
            # If the filename's extension doesn't match the type we guessed,
            # which one should we go with?  For now, let's go with the one we
            # guessed so attachments can't lie about their type.  Also, if the
            # filename /has/ no extension, then tack on the one we guessed.
            # The extension was removed from the name above.
            #
            # Sanitizing can strip every character of the name; fall back to
            # the generic base rather than creating a file named only by its
            # extension.
            filebase = filename or 'attachment'
        # Now we're looking for a unique name for this file on the file
        # system.  If msgdir/filebase.ext isn't unique, we'll add a counter
        # after filebase, e.g. msgdir/filebase-cnt.ext
        counter = 0
        extra = ''
        while True:
            path = os.path.join(fsdir, filebase + extra + ext)
            # Generally it is not a good idea to test for file existence
            # before just trying to create it, but the alternatives aren't
            # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
            # NFS-safe).  Besides, we have an exclusive lock now, so we're
            # guaranteed that no other process will be racing with us.
            if os.path.exists(path):
                counter += 1
                extra = '-%04d' % counter
            else:
                break
    # `path' now contains the unique filename for the attachment.  There's
    # just one more step we need to do.  If the part is text/html and the
    # caller asked for filtering, send the attachment through the sanitizer
    # program.
    if filter_html and ctype == 'text/html':
        base, ext = os.path.splitext(path)
        tmppath = base + '-tmp' + ext
        try:
            with open(tmppath, 'w') as fp:
                fp.write(decodedpayload)
            # NOTE(review): process() reads this setting from
            # config.scrubber.archive_html_sanitizer, while this line reads
            # config.mta -- confirm which section is correct.
            #
            # This was misspelled `safe_substitue`, which raised
            # AttributeError whenever an HTML part was sanitized.
            cmd = Template(config.mta.archive_html_sanitizer).safe_substitute(
                filename=tmppath)
            progfp = os.popen(cmd, 'r')
            decodedpayload = progfp.read()
            status = progfp.close()
            if status:
                log.error('HTML sanitizer exited with non-zero status: %s',
                          status)
        finally:
            os.unlink(tmppath)
        # BAW: Since we've now sanitized the document, it should be plain
        # text.  Blarg, we really want the sanitizer to tell us what the type
        # of the return data is. :(
        ext = '.txt'
        path = base + '.txt'
    # Is it a message/rfc822 attachment?
    elif ctype == 'message/rfc822':
        submsg = msg.get_payload()
        # BAW: I'm sure we can eventually do better than this. :(
        decodedpayload = Utils.websafe(str(submsg))
    # Use a context manager so the file is closed even if the write fails.
    with open(path, 'w') as fp:
        fp.write(decodedpayload)
    # Now calculate the url to the list's archive.
    baseurl = get_plugin('mailman.scrubber').list_url(mlist)
    if not baseurl.endswith('/'):
        baseurl += '/'
    # Trailing space will definitely be a problem with format=flowed.
    # Bracket the URL instead.
    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
    return url
+ +"""Extract topics from the original mail message.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'Tagger', + ] + + +import re +import email +import email.Errors +import email.Iterators +import email.Parser + +from zope.interface import implements + +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler + + +OR = '|' +CRNL = '\r\n' +EMPTYBYTES = b'' +NLTAB = '\n\t' + + + +def process(mlist, msg, msgdata): + if not mlist.topics_enabled: + return + # Extract the Subject:, Keywords:, and possibly body text + matchlines = [] + matchlines.append(msg.get('subject', None)) + matchlines.append(msg.get('keywords', None)) + if mlist.topics_bodylines_limit == 0: + # Don't scan any body lines + pass + elif mlist.topics_bodylines_limit < 0: + # Scan all body lines + matchlines.extend(scanbody(msg)) + else: + # Scan just some of the body lines + matchlines.extend(scanbody(msg, mlist.topics_bodylines_limit)) + matchlines = filter(None, matchlines) + # For each regular expression in the topics list, see if any of the lines + # of interest from the message match the regexp. If so, the message gets + # added to the specific topics bucket. + hits = {} + for name, pattern, desc, emptyflag in mlist.topics: + pattern = OR.join(pattern.splitlines()) + cre = re.compile(pattern, re.IGNORECASE) + for line in matchlines: + if cre.search(line): + hits[name] = 1 + break + if hits: + # Sort the keys and make them available both in the message metadata + # and in a message header. + msgdata['topichits'] = sorted(hits) + msg['X-Topics'] = NLTAB.join(sorted(hits)) + + + +def scanbody(msg, numlines=None): + # We only scan the body of the message if it is of MIME type text/plain, + # or if the outer type is multipart/alternative and there is a text/plain + # part. Anything else, and the body is ignored for header-scan purposes. 
+ found = None + if msg.get_content_type() == 'text/plain': + found = msg + elif msg.is_multipart()\ + and msg.get_content_type() == 'multipart/alternative': + for found in msg.get_payload(): + if found.get_content_type() == 'text/plain': + break + else: + found = None + if not found: + return [] + # Now that we have a Message object that meets our criteria, let's extract + # the first numlines of body text. + lines = [] + lineno = 0 + reader = list(email.Iterators.body_line_iterator(msg)) + while numlines is None or lineno < numlines: + try: + line = bytes(reader.pop(0)) + except IndexError: + break + # Blank lines don't count + if not line.strip(): + continue + lineno += 1 + lines.append(line) + # Concatenate those body text lines with newlines, and then create a new + # message object from those lines. + p = _ForgivingParser() + msg = p.parsestr(EMPTYBYTES.join(lines)) + return msg.get_all('subject', []) + msg.get_all('keywords', []) + + + +class _ForgivingParser(email.Parser.HeaderParser): + # Be a little more forgiving about non-header/continuation lines, since + # we'll just read as much as we can from "header-like" lines in the body. + # + # BAW: WIBNI we didn't have to cut-n-paste this whole thing just to + # specialize the way it returns? + def _parseheaders(self, container, fp): + # Parse the headers, returning a list of header/value pairs. None as + # the header means the Unix-From header. + lastheader = '' + lastvalue = [] + lineno = 0 + while 1: + # Don't strip the line before we test for the end condition, + # because whitespace-only header lines are RFC compliant + # continuation lines. 
+ line = fp.readline() + if not line: + break + line = line.splitlines()[0] + if not line: + break + # Ignore the trailing newline + lineno += 1 + # Check for initial Unix From_ line + if line.startswith('From '): + if lineno == 1: + container.set_unixfrom(line) + continue + else: + break + # Header continuation line + if line[0] in ' \t': + if not lastheader: + break + lastvalue.append(line) + continue + # Normal, non-continuation header. BAW: this should check to make + # sure it's a legal header, e.g. doesn't contain spaces. Also, we + # should expose the header matching algorithm in the API, and + # allow for a non-strict parsing mode (that ignores the line + # instead of raising the exception). + i = line.find(':') + if i < 0: + break + if lastheader: + container[lastheader] = NLTAB.join(lastvalue) + lastheader = line[:i] + lastvalue = [line[i+1:].lstrip()] + # Make sure we retain the last header + if lastheader: + container[lastheader] = NLTAB.join(lastvalue) + + + +class Tagger: + """Tag messages with topic matches.""" + + implements(IHandler) + + name = 'tagger' + description = _('Tag messages with topic matches.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + process(mlist, msg, msgdata) diff --git a/src/mailman/pipeline/to_archive.py b/src/mailman/pipeline/to_archive.py new file mode 100644 index 000000000..7f1702fe9 --- /dev/null +++ b/src/mailman/pipeline/to_archive.py @@ -0,0 +1,55 @@ +# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
# See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Add the message to the archives."""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'ToArchive',
    ]


from zope.interface import implements

from mailman.config import config
from mailman.i18n import _
from mailman.interfaces.handler import IHandler



class ToArchive:
    """Add the message to the archives."""

    implements(IHandler)

    name = 'to-archive'
    description = _('Add the message to the archives.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        # The message is left out of the archive queue when it is a digest,
        # when the list does not archive, or when the poster opted out.
        # Common practice favors "X-No-Archive: yes"; since no other value
        # makes sense for that header, only its presence is tested.
        # "X-Archive: no" is still honored for backwards compatibility.
        skip = (msgdata.get('isdigest')
                or not mlist.archive
                or 'x-no-archive' in msg
                or msg.get('x-archive', '').lower() == 'no')
        if not skip:
            # Hand the message to the archiver queue.
            config.switchboards['archive'].enqueue(msg, msgdata)


# (Patch boundary retained from the flattened diff listing.)
# diff --git a/src/mailman/pipeline/to_digest.py
#            b/src/mailman/pipeline/to_digest.py
# new file mode 100644
# index 000000000..b85764ac9
# --- /dev/null
# +++ b/src/mailman/pipeline/to_digest.py
# @@ -0,0 +1,440 @@
# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. + +"""Add the message to the list's current digest and possibly send it.""" + +# Messages are accumulated to a Unix mailbox compatible file containing all +# the messages destined for the digest. This file must be parsable by the +# mailbox.UnixMailbox class (i.e. it must be ^From_ quoted). +# +# When the file reaches the size threshold, it is moved to the qfiles/digest +# directory and the DigestRunner will craft the MIME, rfc1153, and +# (eventually) URL-subject linked digests from the mbox. + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'ToDigest', + ] + + +import os +import re +import copy +import time +import logging + +from StringIO import StringIO # cStringIO can't handle unicode. +from email.charset import Charset +from email.generator import Generator +from email.header import decode_header, make_header, Header +from email.mime.base import MIMEBase +from email.mime.message import MIMEMessage +from email.mime.text import MIMEText +from email.parser import Parser +from email.utils import formatdate, getaddresses, make_msgid +from zope.interface import implements + +from mailman import Message +from mailman import Utils +from mailman import i18n +from mailman.Mailbox import Mailbox +from mailman.Mailbox import Mailbox +from mailman.config import config +from mailman.core import errors +from mailman.interfaces.handler import IHandler +from mailman.interfaces.member import DeliveryMode, DeliveryStatus +from mailman.pipeline.decorate import decorate +from mailman.pipeline.scrubber import process as scrubber + + +_ = i18n._ + +UEMPTYSTRING = '' +EMPTYSTRING = '' + +log = logging.getLogger('mailman.error') + + + +def process(mlist, msg, msgdata): + # Short circuit non-digestable lists. 
+ if not mlist.digestable or msgdata.get('isdigest'): + return + mboxfile = os.path.join(mlist.data_path, 'digest.mbox') + mboxfp = open(mboxfile, 'a+') + mbox = Mailbox(mboxfp) + mbox.AppendMessage(msg) + # Calculate the current size of the accumulation file. This will not tell + # us exactly how big the MIME, rfc1153, or any other generated digest + # message will be, but it's the most easily available metric to decide + # whether the size threshold has been reached. + mboxfp.flush() + size = os.path.getsize(mboxfile) + if size / 1024.0 >= mlist.digest_size_threshold: + # This is a bit of a kludge to get the mbox file moved to the digest + # queue directory. + try: + # Enclose in try/except here because a error in send_digest() can + # silently stop regular delivery. Unsuccessful digest delivery + # should be tried again by cron and the site administrator will be + # notified of any error explicitly by the cron error message. + mboxfp.seek(0) + send_digests(mlist, mboxfp) + os.unlink(mboxfile) + except Exception, errmsg: + # Bare except is generally prohibited in Mailman, but we can't + # forecast what exceptions can occur here. + log.exception('send_digests() failed: %s', errmsg) + mboxfp.close() + + + +def send_digests(mlist, mboxfp): + # Set the digest volume and time + if mlist.digest_last_sent_at: + bump = False + # See if we should bump the digest volume number + timetup = time.localtime(mlist.digest_last_sent_at) + now = time.localtime(time.time()) + freq = mlist.digest_volume_frequency + if freq == 0 and timetup[0] < now[0]: + # Yearly + bump = True + elif freq == 1 and timetup[1] <> now[1]: + # Monthly, but we take a cheap way to calculate this. We assume + # that the clock isn't going to be reset backwards. 
+ bump = True + elif freq == 2 and (timetup[1] % 4 <> now[1] % 4): + # Quarterly, same caveat + bump = True + elif freq == 3: + # Once again, take a cheap way of calculating this + weeknum_last = int(time.strftime('%W', timetup)) + weeknum_now = int(time.strftime('%W', now)) + if weeknum_now > weeknum_last or timetup[0] > now[0]: + bump = True + elif freq == 4 and timetup[7] <> now[7]: + # Daily + bump = True + if bump: + mlist.bump_digest_volume() + mlist.digest_last_sent_at = time.time() + # Wrapper around actually digest crafter to set up the language context + # properly. All digests are translated to the list's preferred language. + with i18n.using_language(mlist.preferred_language): + send_i18n_digests(mlist, mboxfp) + + + +def send_i18n_digests(mlist, mboxfp): + mbox = Mailbox(mboxfp) + # Prepare common information (first lang/charset) + lang = mlist.preferred_language + lcset = Utils.GetCharSet(lang) + lcset_out = Charset(lcset).output_charset or lcset + # Common Information (contd) + realname = mlist.real_name + volume = mlist.volume + issue = mlist.next_digest_number + digestid = _('$realname Digest, Vol $volume, Issue $issue') + digestsubj = Header(digestid, lcset, header_name='Subject') + # Set things up for the MIME digest. Only headers not added by + # CookHeaders need be added here. + # Date/Message-ID should be added here also. 
+ mimemsg = Message.Message() + mimemsg['Content-Type'] = 'multipart/mixed' + mimemsg['MIME-Version'] = '1.0' + mimemsg['From'] = mlist.request_address + mimemsg['Subject'] = digestsubj + mimemsg['To'] = mlist.posting_address + mimemsg['Reply-To'] = mlist.posting_address + mimemsg['Date'] = formatdate(localtime=1) + mimemsg['Message-ID'] = make_msgid() + # Set things up for the rfc1153 digest + plainmsg = StringIO() + rfc1153msg = Message.Message() + rfc1153msg['From'] = mlist.request_address + rfc1153msg['Subject'] = digestsubj + rfc1153msg['To'] = mlist.posting_address + rfc1153msg['Reply-To'] = mlist.posting_address + rfc1153msg['Date'] = formatdate(localtime=1) + rfc1153msg['Message-ID'] = make_msgid() + separator70 = '-' * 70 + separator30 = '-' * 30 + # In the rfc1153 digest, the masthead contains the digest boilerplate plus + # any digest header. In the MIME digests, the masthead and digest header + # are separate MIME subobjects. In either case, it's the first thing in + # the digest, and we can calculate it now, so go ahead and add it now. + mastheadtxt = Utils.maketext( + 'masthead.txt', + {'real_name' : mlist.real_name, + 'got_list_email': mlist.posting_address, + 'got_listinfo_url': mlist.script_url('listinfo'), + 'got_request_email': mlist.request_address, + 'got_owner_email': mlist.owner_address, + }, mlist=mlist) + # MIME + masthead = MIMEText(mastheadtxt.encode(lcset), _charset=lcset) + masthead['Content-Description'] = digestid + mimemsg.attach(masthead) + # RFC 1153 + print >> plainmsg, mastheadtxt + print >> plainmsg + # Now add the optional digest header + if mlist.digest_header: + headertxt = decorate(mlist, mlist.digest_header, _('digest header')) + # MIME + header = MIMEText(headertxt.encode(lcset), _charset=lcset) + header['Content-Description'] = _('Digest Header') + mimemsg.attach(header) + # RFC 1153 + print >> plainmsg, headertxt + print >> plainmsg + # Now we have to cruise through all the messages accumulated in the + # mailbox file. 
We can't add these messages to the plainmsg and mimemsg + # yet, because we first have to calculate the table of contents + # (i.e. grok out all the Subjects). Store the messages in a list until + # we're ready for them. + # + # Meanwhile prepare things for the table of contents + toc = StringIO() + print >> toc, _("Today's Topics:\n") + # Now cruise through all the messages in the mailbox of digest messages, + # building the MIME payload and core of the RFC 1153 digest. We'll also + # accumulate Subject: headers and authors for the table-of-contents. + messages = [] + msgcount = 0 + msg = mbox.next() + while msg is not None: + if msg == '': + # It was an unparseable message + msg = mbox.next() + continue + msgcount += 1 + messages.append(msg) + # Get the Subject header + msgsubj = msg.get('subject', _('(no subject)')) + subject = Utils.oneline(msgsubj, in_unicode=True) + # Don't include the redundant subject prefix in the toc + mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix), + subject, re.IGNORECASE) + if mo: + subject = subject[:mo.start(2)] + subject[mo.end(2):] + username = '' + addresses = getaddresses([Utils.oneline(msg.get('from', ''), + in_unicode=True)]) + # Take only the first author we find + if isinstance(addresses, list) and addresses: + username = addresses[0][0] + if not username: + username = addresses[0][1] + if username: + username = ' ({0})'.format(username) + # Put count and Wrap the toc subject line + wrapped = Utils.wrap('{0:2}. {1}'.format(msgcount, subject), 65) + slines = wrapped.split('\n') + # See if the user's name can fit on the last line + if len(slines[-1]) + len(username) > 70: + slines.append(username) + else: + slines[-1] += username + # Add this subject to the accumulating topics + first = True + for line in slines: + if first: + print >> toc, ' ', line + first = False + else: + print >> toc, ' ', line.lstrip() + # We do not want all the headers of the original message to leak + # through in the digest messages. 
For this phase, we'll leave the + # same set of headers in both digests, i.e. those required in RFC 1153 + # plus a couple of other useful ones. We also need to reorder the + # headers according to RFC 1153. Later, we'll strip out headers for + # for the specific MIME or plain digests. + keeper = {} + all_keepers = set( + header for header in + config.digests.mime_digest_keep_headers.split() + + config.digests.plain_digest_keep_headers.split()) + for keep in all_keepers: + keeper[keep] = msg.get_all(keep, []) + # Now remove all unkempt headers :) + for header in msg.keys(): + del msg[header] + # And add back the kept header in the RFC 1153 designated order + for keep in all_keepers: + for field in keeper[keep]: + msg[keep] = field + # And a bit of extra stuff + msg['Message'] = repr(msgcount) + # Get the next message in the digest mailbox + msg = mbox.next() + # Now we're finished with all the messages in the digest. First do some + # sanity checking and then on to adding the toc. + if msgcount == 0: + # Why did we even get here? + return + toctext = toc.getvalue() + # MIME + try: + tocpart = MIMEText(toctext.encode(lcset), _charset=lcset) + except UnicodeError: + tocpart = MIMEText(toctext.encode('utf-8'), _charset='utf-8') + tocpart['Content-Description']= _("Today's Topics ($msgcount messages)") + mimemsg.attach(tocpart) + # RFC 1153 + print >> plainmsg, toctext + print >> plainmsg + # For RFC 1153 digests, we now need the standard separator + print >> plainmsg, separator70 + print >> plainmsg + # Now go through and add each message + mimedigest = MIMEBase('multipart', 'digest') + mimemsg.attach(mimedigest) + first = True + for msg in messages: + # MIME. Make a copy of the message object since the rfc1153 + # processing scrubs out attachments. 
+ mimedigest.attach(MIMEMessage(copy.deepcopy(msg))) + # rfc1153 + if first: + first = False + else: + print >> plainmsg, separator30 + print >> plainmsg + # Use Mailman.pipeline.scrubber.process() to get plain text + try: + msg = scrubber(mlist, msg) + except errors.DiscardMessage: + print >> plainmsg, _('[Message discarded by content filter]') + continue + # Honor the default setting + for h in config.digests.plain_digest_keep_headers.split(): + if msg[h]: + uh = Utils.wrap('{0}: {1}'.format( + h, Utils.oneline(msg[h], in_unicode=True))) + uh = '\n\t'.join(uh.split('\n')) + print >> plainmsg, uh + print >> plainmsg + # If decoded payload is empty, this may be multipart message. + # -- just stringfy it. + payload = msg.get_payload(decode=True) \ + or msg.as_string().split('\n\n',1)[1] + mcset = msg.get_content_charset('us-ascii') + try: + payload = unicode(payload, mcset, 'replace') + except (LookupError, TypeError): + # unknown or empty charset + payload = unicode(payload, 'us-ascii', 'replace') + print >> plainmsg, payload + if not payload.endswith('\n'): + print >> plainmsg + # Now add the footer + if mlist.digest_footer: + footertxt = decorate(mlist, mlist.digest_footer) + # MIME + footer = MIMEText(footertxt.encode(lcset), _charset=lcset) + footer['Content-Description'] = _('Digest Footer') + mimemsg.attach(footer) + # RFC 1153 + # BAW: This is not strictly conformant RFC 1153. The trailer is only + # supposed to contain two lines, i.e. the "End of ... Digest" line and + # the row of asterisks. If this screws up MUAs, the solution is to + # add the footer as the last message in the RFC 1153 digest. I just + # hate the way that VM does that and I think it's confusing to users, + # so don't do it unless there's a clamor. 
+ print >> plainmsg, separator30 + print >> plainmsg + print >> plainmsg, footertxt + print >> plainmsg + # Do the last bit of stuff for each digest type + signoff = _('End of ') + digestid + # MIME + # BAW: This stuff is outside the normal MIME goo, and it's what the old + # MIME digester did. No one seemed to complain, probably because you + # won't see it in an MUA that can't display the raw message. We've never + # got complaints before, but if we do, just wax this. It's primarily + # included for (marginally useful) backwards compatibility. + mimemsg.postamble = signoff + # rfc1153 + print >> plainmsg, signoff + print >> plainmsg, '*' * len(signoff) + # Do our final bit of housekeeping, and then send each message to the + # outgoing queue for delivery. + mlist.next_digest_number += 1 + virginq = config.switchboards['virgin'] + # Calculate the recipients lists + plainrecips = set() + mimerecips = set() + # When someone turns off digest delivery, they will get one last digest to + # ensure that there will be no gaps in the messages they receive. + # Currently, this dictionary contains the email addresses of those folks + # who should get one last digest. We need to find the corresponding + # IMember records. + digest_members = set(mlist.digest_members.members) + for address in mlist.one_last_digest: + member = mlist.digest_members.get_member(address) + if member: + digest_members.add(member) + for member in digest_members: + if member.delivery_status <> DeliveryStatus.enabled: + continue + # Send the digest to the case-preserved address of the digest members. 
+ email_address = member.address.original_address + if member.delivery_mode == DeliveryMode.plaintext_digests: + plainrecips.add(email_address) + elif member.delivery_mode == DeliveryMode.mime_digests: + mimerecips.add(email_address) + else: + raise AssertionError( + 'Digest member "{0}" unexpected delivery mode: {1}'.format( + email_address, member.delivery_mode)) + # Zap this since we're now delivering the last digest to these folks. + mlist.one_last_digest.clear() + # MIME + virginq.enqueue(mimemsg, + recips=mimerecips, + listname=mlist.fqdn_listname, + isdigest=True) + # RFC 1153 + # If the entire digest message can't be encoded by list charset, fall + # back to 'utf-8'. + try: + rfc1153msg.set_payload(plainmsg.getvalue().encode(lcset), lcset) + except UnicodeError: + rfc1153msg.set_payload(plainmsg.getvalue().encode('utf-8'), 'utf-8') + virginq.enqueue(rfc1153msg, + recips=plainrecips, + listname=mlist.fqdn_listname, + isdigest=True) + + + +class ToDigest: + """Add the message to the digest, possibly sending it.""" + + implements(IHandler) + + name = 'to-digest' + description = _('Add the message to the digest, possibly sending it.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + process(mlist, msg, msgdata) diff --git a/src/mailman/pipeline/to_outgoing.py b/src/mailman/pipeline/to_outgoing.py new file mode 100644 index 000000000..ff27593c4 --- /dev/null +++ b/src/mailman/pipeline/to_outgoing.py @@ -0,0 +1,78 @@ +# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. 
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Re-queue the message to the outgoing queue.

This module is only for use by the IncomingRunner for delivering messages
posted to the list membership.  Anything else that needs to go out to some
recipient should just be placed in the out queue directly.
"""

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'ToOutgoing',
    ]


from lazr.config import as_boolean
from zope.interface import implements

from mailman.config import config
from mailman.i18n import _
from mailman.interfaces.handler import IHandler
from mailman.interfaces.mailinglist import Personalization



class ToOutgoing:
    """Send the message to the outgoing queue."""

    implements(IHandler)

    name = 'to-outgoing'
    description = _('Send the message to the outgoing queue.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        interval = int(config.mta.verp_delivery_interval)
        # Decide whether this message gets VERPed.  A verp flag that is
        # already present (e.g. set for password reminders) is preserved
        # untouched.  Otherwise:
        #
        # * personalized lists are VERPed only when the
        #   verp_personalized_deliveries setting is true;
        # * non-personalized lists follow verp_delivery_interval, where
        #   0 means never, 1 means always, and any other value means every
        #   `interval'th post.
        if 'verp' not in msgdata:
            if mlist.personalize != Personalization.none:
                if as_boolean(config.mta.verp_personalized_deliveries):
                    msgdata['verp'] = True
            elif interval == 1:
                msgdata['verp'] = True
            elif interval != 0:
                msgdata['verp'] = not (int(mlist.post_id) % interval)
        # And now drop the message in qfiles/out
        out_queue = config.switchboards['out']
        out_queue.enqueue(msg, msgdata, listname=mlist.fqdn_listname)


# (Patch boundary retained from the flattened diff listing.)
# diff --git a/src/mailman/pipeline/to_usenet.py
#            b/src/mailman/pipeline/to_usenet.py
# new file mode 100644
# index 000000000..220374348
# --- /dev/null
# +++ b/src/mailman/pipeline/to_usenet.py
# @@ -0,0 +1,69 @@
# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
+ +"""Move the message to the mail->news queue.""" + +from __future__ import absolute_import, unicode_literals + +__metaclass__ = type +__all__ = [ + 'ToUsenet', + ] + + +import logging + +from zope.interface import implements + +from mailman.config import config +from mailman.i18n import _ +from mailman.interfaces.handler import IHandler + +COMMASPACE = ', ' + +log = logging.getLogger('mailman.error') + + + +class ToUsenet: + """Move the message to the outgoing news queue.""" + + implements(IHandler) + + name = 'to-usenet' + description = _('Move the message to the outgoing news queue.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + # Short circuits. + if not mlist.gateway_to_news or \ + msgdata.get('isdigest') or \ + msgdata.get('fromusenet'): + return + # sanity checks + error = [] + if not mlist.linked_newsgroup: + error.append('no newsgroup') + if not mlist.nntp_host: + error.append('no NNTP host') + if error: + log.error('NNTP gateway improperly configured: %s', + COMMASPACE.join(error)) + return + # Put the message in the news runner's queue + config.switchboards['news'].enqueue( + msg, msgdata, listname=mlist.fqdn_listname) |
