diff options
Diffstat (limited to 'mailman/pipeline/cook_headers.py')
| -rw-r--r-- | mailman/pipeline/cook_headers.py | 357 |
1 files changed, 357 insertions, 0 deletions
# Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Cook a message's headers."""

__metaclass__ = type
__all__ = ['CookHeaders']


import re

from email.Charset import Charset
from email.Errors import HeaderParseError
from email.Header import Header, decode_header, make_header
from email.Utils import parseaddr, formataddr, getaddresses
from zope.interface import implements

from mailman import Utils
from mailman import Version
from mailman.app.archiving import get_archiver
from mailman.configuration import config
from mailman.i18n import _
from mailman.interfaces import IHandler, Personalization, ReplyToMunging

CONTINUATION = ',\n\t'
COMMASPACE = ', '
MAXLINELEN = 78

# Matches any character that is neither whitespace nor printable ASCII.
nonascii = re.compile(r'[^\s!-~]')
# Matches a sequence-number directive such as '%d' or '%05d' in a prefix.
_PREFIX_NUMBER = re.compile(r'%\d*d')
# Matches one or more leading reply markers ("Re:", "AW:", "SV:", "VS:"),
# each optionally carrying a bracketed counter such as "Re[2]:".
_REPLY_MARKERS = re.compile(r'((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)



def uheader(mlist, s, header_name=None, continuation_ws='\t', maxlinelen=None):
    """Return `s` as a `Header` instance in a charset suitable for the list.

    The charset starts out as the one `Utils.GetCharSet()` returns for the
    list's preferred language, and is then adjusted:

    * if `s` contains no non-ASCII character, 'us-ascii' is used regardless
      of the list's charset;
    * if `s` contains a non-ASCII character and the list's charset is
      'us-ascii', 'iso-8859-1' is used instead;
    * otherwise the list's charset is used unchanged.

    :param mlist: The mailing list; only `preferred_language` is read.
    :param s: The header value.
    :param header_name: Passed through to `Header`.
    :param continuation_ws: Passed through to `Header`.
    :param maxlinelen: Passed through to `Header`.
    :return: The new `Header` instance.
    """
    charset = Utils.GetCharSet(mlist.preferred_language)
    if nonascii.search(s):
        # Use the list's charset, but us-ascii cannot carry these characters.
        if charset == 'us-ascii':
            charset = 'iso-8859-1'
    else:
        # There is nothing non-ASCII, so there is no need for anything more.
        charset = 'us-ascii'
    return Header(s, charset, maxlinelen, header_name, continuation_ws)



def _unique_addresses(pairs):
    """Return `pairs` without duplicate or empty addresses, order preserved.

    :param pairs: An iterable of (realname, address) 2-tuples.
    :return: A new list holding the first pair seen for each address, where
        addresses are compared case-insensitively.  Pairs whose address is
        the empty string (what `parseaddr()` and `getaddresses()` produce
        for empty or unparseable input) are dropped, because formatting them
        would put an empty entry into the header.
    """
    seen = set()
    unique = []
    for pair in pairs:
        lcaddr = pair[1].lower()
        if not lcaddr or lcaddr in seen:
            continue
        seen.add(lcaddr)
        unique.append(pair)
    return unique



def _munge_reply_to(mlist, msg):
    """Rebuild the message's Reply-To: header according to list policy."""
    # BAW: Yuck, I really hate this feature but I've caved under the sheer
    # pressure of the (very vocal) folks who want it.  OTOH, RFC 2822 allows
    # Reply-To: to be a list of addresses, so instead of replacing the
    # original, simply augment it.  RFC 2822 allows max one Reply-To: header
    # so collapse them if we're adding a value, otherwise don't touch it.
    # (Should we collapse in all cases?)
    pairs = []
    # List admin wants an explicit Reply-To: added.
    if mlist.reply_goes_to_list == ReplyToMunging.explicit_header:
        pairs.append(parseaddr(mlist.reply_to_address))
    # If we're not first stripping existing Reply-To: then we need to add the
    # original Reply-To:'s to the list we're building up.  In both cases
    # we'll zap the existing field because RFC 2822 says max one is allowed.
    if not mlist.first_strip_reply_to:
        pairs.extend(getaddresses(msg.get_all('reply-to', [])))
    # Set Reply-To: header to point back to this list.  Add this last because
    # some folks think that some MUAs make it easier to delete addresses from
    # the right than from the left.
    if mlist.reply_goes_to_list == ReplyToMunging.point_to_list:
        i18ndesc = uheader(mlist, mlist.description, 'Reply-To')
        pairs.append((str(i18ndesc), mlist.posting_address))
    del msg['reply-to']
    new = _unique_addresses(pairs)
    # Don't put Reply-To: back if there's nothing to add!
    if new:
        msg['Reply-To'] = COMMASPACE.join(
            [formataddr(pair) for pair in new])



def _add_list_to_cc(mlist, msg):
    """Merge the list's posting address into a single Cc: header."""
    # Watch out for existing Cc headers, merge, and remove dups.  Note that
    # RFC 2822 says only zero or one Cc header is allowed.
    pairs = getaddresses(msg.get_all('cc', []))
    i18ndesc = uheader(mlist, mlist.description, 'Cc')
    pairs.append((str(i18ndesc), mlist.posting_address))
    del msg['Cc']
    msg['Cc'] = COMMASPACE.join(
        [formataddr(pair) for pair in _unique_addresses(pairs)])



def _add_list_headers(mlist, msg, msgdata):
    """Add the List-Id: header and the RFC 2369 family of List-* headers."""
    # This will act like an email address for purposes of formataddr()
    listid = '%s.%s' % (mlist.list_name, mlist.host_name)
    if mlist.description:
        # Don't wrap the header since here we just want to get it properly
        # RFC 2047 encoded.
        i18ndesc = uheader(mlist, mlist.description, 'List-Id', maxlinelen=998)
        listid_h = formataddr((str(i18ndesc), listid))
    else:
        # Without a description we need to ensure the MUST brackets.
        listid_h = '<%s>' % listid
    # We always add a List-ID: header.
    del msg['list-id']
    msg['List-Id'] = listid_h
    # For internally crafted messages, we also add a (nonstandard),
    # "X-List-Administrivia: yes" header.  For all others (i.e. those coming
    # from list posts), we add a bunch of other RFC 2369 headers.
    requestaddr = mlist.request_address
    subfieldfmt = '<%s>, <mailto:%s>'
    listinfo = mlist.script_url('listinfo')
    # XXX reduced_list_headers used to suppress List-Help, List-Subscribe, and
    # List-Unsubscribe from UserNotification.  That doesn't seem to make sense
    # any more, so always add those three headers (others will still be
    # suppressed).
    headers = {
        'List-Help'       : '<mailto:%s?subject=help>' % requestaddr,
        'List-Unsubscribe': subfieldfmt % (listinfo, mlist.leave_address),
        'List-Subscribe'  : subfieldfmt % (listinfo, mlist.join_address),
        }
    archiver = get_archiver()
    if msgdata.get('reduced_list_headers'):
        headers['X-List-Administrivia'] = 'yes'
    else:
        # List-Post: is controlled by a separate attribute
        if mlist.include_list_post_header:
            headers['List-Post'] = '<mailto:%s>' % mlist.posting_address
        # Add this header if we're archiving
        if mlist.archive:
            archiveurl = archiver.get_list_url(mlist)
            headers['List-Archive'] = '<%s>' % archiveurl
    # XXX RFC 2369 also defines a List-Owner header which we are not currently
    # supporting, but should.
    #
    # Draft RFC 5064 defines an Archived-At header which contains the pointer
    # directly to the message in the archive.  If the currently defined
    # archiver can tell us the URL, go ahead and include this header.
    #
    # NOTE(review): the nesting of this section was reconstructed from a
    # whitespace-collapsed copy of the file; confirm that Archived-At is
    # meant to be added for reduced-header messages as well.
    archived_at = archiver.get_message_url(mlist, msg)
    if archived_at is not None:
        headers['Archived-At'] = archived_at
    # First we delete any pre-existing headers because the RFC permits only
    # one copy of each, and we want to be sure it's ours.
    for h, v in headers.items():
        del msg[h]
        # Wrap these lines if they are too long.  MAXLINELEN is at least
        # text-MUA friendly.  The adding of 2 is for the colon-space
        # separator.
        if len(h) + 2 + len(v) > MAXLINELEN:
            v = CONTINUATION.join(v.split(COMMASPACE))
        msg[h] = v



def process(mlist, msg, msgdata):
    """Cook the headers of `msg` for delivery through `mlist`.

    :param mlist: The mailing list the message is being processed for.
    :param msg: The message; its headers are modified in place.
    :param msgdata: The message metadata dictionary.  The keys 'noack',
        '_fasttrack', 'isdigest', '_nolist' and 'reduced_list_headers' are
        read; 'original_sender' is written, and `prefix_subject()` may write
        'origsubj' and 'stripped_subject'.
    """
    # Set the "X-Ack: no" header if noack flag is set.
    if msgdata.get('noack'):
        del msg['x-ack']
        msg['X-Ack'] = 'no'
    # Because we're going to modify various important headers in the email
    # message, we want to save some of the information in the msgdata
    # dictionary for later.  Specifically, the sender header will get waxed,
    # but we need it for the Acknowledge module later.
    msgdata['original_sender'] = msg.get_sender()
    # VirginRunner sets _fasttrack for internally crafted messages.
    fasttrack = msgdata.get('_fasttrack')
    if not msgdata.get('isdigest') and not fasttrack:
        try:
            prefix_subject(mlist, msg, msgdata)
        except (UnicodeError, ValueError):
            # TK: Sometimes the subject header is not MIME encoded for 8bit;
            # simply abort prefixing.  This is deliberately best-effort.
            pass
    # Mark message so we know we've been here, but leave any existing
    # X-BeenThere's intact.
    msg['X-BeenThere'] = mlist.posting_address
    # Add Precedence: and other useful headers.  None of these are standard
    # and finding information on some of them are fairly difficult.  Some are
    # just common practice, and we'll add more here as they become necessary.
    # Good places to look are:
    #
    # http://www.dsv.su.se/~jpalme/ietf/jp-ietf-home.html
    # http://www.faqs.org/rfcs/rfc2076.html
    #
    # None of these headers are added if they already exist.  BAW: some
    # consider the advertising of this a security breach.  I.e. if there are
    # known exploits in a particular version of Mailman and we know a site is
    # using such an old version, they may be vulnerable.  It's too easy to
    # edit the code to add a configuration variable to handle this.
    if 'x-mailman-version' not in msg:
        msg['X-Mailman-Version'] = Version.VERSION
    # We set "Precedence: list" because this is the recommendation from the
    # sendmail docs, the most authoritative source of this header's semantics.
    if 'precedence' not in msg:
        msg['Precedence'] = 'list'
    # Reply-To: munging.  Do not do this if the message is "fast tracked",
    # meaning it is internally crafted and delivered to a specific user.
    if not fasttrack:
        _munge_reply_to(mlist, msg)
        # The To field normally contains the list posting address.  However
        # when messages are fully personalized, that header will get
        # overwritten with the address of the recipient.  We need to get the
        # posting address in one of the recipient headers or they won't be
        # able to reply back to the list.  It's possible the posting address
        # was munged into the Reply-To header, but if not, we'll add it to a
        # Cc header.  BAW: should we force it into a Reply-To header in the
        # above code?
        # Also skip Cc if this is an anonymous list as list posting address
        # is already in From and Reply-To in this case.
        if (mlist.personalize == Personalization.full and
            mlist.reply_goes_to_list != ReplyToMunging.point_to_list and
            not mlist.anonymous_list):
            _add_list_to_cc(mlist, msg)
    # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only
    # if the message is being crafted for a specific list (e.g. not for the
    # password reminders).
    #
    # BAW: Some people really hate the List-* headers.  It seems that the free
    # version of Eudora (possibly only on some platforms) does not hide these
    # headers by default, pissing off their users.  Too bad.  Fix the MUAs.
    if msgdata.get('_nolist') or not mlist.include_rfc2369_headers:
        return
    _add_list_headers(mlist, msg, msgdata)



def prefix_subject(mlist, msg, msgdata):
    """Put the list's subject prefix at the front of the Subject: header.

    Any copy of the prefix already in the subject is removed, leading reply
    markers are collapsed into a single 'Re:', and the result is stored back
    as the message's Subject: header.  The original subject is saved in
    `msgdata['origsubj']` and the subject without the prefix in
    `msgdata['stripped_subject']`.  Nothing happens when the list's
    `subject_prefix` is empty or only whitespace.

    The caller decides whether prefixing applies at all; `process()` skips
    it for digests and fast tracked messages.

    :param mlist: The mailing list.
    :param msg: The message; its Subject: header is replaced.
    :param msgdata: The message metadata dictionary.
    :raises UnicodeError: Not caught here; the subject is decoded with
        `unicode()` and re-encoded, which `process()` guards against.
    :raises ValueError: Not caught here; `process()` guards against it.
    """
    if not mlist.subject_prefix.strip():
        return
    prefix = mlist.subject_prefix
    subject = msg.get('subject', '')
    # Try to figure out what the continuation_ws is for the header
    if isinstance(subject, Header):
        lines = str(subject).splitlines()
    else:
        lines = subject.splitlines()
    ws = '\t'
    if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
        ws = lines[1][0]
    msgdata['origsubj'] = subject
    # The subject may be multilingual.  ch_oneline() returns it on a single
    # line together with the charset it was encoded in; if decoding fails it
    # returns the undecoded subject with cset set to us-ascii.
    subject, cset = ch_oneline(subject)
    # TK: Python interpreter has evolved to be strict on ascii charset code
    # range.  It is safe to use unicode string when manipulating header
    # contents with re module.  It would be best to return unicode in
    # ch_oneline() but here is temporary solution.
    subject = unicode(subject, cset)
    # If the subject_prefix contains '%d', it is replaced with the
    # mailing list sequential number.  Sequential number format allows
    # '%d' or '%05d' like pattern.
    prefix_pattern = re.escape(prefix)
    # unescape '%' :-<
    prefix_pattern = '%'.join(prefix_pattern.split(r'\%'))
    # NOTE(review): the search starts at index 1, so a directive at the very
    # start of the prefix is not detected -- presumably intentional; confirm.
    if _PREFIX_NUMBER.search(prefix, 1):
        # prefix have number, so we should search prefix w/number in subject.
        # Also, force new style.
        prefix_pattern = _PREFIX_NUMBER.sub(r'\s*\d+\s*', prefix_pattern)
    subject = re.sub(prefix_pattern, '', subject)
    rematch = _REPLY_MARKERS.match(subject)
    if rematch:
        subject = subject[rematch.end():]
        recolon = 'Re:'
    else:
        recolon = ''
    # At this point, subject may become null if someone post mail with
    # subject: [subject prefix]
    if subject.strip() == '':
        subject = _('(no subject)')
        cset = Utils.GetCharSet(mlist.preferred_language)
    # and substitute %d in prefix with post_id
    try:
        prefix = prefix % mlist.post_id
    except TypeError:
        # The prefix has no directive to substitute; use it as it is.
        pass
    # Get the header as a Header instance, with proper unicode conversion
    h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
    if recolon:
        h.append(recolon)
    # TK: Subject is concatenated and unicode string.
    subject = subject.encode(cset, 'replace')
    h.append(subject, cset)
    del msg['subject']
    msg['Subject'] = h
    ss = uheader(mlist, recolon, 'Subject', continuation_ws=ws)
    ss.append(subject, cset)
    msgdata['stripped_subject'] = ss



def ch_oneline(headerstr):
    """Decode a header onto one line, in a single charset.

    Copied and modified from ToDigest.py and Utils.py.

    :param headerstr: The header value to decode.
    :return: A 2-tuple of (string, charset).  On success the string is the
        decoded header with its line breaks removed, encoded in `charset`:
        'us-ascii' when the header names no charset, that charset when it
        names exactly one, and 'utf-8' when it names several.  On failure
        the string is `headerstr` with its line breaks removed and the
        charset is 'us-ascii', which callers can use as a check for failure.
    """
    try:
        # At this point, we should rstrip() every string because some
        # MUA deliberately add trailing spaces when composing return
        # message.
        chunks = [(s.rstrip(), c) for (s, c) in decode_header(headerstr)]
        # Find all charsets in the original header.  We use 'utf-8' rather
        # than using the first charset (in mailman 2.1.x) if multiple
        # charsets are used.
        csets = []
        for (s, c) in chunks:
            if c and c not in csets:
                csets.append(c)
        if not csets:
            cset = 'us-ascii'
        elif len(csets) == 1:
            cset = csets[0]
        else:
            cset = 'utf-8'
        ustr = unicode(make_header(chunks))
        oneline = u''.join(ustr.splitlines())
        return oneline.encode(cset, 'replace'), cset
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # possibly charset problem. return with undecoded string in one line.
        return ''.join(headerstr.splitlines()), 'us-ascii'



class CookHeaders:
    """Modify message headers."""

    implements(IHandler)

    name = 'cook-headers'
    description = _('Modify message headers.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`."""
        process(mlist, msg, msgdata)
