diff options
| author | Barry Warsaw | 2008-02-27 01:26:18 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2008-02-27 01:26:18 -0500 |
| commit | a1c73f6c305c7f74987d99855ba59d8fa823c253 (patch) | |
| tree | 65696889450862357c9e05c8e9a589f1bdc074ac /mailman/pipeline/to_digest.py | |
| parent | 3f31f8cce369529d177cfb5a7c66346ec1e12130 (diff) | |
| download | mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.tar.gz mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.tar.zst mailman-a1c73f6c305c7f74987d99855ba59d8fa823c253.zip | |
Diffstat (limited to 'mailman/pipeline/to_digest.py')
| -rw-r--r-- | mailman/pipeline/to_digest.py | 439 |
1 files changed, 439 insertions, 0 deletions
diff --git a/mailman/pipeline/to_digest.py b/mailman/pipeline/to_digest.py new file mode 100644 index 000000000..191e3a0f1 --- /dev/null +++ b/mailman/pipeline/to_digest.py @@ -0,0 +1,439 @@ +# Copyright (C) 1998-2008 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. + +"""Add the message to the list's current digest and possibly send it.""" + +# Messages are accumulated to a Unix mailbox compatible file containing all +# the messages destined for the digest. This file must be parsable by the +# mailbox.UnixMailbox class (i.e. it must be ^From_ quoted). +# +# When the file reaches the size threshold, it is moved to the qfiles/digest +# directory and the DigestRunner will craft the MIME, rfc1153, and +# (eventually) URL-subject linked digests from the mbox. + +from __future__ import with_statement + +__metaclass__ = type +__all__ = ['ToDigest'] + + +import os +import re +import copy +import time +import logging + +from StringIO import StringIO # cStringIO can't handle unicode. +from email.charset import Charset +from email.generator import Generator +from email.header import decode_header, make_header, Header +from email.mime.base import MIMEBase +from email.mime.message import MIMEMessage +from email.mime.text import MIMEText +from email.parser import Parser +from email.utils import formatdate, getaddresses, make_msgid +from zope.interface import implements + +from mailman import Errors +from mailman import Message +from mailman import Utils +from mailman import i18n +from mailman.Mailbox import Mailbox +from mailman.Mailbox import Mailbox +from mailman.configuration import config +from mailman.pipeline.decorate import decorate +from mailman.pipeline.scrubber import process as scrubber +from mailman.interfaces import DeliveryMode, DeliveryStatus, IHandler +from mailman.queue import Switchboard + + +_ = i18n._ + +UEMPTYSTRING = u'' +EMPTYSTRING = '' + +log = logging.getLogger('mailman.error') + + + +def process(mlist, msg, msgdata): + # Short circuit non-digestable lists. + if not mlist.digestable or msgdata.get('isdigest'): + return + mboxfile = os.path.join(mlist.full_path, 'digest.mbox') + mboxfp = open(mboxfile, 'a+') + mbox = Mailbox(mboxfp) + mbox.AppendMessage(msg) + # Calculate the current size of the accumulation file. This will not tell + # us exactly how big the MIME, rfc1153, or any other generated digest + # message will be, but it's the most easily available metric to decide + # whether the size threshold has been reached. + mboxfp.flush() + size = os.path.getsize(mboxfile) + if size / 1024.0 >= mlist.digest_size_threshold: + # This is a bit of a kludge to get the mbox file moved to the digest + # queue directory. + try: + # Enclose in try/except here because a error in send_digest() can + # silently stop regular delivery. Unsuccessful digest delivery + # should be tried again by cron and the site administrator will be + # notified of any error explicitly by the cron error message. + mboxfp.seek(0) + send_digests(mlist, mboxfp) + os.unlink(mboxfile) + except Exception, errmsg: + # Bare except is generally prohibited in Mailman, but we can't + # forecast what exceptions can occur here. + log.exception('send_digests() failed: %s', errmsg) + mboxfp.close() + + + +def send_digests(mlist, mboxfp): + # Set the digest volume and time + if mlist.digest_last_sent_at: + bump = False + # See if we should bump the digest volume number + timetup = time.localtime(mlist.digest_last_sent_at) + now = time.localtime(time.time()) + freq = mlist.digest_volume_frequency + if freq == 0 and timetup[0] < now[0]: + # Yearly + bump = True + elif freq == 1 and timetup[1] <> now[1]: + # Monthly, but we take a cheap way to calculate this. We assume + # that the clock isn't going to be reset backwards. + bump = True + elif freq == 2 and (timetup[1] % 4 <> now[1] % 4): + # Quarterly, same caveat + bump = True + elif freq == 3: + # Once again, take a cheap way of calculating this + weeknum_last = int(time.strftime('%W', timetup)) + weeknum_now = int(time.strftime('%W', now)) + if weeknum_now > weeknum_last or timetup[0] > now[0]: + bump = True + elif freq == 4 and timetup[7] <> now[7]: + # Daily + bump = True + if bump: + mlist.bump_digest_volume() + mlist.digest_last_sent_at = time.time() + # Wrapper around actually digest crafter to set up the language context + # properly. All digests are translated to the list's preferred language. + with i18n.using_language(mlist.preferred_language): + send_i18n_digests(mlist, mboxfp) + + + +def send_i18n_digests(mlist, mboxfp): + mbox = Mailbox(mboxfp) + # Prepare common information (first lang/charset) + lang = mlist.preferred_language + lcset = Utils.GetCharSet(lang) + lcset_out = Charset(lcset).output_charset or lcset + # Common Information (contd) + realname = mlist.real_name + volume = mlist.volume + issue = mlist.next_digest_number + digestid = _('$realname Digest, Vol $volume, Issue $issue') + digestsubj = Header(digestid, lcset, header_name='Subject') + # Set things up for the MIME digest. Only headers not added by + # CookHeaders need be added here. + # Date/Message-ID should be added here also. + mimemsg = Message.Message() + mimemsg['Content-Type'] = 'multipart/mixed' + mimemsg['MIME-Version'] = '1.0' + mimemsg['From'] = mlist.request_address + mimemsg['Subject'] = digestsubj + mimemsg['To'] = mlist.posting_address + mimemsg['Reply-To'] = mlist.posting_address + mimemsg['Date'] = formatdate(localtime=1) + mimemsg['Message-ID'] = make_msgid() + # Set things up for the rfc1153 digest + plainmsg = StringIO() + rfc1153msg = Message.Message() + rfc1153msg['From'] = mlist.request_address + rfc1153msg['Subject'] = digestsubj + rfc1153msg['To'] = mlist.posting_address + rfc1153msg['Reply-To'] = mlist.posting_address + rfc1153msg['Date'] = formatdate(localtime=1) + rfc1153msg['Message-ID'] = make_msgid() + separator70 = '-' * 70 + separator30 = '-' * 30 + # In the rfc1153 digest, the masthead contains the digest boilerplate plus + # any digest header. In the MIME digests, the masthead and digest header + # are separate MIME subobjects. In either case, it's the first thing in + # the digest, and we can calculate it now, so go ahead and add it now. + mastheadtxt = Utils.maketext( + 'masthead.txt', + {'real_name' : mlist.real_name, + 'got_list_email': mlist.posting_address, + 'got_listinfo_url': mlist.script_url('listinfo'), + 'got_request_email': mlist.request_address, + 'got_owner_email': mlist.owner_address, + }, mlist=mlist) + # MIME + masthead = MIMEText(mastheadtxt.encode(lcset), _charset=lcset) + masthead['Content-Description'] = digestid + mimemsg.attach(masthead) + # RFC 1153 + print >> plainmsg, mastheadtxt + print >> plainmsg + # Now add the optional digest header + if mlist.digest_header: + headertxt = decorate(mlist, mlist.digest_header, _('digest header')) + # MIME + header = MIMEText(headertxt.encode(lcset), _charset=lcset) + header['Content-Description'] = _('Digest Header') + mimemsg.attach(header) + # RFC 1153 + print >> plainmsg, headertxt + print >> plainmsg + # Now we have to cruise through all the messages accumulated in the + # mailbox file. We can't add these messages to the plainmsg and mimemsg + # yet, because we first have to calculate the table of contents + # (i.e. grok out all the Subjects). Store the messages in a list until + # we're ready for them. + # + # Meanwhile prepare things for the table of contents + toc = StringIO() + print >> toc, _("Today's Topics:\n") + # Now cruise through all the messages in the mailbox of digest messages, + # building the MIME payload and core of the RFC 1153 digest. We'll also + # accumulate Subject: headers and authors for the table-of-contents. + messages = [] + msgcount = 0 + msg = mbox.next() + while msg is not None: + if msg == '': + # It was an unparseable message + msg = mbox.next() + continue + msgcount += 1 + messages.append(msg) + # Get the Subject header + msgsubj = msg.get('subject', _('(no subject)')) + subject = Utils.oneline(msgsubj, in_unicode=True) + # Don't include the redundant subject prefix in the toc + mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix), + subject, re.IGNORECASE) + if mo: + subject = subject[:mo.start(2)] + subject[mo.end(2):] + username = '' + addresses = getaddresses([Utils.oneline(msg.get('from', ''), + in_unicode=True)]) + # Take only the first author we find + if isinstance(addresses, list) and addresses: + username = addresses[0][0] + if not username: + username = addresses[0][1] + if username: + username = ' (%s)' % username + # Put count and Wrap the toc subject line + wrapped = Utils.wrap('%2d. %s' % (msgcount, subject), 65) + slines = wrapped.split('\n') + # See if the user's name can fit on the last line + if len(slines[-1]) + len(username) > 70: + slines.append(username) + else: + slines[-1] += username + # Add this subject to the accumulating topics + first = True + for line in slines: + if first: + print >> toc, ' ', line + first = False + else: + print >> toc, ' ', line.lstrip() + # We do not want all the headers of the original message to leak + # through in the digest messages. For this phase, we'll leave the + # same set of headers in both digests, i.e. those required in RFC 1153 + # plus a couple of other useful ones. We also need to reorder the + # headers according to RFC 1153. Later, we'll strip out headers for + # for the specific MIME or plain digests. + keeper = {} + all_keepers = {} + for header in (config.MIME_DIGEST_KEEP_HEADERS + + config.PLAIN_DIGEST_KEEP_HEADERS): + all_keepers[header] = True + all_keepers = all_keepers.keys() + for keep in all_keepers: + keeper[keep] = msg.get_all(keep, []) + # Now remove all unkempt headers :) + for header in msg.keys(): + del msg[header] + # And add back the kept header in the RFC 1153 designated order + for keep in all_keepers: + for field in keeper[keep]: + msg[keep] = field + # And a bit of extra stuff + msg['Message'] = `msgcount` + # Get the next message in the digest mailbox + msg = mbox.next() + # Now we're finished with all the messages in the digest. First do some + # sanity checking and then on to adding the toc. + if msgcount == 0: + # Why did we even get here? + return + toctext = toc.getvalue() + # MIME + try: + tocpart = MIMEText(toctext.encode(lcset), _charset=lcset) + except UnicodeError: + tocpart = MIMEText(toctext.encode('utf-8'), _charset='utf-8') + tocpart['Content-Description']= _("Today's Topics ($msgcount messages)") + mimemsg.attach(tocpart) + # RFC 1153 + print >> plainmsg, toctext + print >> plainmsg + # For RFC 1153 digests, we now need the standard separator + print >> plainmsg, separator70 + print >> plainmsg + # Now go through and add each message + mimedigest = MIMEBase('multipart', 'digest') + mimemsg.attach(mimedigest) + first = True + for msg in messages: + # MIME. Make a copy of the message object since the rfc1153 + # processing scrubs out attachments. + mimedigest.attach(MIMEMessage(copy.deepcopy(msg))) + # rfc1153 + if first: + first = False + else: + print >> plainmsg, separator30 + print >> plainmsg + # Use Mailman.pipeline.scrubber.process() to get plain text + try: + msg = scrubber(mlist, msg) + except Errors.DiscardMessage: + print >> plainmsg, _('[Message discarded by content filter]') + continue + # Honor the default setting + for h in config.PLAIN_DIGEST_KEEP_HEADERS: + if msg[h]: + uh = Utils.wrap('%s: %s' % (h, Utils.oneline(msg[h], + in_unicode=True))) + uh = '\n\t'.join(uh.split('\n')) + print >> plainmsg, uh + print >> plainmsg + # If decoded payload is empty, this may be multipart message. + # -- just stringfy it. + payload = msg.get_payload(decode=True) \ + or msg.as_string().split('\n\n',1)[1] + mcset = msg.get_content_charset('us-ascii') + try: + payload = unicode(payload, mcset, 'replace') + except (LookupError, TypeError): + # unknown or empty charset + payload = unicode(payload, 'us-ascii', 'replace') + print >> plainmsg, payload + if not payload.endswith('\n'): + print >> plainmsg + # Now add the footer + if mlist.digest_footer: + footertxt = decorate(mlist, mlist.digest_footer, _('digest footer')) + # MIME + footer = MIMEText(footertxt.encode(lcset), _charset=lcset) + footer['Content-Description'] = _('Digest Footer') + mimemsg.attach(footer) + # RFC 1153 + # BAW: This is not strictly conformant RFC 1153. The trailer is only + # supposed to contain two lines, i.e. the "End of ... Digest" line and + # the row of asterisks. If this screws up MUAs, the solution is to + # add the footer as the last message in the RFC 1153 digest. I just + # hate the way that VM does that and I think it's confusing to users, + # so don't do it unless there's a clamor. + print >> plainmsg, separator30 + print >> plainmsg + print >> plainmsg, footertxt + print >> plainmsg + # Do the last bit of stuff for each digest type + signoff = _('End of ') + digestid + # MIME + # BAW: This stuff is outside the normal MIME goo, and it's what the old + # MIME digester did. No one seemed to complain, probably because you + # won't see it in an MUA that can't display the raw message. We've never + # got complaints before, but if we do, just wax this. It's primarily + # included for (marginally useful) backwards compatibility. + mimemsg.postamble = signoff + # rfc1153 + print >> plainmsg, signoff + print >> plainmsg, '*' * len(signoff) + # Do our final bit of housekeeping, and then send each message to the + # outgoing queue for delivery. + mlist.next_digest_number += 1 + virginq = Switchboard(config.VIRGINQUEUE_DIR) + # Calculate the recipients lists + plainrecips = set() + mimerecips = set() + # When someone turns off digest delivery, they will get one last digest to + # ensure that there will be no gaps in the messages they receive. + # Currently, this dictionary contains the email addresses of those folks + # who should get one last digest. We need to find the corresponding + # IMember records. + digest_members = set(mlist.digest_members.members) + for address in mlist.one_last_digest: + member = mlist.digest_members.get_member(address) + if member: + digest_members.add(member) + for member in digest_members: + if member.delivery_status <> DeliveryStatus.enabled: + continue + # Send the digest to the case-preserved address of the digest members. + email_address = member.address.original_address + if member.delivery_mode == DeliveryMode.plaintext_digests: + plainrecips.add(email_address) + elif member.delivery_mode == DeliveryMode.mime_digests: + mimerecips.add(email_address) + else: + raise AssertionError( + 'Digest member "%s" unexpected delivery mode: %s' % + (email_address, member.delivery_mode)) + # Zap this since we're now delivering the last digest to these folks. + mlist.one_last_digest.clear() + # MIME + virginq.enqueue(mimemsg, + recips=mimerecips, + listname=mlist.fqdn_listname, + isdigest=True) + # RFC 1153 + # If the entire digest message can't be encoded by list charset, fall + # back to 'utf-8'. + try: + rfc1153msg.set_payload(plainmsg.getvalue().encode(lcset), lcset) + except UnicodeError: + rfc1153msg.set_payload(plainmsg.getvalue().encode('utf-8'), 'utf-8') + virginq.enqueue(rfc1153msg, + recips=plainrecips, + listname=mlist.fqdn_listname, + isdigest=True) + + + +class ToDigest: + """Add the message to the digest, possibly sending it.""" + + implements(IHandler) + + name = 'to-digest' + description = _('Add the message to the digest, possibly sending it.') + + def process(self, mlist, msg, msgdata): + """See `IHandler`.""" + process(mlist, msg, msgdata) |
