diff options
Diffstat (limited to 'Mailman/Handlers/ToDigest.py')
| -rw-r--r-- | Mailman/Handlers/ToDigest.py | 594 |
1 files changed, 254 insertions, 340 deletions
diff --git a/Mailman/Handlers/ToDigest.py b/Mailman/Handlers/ToDigest.py index a2fbc7205..237eeb3b3 100644 --- a/Mailman/Handlers/ToDigest.py +++ b/Mailman/Handlers/ToDigest.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. +# Copyright (C) 1998,1999,2000,2001 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -15,368 +15,282 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """Add the message to the list's current digest and possibly send it. - -This handler will add the current message to the list's currently accumulating -digest. If the digest has reached its size threshold, it is delivered by -creating an OutgoingMessage of the digest, setting the `isdigest' attribute, -and injecting it into the pipeline. """ +# Messages are accumulated to a Unix mailbox compatible file containing all +# the messages destined for the digest. This file must be parsable by the +# mailbox.UnixMailbox class (i.e. it must be ^From_ quoted). +# +# When the file reaches the size threshold, it is moved to the qfiles/digest +# directory and the DigestRunner will craft the MIME, rfc1153, and +# (eventually) URL-subject linked digests from the mbox. + import os -import string import re +from types import ListType + +from mimelib.Parser import Parser +from mimelib.Generator import Generator +from mimelib.MIMEBase import MIMEBase +from mimelib.Text import Text +from mimelib.address import getaddresses +from mimelib.ReprMixin import ReprMixin +from Mailman import mm_cfg from Mailman import Utils from Mailman import Message -from Mailman import mm_cfg -from Mailman.Logging.Syslog import syslog +from Mailman.i18n import _ +from Mailman.Handlers.Decorate import decorate +from Mailman.Queue.sbcache import get_switchboard -from stat import ST_SIZE -from errno import ENOENT +from Mailman.pythonlib import mailbox +from Mailman.pythonlib.StringIO import StringIO -MIME_SEPARATOR = '__--__--' -MIME_NONSEPARATOR = ' %s ' % MIME_SEPARATOR -EXCLUDE_HEADERS = ('received', 'errors-to') +# rfc1153 says we should keep only these headers, and present them in this +# exact order. +KEEP = ['Date', 'From', 'To', 'Cc', 'Subject', 'Message-ID', 'Keywords', + # I believe we should also keep these headers though. + 'In-Reply-To', 'References', 'Content-Type', 'MIME-Version', + 'Content-Transfer-Encoding', 'Precedence', + # Mailman 2.0 adds these headers, but they don't need to be kept from + # the original message: Message + ] def process(mlist, msg, msgdata): - # short circuit non-digestable lists, or for messages that are already - # digests + # Short circuit non-digestable lists. if not mlist.digestable or msgdata.get('isdigest'): return - digestfile = os.path.join(mlist.fullpath(), 'next-digest') - topicsfile = os.path.join(mlist.fullpath(), 'next-digest-topics') - omask = os.umask(002) + mboxfile = os.path.join(mlist.fullpath(), 'digest.mbox') + omask = os.umask(007) try: - digestfp = open(digestfile, 'a+') - topicsfp = open(topicsfile, 'a+') + mboxfp = open(mboxfile, 'a+') finally: os.umask(omask) - # For the sender, use either the From: field's name comment or the mail - # address. Don't use Sender: field because by now it's been munged into - # the list-admin's address - name, addr = msg.getaddr('from') - sender = quotemime(name or addr) - # BAW: I don't like using $LANG - os.environ['LANG'] = mlist.GetPreferredLanguage(sender) - fromline = quotemime(msg.getheader('from')) - date = quotemime(msg.getheader('date')) - body = quotemime(msg.body) - subject = quotemime(msg.getheader('subject')) - # don't include the redundant subject prefix in the TOC entries - mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix), - subject, re.IGNORECASE) - if mo: - subject = subject[:mo.start(2)] + subject[mo.end(2):] - # Watch for multiline subjects - slines = [] - for sline in string.split(subject, '\n'): - if not slines: - slines.append(sline) - else: - slines.append(' ' + sline) - topicsfp.write(' %2d. %s (%s)\n' % (mlist.next_post_number, - string.join(slines, '\n'), - sender)) - # We exclude specified headers and all X-* headers - kept_headers = [] - keeping = 0 - have_content_type = 0 - have_content_description = 0 - # speed up the inner loop - lower, split, excludes = string.lower, string.split, EXCLUDE_HEADERS - for h in msg.headers: - if lower(h[:2]) == 'x-' or lower(split(h, ':')[0]) in excludes: - keeping = 0 - elif h and h[0] in (' ', '\t'): - if keeping and kept_headers: - # continuation of something we're keeping - kept_headers[-1] = kept_headers[-1] + h - else: - keeping = 1 - if lower(h[:7]) == 'content-': - kept_headers.append(h) - if lower(h[:12]) == 'content-type': - have_content_type = 1 - elif lower(h[:19]) == 'content-description': - have_content_description = 1 - else: - kept_headers.append(quotemime(h)) - # after processing the headers - if have_content_type and not have_content_description: - kept_headers.append('Content-Description: %s\n' % subject) - # TBD: reply-to munging happens elsewhere in the pipeline - digestfp.write('--%s\n\n%s: %d\n%s\n%s' % - (MIME_SEPARATOR, _("Message"), mlist.next_post_number, - string.join(kept_headers, ''), - body)) - digestfp.write('\n') - mlist.next_post_number = mlist.next_post_number + 1 - topicsfp.close() - digestfp.close() - # if the current digest size exceeds the threshold, send the digest by - # injection into the list's message pipeline - try: - size = os.stat(digestfile)[ST_SIZE] - if size/1024.0 >= mlist.digest_size_threshhold: - inject_digest(mlist, digestfile, topicsfile) - except OSError, e: - code, msg = e - if code == ENOENT: - syslog('error', 'Lost digest file: %s' % digestfile) - syslog('error', str(e)) - - - -def inject_digest(mlist, digestfile, topicsfile): - fp = open(topicsfile, 'r+') - topicsdata = fp.read() - fp.close() - topicscount = string.count(topicsdata, '\n') - fp = open(digestfile) - # - # filters for recipient calculation - def delivery_enabled_p(x, s=mlist, v=mm_cfg.DisableDelivery): - return not s.GetUserOption(x, v) - def likes_mime_p(x, s=mlist, v=mm_cfg.DisableMime): - return not s.GetUserOption(x, v) - def hates_mime_p(x, s=mlist, v=mm_cfg.DisableMime): - return s.GetUserOption(x, v) - # - # These people have switched their options from digest delivery to - # non-digest delivery. they need to get one last digest, but be sure they - # haven't switched back to digest delivery in the meantime! - digestmembers = {} - if hasattr(mlist, 'one_last_digest'): - digestmembers.update(mlist.one_last_digest) - del mlist.one_last_digest - for addr in mlist.GetDigestMembers(): - digestmembers[addr] = addr - recipients = filter(delivery_enabled_p, digestmembers.keys()) - mime_recips = filter(likes_mime_p, recipients) - text_recips = filter(hates_mime_p, recipients) - # - # log this digest injection - syslog('digest', - '%s v %d - %d msgs, %d recips (%d mime, %d text, %d disabled)' % - (mlist.real_name, mlist.next_digest_number, topicscount, - len(digestmembers), len(mime_recips), len(text_recips), - len(digestmembers) - len(recipients))) - # do any deliveries - if mime_recips or text_recips: - digest = Digest(mlist, topicsdata, fp.read()) - # Generate the MIME digest, but only queue it for delivery so we don't - # hold the lock too long. - if mime_recips: - msg = digest.asMIME() - msg['To'] = mlist.GetListEmail() - msg.Enqueue(mlist, recips=mime_recips, isdigest=1, approved=1) - if text_recips: - # Generate the RFC934 "plain text" digest, and again, just queue - # it - msg = digest.asText() - msg['To'] = mlist.GetListEmail() - msg.Enqueue(mlist, recips=text_recips, isdigest=1, approved=1) - # zap accumulated digest information for the next round - os.unlink(digestfile) - os.unlink(topicsfile) - mlist.next_digest_number = mlist.next_digest_number + 1 - mlist.next_post_number = 1 - syslog('digest', 'next %s digest: #%d, post#%d' % - (mlist.internal_name(), mlist.next_digest_number, - mlist.next_post_number)) + g = Generator(mboxfp) + g.write(msg) + # Calculate the current size of the accumulation file. This will not tell + # us exactly how big the MIME, rfc1153, or any other generated digest + # message will be, but it's the most easily available metric to decide + # whether the size threshold has been reached. + size = mboxfp.tell() + if size / 1024.0 >= mlist.digest_size_threshhold: + # This is a bit of a kludge to get the mbox file moved to the digest + # queue directory. + mboxfp.seek(0) + send_digests(mlist, mboxfp) + os.unlink(mboxfile) + mboxfp.close() -def quotemime(text): - # TBD: ug. - if not text: - return '' - return string.join(string.split(text, MIME_SEPARATOR), MIME_NONSEPARATOR) +# factory callable for UnixMailboxes. This ensures that any object we get out +# of the mailbox is an instance of our subclass. (requires Python 2.1's +# mailbox module) +def msgfactory(fp): + p = Parser(Message.Message) + return p.parse(fp) + - -class Digest: - """A digest, representable as either a MIME or plain text message.""" - def __init__(self, mlist, toc, body): - self.__mlist = mlist - self.__toc = toc - self.__body = body - self.__volume = 'Vol %d #%d' % (mlist.volume, mlist.next_digest_number) - numtopics = string.count(self.__toc, '\n') - self.__numinfo = '%d msg%s' % (numtopics, numtopics <> 1 and 's' or '') - - def ComposeBaseHeaders(self, msg): - """Populate the message with the presentation-independent headers.""" - realname = self.__mlist.real_name - volume = self.__volume - numinfo = self.__numinfo - msg['From'] = self.__mlist.GetRequestEmail() - msg['Subject'] = _('%(realname)s digest, %(volume)s - %(numinfo)s') - msg['Reply-to'] = self.__mlist.GetListEmail() - msg['X-Mailer'] = "Mailman v%s" % mm_cfg.VERSION - msg['MIME-version'] = '1.0' - - def TemplateRefs(self): - """Resolve references in a format string against list settings. - - The resolution is done against a copy of the lists attribute - dictionary, with the addition of some of settings for computed - items - got_listinfo_url, got_request_email, got_list_email, and - got_owner_email. - - """ - # Collect the substitutions: - if hasattr(self, 'substitutions'): - return Utils.SafeDict(self.substitutions) - mlist = self.__mlist - substs = Utils.SafeDict() - substs.update(mlist.__dict__) - substs.update( - {'got_listinfo_url' : mlist.GetScriptURL('listinfo', absolute=1), - 'got_request_email': mlist.GetRequestEmail(), - 'got_list_email' : mlist.GetListEmail(), - 'got_owner_email' : mlist.GetAdminEmail(), - 'cgiext' : mm_cfg.CGIEXT, - }) - return substs - - def asMIME(self): - return self.Present(mime=1) - - def asText(self): - return self.Present(mime=0) - - def Present(self, mime): - """Produce a rendering of the digest, as an OutgoingMessage.""" - msg = Message.OutgoingMessage() - self.ComposeBaseHeaders(msg) - digestboundary = MIME_SEPARATOR - if mime: - import mimetools - envboundary = mimetools.choose_boundary() - msg['Content-type'] = 'multipart/mixed; boundary=' + envboundary - else: - envboundary = MIME_SEPARATOR - msg['Content-type'] = 'text/plain' - dashbound = "--" + envboundary - # holds lines of the message - lines = [] - # Masthead: - if mime: - realname = self.__mlist.real_name - volume = self.__volume - lines.append(dashbound) - lines.append("Content-type: text/plain; charset=" + Utils.GetCharSet()) - lines.append("Content-description:" + - _(" Masthead (%(realname)s digest, %(volume)s)")) - lines.append('') - masthead = Utils.maketext('masthead.txt', self.TemplateRefs(), - self.__mlist.preferred_language) - lines = lines + string.split(masthead, '\n') - # List-specific header: - if self.__mlist.digest_header: - lines.append('') - if mime: - lines.append(dashbound) - lines.append("Content-type: text/plain; charset=" + Utils.GetCharSet()) - lines.append("Content-description: " + _("Digest Header")) - lines.append('') - lines.append(self.__mlist.digest_header % self.TemplateRefs()) - # Table of contents: - lines.append('') - if mime: - numinfo = self.__numinfo - lines.append(dashbound) - lines.append("Content-type: text/plain; charset=" + Utils.GetCharSet()) - lines.append("Content-description: " + - _("Today's Topics (%(numinfo)s)")) - lines.append('') - lines.append(_("Today's Topics:")) - lines.append('') - lines.append(self.__toc) - # Digest text: - if mime: - lines.append(dashbound) - lines.append('Content-type: multipart/digest; boundary="%s"' - % digestboundary) - lines.append('') - lines.append(self.__body) - # End multipart digest text part - lines.append('') - lines.append("--" + digestboundary + "--") - else: - lines.extend(filter_headers( - self.__body, - mm_cfg.DEFAULT_PLAIN_DIGEST_KEEP_HEADERS, - digestboundary)) - # List-specific footer: - if self.__mlist.digest_footer: - lines.append(dashbound) - if mime: - lines.append("Content-type: text/plain; charset=" + Utils.GetCharSet()) - lines.append("Content-description: " + _("Digest Footer")) - lines.append('') - lines.append(self.__mlist.digest_footer % self.TemplateRefs()) - # Close: - if mime: - # Close encompassing mime envelope. - lines.append('') - lines.append(dashbound + "--") - lines.append('') - realname = self.__mlist.real_name - lines.append(_("End of %(realname)s Digest")) - msg.body = string.join(lines, '\n') - return msg +# We want mimelib's MIMEBase class, but we also want a str() able object. +class ReprMIME(MIMEBase, ReprMixin): + pass -def filter_headers(body, keep_headers, mimesep): - """Return copy of body that omits non-crucial headers.""" - SEPARATOR = 0 - HEADER = 1 - BODY = 2 - # simple state machine - state = SEPARATOR - lines = string.split(body, '\n') - lineno = 1 - text = [lines[0]] - keptlast = 0 - for lineno in range(1, len(lines)): - line = lines[lineno] - if state == BODY: - # Snarf the body up to, and including, the next separator - text.append(line) - if string.strip(line) == '--' + mimesep: - state = SEPARATOR - continue - elif state == SEPARATOR: - state = HEADER - # Keep the one (blank) line between separator and headers - text.append(line) - keptlast = 0 - continue - elif state == HEADER: - if not string.strip(line): - state = BODY - text.append(line) - continue - elif line[0] in (' ', '\t'): - # Continuation line, keep if the prior line was kept - if keptlast: - text.append(line) - continue +def send_digests(mlist, mboxfp): + mbox = mailbox.UnixMailbox(mboxfp, msgfactory) + # Prepare common information + digestid = '%s Digest, Vol %d, Issue %d' % ( + mlist.real_name, mlist.volume, mlist.next_digest_number) + # Set things up for the MIME digest. Only headers not added by + # CookHeaders need be added here. + mimemsg = ReprMIME('multipart', 'mixed') + mimemsg['From'] = mlist.GetRequestEmail() + mimemsg['Subject'] = digestid + mimemsg['To'] = mlist.GetListEmail() + # Set things up for the rfc1153 digest + plainmsg = StringIO() + rfc1153msg = Message.Message() + rfc1153msg['From'] = mlist.GetRequestEmail() + rfc1153msg['Subject'] = digestid + rfc1153msg['To'] = mlist.GetListEmail() + separator70 = '-' * 70 + separator30 = '-' * 30 + # In the rfc1153 digest, the masthead contains the digest boilerplate plus + # any digest footer. In the MIME digests, the masthead and digest header + # are separate MIME subobjects. In either case, it's the first thing in + # the digest, and we can calculate it now, so go ahead and add it now. + mastheadtxt = Utils.maketext( + 'masthead.txt', + {'real_name' : mlist.real_name, + 'got_list_email': mlist.GetListEmail(), + 'got_listinfo_url': mlist.GetScriptURL('listinfo', absolute=1), + 'got_request_email': mlist.GetRequestEmail(), + 'got_owner_email': mlist.GetOwnerEmail(), + }, mlist.preferred_language) + # MIME + masthead = Text(mastheadtxt) + masthead['Content-Description'] = digestid + mimemsg.add_payload(masthead) + # rfc1153 + print >> plainmsg, mastheadtxt + print >> plainmsg + # Now add the optional digest header + if mlist.digest_header: + headertxt = decorate(mlist, mlist.digest_header, 'digest header') + # MIME + header = Text(headertxt) + header['Content-Description'] = 'Digest Header' + mimemsg.add_payload(header) + # rfc1153 + print >> plainmsg, headertxt + print >> plainmsg + # Now we have to cruise through all the messages accumulated in the + # mailbox file. We can't add these messages to the plainmsg and mimemsg + # yet, because we first have to calculate the table of contents + # (i.e. grok out all the Subjects). Store the messages in a list until + # we're ready for them. + # + # Meanwhile prepare things for the table of contents + toc = StringIO() + print >> toc, "Today's Topics:\n" + # Now cruise through all the messages in the mailbox of digest messages, + # building the MIME payload and core of the rfc1153 digest. We'll also + # accumulate Subject: headers and authors for the table-of-contents. + messages = [] + msgcount = 0 + msg = mbox.next() + while msg: + msgcount += 1 + messages.append(msg) + # Get the Subject header + subject = msg.get('subject', _('(no subject)')) + # Don't include the redundant subject prefix in the toc + mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix), + subject, re.IGNORECASE) + if mo: + subject = subject[:mo.start(2)] + subject[mo.end(2):] + addresses = getaddresses([msg['From']]) + realname = '' + # Take only the first author we find + if type(addresses) is ListType and len(addresses) > 0: + realname = addresses[0][0] + if realname: + realname = ' (%s)' % realname + # Wrap the toc subject line + wrapped = Utils.wrap('%2d. %s' % (msgcount, subject)) + # Split by lines and see if the realname can fit on the last line + slines = wrapped.split('\n') + if len(slines[-1]) + len(realname) > 70: + slines.append(realname) + else: + slines[-1] += realname + # Add this subject to the accumulating topics + first = 1 + for line in slines: + if first: + print >> toc, ' ', line + first = 0 else: - i = string.find(line, ':') - if i < 0: - # Malformed header line. Interesting, keep it. - text.append(line) - keptlast = 1 - else: - field = line[:i] - if string.lower(field) in keep_headers: - text.append(line) - keptlast = 1 - else: - keptlast = 0 - return text + print >> toc, ' ', line + # We do not want all the headers of the original message to leak + # through in the digest messages. For simplicity, we'll leave the + # same set of headers in both digests, i.e. those required in rfc1153 + # plus a couple of other useful ones. We also need to reorder the + # headers according to rfc1153. + keeper = {} + for keep in KEEP: + keeper[keep] = msg.getall(keep) + # Now remove all unkempt headers :) + for header in msg.keys(): + del msg[header] + # And add back the kept header in the rfc1153 designated order + for keep in KEEP: + for field in keeper[keep]: + msg[keep] = field + # And a bit of extra stuff + msg['Message'] = `msgcount` + # Append to the rfc1153 body, adding a separator if necessary + msg = mbox.next() + # Now we're finished with all the messages in the digest. First do some + # sanity checking and then on to adding the toc. + if msgcount == 0: + # Why did we even get here? + return + toctext = toc.getvalue() + # MIME + tocpart = Text(toctext) + tocpart['Content-Description'] = "Today's Topics (%d messages)" % msgcount + mimemsg.add_payload(tocpart) + # rfc1153 + print >> plainmsg, toctext + print >> plainmsg + # For rfc1153 digests, we now need the standard separator + print >> plainmsg, separator70 + print >> plainmsg + # Now go through and add each message + mimedigest = MIMEBase('multipart', 'digest') + mimemsg.add_payload(mimedigest) + first = 1 + for msg in messages: + # MIME + mimedigest.add_payload(msg) + # rfc1153 + if first: + first = 0 + else: + print >> plainmsg, separator30 + print >> plainmsg + g = Generator(plainmsg) + g.write(msg, unixfrom=0) + # Now add the footer + if mlist.digest_footer: + footertxt = decorate(mlist, mlist.digest_footer, 'digest footer') + # MIME + footer = Text(footertxt) + footer['Content-Description'] = 'Digest Footer' + mimemsg.add_payload(footer) + # rfc1153 + # BAW: This is not strictly conformant rfc1153. The trailer is only + # supposed to contain two lines, i.e. the "End of ... Digest" line and + # the row of asterisks. If this screws up MUAs, the solution is to + # add the footer as the last message in the rfc1153 digest. I just + # hate the way that VM does that and I think it's confusing to users, + # so don't do it unless there's a clamor. + print >> plainmsg, separator30 + print >> plainmsg + print >> plainmsg, footertxt + print >> plainmsg + # Do the last bit of stuff for each digest type + signoff = 'End of ' + digestid + # MIME + # BAW: This stuff is outside the normal MIME goo, and it's what the old + # MIME digester did. No one seemed to complain, probably because you + # won't see it in an MUA that can't display the raw message. We've never + # got complaints before, but if we do, just wax this. It's primarily + # included for (marginally useful) backwards compatibility. + mimemsg.postamble = signoff + # rfc1153 + print >> plainmsg, signoff + print >> plainmsg, '*' * len(signoff) + # Do our final bit of housekeeping, and then send each message to the + # outgoing queue for delivery. + mlist.next_digest_number += 1 + virginq = get_switchboard(mm_cfg.VIRGINQUEUE_DIR) + # Calculate the recipients lists + plainrecips = [] + mimerecips = [] + for user in mlist.GetDigestDeliveryMembers(): + if mlist.GetUserOption(user, mm_cfg.DisableMime): + plainrecips.append(user) + else: + mimerecips.append(user) + # MIME + virginq.enqueue(mimemsg, recips=mimerecips, listname=mlist.internal_name()) + # rfc1153 + rfc1153msg.add_payload(plainmsg.getvalue()) + virginq.enqueue(rfc1153msg, + recips = plainrecips, + listname = mlist.internal_name()) |
