summaryrefslogtreecommitdiff
path: root/mailman/pipeline/to_digest.py
diff options
context:
space:
mode:
Diffstat (limited to 'mailman/pipeline/to_digest.py')
-rw-r--r--mailman/pipeline/to_digest.py439
1 files changed, 439 insertions, 0 deletions
diff --git a/mailman/pipeline/to_digest.py b/mailman/pipeline/to_digest.py
new file mode 100644
index 000000000..191e3a0f1
--- /dev/null
+++ b/mailman/pipeline/to_digest.py
@@ -0,0 +1,439 @@
+# Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""Add the message to the list's current digest and possibly send it."""
+
+# Messages are accumulated to a Unix mailbox compatible file containing all
+# the messages destined for the digest. This file must be parsable by the
+# mailbox.UnixMailbox class (i.e. it must be ^From_ quoted).
+#
+# When the file reaches the size threshold, it is moved to the qfiles/digest
+# directory and the DigestRunner will craft the MIME, rfc1153, and
+# (eventually) URL-subject linked digests from the mbox.
+
+from __future__ import with_statement
+
+__metaclass__ = type
+__all__ = ['ToDigest']
+
+
+import os
+import re
+import copy
+import time
+import logging
+
+from StringIO import StringIO # cStringIO can't handle unicode.
+from email.charset import Charset
+from email.generator import Generator
+from email.header import decode_header, make_header, Header
+from email.mime.base import MIMEBase
+from email.mime.message import MIMEMessage
+from email.mime.text import MIMEText
+from email.parser import Parser
+from email.utils import formatdate, getaddresses, make_msgid
+from zope.interface import implements
+
+from mailman import Errors
+from mailman import Message
+from mailman import Utils
+from mailman import i18n
+from mailman.Mailbox import Mailbox
+from mailman.Mailbox import Mailbox
+from mailman.configuration import config
+from mailman.pipeline.decorate import decorate
+from mailman.pipeline.scrubber import process as scrubber
+from mailman.interfaces import DeliveryMode, DeliveryStatus, IHandler
+from mailman.queue import Switchboard
+
+
+_ = i18n._
+
+UEMPTYSTRING = u''
+EMPTYSTRING = ''
+
+log = logging.getLogger('mailman.error')
+
+
+
+def process(mlist, msg, msgdata):
+ # Short circuit non-digestable lists.
+ if not mlist.digestable or msgdata.get('isdigest'):
+ return
+ mboxfile = os.path.join(mlist.full_path, 'digest.mbox')
+ mboxfp = open(mboxfile, 'a+')
+ mbox = Mailbox(mboxfp)
+ mbox.AppendMessage(msg)
+ # Calculate the current size of the accumulation file. This will not tell
+ # us exactly how big the MIME, rfc1153, or any other generated digest
+ # message will be, but it's the most easily available metric to decide
+ # whether the size threshold has been reached.
+ mboxfp.flush()
+ size = os.path.getsize(mboxfile)
+ if size / 1024.0 >= mlist.digest_size_threshold:
+ # This is a bit of a kludge to get the mbox file moved to the digest
+ # queue directory.
+ try:
+ # Enclose in try/except here because a error in send_digest() can
+ # silently stop regular delivery. Unsuccessful digest delivery
+ # should be tried again by cron and the site administrator will be
+ # notified of any error explicitly by the cron error message.
+ mboxfp.seek(0)
+ send_digests(mlist, mboxfp)
+ os.unlink(mboxfile)
+ except Exception, errmsg:
+ # Bare except is generally prohibited in Mailman, but we can't
+ # forecast what exceptions can occur here.
+ log.exception('send_digests() failed: %s', errmsg)
+ mboxfp.close()
+
+
+
+def send_digests(mlist, mboxfp):
+ # Set the digest volume and time
+ if mlist.digest_last_sent_at:
+ bump = False
+ # See if we should bump the digest volume number
+ timetup = time.localtime(mlist.digest_last_sent_at)
+ now = time.localtime(time.time())
+ freq = mlist.digest_volume_frequency
+ if freq == 0 and timetup[0] < now[0]:
+ # Yearly
+ bump = True
+ elif freq == 1 and timetup[1] <> now[1]:
+ # Monthly, but we take a cheap way to calculate this. We assume
+ # that the clock isn't going to be reset backwards.
+ bump = True
+ elif freq == 2 and (timetup[1] % 4 <> now[1] % 4):
+ # Quarterly, same caveat
+ bump = True
+ elif freq == 3:
+ # Once again, take a cheap way of calculating this
+ weeknum_last = int(time.strftime('%W', timetup))
+ weeknum_now = int(time.strftime('%W', now))
+ if weeknum_now > weeknum_last or timetup[0] > now[0]:
+ bump = True
+ elif freq == 4 and timetup[7] <> now[7]:
+ # Daily
+ bump = True
+ if bump:
+ mlist.bump_digest_volume()
+ mlist.digest_last_sent_at = time.time()
+ # Wrapper around actually digest crafter to set up the language context
+ # properly. All digests are translated to the list's preferred language.
+ with i18n.using_language(mlist.preferred_language):
+ send_i18n_digests(mlist, mboxfp)
+
+
+
+def send_i18n_digests(mlist, mboxfp):
+ mbox = Mailbox(mboxfp)
+ # Prepare common information (first lang/charset)
+ lang = mlist.preferred_language
+ lcset = Utils.GetCharSet(lang)
+ lcset_out = Charset(lcset).output_charset or lcset
+ # Common Information (contd)
+ realname = mlist.real_name
+ volume = mlist.volume
+ issue = mlist.next_digest_number
+ digestid = _('$realname Digest, Vol $volume, Issue $issue')
+ digestsubj = Header(digestid, lcset, header_name='Subject')
+ # Set things up for the MIME digest. Only headers not added by
+ # CookHeaders need be added here.
+ # Date/Message-ID should be added here also.
+ mimemsg = Message.Message()
+ mimemsg['Content-Type'] = 'multipart/mixed'
+ mimemsg['MIME-Version'] = '1.0'
+ mimemsg['From'] = mlist.request_address
+ mimemsg['Subject'] = digestsubj
+ mimemsg['To'] = mlist.posting_address
+ mimemsg['Reply-To'] = mlist.posting_address
+ mimemsg['Date'] = formatdate(localtime=1)
+ mimemsg['Message-ID'] = make_msgid()
+ # Set things up for the rfc1153 digest
+ plainmsg = StringIO()
+ rfc1153msg = Message.Message()
+ rfc1153msg['From'] = mlist.request_address
+ rfc1153msg['Subject'] = digestsubj
+ rfc1153msg['To'] = mlist.posting_address
+ rfc1153msg['Reply-To'] = mlist.posting_address
+ rfc1153msg['Date'] = formatdate(localtime=1)
+ rfc1153msg['Message-ID'] = make_msgid()
+ separator70 = '-' * 70
+ separator30 = '-' * 30
+ # In the rfc1153 digest, the masthead contains the digest boilerplate plus
+ # any digest header. In the MIME digests, the masthead and digest header
+ # are separate MIME subobjects. In either case, it's the first thing in
+ # the digest, and we can calculate it now, so go ahead and add it now.
+ mastheadtxt = Utils.maketext(
+ 'masthead.txt',
+ {'real_name' : mlist.real_name,
+ 'got_list_email': mlist.posting_address,
+ 'got_listinfo_url': mlist.script_url('listinfo'),
+ 'got_request_email': mlist.request_address,
+ 'got_owner_email': mlist.owner_address,
+ }, mlist=mlist)
+ # MIME
+ masthead = MIMEText(mastheadtxt.encode(lcset), _charset=lcset)
+ masthead['Content-Description'] = digestid
+ mimemsg.attach(masthead)
+ # RFC 1153
+ print >> plainmsg, mastheadtxt
+ print >> plainmsg
+ # Now add the optional digest header
+ if mlist.digest_header:
+ headertxt = decorate(mlist, mlist.digest_header, _('digest header'))
+ # MIME
+ header = MIMEText(headertxt.encode(lcset), _charset=lcset)
+ header['Content-Description'] = _('Digest Header')
+ mimemsg.attach(header)
+ # RFC 1153
+ print >> plainmsg, headertxt
+ print >> plainmsg
+ # Now we have to cruise through all the messages accumulated in the
+ # mailbox file. We can't add these messages to the plainmsg and mimemsg
+ # yet, because we first have to calculate the table of contents
+ # (i.e. grok out all the Subjects). Store the messages in a list until
+ # we're ready for them.
+ #
+ # Meanwhile prepare things for the table of contents
+ toc = StringIO()
+ print >> toc, _("Today's Topics:\n")
+ # Now cruise through all the messages in the mailbox of digest messages,
+ # building the MIME payload and core of the RFC 1153 digest. We'll also
+ # accumulate Subject: headers and authors for the table-of-contents.
+ messages = []
+ msgcount = 0
+ msg = mbox.next()
+ while msg is not None:
+ if msg == '':
+ # It was an unparseable message
+ msg = mbox.next()
+ continue
+ msgcount += 1
+ messages.append(msg)
+ # Get the Subject header
+ msgsubj = msg.get('subject', _('(no subject)'))
+ subject = Utils.oneline(msgsubj, in_unicode=True)
+ # Don't include the redundant subject prefix in the toc
+ mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix),
+ subject, re.IGNORECASE)
+ if mo:
+ subject = subject[:mo.start(2)] + subject[mo.end(2):]
+ username = ''
+ addresses = getaddresses([Utils.oneline(msg.get('from', ''),
+ in_unicode=True)])
+ # Take only the first author we find
+ if isinstance(addresses, list) and addresses:
+ username = addresses[0][0]
+ if not username:
+ username = addresses[0][1]
+ if username:
+ username = ' (%s)' % username
+ # Put count and Wrap the toc subject line
+ wrapped = Utils.wrap('%2d. %s' % (msgcount, subject), 65)
+ slines = wrapped.split('\n')
+ # See if the user's name can fit on the last line
+ if len(slines[-1]) + len(username) > 70:
+ slines.append(username)
+ else:
+ slines[-1] += username
+ # Add this subject to the accumulating topics
+ first = True
+ for line in slines:
+ if first:
+ print >> toc, ' ', line
+ first = False
+ else:
+ print >> toc, ' ', line.lstrip()
+ # We do not want all the headers of the original message to leak
+ # through in the digest messages. For this phase, we'll leave the
+ # same set of headers in both digests, i.e. those required in RFC 1153
+ # plus a couple of other useful ones. We also need to reorder the
+ # headers according to RFC 1153. Later, we'll strip out headers for
+ # for the specific MIME or plain digests.
+ keeper = {}
+ all_keepers = {}
+ for header in (config.MIME_DIGEST_KEEP_HEADERS +
+ config.PLAIN_DIGEST_KEEP_HEADERS):
+ all_keepers[header] = True
+ all_keepers = all_keepers.keys()
+ for keep in all_keepers:
+ keeper[keep] = msg.get_all(keep, [])
+ # Now remove all unkempt headers :)
+ for header in msg.keys():
+ del msg[header]
+ # And add back the kept header in the RFC 1153 designated order
+ for keep in all_keepers:
+ for field in keeper[keep]:
+ msg[keep] = field
+ # And a bit of extra stuff
+ msg['Message'] = `msgcount`
+ # Get the next message in the digest mailbox
+ msg = mbox.next()
+ # Now we're finished with all the messages in the digest. First do some
+ # sanity checking and then on to adding the toc.
+ if msgcount == 0:
+ # Why did we even get here?
+ return
+ toctext = toc.getvalue()
+ # MIME
+ try:
+ tocpart = MIMEText(toctext.encode(lcset), _charset=lcset)
+ except UnicodeError:
+ tocpart = MIMEText(toctext.encode('utf-8'), _charset='utf-8')
+ tocpart['Content-Description']= _("Today's Topics ($msgcount messages)")
+ mimemsg.attach(tocpart)
+ # RFC 1153
+ print >> plainmsg, toctext
+ print >> plainmsg
+ # For RFC 1153 digests, we now need the standard separator
+ print >> plainmsg, separator70
+ print >> plainmsg
+ # Now go through and add each message
+ mimedigest = MIMEBase('multipart', 'digest')
+ mimemsg.attach(mimedigest)
+ first = True
+ for msg in messages:
+ # MIME. Make a copy of the message object since the rfc1153
+ # processing scrubs out attachments.
+ mimedigest.attach(MIMEMessage(copy.deepcopy(msg)))
+ # rfc1153
+ if first:
+ first = False
+ else:
+ print >> plainmsg, separator30
+ print >> plainmsg
+ # Use Mailman.pipeline.scrubber.process() to get plain text
+ try:
+ msg = scrubber(mlist, msg)
+ except Errors.DiscardMessage:
+ print >> plainmsg, _('[Message discarded by content filter]')
+ continue
+ # Honor the default setting
+ for h in config.PLAIN_DIGEST_KEEP_HEADERS:
+ if msg[h]:
+ uh = Utils.wrap('%s: %s' % (h, Utils.oneline(msg[h],
+ in_unicode=True)))
+ uh = '\n\t'.join(uh.split('\n'))
+ print >> plainmsg, uh
+ print >> plainmsg
+ # If decoded payload is empty, this may be multipart message.
+ # -- just stringfy it.
+ payload = msg.get_payload(decode=True) \
+ or msg.as_string().split('\n\n',1)[1]
+ mcset = msg.get_content_charset('us-ascii')
+ try:
+ payload = unicode(payload, mcset, 'replace')
+ except (LookupError, TypeError):
+ # unknown or empty charset
+ payload = unicode(payload, 'us-ascii', 'replace')
+ print >> plainmsg, payload
+ if not payload.endswith('\n'):
+ print >> plainmsg
+ # Now add the footer
+ if mlist.digest_footer:
+ footertxt = decorate(mlist, mlist.digest_footer, _('digest footer'))
+ # MIME
+ footer = MIMEText(footertxt.encode(lcset), _charset=lcset)
+ footer['Content-Description'] = _('Digest Footer')
+ mimemsg.attach(footer)
+ # RFC 1153
+ # BAW: This is not strictly conformant RFC 1153. The trailer is only
+ # supposed to contain two lines, i.e. the "End of ... Digest" line and
+ # the row of asterisks. If this screws up MUAs, the solution is to
+ # add the footer as the last message in the RFC 1153 digest. I just
+ # hate the way that VM does that and I think it's confusing to users,
+ # so don't do it unless there's a clamor.
+ print >> plainmsg, separator30
+ print >> plainmsg
+ print >> plainmsg, footertxt
+ print >> plainmsg
+ # Do the last bit of stuff for each digest type
+ signoff = _('End of ') + digestid
+ # MIME
+ # BAW: This stuff is outside the normal MIME goo, and it's what the old
+ # MIME digester did. No one seemed to complain, probably because you
+ # won't see it in an MUA that can't display the raw message. We've never
+ # got complaints before, but if we do, just wax this. It's primarily
+ # included for (marginally useful) backwards compatibility.
+ mimemsg.postamble = signoff
+ # rfc1153
+ print >> plainmsg, signoff
+ print >> plainmsg, '*' * len(signoff)
+ # Do our final bit of housekeeping, and then send each message to the
+ # outgoing queue for delivery.
+ mlist.next_digest_number += 1
+ virginq = Switchboard(config.VIRGINQUEUE_DIR)
+ # Calculate the recipients lists
+ plainrecips = set()
+ mimerecips = set()
+ # When someone turns off digest delivery, they will get one last digest to
+ # ensure that there will be no gaps in the messages they receive.
+ # Currently, this dictionary contains the email addresses of those folks
+ # who should get one last digest. We need to find the corresponding
+ # IMember records.
+ digest_members = set(mlist.digest_members.members)
+ for address in mlist.one_last_digest:
+ member = mlist.digest_members.get_member(address)
+ if member:
+ digest_members.add(member)
+ for member in digest_members:
+ if member.delivery_status <> DeliveryStatus.enabled:
+ continue
+ # Send the digest to the case-preserved address of the digest members.
+ email_address = member.address.original_address
+ if member.delivery_mode == DeliveryMode.plaintext_digests:
+ plainrecips.add(email_address)
+ elif member.delivery_mode == DeliveryMode.mime_digests:
+ mimerecips.add(email_address)
+ else:
+ raise AssertionError(
+ 'Digest member "%s" unexpected delivery mode: %s' %
+ (email_address, member.delivery_mode))
+ # Zap this since we're now delivering the last digest to these folks.
+ mlist.one_last_digest.clear()
+ # MIME
+ virginq.enqueue(mimemsg,
+ recips=mimerecips,
+ listname=mlist.fqdn_listname,
+ isdigest=True)
+ # RFC 1153
+ # If the entire digest message can't be encoded by list charset, fall
+ # back to 'utf-8'.
+ try:
+ rfc1153msg.set_payload(plainmsg.getvalue().encode(lcset), lcset)
+ except UnicodeError:
+ rfc1153msg.set_payload(plainmsg.getvalue().encode('utf-8'), 'utf-8')
+ virginq.enqueue(rfc1153msg,
+ recips=plainrecips,
+ listname=mlist.fqdn_listname,
+ isdigest=True)
+
+
+
+class ToDigest:
+ """Add the message to the digest, possibly sending it."""
+
+ implements(IHandler)
+
+ name = 'to-digest'
+ description = _('Add the message to the digest, possibly sending it.')
+
+ def process(self, mlist, msg, msgdata):
+ """See `IHandler`."""
+ process(mlist, msg, msgdata)