summaryrefslogtreecommitdiff
path: root/src/mailman/queue/digest.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mailman/queue/digest.py')
-rw-r--r--src/mailman/queue/digest.py365
1 files changed, 365 insertions, 0 deletions
diff --git a/src/mailman/queue/digest.py b/src/mailman/queue/digest.py
new file mode 100644
index 000000000..e066be993
--- /dev/null
+++ b/src/mailman/queue/digest.py
@@ -0,0 +1,365 @@
+# Copyright (C) 2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Digest queue runner."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type  # Python 2: base-less classes below become new-style.
+__all__ = [  # Public names of this module.
+ 'DigestRunner',
+ ]
+
+
+import re
+
+# cStringIO doesn't support unicode.
+from StringIO import StringIO
+from contextlib import nested
+from copy import deepcopy
+from email.header import Header
+from email.message import Message
+from email.mime.message import MIMEMessage
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.utils import formatdate, getaddresses, make_msgid
+
+from mailman import i18n
+from mailman.Utils import GetCharSet, maketext, oneline, wrap
+from mailman.config import config
+from mailman.core.errors import DiscardMessage
+from mailman.i18n import _
+from mailman.interfaces.member import DeliveryMode, DeliveryStatus
+from mailman.pipeline.decorate import decorate
+from mailman.pipeline.scrubber import process as scrubber
+from mailman.queue import Runner
+from mailman.utilities.mailbox import Mailbox
+
+
+
+class Digester:
+ """Base digester class."""
+
+ def __init__(self, mlist, volume, digest_number):
+ self._mlist = mlist
+ self._charset = GetCharSet(mlist.preferred_language)
+ # This will be used in the Subject, so use $-strings.
+ realname = mlist.real_name
+ issue = digest_number
+ self._digest_id = _('$realname Digest, Vol $volume, Issue $issue')
+ self._subject = Header(self._digest_id,
+ self._charset,
+ header_name='Subject')
+ self._message = self._make_message()
+ self._message['From'] = mlist.request_address
+ self._message['Subject'] = self._subject
+ self._message['To'] = mlist.posting_address
+ self._message['Reply-To'] = mlist.posting_address
+ self._message['Date'] = formatdate(localtime=True)
+ self._message['Message-ID'] = make_msgid()
+ # In the rfc1153 digest, the masthead contains the digest boilerplate
+ # plus any digest header. In the MIME digests, the masthead and
+ # digest header are separate MIME subobjects. In either case, it's
+ # the first thing in the digest, and we can calculate it now, so go
+ # ahead and add it now.
+ self._masthead = maketext(
+ 'masthead.txt', dict(
+ real_name=mlist.real_name,
+ got_list_email=mlist.posting_address,
+ got_listinfo_url=mlist.script_url('listinfo'),
+ got_request_email=mlist.request_address,
+ got_owner_email=mlist.owner_address,
+ ),
+ mlist=mlist)
+ # Set things up for the table of contents.
+ self._header = decorate(mlist, mlist.digest_header)
+ self._toc = StringIO()
+ print >> self._toc, _("Today's Topics:\n")
+
+ def add_to_toc(self, msg, count):
+ """Add a message to the table of contents."""
+ subject = msg.get('subject', _('(no subject)'))
+ subject = oneline(subject, in_unicode=True)
+ # Don't include the redundant subject prefix in the toc
+ mo = re.match('(re:? *)?({0})'.format(
+ re.escape(self._mlist.subject_prefix)),
+ subject, re.IGNORECASE)
+ if mo:
+ subject = subject[:mo.start(2)] + subject[mo.end(2):]
+ # Take only the first author we find.
+ username = ''
+ addresses = getaddresses(
+ [oneline(msg.get('from', ''), in_unicode=True)])
+ if addresses:
+ username = addresses[0][0]
+ if not username:
+ username = addresses[0][1]
+ if username:
+ username = ' ({0})'.format(username)
+ lines = wrap('{0:2}. {1}'. format(count, subject), 65).split('\n')
+ # See if the user's name can fit on the last line
+ if len(lines[-1]) + len(username) > 70:
+ lines.append(username)
+ else:
+ lines[-1] += username
+ # Add this subject to the accumulating topics
+ first = True
+ for line in lines:
+ if first:
+ print >> self._toc, ' ', line
+ first = False
+ else:
+ print >> self._toc, ' ', line.lstrip()
+
+ def add_message(self, msg, count):
+ """Add the message to the digest."""
+ # We do not want all the headers of the original message to leak
+ # through in the digest messages.
+ keepers = {}
+ for header in self._keepers:
+ keepers[header] = msg.get_all(keeper, [])
+ # Remove all the unkempt <wink> headers. Use .keys() to allow for
+ # destructive iteration...
+ for header in msg.keys():
+ del msg[header]
+ # ... and add them in the designated order.
+ for header in self._keepers:
+ for value in keepers[header]:
+ msg[header] = value
+ # Add some useful extra stuff.
+ msg['Message'] = unicode(count)
+
+
+
+class MIMEDigester(Digester):
+ """A MIME digester."""
+
+ def __init__(self, mlist, volume, digest_number):
+ super(MIMEDigester, self).__init__(mlist, volume, digest_number)
+ masthead = MIMEText(self._masthead.encode(self._charset),
+ _charset=self._charset)
+ masthead['Content-Description'] = self._subject
+ self._message.attach(masthead)
+ # Add the optional digest header.
+ if mlist.digest_header:
+ header = MIMEText(self._header.encode(self._charset),
+ _charset=self._charset)
+ header['Content-Description'] = _('Digest Header')
+ self._message.attach(header)
+ # Calculate the set of headers we're to keep in the MIME digest.
+ self._keepers = set(config.digests.mime_digest_keep_headers.split())
+
+ def _make_message(self):
+ return MIMEMultipart('mixed')
+
+ def add_toc(self, count):
+ """Add the table of contents."""
+ toc_text = self._toc.getvalue()
+ try:
+ toc_part = MIMEText(toc_text.encode(self._charset),
+ _charset=self._charset)
+ except UnicodeError:
+ toc_part = MIMEText(toc_text.encode('utf-8'), _charset='utf-8')
+ toc_part['Content-Description']= _("Today's Topics ($count messages)")
+ self._message.attach(toc_part)
+
+ def add_message(self, msg, count):
+ """Add the message to the digest."""
+ # Make a copy of the message object, since the RFC 1153 processing
+ # scrubs out attachments.
+ self._message.attach(MIMEMessage(deepcopy(msg)))
+
+ def finish(self):
+ """Finish up the digest, producing the email-ready copy."""
+ if self._mlist.digest_footer:
+ footer_text = decorate(self._mlist, self._mlist.digest_footer)
+ footer = MIMEText(footer_text.encode(self._charset),
+ _charset=self._charset)
+ footer['Content-Description'] = _('Digest Footer')
+ self._message.attach(footer)
+ # This stuff is outside the normal MIME goo, and it's what the old
+ # MIME digester did. No one seemed to complain, probably because you
+ # won't see it in an MUA that can't display the raw message. We've
+ # never got complaints before, but if we do, just wax this. It's
+ # primarily included for (marginally useful) backwards compatibility.
+ self._message.postamble = _('End of ') + self._digest_id
+ return self._message
+
+
+
+class RFC1153Digester(Digester):
+ """A digester of the format specified by RFC 1153."""
+
+ def __init__(self, mlist, volume, digest_number):
+ super(RFC1153Digester, self).__init__(mlist, volume, digest_number)
+ self._separator70 = '-' * 70
+ self._separator30 = '-' * 30
+ self._text = StringIO()
+ print >> self._text, self._masthead
+ print >> self._text
+ # Add the optional digest header.
+ if mlist.digest_header:
+ print >> self._text, self._header
+ print >> self._text
+ # Calculate the set of headers we're to keep in the RFC1153 digest.
+ self._keepers = set(config.digests.plain_digest_keep_headers.split())
+
+ def _make_message(self):
+ return Message()
+
+ def add_toc(self, count):
+ """Add the table of contents."""
+ print >> self._text, self._toc.getvalue()
+ print >> self._text
+ print >> self._text, self._separator70
+ print >> self._text
+
+ def add_message(self, msg, count):
+ """Add the message to the digest."""
+ if count > 1:
+ print >> self._text, self._separator30
+ print >> self._text
+ # Scrub attachements.
+ try:
+ msg = scrubber(self._mlist, msg)
+ except DiscardMessage:
+ print >> self._text, _('[Message discarded by content filter]')
+ return
+ # Each message section contains a few headers.
+ for header in config.digests.plain_digest_keep_headers.split():
+ if header in msg:
+ value = oneline(msg[header], in_unicode=True)
+ value = wrap('{0}: {1}'.format(header, value))
+ value = '\n\t'.join(value.split('\n'))
+ print >> self._text, value
+ print >> self._text
+ # Add the payload. If the decoded payload is empty, this may be a
+ # multipart message. In that case, just stringify it.
+ payload = msg.get_payload(decode=True)
+ payload = (payload if payload else msg.as_string().split('\n\n', 1)[1])
+ try:
+ charset = msg.get_content_charset('us-ascii')
+ payload = unicode(payload, charset, 'replace')
+ except (LookupError, TypeError):
+ # Unknown or empty charset.
+ payload = unicode(payload, 'us-ascii', 'replace')
+ print >> self._text, payload
+ if not payload.endswith('\n'):
+ print >> self._text
+
+ def finish(self):
+ """Finish up the digest, producing the email-ready copy."""
+ if self._mlist.digest_footer:
+ footer_text = decorate(self._mlist, self._mlist.digest_footer)
+ # This is not strictly conformant RFC 1153. The trailer is only
+ # supposed to contain two lines, i.e. the "End of ... Digest" line
+ # and the row of asterisks. If this screws up MUAs, the solution
+ # is to add the footer as the last message in the RFC 1153 digest.
+ # I just hate the way that VM does that and I think it's confusing
+ # to users, so don't do it unless there's a clamor.
+ print >> self._text, self._separator30
+ print >> self._text
+ print >> self._text, footer_text
+ print >> self._text
+ # Add the sign-off.
+ sign_off = _('End of ') + self._digest_id
+ print >> self._text, sign_off
+ print >> self._text, '*' * len(sign_off)
+ # If the digest message can't be encoded by the list character set,
+ # fall back to utf-8.
+ text = self._text.getvalue()
+ try:
+ self._message.set_payload(text.encode(self._charset),
+ charset=self._charset)
+ except UnicodeError:
+ self._message.set_payload(text.encode('utf-8'), charset='utf-8')
+ return self._message
+
+
+
+class DigestRunner(Runner):
+ """The digest queue runner."""
+
+ def _dispose(self, mlist, msg, msgdata):
+ """See `IRunner`."""
+ volume = msgdata['volume']
+ digest_number = msgdata['digest_number']
+ with nested(Mailbox(msgdata['digest_path']),
+ i18n.using_language(mlist.preferred_language)) as (
+ mailbox, language):
+ # Create the digesters.
+ mime_digest = MIMEDigester(mlist, volume, digest_number)
+ rfc1153_digest = RFC1153Digester(mlist, volume, digest_number)
+ # Cruise through all the messages in the mailbox, first building
+ # the table of contents and accumulating Subject: headers and
+ # authors. The question really is whether it's better from a
+ # performance and memory footprint to go through the mailbox once
+ # and cache the messages in a list, or to cruise through the
+ # mailbox twice. We'll do the latter, but it's a complete guess.
+ count = None
+ for count, (key, message) in enumerate(mailbox.iteritems(), 1):
+ mime_digest.add_to_toc(message, count)
+ rfc1153_digest.add_to_toc(message, count)
+ assert count is not None, 'No digest messages?'
+ # Add the table of contents.
+ mime_digest.add_toc(count)
+ rfc1153_digest.add_toc(count)
+ # Cruise through the set of messages a second time, adding them to
+ # the actual digest.
+ for count, (key, message) in enumerate(mailbox.iteritems(), 1):
+ mime_digest.add_message(message, count)
+ rfc1153_digest.add_message(message, count)
+ # Finish up the digests.
+ mime = mime_digest.finish()
+ rfc1153 = rfc1153_digest.finish()
+ # Calculate the recipients lists
+ mime_recipients = set()
+ rfc1153_recipients = set()
+ # When someone turns off digest delivery, they will get one last
+ # digest to ensure that there will be no gaps in the messages they
+ # receive.
+ digest_members = set(mlist.digest_members.members)
+ for address in mlist.one_last_digest:
+ member = mlist.digest_members.get_member(address)
+ if member:
+ digest_members.add(member)
+ for member in digest_members:
+ if member.delivery_status <> DeliveryStatus.enabled:
+ continue
+ # Send the digest to the case-preserved address of the digest
+ # members.
+ email_address = member.address.original_address
+ if member.delivery_mode == DeliveryMode.plaintext_digests:
+ rfc1153_recipients.add(email_address)
+ elif member.delivery_mode == DeliveryMode.mime_digests:
+ mime_recipients.add(email_address)
+ else:
+ raise AssertionError(
+ 'Digest member "{0}" unexpected delivery mode: {1}'.format(
+ email_address, member.delivery_mode))
+ # Send the digests to the virgin queue for final delivery.
+ queue = config.switchboards['virgin']
+ queue.enqueue(mime,
+ recips=mime_recipients,
+ listname=mlist.fqdn_listname,
+ isdigest=True)
+ queue.enqueue(rfc1153,
+ recips=rfc1153_recipients,
+ listname=mlist.fqdn_listname,
+ isdigest=True)
+ # Now that we've delivered the last digest to folks who were waiting
+ # for it, clear that recipient set.
+ mlist.one_last_digest.clear()