1 files changed, 365 insertions, 0 deletions
diff --git a/src/mailman/queue/digest.py b/src/mailman/queue/digest.py
new file mode 100644
index 000000000..e066be993
--- /dev/null
+++ b/src/mailman/queue/digest.py
@@ -0,0 +1,365 @@
+# Copyright (C) 2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Digest queue runner."""
+
+from __future__ import absolute_import, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+    'DigestRunner',
+    ]
+
+
+import re
+
+# cStringIO doesn't support unicode.
+from StringIO import StringIO
+from contextlib import nested
+from copy import deepcopy
+from email.header import Header
+from email.message import Message
+from email.mime.message import MIMEMessage
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.utils import formatdate, getaddresses, make_msgid
+
+from mailman import i18n
+from mailman.Utils import GetCharSet, maketext, oneline, wrap
+from mailman.config import config
+from mailman.core.errors import DiscardMessage
+from mailman.i18n import _
+from mailman.pipeline.decorate import decorate
+from mailman.pipeline.scrubber import process as scrubber
+from mailman.queue import Runner
+from mailman.utilities.mailbox import Mailbox
+
+
+
+class Digester:
+    """Base digester class."""
+
+    def __init__(self, mlist, volume, digest_number):
+        self._mlist = mlist
+        self._charset = GetCharSet(mlist.preferred_language)
+        # This will be used in the Subject, so use $-strings.
+        realname = mlist.real_name
+        issue = digest_number
+        self._digest_id = _('$realname Digest, Vol $volume, Issue $issue')
+        self._subject = Header(self._digest_id,
+                               self._charset,
+                               header_name='Subject')
+        self._message = self._make_message()
+        self._message['From'] = mlist.request_address
+        self._message['Subject'] = self._subject
+        self._message['To'] = mlist.posting_address
+        self._message['Reply-To'] = mlist.posting_address
+        self._message['Date'] = formatdate(localtime=True)
+        self._message['Message-ID'] = make_msgid()
+        # In the rfc1153 digest, the masthead contains the digest boilerplate
+        # plus any digest header.  In the MIME digests, the masthead and
+        # digest header are separate MIME subobjects.  In either case, it's
+        # the first thing in the digest, and we can calculate it now, so go
+        # ahead and add it now.
+        self._masthead = maketext(
+            'masthead.txt', dict(
+                real_name=mlist.real_name,
+                got_list_email=mlist.posting_address,
+                got_listinfo_url=mlist.script_url('listinfo'),
+                got_request_email=mlist.request_address,
+                got_owner_email=mlist.owner_address,
+                ),
+            mlist=mlist)
+        # Set things up for the table of contents.
+        self._header = decorate(mlist, mlist.digest_header)
+        self._toc = StringIO()
+        print >> self._toc, _("Today's Topics:\n")
+
+    def add_to_toc(self, msg, count):
+        """Add a message to the table of contents."""
+        subject = msg.get('subject', _('(no subject)'))
+        subject = oneline(subject, in_unicode=True)
+        # Don't include the redundant subject prefix in the toc
+        mo = re.match('(re:? *)?({0})'.format(
+            re.escape(self._mlist.subject_prefix)),
+                      subject, re.IGNORECASE)
+        if mo:
+            subject = subject[:mo.start(2)] + subject[mo.end(2):]
+        # Take only the first author we find.
+        username = ''
+        addresses = getaddresses(
+            [oneline(msg.get('from', ''), in_unicode=True)])
+        if addresses:
+            username = addresses[0][0]
+            if not username:
+                username = addresses[0][1]
+        if username:
+            username = ' ({0})'.format(username)
+        lines = wrap('{0:2}. {1}'. format(count, subject), 65).split('\n')
+        # See if the user's name can fit on the last line
+        if len(lines[-1]) + len(username) > 70:
+            lines.append(username)
+        else:
+            lines[-1] += username
+        # Add this subject to the accumulating topics
+        first = True
+        for line in lines:
+            if first:
+                print >> self._toc, ' ', line
+                first = False
+            else:
+                print >> self._toc, '     ', line.lstrip()
+
+    def add_message(self, msg, count):
+        """Add the message to the digest."""
+        # We do not want all the headers of the original message to leak
+        # through in the digest messages.
+        keepers = {}
+        for header in self._keepers:
+            keepers[header] = msg.get_all(keeper, [])
+        # Remove all the unkempt <wink> headers.  Use .keys() to allow for
+        # destructive iteration...
+        for header in msg.keys():
+            del msg[header]
+        # ... and add them in the designated order.
+        for header in self._keepers:
+            for value in keepers[header]:
+                msg[header] = value
+        # Add some useful extra stuff.
+        msg['Message'] = unicode(count)
+
+
+
+class MIMEDigester(Digester):
+    """A MIME digester."""
+
+    def __init__(self, mlist, volume, digest_number):
+        super(MIMEDigester, self).__init__(mlist, volume, digest_number)
+        masthead = MIMEText(self._masthead.encode(self._charset),
+                            _charset=self._charset)
+        masthead['Content-Description'] = self._subject
+        self._message.attach(masthead)
+        # Add the optional digest header.
+        if mlist.digest_header:
+            header = MIMEText(self._header.encode(self._charset),
+                              _charset=self._charset)
+            header['Content-Description'] = _('Digest Header')
+            self._message.attach(header)
+        # Calculate the set of headers we're to keep in the MIME digest.
+        self._keepers = set(config.digests.mime_digest_keep_headers.split())
+
+    def _make_message(self):
+        return MIMEMultipart('mixed')
+
+    def add_toc(self, count):
+        """Add the table of contents."""
+        toc_text = self._toc.getvalue()
+        try:
+            toc_part = MIMEText(toc_text.encode(self._charset),
+                                _charset=self._charset)
+        except UnicodeError:
+            toc_part = MIMEText(toc_text.encode('utf-8'), _charset='utf-8')
+        toc_part['Content-Description']= _("Today's Topics ($count messages)")
+        self._message.attach(toc_part)
+
+    def add_message(self, msg, count):
+        """Add the message to the digest."""
+        # Make a copy of the message object, since the RFC 1153 processing
+        # scrubs out attachments.
+        self._message.attach(MIMEMessage(deepcopy(msg)))
+
+    def finish(self):
+        """Finish up the digest, producing the email-ready copy."""
+        if self._mlist.digest_footer:
+            footer_text = decorate(self._mlist, self._mlist.digest_footer)
+            footer = MIMEText(footer_text.encode(self._charset),
+                              _charset=self._charset)
+            footer['Content-Description'] = _('Digest Footer')
+            self._message.attach(footer)
+        # This stuff is outside the normal MIME goo, and it's what the old
+        # MIME digester did.  No one seemed to complain, probably because you
+        # won't see it in an MUA that can't display the raw message.  We've
+        # never got complaints before, but if we do, just wax this.  It's
+        # primarily included for (marginally useful) backwards compatibility.
+        self._message.postamble = _('End of ') + self._digest_id
+        return self._message
+
+
+
+class RFC1153Digester(Digester):
+    """A digester of the format specified by RFC 1153."""
+
+    def __init__(self, mlist, volume, digest_number):
+        super(RFC1153Digester, self).__init__(mlist, volume, digest_number)
+        self._separator70 = '-' * 70
+        self._separator30 = '-' * 30
+        self._text = StringIO()
+        print >> self._text, self._masthead
+        print >> self._text
+        # Add the optional digest header.
+        if mlist.digest_header:
+            print >> self._text, self._header
+            print >> self._text
+        # Calculate the set of headers we're to keep in the RFC1153 digest.
+        self._keepers = set(config.digests.plain_digest_keep_headers.split())
+
+    def _make_message(self):
+        return Message()
+
+    def add_toc(self, count):
+        """Add the table of contents."""
+        print >> self._text, self._toc.getvalue()
+        print >> self._text
+        print >> self._text, self._separator70
+        print >> self._text
+
+    def add_message(self, msg, count):
+        """Add the message to the digest."""
+        if count > 1:
+            print >> self._text, self._separator30
+            print >> self._text
+        # Scrub attachements.
+        try:
+            msg = scrubber(self._mlist, msg)
+        except DiscardMessage:
+            print >> self._text, _('[Message discarded by content filter]')
+            return
+        # Each message section contains a few headers.
+        for header in config.digests.plain_digest_keep_headers.split():
+            if header in msg:
+                value = oneline(msg[header], in_unicode=True)
+                value = wrap('{0}: {1}'.format(header, value))
+                value = '\n\t'.join(value.split('\n'))
+                print >> self._text, value
+        print >> self._text
+        # Add the payload.  If the decoded payload is empty, this may be a
+        # multipart message.  In that case, just stringify it.
+        payload = msg.get_payload(decode=True)
+        payload = (payload if payload else msg.as_string().split('\n\n', 1)[1])
+        try:
+            charset = msg.get_content_charset('us-ascii')
+            payload = unicode(payload, charset, 'replace')
+        except (LookupError, TypeError):
+            # Unknown or empty charset.
+            payload = unicode(payload, 'us-ascii', 'replace')
+        print >> self._text, payload
+        if not payload.endswith('\n'):
+            print >> self._text
+
+    def finish(self):
+        """Finish up the digest, producing the email-ready copy."""
+        if self._mlist.digest_footer:
+            footer_text = decorate(self._mlist, self._mlist.digest_footer)
+            # This is not strictly conformant RFC 1153.  The trailer is only
+            # supposed to contain two lines, i.e. the "End of ... Digest" line
+            # and the row of asterisks.  If this screws up MUAs, the solution
+            # is to add the footer as the last message in the RFC 1153 digest.
+            # I just hate the way that VM does that and I think it's confusing
+            # to users, so don't do it unless there's a clamor.
+            print >> self._text, self._separator30
+            print >> self._text
+            print >> self._text, footer_text
+            print >> self._text
+        # Add the sign-off.
+        sign_off = _('End of ') + self._digest_id
+        print >> self._text, sign_off
+        print >> self._text, '*' * len(sign_off)
+        # If the digest message can't be encoded by the list character set,
+        # fall back to utf-8.
+        text = self._text.getvalue()
+        try:
+            self._message.set_payload(text.encode(self._charset),
+                                      charset=self._charset)
+        except UnicodeError:
+            self._message.set_payload(text.encode('utf-8'), charset='utf-8')
+        return self._message
+
+
+
+class DigestRunner(Runner):
+    """The digest queue runner."""
+
+    def _dispose(self, mlist, msg, msgdata):
+        """See `IRunner`."""
+        volume = msgdata['volume']
+        digest_number = msgdata['digest_number']
+        with nested(Mailbox(msgdata['digest_path']),
+                    i18n.using_language(mlist.preferred_language)) as (
+            mailbox, language):
+            # Create the digesters.
+            mime_digest = MIMEDigester(mlist, volume, digest_number)
+            rfc1153_digest = RFC1153Digester(mlist, volume, digest_number)
+            # Cruise through all the messages in the mailbox, first building
+            # the table of contents and accumulating Subject: headers and
+            # authors.  The question really is whether it's better from a
+            # performance and memory footprint to go through the mailbox once
+            # and cache the messages in a list, or to cruise through the
+            # mailbox twice.  We'll do the latter, but it's a complete guess.
+            count = None
+            for count, (key, message) in enumerate(mailbox.iteritems(), 1):
+                mime_digest.add_to_toc(message, count)
+                rfc1153_digest.add_to_toc(message, count)
+            assert count is not None, 'No digest messages?'
+            # Add the table of contents.
+            mime_digest.add_toc(count)
+            rfc1153_digest.add_toc(count)
+            # Cruise through the set of messages a second time, adding them to
+            # the actual digest.
+            for count, (key, message) in enumerate(mailbox.iteritems(), 1):
+                mime_digest.add_message(message, count)
+                rfc1153_digest.add_message(message, count)
+            # Finish up the digests.
+            mime = mime_digest.finish()
+            rfc1153 = rfc1153_digest.finish()
+        # Calculate the recipients lists
+        mime_recipients = set()
+        rfc1153_recipients = set()
+        # When someone turns off digest delivery, they will get one last
+        # digest to ensure that there will be no gaps in the messages they
+        # receive.
+        digest_members = set(mlist.digest_members.members)
+        for address in mlist.one_last_digest:
+            member = mlist.digest_members.get_member(address)
+            if member:
+                digest_members.add(member)
+        for member in digest_members:
+            if member.delivery_status <> DeliveryStatus.enabled:
+                continue
+            # Send the digest to the case-preserved address of the digest
+            # members.
+            email_address = member.address.original_address
+            if member.delivery_mode == DeliveryMode.plaintext_digests:
+                rfc1153_recipients.add(email_address)
+            elif member.delivery_mode == DeliveryMode.mime_digests:
+                mime_recipients.add(email_address)
+            else:
+                raise AssertionError(
+                    'Digest member "{0}" unexpected delivery mode: {1}'.format(
+                        email_address, member.delivery_mode))
+        # Send the digests to the virgin queue for final delivery.
+        queue = config.switchboards['virgin']
+        queue.enqueue(mime,
+                      recips=mime_recipients,
+                      listname=mlist.fqdn_listname,
+                      isdigest=True)
+        queue.enqueue(rfc1153,
+                      recips=rfc1153_recipients,
+                      listname=mlist.fqdn_listname,
+                      isdigest=True)
+        # Now that we've delivered the last digest to folks who were waiting
+        # for it, clear that recipient set.
+        mlist.one_last_digest.clear()