From 3d192123461559445bd7e68ef163828bb51852e6 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Sat, 14 Jun 2008 16:02:01 -0400 Subject: start of archiving work --- mailman/app/archiving.py | 41 +++++++++++--------------------------- mailman/interfaces/archiver.py | 7 ++----- mailman/pipeline/cook_headers.py | 23 +++++++++------------ mailman/pipeline/docs/archives.txt | 6 +++--- mailman/pipeline/scrubber.py | 4 ++-- setup.py | 3 ++- 6 files changed, 30 insertions(+), 54 deletions(-) diff --git a/mailman/app/archiving.py b/mailman/app/archiving.py index c790bc3dc..5a752063d 100644 --- a/mailman/app/archiving.py +++ b/mailman/app/archiving.py @@ -20,18 +20,14 @@ __metaclass__ = type __all__ = [ 'Pipermail', - 'get_primary_archiver', ] import os -import pkg_resources from string import Template from zope.interface import implements -from zope.interface.verify import verifyObject -from mailman.app.plugins import get_plugins from mailman.configuration import config from mailman.interfaces import IArchiver @@ -64,47 +60,34 @@ class Pipermail: implements(IArchiver) - def __init__(self, mlist): - self._mlist = mlist - - def get_list_url(self): + @staticmethod + def list_url(mlist): """See `IArchiver`.""" - if self._mlist.archive_private: - url = self._mlist.script_url('private') + '/index.html' + if mlist.archive_private: + url = mlist.script_url('private') + '/index.html' else: - web_host = config.domains.get( - self._mlist.host_name, self._mlist.host_name) + web_host = config.domains.get(mlist.host_name, mlist.host_name) url = Template(config.PUBLIC_ARCHIVE_URL).safe_substitute( - listname=self._mlist.fqdn_listname, + listname=mlist.fqdn_listname, hostname=web_host, - fqdn_listname=self._mlist.fqdn_listname, + fqdn_listname=mlist.fqdn_listname, ) return url - def get_message_url(self, message): + @staticmethod + def permalink(mlist, message): """See `IArchiver`.""" # Not currently implemented. 
return None - def archive_message(self, message): + @staticmethod + def archive_message(mlist, message): """See `IArchiver`.""" text = str(message) fileobj = StringIO(text) - h = HyperArchive(PipermailMailingListAdapter(self._mlist)) + h = HyperArchive(PipermailMailingListAdapter(mlist)) h.processUnixMailbox(fileobj) h.close() fileobj.close() # There's no good way to know the url for the archived message. return None - - - -def get_primary_archiver(mlist): - """Return the primary archiver.""" - entry_points = list(pkg_resources.iter_entry_points('mailman.archiver')) - if len(entry_points) == 0: - return None - for ep in entry_points: - if ep.name == 'default': - return ep.load()(mlist) - return None diff --git a/mailman/interfaces/archiver.py b/mailman/interfaces/archiver.py index 3b96c5c53..40b05b76c 100644 --- a/mailman/interfaces/archiver.py +++ b/mailman/interfaces/archiver.py @@ -24,14 +24,14 @@ from zope.interface import Interface, Attribute class IArchiver(Interface): """An interface to the archiver.""" - def get_list_url(mlist): + def list_url(mlist): """Return the url to the top of the list's archive. :param mlist: The IMailingList object. :returns: The url string. """ - def get_message_url(mlist, message): + def permalink(mlist, message): """Return the url to the message in the archive. This url points directly to the message in the archive. This method @@ -46,9 +46,6 @@ class IArchiver(Interface): def archive_message(mlist, message): """Send the message to the archiver. - This uses `get_message_url()` to calculate and return the url to the - message in the archives. - :param mlist: The IMailingList object. :param message: The message object. 
:returns: The url string or None if the message's archive url cannot diff --git a/mailman/pipeline/cook_headers.py b/mailman/pipeline/cook_headers.py index c237c171a..b3cb7e832 100644 --- a/mailman/pipeline/cook_headers.py +++ b/mailman/pipeline/cook_headers.py @@ -30,7 +30,7 @@ from email.Utils import parseaddr, formataddr, getaddresses from zope.interface import implements from mailman import Utils -from mailman.app.archiving import get_primary_archiver +from mailman.app.plugins import get_plugins from mailman.configuration import config from mailman.i18n import _ from mailman.interfaces import IHandler, Personalization, ReplyToMunging @@ -206,29 +206,24 @@ def process(mlist, msg, msgdata): 'List-Unsubscribe': subfieldfmt % (listinfo, mlist.leave_address), 'List-Subscribe' : subfieldfmt % (listinfo, mlist.join_address), }) - archiver = get_primary_archiver(mlist) if msgdata.get('reduced_list_headers'): headers['X-List-Administrivia'] = 'yes' else: # List-Post: is controlled by a separate attribute if mlist.include_list_post_header: headers['List-Post'] = '<mailto:%s>' % mlist.posting_address - # Add this header if we're archiving + # Add RFC 2369 and 5064 archiving headers, if archiving is enabled. if mlist.archive: - archiveurl = archiver.get_list_url() - headers['List-Archive'] = '<%s>' % archiveurl + for archiver in get_plugins('mailman.app.archiver'): + headers['List-Archive'] = '<%s>' % archiver.list_url(mlist) + permalink = archiver.permalink(mlist, msg) + if permalink is not None: + headers['Archived-At'] = permalink # XXX RFC 2369 also defines a List-Owner header which we are not currently # supporting, but should. - # - # Draft RFC 5064 defines an Archived-At header which contains the pointer - # directly to the message in the archive. If the currently defined - # archiver can tell us the URL, go ahead and include this header. 
- archived_at = archiver.get_message_url(msg) - if archived_at is not None: - headers['Archived-At'] = archived_at - # First we delete any pre-existing headers because the RFC permits only - # one copy of each, and we want to be sure it's ours. for h, v in headers.items(): + # First we delete any pre-existing headers because the RFC permits + # only one copy of each, and we want to be sure it's ours. del msg[h] # Wrap these lines if they are too long. 78 character width probably # shouldn't be hardcoded, but is at least text-MUA friendly. The diff --git a/mailman/pipeline/docs/archives.txt b/mailman/pipeline/docs/archives.txt index b7b54f17f..67ad45c89 100644 --- a/mailman/pipeline/docs/archives.txt +++ b/mailman/pipeline/docs/archives.txt @@ -7,11 +7,11 @@ delivery processes while messages are archived. This also allows external archivers to work in a separate process from the main Mailman delivery processes. - >>> from mailman.queue import Switchboard + >>> from mailman.app.lifecycle import create_list >>> from mailman.configuration import config + >>> from mailman.queue import Switchboard >>> handler = config.handlers['to-archive'] - >>> mlist = config.db.list_manager.create(u'_xtest@example.com') - >>> mlist.preferred_language = u'en' + >>> mlist = create_list(u'_xtest@example.com') >>> switchboard = Switchboard(config.ARCHQUEUE_DIR) A helper function. 
diff --git a/mailman/pipeline/scrubber.py b/mailman/pipeline/scrubber.py index ca1fa37e0..bf6effd3a 100644 --- a/mailman/pipeline/scrubber.py +++ b/mailman/pipeline/scrubber.py @@ -40,7 +40,7 @@ from zope.interface import implements from mailman import Utils from mailman.Errors import DiscardMessage -from mailman.app.archiving import get_primary_archiver +from mailman.app.plugins import get_plugin from mailman.configuration import config from mailman.i18n import _ from mailman.interfaces import IHandler @@ -497,7 +497,7 @@ def save_attachment(mlist, msg, dir, filter_html=True): fp.write(decodedpayload) fp.close() # Now calculate the url to the list's archive. - baseurl = get_primary_archiver(mlist).get_list_url() + baseurl = get_plugin('mailman.scrubber').list_url(mlist) if not baseurl.endswith('/'): baseurl += '/' # Trailing space will definitely be a problem with format=flowed. diff --git a/setup.py b/setup.py index 36f13e716..91dc0b663 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,8 @@ Any other spelling is incorrect.""", entry_points = { 'console_scripts': list(scripts), # Entry point for plugging in different database backends. - 'mailman.archiver' : 'default = mailman.app.archiving:Pipermail', + 'mailman.archiver' : 'pipermail = mailman.app.archiving:Pipermail', + 'mailman.scrubber' : 'stock = mailman.app.archiving:Pipermail', 'mailman.commands' : list(commands), 'mailman.database' : 'stock = mailman.database:StockDatabase', 'mailman.mta' : 'stock = mailman.MTA:Manual', -- cgit v1.2.3-70-g09d2 From ae08b9bd032410014124c0885e2ed4b7b9cb4591 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 2 Jul 2008 22:29:20 -0400 Subject: Implement a prototypical archiver that supports Archived-At permalink header, using the current concept of the hash. This can change, but at least now I have the interfaces and infrastructure to support this header. Of course, Pipermail doesn't support a permalink, so that archiver no-ops. 
Add an adapter to provide the interface that Pipermail requires over and above the IMailingList interface. Add an is_enabled flag to IArchiver. --- mailman/app/archiving.py | 69 ++++++++++++++++++++++++++++++++-- mailman/docs/archivers.txt | 63 +++++++++++++++++++++++++++++++ mailman/interfaces/archiver.py | 22 +++++++++++ mailman/pipeline/cook_headers.py | 2 + mailman/pipeline/docs/cook-headers.txt | 1 + setup.py | 5 ++- 6 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 mailman/docs/archivers.txt diff --git a/mailman/app/archiving.py b/mailman/app/archiving.py index 5a752063d..15e987daf 100644 --- a/mailman/app/archiving.py +++ b/mailman/app/archiving.py @@ -20,25 +20,34 @@ __metaclass__ = type __all__ = [ 'Pipermail', + 'Prototype', ] import os +import hashlib +from base64 import b32encode +from cStringIO import StringIO +from email.utils import make_msgid from string import Template +from urlparse import urljoin from zope.interface import implements +from zope.interface.interface import adapter_hooks from mailman.configuration import config -from mailman.interfaces import IArchiver +from mailman.interfaces.archiver import IArchiver, IPipermailMailingList +from mailman.interfaces.mailinglist import IMailingList from mailman.Archiver.HyperArch import HyperArchive -from cStringIO import StringIO class PipermailMailingListAdapter: """An adapter for MailingList objects to work with Pipermail.""" + implements(IPipermailMailingList) + def __init__(self, mlist): self._mlist = mlist @@ -46,7 +55,7 @@ class PipermailMailingListAdapter: return getattr(self._mlist, name) def archive_dir(self): - """The directory for storing Pipermail artifacts.""" + """See `IPipermailMailingList`.""" if self._mlist.archive_private: basedir = config.PRIVATE_ARCHIVE_FILE_DIR else: @@ -54,12 +63,24 @@ class PipermailMailingListAdapter: return os.path.join(basedir, self._mlist.fqdn_listname) +def adapt_mailing_list_for_pipermail(iface, obj): + """Adapt IMailingLists to 
IPipermailMailingList.""" + if IMailingList.providedBy(obj) and iface is IPipermailMailingList: + return PipermailMailingListAdapter(obj) + return None + +adapter_hooks.append(adapt_mailing_list_for_pipermail) + + class Pipermail: """The stock Pipermail archiver.""" implements(IArchiver) + name = 'pipermail' + is_enabled = True + @staticmethod def list_url(mlist): """See `IArchiver`.""" @@ -85,9 +106,49 @@ class Pipermail: """See `IArchiver`.""" text = str(message) fileobj = StringIO(text) - h = HyperArchive(PipermailMailingListAdapter(mlist)) + h = HyperArchive(IPipermailMailingList(mlist)) h.processUnixMailbox(fileobj) h.close() fileobj.close() # There's no good way to know the url for the archived message. return None + + + +class Prototype: + """A prototype of a third party archiver. + + Mailman proposes a draft specification for interoperability between list + servers and archivers: <http://wiki.list.org/display/DEV/Stable+URLs>. + """ + + implements(IArchiver) + + name = 'prototype' + is_enabled = False + + @staticmethod + def list_url(mlist): + """See `IArchiver`.""" + web_host = config.domains.get(mlist.host_name, mlist.host_name) + return 'http://' + web_host + + @staticmethod + def permalink(mlist, msg): + """See `IArchiver`.""" + message_id = msg.get('message-id') + # It is not the archiver's job to ensure the message has a Message-ID. + assert message_id is not None, 'No Message-ID found' + # The angle brackets are not part of the Message-ID. See RFC 2822. 
+ if message_id.startswith('<') and message_id.endswith('>'): + message_id = message_id[1:-1] + digest = hashlib.sha1(message_id).digest() + message_id_hash = b32encode(digest) + del msg['x-message-id-hash'] + msg['X-Message-ID-Hash'] = message_id_hash + return urljoin(Prototype.list_url(mlist), message_id_hash) + + @staticmethod + def archive_message(mlist, message): + """See `IArchiver`.""" + raise NotImplementedError diff --git a/mailman/docs/archivers.txt b/mailman/docs/archivers.txt new file mode 100644 index 000000000..9e4fbc121 --- /dev/null +++ b/mailman/docs/archivers.txt @@ -0,0 +1,63 @@ += Archivers = + +Mailman supports pluggable archivers, and it comes with several default +archivers. + + >>> from mailman.app.lifecycle import create_list + >>> mlist = create_list(u'test@example.com') + >>> msg = message_from_string("""\ + ... From: aperson@example.org + ... To: test@example.com + ... Subject: An archived message + ... Message-ID: <12345> + ... + ... Here is an archived message. + ... """) + +Archivers support an interface which provides the RFC 2369 List-Archive +header, and one that provides a 'permalink' to the specific message object in +the archive. This latter is appropriate for the message footer or for the RFC +5064 Archived-At header. + +Pipermail does not support a permalink, so that interface returns None. +Mailman defines a draft spec for how list servers and archivers can +interoperate. + + >>> from operator import attrgetter + >>> name = attrgetter('name') + >>> from mailman.app.plugins import get_plugins + >>> archivers = {} + >>> for archiver in sorted(get_plugins('mailman.archiver'), key=name): + ... print archiver.name + ... print ' ', archiver.list_url(mlist) + ... print ' ', archiver.permalink(mlist, msg) + ... 
archivers[archiver.name] = archiver + pipermail + http://www.example.com/pipermail/test@example.com + None + prototype + http://www.example.com + http://www.example.com/RSZCG7IGPHFIRW3EMTVMMDNJMNCVCOLE + + +== Sending the message to the archiver == + +The archiver is also able to archive the message. + + >>> mlist.web_page_url = u'http://lists.example.com/' + >>> archivers['pipermail'].archive_message(mlist, msg) + + >>> import os + >>> from mailman.interfaces.archiver import IPipermailMailingList + >>> pckpath = os.path.join( + ... IPipermailMailingList(mlist).archive_dir(), + ... 'pipermail.pck') + >>> os.path.exists(pckpath) + True + +Note however that the prototype archiver can't archive messages. + + >>> archivers['prototype'].archive_message(mlist, msg) + Traceback (most recent call last): + ... + NotImplementedError diff --git a/mailman/interfaces/archiver.py b/mailman/interfaces/archiver.py index 40b05b76c..ac6efcb93 100644 --- a/mailman/interfaces/archiver.py +++ b/mailman/interfaces/archiver.py @@ -17,13 +17,24 @@ """Interface for archiving schemes.""" +__metaclass__ = type +__all__ = [ + 'IArchiver', + 'IPipermailMailingList', + ] + from zope.interface import Interface, Attribute +from mailman.interfaces.mailinglist import IMailingList class IArchiver(Interface): """An interface to the archiver.""" + name = Attribute('The name of this archiver') + + is_enabled = Attribute('True if this archiver is enabled.') + def list_url(mlist): """Return the url to the top of the list's archive. @@ -53,3 +64,14 @@ class IArchiver(Interface): """ # XXX How to handle attachments? + + + +class IPipermailMailingList(IMailingList): + """An interface that adapts IMailingList as needed for Pipermail.""" + + def archive_dir(): + """The directory for storing Pipermail artifacts. + + Pipermail expects this to be a function, not a property. 
+ """ diff --git a/mailman/pipeline/cook_headers.py b/mailman/pipeline/cook_headers.py index 4cda42c81..ad4728be8 100644 --- a/mailman/pipeline/cook_headers.py +++ b/mailman/pipeline/cook_headers.py @@ -215,6 +215,8 @@ def process(mlist, msg, msgdata): # Add RFC 2369 and 5064 archiving headers, if archiving is enabled. if mlist.archive: for archiver in get_plugins('mailman.archiver'): + if not archiver.is_enabled: + continue headers['List-Archive'] = '<%s>' % archiver.list_url(mlist) permalink = archiver.permalink(mlist, msg) if permalink is not None: diff --git a/mailman/pipeline/docs/cook-headers.txt b/mailman/pipeline/docs/cook-headers.txt index a85ba9e63..d764bd796 100644 --- a/mailman/pipeline/docs/cook-headers.txt +++ b/mailman/pipeline/docs/cook-headers.txt @@ -186,6 +186,7 @@ But normally, a list will include these headers. >>> mlist.preferred_language = u'en' >>> msg = message_from_string("""\ ... From: aperson@example.com + ... Message-ID: <12345> ... ... """) >>> process(mlist, msg, {}) diff --git a/setup.py b/setup.py index 91dc0b663..4c2644401 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,10 @@ Any other spelling is incorrect.""", entry_points = { 'console_scripts': list(scripts), # Entry point for plugging in different database backends. - 'mailman.archiver' : 'pipermail = mailman.app.archiving:Pipermail', + 'mailman.archiver' : [ + 'pipermail = mailman.app.archiving:Pipermail', + 'prototype = mailman.app.archiving:Prototype', + ], 'mailman.scrubber' : 'stock = mailman.app.archiving:Pipermail', 'mailman.commands' : list(commands), 'mailman.database' : 'stock = mailman.database:StockDatabase', -- cgit v1.2.3-70-g09d2