diff options
50 files changed, 535 insertions, 4047 deletions
diff --git a/src/mailman/Archiver/Archiver.py b/src/mailman/Archiver/Archiver.py deleted file mode 100644 index 1e2af535f..000000000 --- a/src/mailman/Archiver/Archiver.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (C) 1998-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - # USA. - -"""Mixin class for putting new messages in the right place for archival. - -Public archives are separated from private ones. An external archival -mechanism (eg, pipermail) should be pointed to the right places, to do the -archival. -""" - -import os -import errno -import logging -import mailbox - -from cStringIO import StringIO -from string import Template -from zope.component import getUtility - -from mailman.config import config -from mailman.interfaces.domain import IDomainManager -from mailman.utilities.i18n import make - -log = logging.getLogger('mailman.error') - - - -def makelink(old, new): - try: - os.symlink(old, new) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -def breaklink(link): - try: - os.unlink(link) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - - - -class Archiver: - # - # Interface to Pipermail. 
HyperArch.py uses this method to get the - # archive directory for the mailing list - # - def InitVars(self): - # The archive file structure by default is: - # - # archives/ - # private/ - # listname.mbox/ - # listname.mbox - # listname/ - # lots-of-pipermail-stuff - # public/ - # listname.mbox@ -> ../private/listname.mbox - # listname@ -> ../private/listname - # - # IOW, the mbox and pipermail archives are always stored in the - # private archive for the list. This is safe because archives/private - # is always set to o-rx. Public archives have a symlink to get around - # the private directory, pointing directly to the private/listname - # which has o+rx permissions. Private archives do not have the - # symbolic links. - archdir = self.archive_dir(self.fqdn_listname) - omask = os.umask(0) - try: - try: - os.mkdir(archdir+'.mbox', 02775) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - # We also create an empty pipermail archive directory into - # which we'll drop an empty index.html file into. This is so - # that lists that have not yet received a posting have - # /something/ as their index.html, and don't just get a 404. - try: - os.mkdir(archdir, 02775) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - # See if there's an index.html file there already and if not, - # write in the empty archive notice. 
- indexfile = os.path.join(archdir, 'index.html') - fp = None - try: - fp = open(indexfile) - except IOError, e: - if e.errno <> errno.ENOENT: - raise - omask = os.umask(002) - try: - fp = open(indexfile, 'w') - finally: - os.umask(omask) - fp.write(make('emptyarchive.html', - mailing_list=self, - listname=self.real_name, - listinfo=self.GetScriptURL('listinfo'), - )) - if fp: - fp.close() - finally: - os.umask(omask) - - def ArchiveFileName(self): - """The mbox name where messages are left for archive construction.""" - return os.path.join(self.archive_dir() + '.mbox', - self.fqdn_listname + '.mbox') - - def GetBaseArchiveURL(self): - if self.archive_private: - url = self.GetScriptURL('private') + '/index.html' - else: - domain = getUtility(IDomainManager).get(self.mail_host) - web_host = (self.mail_host if domain is None else domain.url_host) - url = Template(config.PUBLIC_ARCHIVE_URL).safe_substitute( - listname=self.fqdn_listname, - hostname=web_host, - fqdn_listname=self.fqdn_listname, - ) - return url - - def __archive_file(self, afn): - """Open (creating, if necessary) the named archive file.""" - omask = os.umask(002) - try: - return mailbox.mbox(afn, 'a+') - finally: - os.umask(omask) - - # - # old ArchiveMail function, retained under a new name - # for optional archiving to an mbox - # - def __archive_to_mbox(self, post): - """Retain a text copy of the message in an mbox file.""" - try: - afn = self.ArchiveFileName() - mbox = self.__archive_file(afn) - mbox.add(post) - mbox.fp.close() - except IOError, msg: - log.error('Archive file access failure:\n\t%s %s', afn, msg) - raise - - def ExternalArchive(self, ar, txt): - cmd = Template(ar).safe_substitute( - listname=self.fqdn_listname, - hostname=self.mail_host) - extarch = os.popen(cmd, 'w') - extarch.write(txt) - status = extarch.close() - if status: - log.error('external archiver non-zero exit status: %d\n', - (status & 0xff00) >> 8) - - # - # archiving in real time this is called from list.post(msg) - # 
- def ArchiveMail(self, msg): - """Store postings in mbox and/or pipermail archive, depending.""" - # Fork so archival errors won't disrupt normal list delivery - if config.ARCHIVE_TO_MBOX == -1: - return - # - # We don't need an extra archiver lock here because we know the list - # itself must be locked. - if config.ARCHIVE_TO_MBOX in (1, 2): - self.__archive_to_mbox(msg) - if config.ARCHIVE_TO_MBOX == 1: - # Archive to mbox only. - return - txt = str(msg) - # should we use the internal or external archiver? - private_p = self.archive_private - if config.PUBLIC_EXTERNAL_ARCHIVER and not private_p: - self.ExternalArchive(config.PUBLIC_EXTERNAL_ARCHIVER, txt) - elif config.PRIVATE_EXTERNAL_ARCHIVER and private_p: - self.ExternalArchive(config.PRIVATE_EXTERNAL_ARCHIVER, txt) - else: - # use the internal archiver - f = StringIO(txt) - import HyperArch - h = HyperArch.HyperArchive(self) - h.processUnixMailbox(f) - h.close() - f.close() - - # - # called from MailList.MailList.Save() - # - def CheckHTMLArchiveDir(self): - # We need to make sure that the archive directory has the right perms - # for public vs private. If it doesn't exist, or some weird - # permissions errors prevent us from stating the directory, it's - # pointless to try to fix the perms, so we just return -scott - if config.ARCHIVE_TO_MBOX == -1: - # Archiving is completely disabled, don't require the skeleton. - return - pubdir = os.path.join(config.PUBLIC_ARCHIVE_FILE_DIR, - self.fqdn_listname) - privdir = self.archive_dir() - pubmbox = pubdir + '.mbox' - privmbox = privdir + '.mbox' - if self.archive_private: - breaklink(pubdir) - breaklink(pubmbox) - else: - # BAW: privdir or privmbox could be nonexistant. We'd get an - # OSError, ENOENT which should be caught and reported properly. 
- makelink(privdir, pubdir) - # Only make this link if the site has enabled public mbox files - if config.PUBLIC_MBOX: - makelink(privmbox, pubmbox) diff --git a/src/mailman/Archiver/HyperArch.py b/src/mailman/Archiver/HyperArch.py deleted file mode 100644 index 1419b56bc..000000000 --- a/src/mailman/Archiver/HyperArch.py +++ /dev/null @@ -1,1233 +0,0 @@ -# Copyright (C) 1998-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""HyperArch: Pipermail archiving for Mailman - - - The Dragon De Monsyne <dragondm@integral.org> - - TODO: - - Should be able to force all HTML to be regenerated next time the - archive is run, in case a template is changed. - - Run a command to generate tarball of html archives for downloading - (probably in the 'update_dirty_archives' method). 
-""" - -import os -import re -import sys -import gzip -import time -import errno -import urllib -import logging -import binascii - -from email.Charset import Charset -from email.Errors import HeaderParseError -from email.Header import decode_header, make_header -from flufl.lock import Lock, TimeOutError -from lazr.config import as_boolean -from string import Template -from zope.component import getUtility - -from mailman.Archiver import HyperDatabase -from mailman.Archiver import pipermail -from mailman.config import config -from mailman.core.i18n import _, ctime -from mailman.interfaces.listmanager import IListManager -from mailman.utilities.i18n import find -from mailman.utilities.string import uncanonstr, websafe - - -log = logging.getLogger('mailman.error') -EMPTYSTRING = '' -NL = '\n' - - -# MacOSX has a default stack size that is too small for deeply recursive -# regular expressions. We see this as crashes in the Python test suite when -# running test_re.py and test_sre.py. The fix is to set the stack limit to -# 2048; the general recommendation is to do in the shell before running the -# test suite. But that's inconvenient for a daemon like the runner. -# -# AFAIK, this problem only affects the archiver, so we're adding this work -# around to this file (it'll get imported by the bundled pipermail or by the -# bin/arch script. We also only do this on darwin, a.k.a. MacOSX. 
-if sys.platform == 'darwin': - try: - import resource - except ImportError: - pass - else: - soft, hard = resource.getrlimit(resource.RLIMIT_STACK) - newsoft = min(hard, max(soft, 1024*2048)) - resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard)) - - - -def html_quote(s, langcode=None): - repls = ( ('&', '&'), - ("<", '<'), - (">", '>'), - ('"', '"')) - for thing, repl in repls: - s = s.replace(thing, repl) - return uncanonstr(s, langcode) - - -def url_quote(s): - return urllib.quote(s) - - -def null_to_space(s): - return s.replace('\000', ' ') - - -def sizeof(filename, lang): - try: - size = os.path.getsize(filename) - except OSError, e: - # ENOENT can happen if the .mbox file was moved away or deleted, and - # an explicit mbox file name was given to bin/arch. - if e.errno <> errno.ENOENT: raise - return _('size not available') - if size < 1000: - with _.using(lang.code): - out = _(' %(size)i bytes ') - return out - elif size < 1000000: - return ' %d KB ' % (size / 1000) - # GB?? :-) - return ' %d MB ' % (size / 1000000) - - -html_charset = '<META http-equiv="Content-Type" ' \ - 'content="text/html; charset=%s">' - -def CGIescape(arg, lang=None): - if isinstance(arg, unicode): - s = websafe(arg) - else: - s = websafe(str(arg)) - return uncanonstr(s.replace('"', '"'), lang.code) - -# Parenthesized human name -paren_name_pat = re.compile(r'([(].*[)])') - -# Subject lines preceded with 'Re:' -REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE) - -# E-mail addresses and URLs in text -emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)') - -# Argh! This pattern is buggy, and will choke on URLs with GET parameters. 
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text - -# Blank lines -blankpat = re.compile(r'^\s*$') - -# Starting <html> directive -htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE) -# Ending </html> directive -nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE) -# Match quoted text -quotedpat = re.compile(r'^([>|:]|>)+') - - - -# Like Utils.maketext() but with caching to improve performance. -# -# _templatefilepathcache is used to associate a (templatefile, lang, listname) -# key with the file system path to a template file. This path is the one that -# the Utils.findtext() function has computed is the one to match the values in -# the key tuple. -# -# _templatecache associate a file system path as key with the text -# returned after processing the contents of that file by Utils.findtext() -# -# We keep two caches to reduce the amount of template text kept in memory, -# since the _templatefilepathcache is a many->one mapping and _templatecache -# is a one->one mapping. Imagine 1000 lists all using the same default -# English template. 
- -_templatefilepathcache = {} -_templatecache = {} - -def quick_maketext(templatefile, dict=None, lang=None, mlist=None): - if mlist is None: - listname = '' - else: - listname = mlist.fqdn_listname - if lang is None: - if mlist is None: - lang = config.mailman.default_language - else: - lang = mlist.preferred_language - cachekey = (templatefile, lang, listname) - filepath = _templatefilepathcache.get(cachekey) - if filepath: - template = _templatecache.get(filepath) - if filepath is None or template is None: - # Use the basic maketext, with defaults to get the raw template - template, filepath = find(templatefile, mlist=mlist, - language=lang.code) - _templatefilepathcache[cachekey] = filepath - _templatecache[filepath] = template - # Copied from Utils.maketext() - text = template - if dict is not None: - try: - try: - text = Template(template).safe_substitute(**dict) - except UnicodeError: - # Try again after coercing the template to unicode - utemplate = unicode(template, lang.charset, 'replace') - text = Template(utemplate).safe_substitute(**dict) - except (TypeError, ValueError): - # The template is really screwed up - pass - # Make sure the text is in the given character set, or html-ify any bogus - # characters. - return uncanonstr(text, lang.code) - - - -# Note: I'm overriding most, if not all of the pipermail Article class -# here -ddm -# The Article class encapsulates a single posting. 
The attributes are: -# -# sequence : Sequence number, unique for each article in a set of archives -# subject : Subject -# datestr : The posting date, in human-readable format -# date : The posting date, in purely numeric format -# fromdate : The posting date, in `unixfrom' format -# headers : Any other headers of interest -# author : The author's name (and possibly organization) -# email : The author's e-mail address -# msgid : A unique message ID -# in_reply_to : If !="", this is the msgid of the article being replied to -# references: A (possibly empty) list of msgid's of earlier articles in -# the thread -# body : A list of strings making up the message body - -class Article(pipermail.Article): - __super_init = pipermail.Article.__init__ - __super_set_date = pipermail.Article._set_date - - _last_article_time = time.time() - - def __init__(self, message=None, sequence=0, keepHeaders=[], - lang=config.mailman.default_language, mlist=None): - self.__super_init(message, sequence, keepHeaders) - self.prev = None - self.next = None - # Trim Re: from the subject line - i = 0 - while i != -1: - result = REpat.match(self.subject) - if result: - i = result.end(0) - self.subject = self.subject[i:] - else: - i = -1 - # Useful to keep around - self._lang = lang - self._mlist = mlist - - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - # Avoid i18n side-effects. Note that the language for this - # article (for this list) could be different from the site-wide - # preferred language, so we need to ensure no side-effects will - # occur. Think what happens when executing bin/arch. - with _.using(lang.code): - if self.author == self.email: - self.author = self.email = re.sub('@', _(' at '), - self.email) - else: - self.email = re.sub('@', _(' at '), self.email) - # Snag the content-* headers. RFC 1521 states that their values are - # case insensitive. 
- ctype = message.get('Content-Type', 'text/plain') - cenc = message.get('Content-Transfer-Encoding', '') - self.ctype = ctype.lower() - self.cenc = cenc.lower() - self.decoded = {} - cset = mlist.preferred_language.charset - cset_out = Charset(cset).output_charset or cset - charset = message.get_content_charset(cset_out) - if charset: - charset = charset.lower().strip() - if charset[0]=='"' and charset[-1]=='"': - charset = charset[1:-1] - if charset[0]=="'" and charset[-1]=="'": - charset = charset[1:-1] - try: - body = message.get_payload(decode=True) - except binascii.Error: - body = None - if body and charset != self._lang.charset: - # decode body - try: - body = unicode(body, charset) - except (UnicodeError, LookupError): - body = None - if body: - self.body = [l + "\n" for l in body.splitlines()] - - self.decode_headers() - - def __getstate__(self): - d = self.__dict__.copy() - # We definitely don't want to pickle the MailList instance, so just - # pickle a reference to it. - if d.has_key('_mlist'): - mlist = d['_mlist'] - del d['_mlist'] - else: - mlist = None - if mlist: - d['__listname'] = self._mlist.fqdn_listname - else: - d['__listname'] = None - # Delete a few other things we don't want in the pickle - for attr in ('prev', 'next', 'body'): - if d.has_key(attr): - del d[attr] - d['body'] = [] - return d - - def __setstate__(self, d): - # For loading older Articles via pickle. All this stuff was added - # when Simone Piunni and Tokio Kikuchi i18n'ified Pipermail. See SF - # patch #594771. 
- self.__dict__ = d - listname = d.get('__listname') - if listname: - del d['__listname'] - d['_mlist'] = getUtility(IListManager).get(listname) - if not d.has_key('_lang'): - if hasattr(self, '_mlist'): - self._lang = self._mlist.preferred_language - else: - self._lang = config.mailman.default_language - if not d.has_key('cenc'): - self.cenc = None - if not d.has_key('decoded'): - self.decoded = {} - - def setListIfUnset(self, mlist): - if getattr(self, '_mlist', None) is None: - self._mlist = mlist - - def quote(self, buf): - return html_quote(buf, self._lang.code) - - def decode_headers(self): - """MIME-decode headers. - - If the email, subject, or author attributes contain non-ASCII - characters using the encoded-word syntax of RFC 2047, decoded versions - of those attributes are placed in the self.decoded (a dictionary). - - If the list's charset differs from the header charset, an attempt is - made to decode the headers as Unicode. If that fails, they are left - undecoded. - """ - author = self.decode_charset(self.author) - subject = self.decode_charset(self.subject) - if author: - self.decoded['author'] = author - email = self.decode_charset(self.email) - if email: - self.decoded['email'] = email - if subject: - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - with _.using(self._lang.code): - atmark = _(' at ') - subject = re.sub(r'([-+,.\w]+)@([-+.\w]+)', - '\g<1>' + atmark + '\g<2>', subject) - self.decoded['subject'] = subject - self.decoded['stripped'] = self.strip_subject(subject or self.subject) - - def strip_subject(self, subject): - # Strip subject_prefix and Re: for subject sorting - # This part was taken from CookHeaders.py (TK) - prefix = self._mlist.subject_prefix.strip() - if prefix: - prefix_pat = re.escape(prefix) - prefix_pat = '%'.join(prefix_pat.split(r'\%')) - prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat) - subject = re.sub(prefix_pat, '', subject) - subject = subject.lstrip() - strip_pat = 
re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I) - stripped = strip_pat.sub('', subject) - return stripped - - def decode_charset(self, field): - # TK: This function was rewritten for unifying to Unicode. - # Convert 'field' into Unicode one line string. - try: - pairs = decode_header(field) - ustr = make_header(pairs).__unicode__() - except (LookupError, UnicodeError, ValueError, HeaderParseError): - # assume list's language - cset = self._mlist.preferred_language.charset - if cset == 'us-ascii': - cset = 'iso-8859-1' # assume this for English list - ustr = unicode(field, cset, 'replace') - return u''.join(ustr.splitlines()) - - def as_html(self): - d = self.__dict__.copy() - # Avoid i18n side-effects - with _.using(self._lang.code): - d["prev"], d["prev_wsubj"] = self._get_prev() - d["next"], d["next_wsubj"] = self._get_next() - - d["email_html"] = self.quote(self.email) - d["title"] = self.quote(self.subject) - d["subject_html"] = self.quote(self.subject) - d["subject_url"] = url_quote(self.subject) - d["in_reply_to_url"] = url_quote(self.in_reply_to) - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - # Point the mailto url back to the list - author = re.sub('@', _(' at '), self.author) - emailurl = self._mlist.posting_address - else: - author = self.author - emailurl = self.email - d["author_html"] = self.quote(author) - d["email_url"] = url_quote(emailurl) - d["datestr_html"] = self.quote(ctime(int(self.date))) - d["body"] = self._get_body() - d['listurl'] = self._mlist.script_url('listinfo') - d['listname'] = self._mlist.display_name - d['encoding'] = '' - charset = self._lang.charset - d["encoding"] = html_charset % charset - - self._add_decoded(d) - return quick_maketext( - 'article.html', d, - lang=self._lang, mlist=self._mlist) - - def _get_prev(self): - """Return the href and subject for the previous message""" - if self.prev: - subject = self._get_subject_enc(self.prev) - prev = ('<LINK REL="Previous" HREF="%s">' - % 
(url_quote(self.prev.filename))) - prev_wsubj = ('<LI>' + _('Previous message (by thread):') + - ' <A HREF="%s">%s\n</A></li>' - % (url_quote(self.prev.filename), - self.quote(subject))) - else: - prev = prev_wsubj = "" - return prev, prev_wsubj - - def _get_subject_enc(self, art): - """Return the subject of art, decoded if possible. - - If the charset of the current message and art match and the - article's subject is encoded, decode it. - """ - return art.decoded.get('subject', art.subject) - - def _get_next(self): - """Return the href and subject for the previous message""" - if self.next: - subject = self._get_subject_enc(self.next) - next = ('<LINK REL="Next" HREF="%s">' - % (url_quote(self.next.filename))) - next_wsubj = ('<LI>' + _('Next message (by thread):') + - ' <A HREF="%s">%s\n</A></li>' - % (url_quote(self.next.filename), - self.quote(subject))) - else: - next = next_wsubj = "" - return next, next_wsubj - - _rx_quote = re.compile('=([A-F0-9][A-F0-9])') - _rx_softline = re.compile('=[ \t]*$') - - def _get_body(self): - """Return the message body ready for HTML, decoded if necessary""" - try: - body = self.html_body - except AttributeError: - body = self.body - return null_to_space(EMPTYSTRING.join(body)) - - def _add_decoded(self, d): - """Add encoded-word keys to HTML output""" - for src, dst in (('author', 'author_html'), - ('email', 'email_html'), - ('subject', 'subject_html'), - ('subject', 'title')): - if self.decoded.has_key(src): - d[dst] = self.quote(self.decoded[src]) - - def as_text(self): - d = self.__dict__.copy() - # We need to guarantee a valid From_ line, even if there are - # bososities in the headers. 
- if not d.get('fromdate', '').strip(): - d['fromdate'] = time.ctime(time.time()) - if not d.get('email', '').strip(): - d['email'] = 'bogus@does.not.exist.com' - if not d.get('datestr', '').strip(): - d['datestr'] = time.ctime(time.time()) - # - headers = ['From %(email)s %(fromdate)s', - 'From: %(email)s (%(author)s)', - 'Date: %(datestr)s', - 'Subject: %(subject)s'] - if d['_in_reply_to']: - headers.append('In-Reply-To: %(_in_reply_to)s') - if d['_references']: - headers.append('References: %(_references)s') - if d['_message_id']: - headers.append('Message-ID: %(_message_id)s') - body = EMPTYSTRING.join(self.body) - cset = self._lang.charset - # Coerce the body to Unicode and replace any invalid characters. - if not isinstance(body, unicode): - body = unicode(body, cset, 'replace') - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - with _.using(self._lang.code): - atmark = _(' at ') - body = re.sub(r'([-+,.\w]+)@([-+.\w]+)', - '\g<1>' + atmark + '\g<2>', body) - # Return body to character set of article. 
- body = body.encode(cset, 'replace') - return NL.join(headers) % d + '\n\n' + body + '\n' - - def _set_date(self, message): - self.__super_set_date(message) - self.fromdate = time.ctime(int(self.date)) - - def loadbody_fromHTML(self,fileobj): - self.body = [] - begin = 0 - while 1: - line = fileobj.readline() - if not line: - break - if not begin: - if line.strip() == '<!--beginarticle-->': - begin = 1 - continue - if line.strip() == '<!--endarticle-->': - break - self.body.append(line) - - def finished_update_article(self): - self.body = [] - try: - del self.html_body - except AttributeError: - pass - - -class HyperArchive(pipermail.T): - __super_init = pipermail.T.__init__ - __super_update_archive = pipermail.T.update_archive - __super_update_dirty_archives = pipermail.T.update_dirty_archives - __super_add_article = pipermail.T.add_article - - # some defaults - DIRMODE = 02775 - FILEMODE = 0660 - - VERBOSE = 0 - DEFAULTINDEX = 'thread' - ARCHIVE_PERIOD = 'month' - - THREADLAZY = 0 - THREADLEVELS = 3 - - ALLOWHTML = 1 # "Lines between <html></html>" handled as is. - SHOWHTML = 0 # Eg, nuke leading whitespace in html manner. - IQUOTES = 1 # Italicize quoted text. - SHOWBR = 0 # Add <br> onto every line - - def __init__(self, maillist): - # can't init the database while other processes are writing to it! 
- dir = maillist.archive_dir() - db = HyperDatabase.HyperDatabase(dir, maillist) - self.__super_init(dir, reload=1, database=db) - - self.maillist = maillist - self._lock_file = None - self.lang = maillist.preferred_language - self.charset = maillist.preferred_language.charset - - if hasattr(self.maillist,'archive_volume_frequency'): - if self.maillist.archive_volume_frequency == 0: - self.ARCHIVE_PERIOD='year' - elif self.maillist.archive_volume_frequency == 2: - self.ARCHIVE_PERIOD='quarter' - elif self.maillist.archive_volume_frequency == 3: - self.ARCHIVE_PERIOD='week' - elif self.maillist.archive_volume_frequency == 4: - self.ARCHIVE_PERIOD='day' - else: - self.ARCHIVE_PERIOD='month' - - yre = r'(?P<year>[0-9]{4,4})' - mre = r'(?P<month>[01][0-9])' - dre = r'(?P<day>[0123][0-9])' - self._volre = { - 'year': '^' + yre + '$', - 'quarter': '^' + yre + r'q(?P<quarter>[1234])$', - 'month': '^' + yre + r'-(?P<month>[a-zA-Z]+)$', - 'week': r'^Week-of-Mon-' + yre + mre + dre, - 'day': '^' + yre + mre + dre + '$' - } - - def _makeArticle(self, msg, sequence): - return Article(msg, sequence, - lang=self.maillist.preferred_language, - mlist=self.maillist) - - def html_foot(self): - mlist = self.maillist - # Convenience - def quotetime(s): - return html_quote(ctime(s), self.lang.code) - # Avoid i18n side-effects - with _.using(mlist.preferred_language.code): - d = {"lastdate": quotetime(self.lastdate), - "archivedate": quotetime(self.archivedate), - "listinfo": mlist.script_url('listinfo'), - "version": self.version, - } - i = {"thread": _("thread"), - "subject": _("subject"), - "author": _("author"), - "date": _("date") - } - for t in i.keys(): - cap = t[0].upper() + t[1:] - if self.type == cap: - d["%s_ref" % (t)] = "" - else: - d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' - % (t, i[t])) - return quick_maketext( - 'archidxfoot.html', d, - mlist=mlist) - - def html_head(self): - mlist = self.maillist - # Convenience - def quotetime(s): - return 
html_quote(ctime(s), self.lang.code) - # Avoid i18n side-effects - with _.using(mlist.preferred_language.code): - d = {"listname": html_quote(mlist.display_name, self.lang.code), - "archtype": self.type, - "archive": self.volNameToDesc(self.archive), - "listinfo": mlist.script_url('listinfo'), - "firstdate": quotetime(self.firstdate), - "lastdate": quotetime(self.lastdate), - "size": self.size, - } - i = {"thread": _("thread"), - "subject": _("subject"), - "author": _("author"), - "date": _("date"), - } - for t in i.keys(): - cap = t[0].upper() + t[1:] - if self.type == cap: - d["%s_ref" % (t)] = "" - d["archtype"] = i[t] - else: - d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' - % (t, i[t])) - if self.charset: - d["encoding"] = html_charset % self.charset - else: - d["encoding"] = "" - return quick_maketext( - 'archidxhead.html', d, - mlist=mlist) - - def html_TOC(self): - mlist = self.maillist - listname = mlist.fqdn_listname - mbox = os.path.join(mlist.archive_dir()+'.mbox', listname+'.mbox') - d = {"listname": mlist.display_name, - "listinfo": mlist.script_url('listinfo'), - "fullarch": '../%s.mbox/%s.mbox' % (listname, listname), - "size": sizeof(mbox, mlist.preferred_language), - 'meta': '', - } - # Avoid i18n side-effects - with _.using(mlist.preferred_language.code): - if not self.archives: - d["noarchive_msg"] = _( - '<P>Currently, there are no archives. 
</P>') - d["archive_listing_start"] = "" - d["archive_listing_end"] = "" - d["archive_listing"] = "" - else: - d["noarchive_msg"] = "" - d["archive_listing_start"] = quick_maketext( - 'archliststart.html', - lang=mlist.preferred_language, - mlist=mlist) - d["archive_listing_end"] = quick_maketext( - 'archlistend.html', - mlist=mlist) - - accum = [] - for a in self.archives: - accum.append(self.html_TOC_entry(a)) - d["archive_listing"] = EMPTYSTRING.join(accum) - # The TOC is always in the charset of the list's preferred language - d['meta'] += html_charset % mlist.preferred_language.charset - # The site can disable public access to the mbox file. - if as_boolean(config.archiver.pipermail.public_mbox): - template = 'archtoc.html' - else: - template = 'archtocnombox.html' - return quick_maketext(template, d, mlist=mlist) - - def html_TOC_entry(self, arch): - # Check to see if the archive is gzip'd or not - txtfile = os.path.join(self.maillist.archive_dir(), arch + '.txt') - gzfile = txtfile + '.gz' - # which exists? .txt.gz first, then .txt - if os.path.exists(gzfile): - file = gzfile - url = arch + '.txt.gz' - templ = '<td><A href="%(url)s">[ ' + _('Gzip\'d Text%(sz)s') \ - + ']</a></td>' - elif os.path.exists(txtfile): - file = txtfile - url = arch + '.txt' - templ = '<td><A href="%(url)s">[ ' + _('Text%(sz)s') + ']</a></td>' - else: - # neither found? - file = None - # in Python 1.5.2 we have an easy way to get the size - if file: - textlink = templ % { - 'url': url, - 'sz' : sizeof(file, self.maillist.preferred_language) - } - else: - # there's no archive file at all... hmmm. 
- textlink = '' - return quick_maketext( - 'archtocentry.html', - {'archive': arch, - 'archivelabel': self.volNameToDesc(arch), - 'textlink': textlink - }, - mlist=self.maillist) - - def GetArchLock(self): - if self._lock_file: - return 1 - self._lock_file = Lock( - os.path.join(config.LOCK_DIR, - self.maillist.fqdn_listname + '-arch.lock')) - try: - self._lock_file.lock(timeout=0.5) - except TimeOutError: - return 0 - return 1 - - def DropArchLock(self): - if self._lock_file: - self._lock_file.unlock(unconditionally=1) - self._lock_file = None - - def processListArch(self): - name = self.maillist.ArchiveFileName() - wname= name+'.working' - ename= name+'.err_unarchived' - try: - os.stat(name) - except (IOError,os.error): - #no archive file, nothin to do -ddm - return - - #see if arch is locked here -ddm - if not self.GetArchLock(): - #another archiver is running, nothing to do. -ddm - return - - #if the working file is still here, the archiver may have - # crashed during archiving. Save it, log an error, and move on. - try: - wf = open(wname) - log.error('Archive working file %s present. ' - 'Check %s for possibly unarchived msgs', - wname, ename) - omask = os.umask(007) - try: - ef = open(ename, 'a+') - finally: - os.umask(omask) - ef.seek(1,2) - if ef.read(1) <> '\n': - ef.write('\n') - ef.write(wf.read()) - ef.close() - wf.close() - os.unlink(wname) - except IOError: - pass - os.rename(name,wname) - archfile = open(wname) - self.processUnixMailbox(archfile) - archfile.close() - os.unlink(wname) - self.DropArchLock() - - def get_filename(self, article): - return '%06i.html' % (article.sequence,) - - def get_archives(self, article): - """Return a list of indexes where the article should be filed. 
- A string can be returned if the list only contains one entry, - and the empty list is legal.""" - res = self.dateToVolName(float(article.date)) - self.message(_("figuring article archives\n")) - self.message(res + "\n") - return res - - def volNameToDesc(self, volname): - volname = volname.strip() - # Don't make these module global constants since we have to runtime - # translate them anyway. - monthdict = [ - '', - _('January'), _('February'), _('March'), _('April'), - _('May'), _('June'), _('July'), _('August'), - _('September'), _('October'), _('November'), _('December') - ] - for each in self._volre.keys(): - match = re.match(self._volre[each], volname) - # Let ValueErrors percolate up - if match: - year = int(match.group('year')) - if each == 'quarter': - d =["", _("First"), _("Second"), _("Third"), _("Fourth") ] - ord = d[int(match.group('quarter'))] - return _("%(ord)s quarter %(year)i") - elif each == 'month': - monthstr = match.group('month').lower() - for i in range(1, 13): - monthname = time.strftime("%B", (1999,i,1,0,0,0,0,1,0)) - if monthstr.lower() == monthname.lower(): - month = monthdict[i] - return _("%(month)s %(year)i") - raise ValueError, "%s is not a month!" % monthstr - elif each == 'week': - month = monthdict[int(match.group("month"))] - day = int(match.group("day")) - return _("The Week Of Monday %(day)i %(month)s %(year)i") - elif each == 'day': - month = monthdict[int(match.group("month"))] - day = int(match.group("day")) - return _("%(day)i %(month)s %(year)i") - else: - return match.group('year') - raise ValueError, "%s is not a valid volname" % volname - -# The following two methods should be inverses of each other. 
-ddm - - def dateToVolName(self,date): - datetuple=time.localtime(date) - if self.ARCHIVE_PERIOD=='year': - return time.strftime("%Y",datetuple) - elif self.ARCHIVE_PERIOD=='quarter': - if datetuple[1] in [1,2,3]: - return time.strftime("%Yq1",datetuple) - elif datetuple[1] in [4,5,6]: - return time.strftime("%Yq2",datetuple) - elif datetuple[1] in [7,8,9]: - return time.strftime("%Yq3",datetuple) - else: - return time.strftime("%Yq4",datetuple) - elif self.ARCHIVE_PERIOD == 'day': - return time.strftime("%Y%m%d", datetuple) - elif self.ARCHIVE_PERIOD == 'week': - # Reconstruct "seconds since epoch", and subtract weekday - # multiplied by the number of seconds in a day. - monday = time.mktime(datetuple) - datetuple[6] * 24 * 60 * 60 - # Build a new datetuple from this "seconds since epoch" value - datetuple = time.localtime(monday) - return time.strftime("Week-of-Mon-%Y%m%d", datetuple) - # month. -ddm - else: - return time.strftime("%Y-%B",datetuple) - - - def volNameToDate(self, volname): - volname = volname.strip() - for each in self._volre.keys(): - match = re.match(self._volre[each],volname) - if match: - year = int(match.group('year')) - month = 1 - day = 1 - if each == 'quarter': - q = int(match.group('quarter')) - month = (q * 3) - 2 - elif each == 'month': - monthstr = match.group('month').lower() - m = [] - for i in range(1,13): - m.append( - time.strftime("%B",(1999,i,1,0,0,0,0,1,0)).lower()) - try: - month = m.index(monthstr) + 1 - except ValueError: - pass - elif each == 'week' or each == 'day': - month = int(match.group("month")) - day = int(match.group("day")) - try: - return time.mktime((year,month,1,0,0,0,0,1,-1)) - except OverflowError: - return 0.0 - return 0.0 - - def sortarchives(self): - def sf(a, b): - al = self.volNameToDate(a) - bl = self.volNameToDate(b) - if al > bl: - return 1 - elif al < bl: - return -1 - else: - return 0 - if self.ARCHIVE_PERIOD in ('month','year','quarter'): - self.archives.sort(sf) - else: - self.archives.sort() - 
self.archives.reverse() - - def message(self, msg): - if self.VERBOSE: - f = sys.stderr - f.write(msg) - if msg[-1:] != '\n': - f.write('\n') - f.flush() - - def open_new_archive(self, archive, archivedir): - index_html = os.path.join(archivedir, 'index.html') - try: - os.unlink(index_html) - except: - pass - os.symlink(self.DEFAULTINDEX+'.html',index_html) - - def write_index_header(self): - self.depth=0 - print self.html_head() - if not self.THREADLAZY and self.type=='Thread': - self.message(_("Computing threaded index\n")) - self.updateThreadedIndex() - - def write_index_footer(self): - for i in range(self.depth): - print '</UL>' - print self.html_foot() - - def write_index_entry(self, article): - subject = self.get_header("subject", article) - author = self.get_header("author", article) - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - try: - author = re.sub('@', _(' at '), author) - except UnicodeError: - # Non-ASCII author contains '@' ... no valid email anyway - pass - subject = CGIescape(subject, self.lang) - author = CGIescape(author, self.lang) - - d = { - 'filename': urllib.quote(article.filename), - 'subject': subject, - 'sequence': article.sequence, - 'author': author - } - print quick_maketext( - 'archidxentry.html', d, - mlist=self.maillist) - - def get_header(self, field, article): - # if we have no decoded header, return the encoded one - result = article.decoded.get(field) - if result is None: - return getattr(article, field) - # otherwise, the decoded one will be Unicode - return result - - def write_threadindex_entry(self, article, depth): - if depth < 0: - self.message('depth<0') - depth = 0 - if depth > self.THREADLEVELS: - depth = self.THREADLEVELS - if depth < self.depth: - for i in range(self.depth-depth): - print '</UL>' - elif depth > self.depth: - for i in range(depth-self.depth): - print '<UL>' - print '<!--%i %s -->' % (depth, article.threadKey) - self.depth = depth - self.write_index_entry(article) - - def 
write_TOC(self): - self.sortarchives() - omask = os.umask(002) - try: - toc = open(os.path.join(self.basedir, 'index.html'), 'w') - finally: - os.umask(omask) - toc.write(self.html_TOC()) - toc.close() - - def write_article(self, index, article, path): - # called by add_article - omask = os.umask(002) - try: - f = open(path, 'w') - finally: - os.umask(omask) - f.write(article.as_html()) - f.close() - - # Write the text article to the text archive. - path = os.path.join(self.basedir, "%s.txt" % index) - omask = os.umask(002) - try: - f = open(path, 'a+') - finally: - os.umask(omask) - f.write(article.as_text()) - f.close() - - def update_archive(self, archive): - self.__super_update_archive(archive) - # only do this if the gzip module was imported globally, and - # gzip'ing was enabled via Defaults.GZIP_ARCHIVE_TXT_FILES. See - # above. - if gzip: - archz = None - archt = None - txtfile = os.path.join(self.basedir, '%s.txt' % archive) - gzipfile = os.path.join(self.basedir, '%s.txt.gz' % archive) - oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % archive) - try: - # open the plain text file - archt = open(txtfile) - except IOError: - return - try: - os.rename(gzipfile, oldgzip) - archz = gzip.open(oldgzip) - except (IOError, RuntimeError, os.error): - pass - try: - ou = os.umask(002) - newz = gzip.open(gzipfile, 'w') - finally: - # XXX why is this a finally? - os.umask(ou) - if archz: - newz.write(archz.read()) - archz.close() - os.unlink(oldgzip) - # XXX do we really need all this in a try/except? - try: - newz.write(archt.read()) - newz.close() - archt.close() - except IOError: - pass - os.unlink(txtfile) - - _skip_attrs = ('maillist', '_lock_file', 'charset') - - def getstate(self): - d={} - for each in self.__dict__.keys(): - if not (each in self._skip_attrs - or each.upper() == each): - d[each] = self.__dict__[each] - return d - - # Add <A HREF="..."> tags around URLs and e-mail addresses. 
- - def __processbody_URLquote(self, lines): - # XXX a lot to do here: - # 1. use lines directly, rather than source and dest - # 2. make it clearer - # 3. make it faster - # TK: Prepare for unicode obscure. - atmark = _(' at ') - if lines and isinstance(lines[0], unicode): - atmark = unicode(atmark, self.lang.charset, 'replace') - source = lines[:] - dest = lines - last_line_was_quoted = 0 - for i in xrange(0, len(source)): - Lorig = L = source[i] - prefix = suffix = "" - if L is None: - continue - # Italicise quoted text - if self.IQUOTES: - quoted = quotedpat.match(L) - if quoted is None: - last_line_was_quoted = 0 - else: - quoted = quoted.end(0) - prefix = CGIescape(L[:quoted], self.lang) + '<i>' - suffix = '</I>' - if self.SHOWHTML: - suffix += '<BR>' - if not last_line_was_quoted: - prefix = '<BR>' + prefix - L = L[quoted:] - last_line_was_quoted = 1 - # Check for an e-mail address - L2 = "" - jr = emailpat.search(L) - kr = urlpat.search(L) - while jr is not None or kr is not None: - if jr == None: - j = -1 - else: - j = jr.start(0) - if kr is None: - k = -1 - else: - k = kr.start(0) - if j != -1 and (j < k or k == -1): - text = jr.group(1) - length = len(text) - if as_boolean( - config.archiver.pipermail.obscure_email_addresses): - text = re.sub('@', atmark, text) - URL = self.maillist.script_url('listinfo') - else: - URL = 'mailto:' + text - pos = j - elif k != -1 and (j > k or j == -1): - text = URL = kr.group(1) - length = len(text) - pos = k - else: # j==k - raise ValueError, "j==k: This can't happen!" 
- #length = len(text) - #self.message("URL: %s %s %s \n" - # % (CGIescape(L[:pos]), URL, CGIescape(text))) - L2 += '%s<A HREF="%s">%s</A>' % ( - CGIescape(L[:pos], self.lang), - html_quote(URL), CGIescape(text, self.lang)) - L = L[pos+length:] - jr = emailpat.search(L) - kr = urlpat.search(L) - if jr is None and kr is None: - L = CGIescape(L, self.lang) - L = prefix + L2 + L + suffix - source[i] = None - dest[i] = L - - # Perform Hypermail-style processing of <HTML></HTML> directives - # in message bodies. Lines between <HTML> and </HTML> will be written - # out precisely as they are; other lines will be passed to func2 - # for further processing . - - def __processbody_HTML(self, lines): - # XXX need to make this method modify in place - source = lines[:] - dest = lines - l = len(source) - i = 0 - while i < l: - while i < l and htmlpat.match(source[i]) is None: - i = i + 1 - if i < l: - source[i] = None - i = i + 1 - while i < l and nohtmlpat.match(source[i]) is None: - dest[i], source[i] = source[i], None - i = i + 1 - if i < l: - source[i] = None - i = i + 1 - - def format_article(self, article): - # called from add_article - # TBD: Why do the HTML formatting here and keep it in the - # pipermail database? It makes more sense to do the html - # formatting as the article is being written as html and toss - # the data after it has been written to the archive file. 
- lines = filter(None, article.body) - # Handle <HTML> </HTML> directives - if self.ALLOWHTML: - self.__processbody_HTML(lines) - self.__processbody_URLquote(lines) - if not self.SHOWHTML and lines: - lines.insert(0, '<PRE>') - lines.append('</PRE>') - else: - # Do fancy formatting here - if self.SHOWBR: - lines = map(lambda x:x + "<BR>", lines) - else: - for i in range(0, len(lines)): - s = lines[i] - if s[0:1] in ' \t\n': - lines[i] = '<P>' + s - article.html_body = lines - return article - - def update_article(self, arcdir, article, prev, next): - seq = article.sequence - filename = os.path.join(arcdir, article.filename) - self.message(_('Updating HTML for article %(seq)s')) - try: - f = open(filename) - article.loadbody_fromHTML(f) - f.close() - except IOError, e: - if e.errno <> errno.ENOENT: raise - self.message(_('article file %(filename)s is missing!')) - article.prev = prev - article.next = next - omask = os.umask(002) - try: - f = open(filename, 'w') - finally: - os.umask(omask) - f.write(article.as_html()) - f.close() diff --git a/src/mailman/Archiver/HyperDatabase.py b/src/mailman/Archiver/HyperDatabase.py deleted file mode 100644 index fecb544e8..000000000 --- a/src/mailman/Archiver/HyperDatabase.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (C) 1998-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. 
If not, see <http://www.gnu.org/licenses/>. - -# -# site modules -# -import os -import marshal -import time -import errno - -# -# package/project modules -# -import pipermail -from flufl.lock import Lock, NotLockedError - -CACHESIZE = pipermail.CACHESIZE - -try: - import cPickle - pickle = cPickle -except ImportError: - import pickle - -# -# we're using a python dict in place of -# of bsddb.btree database. only defining -# the parts of the interface used by class HyperDatabase -# only one thing can access this at a time. -# -class DumbBTree: - """Stores pickles of Article objects - - This dictionary-like object stores pickles of all the Article - objects. The object itself is stored using marshal. It would be - much simpler, and probably faster, to store the actual objects in - the DumbBTree and pickle it. - - TBD: Also needs a more sensible name, like IteratableDictionary or - SortedDictionary. - """ - - def __init__(self, path): - self.current_index = 0 - self.path = path - self.lockfile = Lock(self.path + ".lock") - self.lock() - self.__dirty = 0 - self.dict = {} - self.sorted = [] - self.load() - - def __repr__(self): - return "DumbBTree(%s)" % self.path - - def __sort(self, dirty=None): - if self.__dirty == 1 or dirty: - self.sorted = self.dict.keys() - self.sorted.sort() - self.__dirty = 0 - - def lock(self): - self.lockfile.lock() - - def unlock(self): - try: - self.lockfile.unlock() - except NotLockedError: - pass - - def __delitem__(self, item): - # if first hasn't been called, we can skip the sort - if self.current_index == 0: - del self.dict[item] - self.__dirty = 1 - return - try: - ci = self.sorted[self.current_index] - except IndexError: - ci = None - if ci == item: - try: - ci = self.sorted[self.current_index + 1] - except IndexError: - ci = None - del self.dict[item] - self.__sort(dirty=1) - if ci is not None: - self.current_index = self.sorted.index(ci) - else: - self.current_index = self.current_index + 1 - - def clear(self): - # bulk clearing 
much faster than deleting each item, esp. with the - # implementation of __delitem__() above :( - self.dict = {} - - def first(self): - self.__sort() # guarantee that the list is sorted - if not self.sorted: - raise KeyError - else: - key = self.sorted[0] - self.current_index = 1 - return key, self.dict[key] - - def last(self): - if not self.sorted: - raise KeyError - else: - key = self.sorted[-1] - self.current_index = len(self.sorted) - 1 - return key, self.dict[key] - - def next(self): - try: - key = self.sorted[self.current_index] - except IndexError: - raise KeyError - self.current_index = self.current_index + 1 - return key, self.dict[key] - - def has_key(self, key): - return self.dict.has_key(key) - - def set_location(self, loc): - if not self.dict.has_key(loc): - raise KeyError - self.current_index = self.sorted.index(loc) - - def __getitem__(self, item): - return self.dict[item] - - def __setitem__(self, item, val): - # if first hasn't been called, then we don't need to worry - # about sorting again - if self.current_index == 0: - self.dict[item] = val - self.__dirty = 1 - return - try: - current_item = self.sorted[self.current_index] - except IndexError: - current_item = item - self.dict[item] = val - self.__sort(dirty=1) - self.current_index = self.sorted.index(current_item) - - def __len__(self): - return len(self.sorted) - - def load(self): - try: - fp = open(self.path) - try: - self.dict = marshal.load(fp) - finally: - fp.close() - except IOError, e: - if e.errno <> errno.ENOENT: raise - pass - except EOFError: - pass - else: - self.__sort(dirty=1) - - def close(self): - omask = os.umask(007) - try: - fp = open(self.path, 'w') - finally: - os.umask(omask) - fp.write(marshal.dumps(self.dict)) - fp.close() - self.unlock() - - -# this is lifted straight out of pipermail with -# the bsddb.btree replaced with above class. 
-# didn't use inheritance because of all the -# __internal stuff that needs to be here -scott -# -class HyperDatabase(pipermail.Database): - __super_addArticle = pipermail.Database.addArticle - - def __init__(self, basedir, mlist): - self.__cache = {} - self.__currentOpenArchive = None # The currently open indices - self._mlist = mlist - self.basedir = os.path.expanduser(basedir) - # Recently added articles, indexed only by message ID - self.changed={} - - def firstdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - datekey, msgid = self.dateIndex.first() - date = time.asctime(time.localtime(float(datekey[0]))) - except KeyError: - pass - return date - - def lastdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - datekey, msgid = self.dateIndex.last() - date = time.asctime(time.localtime(float(datekey[0]))) - except KeyError: - pass - return date - - def numArticles(self, archive): - self.__openIndices(archive) - return len(self.dateIndex) - - def addArticle(self, archive, article, subject=None, author=None, - date=None): - self.__openIndices(archive) - self.__super_addArticle(archive, article, subject, author, date) - - def __openIndices(self, archive): - if self.__currentOpenArchive == archive: - return - self.__closeIndices() - arcdir = os.path.join(self.basedir, 'database') - omask = os.umask(0) - try: - try: - os.mkdir(arcdir, 02770) - except OSError, e: - if e.errno <> errno.EEXIST: raise - finally: - os.umask(omask) - for i in ('date', 'author', 'subject', 'article', 'thread'): - t = DumbBTree(os.path.join(arcdir, archive + '-' + i)) - setattr(self, i + 'Index', t) - self.__currentOpenArchive = archive - - def __closeIndices(self): - for i in ('date', 'author', 'subject', 'thread', 'article'): - attr = i + 'Index' - if hasattr(self, attr): - index = getattr(self, attr) - if i == 'article': - if not hasattr(self, 'archive_length'): - self.archive_length = {} - l = len(index) - 
self.archive_length[self.__currentOpenArchive] = l - index.close() - delattr(self, attr) - self.__currentOpenArchive = None - - def close(self): - self.__closeIndices() - - def hasArticle(self, archive, msgid): - self.__openIndices(archive) - return self.articleIndex.has_key(msgid) - - def setThreadKey(self, archive, key, msgid): - self.__openIndices(archive) - self.threadIndex[key]=msgid - - def getArticle(self, archive, msgid): - self.__openIndices(archive) - if not self.__cache.has_key(msgid): - # get the pickled object out of the DumbBTree - buf = self.articleIndex[msgid] - article = self.__cache[msgid] = pickle.loads(buf) - # For upgrading older archives - article.setListIfUnset(self._mlist) - else: - article = self.__cache[msgid] - return article - - def first(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index + 'Index') - try: - key, msgid = index.first() - return msgid - except KeyError: - return None - - def next(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index + 'Index') - try: - key, msgid = index.next() - return msgid - except KeyError: - return None - - def getOldestArticle(self, archive, subject): - self.__openIndices(archive) - subject = subject.lower() - try: - key, tempid=self.subjectIndex.set_location(subject) - self.subjectIndex.next() - [subject2, date]= key.split('\0') - if subject!=subject2: return None - return tempid - except KeyError: - return None - - def newArchive(self, archive): - pass - - def clearIndex(self, archive, index): - self.__openIndices(archive) - if hasattr(self.threadIndex, 'clear'): - self.threadIndex.clear() - return - finished=0 - try: - key, msgid=self.threadIndex.first() - except KeyError: finished=1 - while not finished: - del self.threadIndex[key] - try: - key, msgid=self.threadIndex.next() - except KeyError: finished=1 diff --git a/src/mailman/Archiver/__init__.py b/src/mailman/Archiver/__init__.py deleted file mode 100644 index be0c61ce0..000000000 
--- a/src/mailman/Archiver/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 1998-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -from Archiver import * diff --git a/src/mailman/Archiver/pipermail.py b/src/mailman/Archiver/pipermail.py deleted file mode 100644 index e11cb7173..000000000 --- a/src/mailman/Archiver/pipermail.py +++ /dev/null @@ -1,872 +0,0 @@ -#! 
/usr/bin/env python - -import os -import re -import sys -import time -import logging -import mailbox - -import cPickle as pickle - -from cStringIO import StringIO -from email.utils import parseaddr, parsedate_tz, mktime_tz, formatdate -from string import lowercase - -__version__ = '0.11 (Mailman edition)' -VERSION = __version__ -CACHESIZE = 100 # Number of slots in the cache - -from mailman.core import errors -from mailman.core.i18n import _ - -SPACE = ' ' - -log = logging.getLogger('mailman.error') - - - -msgid_pat = re.compile(r'(<.*>)') -def strip_separators(s): - "Remove quotes or parenthesization from a Message-ID string" - if not s: - return "" - if s[0] in '"<([' and s[-1] in '">)]': - s = s[1:-1] - return s - -smallNameParts = ['van', 'von', 'der', 'de'] - -def fixAuthor(author): - "Canonicalize a name into Last, First format" - # If there's a comma, guess that it's already in "Last, First" format - if ',' in author: - return author - L = author.split() - i = len(L) - 1 - if i == 0: - return author # The string's one word--forget it - if author.upper() == author or author.lower() == author: - # Damn, the name is all upper- or lower-case. - while i > 0 and L[i-1].lower() in smallNameParts: - i = i - 1 - else: - # Mixed case; assume that small parts of the last name will be - # in lowercase, and check them against the list. 
- while i>0 and (L[i-1][0] in lowercase or - L[i-1].lower() in smallNameParts): - i = i - 1 - author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i]) - return author - -# Abstract class for databases - -class DatabaseInterface: - def __init__(self): pass - def close(self): pass - def getArticle(self, archive, msgid): pass - def hasArticle(self, archive, msgid): pass - def addArticle(self, archive, article, subject=None, author=None, - date=None): pass - def firstdate(self, archive): pass - def lastdate(self, archive): pass - def first(self, archive, index): pass - def next(self, archive, index): pass - def numArticles(self, archive): pass - def newArchive(self, archive): pass - def setThreadKey(self, archive, key, msgid): pass - def getOldestArticle(self, subject): pass - -class Database(DatabaseInterface): - """Define the basic sorting logic for a database - - Assumes that the database internally uses dateIndex, authorIndex, - etc. - """ - - # TBD Factor out more of the logic shared between BSDDBDatabase - # and HyperDatabase and place it in this class. - - def __init__(self): - # This method need not be called by subclasses that do their - # own initialization. 
- self.dateIndex = {} - self.authorIndex = {} - self.subjectIndex = {} - self.articleIndex = {} - self.changed = {} - - def addArticle(self, archive, article, subject=None, author=None, - date=None): - # create the keys; always end w/ msgid which will be unique - authorkey = (author or article.author, article.date, - article.msgid) - subjectkey = (subject or article.subject, article.date, - article.msgid) - datekey = date or article.date, article.msgid - - # Add the new article - self.dateIndex[datekey] = article.msgid - self.authorIndex[authorkey] = article.msgid - self.subjectIndex[subjectkey] = article.msgid - - self.store_article(article) - self.changed[archive, article.msgid] = None - - parentID = article.parentID - if parentID is not None and self.articleIndex.has_key(parentID): - parent = self.getArticle(archive, parentID) - myThreadKey = parent.threadKey + article.date + '-' - else: - myThreadKey = article.date + '-' - article.threadKey = myThreadKey - key = myThreadKey, article.msgid - self.setThreadKey(archive, key, article.msgid) - - def store_article(self, article): - """Store article without message body to save space""" - # TBD this is not thread safe! - temp = article.body - temp2 = article.html_body - article.body = [] - del article.html_body - self.articleIndex[article.msgid] = pickle.dumps(article) - article.body = temp - article.html_body = temp2 - - -# The Article class encapsulates a single posting. 
The attributes -# are: -# -# sequence : Sequence number, unique for each article in a set of archives -# subject : Subject -# datestr : The posting date, in human-readable format -# date : The posting date, in purely numeric format -# headers : Any other headers of interest -# author : The author's name (and possibly organization) -# email : The author's e-mail address -# msgid : A unique message ID -# in_reply_to: If != "", this is the msgid of the article being replied to -# references : A (possibly empty) list of msgid's of earlier articles -# in the thread -# body : A list of strings making up the message body - -class Article: - _last_article_time = time.time() - - def __init__(self, message = None, sequence = 0, keepHeaders = []): - if message is None: - return - self.sequence = sequence - - self.parentID = None - self.threadKey = None - # otherwise the current sequence number is used. - id = strip_separators(message['Message-Id']) - if id == "": - self.msgid = str(self.sequence) - else: self.msgid = id - - if message.has_key('Subject'): - self.subject = str(message['Subject']) - else: - self.subject = _('No subject') - if self.subject == "": self.subject = _('No subject') - - self._set_date(message) - - # Figure out the e-mail address and poster's name. Use the From: - # field first, followed by Reply-To: - self.author, self.email = parseaddr(message.get('From', '')) - e = message['Reply-To'] - if not self.email and e is not None: - ignoreauthor, self.email = parseaddr(e) - self.email = strip_separators(self.email) - self.author = strip_separators(self.author) - - if self.author == "": - self.author = self.email - - # Save the In-Reply-To:, References:, and Message-ID: lines - # - # TBD: The original code does some munging on these fields, which - # shouldn't be necessary, but changing this may break code. For - # safety, I save the original headers on different attributes for use - # in writing the plain text periodic flat files. 
- self._in_reply_to = message['in-reply-to'] - self._references = message['references'] - self._message_id = message['message-id'] - - i_r_t = message['In-Reply-To'] - if i_r_t is None: - self.in_reply_to = '' - else: - match = msgid_pat.search(i_r_t) - if match is None: self.in_reply_to = '' - else: self.in_reply_to = strip_separators(match.group(1)) - - references = message['References'] - if references is None: - self.references = [] - else: - self.references = map(strip_separators, references.split()) - - # Save any other interesting headers - self.headers = {} - for i in keepHeaders: - if message.has_key(i): - self.headers[i] = message[i] - - # Read the message body - s = StringIO(message.get_payload(decode=True)\ - or message.as_string().split('\n\n',1)[1]) - self.body = s.readlines() - - def _set_date(self, message): - def floatdate(header): - missing = [] - datestr = message.get(header, missing) - if datestr is missing: - return None - date = parsedate_tz(datestr) - try: - return mktime_tz(date) - except (TypeError, ValueError, OverflowError): - return None - date = floatdate('date') - if date is None: - date = floatdate('x-list-received-date') - if date is None: - # What's left to try? 
- date = self._last_article_time + 1 - self._last_article_time = date - self.date = '%011i' % date - self.datestr = message.get('date') \ - or message.get('x-list-received-date') \ - or formatdate(date) - - def __repr__(self): - return '<Article ID = '+repr(self.msgid)+'>' - - def finished_update_article(self): - pass - -# Pipermail formatter class - -class T: - DIRMODE = 0755 # Mode to give to created directories - FILEMODE = 0644 # Mode to give to created files - INDEX_EXT = ".html" # Extension for indexes - - def __init__(self, basedir = None, reload = 1, database = None): - # If basedir isn't provided, assume the current directory - if basedir is None: - self.basedir = os.getcwd() - else: - basedir = os.path.expanduser(basedir) - self.basedir = basedir - self.database = database - - # If the directory doesn't exist, create it. This code shouldn't get - # run anymore, we create the directory in Archiver.py. It should only - # get used by legacy lists created that are only receiving their first - # message in the HTML archive now -- Marc - try: - os.stat(self.basedir) - except os.error, errdata: - errno, errmsg = errdata - if errno != 2: - raise os.error, errdata - else: - self.message(_('Creating archive directory ') + self.basedir) - omask = os.umask(0) - try: - os.mkdir(self.basedir, self.DIRMODE) - finally: - os.umask(omask) - - # Try to load previously pickled state - try: - if not reload: - raise IOError - f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r') - self.message(_('Reloading pickled archive state')) - d = pickle.load(f) - f.close() - for key, value in d.items(): - setattr(self, key, value) - except (IOError, EOFError): - # No pickled version, so initialize various attributes - self.archives = [] # Archives - self._dirty_archives = [] # Archives that will have to be updated - self.sequence = 0 # Sequence variable used for - # numbering articles - self.update_TOC = 0 # Does the TOC need updating? 
- # - # make the basedir variable work when passed in as an __init__ arg - # and different from the one in the pickle. Let the one passed in - # as an __init__ arg take precedence if it's stated. This way, an - # archive can be moved from one place to another and still work. - # - if basedir != self.basedir: - self.basedir = basedir - - def close(self): - "Close an archive, save its state, and update any changed archives." - self.update_dirty_archives() - self.update_TOC = 0 - self.write_TOC() - # Save the collective state - self.message(_('Pickling archive state into ') - + os.path.join(self.basedir, 'pipermail.pck')) - self.database.close() - del self.database - - omask = os.umask(007) - try: - f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w') - finally: - os.umask(omask) - pickle.dump(self.getstate(), f) - f.close() - - def getstate(self): - # can override this in subclass - return self.__dict__ - - # - # Private methods - # - # These will be neither overridden nor called by custom archivers. - # - - - # Create a dictionary of various parameters that will be passed - # to the write_index_{header,footer} functions - def __set_parameters(self, archive): - # Determine the earliest and latest date in the archive - firstdate = self.database.firstdate(archive) - lastdate = self.database.lastdate(archive) - - # Get the current time - now = time.asctime(time.localtime(time.time())) - self.firstdate = firstdate - self.lastdate = lastdate - self.archivedate = now - self.size = self.database.numArticles(archive) - self.archive = archive - self.version = __version__ - - # Find the message ID of an article's parent, or return None - # if no parent can be found. 
- - def __findParent(self, article, children = []): - parentID = None - if article.in_reply_to: - parentID = article.in_reply_to - elif article.references: - # Remove article IDs that aren't in the archive - refs = filter(self.articleIndex.has_key, article.references) - if not refs: - return None - maxdate = self.database.getArticle(self.archive, - refs[0]) - for ref in refs[1:]: - a = self.database.getArticle(self.archive, ref) - if a.date > maxdate.date: - maxdate = a - parentID = maxdate.msgid - else: - # Look for the oldest matching subject - try: - key, tempid = \ - self.subjectIndex.set_location(article.subject) - print key, tempid - self.subjectIndex.next() - [subject, date] = key.split('\0') - print article.subject, subject, date - if subject == article.subject and tempid not in children: - parentID = tempid - except KeyError: - pass - return parentID - - # Update the threaded index completely - def updateThreadedIndex(self): - # Erase the threaded index - self.database.clearIndex(self.archive, 'thread') - - # Loop over all the articles - msgid = self.database.first(self.archive, 'date') - while msgid is not None: - try: - article = self.database.getArticle(self.archive, msgid) - except KeyError: - pass - else: - if article.parentID is None or \ - not self.database.hasArticle(self.archive, - article.parentID): - # then - pass - else: - parent = self.database.getArticle(self.archive, - article.parentID) - article.threadKey = parent.threadKey+article.date+'-' - self.database.setThreadKey(self.archive, - (article.threadKey, article.msgid), - msgid) - msgid = self.database.next(self.archive, 'date') - - # - # Public methods: - # - # These are part of the public interface of the T class, but will - # never be overridden (unless you're trying to do something very new). - - # Update a single archive's indices, whether the archive's been - # dirtied or not. 
- def update_archive(self, archive): - self.archive = archive - self.message(_("Updating index files for archive [%(archive)s]")) - arcdir = os.path.join(self.basedir, archive) - self.__set_parameters(archive) - - for hdr in ('Date', 'Subject', 'Author'): - self._update_simple_index(hdr, archive, arcdir) - - self._update_thread_index(archive, arcdir) - - def _update_simple_index(self, hdr, archive, arcdir): - self.message(" " + hdr) - self.type = hdr - hdr = hdr.lower() - - self._open_index_file_as_stdout(arcdir, hdr) - self.write_index_header() - count = 0 - # Loop over the index entries - msgid = self.database.first(archive, hdr) - while msgid is not None: - try: - article = self.database.getArticle(self.archive, msgid) - except KeyError: - pass - else: - count = count + 1 - self.write_index_entry(article) - msgid = self.database.next(archive, hdr) - # Finish up this index - self.write_index_footer() - self._restore_stdout() - - def _update_thread_index(self, archive, arcdir): - self.message(_(" Thread")) - self._open_index_file_as_stdout(arcdir, "thread") - self.type = 'Thread' - self.write_index_header() - - # To handle the prev./next in thread pointers, we need to - # track articles 5 at a time. 
- - # Get the first 5 articles - L = [None] * 5 - i = 2 - msgid = self.database.first(self.archive, 'thread') - - while msgid is not None and i < 5: - L[i] = self.database.getArticle(self.archive, msgid) - i = i + 1 - msgid = self.database.next(self.archive, 'thread') - - while L[2] is not None: - article = L[2] - artkey = None - if article is not None: - artkey = article.threadKey - if artkey is not None: - self.write_threadindex_entry(article, artkey.count('-') - 1) - if self.database.changed.has_key((archive,article.msgid)): - a1 = L[1] - a3 = L[3] - self.update_article(arcdir, article, a1, a3) - if a3 is not None: - self.database.changed[(archive, a3.msgid)] = None - if a1 is not None: - key = archive, a1.msgid - if not self.database.changed.has_key(key): - self.update_article(arcdir, a1, L[0], L[2]) - else: - del self.database.changed[key] - if L[0]: - L[0].finished_update_article() - L = L[1:] # Rotate the list - if msgid is None: - L.append(msgid) - else: - L.append(self.database.getArticle(self.archive, msgid)) - msgid = self.database.next(self.archive, 'thread') - - self.write_index_footer() - self._restore_stdout() - - def _open_index_file_as_stdout(self, arcdir, index_name): - path = os.path.join(arcdir, index_name + self.INDEX_EXT) - omask = os.umask(002) - try: - self.__f = open(path, 'w') - finally: - os.umask(omask) - self.__stdout = sys.stdout - sys.stdout = self.__f - - def _restore_stdout(self): - sys.stdout = self.__stdout - self.__f.close() - del self.__f - del self.__stdout - - # Update only archives that have been marked as "changed". - def update_dirty_archives(self): - for i in self._dirty_archives: - self.update_archive(i) - self._dirty_archives = [] - - # Read a Unix mailbox file from the file object <input>, - # and create a series of Article objects. Each article - # object will then be archived. 
- - def _makeArticle(self, msg, sequence): - return Article(msg, sequence) - - def processUnixMailbox(self, path, start=None, end=None): - mbox = iter(mailbox.mbox(path)) - if start is None: - start = 0 - counter = 0 - while counter < start: - try: - m = next(mbox) - except errors.DiscardMessage: - continue - if m is None: - return - counter += 1 - while True: - try: - m = next(mbox) - except StopIteration: - break - except errors.DiscardMessage: - continue - except Exception: - log.error('uncaught archiver exception') - raise - if m == '': - # It was an unparseable message - continue - msgid = m.get('message-id', 'n/a') - self.message(_('#%(counter)05d %(msgid)s')) - a = self._makeArticle(m, self.sequence) - self.sequence += 1 - self.add_article(a) - if end is not None and counter >= end: - break - counter += 1 - - def new_archive(self, archive, archivedir): - self.archives.append(archive) - self.update_TOC = 1 - self.database.newArchive(archive) - # If the archive directory doesn't exist, create it - try: - os.stat(archivedir) - except os.error, errdata: - errno, errmsg = errdata - if errno == 2: - omask = os.umask(0) - try: - os.mkdir(archivedir, self.DIRMODE) - finally: - os.umask(omask) - else: - raise os.error, errdata - self.open_new_archive(archive, archivedir) - - def add_article(self, article): - archives = self.get_archives(article) - if not archives: - return - if type(archives) == type(''): - archives = [archives] - - article.filename = filename = self.get_filename(article) - temp = self.format_article(article) - for arch in archives: - self.archive = arch # why do this??? 
- archivedir = os.path.join(self.basedir, arch) - if arch not in self.archives: - self.new_archive(arch, archivedir) - - # Write the HTML-ized article - self.write_article(arch, temp, os.path.join(archivedir, - filename)) - - if article.decoded.has_key('author'): - author = fixAuthor(article.decoded['author']) - else: - author = fixAuthor(article.author) - if article.decoded.has_key('stripped'): - subject = article.decoded['stripped'].lower() - else: - subject = article.subject.lower() - - article.parentID = parentID = self.get_parent_info(arch, article) - if parentID: - parent = self.database.getArticle(arch, parentID) - article.threadKey = parent.threadKey + article.date + '-' - else: - article.threadKey = article.date + '-' - key = article.threadKey, article.msgid - - self.database.setThreadKey(arch, key, article.msgid) - self.database.addArticle(arch, temp, author=author, - subject=subject) - - if arch not in self._dirty_archives: - self._dirty_archives.append(arch) - - def get_parent_info(self, archive, article): - parentID = None - if article.in_reply_to: - parentID = article.in_reply_to - elif article.references: - refs = self._remove_external_references(article.references) - if refs: - maxdate = self.database.getArticle(archive, refs[0]) - for ref in refs[1:]: - a = self.database.getArticle(archive, ref) - if a.date > maxdate.date: - maxdate = a - parentID = maxdate.msgid - else: - # Get the oldest article with a matching subject, and - # assume this is a follow-up to that article - parentID = self.database.getOldestArticle(archive, - article.subject) - - if parentID and not self.database.hasArticle(archive, parentID): - parentID = None - return parentID - - def write_article(self, index, article, path): - omask = os.umask(002) - try: - f = open(path, 'w') - finally: - os.umask(omask) - temp_stdout, sys.stdout = sys.stdout, f - self.write_article_header(article) - sys.stdout.writelines(article.body) - self.write_article_footer(article) - sys.stdout = 
temp_stdout - f.close() - - def _remove_external_references(self, refs): - keep = [] - for ref in refs: - if self.database.hasArticle(self.archive, ref): - keep.append(ref) - return keep - - # Abstract methods: these will need to be overridden by subclasses - # before anything useful can be done. - - def get_filename(self, article): - pass - def get_archives(self, article): - """Return a list of indexes where the article should be filed. - A string can be returned if the list only contains one entry, - and the empty list is legal.""" - pass - def format_article(self, article): - pass - def write_index_header(self): - pass - def write_index_footer(self): - pass - def write_index_entry(self, article): - pass - def write_threadindex_entry(self, article, depth): - pass - def write_article_header(self, article): - pass - def write_article_footer(self, article): - pass - def write_article_entry(self, article): - pass - def update_article(self, archivedir, article, prev, next): - pass - def write_TOC(self): - pass - def open_new_archive(self, archive, dir): - pass - def message(self, msg): - pass - - -class BSDDBdatabase(Database): - __super_addArticle = Database.addArticle - - def __init__(self, basedir): - self.__cachekeys = [] - self.__cachedict = {} - self.__currentOpenArchive = None # The currently open indices - self.basedir = os.path.expanduser(basedir) - self.changed = {} # Recently added articles, indexed only by - # message ID - - def firstdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - date, msgid = self.dateIndex.first() - date = time.asctime(time.localtime(float(date))) - except KeyError: - pass - return date - - def lastdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - date, msgid = self.dateIndex.last() - date = time.asctime(time.localtime(float(date))) - except KeyError: - pass - return date - - def numArticles(self, archive): - self.__openIndices(archive) - return len(self.dateIndex) - - def 
addArticle(self, archive, article, subject=None, author=None, - date=None): - self.__openIndices(archive) - self.__super_addArticle(archive, article, subject, author, date) - - # Open the BSDDB files that are being used as indices - # (dateIndex, authorIndex, subjectIndex, articleIndex) - def __openIndices(self, archive): - if self.__currentOpenArchive == archive: - return - - import bsddb - self.__closeIndices() - arcdir = os.path.join(self.basedir, 'database') - omask = os.umask(0) - try: - try: - os.mkdir(arcdir, 02775) - except OSError: - # BAW: Hmm... - pass - finally: - os.umask(omask) - for hdr in ('date', 'author', 'subject', 'article', 'thread'): - path = os.path.join(arcdir, archive + '-' + hdr) - t = bsddb.btopen(path, 'c') - setattr(self, hdr + 'Index', t) - self.__currentOpenArchive = archive - - # Close the BSDDB files that are being used as indices (if they're - # open--this is safe to call if they're already closed) - def __closeIndices(self): - if self.__currentOpenArchive is not None: - pass - for hdr in ('date', 'author', 'subject', 'thread', 'article'): - attr = hdr + 'Index' - if hasattr(self, attr): - index = getattr(self, attr) - if hdr == 'article': - if not hasattr(self, 'archive_length'): - self.archive_length = {} - self.archive_length[self.__currentOpenArchive] = len(index) - index.close() - delattr(self,attr) - self.__currentOpenArchive = None - - def close(self): - self.__closeIndices() - def hasArticle(self, archive, msgid): - self.__openIndices(archive) - return self.articleIndex.has_key(msgid) - def setThreadKey(self, archive, key, msgid): - self.__openIndices(archive) - self.threadIndex[key] = msgid - def getArticle(self, archive, msgid): - self.__openIndices(archive) - if self.__cachedict.has_key(msgid): - self.__cachekeys.remove(msgid) - self.__cachekeys.append(msgid) - return self.__cachedict[msgid] - if len(self.__cachekeys) == CACHESIZE: - delkey, self.__cachekeys = (self.__cachekeys[0], - self.__cachekeys[1:]) - del 
self.__cachedict[delkey] - s = self.articleIndex[msgid] - article = pickle.loads(s) - self.__cachekeys.append(msgid) - self.__cachedict[msgid] = article - return article - - def first(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - try: - key, msgid = index.first() - return msgid - except KeyError: - return None - def next(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - try: - key, msgid = index.next() - except KeyError: - return None - else: - return msgid - - def getOldestArticle(self, archive, subject): - self.__openIndices(archive) - subject = subject.lower() - try: - key, tempid = self.subjectIndex.set_location(subject) - self.subjectIndex.next() - [subject2, date] = key.split('\0') - if subject != subject2: - return None - return tempid - except KeyError: # XXX what line raises the KeyError? - return None - - def newArchive(self, archive): - pass - - def clearIndex(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - finished = 0 - try: - key, msgid = self.threadIndex.first() - except KeyError: - finished = 1 - while not finished: - del self.threadIndex[key] - try: - key, msgid = self.threadIndex.next() - except KeyError: - finished = 1 - - diff --git a/src/mailman/app/bounces.py b/src/mailman/app/bounces.py index d88621e9b..34d90ac82 100644 --- a/src/mailman/app/bounces.py +++ b/src/mailman/app/bounces.py @@ -59,15 +59,16 @@ DOT = '.' -def bounce_message(mlist, msg, e=None): +def bounce_message(mlist, msg, error=None): """Bounce the message back to the original author. :param mlist: The mailing list that the message was posted to. :type mlist: `IMailingList` :param msg: The original message. :type msg: `email.message.Message` - :param e: Optional exception causing the bounce. - :type e: Exception + :param error: Optional exception causing the bounce. The exception + instance must have a `.message` attribute. 
+ :type error: Exception """ # Bounce a message back to the sender, with an error message if provided # in the exception argument. @@ -77,10 +78,10 @@ def bounce_message(mlist, msg, e=None): return subject = msg.get('subject', _('(no subject)')) subject = oneline(subject, mlist.preferred_language.charset) - if e is None: + if error is None: notice = _('[No bounce details are available]') else: - notice = _(e.notice) + notice = _(error.message) # Currently we always craft bounces as MIME messages. bmsg = UserNotification(msg.sender, mlist.owner_address, subject, lang=mlist.preferred_language) diff --git a/src/mailman/app/docs/lifecycle.rst b/src/mailman/app/docs/lifecycle.rst index c9d3ed10d..08a25ccff 100644 --- a/src/mailman/app/docs/lifecycle.rst +++ b/src/mailman/app/docs/lifecycle.rst @@ -140,7 +140,7 @@ artifacts. :: >>> from mailman.app.lifecycle import remove_list - >>> remove_list(mlist_2.fqdn_listname, mlist_2, True) + >>> remove_list(mlist_2.fqdn_listname, mlist_2) >>> from mailman.interfaces.listmanager import IListManager >>> from zope.component import getUtility diff --git a/src/mailman/app/lifecycle.py b/src/mailman/app/lifecycle.py index 6826d68f1..5082034bc 100644 --- a/src/mailman/app/lifecycle.py +++ b/src/mailman/app/lifecycle.py @@ -89,7 +89,7 @@ def create_list(fqdn_listname, owners=None): -def remove_list(fqdn_listname, mailing_list=None, archives=True): +def remove_list(fqdn_listname, mailing_list=None): """Remove the list and all associated artifacts and subscriptions.""" removeables = [] # mailing_list will be None when only residual archives are being removed. 
@@ -108,15 +108,6 @@ def remove_list(fqdn_listname, mailing_list=None, archives=True): fn_listname = filename.split('.')[0] if fn_listname == fqdn_listname: removeables.append(os.path.join(config.LOCK_DIR, filename)) - if archives: - private_dir = config.PRIVATE_ARCHIVE_FILE_DIR - public_dir = config.PUBLIC_ARCHIVE_FILE_DIR - removeables.extend([ - os.path.join(private_dir, fqdn_listname), - os.path.join(private_dir, fqdn_listname + '.mbox'), - os.path.join(public_dir, fqdn_listname), - os.path.join(public_dir, fqdn_listname + '.mbox'), - ]) # Now that we know what files and directories to delete, delete them. for target in removeables: if not os.path.exists(target): diff --git a/src/mailman/archiving/docs/common.rst b/src/mailman/archiving/docs/common.rst index 5f7cfe42b..c5fecaefe 100644 --- a/src/mailman/archiving/docs/common.rst +++ b/src/mailman/archiving/docs/common.rst @@ -21,7 +21,6 @@ header, and one that provides a *permalink* to the specific message object in the archive. This latter is appropriate for the message footer or for the RFC 5064 ``Archived-At:`` header. -Pipermail does not support a permalink, so that interface returns ``None``. Mailman defines a draft spec for how list servers and archivers can interoperate. @@ -38,9 +37,6 @@ interoperate. mhonarc http://lists.example.com/.../test@example.com http://lists.example.com/.../RSZCG7IGPHFIRW3EMTVMMDNJMNCVCOLE - pipermail - http://www.example.com/pipermail/test@example.com - None prototype http://lists.example.com http://lists.example.com/RSZCG7IGPHFIRW3EMTVMMDNJMNCVCOLE @@ -173,20 +169,17 @@ A MHonArc_ archiver is also available. Messages sent to a local MHonArc instance are added to its archive via a subprocess call. + >>> from mailman.testing.helpers import LogFileMark + >>> mark = LogFileMark('mailman.archiver') >>> archiver.archive_message(mlist, msg) - >>> archive_log = open(os.path.join(config.LOG_DIR, 'archiver')) - >>> try: - ... contents = archive_log.read() - ... finally: - ... 
archive_log.close() - >>> print 'LOG:', contents - LOG: ... /usr/bin/mhonarc -add - -dbfile /.../private/test@example.com.mbox/mhonarc.db - -outdir /.../mhonarc/test@example.com - -stderr /.../logs/mhonarc - -stdout /.../logs/mhonarc - -spammode -umask 022 - ... + >>> print 'LOG:', mark.readline() + LOG: ... /usr/bin/mhonarc + -add + -dbfile .../test@example.com.mbox/mhonarc.db + -outdir .../mhonarc/test@example.com + -stderr .../logs/mhonarc + -stdout .../logs/mhonarc -spammode -umask 022 + .. _`The Mail Archive`: http://www.mail-archive.com .. _MHonArc: http://www.mhonarc.org diff --git a/src/mailman/archiving/pipermail.py b/src/mailman/archiving/pipermail.py deleted file mode 100644 index 03dcd97f4..000000000 --- a/src/mailman/archiving/pipermail.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (C) 2007-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
- -"""Pipermail archiver.""" - -from __future__ import absolute_import, unicode_literals - -__metaclass__ = type -__all__ = [ - 'Pipermail', - ] - - -import os -import mailbox -import tempfile - -from zope.interface import implements -from zope.interface.interface import adapter_hooks - -from mailman.config import config -from mailman.interfaces.archiver import IArchiver, IPipermailMailingList -from mailman.interfaces.mailinglist import IMailingList -from mailman.utilities.filesystem import makedirs -from mailman.utilities.string import expand - -from mailman.Archiver.HyperArch import HyperArchive - - - -class PipermailMailingListAdapter: - """An adapter for MailingList objects to work with Pipermail.""" - - implements(IPipermailMailingList) - - def __init__(self, mlist): - self._mlist = mlist - - def __getattr__(self, name): - return getattr(self._mlist, name) - - def archive_dir(self): - """See `IPipermailMailingList`.""" - if self._mlist.archive_private: - basedir = config.PRIVATE_ARCHIVE_FILE_DIR - else: - basedir = config.PUBLIC_ARCHIVE_FILE_DIR - # Make sure the archive directory exists. - archive_dir = os.path.join(basedir, self._mlist.fqdn_listname) - makedirs(archive_dir) - return archive_dir - - -def adapt_mailing_list_for_pipermail(iface, obj): - """Adapt `IMailingLists` to `IPipermailMailingList`. - - :param iface: The interface to adapt to. - :type iface: `zope.interface.Interface` - :param obj: The object being adapted. - :type obj: any object - :return: An `IPipermailMailingList` instance if adaptation succeeded or - None if it didn't. 
- """ - return (PipermailMailingListAdapter(obj) - if IMailingList.providedBy(obj) and iface is IPipermailMailingList - else None) - -adapter_hooks.append(adapt_mailing_list_for_pipermail) - - - -class Pipermail: - """The stock Pipermail archiver.""" - - implements(IArchiver) - - name = 'pipermail' - - @staticmethod - def list_url(mlist): - """See `IArchiver`.""" - if mlist.archive_private: - return mlist.script_url('private') + '/index.html' - else: - return expand(config.archiver.pipermail.base_url, - dict(listname=mlist.fqdn_listname, - hostname=mlist.domain.url_host, - fqdn_listname=mlist.fqdn_listname, - )) - - @staticmethod - def permalink(mlist, message): - """See `IArchiver`.""" - # Not currently implemented. - return None - - @staticmethod - def archive_message(mlist, message): - """See `IArchiver`.""" - fd, path = tempfile.mkstemp('.mbox') - os.close(fd) - try: - mbox = mailbox.mbox(path, create=True) - mbox.add(message) - finally: - mbox.close() - h = HyperArchive(IPipermailMailingList(mlist)) - try: - h.processUnixMailbox(path) - finally: - h.close() - os.remove(path) - # There's no good way to know the url for the archived message. 
- return None diff --git a/src/mailman/commands/cli_lists.py b/src/mailman/commands/cli_lists.py index 5629f33c1..af6afe22d 100644 --- a/src/mailman/commands/cli_lists.py +++ b/src/mailman/commands/cli_lists.py @@ -252,12 +252,6 @@ class Remove: def add(self, parser, command_parser): """See `ICLISubCommand`.""" command_parser.add_argument( - '-a', '--archives', - default=False, action='store_true', - help=_("""\ -Remove the list's archives too, or if the list has already been deleted, -remove any residual archives.""")) - command_parser.add_argument( '-q', '--quiet', default=False, action='store_true', help=_('Suppress status messages')) @@ -278,15 +272,9 @@ remove any residual archives.""")) fqdn_listname = args.listname[0] mlist = getUtility(IListManager).get(fqdn_listname) if mlist is None: - if args.archives: - log(_('No such list: $fqdn_listname; ' - 'removing residual archives.')) - else: - log(_('No such list: $fqdn_listname')) - return + log(_('No such list: $fqdn_listname')) + return else: log(_('Removed list: $fqdn_listname')) - if not args.archives: - log(_('Not removing archives. Reinvoke with -a to remove them.')) - remove_list(fqdn_listname, mlist, args.archives) + remove_list(fqdn_listname, mlist) config.db.commit() diff --git a/src/mailman/commands/docs/info.rst b/src/mailman/commands/docs/info.rst index ad034a1a6..7f69eada5 100644 --- a/src/mailman/commands/docs/info.rst +++ b/src/mailman/commands/docs/info.rst @@ -70,8 +70,6 @@ The File System Hierarchy layout is the same every by definition. 
LOG_DIR = /var/log/mailman MESSAGES_DIR = /var/lib/mailman/messages PID_FILE = /var/run/mailman/master.pid - PRIVATE_ARCHIVE_FILE_DIR = /var/lib/mailman/archives/private - PUBLIC_ARCHIVE_FILE_DIR = /var/lib/mailman/archives/public QUEUE_DIR = /var/spool/mailman TEMPLATE_DIR = .../mailman/templates VAR_DIR = /var/lib/mailman diff --git a/src/mailman/commands/docs/remove.rst b/src/mailman/commands/docs/remove.rst index f0f4e64f6..35dc53c5e 100644 --- a/src/mailman/commands/docs/remove.rst +++ b/src/mailman/commands/docs/remove.rst @@ -24,7 +24,6 @@ A system administrator can remove mailing lists by the command line. >>> command = Remove() >>> command.process(args) Removed list: test@example.com - Not removing archives. Reinvoke with -a to remove them. >>> print list_manager.get('test@example.com') None @@ -40,46 +39,3 @@ You can also remove lists quietly. >>> print list_manager.get('test@example.com') None - - -Removing archives -================= - -By default 'mailman remove' does not remove a mailing list's archives. -:: - - >>> create_list('test@example.com') - <mailing list "test@example.com" at ...> - - # Fake an mbox file for the mailing list. - >>> import os - >>> def make_mbox(fqdn_listname): - ... mbox_dir = os.path.join( - ... config.PUBLIC_ARCHIVE_FILE_DIR, fqdn_listname + '.mbox') - ... os.makedirs(mbox_dir) - ... mbox_file = os.path.join(mbox_dir, fqdn_listname + '.mbox') - ... with open(mbox_file, 'w') as fp: - ... print >> fp, 'A message' - ... assert os.path.exists(mbox_file) - ... return mbox_file - - >>> mbox_file = make_mbox('test@example.com') - >>> args.quiet = False - >>> command.process(args) - Removed list: test@example.com - Not removing archives. Reinvoke with -a to remove them. - - >>> os.path.exists(mbox_file) - True - -Even if the mailing list has been deleted, you can still delete the archives -afterward. -:: - - >>> args.archives = True - - >>> command.process(args) - No such list: test@example.com; removing residual archives. 
- - >>> os.path.exists(mbox_file) - False diff --git a/src/mailman/config/config.py b/src/mailman/config/config.py index e3b4f88a7..47ef021b5 100644 --- a/src/mailman/config/config.py +++ b/src/mailman/config/config.py @@ -174,8 +174,6 @@ class Configuration: log_dir = category.log_dir, messages_dir = category.messages_dir, archive_dir = category.archive_dir, - pipermail_private_dir = category.pipermail_private_dir, - pipermail_public_dir = category.pipermail_public_dir, queue_dir = category.queue_dir, var_dir = var_dir, template_dir = ( @@ -209,10 +207,6 @@ class Configuration: # Ensure that all paths are normalized and made absolute. Handle the # few special cases first. Most of these are due to backward # compatibility. - self.PUBLIC_ARCHIVE_FILE_DIR = os.path.abspath( - substitutions.pop('pipermail_public_dir')) - self.PRIVATE_ARCHIVE_FILE_DIR = os.path.abspath( - substitutions.pop('pipermail_private_dir')) self.PID_FILE = os.path.abspath(substitutions.pop('pid_file')) for key in substitutions: attribute = key.upper() diff --git a/src/mailman/config/schema.cfg b/src/mailman/config/schema.cfg index 7e15ab82f..3344e965a 100644 --- a/src/mailman/config/schema.cfg +++ b/src/mailman/config/schema.cfg @@ -62,6 +62,10 @@ post_hook: # Which paths.* file system layout to use. layout: dev +# Can MIME filtered messages be preserved by list owners? +filtered_messages_are_preservable: no + + [shell] # `bin/mailman shell` (also `withlist`) gives you an interactive prompt that # you can use to interact with an initialized and configured Mailman system. @@ -116,10 +120,6 @@ messages_dir: $var_dir/messages # Directory for archive backends to store their messages in. Archivers should # create a subdirectory in here to store their files. archive_dir: $var_dir/archives -# Directory for public Pipermail archiver artifacts. -pipermail_public_dir: $var_dir/archives/public -# Directory for private Pipermail archiver artifacts. 
-pipermail_private_dir: $var_dir/archives/private # Root directory for site-specific template override files. template_dir: $var_dir/templates # There are also a number of paths to specific file locations that can be @@ -233,12 +233,11 @@ migrations_path: mailman.database.schema # - http -- Internal wsgi-based web interface # - locks -- Lock state changes # - mischief -- Various types of hostile activity -# - post -- Information about messages posted to mailing lists # - runner -- Runner process start/stops # - smtp -- Successful SMTP activity # - smtp-failure -- Unsuccessful SMTP activity # - subscribe -- Information about leaves/joins -# - vette -- Information related to admindb activity +# - vette -- Message vetting information format: %(asctime)s (%(process)d) %(message)s datefmt: %b %d %H:%M:%S %Y propagate: no @@ -553,32 +552,6 @@ base_url: http://$hostname/archives/$fqdn_listname # This is the stock mail-archive.com archiver. class: mailman.archiving.mailarchive.MailArchive -[archiver.pipermail] -# This is the stock Pipermail archiver. -class: mailman.archiving.pipermail.Pipermail - -# This sets the default `clobber date' policy for the archiver. When a -# message is to be archived either by Pipermail or an external archiver, -# Mailman can modify the Date: header to be the date the message was received -# instead of the Date: in the original message. This is useful if you -# typically receive messages with outrageous dates. Set this to 0 to retain -# the date of the original message, or to 1 to always clobber the date. Set -# it to 2 to perform `smart overrides' on the date; when the date is outside -# allowable_sane_date_skew (either too early or too late), then the received -# date is substituted instead. -clobber_date_policy: 2 -allowable_sane_date_skew: 15d - -# Pipermail archives contain the raw email addresses of the posting authors. -# Some view this as a goldmine for spam harvesters. 
Set this to 'yes' to -# moderately obscure email addresses, but note that this breaks mailto: URLs -# in the archives too. -obscure_email_addresses: yes - -# When the archive is public, should Pipermail also make the raw Unix mbox -# file publically available? -public_mbox: no - [archiver.prototype] # This is a prototypical sample archiver. @@ -593,59 +566,6 @@ priority: 0 class: mailman.styles.default.DefaultStyle -[scrubber] -# A filter that converts from multipart messages to "flat" messages -# (i.e. containing a single payload). This is required for Pipermail, and you -# may want to set it to 0 for external archivers. You can also replace it -# with your own module as long as it contains a process() function that takes -# a MailList object and a Message object. It should raise -# Errors.DiscardMessage if it wants to throw the message away. Otherwise it -# should modify the Message object as necessary. -archive_scrubber: mailman.archiving.pipermail.Pipermail - -# This variable defines what happens to text/html subparts. They can be -# stripped completely, escaped, or filtered through an external program. The -# legal values are: -# 0 - Strip out text/html parts completely, leaving a notice of the removal in -# the message. If the outer part is text/html, the entire message is -# discarded. -# 1 - Remove any embedded text/html parts, leaving them as HTML-escaped -# attachments which can be separately viewed. Outer text/html parts are -# simply HTML-escaped. -# 2 - Leave it inline, but HTML-escape it -# 3 - Remove text/html as attachments but don't HTML-escape them. Note: this -# is very dangerous because it essentially means anybody can send an HTML -# email to your site containing evil JavaScript or web bugs, or other -# nasty things, and folks viewing your archives will be susceptible. You -# should only consider this option if you do heavy moderation of your list -# postings. 
-# -# Note: given the current archiving code, it is not possible to leave -# text/html parts inline and un-escaped. I wouldn't think it'd be a good idea -# to do anyway. -# -# The value can also be a string, in which case it is the name of a command to -# filter the HTML page through. The resulting output is left in an attachment -# or as the entirety of the message when the outer part is text/html. The -# format of the string must include a $filename substitution variable which -# will contain the name of the temporary file that the program should operate -# on. It should write the processed message to stdout. Set this to -# HTML_TO_PLAIN_TEXT_COMMAND to specify an HTML to plain text conversion -# program. -archive_html_sanitizer: 1 - -# Control parameter whether the scrubber should use the message attachment's -# filename as is indicated by the filename parameter or use 'attachement-xxx' -# instead. The default is set 'no' because the applications on PC and Mac -# begin to use longer non-ascii filenames. -use_attachment_filename: no - -# Use of attachment filename extension per se is may be dangerous because -# viruses fakes it. You can set this 'yes' if you filter the attachment by -# filename extension. -use_attachment_filename_extension: no - - [digests] # Headers which should be kept in both RFC 1153 (plain) and MIME digests. RFC # 1153 also specifies these headers in this exact order, so order matters. 
diff --git a/src/mailman/core/errors.py b/src/mailman/core/errors.py index ea1c78967..529ac86fe 100644 --- a/src/mailman/core/errors.py +++ b/src/mailman/core/errors.py @@ -110,17 +110,6 @@ class DiscardMessage(HandlerError): class RejectMessage(HandlerError): """The message will be bounced back to the sender""" - def __init__(self, notice=None): - super(RejectMessage, self).__init__() - if notice is None: - notice = _('Your message was rejected') - if notice.endswith('\n\n'): - pass - elif notice.endswith('\n'): - notice += '\n' - else: - notice += '\n\n' - self.notice = notice diff --git a/src/mailman/core/initialize.py b/src/mailman/core/initialize.py index 721877056..389a45f3b 100644 --- a/src/mailman/core/initialize.py +++ b/src/mailman/core/initialize.py @@ -108,9 +108,7 @@ def initialize_1(config_path=None): # By default, set the umask so that only owner and group can read and # write our files. Specifically we must have g+rw and we probably want # o-rwx although I think in most cases it doesn't hurt if other can read - # or write the files. Note that the Pipermail archive has more - # restrictive permissions in order to handle private archives, but it - # handles that correctly. + # or write the files. os.umask(007) # config_path will be set if the command line argument -C is given. That # case overrides all others. 
When not given on the command line, the diff --git a/src/mailman/core/pipelines.py b/src/mailman/core/pipelines.py index bd709f41e..d5cee588b 100644 --- a/src/mailman/core/pipelines.py +++ b/src/mailman/core/pipelines.py @@ -31,13 +31,16 @@ import logging from zope.interface import implements from zope.interface.verify import verifyObject +from mailman.app.bounces import bounce_message from mailman.app.finder import find_components from mailman.config import config +from mailman.core import errors from mailman.core.i18n import _ from mailman.interfaces.handler import IHandler from mailman.interfaces.pipeline import IPipeline -log = logging.getLogger('mailman.debug') +dlog = logging.getLogger('mailman.debug') +vlog = logging.getLogger('mailman.vette') @@ -52,9 +55,19 @@ def process(mlist, msg, msgdata, pipeline_name='built-in'): message_id = msg.get('message-id', 'n/a') pipeline = config.pipelines[pipeline_name] for handler in pipeline: - log.debug('[pipeline] processing {0}: {1}'.format( - handler.name, message_id)) - handler.process(mlist, msg, msgdata) + dlog.debug('{0} pipeline {1} processing: {2}'.format( + message_id, pipeline_name, handler.name)) + try: + handler.process(mlist, msg, msgdata) + except errors.DiscardMessage as error: + vlog.info( + '{0} discarded by "{1}" pipeline handler "{2}": {3}'.format( + message_id, pipeline_name, handler.name, error.message)) + except errors.RejectMessage as error: + vlog.info( + '{0} rejected by "{1}" pipeline handler "{2}": {3}'.format( + message_id, pipeline_name, handler.name, error.message)) + bounce_message(mlist, msg, error) @@ -84,7 +97,6 @@ class BuiltInPipeline(BasePipeline): _default_handlers = ( 'mime-delete', - 'scrubber', 'tagger', 'calculate-recipients', 'avoid-duplicates', @@ -92,8 +104,8 @@ class BuiltInPipeline(BasePipeline): 'cleanse-dkim', 'cook-headers', 'rfc-2369', - 'to-digest', 'to-archive', + 'to-digest', 'to-usenet', 'after-delivery', 'acknowledge', diff --git 
a/src/mailman/core/tests/test_pipelines.py b/src/mailman/core/tests/test_pipelines.py index 363587d3b..0cf3732c9 100644 --- a/src/mailman/core/tests/test_pipelines.py +++ b/src/mailman/core/tests/test_pipelines.py @@ -26,16 +26,58 @@ __all__ = [ import unittest +from zope.interface import implements from mailman.app.lifecycle import create_list +from mailman.config import config +from mailman.core.errors import DiscardMessage, RejectMessage from mailman.core.pipelines import process +from mailman.interfaces.handler import IHandler +from mailman.interfaces.pipeline import IPipeline from mailman.testing.helpers import ( + LogFileMark, + get_queue_messages, reset_the_world, specialized_message_from_string as mfs) from mailman.testing.layers import ConfigLayer +class DiscardingHandler: + implements(IHandler) + name = 'discarding' + + def process(self, mlist, msg, msgdata): + raise DiscardMessage('by test handler') + + +class RejectHandler: + implements(IHandler) + name = 'rejecting' + + def process(self, mlist, msg, msgdata): + raise RejectMessage('by test handler') + + +class DiscardingPipeline: + implements(IPipeline) + name = 'test-discarding' + description = 'Discarding test pipeline' + + def __iter__(self): + yield DiscardingHandler() + + +class RejectingPipeline: + implements(IPipeline) + name = 'test-rejecting' + description = 'Rejectinging test pipeline' + + def __iter__(self): + yield RejectHandler() + + + class TestBuiltinPipeline(unittest.TestCase): """Test various aspects of the built-in postings pipeline.""" @@ -43,21 +85,51 @@ class TestBuiltinPipeline(unittest.TestCase): def setUp(self): self._mlist = create_list('test@example.com') - - def tearDown(self): - reset_the_world() - - def test_rfc2369_headers(self): - # Ensure that RFC 2369 List-* headers are added. 
- msg = mfs("""\ + config.pipelines['test-discarding'] = DiscardingPipeline() + config.pipelines['test-rejecting'] = RejectingPipeline() + self._msg = mfs("""\ From: Anne Person <anne@example.org> To: test@example.com Subject: a test +Message-ID: <ant> testing """) + + def tearDown(self): + reset_the_world() + del config.pipelines['test-discarding'] + del config.pipelines['test-rejecting'] + + def test_rfc2369_headers(self): + # Ensure that RFC 2369 List-* headers are added. msgdata = {} - process(self._mlist, msg, msgdata, + process(self._mlist, self._msg, msgdata, pipeline_name='default-posting-pipeline') - self.assertEqual(msg['list-id'], '<test.example.com>') - self.assertEqual(msg['list-post'], '<mailto:test@example.com>') + self.assertEqual(self._msg['list-id'], '<test.example.com>') + self.assertEqual(self._msg['list-post'], '<mailto:test@example.com>') + + def test_discarding_pipeline(self): + # If a handler in the pipeline raises DiscardMessage, the message will + # be thrown away, but with a log message. + mark = LogFileMark('mailman.vette') + process(self._mlist, self._msg, {}, 'test-discarding') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '<ant> discarded by "test-discarding" pipeline handler ' + '"discarding": by test handler')) + + def test_rejecting_pipeline(self): + # If a handler in the pipeline raises RejectMessage, the message will + # be bounced back to the sender, but with a log message. + mark = LogFileMark('mailman.vette') + process(self._mlist, self._msg, {}, 'test-rejecting') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '<ant> rejected by "test-rejecting" pipeline handler ' + '"rejecting": by test handler')) + # In the rejection case, the original message will also be in the + # virgin queue. 
+ messages = get_queue_messages('virgin') + self.assertEqual(len(messages), 1) + self.assertEqual(str(messages[0].msg['subject']), 'a test') diff --git a/src/mailman/database/schema/postgres.sql b/src/mailman/database/schema/postgres.sql index 9becdb5dc..bd7ef3f6b 100644 --- a/src/mailman/database/schema/postgres.sql +++ b/src/mailman/database/schema/postgres.sql @@ -42,6 +42,7 @@ CREATE TABLE mailinglist ( bounce_you_are_disabled_warnings INTEGER, bounce_you_are_disabled_warnings_interval TEXT, -- Content filtering. + filter_action INTEGER, filter_content BOOLEAN, collapse_alternatives BOOLEAN, convert_html_to_plaintext BOOLEAN, diff --git a/src/mailman/database/schema/sqlite.sql b/src/mailman/database/schema/sqlite.sql index 650c38a54..37b6ed8f2 100644 --- a/src/mailman/database/schema/sqlite.sql +++ b/src/mailman/database/schema/sqlite.sql @@ -138,6 +138,7 @@ CREATE TABLE mailinglist ( bounce_you_are_disabled_warnings INTEGER, bounce_you_are_disabled_warnings_interval TEXT, -- Content filtering. + filter_action INTEGER, filter_content BOOLEAN, collapse_alternatives BOOLEAN, convert_html_to_plaintext BOOLEAN, diff --git a/src/mailman/docs/NEWS.rst b/src/mailman/docs/NEWS.rst index 31e3ce7b9..b92a3b618 100644 --- a/src/mailman/docs/NEWS.rst +++ b/src/mailman/docs/NEWS.rst @@ -48,9 +48,16 @@ Architecture longer set the `X-Message-ID-Hash` header. * The Prototype archiver now stores its files in maildir format inside of `$var_dir/archives/prototype`, given by Toshio Kuratomi. - * Improved "8 mile high" document distilled by Stephen Turnbull from the + * Improved "8 mile high" document distilled by Stephen J Turnbull from the Pycon 2012 Mailman 3 sprint. Also improvements to the Sphinx build given by Andrea Crotti. + * Pipermail has been eradicated. + * Configuration variable `[mailman]filtered_messages_are_preservable` + controls whether messages which have their top-level `Content-Type` + filtered out can be preserved in the `bad` queue by list owners. 
+ * Configuration section `[scrubber]` removed, as is the scrubber handler. + This handler was essentially incompatible with Mailman 3 since it required + coordination with Pipermail to store attachments on disk. Database -------- @@ -66,6 +73,8 @@ Database - start_chain -> posting_chain - pipeline -> posting_pipeline - real_name -> display_name (mailinglist, user, address) + * Schema additions: + - mailinglist.filter_action REST ---- diff --git a/src/mailman/interfaces/action.py b/src/mailman/interfaces/action.py index c7c79f7d8..9b3c1fbcc 100644 --- a/src/mailman/interfaces/action.py +++ b/src/mailman/interfaces/action.py @@ -20,6 +20,7 @@ __metaclass__ = type __all__ = [ 'Action', + 'FilterAction', ] @@ -33,3 +34,8 @@ class Action(Enum): discard = 2 accept = 3 defer = 4 + + +class FilterAction(Action): + forward = 5 + preserve = 6 diff --git a/src/mailman/interfaces/archiver.py b/src/mailman/interfaces/archiver.py index f24e44183..a06bbdede 100644 --- a/src/mailman/interfaces/archiver.py +++ b/src/mailman/interfaces/archiver.py @@ -22,12 +22,10 @@ from __future__ import absolute_import, unicode_literals __metaclass__ = type __all__ = [ 'IArchiver', - 'IPipermailMailingList', ] from zope.interface import Interface, Attribute -from mailman.interfaces.mailinglist import IMailingList @@ -43,36 +41,25 @@ class IArchiver(Interface): :returns: The url string. """ - def permalink(mlist, message): + def permalink(mlist, msg): """Return the url to the message in the archive. This url points directly to the message in the archive. This method only calculates the url, it does not actually archive the message. :param mlist: The IMailingList object. - :param message: The message object. + :param msg: The message object. :returns: The url string or None if the message's archive url cannot be calculated. """ - def archive_message(mlist, message): + def archive_message(mlist, msg): """Send the message to the archiver. :param mlist: The IMailingList object. 
- :param message: The message object. + :param msg: The message object. :returns: The url string or None if the message's archive url cannot be calculated. """ # XXX How to handle attachments? - - - -class IPipermailMailingList(IMailingList): - """An interface that adapts IMailingList as needed for Pipermail.""" - - def archive_dir(): - """The directory for storing Pipermail artifacts. - - Pipermail expects this to be a function, not a property. - """ diff --git a/src/mailman/interfaces/handler.py b/src/mailman/interfaces/handler.py index f9a075b8c..9007e8490 100644 --- a/src/mailman/interfaces/handler.py +++ b/src/mailman/interfaces/handler.py @@ -17,7 +17,7 @@ """Interface describing a pipeline handler.""" -from __future__ import absolute_import, unicode_literals +from __future__ import absolute_import, print_function, unicode_literals __metaclass__ = type __all__ = [ diff --git a/src/mailman/interfaces/mailinglist.py b/src/mailman/interfaces/mailinglist.py index 3c7ea9ee8..d92bae464 100644 --- a/src/mailman/interfaces/mailinglist.py +++ b/src/mailman/interfaces/mailinglist.py @@ -421,6 +421,12 @@ class IMailingList(Interface): Filtering is performed on MIME type and file name extension. """) + filter_action = Attribute( + """Action to take when the top-level content-type is filtered. + + The value is a `FilterAction` enum. + """) + convert_html_to_plaintext = Attribute( """Flag specifying whether text/html parts should be converted. 
diff --git a/src/mailman/model/mailinglist.py b/src/mailman/model/mailinglist.py index d7256d1c9..76f88caa7 100644 --- a/src/mailman/model/mailinglist.py +++ b/src/mailman/model/mailinglist.py @@ -38,7 +38,7 @@ from zope.interface import implements from mailman.config import config from mailman.database.model import Model from mailman.database.types import Enum -from mailman.interfaces.action import Action +from mailman.interfaces.action import Action, FilterAction from mailman.interfaces.address import IAddress from mailman.interfaces.autorespond import ResponseAction from mailman.interfaces.bounce import UnrecognizedBounceDisposition @@ -115,6 +115,7 @@ class MailingList(Model): autorespond_requests = Enum(ResponseAction) autoresponse_request_text = Unicode() # Content filters. + filter_action = Enum(FilterAction) filter_content = Bool() collapse_alternatives = Bool() convert_html_to_plaintext = Bool() diff --git a/src/mailman/pipeline/docs/filtering.rst b/src/mailman/pipeline/docs/filtering.rst index 5b54424e4..fd0b33d3b 100644 --- a/src/mailman/pipeline/docs/filtering.rst +++ b/src/mailman/pipeline/docs/filtering.rst @@ -45,7 +45,7 @@ content type matches the filter, the entire message will be discarded. >>> process(mlist, msg, {}) Traceback (most recent call last): ... - DiscardMessage + DiscardMessage: The message's content type was explicitly disallowed However, if we turn off content filtering altogether, then the handler short-circuits. diff --git a/src/mailman/pipeline/docs/rfc-2369.rst b/src/mailman/pipeline/docs/rfc-2369.rst index a1ba6c746..1b89f2354 100644 --- a/src/mailman/pipeline/docs/rfc-2369.rst +++ b/src/mailman/pipeline/docs/rfc-2369.rst @@ -148,35 +148,11 @@ header will be added. >>> mlist.archive = True - >>> from mailman.config import config - >>> config.push('pipermail', """ - ... [archiver.prototype] - ... enable: no - ... [archiver.mail_archive] - ... enable: no - ... [archiver.mhonarc] - ... enable: no - ... [archiver.pipermail] - ... 
enable: yes - ... """) - - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... - ... """) - >>> process(mlist, msg, {}) - >>> list_headers(msg, only='list-archive') - ---start--- - list-archive: <http://www.example.com/pipermail/test@example.com> - ---end--- - `RFC 5064`_ defines the `Archived-At` header which contains the url to the individual message in the archives. Archivers which don't support -pre-calculation of the archive url cannot add the `Archived-At` header, as is -the case with Pipermail (see above). However, other archivers can calculate -the url, and do add this header. +pre-calculation of the archive url cannot add the `Archived-At` header. +However, other archivers can calculate the url, and do add this header. - >>> config.pop('pipermail') >>> config.push('prototype', """ ... [archiver.prototype] ... enable: yes diff --git a/src/mailman/pipeline/docs/scrubber.rst b/src/mailman/pipeline/docs/scrubber.rst deleted file mode 100644 index 86a8161a7..000000000 --- a/src/mailman/pipeline/docs/scrubber.rst +++ /dev/null @@ -1,230 +0,0 @@ -============ -The scrubber -============ - -The scrubber is an integral part of Mailman, both in the normal delivery of -messages and in components such as the archiver. Its primary purpose is to -scrub attachments from messages so that binary goop doesn't end up in an -archive message. - - >>> mlist = create_list('_xtest@example.com') - >>> mlist.preferred_language = 'en' - -Helper functions for getting the attachment data. -:: - - >>> import os, re - >>> def read_attachment(filename, remove=True): - ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, - ... mlist.fqdn_listname, filename) - ... fp = open(path) - ... try: - ... data = fp.read() - ... finally: - ... fp.close() - ... if remove: - ... os.unlink(path) - ... return data - - >>> from urlparse import urlparse - >>> def read_url_from_message(msg): - ... url = None - ... for line in msg.get_payload().splitlines(): - ... 
mo = re.match('URL: <(?P<url>[^>]+)>', line) - ... if mo: - ... url = mo.group('url') - ... break - ... path = '/'.join(urlparse(url).path.split('/')[3:]) - ... return read_attachment(path) - - -Saving attachments -================== - -The Scrubber handler exposes a function called ``save_attachment()`` which can -be used to strip various types of attachments and store them in the archive -directory. This is a public interface used by components outside the normal -processing pipeline. - -Site administrators can decide whether the scrubber should use the attachment -filename suggested in the message's ``Content-Disposition:`` header or not. -If enabled, the filename will be used when this header attribute is present -(yes, this is an unfortunate double negative). -:: - - >>> config.push('test config', """ - ... [scrubber] - ... use_attachment_filename: yes - ... """) - >>> msg = message_from_string("""\ - ... Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... """) - - >>> from mailman.pipeline.scrubber import save_attachment - >>> print save_attachment(mlist, msg, 'dir') - <http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif> - >>> data = read_attachment('dir/xtest.gif') - >>> print data[:6] - GIF87a - >>> len(data) - 34 - -Saving the attachment does not alter the original message. - - >>> print msg.as_string() - Content-Type: image/gif; name="xtest.gif" - Content-Transfer-Encoding: base64 - Content-Disposition: attachment; filename="xtest.gif" - <BLANKLINE> - R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - -The site administrator can also configure Mailman to ignore the -``Content-Disposition:`` filename. This is the default. - - >>> config.pop('test config') - >>> config.push('test config', """ - ... [scrubber] - ... use_attachment_filename: no - ... 
""") - >>> msg = message_from_string("""\ - ... Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... """) - >>> print save_attachment(mlist, msg, 'dir') - <http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif> - >>> data = read_attachment('dir/xtest.gif') - Traceback (most recent call last): - IOError: [Errno ...] No such file or directory: - u'.../archives/private/_xtest@example.com/dir/xtest.gif' - >>> data = read_attachment('dir/attachment.gif') - >>> print data[:6] - GIF87a - >>> len(data) - 34 - - -Scrubbing image attachments -=========================== - -When scrubbing image attachments, the original message is modified to include -a reference to the attachment file as available through the on-line archive. - - >>> msg = message_from_string("""\ - ... MIME-Version: 1.0 - ... Content-Type: multipart/mixed; boundary="BOUNDARY" - ... - ... --BOUNDARY - ... Content-type: text/plain; charset=us-ascii - ... - ... This is a message. - ... --BOUNDARY - ... Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... --BOUNDARY-- - ... """) - >>> msgdata = {} - -The ``Scrubber.process()`` function is different than other handler process -functions in that it returns the scrubbed message. - - >>> from mailman.pipeline.scrubber import process - >>> scrubbed_msg = process(mlist, msg, msgdata) - >>> scrubbed_msg is msg - True - >>> print scrubbed_msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Type: text/plain; charset="us-ascii" - Content-Transfer-Encoding: 7bit - <BLANKLINE> - This is a message. - -------------- next part -------------- - A non-text attachment was scrubbed... 
- Name: xtest.gif - Type: image/gif - Size: 34 bytes - Desc: not available - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> - <BLANKLINE> - -This is the same as the transformed message originally passed in. - - >>> print msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Type: text/plain; charset="us-ascii" - Content-Transfer-Encoding: 7bit - <BLANKLINE> - This is a message. - -------------- next part -------------- - A non-text attachment was scrubbed... - Name: xtest.gif - Type: image/gif - Size: 34 bytes - Desc: not available - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> - <BLANKLINE> - >>> msgdata - {} - -The URL will point to the attachment sitting in the archive. - - >>> data = read_url_from_message(msg) - >>> data[:6] - 'GIF87a' - >>> len(data) - 34 - - -Scrubbing text attachments -========================== - -Similar to image attachments, text attachments will also be scrubbed, but the -placeholder will be slightly different. - - >>> msg = message_from_string("""\ - ... MIME-Version: 1.0 - ... Content-Type: multipart/mixed; boundary="BOUNDARY" - ... - ... --BOUNDARY - ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no - ... - ... This is a message. - ... --BOUNDARY - ... Content-type: text/plain; name="xtext.txt" - ... Content-Disposition: attachment; filename="xtext.txt" - ... - ... This is a text attachment. - ... --BOUNDARY-- - ... """) - >>> scrubbed_msg = process(mlist, msg, {}) - >>> print scrubbed_msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Transfer-Encoding: 7bit - Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no" - <BLANKLINE> - This is a message. - -------------- next part -------------- - An embedded and charset-unspecified text was scrubbed... 
- Name: xtext.txt - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt> - <BLANKLINE> - >>> read_url_from_message(msg) - 'This is a text attachment.' - - -Clean up -======== - - >>> config.pop('test config') diff --git a/src/mailman/pipeline/mime_delete.py b/src/mailman/pipeline/mime_delete.py index 402d13714..99fdc3ede 100644 --- a/src/mailman/pipeline/mime_delete.py +++ b/src/mailman/pipeline/mime_delete.py @@ -37,14 +37,18 @@ import errno import logging import tempfile -from email.Iterators import typed_subpart_iterator +from email.iterators import typed_subpart_iterator +from email.mime.message import MIMEMessage +from email.mime.text import MIMEText +from lazr.config import as_boolean from os.path import splitext from zope.interface import implements from mailman.config import config from mailman.core import errors from mailman.core.i18n import _ -from mailman.core.switchboard import Switchboard +from mailman.email.message import OwnerNotification +from mailman.interfaces.action import FilterAction from mailman.interfaces.handler import IHandler from mailman.utilities.string import oneline from mailman.version import VERSION @@ -54,6 +58,44 @@ log = logging.getLogger('mailman.error') +def dispose(mlist, msg, msgdata, why): + if mlist.filter_action is FilterAction.reject: + # Bounce the message to the original author. + raise errors.RejectMessage(why) + elif mlist.filter_action is FilterAction.forward: + # Forward it on to the list moderators. + # FIXME 2012-03-16 BAW: Trunk uses .display_name + text=_("""\ +The attached message matched the $mlist.real_name mailing list's content +filtering rules and was prevented from being forwarded on to the list +membership. You are receiving the only remaining copy of the discarded +message. 
+ +""") + subject=_('Content filter message notification') + notice = OwnerNotification(mlist, subject, roster=mlist.moderators) + notice.set_type('multipart/mixed') + notice.attach(MIMEText(text)) + notice.attach(MIMEMessage(msg)) + notice.send(mlist) + # Let this fall through so the original message gets discarded. + elif mlist.filter_action is FilterAction.preserve: + if as_boolean(config.mailman.filtered_messages_are_preservable): + # This is just like discarding the message except that a copy is + # placed in the 'bad' queue should the site administrator want to + # inspect the message. + filebase = config.switchboards['bad'].enqueue(msg, msgdata) + log.info('{0} preserved in file base {1}'.format( + msg.get('message-id', 'n/a'), filebase)) + else: + log.error( + '{1} invalid FilterAction: {0}. Treating as discard'.format( + mlist.fqdn_listname, mlist.filter_action.name)) + # Most cases also discard the message + raise errors.DiscardMessage(why) + + + def process(mlist, msg, msgdata): # We also don't care about our own digests or plaintext ctype = msg.get_content_type() @@ -227,31 +269,6 @@ def to_plaintext(msg): -def dispose(mlist, msg, msgdata, why): - # filter_action == 0 just discards, see below - if mlist.filter_action == 1: - # Bounce the message to the original author - raise errors.RejectMessage, why - if mlist.filter_action == 2: - # Forward it on to the list owner - listname = mlist.internal_name() - mlist.ForwardMessage( - msg, - text=_("""\ -The attached message matched the $listname mailing list's content filtering -rules and was prevented from being forwarded on to the list membership. You -are receiving the only remaining copy of the discarded message. 
- -"""), - subject=_('Content filtered message notification')) - if mlist.filter_action == 3 and \ - config.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES: - badq = Switchboard('bad', config.BADQUEUE_DIR) - badq.enqueue(msg, msgdata) - # Most cases also discard the message - raise errors.DiscardMessage - - def get_file_ext(m): """ Get filename extension. Caution: some virus don't put filename diff --git a/src/mailman/pipeline/scrubber.py b/src/mailman/pipeline/scrubber.py deleted file mode 100644 index 0584c0a2c..000000000 --- a/src/mailman/pipeline/scrubber.py +++ /dev/null @@ -1,499 +0,0 @@ -# Copyright (C) 2001-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
- -"""Cleanse a message for archiving.""" - -from __future__ import absolute_import, unicode_literals - -__metaclass__ = type -__all__ = [ - 'Scrubber', - ] - - -import os -import re -import time -import hashlib -import logging -import binascii - -from email.charset import Charset -from email.utils import make_msgid, parsedate -from flufl.lock import Lock -from lazr.config import as_boolean -from mimetypes import guess_all_extensions -from string import Template -from zope.interface import implements - -from mailman.config import config -from mailman.core.errors import DiscardMessage -from mailman.core.i18n import _ -from mailman.interfaces.handler import IHandler -from mailman.utilities.filesystem import makedirs -from mailman.utilities.modules import find_name -from mailman.utilities.string import oneline, websafe - - -# Path characters for common platforms -pre = re.compile(r'[/\\:]') -# All other characters to strip out of Content-Disposition: filenames -# (essentially anything that isn't an alphanum, dot, dash, or underscore). -sre = re.compile(r'[^-\w.]') -# Regexp to strip out leading dots -dre = re.compile(r'^\.*') - -BR = '<br>\n' -SPACE = ' ' - -log = logging.getLogger('mailman.error') - - - -def guess_extension(ctype, ext): - """Find the extension mapped to the given content-type. - - mimetypes maps multiple extensions to the same type, e.g. .doc, .dot, and - .wiz are all mapped to application/msword. This sucks for finding the - best reverse mapping. If the extension is one of the giving mappings, - we'll trust that, otherwise we'll just guess. 
:/ - """ - all_extensions = guess_all_extensions(ctype, strict=False) - if ext in all_extensions: - return ext - return (all_extensions[0] if len(all_extensions) > 0 else None) - - - -def safe_strftime(fmt, t): - """A time.strftime() that eats exceptions, returning None instead.""" - try: - return time.strftime(fmt, t) - except (TypeError, ValueError, OverflowError): - return None - - -def calculate_attachments_dir(msg, msgdata): - """Calculate the directory for attachements. - - Calculate the directory that attachments for this message will go under. - To avoid inode limitations, the scheme will be: - archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files> - Start by calculating the date-based and msgid-hash components. - """ - fmt = '%Y%m%d' - datestr = msg.get('Date') - if datestr: - now = parsedate(datestr) - else: - now = time.gmtime(msgdata.get('received_time', time.time())) - datedir = safe_strftime(fmt, now) - if not datedir: - datestr = msgdata.get('X-List-Received-Date') - if datestr: - datedir = safe_strftime(fmt, datestr) - if not datedir: - # What next? Unixfrom, I guess. - parts = msg.get_unixfrom().split() - try: - month = {'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, - 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12, - }.get(parts[3], 0) - day = int(parts[4]) - year = int(parts[6]) - except (IndexError, ValueError): - # Best we can do I think - month = day = year = 0 - datedir = '%04d%02d%02d' % (year, month, day) - assert datedir - # As for the msgid hash, we'll base this part on the Message-ID: so that - # all attachments for the same message end up in the same directory (we'll - # uniquify the filenames in that directory as needed). We use the first 2 - # and last 2 bytes of the SHA1 hash of the message id as the basis of the - # directory name. Clashes here don't really matter too much, and that - # still gives us a 32-bit space to work with. 
- msgid = msg['message-id'] - if msgid is None: - msgid = msg['Message-ID'] = make_msgid() - # We assume that the message id actually /is/ unique! - digest = hashlib.sha1(msgid).hexdigest() - return os.path.join('attachments', datedir, digest[:4] + digest[-4:]) - - -def replace_payload_by_text(msg, text, charset): - """Replace the payload of the message with some text.""" - # TK: This is a common function in replacing the attachment and the main - # message by a text (scrubbing). - del msg['content-type'] - del msg['content-transfer-encoding'] - if isinstance(text, unicode): - text = text.encode(charset) - if not isinstance(charset, str): - charset = str(charset) - msg.set_payload(text, charset) - - - -def process(mlist, msg, msgdata=None): - """Process the message through the scrubber.""" - sanitize = int(config.scrubber.archive_html_sanitizer) - outer = True - if msgdata is None: - msgdata = {} - if msgdata: - # msgdata is available if it is in GLOBAL_PIPELINE - # ie. not in digest or archiver - # check if the list owner want to scrub regular delivery - if not mlist.scrub_nondigest: - return - attachments_dir = calculate_attachments_dir(msg, msgdata) - charset = format_param = delsp = None - lcset = mlist.preferred_language.charset - lcset_out = Charset(lcset).output_charset or lcset - # Now walk over all subparts of this message and scrub out various types - for part in msg.walk(): - ctype = part.get_content_type() - # If the part is text/plain, we leave it alone - if ctype == 'text/plain': - # We need to choose a charset for the scrubbed message, so we'll - # arbitrarily pick the charset of the first text/plain part in the - # message. - # - # Also get the RFC 3676 stuff from this part. This seems to - # work okay for scrub_nondigest. It will also work as far as - # scrubbing messages for the archive is concerned, but Pipermail - # doesn't pay any attention to the RFC 3676 parameters. 
The plain - # format digest is going to be a disaster in any case as some of - # messages will be format="flowed" and some not. ToDigest creates - # its own Content-Type: header for the plain digest which won't - # have RFC 3676 parameters. If the message Content-Type: headers - # are retained for display in the digest, the parameters will be - # there for information, but not for the MUA. This is the best we - # can do without having get_payload() process the parameters. - if charset is None: - charset = part.get_content_charset(lcset) - format_param = part.get_param('format') - delsp = part.get_param('delsp') - # TK: if part is attached then check charset and scrub if none - if part.get('content-disposition') and \ - not part.get_content_charset(): - url = save_attachment(mlist, part, attachments_dir) - filename = part.get_filename(_('not available')) - filename = oneline(filename, lcset) - replace_payload_by_text(part, _("""\ -An embedded and charset-unspecified text was scrubbed... -Name: $filename -URL: $url -"""), lcset) - elif ctype == 'text/html' and isinstance(sanitize, int): - if sanitize == 0: - if outer: - raise DiscardMessage - replace_payload_by_text(part, - _('HTML attachment scrubbed and removed'), - # Adding charset arg and removing content-type - # sets content-type to text/plain - lcset) - elif sanitize == 2: - # By leaving it alone, Pipermail will automatically escape it - pass - elif sanitize == 3: - # Pull it out as an attachment but leave it unescaped. This - # is dangerous, but perhaps useful for heavily moderated - # lists. - url = save_attachment(mlist, part, attachments_dir, - filter_html=False) - replace_payload_by_text(part, _("""\ -An HTML attachment was scrubbed... -URL: $url -"""), lcset) - else: - # HTML-escape it and store it as an attachment, but make it - # look a /little/ bit prettier. 
:( - payload = websafe(part.get_payload(decode=True)) - # For whitespace in the margin, change spaces into - # non-breaking spaces, and tabs into 8 of those. Then use a - # mono-space font. Still looks hideous to me, but then I'd - # just as soon discard them. - lines = [s.replace(' ', ' ').replace('\t', ' ' * 8) - for s in payload.split('\n')] - payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n' - part.set_payload(payload) - # We're replacing the payload with the decoded payload so this - # will just get in the way. - del part['content-transfer-encoding'] - url = save_attachment(mlist, part, attachments_dir, - filter_html=False) - replace_payload_by_text(part, _("""\ -An HTML attachment was scrubbed... -URL: $url -"""), lcset) - elif ctype == 'message/rfc822': - # This part contains a submessage, so it too needs scrubbing - submsg = part.get_payload(0) - url = save_attachment(mlist, part, attachments_dir) - subject = submsg.get('subject', _('no subject')) - date = submsg.get('date', _('no date')) - who = submsg.get('from', _('unknown sender')) - size = len(str(submsg)) - replace_payload_by_text(part, _("""\ -An embedded message was scrubbed... -From: $who -Subject: $subject -Date: $date -Size: $size -URL: $url -"""), lcset) - # If the message isn't a multipart, then we'll strip it out as an - # attachment that would have to be separately downloaded. Pipermail - # will transform the url into a hyperlink. - elif part._payload and not part.is_multipart(): - payload = part.get_payload(decode=True) - ctype = part.get_content_type() - # XXX Under email 2.5, it is possible that payload will be None. - # This can happen when you have a Content-Type: multipart/* with - # only one part and that part has two blank lines between the - # first boundary and the end boundary. In email 3.0 you end up - # with a string in the payload. I think in this case it's safe to - # ignore the part. 
- if payload is None: - continue - size = len(payload) - url = save_attachment(mlist, part, attachments_dir) - desc = part.get('content-description', _('not available')) - desc = oneline(desc, lcset) - filename = part.get_filename(_('not available')) - filename = oneline(filename, lcset) - replace_payload_by_text(part, _("""\ -A non-text attachment was scrubbed... -Name: $filename -Type: $ctype -Size: $size bytes -Desc: $desc -URL: $url -"""), lcset) - outer = False - # We still have to sanitize multipart messages to flat text because - # Pipermail can't handle messages with list payloads. This is a kludge; - # def (n) clever hack ;). - if msg.is_multipart() and sanitize != 2: - # By default we take the charset of the first text/plain part in the - # message, but if there was none, we'll use the list's preferred - # language's charset. - if not charset or charset == 'us-ascii': - charset = lcset_out - else: - # normalize to the output charset if input/output are different - charset = Charset(charset).output_charset or charset - # We now want to concatenate all the parts which have been scrubbed to - # text/plain, into a single text/plain payload. We need to make sure - # all the characters in the concatenated string are in the same - # encoding, so we'll use the 'replace' key in the coercion call. - # BAW: Martin's original patch suggested we might want to try - # generalizing to utf-8, and that's probably a good idea (eventually). - text = [] - charsets = [] - for part in msg.walk(): - # TK: bug-id 1099138 and multipart - # MAS test payload - if part may fail if there are no headers. - if not part._payload or part.is_multipart(): - continue - # All parts should be scrubbed to text/plain by now. - partctype = part.get_content_type() - if partctype != 'text/plain': - text.append(_('Skipped content of type $partctype\n')) - continue - try: - t = part.get_payload(decode=True) or '' - # MAS: TypeError exception can occur if payload is None. 
This - # was observed with a message that contained an attached - # message/delivery-status part. Because of the special parsing - # of this type, this resulted in a text/plain sub-part with a - # null body. See bug 1430236. - except (binascii.Error, TypeError): - t = part.get_payload() or '' - # Email problem was solved by Mark Sapiro. (TK) - partcharset = part.get_content_charset('us-ascii') - try: - t = unicode(t, partcharset, 'replace') - except (UnicodeError, LookupError, ValueError, TypeError, - AssertionError): - # We can get here if partcharset is bogus in come way. - # Replace funny characters. We use errors='replace'. - t = unicode(t, 'ascii', 'replace') - # Separation is useful - if isinstance(t, basestring): - if not t.endswith('\n'): - t += '\n' - text.append(t) - if partcharset not in charsets: - charsets.append(partcharset) - # Now join the text and set the payload - sep = _('-------------- next part --------------\n') - assert isinstance(sep, unicode), ( - 'Expected a unicode separator, got %s' % type(sep)) - rept = sep.join(text) - # Replace entire message with text and scrubbed notice. - # Try with message charsets and utf-8 - if 'utf-8' not in charsets: - charsets.append('utf-8') - for charset in charsets: - try: - replace_payload_by_text(msg, rept, charset) - break - # Bogus charset can throw several exceptions - except (UnicodeError, LookupError, ValueError, TypeError, - AssertionError): - pass - if format_param: - msg.set_param('format', format_param) - if delsp: - msg.set_param('delsp', delsp) - return msg - - - -def save_attachment(mlist, msg, attachments_dir, filter_html=True): - fsdir = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, - mlist.fqdn_listname, attachments_dir) - makedirs(fsdir) - # Figure out the attachment type and get the decoded data - decodedpayload = msg.get_payload(decode=True) - # BAW: mimetypes ought to handle non-standard, but commonly found types, - # e.g. image/jpg (should be image/jpeg). 
For now we just store such - # things as application/octet-streams since that seems the safest. - ctype = msg.get_content_type() - # i18n file name is encoded - lcset = mlist.preferred_language.charset - filename = oneline(msg.get_filename(''), lcset) - filename, fnext = os.path.splitext(filename) - # For safety, we should confirm this is valid ext for content-type - # but we can use fnext if we introduce fnext filtering - if as_boolean(config.scrubber.use_attachment_filename_extension): - # HTML message doesn't have filename :-( - ext = fnext or guess_extension(ctype, fnext) - else: - ext = guess_extension(ctype, fnext) - if not ext: - # We don't know what it is, so assume it's just a shapeless - # application/octet-stream, unless the Content-Type: is - # message/rfc822, in which case we know we'll coerce the type to - # text/plain below. - if ctype == 'message/rfc822': - ext = '.txt' - else: - ext = '.bin' - # Allow only alphanumerics, dash, underscore, and dot - ext = sre.sub('', ext) - path = None - # We need a lock to calculate the next attachment number - with Lock(os.path.join(fsdir, 'attachments.lock')): - # Now base the filename on what's in the attachment, uniquifying it if - # necessary. - if (not filename or - not as_boolean(config.scrubber.use_attachment_filename)): - filebase = 'attachment' - else: - # Sanitize the filename given in the message headers - parts = pre.split(filename) - filename = parts[-1] - # Strip off leading dots - filename = dre.sub('', filename) - # Allow only alphanumerics, dash, underscore, and dot - filename = sre.sub('', filename) - # If the filename's extension doesn't match the type we guessed, - # which one should we go with? For now, let's go with the one we - # guessed so attachments can't lie about their type. Also, if the - # filename /has/ no extension, then tack on the one we guessed. - # The extension was removed from the name above. 
- filebase = filename - # Now we're looking for a unique name for this file on the file - # system. If msgdir/filebase.ext isn't unique, we'll add a counter - # after filebase, e.g. msgdir/filebase-cnt.ext - counter = 0 - extra = '' - while True: - path = os.path.join(fsdir, filebase + extra + ext) - # Generally it is not a good idea to test for file existance - # before just trying to create it, but the alternatives aren't - # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't - # NFS-safe). Besides, we have an exclusive lock now, so we're - # guaranteed that no other process will be racing with us. - if os.path.exists(path): - counter += 1 - extra = '-%04d' % counter - else: - break - # `path' now contains the unique filename for the attachment. There's - # just one more step we need to do. If the part is text/html and - # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be - # here), then send the attachment through the filter program for - # sanitization - if filter_html and ctype == 'text/html': - base, ext = os.path.splitext(path) - tmppath = base + '-tmp' + ext - fp = open(tmppath, 'w') - try: - fp.write(decodedpayload) - fp.close() - cmd = Template(config.mta.archive_html_sanitizer).safe_substitue( - filename=tmppath) - progfp = os.popen(cmd, 'r') - decodedpayload = progfp.read() - status = progfp.close() - if status: - log.error('HTML sanitizer exited with non-zero status: %s', - status) - finally: - os.unlink(tmppath) - # BAW: Since we've now sanitized the document, it should be plain - # text. Blarg, we really want the sanitizer to tell us what the type - # if the return data is. :( - ext = '.txt' - path = base + '.txt' - # Is it a message/rfc822 attachment? - elif ctype == 'message/rfc822': - submsg = msg.get_payload() - # BAW: I'm sure we can eventually do better than this. :( - decodedpayload = websafe(str(submsg)) - fp = open(path, 'w') - fp.write(decodedpayload) - fp.close() - # Now calculate the url to the list's archive. 
- scrubber_path = config.scrubber.archive_scrubber - base_url = find_name(scrubber_path).list_url(mlist) - if not base_url.endswith('/'): - base_url += '/' - # Trailing space will definitely be a problem with format=flowed. - # Bracket the URL instead. - url = '<' + base_url + '%s/%s%s%s>' % ( - attachments_dir, filebase, extra, ext) - return url - - - -class Scrubber: - """Cleanse a message for archiving.""" - - implements(IHandler) - - name = 'scrubber' - description = _('Cleanse a message for archiving.') - - def process(self, mlist, msg, msgdata): - """See `IHandler`.""" - process(mlist, msg, msgdata) diff --git a/src/mailman/pipeline/tests/test_mimedel.py b/src/mailman/pipeline/tests/test_mimedel.py new file mode 100644 index 000000000..566c1a40c --- /dev/null +++ b/src/mailman/pipeline/tests/test_mimedel.py @@ -0,0 +1,213 @@ +# Copyright (C) 2012 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Test the mime_delete handler.""" + +from __future__ import absolute_import, print_function, unicode_literals + +__metaclass__ = type +__all__ = [ + 'TestDispose', + ] + + +import unittest + +from zope.component import getUtility + +from mailman.app.lifecycle import create_list +from mailman.config import config +from mailman.core import errors +from mailman.interfaces.action import FilterAction +from mailman.interfaces.member import MemberRole +from mailman.interfaces.usermanager import IUserManager +from mailman.pipeline import mime_delete +from mailman.testing.helpers import ( + LogFileMark, + get_queue_messages, + specialized_message_from_string as mfs) +from mailman.testing.layers import ConfigLayer + + + +class TestDispose(unittest.TestCase): + """Test the mime_delete handler.""" + + layer = ConfigLayer + + def setUp(self): + self._mlist = create_list('test@example.com') + self._msg = mfs("""\ +From: anne@example.com +To: test@example.com +Subject: A disposable message +Message-ID: <ant> + +""") + # Python 2.7 has assertMultiLineEqual. Let this work without bounds. + self.maxDiff = None + self.eq = getattr(self, 'assertMultiLineEqual', self.assertEqual) + config.push('dispose', """ + [mailman] + site_owner: noreply@example.com + """) + + def tearDown(self): + config.pop('dispose') + + def test_dispose_discard(self): + self._mlist.filter_action = FilterAction.discard + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'discarding') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'discarding') + # There should be no messages in the 'bad' queue. 
+ self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_bounce(self): + self._mlist.filter_action = FilterAction.reject + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'rejecting') + except errors.RejectMessage as error: + pass + else: + raise AssertionError('RejectMessage exception expected') + self.assertEqual(error.message, 'rejecting') + # There should be no messages in the 'bad' queue. + self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_forward(self): + # The disposed message gets forwarded to the list moderators. So + # first add some moderators. + user_manager = getUtility(IUserManager) + anne = user_manager.create_address('anne@example.com') + bart = user_manager.create_address('bart@example.com') + self._mlist.subscribe(anne, MemberRole.moderator) + self._mlist.subscribe(bart, MemberRole.moderator) + # Now set the filter action and dispose the message. + self._mlist.filter_action = FilterAction.forward + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'forwarding') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'forwarding') + # There should now be a multipart message in the virgin queue destined + # for the mailing list owners. + messages = get_queue_messages('virgin') + self.assertEqual(len(messages), 1) + message = messages[0].msg + self.assertEqual(message.get_content_type(), 'multipart/mixed') + # Anne and Bart should be recipients of the message, but it will look + # like the message is going to the list owners. + self.assertEqual(message['to'], 'test-owner@example.com') + self.assertEqual(message.recipients, + set(['anne@example.com', 'bart@example.com'])) + # The list owner should be the sender. 
+ self.assertEqual(message['from'], 'noreply@example.com') + self.assertEqual(message['subject'], + 'Content filter message notification') + # The body of the first part provides the moderators some details. + part0 = message.get_payload(0) + self.assertEqual(part0.get_content_type(), 'text/plain') + self.eq(part0.get_payload(), """\ +The attached message matched the Test mailing list's content +filtering rules and was prevented from being forwarded on to the list +membership. You are receiving the only remaining copy of the discarded +message. + +""") + # The second part is the container for the original message. + part1 = message.get_payload(1) + self.assertEqual(part1.get_content_type(), 'message/rfc822') + # And the first part of *that* message will be the original message. + original = part1.get_payload(0) + self.assertEqual(original['subject'], 'A disposable message') + self.assertEqual(original['message-id'], '<ant>') + + def test_dispose_non_preservable(self): + # Two actions can happen here, depending on a site-wide setting. If + # the site owner has indicated that filtered messages cannot be + # preserved, then this is the same as discarding them. + self._mlist.filter_action = FilterAction.preserve + config.push('non-preservable', """ + [mailman] + filtered_messages_are_preservable: no + """) + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'not preserved') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + finally: + config.pop('non-preservable') + self.assertEqual(error.message, 'not preserved') + # There should be no messages in the 'bad' queue. + self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_preservable(self): + # Two actions can happen here, depending on a site-wide setting. 
If + # the site owner has indicated that filtered messages can be + # preserved, then this is similar to discarding the message except + # that a copy is preserved in the 'bad' queue. + self._mlist.filter_action = FilterAction.preserve + config.push('preservable', """ + [mailman] + filtered_messages_are_preservable: yes + """) + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'preserved') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + finally: + config.pop('preservable') + self.assertEqual(error.message, 'preserved') + # There should be no messages in the 'bad' queue. + messages = get_queue_messages('bad') + self.assertEqual(len(messages), 1) + message = messages[0].msg + self.assertEqual(message['subject'], 'A disposable message') + self.assertEqual(message['message-id'], '<ant>') + + def test_bad_action(self): + # This should never happen, but what if it does? + # FilterAction.accept, FilterAction.hold, and FilterAction.defer are + # not valid. They are treated as discard actions, but the problem is + # also logged. + for action in (FilterAction.accept, + FilterAction.hold, + FilterAction.defer): + self._mlist.filter_action = action + mark = LogFileMark('mailman.error') + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'bad action') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'bad action') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '{0} invalid FilterAction: test@example.com. ' + 'Treating as discard'.format(action.name))) diff --git a/src/mailman/pipeline/tests/test_scrubber.py b/src/mailman/pipeline/tests/test_scrubber.py deleted file mode 100644 index 7ac5eb855..000000000 --- a/src/mailman/pipeline/tests/test_scrubber.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. 
-# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Scrubber module tests.""" - -from __future__ import absolute_import, print_function, unicode_literals - -__metaclass__ = type -__all__ = [ - 'TestScrubber', - ] - - -import unittest - -from mailman.pipeline import scrubber - - - -class TestScrubber(unittest.TestCase): - """Scrubber module tests.""" - - def test_guess_extension(self): - # A known extension should be found. - extension = scrubber.guess_extension('application/msword', '.doc') - self.assertEqual(extension, '.doc') - - def test_guess_missing_extension(self): - # Maybe some other extension is better. - extension = scrubber.guess_extension('application/msword', '.xxx') - self.assertEqual(extension, '.doc') diff --git a/src/mailman/rest/lists.py b/src/mailman/rest/lists.py index c95c9a88a..3374e8f73 100644 --- a/src/mailman/rest/lists.py +++ b/src/mailman/rest/lists.py @@ -137,10 +137,7 @@ class AList(_ListBase): """Delete the named mailing list.""" if self._mlist is None: return http.not_found() - remove_list(self._mlist.fqdn_listname, self._mlist, - # XXX 2010-07-06 barry we need a way to remove the list - # archives either with the mailing list or afterward. 
- archives=False) + remove_list(self._mlist.fqdn_listname, self._mlist) return no_content() @resource.child(member_matcher) diff --git a/src/mailman/runners/archive.py b/src/mailman/runners/archive.py index cab776076..ea85281b1 100644 --- a/src/mailman/runners/archive.py +++ b/src/mailman/runners/archive.py @@ -17,6 +17,8 @@ """Archive runner.""" +from __future__ import absolute_import, print_function, unicode_literals + __metaclass__ = type __all__ = [ 'ArchiveRunner', diff --git a/src/mailman/runners/digest.py b/src/mailman/runners/digest.py index b4ae9a442..513a20322 100644 --- a/src/mailman/runners/digest.py +++ b/src/mailman/runners/digest.py @@ -46,7 +46,6 @@ from mailman.core.i18n import _ from mailman.core.runner import Runner from mailman.interfaces.member import DeliveryMode, DeliveryStatus from mailman.pipeline.decorate import decorate -from mailman.pipeline.scrubber import process as scrubber from mailman.utilities.i18n import make from mailman.utilities.mailbox import Mailbox from mailman.utilities.string import oneline, wrap @@ -252,12 +251,6 @@ class RFC1153Digester(Digester): if count > 1: print >> self._text, self._separator30 print >> self._text - # Scrub attachements. - try: - msg = scrubber(self._mlist, msg) - except DiscardMessage: - print >> self._text, _('[Message discarded by content filter]') - return # Each message section contains a few headers. for header in config.digests.plain_digest_keep_headers.split(): if header in msg: diff --git a/src/mailman/runners/docs/archiver.rst b/src/mailman/runners/docs/archiver.rst deleted file mode 100644 index a6f5ccd24..000000000 --- a/src/mailman/runners/docs/archiver.rst +++ /dev/null @@ -1,35 +0,0 @@ -========= -Archiving -========= - -Mailman can archive to any number of archivers that adhere to the -``IArchiver`` interface. By default, there's a Pipermail archiver. -:: - - >>> mlist = create_list('test@example.com') - >>> transaction.commit() - - >>> msg = message_from_string("""\ - ... 
From: aperson@example.com - ... To: test@example.com - ... Subject: My first post - ... Message-ID: <first> - ... - ... First post! - ... """) - - >>> archiver_queue = config.switchboards['archive'] - >>> ignore = archiver_queue.enqueue(msg, {}, listname=mlist.fqdn_listname) - - >>> from mailman.runners.archive import ArchiveRunner - >>> from mailman.testing.helpers import make_testable_runner - >>> runner = make_testable_runner(ArchiveRunner) - >>> runner.run() - - # The best we can do is verify some landmark exists. Let's use the - # Pipermail pickle file exists. - >>> listname = mlist.fqdn_listname - >>> import os - >>> os.path.exists(os.path.join( - ... config.PUBLIC_ARCHIVE_FILE_DIR, listname, 'pipermail.pck')) - True diff --git a/src/mailman/runners/docs/digester.rst b/src/mailman/runners/docs/digester.rst index 4b9481f3e..1ca51bdae 100644 --- a/src/mailman/runners/docs/digester.rst +++ b/src/mailman/runners/docs/digester.rst @@ -223,7 +223,6 @@ The RFC 1153 contains the digest in a single plain text message. When replying, please edit your Subject line so it is more specific than "Re: Contents of Test digest..." <BLANKLINE> - <BLANKLINE> Today's Topics: <BLANKLINE> 1. Test message 1 (aperson@example.com) @@ -237,7 +236,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 1 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 1 <BLANKLINE> @@ -246,7 +244,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 2 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 2 <BLANKLINE> @@ -255,7 +252,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 3 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 3 <BLANKLINE> @@ -264,7 +260,6 @@ The RFC 1153 contains the digest in a single plain text message. 
From: aperson@example.com Subject: Test message 4 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 4 <BLANKLINE> @@ -458,7 +453,6 @@ The content can be decoded to see the actual digest text. "'From: aperson@example.org'", "'Subject: \\xe4\\xb8\\x80\\xe7\\x95\\xaa'", "'To: test@example.com'", - "'Message-ID: ... "'Content-Type: text/plain; charset=iso-2022-jp'", "''", "'\\xe4\\xb8\\x80\\xe7\\x95\\xaa'", diff --git a/src/mailman/runners/tests/test_archiver.py b/src/mailman/runners/tests/test_archiver.py new file mode 100644 index 000000000..274aba5ec --- /dev/null +++ b/src/mailman/runners/tests/test_archiver.py @@ -0,0 +1,112 @@ +# Copyright (C) 2012 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Test the archive runner.""" + +from __future__ import absolute_import, print_function, unicode_literals + +__metaclass__ = type +__all__ = [ + 'TestArchiveRunner', + ] + + +import os +import unittest + +from email import message_from_file +from zope.interface import implements + +from mailman.app.lifecycle import create_list +from mailman.config import config +from mailman.interfaces.archiver import IArchiver +from mailman.runners.archive import ArchiveRunner +from mailman.testing.helpers import ( + make_testable_runner, + specialized_message_from_string as mfs) +from mailman.testing.layers import ConfigLayer + + + +class DummyArchiver: + implements(IArchiver) + name = 'dummy' + + @staticmethod + def list_url(mlist): + return 'http://archive.example.com/' + + @staticmethod + def permalink(mlist, msg): + filename = msg['x-message-id-hash'] + return 'http://archive.example.com/' + filename + + @staticmethod + def archive_message(mlist, msg): + filename = msg['x-message-id-hash'] + path = os.path.join(config.MESSAGES_DIR, filename) + with open(path, 'w') as fp: + print(msg.as_string(), file=fp) + # Not technically allowed by the API, but good enough for the test. + return path + + + +class TestArchiveRunner(unittest.TestCase): + """Test the archive runner.""" + + layer = ConfigLayer + + def setUp(self): + self._mlist = create_list('test@example.com') + # Enable just the dummy archiver. + config.push('dummy', """ + [archiver.dummy] + class: mailman.runners.tests.test_archiver.DummyArchiver + enable: yes + [archiver.prototype] + enable: no + [archiver.mhonarc] + enable: no + [archiver.mail_archive] + enable: no + """) + self._msg = mfs("""\ +From: aperson@example.com +To: test@example.com +Subject: My first post +Message-ID: <first> +X-Message-ID-Hash: 4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB + +First post! 
+""") + self._runner = make_testable_runner(ArchiveRunner) + + def tearDown(self): + config.pop('dummy') + + def test_archive_runner(self): + # Ensure that the archive runner ends up archiving the message. + config.switchboards['archive'].enqueue( + self._msg, {}, listname=self._mlist.fqdn_listname) + self._runner.run() + # There should now be a copy of the message in the file system. + filename = os.path.join( + config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB') + with open(filename) as fp: + archived = message_from_file(fp) + self.assertEqual(archived['message-id'], '<first>') diff --git a/src/mailman/styles/default.py b/src/mailman/styles/default.py index 471b43272..e64bbe40b 100644 --- a/src/mailman/styles/default.py +++ b/src/mailman/styles/default.py @@ -32,7 +32,7 @@ import datetime from zope.interface import implements from mailman.core.i18n import _ -from mailman.interfaces.action import Action +from mailman.interfaces.action import Action, FilterAction from mailman.interfaces.bounce import UnrecognizedBounceDisposition from mailman.interfaces.digests import DigestFrequency from mailman.interfaces.autorespond import ResponseAction @@ -99,6 +99,7 @@ from: .*@uplinkpro.com mlist.preferred_language = 'en' mlist.collapse_alternatives = True mlist.convert_html_to_plaintext = False + mlist.filter_action = FilterAction.discard mlist.filter_content = False # Digest related variables mlist.digestable = True diff --git a/src/mailman/templates/en/archidxentry.html b/src/mailman/templates/en/archidxentry.html deleted file mode 100644 index 1927ae7fe..000000000 --- a/src/mailman/templates/en/archidxentry.html +++ /dev/null @@ -1,4 +0,0 @@ -<LI><A HREF="$filename">$subject -</A><A NAME="$sequence"> </A> -<I>$author -</I> diff --git a/src/mailman/templates/en/archidxfoot.html b/src/mailman/templates/en/archidxfoot.html deleted file mode 100644 index 6a43546ea..000000000 --- a/src/mailman/templates/en/archidxfoot.html +++ /dev/null @@ -1,21 +0,0 @@ - </ul> - <p> - 
<a name="end"><b>Last message date:</b></a> - <i>$lastdate</i><br> - <b>Archived on:</b> <i>$archivedate</i> - <p> - <ul> - <li> <b>Messages sorted by:</b> - $thread_ref - $subject_ref - $author_ref - $date_ref - <li><b><a href="$listinfo">More info on this list... - </a></b></li> - </ul> - <p> - <hr> - <i>This archive was generated by - Pipermail $version.</i> - </BODY> -</HTML> diff --git a/src/mailman/templates/en/archidxhead.html b/src/mailman/templates/en/archidxhead.html deleted file mode 100644 index 70a7558d7..000000000 --- a/src/mailman/templates/en/archidxhead.html +++ /dev/null @@ -1,24 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> -<HTML> - <HEAD> - <title>The $listname $archive Archive by $archtype</title> - <META NAME="robots" CONTENT="noindex,follow"> - $encoding - </HEAD> - <BODY BGCOLOR="#ffffff"> - <a name="start"></A> - <h1>$archive Archives by $archtype</h1> - <ul> - <li> <b>Messages sorted by:</b> - $thread_ref - $subject_ref - $author_ref - $date_ref - - <li><b><a href="$listinfo">More info on this list... 
- </a></b></li> - </ul> - <p><b>Starting:</b> <i>$firstdate</i><br> - <b>Ending:</b> <i>$lastdate</i><br> - <b>Messages:</b> $size<p> - <ul> diff --git a/src/mailman/templates/en/archlistend.html b/src/mailman/templates/en/archlistend.html deleted file mode 100644 index 9bc052ddb..000000000 --- a/src/mailman/templates/en/archlistend.html +++ /dev/null @@ -1 +0,0 @@ - </table> diff --git a/src/mailman/templates/en/archliststart.html b/src/mailman/templates/en/archliststart.html deleted file mode 100644 index cdf5d17c4..000000000 --- a/src/mailman/templates/en/archliststart.html +++ /dev/null @@ -1,4 +0,0 @@ - <table border=3> - <tr><td>Archive</td> - <td>View by:</td> - <td>Downloadable version</td></tr> diff --git a/src/mailman/templates/en/archtoc.html b/src/mailman/templates/en/archtoc.html deleted file mode 100644 index 4dcaf5a50..000000000 --- a/src/mailman/templates/en/archtoc.html +++ /dev/null @@ -1,20 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> -<HTML> - <HEAD> - <title>The $listname Archives</title> - <META NAME="robots" CONTENT="noindex,follow"> - $meta - </HEAD> - <BODY BGCOLOR="#ffffff"> - <h1>The $listname Archives </h1> - <p> - You can get <a href="$listinfo">more information about this list</a> - or you can <a href="$fullarch">download the full raw archive</a> - ($size). 
- </p> - $noarchive_msg - $archive_listing_start - $archive_listing - $archive_listing_end - </BODY> - </HTML> diff --git a/src/mailman/templates/en/archtocentry.html b/src/mailman/templates/en/archtocentry.html deleted file mode 100644 index e2a6d2e37..000000000 --- a/src/mailman/templates/en/archtocentry.html +++ /dev/null @@ -1,12 +0,0 @@ - - <tr> - <td>$archivelabel:</td> - <td> - <A href="$archive/thread.html">[ Thread ]</a> - <A href="$archive/subject.html">[ Subject ]</a> - <A href="$archive/author.html">[ Author ]</a> - <A href="$archive/date.html">[ Date ]</a> - </td> - $textlink - </tr> - diff --git a/src/mailman/templates/en/archtocnombox.html b/src/mailman/templates/en/archtocnombox.html deleted file mode 100644 index 5989aa53d..000000000 --- a/src/mailman/templates/en/archtocnombox.html +++ /dev/null @@ -1,18 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> -<HTML> - <HEAD> - <title>The $listname Archives</title> - <META NAME="robots" CONTENT="noindex,follow"> - $meta - </HEAD> - <BODY BGCOLOR="#ffffff"> - <h1>The $listname Archives </h1> - <p> - You can get <a href="$listinfo">more information about this list</a>. - </p> - $noarchive_msg - $archive_listing_start - $archive_listing - $archive_listing_end - </BODY> - </HTML> diff --git a/src/mailman/testing/testing.cfg b/src/mailman/testing/testing.cfg index 526093572..d503247de 100644 --- a/src/mailman/testing/testing.cfg +++ b/src/mailman/testing/testing.cfg @@ -71,10 +71,6 @@ enable: yes base_url: http://go.mail-archive.dev/ recipient: archive@mail-archive.dev -[archiver.pipermail] -enable: yes -base_url: http://www.example.com/pipermail/$listname - [archiver.mhonarc] enable: yes command: /bin/echo "/usr/bin/mhonarc -add -dbfile $PRIVATE_ARCHIVE_FILE_DIR/${listname}.mbox/mhonarc.db -outdir $VAR_DIR/mhonarc/${listname} -stderr $LOG_DIR/mhonarc -stdout $LOG_DIR/mhonarc -spammode -umask 022" |
