summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mailman/Archiver/Archiver.py233
-rw-r--r--src/mailman/Archiver/HyperArch.py1233
-rw-r--r--src/mailman/Archiver/HyperDatabase.py339
-rw-r--r--src/mailman/Archiver/__init__.py18
-rw-r--r--src/mailman/Archiver/pipermail.py872
-rw-r--r--src/mailman/app/docs/lifecycle.rst2
-rw-r--r--src/mailman/app/lifecycle.py11
-rw-r--r--src/mailman/archiving/docs/common.rst43
-rw-r--r--src/mailman/archiving/pipermail.py128
-rw-r--r--src/mailman/archiving/prototype.py2
-rw-r--r--src/mailman/commands/cli_lists.py18
-rw-r--r--src/mailman/commands/docs/info.rst2
-rw-r--r--src/mailman/commands/docs/remove.rst44
-rw-r--r--src/mailman/config/config.py6
-rw-r--r--src/mailman/config/schema.cfg32
-rw-r--r--src/mailman/core/initialize.py4
-rw-r--r--src/mailman/interfaces/archiver.py21
-rw-r--r--src/mailman/pipeline/docs/rfc-2369.rst28
-rw-r--r--src/mailman/pipeline/scrubber.py13
-rw-r--r--src/mailman/rest/lists.py5
-rw-r--r--src/mailman/runners/archive.py2
-rw-r--r--src/mailman/runners/docs/archiver.rst35
-rw-r--r--src/mailman/runners/tests/test_archiver.py112
-rw-r--r--src/mailman/templates/en/archidxentry.html4
-rw-r--r--src/mailman/templates/en/archidxfoot.html21
-rw-r--r--src/mailman/templates/en/archidxhead.html24
-rw-r--r--src/mailman/templates/en/archlistend.html1
-rw-r--r--src/mailman/templates/en/archliststart.html4
-rw-r--r--src/mailman/templates/en/archtoc.html20
-rw-r--r--src/mailman/templates/en/archtocentry.html12
-rw-r--r--src/mailman/templates/en/archtocnombox.html18
-rw-r--r--src/mailman/testing/testing.cfg4
32 files changed, 149 insertions, 3162 deletions
diff --git a/src/mailman/Archiver/Archiver.py b/src/mailman/Archiver/Archiver.py
deleted file mode 100644
index 1e2af535f..000000000
--- a/src/mailman/Archiver/Archiver.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
- # USA.
-
-"""Mixin class for putting new messages in the right place for archival.
-
-Public archives are separated from private ones. An external archival
-mechanism (eg, pipermail) should be pointed to the right places, to do the
-archival.
-"""
-
-import os
-import errno
-import logging
-import mailbox
-
-from cStringIO import StringIO
-from string import Template
-from zope.component import getUtility
-
-from mailman.config import config
-from mailman.interfaces.domain import IDomainManager
-from mailman.utilities.i18n import make
-
-log = logging.getLogger('mailman.error')
-
-
-
-def makelink(old, new):
- try:
- os.symlink(old, new)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
-
-def breaklink(link):
- try:
- os.unlink(link)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
-
-
-
-class Archiver:
- #
- # Interface to Pipermail. HyperArch.py uses this method to get the
- # archive directory for the mailing list
- #
- def InitVars(self):
- # The archive file structure by default is:
- #
- # archives/
- # private/
- # listname.mbox/
- # listname.mbox
- # listname/
- # lots-of-pipermail-stuff
- # public/
- # listname.mbox@ -> ../private/listname.mbox
- # listname@ -> ../private/listname
- #
- # IOW, the mbox and pipermail archives are always stored in the
- # private archive for the list. This is safe because archives/private
- # is always set to o-rx. Public archives have a symlink to get around
- # the private directory, pointing directly to the private/listname
- # which has o+rx permissions. Private archives do not have the
- # symbolic links.
- archdir = self.archive_dir(self.fqdn_listname)
- omask = os.umask(0)
- try:
- try:
- os.mkdir(archdir+'.mbox', 02775)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
- # We also create an empty pipermail archive directory into
- # which we'll drop an empty index.html file into. This is so
- # that lists that have not yet received a posting have
- # /something/ as their index.html, and don't just get a 404.
- try:
- os.mkdir(archdir, 02775)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
- # See if there's an index.html file there already and if not,
- # write in the empty archive notice.
- indexfile = os.path.join(archdir, 'index.html')
- fp = None
- try:
- fp = open(indexfile)
- except IOError, e:
- if e.errno <> errno.ENOENT:
- raise
- omask = os.umask(002)
- try:
- fp = open(indexfile, 'w')
- finally:
- os.umask(omask)
- fp.write(make('emptyarchive.html',
- mailing_list=self,
- listname=self.real_name,
- listinfo=self.GetScriptURL('listinfo'),
- ))
- if fp:
- fp.close()
- finally:
- os.umask(omask)
-
- def ArchiveFileName(self):
- """The mbox name where messages are left for archive construction."""
- return os.path.join(self.archive_dir() + '.mbox',
- self.fqdn_listname + '.mbox')
-
- def GetBaseArchiveURL(self):
- if self.archive_private:
- url = self.GetScriptURL('private') + '/index.html'
- else:
- domain = getUtility(IDomainManager).get(self.mail_host)
- web_host = (self.mail_host if domain is None else domain.url_host)
- url = Template(config.PUBLIC_ARCHIVE_URL).safe_substitute(
- listname=self.fqdn_listname,
- hostname=web_host,
- fqdn_listname=self.fqdn_listname,
- )
- return url
-
- def __archive_file(self, afn):
- """Open (creating, if necessary) the named archive file."""
- omask = os.umask(002)
- try:
- return mailbox.mbox(afn, 'a+')
- finally:
- os.umask(omask)
-
- #
- # old ArchiveMail function, retained under a new name
- # for optional archiving to an mbox
- #
- def __archive_to_mbox(self, post):
- """Retain a text copy of the message in an mbox file."""
- try:
- afn = self.ArchiveFileName()
- mbox = self.__archive_file(afn)
- mbox.add(post)
- mbox.fp.close()
- except IOError, msg:
- log.error('Archive file access failure:\n\t%s %s', afn, msg)
- raise
-
- def ExternalArchive(self, ar, txt):
- cmd = Template(ar).safe_substitute(
- listname=self.fqdn_listname,
- hostname=self.mail_host)
- extarch = os.popen(cmd, 'w')
- extarch.write(txt)
- status = extarch.close()
- if status:
- log.error('external archiver non-zero exit status: %d\n',
- (status & 0xff00) >> 8)
-
- #
- # archiving in real time this is called from list.post(msg)
- #
- def ArchiveMail(self, msg):
- """Store postings in mbox and/or pipermail archive, depending."""
- # Fork so archival errors won't disrupt normal list delivery
- if config.ARCHIVE_TO_MBOX == -1:
- return
- #
- # We don't need an extra archiver lock here because we know the list
- # itself must be locked.
- if config.ARCHIVE_TO_MBOX in (1, 2):
- self.__archive_to_mbox(msg)
- if config.ARCHIVE_TO_MBOX == 1:
- # Archive to mbox only.
- return
- txt = str(msg)
- # should we use the internal or external archiver?
- private_p = self.archive_private
- if config.PUBLIC_EXTERNAL_ARCHIVER and not private_p:
- self.ExternalArchive(config.PUBLIC_EXTERNAL_ARCHIVER, txt)
- elif config.PRIVATE_EXTERNAL_ARCHIVER and private_p:
- self.ExternalArchive(config.PRIVATE_EXTERNAL_ARCHIVER, txt)
- else:
- # use the internal archiver
- f = StringIO(txt)
- import HyperArch
- h = HyperArch.HyperArchive(self)
- h.processUnixMailbox(f)
- h.close()
- f.close()
-
- #
- # called from MailList.MailList.Save()
- #
- def CheckHTMLArchiveDir(self):
- # We need to make sure that the archive directory has the right perms
- # for public vs private. If it doesn't exist, or some weird
- # permissions errors prevent us from stating the directory, it's
- # pointless to try to fix the perms, so we just return -scott
- if config.ARCHIVE_TO_MBOX == -1:
- # Archiving is completely disabled, don't require the skeleton.
- return
- pubdir = os.path.join(config.PUBLIC_ARCHIVE_FILE_DIR,
- self.fqdn_listname)
- privdir = self.archive_dir()
- pubmbox = pubdir + '.mbox'
- privmbox = privdir + '.mbox'
- if self.archive_private:
- breaklink(pubdir)
- breaklink(pubmbox)
- else:
- # BAW: privdir or privmbox could be nonexistant. We'd get an
- # OSError, ENOENT which should be caught and reported properly.
- makelink(privdir, pubdir)
- # Only make this link if the site has enabled public mbox files
- if config.PUBLIC_MBOX:
- makelink(privmbox, pubmbox)
diff --git a/src/mailman/Archiver/HyperArch.py b/src/mailman/Archiver/HyperArch.py
deleted file mode 100644
index 017c14342..000000000
--- a/src/mailman/Archiver/HyperArch.py
+++ /dev/null
@@ -1,1233 +0,0 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""HyperArch: Pipermail archiving for Mailman
-
- - The Dragon De Monsyne <dragondm@integral.org>
-
- TODO:
- - Should be able to force all HTML to be regenerated next time the
- archive is run, in case a template is changed.
- - Run a command to generate tarball of html archives for downloading
- (probably in the 'update_dirty_archives' method).
-"""
-
-import os
-import re
-import sys
-import gzip
-import time
-import errno
-import urllib
-import logging
-import binascii
-
-from email.Charset import Charset
-from email.Errors import HeaderParseError
-from email.Header import decode_header, make_header
-from flufl.lock import Lock, TimeOutError
-from lazr.config import as_boolean
-from string import Template
-from zope.component import getUtility
-
-from mailman.Archiver import HyperDatabase
-from mailman.Archiver import pipermail
-from mailman.config import config
-from mailman.core.i18n import _, ctime
-from mailman.interfaces.listmanager import IListManager
-from mailman.utilities.i18n import find
-from mailman.utilities.string import uncanonstr, websafe
-
-
-log = logging.getLogger('mailman.error')
-EMPTYSTRING = ''
-NL = '\n'
-
-
-# MacOSX has a default stack size that is too small for deeply recursive
-# regular expressions. We see this as crashes in the Python test suite when
-# running test_re.py and test_sre.py. The fix is to set the stack limit to
-# 2048; the general recommendation is to do in the shell before running the
-# test suite. But that's inconvenient for a daemon like the runner.
-#
-# AFAIK, this problem only affects the archiver, so we're adding this work
-# around to this file (it'll get imported by the bundled pipermail or by the
-# bin/arch script. We also only do this on darwin, a.k.a. MacOSX.
-if sys.platform == 'darwin':
- try:
- import resource
- except ImportError:
- pass
- else:
- soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
- newsoft = min(hard, max(soft, 1024*2048))
- resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard))
-
-
-
-def html_quote(s, langcode=None):
- repls = ( ('&', '&amp;'),
- ("<", '&lt;'),
- (">", '&gt;'),
- ('"', '&quot;'))
- for thing, repl in repls:
- s = s.replace(thing, repl)
- return uncanonstr(s, langcode)
-
-
-def url_quote(s):
- return urllib.quote(s)
-
-
-def null_to_space(s):
- return s.replace('\000', ' ')
-
-
-def sizeof(filename, lang):
- try:
- size = os.path.getsize(filename)
- except OSError, e:
- # ENOENT can happen if the .mbox file was moved away or deleted, and
- # an explicit mbox file name was given to bin/arch.
- if e.errno <> errno.ENOENT: raise
- return _('size not available')
- if size < 1000:
- with _.using(lang.code):
- out = _(' %(size)i bytes ')
- return out
- elif size < 1000000:
- return ' %d KB ' % (size / 1000)
- # GB?? :-)
- return ' %d MB ' % (size / 1000000)
-
-
-html_charset = '<META http-equiv="Content-Type" ' \
- 'content="text/html; charset=%s">'
-
-def CGIescape(arg, lang=None):
- if isinstance(arg, unicode):
- s = websafe(arg)
- else:
- s = websafe(str(arg))
- return uncanonstr(s.replace('"', '&quot;'), lang.code)
-
-# Parenthesized human name
-paren_name_pat = re.compile(r'([(].*[)])')
-
-# Subject lines preceded with 'Re:'
-REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE)
-
-# E-mail addresses and URLs in text
-emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
-
-# Argh! This pattern is buggy, and will choke on URLs with GET parameters.
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text
-
-# Blank lines
-blankpat = re.compile(r'^\s*$')
-
-# Starting <html> directive
-htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE)
-# Ending </html> directive
-nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE)
-# Match quoted text
-quotedpat = re.compile(r'^([>|:]|&gt;)+')
-
-
-
-# Like Utils.maketext() but with caching to improve performance.
-#
-# _templatefilepathcache is used to associate a (templatefile, lang, listname)
-# key with the file system path to a template file. This path is the one that
-# the Utils.findtext() function has computed is the one to match the values in
-# the key tuple.
-#
-# _templatecache associate a file system path as key with the text
-# returned after processing the contents of that file by Utils.findtext()
-#
-# We keep two caches to reduce the amount of template text kept in memory,
-# since the _templatefilepathcache is a many->one mapping and _templatecache
-# is a one->one mapping. Imagine 1000 lists all using the same default
-# English template.
-
-_templatefilepathcache = {}
-_templatecache = {}
-
-def quick_maketext(templatefile, dict=None, lang=None, mlist=None):
- if mlist is None:
- listname = ''
- else:
- listname = mlist.fqdn_listname
- if lang is None:
- if mlist is None:
- lang = config.mailman.default_language
- else:
- lang = mlist.preferred_language
- cachekey = (templatefile, lang, listname)
- filepath = _templatefilepathcache.get(cachekey)
- if filepath:
- template = _templatecache.get(filepath)
- if filepath is None or template is None:
- # Use the basic maketext, with defaults to get the raw template
- template, filepath = find(templatefile, mlist=mlist,
- language=lang.code)
- _templatefilepathcache[cachekey] = filepath
- _templatecache[filepath] = template
- # Copied from Utils.maketext()
- text = template
- if dict is not None:
- try:
- try:
- text = Template(template).safe_substitute(**dict)
- except UnicodeError:
- # Try again after coercing the template to unicode
- utemplate = unicode(template, lang.charset, 'replace')
- text = Template(utemplate).safe_substitute(**dict)
- except (TypeError, ValueError):
- # The template is really screwed up
- pass
- # Make sure the text is in the given character set, or html-ify any bogus
- # characters.
- return uncanonstr(text, lang.code)
-
-
-
-# Note: I'm overriding most, if not all of the pipermail Article class
-# here -ddm
-# The Article class encapsulates a single posting. The attributes are:
-#
-# sequence : Sequence number, unique for each article in a set of archives
-# subject : Subject
-# datestr : The posting date, in human-readable format
-# date : The posting date, in purely numeric format
-# fromdate : The posting date, in `unixfrom' format
-# headers : Any other headers of interest
-# author : The author's name (and possibly organization)
-# email : The author's e-mail address
-# msgid : A unique message ID
-# in_reply_to : If !="", this is the msgid of the article being replied to
-# references: A (possibly empty) list of msgid's of earlier articles in
-# the thread
-# body : A list of strings making up the message body
-
-class Article(pipermail.Article):
- __super_init = pipermail.Article.__init__
- __super_set_date = pipermail.Article._set_date
-
- _last_article_time = time.time()
-
- def __init__(self, message=None, sequence=0, keepHeaders=[],
- lang=config.mailman.default_language, mlist=None):
- self.__super_init(message, sequence, keepHeaders)
- self.prev = None
- self.next = None
- # Trim Re: from the subject line
- i = 0
- while i != -1:
- result = REpat.match(self.subject)
- if result:
- i = result.end(0)
- self.subject = self.subject[i:]
- else:
- i = -1
- # Useful to keep around
- self._lang = lang
- self._mlist = mlist
-
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- # Avoid i18n side-effects. Note that the language for this
- # article (for this list) could be different from the site-wide
- # preferred language, so we need to ensure no side-effects will
- # occur. Think what happens when executing bin/arch.
- with _.using(lang.code):
- if self.author == self.email:
- self.author = self.email = re.sub('@', _(' at '),
- self.email)
- else:
- self.email = re.sub('@', _(' at '), self.email)
- # Snag the content-* headers. RFC 1521 states that their values are
- # case insensitive.
- ctype = message.get('Content-Type', 'text/plain')
- cenc = message.get('Content-Transfer-Encoding', '')
- self.ctype = ctype.lower()
- self.cenc = cenc.lower()
- self.decoded = {}
- cset = mlist.preferred_language.charset
- cset_out = Charset(cset).output_charset or cset
- charset = message.get_content_charset(cset_out)
- if charset:
- charset = charset.lower().strip()
- if charset[0]=='"' and charset[-1]=='"':
- charset = charset[1:-1]
- if charset[0]=="'" and charset[-1]=="'":
- charset = charset[1:-1]
- try:
- body = message.get_payload(decode=True)
- except binascii.Error:
- body = None
- if body and charset != self._lang.charset:
- # decode body
- try:
- body = unicode(body, charset)
- except (UnicodeError, LookupError):
- body = None
- if body:
- self.body = [l + "\n" for l in body.splitlines()]
-
- self.decode_headers()
-
- def __getstate__(self):
- d = self.__dict__.copy()
- # We definitely don't want to pickle the MailList instance, so just
- # pickle a reference to it.
- if d.has_key('_mlist'):
- mlist = d['_mlist']
- del d['_mlist']
- else:
- mlist = None
- if mlist:
- d['__listname'] = self._mlist.fqdn_listname
- else:
- d['__listname'] = None
- # Delete a few other things we don't want in the pickle
- for attr in ('prev', 'next', 'body'):
- if d.has_key(attr):
- del d[attr]
- d['body'] = []
- return d
-
- def __setstate__(self, d):
- # For loading older Articles via pickle. All this stuff was added
- # when Simone Piunni and Tokio Kikuchi i18n'ified Pipermail. See SF
- # patch #594771.
- self.__dict__ = d
- listname = d.get('__listname')
- if listname:
- del d['__listname']
- d['_mlist'] = getUtility(IListManager).get(listname)
- if not d.has_key('_lang'):
- if hasattr(self, '_mlist'):
- self._lang = self._mlist.preferred_language
- else:
- self._lang = config.mailman.default_language
- if not d.has_key('cenc'):
- self.cenc = None
- if not d.has_key('decoded'):
- self.decoded = {}
-
- def setListIfUnset(self, mlist):
- if getattr(self, '_mlist', None) is None:
- self._mlist = mlist
-
- def quote(self, buf):
- return html_quote(buf, self._lang.code)
-
- def decode_headers(self):
- """MIME-decode headers.
-
- If the email, subject, or author attributes contain non-ASCII
- characters using the encoded-word syntax of RFC 2047, decoded versions
- of those attributes are placed in the self.decoded (a dictionary).
-
- If the list's charset differs from the header charset, an attempt is
- made to decode the headers as Unicode. If that fails, they are left
- undecoded.
- """
- author = self.decode_charset(self.author)
- subject = self.decode_charset(self.subject)
- if author:
- self.decoded['author'] = author
- email = self.decode_charset(self.email)
- if email:
- self.decoded['email'] = email
- if subject:
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- with _.using(self._lang.code):
- atmark = _(' at ')
- subject = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
- '\g<1>' + atmark + '\g<2>', subject)
- self.decoded['subject'] = subject
- self.decoded['stripped'] = self.strip_subject(subject or self.subject)
-
- def strip_subject(self, subject):
- # Strip subject_prefix and Re: for subject sorting
- # This part was taken from CookHeaders.py (TK)
- prefix = self._mlist.subject_prefix.strip()
- if prefix:
- prefix_pat = re.escape(prefix)
- prefix_pat = '%'.join(prefix_pat.split(r'\%'))
- prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat)
- subject = re.sub(prefix_pat, '', subject)
- subject = subject.lstrip()
- strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)
- stripped = strip_pat.sub('', subject)
- return stripped
-
- def decode_charset(self, field):
- # TK: This function was rewritten for unifying to Unicode.
- # Convert 'field' into Unicode one line string.
- try:
- pairs = decode_header(field)
- ustr = make_header(pairs).__unicode__()
- except (LookupError, UnicodeError, ValueError, HeaderParseError):
- # assume list's language
- cset = self._mlist.preferred_language.charset
- if cset == 'us-ascii':
- cset = 'iso-8859-1' # assume this for English list
- ustr = unicode(field, cset, 'replace')
- return u''.join(ustr.splitlines())
-
- def as_html(self):
- d = self.__dict__.copy()
- # Avoid i18n side-effects
- with _.using(self._lang.code):
- d["prev"], d["prev_wsubj"] = self._get_prev()
- d["next"], d["next_wsubj"] = self._get_next()
-
- d["email_html"] = self.quote(self.email)
- d["title"] = self.quote(self.subject)
- d["subject_html"] = self.quote(self.subject)
- d["subject_url"] = url_quote(self.subject)
- d["in_reply_to_url"] = url_quote(self.in_reply_to)
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- # Point the mailto url back to the list
- author = re.sub('@', _(' at '), self.author)
- emailurl = self._mlist.posting_address
- else:
- author = self.author
- emailurl = self.email
- d["author_html"] = self.quote(author)
- d["email_url"] = url_quote(emailurl)
- d["datestr_html"] = self.quote(ctime(int(self.date)))
- d["body"] = self._get_body()
- d['listurl'] = self._mlist.script_url('listinfo')
- d['listname'] = self._mlist.real_name
- d['encoding'] = ''
- charset = self._lang.charset
- d["encoding"] = html_charset % charset
-
- self._add_decoded(d)
- return quick_maketext(
- 'article.html', d,
- lang=self._lang, mlist=self._mlist)
-
- def _get_prev(self):
- """Return the href and subject for the previous message"""
- if self.prev:
- subject = self._get_subject_enc(self.prev)
- prev = ('<LINK REL="Previous" HREF="%s">'
- % (url_quote(self.prev.filename)))
- prev_wsubj = ('<LI>' + _('Previous message (by thread):') +
- ' <A HREF="%s">%s\n</A></li>'
- % (url_quote(self.prev.filename),
- self.quote(subject)))
- else:
- prev = prev_wsubj = ""
- return prev, prev_wsubj
-
- def _get_subject_enc(self, art):
- """Return the subject of art, decoded if possible.
-
- If the charset of the current message and art match and the
- article's subject is encoded, decode it.
- """
- return art.decoded.get('subject', art.subject)
-
- def _get_next(self):
- """Return the href and subject for the previous message"""
- if self.next:
- subject = self._get_subject_enc(self.next)
- next = ('<LINK REL="Next" HREF="%s">'
- % (url_quote(self.next.filename)))
- next_wsubj = ('<LI>' + _('Next message (by thread):') +
- ' <A HREF="%s">%s\n</A></li>'
- % (url_quote(self.next.filename),
- self.quote(subject)))
- else:
- next = next_wsubj = ""
- return next, next_wsubj
-
- _rx_quote = re.compile('=([A-F0-9][A-F0-9])')
- _rx_softline = re.compile('=[ \t]*$')
-
- def _get_body(self):
- """Return the message body ready for HTML, decoded if necessary"""
- try:
- body = self.html_body
- except AttributeError:
- body = self.body
- return null_to_space(EMPTYSTRING.join(body))
-
- def _add_decoded(self, d):
- """Add encoded-word keys to HTML output"""
- for src, dst in (('author', 'author_html'),
- ('email', 'email_html'),
- ('subject', 'subject_html'),
- ('subject', 'title')):
- if self.decoded.has_key(src):
- d[dst] = self.quote(self.decoded[src])
-
- def as_text(self):
- d = self.__dict__.copy()
- # We need to guarantee a valid From_ line, even if there are
- # bososities in the headers.
- if not d.get('fromdate', '').strip():
- d['fromdate'] = time.ctime(time.time())
- if not d.get('email', '').strip():
- d['email'] = 'bogus@does.not.exist.com'
- if not d.get('datestr', '').strip():
- d['datestr'] = time.ctime(time.time())
- #
- headers = ['From %(email)s %(fromdate)s',
- 'From: %(email)s (%(author)s)',
- 'Date: %(datestr)s',
- 'Subject: %(subject)s']
- if d['_in_reply_to']:
- headers.append('In-Reply-To: %(_in_reply_to)s')
- if d['_references']:
- headers.append('References: %(_references)s')
- if d['_message_id']:
- headers.append('Message-ID: %(_message_id)s')
- body = EMPTYSTRING.join(self.body)
- cset = self._lang.charset
- # Coerce the body to Unicode and replace any invalid characters.
- if not isinstance(body, unicode):
- body = unicode(body, cset, 'replace')
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- with _.using(self._lang.code):
- atmark = _(' at ')
- body = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
- '\g<1>' + atmark + '\g<2>', body)
- # Return body to character set of article.
- body = body.encode(cset, 'replace')
- return NL.join(headers) % d + '\n\n' + body + '\n'
-
- def _set_date(self, message):
- self.__super_set_date(message)
- self.fromdate = time.ctime(int(self.date))
-
- def loadbody_fromHTML(self,fileobj):
- self.body = []
- begin = 0
- while 1:
- line = fileobj.readline()
- if not line:
- break
- if not begin:
- if line.strip() == '<!--beginarticle-->':
- begin = 1
- continue
- if line.strip() == '<!--endarticle-->':
- break
- self.body.append(line)
-
- def finished_update_article(self):
- self.body = []
- try:
- del self.html_body
- except AttributeError:
- pass
-
-
-class HyperArchive(pipermail.T):
- __super_init = pipermail.T.__init__
- __super_update_archive = pipermail.T.update_archive
- __super_update_dirty_archives = pipermail.T.update_dirty_archives
- __super_add_article = pipermail.T.add_article
-
- # some defaults
- DIRMODE = 02775
- FILEMODE = 0660
-
- VERBOSE = 0
- DEFAULTINDEX = 'thread'
- ARCHIVE_PERIOD = 'month'
-
- THREADLAZY = 0
- THREADLEVELS = 3
-
- ALLOWHTML = 1 # "Lines between <html></html>" handled as is.
- SHOWHTML = 0 # Eg, nuke leading whitespace in html manner.
- IQUOTES = 1 # Italicize quoted text.
- SHOWBR = 0 # Add <br> onto every line
-
- def __init__(self, maillist):
- # can't init the database while other processes are writing to it!
- dir = maillist.archive_dir()
- db = HyperDatabase.HyperDatabase(dir, maillist)
- self.__super_init(dir, reload=1, database=db)
-
- self.maillist = maillist
- self._lock_file = None
- self.lang = maillist.preferred_language
- self.charset = maillist.preferred_language.charset
-
- if hasattr(self.maillist,'archive_volume_frequency'):
- if self.maillist.archive_volume_frequency == 0:
- self.ARCHIVE_PERIOD='year'
- elif self.maillist.archive_volume_frequency == 2:
- self.ARCHIVE_PERIOD='quarter'
- elif self.maillist.archive_volume_frequency == 3:
- self.ARCHIVE_PERIOD='week'
- elif self.maillist.archive_volume_frequency == 4:
- self.ARCHIVE_PERIOD='day'
- else:
- self.ARCHIVE_PERIOD='month'
-
- yre = r'(?P<year>[0-9]{4,4})'
- mre = r'(?P<month>[01][0-9])'
- dre = r'(?P<day>[0123][0-9])'
- self._volre = {
- 'year': '^' + yre + '$',
- 'quarter': '^' + yre + r'q(?P<quarter>[1234])$',
- 'month': '^' + yre + r'-(?P<month>[a-zA-Z]+)$',
- 'week': r'^Week-of-Mon-' + yre + mre + dre,
- 'day': '^' + yre + mre + dre + '$'
- }
-
- def _makeArticle(self, msg, sequence):
- return Article(msg, sequence,
- lang=self.maillist.preferred_language,
- mlist=self.maillist)
-
- def html_foot(self):
- mlist = self.maillist
- # Convenience
- def quotetime(s):
- return html_quote(ctime(s), self.lang.code)
- # Avoid i18n side-effects
- with _.using(mlist.preferred_language.code):
- d = {"lastdate": quotetime(self.lastdate),
- "archivedate": quotetime(self.archivedate),
- "listinfo": mlist.script_url('listinfo'),
- "version": self.version,
- }
- i = {"thread": _("thread"),
- "subject": _("subject"),
- "author": _("author"),
- "date": _("date")
- }
- for t in i.keys():
- cap = t[0].upper() + t[1:]
- if self.type == cap:
- d["%s_ref" % (t)] = ""
- else:
- d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>'
- % (t, i[t]))
- return quick_maketext(
- 'archidxfoot.html', d,
- mlist=mlist)
-
- def html_head(self):
- mlist = self.maillist
- # Convenience
- def quotetime(s):
- return html_quote(ctime(s), self.lang.code)
- # Avoid i18n side-effects
- with _.using(mlist.preferred_language.code):
- d = {"listname": html_quote(mlist.real_name, self.lang.code),
- "archtype": self.type,
- "archive": self.volNameToDesc(self.archive),
- "listinfo": mlist.script_url('listinfo'),
- "firstdate": quotetime(self.firstdate),
- "lastdate": quotetime(self.lastdate),
- "size": self.size,
- }
- i = {"thread": _("thread"),
- "subject": _("subject"),
- "author": _("author"),
- "date": _("date"),
- }
- for t in i.keys():
- cap = t[0].upper() + t[1:]
- if self.type == cap:
- d["%s_ref" % (t)] = ""
- d["archtype"] = i[t]
- else:
- d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>'
- % (t, i[t]))
- if self.charset:
- d["encoding"] = html_charset % self.charset
- else:
- d["encoding"] = ""
- return quick_maketext(
- 'archidxhead.html', d,
- mlist=mlist)
-
- def html_TOC(self):
- mlist = self.maillist
- listname = mlist.fqdn_listname
- mbox = os.path.join(mlist.archive_dir()+'.mbox', listname+'.mbox')
- d = {"listname": mlist.real_name,
- "listinfo": mlist.script_url('listinfo'),
- "fullarch": '../%s.mbox/%s.mbox' % (listname, listname),
- "size": sizeof(mbox, mlist.preferred_language),
- 'meta': '',
- }
- # Avoid i18n side-effects
- with _.using(mlist.preferred_language.code):
- if not self.archives:
- d["noarchive_msg"] = _(
- '<P>Currently, there are no archives. </P>')
- d["archive_listing_start"] = ""
- d["archive_listing_end"] = ""
- d["archive_listing"] = ""
- else:
- d["noarchive_msg"] = ""
- d["archive_listing_start"] = quick_maketext(
- 'archliststart.html',
- lang=mlist.preferred_language,
- mlist=mlist)
- d["archive_listing_end"] = quick_maketext(
- 'archlistend.html',
- mlist=mlist)
-
- accum = []
- for a in self.archives:
- accum.append(self.html_TOC_entry(a))
- d["archive_listing"] = EMPTYSTRING.join(accum)
- # The TOC is always in the charset of the list's preferred language
- d['meta'] += html_charset % mlist.preferred_language.charset
- # The site can disable public access to the mbox file.
- if as_boolean(config.archiver.pipermail.public_mbox):
- template = 'archtoc.html'
- else:
- template = 'archtocnombox.html'
- return quick_maketext(template, d, mlist=mlist)
-
- def html_TOC_entry(self, arch):
- # Check to see if the archive is gzip'd or not
- txtfile = os.path.join(self.maillist.archive_dir(), arch + '.txt')
- gzfile = txtfile + '.gz'
- # which exists? .txt.gz first, then .txt
- if os.path.exists(gzfile):
- file = gzfile
- url = arch + '.txt.gz'
- templ = '<td><A href="%(url)s">[ ' + _('Gzip\'d Text%(sz)s') \
- + ']</a></td>'
- elif os.path.exists(txtfile):
- file = txtfile
- url = arch + '.txt'
- templ = '<td><A href="%(url)s">[ ' + _('Text%(sz)s') + ']</a></td>'
- else:
- # neither found?
- file = None
- # in Python 1.5.2 we have an easy way to get the size
- if file:
- textlink = templ % {
- 'url': url,
- 'sz' : sizeof(file, self.maillist.preferred_language)
- }
- else:
- # there's no archive file at all... hmmm.
- textlink = ''
- return quick_maketext(
- 'archtocentry.html',
- {'archive': arch,
- 'archivelabel': self.volNameToDesc(arch),
- 'textlink': textlink
- },
- mlist=self.maillist)
-
- def GetArchLock(self):
- if self._lock_file:
- return 1
- self._lock_file = Lock(
- os.path.join(config.LOCK_DIR,
- self.maillist.fqdn_listname + '-arch.lock'))
- try:
- self._lock_file.lock(timeout=0.5)
- except TimeOutError:
- return 0
- return 1
-
- def DropArchLock(self):
- if self._lock_file:
- self._lock_file.unlock(unconditionally=1)
- self._lock_file = None
-
- def processListArch(self):
- name = self.maillist.ArchiveFileName()
- wname= name+'.working'
- ename= name+'.err_unarchived'
- try:
- os.stat(name)
- except (IOError,os.error):
- #no archive file, nothin to do -ddm
- return
-
- #see if arch is locked here -ddm
- if not self.GetArchLock():
- #another archiver is running, nothing to do. -ddm
- return
-
- #if the working file is still here, the archiver may have
- # crashed during archiving. Save it, log an error, and move on.
- try:
- wf = open(wname)
- log.error('Archive working file %s present. '
- 'Check %s for possibly unarchived msgs',
- wname, ename)
- omask = os.umask(007)
- try:
- ef = open(ename, 'a+')
- finally:
- os.umask(omask)
- ef.seek(1,2)
- if ef.read(1) <> '\n':
- ef.write('\n')
- ef.write(wf.read())
- ef.close()
- wf.close()
- os.unlink(wname)
- except IOError:
- pass
- os.rename(name,wname)
- archfile = open(wname)
- self.processUnixMailbox(archfile)
- archfile.close()
- os.unlink(wname)
- self.DropArchLock()
-
- def get_filename(self, article):
- return '%06i.html' % (article.sequence,)
-
- def get_archives(self, article):
- """Return a list of indexes where the article should be filed.
- A string can be returned if the list only contains one entry,
- and the empty list is legal."""
- res = self.dateToVolName(float(article.date))
- self.message(_("figuring article archives\n"))
- self.message(res + "\n")
- return res
-
- def volNameToDesc(self, volname):
- volname = volname.strip()
- # Don't make these module global constants since we have to runtime
- # translate them anyway.
- monthdict = [
- '',
- _('January'), _('February'), _('March'), _('April'),
- _('May'), _('June'), _('July'), _('August'),
- _('September'), _('October'), _('November'), _('December')
- ]
- for each in self._volre.keys():
- match = re.match(self._volre[each], volname)
- # Let ValueErrors percolate up
- if match:
- year = int(match.group('year'))
- if each == 'quarter':
- d =["", _("First"), _("Second"), _("Third"), _("Fourth") ]
- ord = d[int(match.group('quarter'))]
- return _("%(ord)s quarter %(year)i")
- elif each == 'month':
- monthstr = match.group('month').lower()
- for i in range(1, 13):
- monthname = time.strftime("%B", (1999,i,1,0,0,0,0,1,0))
- if monthstr.lower() == monthname.lower():
- month = monthdict[i]
- return _("%(month)s %(year)i")
- raise ValueError, "%s is not a month!" % monthstr
- elif each == 'week':
- month = monthdict[int(match.group("month"))]
- day = int(match.group("day"))
- return _("The Week Of Monday %(day)i %(month)s %(year)i")
- elif each == 'day':
- month = monthdict[int(match.group("month"))]
- day = int(match.group("day"))
- return _("%(day)i %(month)s %(year)i")
- else:
- return match.group('year')
- raise ValueError, "%s is not a valid volname" % volname
-
-# The following two methods should be inverses of each other. -ddm
-
- def dateToVolName(self,date):
- datetuple=time.localtime(date)
- if self.ARCHIVE_PERIOD=='year':
- return time.strftime("%Y",datetuple)
- elif self.ARCHIVE_PERIOD=='quarter':
- if datetuple[1] in [1,2,3]:
- return time.strftime("%Yq1",datetuple)
- elif datetuple[1] in [4,5,6]:
- return time.strftime("%Yq2",datetuple)
- elif datetuple[1] in [7,8,9]:
- return time.strftime("%Yq3",datetuple)
- else:
- return time.strftime("%Yq4",datetuple)
- elif self.ARCHIVE_PERIOD == 'day':
- return time.strftime("%Y%m%d", datetuple)
- elif self.ARCHIVE_PERIOD == 'week':
- # Reconstruct "seconds since epoch", and subtract weekday
- # multiplied by the number of seconds in a day.
- monday = time.mktime(datetuple) - datetuple[6] * 24 * 60 * 60
- # Build a new datetuple from this "seconds since epoch" value
- datetuple = time.localtime(monday)
- return time.strftime("Week-of-Mon-%Y%m%d", datetuple)
- # month. -ddm
- else:
- return time.strftime("%Y-%B",datetuple)
-
-
- def volNameToDate(self, volname):
- volname = volname.strip()
- for each in self._volre.keys():
- match = re.match(self._volre[each],volname)
- if match:
- year = int(match.group('year'))
- month = 1
- day = 1
- if each == 'quarter':
- q = int(match.group('quarter'))
- month = (q * 3) - 2
- elif each == 'month':
- monthstr = match.group('month').lower()
- m = []
- for i in range(1,13):
- m.append(
- time.strftime("%B",(1999,i,1,0,0,0,0,1,0)).lower())
- try:
- month = m.index(monthstr) + 1
- except ValueError:
- pass
- elif each == 'week' or each == 'day':
- month = int(match.group("month"))
- day = int(match.group("day"))
- try:
- return time.mktime((year,month,1,0,0,0,0,1,-1))
- except OverflowError:
- return 0.0
- return 0.0
-
- def sortarchives(self):
- def sf(a, b):
- al = self.volNameToDate(a)
- bl = self.volNameToDate(b)
- if al > bl:
- return 1
- elif al < bl:
- return -1
- else:
- return 0
- if self.ARCHIVE_PERIOD in ('month','year','quarter'):
- self.archives.sort(sf)
- else:
- self.archives.sort()
- self.archives.reverse()
-
- def message(self, msg):
- if self.VERBOSE:
- f = sys.stderr
- f.write(msg)
- if msg[-1:] != '\n':
- f.write('\n')
- f.flush()
-
- def open_new_archive(self, archive, archivedir):
- index_html = os.path.join(archivedir, 'index.html')
- try:
- os.unlink(index_html)
- except:
- pass
- os.symlink(self.DEFAULTINDEX+'.html',index_html)
-
- def write_index_header(self):
- self.depth=0
- print self.html_head()
- if not self.THREADLAZY and self.type=='Thread':
- self.message(_("Computing threaded index\n"))
- self.updateThreadedIndex()
-
- def write_index_footer(self):
- for i in range(self.depth):
- print '</UL>'
- print self.html_foot()
-
- def write_index_entry(self, article):
- subject = self.get_header("subject", article)
- author = self.get_header("author", article)
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- try:
- author = re.sub('@', _(' at '), author)
- except UnicodeError:
- # Non-ASCII author contains '@' ... no valid email anyway
- pass
- subject = CGIescape(subject, self.lang)
- author = CGIescape(author, self.lang)
-
- d = {
- 'filename': urllib.quote(article.filename),
- 'subject': subject,
- 'sequence': article.sequence,
- 'author': author
- }
- print quick_maketext(
- 'archidxentry.html', d,
- mlist=self.maillist)
-
- def get_header(self, field, article):
- # if we have no decoded header, return the encoded one
- result = article.decoded.get(field)
- if result is None:
- return getattr(article, field)
- # otherwise, the decoded one will be Unicode
- return result
-
- def write_threadindex_entry(self, article, depth):
- if depth < 0:
- self.message('depth<0')
- depth = 0
- if depth > self.THREADLEVELS:
- depth = self.THREADLEVELS
- if depth < self.depth:
- for i in range(self.depth-depth):
- print '</UL>'
- elif depth > self.depth:
- for i in range(depth-self.depth):
- print '<UL>'
- print '<!--%i %s -->' % (depth, article.threadKey)
- self.depth = depth
- self.write_index_entry(article)
-
- def write_TOC(self):
- self.sortarchives()
- omask = os.umask(002)
- try:
- toc = open(os.path.join(self.basedir, 'index.html'), 'w')
- finally:
- os.umask(omask)
- toc.write(self.html_TOC())
- toc.close()
-
- def write_article(self, index, article, path):
- # called by add_article
- omask = os.umask(002)
- try:
- f = open(path, 'w')
- finally:
- os.umask(omask)
- f.write(article.as_html())
- f.close()
-
- # Write the text article to the text archive.
- path = os.path.join(self.basedir, "%s.txt" % index)
- omask = os.umask(002)
- try:
- f = open(path, 'a+')
- finally:
- os.umask(omask)
- f.write(article.as_text())
- f.close()
-
- def update_archive(self, archive):
- self.__super_update_archive(archive)
- # only do this if the gzip module was imported globally, and
- # gzip'ing was enabled via Defaults.GZIP_ARCHIVE_TXT_FILES. See
- # above.
- if gzip:
- archz = None
- archt = None
- txtfile = os.path.join(self.basedir, '%s.txt' % archive)
- gzipfile = os.path.join(self.basedir, '%s.txt.gz' % archive)
- oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % archive)
- try:
- # open the plain text file
- archt = open(txtfile)
- except IOError:
- return
- try:
- os.rename(gzipfile, oldgzip)
- archz = gzip.open(oldgzip)
- except (IOError, RuntimeError, os.error):
- pass
- try:
- ou = os.umask(002)
- newz = gzip.open(gzipfile, 'w')
- finally:
- # XXX why is this a finally?
- os.umask(ou)
- if archz:
- newz.write(archz.read())
- archz.close()
- os.unlink(oldgzip)
- # XXX do we really need all this in a try/except?
- try:
- newz.write(archt.read())
- newz.close()
- archt.close()
- except IOError:
- pass
- os.unlink(txtfile)
-
- _skip_attrs = ('maillist', '_lock_file', 'charset')
-
- def getstate(self):
- d={}
- for each in self.__dict__.keys():
- if not (each in self._skip_attrs
- or each.upper() == each):
- d[each] = self.__dict__[each]
- return d
-
- # Add <A HREF="..."> tags around URLs and e-mail addresses.
-
- def __processbody_URLquote(self, lines):
- # XXX a lot to do here:
- # 1. use lines directly, rather than source and dest
- # 2. make it clearer
- # 3. make it faster
- # TK: Prepare for unicode obscure.
- atmark = _(' at ')
- if lines and isinstance(lines[0], unicode):
- atmark = unicode(atmark, self.lang.charset, 'replace')
- source = lines[:]
- dest = lines
- last_line_was_quoted = 0
- for i in xrange(0, len(source)):
- Lorig = L = source[i]
- prefix = suffix = ""
- if L is None:
- continue
- # Italicise quoted text
- if self.IQUOTES:
- quoted = quotedpat.match(L)
- if quoted is None:
- last_line_was_quoted = 0
- else:
- quoted = quoted.end(0)
- prefix = CGIescape(L[:quoted], self.lang) + '<i>'
- suffix = '</I>'
- if self.SHOWHTML:
- suffix += '<BR>'
- if not last_line_was_quoted:
- prefix = '<BR>' + prefix
- L = L[quoted:]
- last_line_was_quoted = 1
- # Check for an e-mail address
- L2 = ""
- jr = emailpat.search(L)
- kr = urlpat.search(L)
- while jr is not None or kr is not None:
- if jr == None:
- j = -1
- else:
- j = jr.start(0)
- if kr is None:
- k = -1
- else:
- k = kr.start(0)
- if j != -1 and (j < k or k == -1):
- text = jr.group(1)
- length = len(text)
- if as_boolean(
- config.archiver.pipermail.obscure_email_addresses):
- text = re.sub('@', atmark, text)
- URL = self.maillist.script_url('listinfo')
- else:
- URL = 'mailto:' + text
- pos = j
- elif k != -1 and (j > k or j == -1):
- text = URL = kr.group(1)
- length = len(text)
- pos = k
- else: # j==k
- raise ValueError, "j==k: This can't happen!"
- #length = len(text)
- #self.message("URL: %s %s %s \n"
- # % (CGIescape(L[:pos]), URL, CGIescape(text)))
- L2 += '%s<A HREF="%s">%s</A>' % (
- CGIescape(L[:pos], self.lang),
- html_quote(URL), CGIescape(text, self.lang))
- L = L[pos+length:]
- jr = emailpat.search(L)
- kr = urlpat.search(L)
- if jr is None and kr is None:
- L = CGIescape(L, self.lang)
- L = prefix + L2 + L + suffix
- source[i] = None
- dest[i] = L
-
- # Perform Hypermail-style processing of <HTML></HTML> directives
- # in message bodies. Lines between <HTML> and </HTML> will be written
- # out precisely as they are; other lines will be passed to func2
- # for further processing .
-
- def __processbody_HTML(self, lines):
- # XXX need to make this method modify in place
- source = lines[:]
- dest = lines
- l = len(source)
- i = 0
- while i < l:
- while i < l and htmlpat.match(source[i]) is None:
- i = i + 1
- if i < l:
- source[i] = None
- i = i + 1
- while i < l and nohtmlpat.match(source[i]) is None:
- dest[i], source[i] = source[i], None
- i = i + 1
- if i < l:
- source[i] = None
- i = i + 1
-
- def format_article(self, article):
- # called from add_article
- # TBD: Why do the HTML formatting here and keep it in the
- # pipermail database? It makes more sense to do the html
- # formatting as the article is being written as html and toss
- # the data after it has been written to the archive file.
- lines = filter(None, article.body)
- # Handle <HTML> </HTML> directives
- if self.ALLOWHTML:
- self.__processbody_HTML(lines)
- self.__processbody_URLquote(lines)
- if not self.SHOWHTML and lines:
- lines.insert(0, '<PRE>')
- lines.append('</PRE>')
- else:
- # Do fancy formatting here
- if self.SHOWBR:
- lines = map(lambda x:x + "<BR>", lines)
- else:
- for i in range(0, len(lines)):
- s = lines[i]
- if s[0:1] in ' \t\n':
- lines[i] = '<P>' + s
- article.html_body = lines
- return article
-
- def update_article(self, arcdir, article, prev, next):
- seq = article.sequence
- filename = os.path.join(arcdir, article.filename)
- self.message(_('Updating HTML for article %(seq)s'))
- try:
- f = open(filename)
- article.loadbody_fromHTML(f)
- f.close()
- except IOError, e:
- if e.errno <> errno.ENOENT: raise
- self.message(_('article file %(filename)s is missing!'))
- article.prev = prev
- article.next = next
- omask = os.umask(002)
- try:
- f = open(filename, 'w')
- finally:
- os.umask(omask)
- f.write(article.as_html())
- f.close()
diff --git a/src/mailman/Archiver/HyperDatabase.py b/src/mailman/Archiver/HyperDatabase.py
deleted file mode 100644
index fecb544e8..000000000
--- a/src/mailman/Archiver/HyperDatabase.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-#
-# site modules
-#
-import os
-import marshal
-import time
-import errno
-
-#
-# package/project modules
-#
-import pipermail
-from flufl.lock import Lock, NotLockedError
-
-CACHESIZE = pipermail.CACHESIZE
-
-try:
- import cPickle
- pickle = cPickle
-except ImportError:
- import pickle
-
-#
-# we're using a python dict in place of
-# of bsddb.btree database. only defining
-# the parts of the interface used by class HyperDatabase
-# only one thing can access this at a time.
-#
-class DumbBTree:
- """Stores pickles of Article objects
-
- This dictionary-like object stores pickles of all the Article
- objects. The object itself is stored using marshal. It would be
- much simpler, and probably faster, to store the actual objects in
- the DumbBTree and pickle it.
-
- TBD: Also needs a more sensible name, like IteratableDictionary or
- SortedDictionary.
- """
-
- def __init__(self, path):
- self.current_index = 0
- self.path = path
- self.lockfile = Lock(self.path + ".lock")
- self.lock()
- self.__dirty = 0
- self.dict = {}
- self.sorted = []
- self.load()
-
- def __repr__(self):
- return "DumbBTree(%s)" % self.path
-
- def __sort(self, dirty=None):
- if self.__dirty == 1 or dirty:
- self.sorted = self.dict.keys()
- self.sorted.sort()
- self.__dirty = 0
-
- def lock(self):
- self.lockfile.lock()
-
- def unlock(self):
- try:
- self.lockfile.unlock()
- except NotLockedError:
- pass
-
- def __delitem__(self, item):
- # if first hasn't been called, we can skip the sort
- if self.current_index == 0:
- del self.dict[item]
- self.__dirty = 1
- return
- try:
- ci = self.sorted[self.current_index]
- except IndexError:
- ci = None
- if ci == item:
- try:
- ci = self.sorted[self.current_index + 1]
- except IndexError:
- ci = None
- del self.dict[item]
- self.__sort(dirty=1)
- if ci is not None:
- self.current_index = self.sorted.index(ci)
- else:
- self.current_index = self.current_index + 1
-
- def clear(self):
- # bulk clearing much faster than deleting each item, esp. with the
- # implementation of __delitem__() above :(
- self.dict = {}
-
- def first(self):
- self.__sort() # guarantee that the list is sorted
- if not self.sorted:
- raise KeyError
- else:
- key = self.sorted[0]
- self.current_index = 1
- return key, self.dict[key]
-
- def last(self):
- if not self.sorted:
- raise KeyError
- else:
- key = self.sorted[-1]
- self.current_index = len(self.sorted) - 1
- return key, self.dict[key]
-
- def next(self):
- try:
- key = self.sorted[self.current_index]
- except IndexError:
- raise KeyError
- self.current_index = self.current_index + 1
- return key, self.dict[key]
-
- def has_key(self, key):
- return self.dict.has_key(key)
-
- def set_location(self, loc):
- if not self.dict.has_key(loc):
- raise KeyError
- self.current_index = self.sorted.index(loc)
-
- def __getitem__(self, item):
- return self.dict[item]
-
- def __setitem__(self, item, val):
- # if first hasn't been called, then we don't need to worry
- # about sorting again
- if self.current_index == 0:
- self.dict[item] = val
- self.__dirty = 1
- return
- try:
- current_item = self.sorted[self.current_index]
- except IndexError:
- current_item = item
- self.dict[item] = val
- self.__sort(dirty=1)
- self.current_index = self.sorted.index(current_item)
-
- def __len__(self):
- return len(self.sorted)
-
- def load(self):
- try:
- fp = open(self.path)
- try:
- self.dict = marshal.load(fp)
- finally:
- fp.close()
- except IOError, e:
- if e.errno <> errno.ENOENT: raise
- pass
- except EOFError:
- pass
- else:
- self.__sort(dirty=1)
-
- def close(self):
- omask = os.umask(007)
- try:
- fp = open(self.path, 'w')
- finally:
- os.umask(omask)
- fp.write(marshal.dumps(self.dict))
- fp.close()
- self.unlock()
-
-
-# this is lifted straight out of pipermail with
-# the bsddb.btree replaced with above class.
-# didn't use inheritance because of all the
-# __internal stuff that needs to be here -scott
-#
-class HyperDatabase(pipermail.Database):
- __super_addArticle = pipermail.Database.addArticle
-
- def __init__(self, basedir, mlist):
- self.__cache = {}
- self.__currentOpenArchive = None # The currently open indices
- self._mlist = mlist
- self.basedir = os.path.expanduser(basedir)
- # Recently added articles, indexed only by message ID
- self.changed={}
-
- def firstdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- datekey, msgid = self.dateIndex.first()
- date = time.asctime(time.localtime(float(datekey[0])))
- except KeyError:
- pass
- return date
-
- def lastdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- datekey, msgid = self.dateIndex.last()
- date = time.asctime(time.localtime(float(datekey[0])))
- except KeyError:
- pass
- return date
-
- def numArticles(self, archive):
- self.__openIndices(archive)
- return len(self.dateIndex)
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- self.__openIndices(archive)
- self.__super_addArticle(archive, article, subject, author, date)
-
- def __openIndices(self, archive):
- if self.__currentOpenArchive == archive:
- return
- self.__closeIndices()
- arcdir = os.path.join(self.basedir, 'database')
- omask = os.umask(0)
- try:
- try:
- os.mkdir(arcdir, 02770)
- except OSError, e:
- if e.errno <> errno.EEXIST: raise
- finally:
- os.umask(omask)
- for i in ('date', 'author', 'subject', 'article', 'thread'):
- t = DumbBTree(os.path.join(arcdir, archive + '-' + i))
- setattr(self, i + 'Index', t)
- self.__currentOpenArchive = archive
-
- def __closeIndices(self):
- for i in ('date', 'author', 'subject', 'thread', 'article'):
- attr = i + 'Index'
- if hasattr(self, attr):
- index = getattr(self, attr)
- if i == 'article':
- if not hasattr(self, 'archive_length'):
- self.archive_length = {}
- l = len(index)
- self.archive_length[self.__currentOpenArchive] = l
- index.close()
- delattr(self, attr)
- self.__currentOpenArchive = None
-
- def close(self):
- self.__closeIndices()
-
- def hasArticle(self, archive, msgid):
- self.__openIndices(archive)
- return self.articleIndex.has_key(msgid)
-
- def setThreadKey(self, archive, key, msgid):
- self.__openIndices(archive)
- self.threadIndex[key]=msgid
-
- def getArticle(self, archive, msgid):
- self.__openIndices(archive)
- if not self.__cache.has_key(msgid):
- # get the pickled object out of the DumbBTree
- buf = self.articleIndex[msgid]
- article = self.__cache[msgid] = pickle.loads(buf)
- # For upgrading older archives
- article.setListIfUnset(self._mlist)
- else:
- article = self.__cache[msgid]
- return article
-
- def first(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index + 'Index')
- try:
- key, msgid = index.first()
- return msgid
- except KeyError:
- return None
-
- def next(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index + 'Index')
- try:
- key, msgid = index.next()
- return msgid
- except KeyError:
- return None
-
- def getOldestArticle(self, archive, subject):
- self.__openIndices(archive)
- subject = subject.lower()
- try:
- key, tempid=self.subjectIndex.set_location(subject)
- self.subjectIndex.next()
- [subject2, date]= key.split('\0')
- if subject!=subject2: return None
- return tempid
- except KeyError:
- return None
-
- def newArchive(self, archive):
- pass
-
- def clearIndex(self, archive, index):
- self.__openIndices(archive)
- if hasattr(self.threadIndex, 'clear'):
- self.threadIndex.clear()
- return
- finished=0
- try:
- key, msgid=self.threadIndex.first()
- except KeyError: finished=1
- while not finished:
- del self.threadIndex[key]
- try:
- key, msgid=self.threadIndex.next()
- except KeyError: finished=1
diff --git a/src/mailman/Archiver/__init__.py b/src/mailman/Archiver/__init__.py
deleted file mode 100644
index be0c61ce0..000000000
--- a/src/mailman/Archiver/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-from Archiver import *
diff --git a/src/mailman/Archiver/pipermail.py b/src/mailman/Archiver/pipermail.py
deleted file mode 100644
index e11cb7173..000000000
--- a/src/mailman/Archiver/pipermail.py
+++ /dev/null
@@ -1,872 +0,0 @@
-#! /usr/bin/env python
-
-import os
-import re
-import sys
-import time
-import logging
-import mailbox
-
-import cPickle as pickle
-
-from cStringIO import StringIO
-from email.utils import parseaddr, parsedate_tz, mktime_tz, formatdate
-from string import lowercase
-
-__version__ = '0.11 (Mailman edition)'
-VERSION = __version__
-CACHESIZE = 100 # Number of slots in the cache
-
-from mailman.core import errors
-from mailman.core.i18n import _
-
-SPACE = ' '
-
-log = logging.getLogger('mailman.error')
-
-
-
-msgid_pat = re.compile(r'(<.*>)')
-def strip_separators(s):
- "Remove quotes or parenthesization from a Message-ID string"
- if not s:
- return ""
- if s[0] in '"<([' and s[-1] in '">)]':
- s = s[1:-1]
- return s
-
-smallNameParts = ['van', 'von', 'der', 'de']
-
-def fixAuthor(author):
- "Canonicalize a name into Last, First format"
- # If there's a comma, guess that it's already in "Last, First" format
- if ',' in author:
- return author
- L = author.split()
- i = len(L) - 1
- if i == 0:
- return author # The string's one word--forget it
- if author.upper() == author or author.lower() == author:
- # Damn, the name is all upper- or lower-case.
- while i > 0 and L[i-1].lower() in smallNameParts:
- i = i - 1
- else:
- # Mixed case; assume that small parts of the last name will be
- # in lowercase, and check them against the list.
- while i>0 and (L[i-1][0] in lowercase or
- L[i-1].lower() in smallNameParts):
- i = i - 1
- author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i])
- return author
-
-# Abstract class for databases
-
-class DatabaseInterface:
- def __init__(self): pass
- def close(self): pass
- def getArticle(self, archive, msgid): pass
- def hasArticle(self, archive, msgid): pass
- def addArticle(self, archive, article, subject=None, author=None,
- date=None): pass
- def firstdate(self, archive): pass
- def lastdate(self, archive): pass
- def first(self, archive, index): pass
- def next(self, archive, index): pass
- def numArticles(self, archive): pass
- def newArchive(self, archive): pass
- def setThreadKey(self, archive, key, msgid): pass
- def getOldestArticle(self, subject): pass
-
-class Database(DatabaseInterface):
- """Define the basic sorting logic for a database
-
- Assumes that the database internally uses dateIndex, authorIndex,
- etc.
- """
-
- # TBD Factor out more of the logic shared between BSDDBDatabase
- # and HyperDatabase and place it in this class.
-
- def __init__(self):
- # This method need not be called by subclasses that do their
- # own initialization.
- self.dateIndex = {}
- self.authorIndex = {}
- self.subjectIndex = {}
- self.articleIndex = {}
- self.changed = {}
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- # create the keys; always end w/ msgid which will be unique
- authorkey = (author or article.author, article.date,
- article.msgid)
- subjectkey = (subject or article.subject, article.date,
- article.msgid)
- datekey = date or article.date, article.msgid
-
- # Add the new article
- self.dateIndex[datekey] = article.msgid
- self.authorIndex[authorkey] = article.msgid
- self.subjectIndex[subjectkey] = article.msgid
-
- self.store_article(article)
- self.changed[archive, article.msgid] = None
-
- parentID = article.parentID
- if parentID is not None and self.articleIndex.has_key(parentID):
- parent = self.getArticle(archive, parentID)
- myThreadKey = parent.threadKey + article.date + '-'
- else:
- myThreadKey = article.date + '-'
- article.threadKey = myThreadKey
- key = myThreadKey, article.msgid
- self.setThreadKey(archive, key, article.msgid)
-
- def store_article(self, article):
- """Store article without message body to save space"""
- # TBD this is not thread safe!
- temp = article.body
- temp2 = article.html_body
- article.body = []
- del article.html_body
- self.articleIndex[article.msgid] = pickle.dumps(article)
- article.body = temp
- article.html_body = temp2
-
-
-# The Article class encapsulates a single posting. The attributes
-# are:
-#
-# sequence : Sequence number, unique for each article in a set of archives
-# subject : Subject
-# datestr : The posting date, in human-readable format
-# date : The posting date, in purely numeric format
-# headers : Any other headers of interest
-# author : The author's name (and possibly organization)
-# email : The author's e-mail address
-# msgid : A unique message ID
-# in_reply_to: If != "", this is the msgid of the article being replied to
-# references : A (possibly empty) list of msgid's of earlier articles
-# in the thread
-# body : A list of strings making up the message body
-
-class Article:
- _last_article_time = time.time()
-
- def __init__(self, message = None, sequence = 0, keepHeaders = []):
- if message is None:
- return
- self.sequence = sequence
-
- self.parentID = None
- self.threadKey = None
- # otherwise the current sequence number is used.
- id = strip_separators(message['Message-Id'])
- if id == "":
- self.msgid = str(self.sequence)
- else: self.msgid = id
-
- if message.has_key('Subject'):
- self.subject = str(message['Subject'])
- else:
- self.subject = _('No subject')
- if self.subject == "": self.subject = _('No subject')
-
- self._set_date(message)
-
- # Figure out the e-mail address and poster's name. Use the From:
- # field first, followed by Reply-To:
- self.author, self.email = parseaddr(message.get('From', ''))
- e = message['Reply-To']
- if not self.email and e is not None:
- ignoreauthor, self.email = parseaddr(e)
- self.email = strip_separators(self.email)
- self.author = strip_separators(self.author)
-
- if self.author == "":
- self.author = self.email
-
- # Save the In-Reply-To:, References:, and Message-ID: lines
- #
- # TBD: The original code does some munging on these fields, which
- # shouldn't be necessary, but changing this may break code. For
- # safety, I save the original headers on different attributes for use
- # in writing the plain text periodic flat files.
- self._in_reply_to = message['in-reply-to']
- self._references = message['references']
- self._message_id = message['message-id']
-
- i_r_t = message['In-Reply-To']
- if i_r_t is None:
- self.in_reply_to = ''
- else:
- match = msgid_pat.search(i_r_t)
- if match is None: self.in_reply_to = ''
- else: self.in_reply_to = strip_separators(match.group(1))
-
- references = message['References']
- if references is None:
- self.references = []
- else:
- self.references = map(strip_separators, references.split())
-
- # Save any other interesting headers
- self.headers = {}
- for i in keepHeaders:
- if message.has_key(i):
- self.headers[i] = message[i]
-
- # Read the message body
- s = StringIO(message.get_payload(decode=True)\
- or message.as_string().split('\n\n',1)[1])
- self.body = s.readlines()
-
- def _set_date(self, message):
- def floatdate(header):
- missing = []
- datestr = message.get(header, missing)
- if datestr is missing:
- return None
- date = parsedate_tz(datestr)
- try:
- return mktime_tz(date)
- except (TypeError, ValueError, OverflowError):
- return None
- date = floatdate('date')
- if date is None:
- date = floatdate('x-list-received-date')
- if date is None:
- # What's left to try?
- date = self._last_article_time + 1
- self._last_article_time = date
- self.date = '%011i' % date
- self.datestr = message.get('date') \
- or message.get('x-list-received-date') \
- or formatdate(date)
-
- def __repr__(self):
- return '<Article ID = '+repr(self.msgid)+'>'
-
- def finished_update_article(self):
- pass
-
-# Pipermail formatter class
-
-class T:
- DIRMODE = 0755 # Mode to give to created directories
- FILEMODE = 0644 # Mode to give to created files
- INDEX_EXT = ".html" # Extension for indexes
-
- def __init__(self, basedir = None, reload = 1, database = None):
- # If basedir isn't provided, assume the current directory
- if basedir is None:
- self.basedir = os.getcwd()
- else:
- basedir = os.path.expanduser(basedir)
- self.basedir = basedir
- self.database = database
-
- # If the directory doesn't exist, create it. This code shouldn't get
- # run anymore, we create the directory in Archiver.py. It should only
- # get used by legacy lists created that are only receiving their first
- # message in the HTML archive now -- Marc
- try:
- os.stat(self.basedir)
- except os.error, errdata:
- errno, errmsg = errdata
- if errno != 2:
- raise os.error, errdata
- else:
- self.message(_('Creating archive directory ') + self.basedir)
- omask = os.umask(0)
- try:
- os.mkdir(self.basedir, self.DIRMODE)
- finally:
- os.umask(omask)
-
- # Try to load previously pickled state
- try:
- if not reload:
- raise IOError
- f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
- self.message(_('Reloading pickled archive state'))
- d = pickle.load(f)
- f.close()
- for key, value in d.items():
- setattr(self, key, value)
- except (IOError, EOFError):
- # No pickled version, so initialize various attributes
- self.archives = [] # Archives
- self._dirty_archives = [] # Archives that will have to be updated
- self.sequence = 0 # Sequence variable used for
- # numbering articles
- self.update_TOC = 0 # Does the TOC need updating?
- #
- # make the basedir variable work when passed in as an __init__ arg
- # and different from the one in the pickle. Let the one passed in
- # as an __init__ arg take precedence if it's stated. This way, an
- # archive can be moved from one place to another and still work.
- #
- if basedir != self.basedir:
- self.basedir = basedir
-
- def close(self):
- "Close an archive, save its state, and update any changed archives."
- self.update_dirty_archives()
- self.update_TOC = 0
- self.write_TOC()
- # Save the collective state
- self.message(_('Pickling archive state into ')
- + os.path.join(self.basedir, 'pipermail.pck'))
- self.database.close()
- del self.database
-
- omask = os.umask(007)
- try:
- f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
- finally:
- os.umask(omask)
- pickle.dump(self.getstate(), f)
- f.close()
-
- def getstate(self):
- # can override this in subclass
- return self.__dict__
-
- #
- # Private methods
- #
- # These will be neither overridden nor called by custom archivers.
- #
-
-
- # Create a dictionary of various parameters that will be passed
- # to the write_index_{header,footer} functions
- def __set_parameters(self, archive):
- # Determine the earliest and latest date in the archive
- firstdate = self.database.firstdate(archive)
- lastdate = self.database.lastdate(archive)
-
- # Get the current time
- now = time.asctime(time.localtime(time.time()))
- self.firstdate = firstdate
- self.lastdate = lastdate
- self.archivedate = now
- self.size = self.database.numArticles(archive)
- self.archive = archive
- self.version = __version__
-
- # Find the message ID of an article's parent, or return None
- # if no parent can be found.
-
- def __findParent(self, article, children = []):
- parentID = None
- if article.in_reply_to:
- parentID = article.in_reply_to
- elif article.references:
- # Remove article IDs that aren't in the archive
- refs = filter(self.articleIndex.has_key, article.references)
- if not refs:
- return None
- maxdate = self.database.getArticle(self.archive,
- refs[0])
- for ref in refs[1:]:
- a = self.database.getArticle(self.archive, ref)
- if a.date > maxdate.date:
- maxdate = a
- parentID = maxdate.msgid
- else:
- # Look for the oldest matching subject
- try:
- key, tempid = \
- self.subjectIndex.set_location(article.subject)
- print key, tempid
- self.subjectIndex.next()
- [subject, date] = key.split('\0')
- print article.subject, subject, date
- if subject == article.subject and tempid not in children:
- parentID = tempid
- except KeyError:
- pass
- return parentID
-
- # Update the threaded index completely
- def updateThreadedIndex(self):
- # Erase the threaded index
- self.database.clearIndex(self.archive, 'thread')
-
- # Loop over all the articles
- msgid = self.database.first(self.archive, 'date')
- while msgid is not None:
- try:
- article = self.database.getArticle(self.archive, msgid)
- except KeyError:
- pass
- else:
- if article.parentID is None or \
- not self.database.hasArticle(self.archive,
- article.parentID):
- # then
- pass
- else:
- parent = self.database.getArticle(self.archive,
- article.parentID)
- article.threadKey = parent.threadKey+article.date+'-'
- self.database.setThreadKey(self.archive,
- (article.threadKey, article.msgid),
- msgid)
- msgid = self.database.next(self.archive, 'date')
-
- #
- # Public methods:
- #
- # These are part of the public interface of the T class, but will
- # never be overridden (unless you're trying to do something very new).
-
- # Update a single archive's indices, whether the archive's been
- # dirtied or not.
- def update_archive(self, archive):
- self.archive = archive
- self.message(_("Updating index files for archive [%(archive)s]"))
- arcdir = os.path.join(self.basedir, archive)
- self.__set_parameters(archive)
-
- for hdr in ('Date', 'Subject', 'Author'):
- self._update_simple_index(hdr, archive, arcdir)
-
- self._update_thread_index(archive, arcdir)
-
- def _update_simple_index(self, hdr, archive, arcdir):
- self.message(" " + hdr)
- self.type = hdr
- hdr = hdr.lower()
-
- self._open_index_file_as_stdout(arcdir, hdr)
- self.write_index_header()
- count = 0
- # Loop over the index entries
- msgid = self.database.first(archive, hdr)
- while msgid is not None:
- try:
- article = self.database.getArticle(self.archive, msgid)
- except KeyError:
- pass
- else:
- count = count + 1
- self.write_index_entry(article)
- msgid = self.database.next(archive, hdr)
- # Finish up this index
- self.write_index_footer()
- self._restore_stdout()
-
- def _update_thread_index(self, archive, arcdir):
- self.message(_(" Thread"))
- self._open_index_file_as_stdout(arcdir, "thread")
- self.type = 'Thread'
- self.write_index_header()
-
- # To handle the prev./next in thread pointers, we need to
- # track articles 5 at a time.
-
- # Get the first 5 articles
- L = [None] * 5
- i = 2
- msgid = self.database.first(self.archive, 'thread')
-
- while msgid is not None and i < 5:
- L[i] = self.database.getArticle(self.archive, msgid)
- i = i + 1
- msgid = self.database.next(self.archive, 'thread')
-
- while L[2] is not None:
- article = L[2]
- artkey = None
- if article is not None:
- artkey = article.threadKey
- if artkey is not None:
- self.write_threadindex_entry(article, artkey.count('-') - 1)
- if self.database.changed.has_key((archive,article.msgid)):
- a1 = L[1]
- a3 = L[3]
- self.update_article(arcdir, article, a1, a3)
- if a3 is not None:
- self.database.changed[(archive, a3.msgid)] = None
- if a1 is not None:
- key = archive, a1.msgid
- if not self.database.changed.has_key(key):
- self.update_article(arcdir, a1, L[0], L[2])
- else:
- del self.database.changed[key]
- if L[0]:
- L[0].finished_update_article()
- L = L[1:] # Rotate the list
- if msgid is None:
- L.append(msgid)
- else:
- L.append(self.database.getArticle(self.archive, msgid))
- msgid = self.database.next(self.archive, 'thread')
-
- self.write_index_footer()
- self._restore_stdout()
-
- def _open_index_file_as_stdout(self, arcdir, index_name):
- path = os.path.join(arcdir, index_name + self.INDEX_EXT)
- omask = os.umask(002)
- try:
- self.__f = open(path, 'w')
- finally:
- os.umask(omask)
- self.__stdout = sys.stdout
- sys.stdout = self.__f
-
- def _restore_stdout(self):
- sys.stdout = self.__stdout
- self.__f.close()
- del self.__f
- del self.__stdout
-
- # Update only archives that have been marked as "changed".
- def update_dirty_archives(self):
- for i in self._dirty_archives:
- self.update_archive(i)
- self._dirty_archives = []
-
- # Read a Unix mailbox file from the file object <input>,
- # and create a series of Article objects. Each article
- # object will then be archived.
-
- def _makeArticle(self, msg, sequence):
- return Article(msg, sequence)
-
- def processUnixMailbox(self, path, start=None, end=None):
- mbox = iter(mailbox.mbox(path))
- if start is None:
- start = 0
- counter = 0
- while counter < start:
- try:
- m = next(mbox)
- except errors.DiscardMessage:
- continue
- if m is None:
- return
- counter += 1
- while True:
- try:
- m = next(mbox)
- except StopIteration:
- break
- except errors.DiscardMessage:
- continue
- except Exception:
- log.error('uncaught archiver exception')
- raise
- if m == '':
- # It was an unparseable message
- continue
- msgid = m.get('message-id', 'n/a')
- self.message(_('#%(counter)05d %(msgid)s'))
- a = self._makeArticle(m, self.sequence)
- self.sequence += 1
- self.add_article(a)
- if end is not None and counter >= end:
- break
- counter += 1
-
- def new_archive(self, archive, archivedir):
- self.archives.append(archive)
- self.update_TOC = 1
- self.database.newArchive(archive)
- # If the archive directory doesn't exist, create it
- try:
- os.stat(archivedir)
- except os.error, errdata:
- errno, errmsg = errdata
- if errno == 2:
- omask = os.umask(0)
- try:
- os.mkdir(archivedir, self.DIRMODE)
- finally:
- os.umask(omask)
- else:
- raise os.error, errdata
- self.open_new_archive(archive, archivedir)
-
- def add_article(self, article):
- archives = self.get_archives(article)
- if not archives:
- return
- if type(archives) == type(''):
- archives = [archives]
-
- article.filename = filename = self.get_filename(article)
- temp = self.format_article(article)
- for arch in archives:
- self.archive = arch # why do this???
- archivedir = os.path.join(self.basedir, arch)
- if arch not in self.archives:
- self.new_archive(arch, archivedir)
-
- # Write the HTML-ized article
- self.write_article(arch, temp, os.path.join(archivedir,
- filename))
-
- if article.decoded.has_key('author'):
- author = fixAuthor(article.decoded['author'])
- else:
- author = fixAuthor(article.author)
- if article.decoded.has_key('stripped'):
- subject = article.decoded['stripped'].lower()
- else:
- subject = article.subject.lower()
-
- article.parentID = parentID = self.get_parent_info(arch, article)
- if parentID:
- parent = self.database.getArticle(arch, parentID)
- article.threadKey = parent.threadKey + article.date + '-'
- else:
- article.threadKey = article.date + '-'
- key = article.threadKey, article.msgid
-
- self.database.setThreadKey(arch, key, article.msgid)
- self.database.addArticle(arch, temp, author=author,
- subject=subject)
-
- if arch not in self._dirty_archives:
- self._dirty_archives.append(arch)
-
- def get_parent_info(self, archive, article):
- parentID = None
- if article.in_reply_to:
- parentID = article.in_reply_to
- elif article.references:
- refs = self._remove_external_references(article.references)
- if refs:
- maxdate = self.database.getArticle(archive, refs[0])
- for ref in refs[1:]:
- a = self.database.getArticle(archive, ref)
- if a.date > maxdate.date:
- maxdate = a
- parentID = maxdate.msgid
- else:
- # Get the oldest article with a matching subject, and
- # assume this is a follow-up to that article
- parentID = self.database.getOldestArticle(archive,
- article.subject)
-
- if parentID and not self.database.hasArticle(archive, parentID):
- parentID = None
- return parentID
-
- def write_article(self, index, article, path):
- omask = os.umask(002)
- try:
- f = open(path, 'w')
- finally:
- os.umask(omask)
- temp_stdout, sys.stdout = sys.stdout, f
- self.write_article_header(article)
- sys.stdout.writelines(article.body)
- self.write_article_footer(article)
- sys.stdout = temp_stdout
- f.close()
-
- def _remove_external_references(self, refs):
- keep = []
- for ref in refs:
- if self.database.hasArticle(self.archive, ref):
- keep.append(ref)
- return keep
-
- # Abstract methods: these will need to be overridden by subclasses
- # before anything useful can be done.
-
- def get_filename(self, article):
- pass
- def get_archives(self, article):
- """Return a list of indexes where the article should be filed.
- A string can be returned if the list only contains one entry,
- and the empty list is legal."""
- pass
- def format_article(self, article):
- pass
- def write_index_header(self):
- pass
- def write_index_footer(self):
- pass
- def write_index_entry(self, article):
- pass
- def write_threadindex_entry(self, article, depth):
- pass
- def write_article_header(self, article):
- pass
- def write_article_footer(self, article):
- pass
- def write_article_entry(self, article):
- pass
- def update_article(self, archivedir, article, prev, next):
- pass
- def write_TOC(self):
- pass
- def open_new_archive(self, archive, dir):
- pass
- def message(self, msg):
- pass
-
-
-class BSDDBdatabase(Database):
- __super_addArticle = Database.addArticle
-
- def __init__(self, basedir):
- self.__cachekeys = []
- self.__cachedict = {}
- self.__currentOpenArchive = None # The currently open indices
- self.basedir = os.path.expanduser(basedir)
- self.changed = {} # Recently added articles, indexed only by
- # message ID
-
- def firstdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.first()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
- pass
- return date
-
- def lastdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.last()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
- pass
- return date
-
- def numArticles(self, archive):
- self.__openIndices(archive)
- return len(self.dateIndex)
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- self.__openIndices(archive)
- self.__super_addArticle(archive, article, subject, author, date)
-
- # Open the BSDDB files that are being used as indices
- # (dateIndex, authorIndex, subjectIndex, articleIndex)
- def __openIndices(self, archive):
- if self.__currentOpenArchive == archive:
- return
-
- import bsddb
- self.__closeIndices()
- arcdir = os.path.join(self.basedir, 'database')
- omask = os.umask(0)
- try:
- try:
- os.mkdir(arcdir, 02775)
- except OSError:
- # BAW: Hmm...
- pass
- finally:
- os.umask(omask)
- for hdr in ('date', 'author', 'subject', 'article', 'thread'):
- path = os.path.join(arcdir, archive + '-' + hdr)
- t = bsddb.btopen(path, 'c')
- setattr(self, hdr + 'Index', t)
- self.__currentOpenArchive = archive
-
- # Close the BSDDB files that are being used as indices (if they're
- # open--this is safe to call if they're already closed)
- def __closeIndices(self):
- if self.__currentOpenArchive is not None:
- pass
- for hdr in ('date', 'author', 'subject', 'thread', 'article'):
- attr = hdr + 'Index'
- if hasattr(self, attr):
- index = getattr(self, attr)
- if hdr == 'article':
- if not hasattr(self, 'archive_length'):
- self.archive_length = {}
- self.archive_length[self.__currentOpenArchive] = len(index)
- index.close()
- delattr(self,attr)
- self.__currentOpenArchive = None
-
- def close(self):
- self.__closeIndices()
- def hasArticle(self, archive, msgid):
- self.__openIndices(archive)
- return self.articleIndex.has_key(msgid)
- def setThreadKey(self, archive, key, msgid):
- self.__openIndices(archive)
- self.threadIndex[key] = msgid
- def getArticle(self, archive, msgid):
- self.__openIndices(archive)
- if self.__cachedict.has_key(msgid):
- self.__cachekeys.remove(msgid)
- self.__cachekeys.append(msgid)
- return self.__cachedict[msgid]
- if len(self.__cachekeys) == CACHESIZE:
- delkey, self.__cachekeys = (self.__cachekeys[0],
- self.__cachekeys[1:])
- del self.__cachedict[delkey]
- s = self.articleIndex[msgid]
- article = pickle.loads(s)
- self.__cachekeys.append(msgid)
- self.__cachedict[msgid] = article
- return article
-
- def first(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.first()
- return msgid
- except KeyError:
- return None
- def next(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.next()
- except KeyError:
- return None
- else:
- return msgid
-
- def getOldestArticle(self, archive, subject):
- self.__openIndices(archive)
- subject = subject.lower()
- try:
- key, tempid = self.subjectIndex.set_location(subject)
- self.subjectIndex.next()
- [subject2, date] = key.split('\0')
- if subject != subject2:
- return None
- return tempid
- except KeyError: # XXX what line raises the KeyError?
- return None
-
- def newArchive(self, archive):
- pass
-
- def clearIndex(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- finished = 0
- try:
- key, msgid = self.threadIndex.first()
- except KeyError:
- finished = 1
- while not finished:
- del self.threadIndex[key]
- try:
- key, msgid = self.threadIndex.next()
- except KeyError:
- finished = 1
-
-
diff --git a/src/mailman/app/docs/lifecycle.rst b/src/mailman/app/docs/lifecycle.rst
index d8356db74..ed0ce92b7 100644
--- a/src/mailman/app/docs/lifecycle.rst
+++ b/src/mailman/app/docs/lifecycle.rst
@@ -140,7 +140,7 @@ artifacts.
::
>>> from mailman.app.lifecycle import remove_list
- >>> remove_list(mlist_2.fqdn_listname, mlist_2, True)
+ >>> remove_list(mlist_2.fqdn_listname, mlist_2)
>>> from mailman.interfaces.listmanager import IListManager
>>> from zope.component import getUtility
diff --git a/src/mailman/app/lifecycle.py b/src/mailman/app/lifecycle.py
index 6826d68f1..5082034bc 100644
--- a/src/mailman/app/lifecycle.py
+++ b/src/mailman/app/lifecycle.py
@@ -89,7 +89,7 @@ def create_list(fqdn_listname, owners=None):
-def remove_list(fqdn_listname, mailing_list=None, archives=True):
+def remove_list(fqdn_listname, mailing_list=None):
"""Remove the list and all associated artifacts and subscriptions."""
removeables = []
# mailing_list will be None when only residual archives are being removed.
@@ -108,15 +108,6 @@ def remove_list(fqdn_listname, mailing_list=None, archives=True):
fn_listname = filename.split('.')[0]
if fn_listname == fqdn_listname:
removeables.append(os.path.join(config.LOCK_DIR, filename))
- if archives:
- private_dir = config.PRIVATE_ARCHIVE_FILE_DIR
- public_dir = config.PUBLIC_ARCHIVE_FILE_DIR
- removeables.extend([
- os.path.join(private_dir, fqdn_listname),
- os.path.join(private_dir, fqdn_listname + '.mbox'),
- os.path.join(public_dir, fqdn_listname),
- os.path.join(public_dir, fqdn_listname + '.mbox'),
- ])
# Now that we know what files and directories to delete, delete them.
for target in removeables:
if not os.path.exists(target):
diff --git a/src/mailman/archiving/docs/common.rst b/src/mailman/archiving/docs/common.rst
index 45ec8f194..9a79c3121 100644
--- a/src/mailman/archiving/docs/common.rst
+++ b/src/mailman/archiving/docs/common.rst
@@ -21,7 +21,6 @@ header, and one that provides a *permalink* to the specific message object in
the archive. This latter is appropriate for the message footer or for the RFC
5064 ``Archived-At:`` header.
-Pipermail does not support a permalink, so that interface returns ``None``.
Mailman defines a draft spec for how list servers and archivers can
interoperate.
@@ -38,9 +37,6 @@ interoperate.
mhonarc
http://lists.example.com/.../test@example.com
http://lists.example.com/.../RSZCG7IGPHFIRW3EMTVMMDNJMNCVCOLE
- pipermail
- http://www.example.com/pipermail/test@example.com
- None
prototype
http://lists.example.com
http://lists.example.com/RSZCG7IGPHFIRW3EMTVMMDNJMNCVCOLE
@@ -49,20 +45,8 @@ interoperate.
Sending the message to the archiver
===================================
-The archiver is also able to archive the message.
-::
-
- >>> archivers['pipermail'].archive_message(mlist, msg)
-
- >>> import os
- >>> from mailman.interfaces.archiver import IPipermailMailingList
- >>> pckpath = os.path.join(
- ... IPipermailMailingList(mlist).archive_dir(),
- ... 'pipermail.pck')
- >>> os.path.exists(pckpath)
- True
-
-Note however that the prototype archiver can't archive messages.
+The archiver is also able to archive the message. Note however that the
+prototype archiver can't archive messages.
>>> archivers['prototype'].archive_message(mlist, msg)
Traceback (most recent call last):
@@ -172,20 +156,17 @@ A MHonArc_ archiver is also available.
Messages sent to a local MHonArc instance are added to its archive via a
subprocess call.
+ >>> from mailman.testing.helpers import LogFileMark
+ >>> mark = LogFileMark('mailman.archiver')
>>> archiver.archive_message(mlist, msg)
- >>> archive_log = open(os.path.join(config.LOG_DIR, 'archiver'))
- >>> try:
- ... contents = archive_log.read()
- ... finally:
- ... archive_log.close()
- >>> print 'LOG:', contents
- LOG: ... /usr/bin/mhonarc -add
- -dbfile /.../private/test@example.com.mbox/mhonarc.db
- -outdir /.../mhonarc/test@example.com
- -stderr /.../logs/mhonarc
- -stdout /.../logs/mhonarc
- -spammode -umask 022
- ...
+ >>> print 'LOG:', mark.readline()
+ LOG: ... /usr/bin/mhonarc
+ -add
+ -dbfile .../test@example.com.mbox/mhonarc.db
+ -outdir .../mhonarc/test@example.com
+ -stderr .../logs/mhonarc
+ -stdout .../logs/mhonarc -spammode -umask 022
+
.. _`The Mail Archive`: http://www.mail-archive.com
.. _MHonArc: http://www.mhonarc.org
diff --git a/src/mailman/archiving/pipermail.py b/src/mailman/archiving/pipermail.py
deleted file mode 100644
index 03dcd97f4..000000000
--- a/src/mailman/archiving/pipermail.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# Copyright (C) 2007-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""Pipermail archiver."""
-
-from __future__ import absolute_import, unicode_literals
-
-__metaclass__ = type
-__all__ = [
- 'Pipermail',
- ]
-
-
-import os
-import mailbox
-import tempfile
-
-from zope.interface import implements
-from zope.interface.interface import adapter_hooks
-
-from mailman.config import config
-from mailman.interfaces.archiver import IArchiver, IPipermailMailingList
-from mailman.interfaces.mailinglist import IMailingList
-from mailman.utilities.filesystem import makedirs
-from mailman.utilities.string import expand
-
-from mailman.Archiver.HyperArch import HyperArchive
-
-
-
-class PipermailMailingListAdapter:
- """An adapter for MailingList objects to work with Pipermail."""
-
- implements(IPipermailMailingList)
-
- def __init__(self, mlist):
- self._mlist = mlist
-
- def __getattr__(self, name):
- return getattr(self._mlist, name)
-
- def archive_dir(self):
- """See `IPipermailMailingList`."""
- if self._mlist.archive_private:
- basedir = config.PRIVATE_ARCHIVE_FILE_DIR
- else:
- basedir = config.PUBLIC_ARCHIVE_FILE_DIR
- # Make sure the archive directory exists.
- archive_dir = os.path.join(basedir, self._mlist.fqdn_listname)
- makedirs(archive_dir)
- return archive_dir
-
-
-def adapt_mailing_list_for_pipermail(iface, obj):
- """Adapt `IMailingLists` to `IPipermailMailingList`.
-
- :param iface: The interface to adapt to.
- :type iface: `zope.interface.Interface`
- :param obj: The object being adapted.
- :type obj: any object
- :return: An `IPipermailMailingList` instance if adaptation succeeded or
- None if it didn't.
- """
- return (PipermailMailingListAdapter(obj)
- if IMailingList.providedBy(obj) and iface is IPipermailMailingList
- else None)
-
-adapter_hooks.append(adapt_mailing_list_for_pipermail)
-
-
-
-class Pipermail:
- """The stock Pipermail archiver."""
-
- implements(IArchiver)
-
- name = 'pipermail'
-
- @staticmethod
- def list_url(mlist):
- """See `IArchiver`."""
- if mlist.archive_private:
- return mlist.script_url('private') + '/index.html'
- else:
- return expand(config.archiver.pipermail.base_url,
- dict(listname=mlist.fqdn_listname,
- hostname=mlist.domain.url_host,
- fqdn_listname=mlist.fqdn_listname,
- ))
-
- @staticmethod
- def permalink(mlist, message):
- """See `IArchiver`."""
- # Not currently implemented.
- return None
-
- @staticmethod
- def archive_message(mlist, message):
- """See `IArchiver`."""
- fd, path = tempfile.mkstemp('.mbox')
- os.close(fd)
- try:
- mbox = mailbox.mbox(path, create=True)
- mbox.add(message)
- finally:
- mbox.close()
- h = HyperArchive(IPipermailMailingList(mlist))
- try:
- h.processUnixMailbox(path)
- finally:
- h.close()
- os.remove(path)
- # There's no good way to know the url for the archived message.
- return None
diff --git a/src/mailman/archiving/prototype.py b/src/mailman/archiving/prototype.py
index 55d78074e..f041d4450 100644
--- a/src/mailman/archiving/prototype.py
+++ b/src/mailman/archiving/prototype.py
@@ -60,6 +60,6 @@ class Prototype:
return urljoin(Prototype.list_url(mlist), message_id_hash)
@staticmethod
- def archive_message(mlist, message):
+ def archive_message(mlist, msg):
"""See `IArchiver`."""
raise NotImplementedError
diff --git a/src/mailman/commands/cli_lists.py b/src/mailman/commands/cli_lists.py
index 42e67e3a8..5ae6499e9 100644
--- a/src/mailman/commands/cli_lists.py
+++ b/src/mailman/commands/cli_lists.py
@@ -252,12 +252,6 @@ class Remove:
def add(self, parser, command_parser):
"""See `ICLISubCommand`."""
command_parser.add_argument(
- '-a', '--archives',
- default=False, action='store_true',
- help=_("""\
-Remove the list's archives too, or if the list has already been deleted,
-remove any residual archives."""))
- command_parser.add_argument(
'-q', '--quiet',
default=False, action='store_true',
help=_('Suppress status messages'))
@@ -278,15 +272,9 @@ remove any residual archives."""))
fqdn_listname = args.listname[0]
mlist = getUtility(IListManager).get(fqdn_listname)
if mlist is None:
- if args.archives:
- log(_('No such list: $fqdn_listname; '
- 'removing residual archives.'))
- else:
- log(_('No such list: $fqdn_listname'))
- return
+ log(_('No such list: $fqdn_listname'))
+ return
else:
log(_('Removed list: $fqdn_listname'))
- if not args.archives:
- log(_('Not removing archives. Reinvoke with -a to remove them.'))
- remove_list(fqdn_listname, mlist, args.archives)
+ remove_list(fqdn_listname, mlist)
config.db.commit()
diff --git a/src/mailman/commands/docs/info.rst b/src/mailman/commands/docs/info.rst
index 34883711e..5e26b04a3 100644
--- a/src/mailman/commands/docs/info.rst
+++ b/src/mailman/commands/docs/info.rst
@@ -69,8 +69,6 @@ The File System Hierarchy layout is the same every by definition.
LOG_DIR = /var/log/mailman
MESSAGES_DIR = /var/lib/mailman/messages
PID_FILE = /var/run/mailman/master.pid
- PRIVATE_ARCHIVE_FILE_DIR = /var/lib/mailman/archives/private
- PUBLIC_ARCHIVE_FILE_DIR = /var/lib/mailman/archives/public
QUEUE_DIR = /var/spool/mailman
TEMPLATE_DIR = .../mailman/templates
VAR_DIR = /var/lib/mailman
diff --git a/src/mailman/commands/docs/remove.rst b/src/mailman/commands/docs/remove.rst
index f0f4e64f6..35dc53c5e 100644
--- a/src/mailman/commands/docs/remove.rst
+++ b/src/mailman/commands/docs/remove.rst
@@ -24,7 +24,6 @@ A system administrator can remove mailing lists by the command line.
>>> command = Remove()
>>> command.process(args)
Removed list: test@example.com
- Not removing archives. Reinvoke with -a to remove them.
>>> print list_manager.get('test@example.com')
None
@@ -40,46 +39,3 @@ You can also remove lists quietly.
>>> print list_manager.get('test@example.com')
None
-
-
-Removing archives
-=================
-
-By default 'mailman remove' does not remove a mailing list's archives.
-::
-
- >>> create_list('test@example.com')
- <mailing list "test@example.com" at ...>
-
- # Fake an mbox file for the mailing list.
- >>> import os
- >>> def make_mbox(fqdn_listname):
- ... mbox_dir = os.path.join(
- ... config.PUBLIC_ARCHIVE_FILE_DIR, fqdn_listname + '.mbox')
- ... os.makedirs(mbox_dir)
- ... mbox_file = os.path.join(mbox_dir, fqdn_listname + '.mbox')
- ... with open(mbox_file, 'w') as fp:
- ... print >> fp, 'A message'
- ... assert os.path.exists(mbox_file)
- ... return mbox_file
-
- >>> mbox_file = make_mbox('test@example.com')
- >>> args.quiet = False
- >>> command.process(args)
- Removed list: test@example.com
- Not removing archives. Reinvoke with -a to remove them.
-
- >>> os.path.exists(mbox_file)
- True
-
-Even if the mailing list has been deleted, you can still delete the archives
-afterward.
-::
-
- >>> args.archives = True
-
- >>> command.process(args)
- No such list: test@example.com; removing residual archives.
-
- >>> os.path.exists(mbox_file)
- False
diff --git a/src/mailman/config/config.py b/src/mailman/config/config.py
index 034b76b4f..da20001b8 100644
--- a/src/mailman/config/config.py
+++ b/src/mailman/config/config.py
@@ -173,8 +173,6 @@ class Configuration:
lock_dir = category.lock_dir,
log_dir = category.log_dir,
messages_dir = category.messages_dir,
- pipermail_private_dir = category.pipermail_private_dir,
- pipermail_public_dir = category.pipermail_public_dir,
queue_dir = category.queue_dir,
var_dir = var_dir,
template_dir = (
@@ -208,10 +206,6 @@ class Configuration:
# Ensure that all paths are normalized and made absolute. Handle the
# few special cases first. Most of these are due to backward
# compatibility.
- self.PUBLIC_ARCHIVE_FILE_DIR = os.path.abspath(
- substitutions.pop('pipermail_public_dir'))
- self.PRIVATE_ARCHIVE_FILE_DIR = os.path.abspath(
- substitutions.pop('pipermail_private_dir'))
self.PID_FILE = os.path.abspath(substitutions.pop('pid_file'))
for key in substitutions:
attribute = key.upper()
diff --git a/src/mailman/config/schema.cfg b/src/mailman/config/schema.cfg
index e662633e6..8b5aa690a 100644
--- a/src/mailman/config/schema.cfg
+++ b/src/mailman/config/schema.cfg
@@ -113,10 +113,6 @@ etc_dir: $var_dir/etc
ext_dir: $var_dir/ext
# Directory where the default IMessageStore puts its messages.
messages_dir: $var_dir/messages
-# Directory for public Pipermail archiver artifacts.
-pipermail_public_dir: $var_dir/archives/public
-# Directory for private Pipermail archiver artifacts.
-pipermail_private_dir: $var_dir/archives/private
# Root directory for site-specific template override files.
template_dir: $var_dir/templates
# There are also a number of paths to specific file locations that can be
@@ -550,32 +546,6 @@ base_url: http://$hostname/archives/$fqdn_listname
# This is the stock mail-archive.com archiver.
class: mailman.archiving.mailarchive.MailArchive
-[archiver.pipermail]
-# This is the stock Pipermail archiver.
-class: mailman.archiving.pipermail.Pipermail
-
-# This sets the default `clobber date' policy for the archiver. When a
-# message is to be archived either by Pipermail or an external archiver,
-# Mailman can modify the Date: header to be the date the message was received
-# instead of the Date: in the original message. This is useful if you
-# typically receive messages with outrageous dates. Set this to 0 to retain
-# the date of the original message, or to 1 to always clobber the date. Set
-# it to 2 to perform `smart overrides' on the date; when the date is outside
-# allowable_sane_date_skew (either too early or too late), then the received
-# date is substituted instead.
-clobber_date_policy: 2
-allowable_sane_date_skew: 15d
-
-# Pipermail archives contain the raw email addresses of the posting authors.
-# Some view this as a goldmine for spam harvesters. Set this to 'yes' to
-# moderately obscure email addresses, but note that this breaks mailto: URLs
-# in the archives too.
-obscure_email_addresses: yes
-
-# When the archive is public, should Pipermail also make the raw Unix mbox
-# file publically available?
-public_mbox: no
-
[archiver.prototype]
# This is a prototypical sample archiver.
@@ -598,7 +568,7 @@ class: mailman.styles.default.DefaultStyle
# a MailList object and a Message object. It should raise
# Errors.DiscardMessage if it wants to throw the message away. Otherwise it
# should modify the Message object as necessary.
-archive_scrubber: mailman.archiving.pipermail.Pipermail
+archive_scrubber: mailman.archiving.prototype.Prototype
# This variable defines what happens to text/html subparts. They can be
# stripped completely, escaped, or filtered through an external program. The
diff --git a/src/mailman/core/initialize.py b/src/mailman/core/initialize.py
index 721877056..389a45f3b 100644
--- a/src/mailman/core/initialize.py
+++ b/src/mailman/core/initialize.py
@@ -108,9 +108,7 @@ def initialize_1(config_path=None):
# By default, set the umask so that only owner and group can read and
# write our files. Specifically we must have g+rw and we probably want
# o-rwx although I think in most cases it doesn't hurt if other can read
- # or write the files. Note that the Pipermail archive has more
- # restrictive permissions in order to handle private archives, but it
- # handles that correctly.
+ # or write the files.
os.umask(007)
# config_path will be set if the command line argument -C is given. That
# case overrides all others. When not given on the command line, the
diff --git a/src/mailman/interfaces/archiver.py b/src/mailman/interfaces/archiver.py
index f24e44183..a06bbdede 100644
--- a/src/mailman/interfaces/archiver.py
+++ b/src/mailman/interfaces/archiver.py
@@ -22,12 +22,10 @@ from __future__ import absolute_import, unicode_literals
__metaclass__ = type
__all__ = [
'IArchiver',
- 'IPipermailMailingList',
]
from zope.interface import Interface, Attribute
-from mailman.interfaces.mailinglist import IMailingList
@@ -43,36 +41,25 @@ class IArchiver(Interface):
:returns: The url string.
"""
- def permalink(mlist, message):
+ def permalink(mlist, msg):
"""Return the url to the message in the archive.
This url points directly to the message in the archive. This method
only calculates the url, it does not actually archive the message.
:param mlist: The IMailingList object.
- :param message: The message object.
+ :param msg: The message object.
:returns: The url string or None if the message's archive url cannot
be calculated.
"""
- def archive_message(mlist, message):
+ def archive_message(mlist, msg):
"""Send the message to the archiver.
:param mlist: The IMailingList object.
- :param message: The message object.
+ :param msg: The message object.
:returns: The url string or None if the message's archive url cannot
be calculated.
"""
# XXX How to handle attachments?
-
-
-
-class IPipermailMailingList(IMailingList):
- """An interface that adapts IMailingList as needed for Pipermail."""
-
- def archive_dir():
- """The directory for storing Pipermail artifacts.
-
- Pipermail expects this to be a function, not a property.
- """
diff --git a/src/mailman/pipeline/docs/rfc-2369.rst b/src/mailman/pipeline/docs/rfc-2369.rst
index a1ba6c746..1b89f2354 100644
--- a/src/mailman/pipeline/docs/rfc-2369.rst
+++ b/src/mailman/pipeline/docs/rfc-2369.rst
@@ -148,35 +148,11 @@ header will be added.
>>> mlist.archive = True
- >>> from mailman.config import config
- >>> config.push('pipermail', """
- ... [archiver.prototype]
- ... enable: no
- ... [archiver.mail_archive]
- ... enable: no
- ... [archiver.mhonarc]
- ... enable: no
- ... [archiver.pipermail]
- ... enable: yes
- ... """)
-
- >>> msg = message_from_string("""\
- ... From: aperson@example.com
- ...
- ... """)
- >>> process(mlist, msg, {})
- >>> list_headers(msg, only='list-archive')
- ---start---
- list-archive: <http://www.example.com/pipermail/test@example.com>
- ---end---
-
`RFC 5064`_ defines the `Archived-At` header which contains the url to the
individual message in the archives. Archivers which don't support
-pre-calculation of the archive url cannot add the `Archived-At` header, as is
-the case with Pipermail (see above). However, other archivers can calculate
-the url, and do add this header.
+pre-calculation of the archive url cannot add the `Archived-At` header.
+However, other archivers can calculate the url, and do add this header.
- >>> config.pop('pipermail')
>>> config.push('prototype', """
... [archiver.prototype]
... enable: yes
diff --git a/src/mailman/pipeline/scrubber.py b/src/mailman/pipeline/scrubber.py
index 0584c0a2c..76d10427e 100644
--- a/src/mailman/pipeline/scrubber.py
+++ b/src/mailman/pipeline/scrubber.py
@@ -175,8 +175,7 @@ def process(mlist, msg, msgdata=None):
#
# Also get the RFC 3676 stuff from this part. This seems to
# work okay for scrub_nondigest. It will also work as far as
- # scrubbing messages for the archive is concerned, but Pipermail
- # doesn't pay any attention to the RFC 3676 parameters. The plain
+ # scrubbing messages for the archive is concerned. The plain
# format digest is going to be a disaster in any case as some of
# messages will be format="flowed" and some not. ToDigest creates
# its own Content-Type: header for the plain digest which won't
@@ -209,7 +208,9 @@ URL: $url
# sets content-type to text/plain
lcset)
elif sanitize == 2:
- # By leaving it alone, Pipermail will automatically escape it
+ # By leaving it alone, Pipermail will automatically escape it.
+ # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do
+ # we even need this?
pass
elif sanitize == 3:
# Pull it out as an attachment but leave it unescaped. This
@@ -259,8 +260,7 @@ Size: $size
URL: $url
"""), lcset)
# If the message isn't a multipart, then we'll strip it out as an
- # attachment that would have to be separately downloaded. Pipermail
- # will transform the url into a hyperlink.
+ # attachment that would have to be separately downloaded.
elif part._payload and not part.is_multipart():
payload = part.get_payload(decode=True)
ctype = part.get_content_type()
@@ -290,6 +290,9 @@ URL: $url
# We still have to sanitize multipart messages to flat text because
# Pipermail can't handle messages with list payloads. This is a kludge;
# def (n) clever hack ;).
+ #
+ # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do we even need
+ # this code?
if msg.is_multipart() and sanitize != 2:
# By default we take the charset of the first text/plain part in the
# message, but if there was none, we'll use the list's preferred
diff --git a/src/mailman/rest/lists.py b/src/mailman/rest/lists.py
index 0103022e7..9372d71dc 100644
--- a/src/mailman/rest/lists.py
+++ b/src/mailman/rest/lists.py
@@ -135,10 +135,7 @@ class AList(_ListBase):
"""Delete the named mailing list."""
if self._mlist is None:
return http.not_found()
- remove_list(self._mlist.fqdn_listname, self._mlist,
- # XXX 2010-07-06 barry we need a way to remove the list
- # archives either with the mailing list or afterward.
- archives=False)
+ remove_list(self._mlist.fqdn_listname, self._mlist)
return no_content()
@resource.child(member_matcher)
diff --git a/src/mailman/runners/archive.py b/src/mailman/runners/archive.py
index cab776076..ea85281b1 100644
--- a/src/mailman/runners/archive.py
+++ b/src/mailman/runners/archive.py
@@ -17,6 +17,8 @@
"""Archive runner."""
+from __future__ import absolute_import, print_function, unicode_literals
+
__metaclass__ = type
__all__ = [
'ArchiveRunner',
diff --git a/src/mailman/runners/docs/archiver.rst b/src/mailman/runners/docs/archiver.rst
deleted file mode 100644
index a6f5ccd24..000000000
--- a/src/mailman/runners/docs/archiver.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-=========
-Archiving
-=========
-
-Mailman can archive to any number of archivers that adhere to the
-``IArchiver`` interface. By default, there's a Pipermail archiver.
-::
-
- >>> mlist = create_list('test@example.com')
- >>> transaction.commit()
-
- >>> msg = message_from_string("""\
- ... From: aperson@example.com
- ... To: test@example.com
- ... Subject: My first post
- ... Message-ID: <first>
- ...
- ... First post!
- ... """)
-
- >>> archiver_queue = config.switchboards['archive']
- >>> ignore = archiver_queue.enqueue(msg, {}, listname=mlist.fqdn_listname)
-
- >>> from mailman.runners.archive import ArchiveRunner
- >>> from mailman.testing.helpers import make_testable_runner
- >>> runner = make_testable_runner(ArchiveRunner)
- >>> runner.run()
-
- # The best we can do is verify that some landmark exists. Let's check that
- # the Pipermail pickle file exists.
- >>> listname = mlist.fqdn_listname
- >>> import os
- >>> os.path.exists(os.path.join(
- ... config.PUBLIC_ARCHIVE_FILE_DIR, listname, 'pipermail.pck'))
- True
diff --git a/src/mailman/runners/tests/test_archiver.py b/src/mailman/runners/tests/test_archiver.py
new file mode 100644
index 000000000..274aba5ec
--- /dev/null
+++ b/src/mailman/runners/tests/test_archiver.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2012 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test the archive runner."""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'TestArchiveRunner',
+ ]
+
+
+import os
+import unittest
+
+from email import message_from_file
+from zope.interface import implements
+
+from mailman.app.lifecycle import create_list
+from mailman.config import config
+from mailman.interfaces.archiver import IArchiver
+from mailman.runners.archive import ArchiveRunner
+from mailman.testing.helpers import (
+ make_testable_runner,
+ specialized_message_from_string as mfs)
+from mailman.testing.layers import ConfigLayer
+
+
+
+class DummyArchiver:
+ implements(IArchiver)
+ name = 'dummy'
+
+ @staticmethod
+ def list_url(mlist):
+ return 'http://archive.example.com/'
+
+ @staticmethod
+ def permalink(mlist, msg):
+ filename = msg['x-message-id-hash']
+ return 'http://archive.example.com/' + filename
+
+ @staticmethod
+ def archive_message(mlist, msg):
+ filename = msg['x-message-id-hash']
+ path = os.path.join(config.MESSAGES_DIR, filename)
+ with open(path, 'w') as fp:
+ print(msg.as_string(), file=fp)
+ # Not technically allowed by the API, but good enough for the test.
+ return path
+
+
+
+class TestArchiveRunner(unittest.TestCase):
+ """Test the archive runner."""
+
+ layer = ConfigLayer
+
+ def setUp(self):
+ self._mlist = create_list('test@example.com')
+ # Enable just the dummy archiver.
+ config.push('dummy', """
+ [archiver.dummy]
+ class: mailman.runners.tests.test_archiver.DummyArchiver
+ enable: yes
+ [archiver.prototype]
+ enable: no
+ [archiver.mhonarc]
+ enable: no
+ [archiver.mail_archive]
+ enable: no
+ """)
+ self._msg = mfs("""\
+From: aperson@example.com
+To: test@example.com
+Subject: My first post
+Message-ID: <first>
+X-Message-ID-Hash: 4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB
+
+First post!
+""")
+ self._runner = make_testable_runner(ArchiveRunner)
+
+ def tearDown(self):
+ config.pop('dummy')
+
+ def test_archive_runner(self):
+ # Ensure that the archive runner ends up archiving the message.
+ config.switchboards['archive'].enqueue(
+ self._msg, {}, listname=self._mlist.fqdn_listname)
+ self._runner.run()
+ # There should now be a copy of the message in the file system.
+ filename = os.path.join(
+ config.MESSAGES_DIR, '4CMWUN6BHVCMHMDAOSJZ2Q72G5M32MWB')
+ with open(filename) as fp:
+ archived = message_from_file(fp)
+ self.assertEqual(archived['message-id'], '<first>')
diff --git a/src/mailman/templates/en/archidxentry.html b/src/mailman/templates/en/archidxentry.html
deleted file mode 100644
index 1927ae7fe..000000000
--- a/src/mailman/templates/en/archidxentry.html
+++ /dev/null
@@ -1,4 +0,0 @@
-<LI><A HREF="$filename">$subject
-</A><A NAME="$sequence">&nbsp;</A>
-<I>$author
-</I>
diff --git a/src/mailman/templates/en/archidxfoot.html b/src/mailman/templates/en/archidxfoot.html
deleted file mode 100644
index 6a43546ea..000000000
--- a/src/mailman/templates/en/archidxfoot.html
+++ /dev/null
@@ -1,21 +0,0 @@
- </ul>
- <p>
- <a name="end"><b>Last message date:</b></a>
- <i>$lastdate</i><br>
- <b>Archived on:</b> <i>$archivedate</i>
- <p>
- <ul>
- <li> <b>Messages sorted by:</b>
- $thread_ref
- $subject_ref
- $author_ref
- $date_ref
- <li><b><a href="$listinfo">More info on this list...
- </a></b></li>
- </ul>
- <p>
- <hr>
- <i>This archive was generated by
- Pipermail $version.</i>
- </BODY>
-</HTML>
diff --git a/src/mailman/templates/en/archidxhead.html b/src/mailman/templates/en/archidxhead.html
deleted file mode 100644
index 70a7558d7..000000000
--- a/src/mailman/templates/en/archidxhead.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
-<HTML>
- <HEAD>
- <title>The $listname $archive Archive by $archtype</title>
- <META NAME="robots" CONTENT="noindex,follow">
- $encoding
- </HEAD>
- <BODY BGCOLOR="#ffffff">
- <a name="start"></A>
- <h1>$archive Archives by $archtype</h1>
- <ul>
- <li> <b>Messages sorted by:</b>
- $thread_ref
- $subject_ref
- $author_ref
- $date_ref
-
- <li><b><a href="$listinfo">More info on this list...
- </a></b></li>
- </ul>
- <p><b>Starting:</b> <i>$firstdate</i><br>
- <b>Ending:</b> <i>$lastdate</i><br>
- <b>Messages:</b> $size<p>
- <ul>
diff --git a/src/mailman/templates/en/archlistend.html b/src/mailman/templates/en/archlistend.html
deleted file mode 100644
index 9bc052ddb..000000000
--- a/src/mailman/templates/en/archlistend.html
+++ /dev/null
@@ -1 +0,0 @@
- </table>
diff --git a/src/mailman/templates/en/archliststart.html b/src/mailman/templates/en/archliststart.html
deleted file mode 100644
index cdf5d17c4..000000000
--- a/src/mailman/templates/en/archliststart.html
+++ /dev/null
@@ -1,4 +0,0 @@
- <table border=3>
- <tr><td>Archive</td>
- <td>View by:</td>
- <td>Downloadable version</td></tr>
diff --git a/src/mailman/templates/en/archtoc.html b/src/mailman/templates/en/archtoc.html
deleted file mode 100644
index 4dcaf5a50..000000000
--- a/src/mailman/templates/en/archtoc.html
+++ /dev/null
@@ -1,20 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
-<HTML>
- <HEAD>
- <title>The $listname Archives</title>
- <META NAME="robots" CONTENT="noindex,follow">
- $meta
- </HEAD>
- <BODY BGCOLOR="#ffffff">
- <h1>The $listname Archives </h1>
- <p>
- You can get <a href="$listinfo">more information about this list</a>
- or you can <a href="$fullarch">download the full raw archive</a>
- ($size).
- </p>
- $noarchive_msg
- $archive_listing_start
- $archive_listing
- $archive_listing_end
- </BODY>
- </HTML>
diff --git a/src/mailman/templates/en/archtocentry.html b/src/mailman/templates/en/archtocentry.html
deleted file mode 100644
index e2a6d2e37..000000000
--- a/src/mailman/templates/en/archtocentry.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
- <tr>
- <td>$archivelabel:</td>
- <td>
- <A href="$archive/thread.html">[ Thread ]</a>
- <A href="$archive/subject.html">[ Subject ]</a>
- <A href="$archive/author.html">[ Author ]</a>
- <A href="$archive/date.html">[ Date ]</a>
- </td>
- $textlink
- </tr>
-
diff --git a/src/mailman/templates/en/archtocnombox.html b/src/mailman/templates/en/archtocnombox.html
deleted file mode 100644
index 5989aa53d..000000000
--- a/src/mailman/templates/en/archtocnombox.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
-<HTML>
- <HEAD>
- <title>The $listname Archives</title>
- <META NAME="robots" CONTENT="noindex,follow">
- $meta
- </HEAD>
- <BODY BGCOLOR="#ffffff">
- <h1>The $listname Archives </h1>
- <p>
- You can get <a href="$listinfo">more information about this list</a>.
- </p>
- $noarchive_msg
- $archive_listing_start
- $archive_listing
- $archive_listing_end
- </BODY>
- </HTML>
diff --git a/src/mailman/testing/testing.cfg b/src/mailman/testing/testing.cfg
index 526093572..d503247de 100644
--- a/src/mailman/testing/testing.cfg
+++ b/src/mailman/testing/testing.cfg
@@ -71,10 +71,6 @@ enable: yes
base_url: http://go.mail-archive.dev/
recipient: archive@mail-archive.dev
-[archiver.pipermail]
-enable: yes
-base_url: http://www.example.com/pipermail/$listname
-
[archiver.mhonarc]
enable: yes
command: /bin/echo "/usr/bin/mhonarc -add -dbfile $PRIVATE_ARCHIVE_FILE_DIR/${listname}.mbox/mhonarc.db -outdir $VAR_DIR/mhonarc/${listname} -stderr $LOG_DIR/mhonarc -stdout $LOG_DIR/mhonarc -spammode -umask 022"