author     Barry Warsaw    2009-01-25 13:01:41 -0500
committer  Barry Warsaw    2009-01-25 13:01:41 -0500
commit     eefd06f1b88b8ecbb23a9013cd223b72ca85c20d (patch)
tree       72c947fe16fce0e07e996ee74020b26585d7e846 /mailman/Archiver
parent     07871212f74498abd56bef3919bf3e029eb8b930 (diff)
Diffstat (limited to 'mailman/Archiver')
-rw-r--r--  mailman/Archiver/Archiver.py        230
-rw-r--r--  mailman/Archiver/HyperArch.py      1237
-rw-r--r--  mailman/Archiver/HyperDatabase.py   339
-rw-r--r--  mailman/Archiver/__init__.py         18
-rw-r--r--  mailman/Archiver/pipermail.py       874
5 files changed, 0 insertions, 2698 deletions
diff --git a/mailman/Archiver/Archiver.py b/mailman/Archiver/Archiver.py
deleted file mode 100644
index d0b9fbd1b..000000000
--- a/mailman/Archiver/Archiver.py
+++ /dev/null
@@ -1,230 +0,0 @@
-# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""Mixin class for putting new messages in the right place for archival.
-
-Public archives are separated from private ones. An external archival
-mechanism (e.g., pipermail) should be pointed at the right places to do the
-archival.
-"""
-
-import os
-import errno
-import logging
-
-from cStringIO import StringIO
-from string import Template
-
-from mailman import Mailbox
-from mailman import Utils
-from mailman.config import config
-
-log = logging.getLogger('mailman.error')
-
-
-
-def makelink(old, new):
- try:
- os.symlink(old, new)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
-
-def breaklink(link):
- try:
- os.unlink(link)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
-
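
Both helpers above are deliberately idempotent: makelink() treats an
already-existing link as success, and breaklink() treats an already-missing
one the same way, so callers can repeat them freely. A minimal usage sketch
(the paths are hypothetical):

    # Publish a private archive under its public name; the second call is
    # harmless because EEXIST is swallowed.
    makelink('../private/mylist', '/var/archives/public/mylist')
    makelink('../private/mylist', '/var/archives/public/mylist')
    # Unpublish; the second call is harmless because ENOENT is swallowed.
    breaklink('/var/archives/public/mylist')
    breaklink('/var/archives/public/mylist')
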
-
-
-class Archiver:
- #
- # Interface to Pipermail. HyperArch.py uses this method to get the
- # archive directory for the mailing list
- #
- def InitVars(self):
- # The archive file structure by default is:
- #
- # archives/
- # private/
- # listname.mbox/
- # listname.mbox
- # listname/
- # lots-of-pipermail-stuff
- # public/
- # listname.mbox@ -> ../private/listname.mbox
- # listname@ -> ../private/listname
- #
- # IOW, the mbox and pipermail archives are always stored in the
- # private archive for the list. This is safe because archives/private
- # is always set to o-rx. Public archives have a symlink to get around
- # the private directory, pointing directly to the private/listname
- # which has o+rx permissions. Private archives do not have the
- # symbolic links.
- archdir = self.archive_dir(self.fqdn_listname)
- omask = os.umask(0)
- try:
- try:
- os.mkdir(archdir+'.mbox', 02775)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
-            # We also create an empty pipermail archive directory into
-            # which we'll drop an empty index.html file. This is so
- # that lists that have not yet received a posting have
- # /something/ as their index.html, and don't just get a 404.
- try:
- os.mkdir(archdir, 02775)
- except OSError, e:
- if e.errno <> errno.EEXIST:
- raise
- # See if there's an index.html file there already and if not,
- # write in the empty archive notice.
- indexfile = os.path.join(archdir, 'index.html')
- fp = None
- try:
- fp = open(indexfile)
- except IOError, e:
- if e.errno <> errno.ENOENT:
- raise
- omask = os.umask(002)
- try:
- fp = open(indexfile, 'w')
- finally:
- os.umask(omask)
- fp.write(Utils.maketext(
- 'emptyarchive.html',
- {'listname': self.real_name,
- 'listinfo': self.GetScriptURL('listinfo'),
- }, mlist=self))
- if fp:
- fp.close()
- finally:
- os.umask(omask)
-
- def ArchiveFileName(self):
- """The mbox name where messages are left for archive construction."""
- return os.path.join(self.archive_dir() + '.mbox',
- self.fqdn_listname + '.mbox')
-
- def GetBaseArchiveURL(self):
- if self.archive_private:
- url = self.GetScriptURL('private') + '/index.html'
- else:
- web_host = config.domains.get(self.host_name, self.host_name)
- url = Template(config.PUBLIC_ARCHIVE_URL).safe_substitute(
- listname=self.fqdn_listname,
- hostname=web_host,
- fqdn_listname=self.fqdn_listname,
- )
- return url
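
GetBaseArchiveURL() relies on string.Template.safe_substitute(), which fills
in the known $placeholders and, unlike substitute(), leaves any unknown
placeholder in place rather than raising KeyError. A sketch with a
hypothetical PUBLIC_ARCHIVE_URL value:

    from string import Template

    # Hypothetical setting; the real value comes from the site config.
    public_archive_url = 'http://$hostname/pipermail/$listname'
    url = Template(public_archive_url).safe_substitute(
        listname='mylist@example.com',
        hostname='www.example.com',
        fqdn_listname='mylist@example.com')
    # url == 'http://www.example.com/pipermail/mylist@example.com'
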
-
- def __archive_file(self, afn):
- """Open (creating, if necessary) the named archive file."""
- omask = os.umask(002)
- try:
- return Mailbox.Mailbox(open(afn, 'a+'))
- finally:
- os.umask(omask)
-
- #
- # old ArchiveMail function, retained under a new name
- # for optional archiving to an mbox
- #
- def __archive_to_mbox(self, post):
- """Retain a text copy of the message in an mbox file."""
- try:
- afn = self.ArchiveFileName()
- mbox = self.__archive_file(afn)
- mbox.AppendMessage(post)
- mbox.fp.close()
- except IOError, msg:
- log.error('Archive file access failure:\n\t%s %s', afn, msg)
- raise
-
- def ExternalArchive(self, ar, txt):
- cmd = Template(ar).safe_substitute(
- listname=self.fqdn_listname,
- hostname=self.host_name)
- extarch = os.popen(cmd, 'w')
- extarch.write(txt)
- status = extarch.close()
- if status:
- log.error('external archiver non-zero exit status: %d\n',
- (status & 0xff00) >> 8)
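
In Python 2, os.popen(...).close() returns None on success and otherwise the
raw 16-bit wait status, whose high byte is the child's exit code; that is
what the (status & 0xff00) >> 8 expression above extracts. A minimal sketch:

    import os

    # 'exit 3' makes the shell exit with status 3, so close() returns the
    # raw wait status 0x0300 rather than None.
    status = os.popen('exit 3', 'w').close()
    if status:
        exit_code = (status & 0xff00) >> 8   # == 3
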
-
- #
-    # archiving in real time; this is called from list.post(msg)
- #
- def ArchiveMail(self, msg):
- """Store postings in mbox and/or pipermail archive, depending."""
- # Fork so archival errors won't disrupt normal list delivery
- if config.ARCHIVE_TO_MBOX == -1:
- return
- #
- # We don't need an extra archiver lock here because we know the list
- # itself must be locked.
- if config.ARCHIVE_TO_MBOX in (1, 2):
- self.__archive_to_mbox(msg)
- if config.ARCHIVE_TO_MBOX == 1:
- # Archive to mbox only.
- return
- txt = str(msg)
- # should we use the internal or external archiver?
- private_p = self.archive_private
- if config.PUBLIC_EXTERNAL_ARCHIVER and not private_p:
- self.ExternalArchive(config.PUBLIC_EXTERNAL_ARCHIVER, txt)
- elif config.PRIVATE_EXTERNAL_ARCHIVER and private_p:
- self.ExternalArchive(config.PRIVATE_EXTERNAL_ARCHIVER, txt)
- else:
- # use the internal archiver
- f = StringIO(txt)
- import HyperArch
- h = HyperArch.HyperArchive(self)
- h.processUnixMailbox(f)
- h.close()
- f.close()
-
- #
- # called from MailList.MailList.Save()
- #
- def CheckHTMLArchiveDir(self):
- # We need to make sure that the archive directory has the right perms
- # for public vs private. If it doesn't exist, or some weird
- # permissions errors prevent us from stating the directory, it's
- # pointless to try to fix the perms, so we just return -scott
- if config.ARCHIVE_TO_MBOX == -1:
- # Archiving is completely disabled, don't require the skeleton.
- return
- pubdir = os.path.join(config.PUBLIC_ARCHIVE_FILE_DIR,
- self.fqdn_listname)
- privdir = self.archive_dir()
- pubmbox = pubdir + '.mbox'
- privmbox = privdir + '.mbox'
- if self.archive_private:
- breaklink(pubdir)
- breaklink(pubmbox)
- else:
-            # BAW: privdir or privmbox could be nonexistent. We'd get an
- # OSError, ENOENT which should be caught and reported properly.
- makelink(privdir, pubdir)
- # Only make this link if the site has enabled public mbox files
- if config.PUBLIC_MBOX:
- makelink(privmbox, pubmbox)
diff --git a/mailman/Archiver/HyperArch.py b/mailman/Archiver/HyperArch.py
deleted file mode 100644
index d9477cc3f..000000000
--- a/mailman/Archiver/HyperArch.py
+++ /dev/null
@@ -1,1237 +0,0 @@
-# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""HyperArch: Pipermail archiving for Mailman
-
- - The Dragon De Monsyne <dragondm@integral.org>
-
- TODO:
- - Should be able to force all HTML to be regenerated next time the
- archive is run, in case a template is changed.
- - Run a command to generate tarball of html archives for downloading
- (probably in the 'update_dirty_archives' method).
-"""
-
-import os
-import re
-import sys
-import gzip
-import time
-import errno
-import urllib
-import logging
-import weakref
-import binascii
-
-from email.Charset import Charset
-from email.Errors import HeaderParseError
-from email.Header import decode_header, make_header
-from lazr.config import as_boolean
-from locknix import lockfile
-from locknix.lockfile import Lock
-from string import Template
-
-from mailman import Utils
-from mailman import i18n
-from mailman.Archiver import HyperDatabase
-from mailman.Archiver import pipermail
-from mailman.Mailbox import ArchiverMailbox
-from mailman.config import config
-
-
-log = logging.getLogger('mailman.error')
-
-# Set up i18n. Assume the current language has already been set in the caller.
-_ = i18n._
-
-EMPTYSTRING = ''
-NL = '\n'
-
-# MacOSX has a default stack size that is too small for deeply recursive
-# regular expressions. We see this as crashes in the Python test suite when
-# running test_re.py and test_sre.py. The fix is to set the stack limit to
-# 2048; the general recommendation is to do in the shell before running the
-# test suite. But that's inconvenient for a daemon like the qrunner.
-#
-# AFAIK, this problem only affects the archiver, so we're adding this
-# workaround to this file (it'll get imported by the bundled pipermail or by
-# the bin/arch script). We also only do this on darwin, a.k.a. MacOSX.
-if sys.platform == 'darwin':
- try:
- import resource
- except ImportError:
- pass
- else:
- soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
- newsoft = min(hard, max(soft, 1024*2048))
- resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard))
-
-
-
-def html_quote(s, lang=None):
- repls = ( ('&', '&amp;'),
- ("<", '&lt;'),
- (">", '&gt;'),
- ('"', '&quot;'))
- for thing, repl in repls:
- s = s.replace(thing, repl)
- return Utils.uncanonstr(s, lang)
-
-
-def url_quote(s):
- return urllib.quote(s)
-
-
-def null_to_space(s):
- return s.replace('\000', ' ')
-
-
-def sizeof(filename, lang):
- try:
- size = os.path.getsize(filename)
- except OSError, e:
- # ENOENT can happen if the .mbox file was moved away or deleted, and
- # an explicit mbox file name was given to bin/arch.
- if e.errno <> errno.ENOENT: raise
- return _('size not available')
- if size < 1000:
- with i18n.using_language(lang):
- out = _(' %(size)i bytes ')
- return out
- elif size < 1000000:
- return ' %d KB ' % (size / 1000)
- # GB?? :-)
- return ' %d MB ' % (size / 1000000)
-
-
-html_charset = '<META http-equiv="Content-Type" ' \
- 'content="text/html; charset=%s">'
-
-def CGIescape(arg, lang=None):
- if isinstance(arg, unicode):
- s = Utils.websafe(arg)
- else:
- s = Utils.websafe(str(arg))
- return Utils.uncanonstr(s.replace('"', '&quot;'), lang)
-
-# Parenthesized human name
-paren_name_pat = re.compile(r'([(].*[)])')
-
-# Subject lines preceded with 'Re:'
-REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE)
-
-# E-mail addresses and URLs in text
-emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
-
-# Argh! This pattern is buggy, and will choke on URLs with GET parameters.
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text
-
-# Blank lines
-blankpat = re.compile(r'^\s*$')
-
-# Starting <html> directive
-htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE)
-# Ending </html> directive
-nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE)
-# Match quoted text
-quotedpat = re.compile(r'^([>|:]|&gt;)+')
-
-
-
-# Like Utils.maketext() but with caching to improve performance.
-#
-# _templatefilepathcache is used to associate a (templatefile, lang, listname)
-# key with the file system path to a template file. This path is the one
-# that the Utils.findtext() function computed as the match for the values
-# in the key tuple.
-#
-# _templatecache associates a file system path as key with the text
-# returned after processing the contents of that file by Utils.findtext().
-#
-# We keep two caches to reduce the amount of template text kept in memory,
-# since the _templatefilepathcache is a many->one mapping and _templatecache
-# is a one->one mapping. Imagine 1000 lists all using the same default
-# English template.
-
-_templatefilepathcache = {}
-_templatecache = {}
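
A toy illustration of the two cache shapes described above; all keys and
values here are hypothetical. The path cache is many-to-one, so the
one-to-one template cache holds each template's text only once:

    _templatefilepathcache = {
        ('article.html', 'en', 'list-a@example.com'):
            '/templates/en/article.html',
        ('article.html', 'en', 'list-b@example.com'):
            '/templates/en/article.html',
    }
    _templatecache = {
        '/templates/en/article.html': '... template text, stored once ...',
    }
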
-
-def quick_maketext(templatefile, dict=None, lang=None, mlist=None):
- if mlist is None:
- listname = ''
- else:
- listname = mlist.fqdn_listname
- if lang is None:
- if mlist is None:
- lang = config.mailman.default_language
- else:
- lang = mlist.preferred_language
- cachekey = (templatefile, lang, listname)
- filepath = _templatefilepathcache.get(cachekey)
- if filepath:
- template = _templatecache.get(filepath)
- if filepath is None or template is None:
- # Use the basic maketext, with defaults to get the raw template
- template, filepath = Utils.findtext(templatefile, lang=lang,
- raw=True, mlist=mlist)
- _templatefilepathcache[cachekey] = filepath
- _templatecache[filepath] = template
- # Copied from Utils.maketext()
- text = template
- if dict is not None:
- try:
- try:
- text = Template(template).safe_substitute(**dict)
- except UnicodeError:
- # Try again after coercing the template to unicode
- utemplate = unicode(template,
- Utils.GetCharSet(lang),
- 'replace')
- text = Template(utemplate).safe_substitute(**dict)
- except (TypeError, ValueError):
- # The template is really screwed up
- pass
- # Make sure the text is in the given character set, or html-ify any bogus
- # characters.
- return Utils.uncanonstr(text, lang)
-
-
-
-# Note: I'm overriding most, if not all of the pipermail Article class
-# here -ddm
-# The Article class encapsulates a single posting. The attributes are:
-#
-# sequence : Sequence number, unique for each article in a set of archives
-# subject : Subject
-# datestr : The posting date, in human-readable format
-# date : The posting date, in purely numeric format
-# fromdate : The posting date, in `unixfrom' format
-# headers : Any other headers of interest
-# author : The author's name (and possibly organization)
-# email : The author's e-mail address
-# msgid : A unique message ID
-# in_reply_to : If !="", this is the msgid of the article being replied to
-# references: A (possibly empty) list of msgid's of earlier articles in
-# the thread
-# body : A list of strings making up the message body
-
-class Article(pipermail.Article):
- __super_init = pipermail.Article.__init__
- __super_set_date = pipermail.Article._set_date
-
- _last_article_time = time.time()
-
- def __init__(self, message=None, sequence=0, keepHeaders=[],
- lang=config.mailman.default_language, mlist=None):
- self.__super_init(message, sequence, keepHeaders)
- self.prev = None
- self.next = None
- # Trim Re: from the subject line
- i = 0
- while i != -1:
- result = REpat.match(self.subject)
- if result:
- i = result.end(0)
- self.subject = self.subject[i:]
- else:
- i = -1
- # Useful to keep around
- self._lang = lang
- self._mlist = mlist
-
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- # Avoid i18n side-effects. Note that the language for this
- # article (for this list) could be different from the site-wide
- # preferred language, so we need to ensure no side-effects will
- # occur. Think what happens when executing bin/arch.
- with i18n.using_language(lang):
- if self.author == self.email:
- self.author = self.email = re.sub('@', _(' at '),
- self.email)
- else:
- self.email = re.sub('@', _(' at '), self.email)
- # Snag the content-* headers. RFC 1521 states that their values are
- # case insensitive.
- ctype = message.get('Content-Type', 'text/plain')
- cenc = message.get('Content-Transfer-Encoding', '')
- self.ctype = ctype.lower()
- self.cenc = cenc.lower()
- self.decoded = {}
- cset = Utils.GetCharSet(mlist.preferred_language)
- cset_out = Charset(cset).output_charset or cset
- charset = message.get_content_charset(cset_out)
- if charset:
- charset = charset.lower().strip()
- if charset[0]=='"' and charset[-1]=='"':
- charset = charset[1:-1]
- if charset[0]=="'" and charset[-1]=="'":
- charset = charset[1:-1]
- try:
- body = message.get_payload(decode=True)
- except binascii.Error:
- body = None
- if body and charset != Utils.GetCharSet(self._lang):
- # decode body
- try:
- body = unicode(body, charset)
- except (UnicodeError, LookupError):
- body = None
- if body:
- self.body = [l + "\n" for l in body.splitlines()]
-
- self.decode_headers()
-
- def __getstate__(self):
- d = self.__dict__.copy()
- # We definitely don't want to pickle the MailList instance, so just
- # pickle a reference to it.
- if d.has_key('_mlist'):
- mlist = d['_mlist']
- del d['_mlist']
- else:
- mlist = None
- if mlist:
- d['__listname'] = self._mlist.fqdn_listname
- else:
- d['__listname'] = None
- # Delete a few other things we don't want in the pickle
- for attr in ('prev', 'next', 'body'):
- if d.has_key(attr):
- del d[attr]
- d['body'] = []
- return d
-
- def __setstate__(self, d):
- # For loading older Articles via pickle. All this stuff was added
- # when Simone Piunni and Tokio Kikuchi i18n'ified Pipermail. See SF
- # patch #594771.
- self.__dict__ = d
- listname = d.get('__listname')
- if listname:
- del d['__listname']
- d['_mlist'] = config.db.list_manager.get(listname)
- if not d.has_key('_lang'):
- if hasattr(self, '_mlist'):
- self._lang = self._mlist.preferred_language
- else:
- self._lang = config.mailman.default_language
- if not d.has_key('cenc'):
- self.cenc = None
- if not d.has_key('decoded'):
- self.decoded = {}
-
- def setListIfUnset(self, mlist):
- if getattr(self, '_mlist', None) is None:
- self._mlist = mlist
-
- def quote(self, buf):
- return html_quote(buf, self._lang)
-
- def decode_headers(self):
- """MIME-decode headers.
-
- If the email, subject, or author attributes contain non-ASCII
- characters using the encoded-word syntax of RFC 2047, decoded versions
- of those attributes are placed in the self.decoded (a dictionary).
-
- If the list's charset differs from the header charset, an attempt is
- made to decode the headers as Unicode. If that fails, they are left
- undecoded.
- """
- author = self.decode_charset(self.author)
- subject = self.decode_charset(self.subject)
- if author:
- self.decoded['author'] = author
- email = self.decode_charset(self.email)
- if email:
- self.decoded['email'] = email
- if subject:
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- with i18n.using_language(self._lang):
- atmark = _(' at ')
- subject = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
- '\g<1>' + atmark + '\g<2>', subject)
- self.decoded['subject'] = subject
- self.decoded['stripped'] = self.strip_subject(subject or self.subject)
-
- def strip_subject(self, subject):
- # Strip subject_prefix and Re: for subject sorting
- # This part was taken from CookHeaders.py (TK)
- prefix = self._mlist.subject_prefix.strip()
- if prefix:
- prefix_pat = re.escape(prefix)
- prefix_pat = '%'.join(prefix_pat.split(r'\%'))
- prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat)
- subject = re.sub(prefix_pat, '', subject)
- subject = subject.lstrip()
- strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)
- stripped = strip_pat.sub('', subject)
- return stripped
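
A sketch of what strip_subject() does, assuming a list whose subject_prefix
is '[mylist %d]': '[mylist 42] Re: AW: Re[2]: hello' first loses the
rendered prefix via prefix_pat, then the whole run of reply markers via
strip_pat, leaving 'hello'. The second step in isolation:

    import re

    strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)
    strip_pat.sub('', 'Re: AW: Re[2]: hello')   # -> 'hello'
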
-
- def decode_charset(self, field):
- # TK: This function was rewritten for unifying to Unicode.
- # Convert 'field' into Unicode one line string.
- try:
- pairs = decode_header(field)
- ustr = make_header(pairs).__unicode__()
- except (LookupError, UnicodeError, ValueError, HeaderParseError):
- # assume list's language
- cset = Utils.GetCharSet(self._mlist.preferred_language)
- if cset == 'us-ascii':
- cset = 'iso-8859-1' # assume this for English list
- ustr = unicode(field, cset, 'replace')
- return u''.join(ustr.splitlines())
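
For reference, the Python 2 email API calls used above turn an RFC 2047
encoded-word into a Unicode string like this (a minimal sketch):

    from email.Header import decode_header, make_header

    pairs = decode_header('=?iso-8859-1?q?Schm=F6dling?=')
    # pairs == [('Schm\xf6dling', 'iso-8859-1')]
    ustr = make_header(pairs).__unicode__()
    # ustr == u'Schm\xf6dling'
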
-
- def as_html(self):
- d = self.__dict__.copy()
- # avoid i18n side-effects
- with i18n.using_language(self._lang):
- d["prev"], d["prev_wsubj"] = self._get_prev()
- d["next"], d["next_wsubj"] = self._get_next()
-
- d["email_html"] = self.quote(self.email)
- d["title"] = self.quote(self.subject)
- d["subject_html"] = self.quote(self.subject)
- d["subject_url"] = url_quote(self.subject)
- d["in_reply_to_url"] = url_quote(self.in_reply_to)
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- # Point the mailto url back to the list
- author = re.sub('@', _(' at '), self.author)
- emailurl = self._mlist.posting_address
- else:
- author = self.author
- emailurl = self.email
- d["author_html"] = self.quote(author)
- d["email_url"] = url_quote(emailurl)
- d["datestr_html"] = self.quote(i18n.ctime(int(self.date)))
- d["body"] = self._get_body()
- d['listurl'] = self._mlist.script_url('listinfo')
- d['listname'] = self._mlist.real_name
- d['encoding'] = ''
- charset = Utils.GetCharSet(self._lang)
- d["encoding"] = html_charset % charset
-
- self._add_decoded(d)
- return quick_maketext(
- 'article.html', d,
- lang=self._lang, mlist=self._mlist)
-
- def _get_prev(self):
- """Return the href and subject for the previous message"""
- if self.prev:
- subject = self._get_subject_enc(self.prev)
- prev = ('<LINK REL="Previous" HREF="%s">'
- % (url_quote(self.prev.filename)))
- prev_wsubj = ('<LI>' + _('Previous message (by thread):') +
- ' <A HREF="%s">%s\n</A></li>'
- % (url_quote(self.prev.filename),
- self.quote(subject)))
- else:
- prev = prev_wsubj = ""
- return prev, prev_wsubj
-
- def _get_subject_enc(self, art):
- """Return the subject of art, decoded if possible.
-
- If the charset of the current message and art match and the
- article's subject is encoded, decode it.
- """
- return art.decoded.get('subject', art.subject)
-
- def _get_next(self):
-        """Return the href and subject for the next message"""
- if self.next:
- subject = self._get_subject_enc(self.next)
- next = ('<LINK REL="Next" HREF="%s">'
- % (url_quote(self.next.filename)))
- next_wsubj = ('<LI>' + _('Next message (by thread):') +
- ' <A HREF="%s">%s\n</A></li>'
- % (url_quote(self.next.filename),
- self.quote(subject)))
- else:
- next = next_wsubj = ""
- return next, next_wsubj
-
- _rx_quote = re.compile('=([A-F0-9][A-F0-9])')
- _rx_softline = re.compile('=[ \t]*$')
-
- def _get_body(self):
- """Return the message body ready for HTML, decoded if necessary"""
- try:
- body = self.html_body
- except AttributeError:
- body = self.body
- return null_to_space(EMPTYSTRING.join(body))
-
- def _add_decoded(self, d):
- """Add encoded-word keys to HTML output"""
- for src, dst in (('author', 'author_html'),
- ('email', 'email_html'),
- ('subject', 'subject_html'),
- ('subject', 'title')):
- if self.decoded.has_key(src):
- d[dst] = self.quote(self.decoded[src])
-
- def as_text(self):
- d = self.__dict__.copy()
- # We need to guarantee a valid From_ line, even if there are
-        # bogosities in the headers.
- if not d.get('fromdate', '').strip():
- d['fromdate'] = time.ctime(time.time())
- if not d.get('email', '').strip():
- d['email'] = 'bogus@does.not.exist.com'
- if not d.get('datestr', '').strip():
- d['datestr'] = time.ctime(time.time())
- #
- headers = ['From %(email)s %(fromdate)s',
- 'From: %(email)s (%(author)s)',
- 'Date: %(datestr)s',
- 'Subject: %(subject)s']
- if d['_in_reply_to']:
- headers.append('In-Reply-To: %(_in_reply_to)s')
- if d['_references']:
- headers.append('References: %(_references)s')
- if d['_message_id']:
- headers.append('Message-ID: %(_message_id)s')
- body = EMPTYSTRING.join(self.body)
- cset = Utils.GetCharSet(self._lang)
- # Coerce the body to Unicode and replace any invalid characters.
- if not isinstance(body, unicode):
- body = unicode(body, cset, 'replace')
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- with i18n.using_language(self._lang):
- atmark = _(' at ')
- body = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
- '\g<1>' + atmark + '\g<2>', body)
- # Return body to character set of article.
- body = body.encode(cset, 'replace')
- return NL.join(headers) % d + '\n\n' + body + '\n'
-
- def _set_date(self, message):
- self.__super_set_date(message)
- self.fromdate = time.ctime(int(self.date))
-
- def loadbody_fromHTML(self,fileobj):
- self.body = []
- begin = 0
- while 1:
- line = fileobj.readline()
- if not line:
- break
- if not begin:
- if line.strip() == '<!--beginarticle-->':
- begin = 1
- continue
- if line.strip() == '<!--endarticle-->':
- break
- self.body.append(line)
-
- def finished_update_article(self):
- self.body = []
- try:
- del self.html_body
- except AttributeError:
- pass
-
-
-class HyperArchive(pipermail.T):
- __super_init = pipermail.T.__init__
- __super_update_archive = pipermail.T.update_archive
- __super_update_dirty_archives = pipermail.T.update_dirty_archives
- __super_add_article = pipermail.T.add_article
-
- # some defaults
- DIRMODE = 02775
- FILEMODE = 0660
-
- VERBOSE = 0
- DEFAULTINDEX = 'thread'
- ARCHIVE_PERIOD = 'month'
-
- THREADLAZY = 0
- THREADLEVELS = 3
-
- ALLOWHTML = 1 # "Lines between <html></html>" handled as is.
- SHOWHTML = 0 # Eg, nuke leading whitespace in html manner.
- IQUOTES = 1 # Italicize quoted text.
- SHOWBR = 0 # Add <br> onto every line
-
- def __init__(self, maillist):
- # can't init the database while other processes are writing to it!
- dir = maillist.archive_dir()
- db = HyperDatabase.HyperDatabase(dir, maillist)
- self.__super_init(dir, reload=1, database=db)
-
- self.maillist = maillist
- self._lock_file = None
- self.lang = maillist.preferred_language
- self.charset = Utils.GetCharSet(maillist.preferred_language)
-
- if hasattr(self.maillist,'archive_volume_frequency'):
- if self.maillist.archive_volume_frequency == 0:
- self.ARCHIVE_PERIOD='year'
- elif self.maillist.archive_volume_frequency == 2:
- self.ARCHIVE_PERIOD='quarter'
- elif self.maillist.archive_volume_frequency == 3:
- self.ARCHIVE_PERIOD='week'
- elif self.maillist.archive_volume_frequency == 4:
- self.ARCHIVE_PERIOD='day'
- else:
- self.ARCHIVE_PERIOD='month'
-
- yre = r'(?P<year>[0-9]{4,4})'
- mre = r'(?P<month>[01][0-9])'
- dre = r'(?P<day>[0123][0-9])'
- self._volre = {
- 'year': '^' + yre + '$',
- 'quarter': '^' + yre + r'q(?P<quarter>[1234])$',
- 'month': '^' + yre + r'-(?P<month>[a-zA-Z]+)$',
- 'week': r'^Week-of-Mon-' + yre + mre + dre,
- 'day': '^' + yre + mre + dre + '$'
- }
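
The volume-name patterns built above recognize names like the following (a
sketch reusing the same regexps):

    import re

    yre = r'(?P<year>[0-9]{4,4})'
    m = re.match('^' + yre + r'q(?P<quarter>[1234])$', '2009q1')
    m.group('year'), m.group('quarter')   # -> ('2009', '1')
    m = re.match(r'^Week-of-Mon-' + yre +
                 r'(?P<month>[01][0-9])(?P<day>[0123][0-9])',
                 'Week-of-Mon-20090119')
    m.group('year'), m.group('month'), m.group('day')
    # -> ('2009', '01', '19')
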
-
- def _makeArticle(self, msg, sequence):
- return Article(msg, sequence,
- lang=self.maillist.preferred_language,
- mlist=self.maillist)
-
- def html_foot(self):
- # avoid i18n side-effects
- mlist = self.maillist
- # Convenience
- def quotetime(s):
- return html_quote(i18n.ctime(s), self.lang)
- with i18n.using_language(mlist.preferred_language):
- d = {"lastdate": quotetime(self.lastdate),
- "archivedate": quotetime(self.archivedate),
- "listinfo": mlist.script_url('listinfo'),
- "version": self.version,
- }
- i = {"thread": _("thread"),
- "subject": _("subject"),
- "author": _("author"),
- "date": _("date")
- }
- for t in i.keys():
- cap = t[0].upper() + t[1:]
- if self.type == cap:
- d["%s_ref" % (t)] = ""
- else:
- d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>'
- % (t, i[t]))
- return quick_maketext(
- 'archidxfoot.html', d,
- mlist=mlist)
-
- def html_head(self):
- # avoid i18n side-effects
- mlist = self.maillist
- # Convenience
- def quotetime(s):
- return html_quote(i18n.ctime(s), self.lang)
- with i18n.using_language(mlist.preferred_language):
- d = {"listname": html_quote(mlist.real_name, self.lang),
- "archtype": self.type,
- "archive": self.volNameToDesc(self.archive),
- "listinfo": mlist.script_url('listinfo'),
- "firstdate": quotetime(self.firstdate),
- "lastdate": quotetime(self.lastdate),
- "size": self.size,
- }
- i = {"thread": _("thread"),
- "subject": _("subject"),
- "author": _("author"),
- "date": _("date"),
- }
- for t in i.keys():
- cap = t[0].upper() + t[1:]
- if self.type == cap:
- d["%s_ref" % (t)] = ""
- d["archtype"] = i[t]
- else:
- d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>'
- % (t, i[t]))
- if self.charset:
- d["encoding"] = html_charset % self.charset
- else:
- d["encoding"] = ""
- return quick_maketext(
- 'archidxhead.html', d,
- mlist=mlist)
-
- def html_TOC(self):
- mlist = self.maillist
- listname = mlist.fqdn_listname
- mbox = os.path.join(mlist.archive_dir()+'.mbox', listname+'.mbox')
- d = {"listname": mlist.real_name,
- "listinfo": mlist.script_url('listinfo'),
- "fullarch": '../%s.mbox/%s.mbox' % (listname, listname),
- "size": sizeof(mbox, mlist.preferred_language),
- 'meta': '',
- }
- # Avoid i18n side-effects
- with i18n.using_language(mlist.preferred_language):
- if not self.archives:
- d["noarchive_msg"] = _(
- '<P>Currently, there are no archives. </P>')
- d["archive_listing_start"] = ""
- d["archive_listing_end"] = ""
- d["archive_listing"] = ""
- else:
- d["noarchive_msg"] = ""
- d["archive_listing_start"] = quick_maketext(
- 'archliststart.html',
- lang=mlist.preferred_language,
- mlist=mlist)
- d["archive_listing_end"] = quick_maketext(
- 'archlistend.html',
- mlist=mlist)
-
- accum = []
- for a in self.archives:
- accum.append(self.html_TOC_entry(a))
- d["archive_listing"] = EMPTYSTRING.join(accum)
- # The TOC is always in the charset of the list's preferred language
- d['meta'] += html_charset % Utils.GetCharSet(mlist.preferred_language)
- # The site can disable public access to the mbox file.
- if as_boolean(config.archiver.pipermail.public_mbox):
- template = 'archtoc.html'
- else:
- template = 'archtocnombox.html'
- return quick_maketext(template, d, mlist=mlist)
-
- def html_TOC_entry(self, arch):
- # Check to see if the archive is gzip'd or not
- txtfile = os.path.join(self.maillist.archive_dir(), arch + '.txt')
- gzfile = txtfile + '.gz'
- # which exists? .txt.gz first, then .txt
- if os.path.exists(gzfile):
- file = gzfile
- url = arch + '.txt.gz'
- templ = '<td><A href="%(url)s">[ ' + _('Gzip\'d Text%(sz)s') \
- + ']</a></td>'
- elif os.path.exists(txtfile):
- file = txtfile
- url = arch + '.txt'
- templ = '<td><A href="%(url)s">[ ' + _('Text%(sz)s') + ']</a></td>'
- else:
- # neither found?
- file = None
- # in Python 1.5.2 we have an easy way to get the size
- if file:
- textlink = templ % {
- 'url': url,
- 'sz' : sizeof(file, self.maillist.preferred_language)
- }
- else:
- # there's no archive file at all... hmmm.
- textlink = ''
- return quick_maketext(
- 'archtocentry.html',
- {'archive': arch,
- 'archivelabel': self.volNameToDesc(arch),
- 'textlink': textlink
- },
- mlist=self.maillist)
-
- def GetArchLock(self):
- if self._lock_file:
- return 1
- self._lock_file = Lock(
- os.path.join(config.LOCK_DIR,
- self.maillist.fqdn_listname + '-arch.lock'))
- try:
- self._lock_file.lock(timeout=0.5)
- except lockfile.TimeOutError:
- return 0
- return 1
-
- def DropArchLock(self):
- if self._lock_file:
- self._lock_file.unlock(unconditionally=1)
- self._lock_file = None
-
- def processListArch(self):
- name = self.maillist.ArchiveFileName()
- wname= name+'.working'
- ename= name+'.err_unarchived'
- try:
- os.stat(name)
- except (IOError,os.error):
-            # no archive file, nothing to do -ddm
- return
-
- #see if arch is locked here -ddm
- if not self.GetArchLock():
- #another archiver is running, nothing to do. -ddm
- return
-
- #if the working file is still here, the archiver may have
- # crashed during archiving. Save it, log an error, and move on.
- try:
- wf = open(wname)
- log.error('Archive working file %s present. '
- 'Check %s for possibly unarchived msgs',
- wname, ename)
- omask = os.umask(007)
- try:
- ef = open(ename, 'a+')
- finally:
- os.umask(omask)
-            ef.seek(-1, 2)
- if ef.read(1) <> '\n':
- ef.write('\n')
- ef.write(wf.read())
- ef.close()
- wf.close()
- os.unlink(wname)
- except IOError:
- pass
- os.rename(name,wname)
- archfile = open(wname)
- self.processUnixMailbox(archfile)
- archfile.close()
- os.unlink(wname)
- self.DropArchLock()
-
- def get_filename(self, article):
- return '%06i.html' % (article.sequence,)
-
- def get_archives(self, article):
- """Return a list of indexes where the article should be filed.
- A string can be returned if the list only contains one entry,
- and the empty list is legal."""
- res = self.dateToVolName(float(article.date))
- self.message(_("figuring article archives\n"))
- self.message(res + "\n")
- return res
-
- def volNameToDesc(self, volname):
- volname = volname.strip()
- # Don't make these module global constants since we have to runtime
- # translate them anyway.
- monthdict = [
- '',
- _('January'), _('February'), _('March'), _('April'),
- _('May'), _('June'), _('July'), _('August'),
- _('September'), _('October'), _('November'), _('December')
- ]
- for each in self._volre.keys():
- match = re.match(self._volre[each], volname)
- # Let ValueErrors percolate up
- if match:
- year = int(match.group('year'))
- if each == 'quarter':
- d =["", _("First"), _("Second"), _("Third"), _("Fourth") ]
- ord = d[int(match.group('quarter'))]
- return _("%(ord)s quarter %(year)i")
- elif each == 'month':
- monthstr = match.group('month').lower()
- for i in range(1, 13):
- monthname = time.strftime("%B", (1999,i,1,0,0,0,0,1,0))
- if monthstr.lower() == monthname.lower():
- month = monthdict[i]
- return _("%(month)s %(year)i")
- raise ValueError, "%s is not a month!" % monthstr
- elif each == 'week':
- month = monthdict[int(match.group("month"))]
- day = int(match.group("day"))
- return _("The Week Of Monday %(day)i %(month)s %(year)i")
- elif each == 'day':
- month = monthdict[int(match.group("month"))]
- day = int(match.group("day"))
- return _("%(day)i %(month)s %(year)i")
- else:
- return match.group('year')
- raise ValueError, "%s is not a valid volname" % volname
-
-# The following two methods should be inverses of each other. -ddm
-
- def dateToVolName(self,date):
- datetuple=time.localtime(date)
- if self.ARCHIVE_PERIOD=='year':
- return time.strftime("%Y",datetuple)
- elif self.ARCHIVE_PERIOD=='quarter':
- if datetuple[1] in [1,2,3]:
- return time.strftime("%Yq1",datetuple)
- elif datetuple[1] in [4,5,6]:
- return time.strftime("%Yq2",datetuple)
- elif datetuple[1] in [7,8,9]:
- return time.strftime("%Yq3",datetuple)
- else:
- return time.strftime("%Yq4",datetuple)
- elif self.ARCHIVE_PERIOD == 'day':
- return time.strftime("%Y%m%d", datetuple)
- elif self.ARCHIVE_PERIOD == 'week':
- # Reconstruct "seconds since epoch", and subtract weekday
- # multiplied by the number of seconds in a day.
- monday = time.mktime(datetuple) - datetuple[6] * 24 * 60 * 60
- # Build a new datetuple from this "seconds since epoch" value
- datetuple = time.localtime(monday)
- return time.strftime("Week-of-Mon-%Y%m%d", datetuple)
- # month. -ddm
- else:
- return time.strftime("%Y-%B",datetuple)
-
-
- def volNameToDate(self, volname):
- volname = volname.strip()
- for each in self._volre.keys():
- match = re.match(self._volre[each],volname)
- if match:
- year = int(match.group('year'))
- month = 1
- day = 1
- if each == 'quarter':
- q = int(match.group('quarter'))
- month = (q * 3) - 2
- elif each == 'month':
- monthstr = match.group('month').lower()
- m = []
- for i in range(1,13):
- m.append(
- time.strftime("%B",(1999,i,1,0,0,0,0,1,0)).lower())
- try:
- month = m.index(monthstr) + 1
- except ValueError:
- pass
- elif each == 'week' or each == 'day':
- month = int(match.group("month"))
- day = int(match.group("day"))
- try:
- return time.mktime((year,month,1,0,0,0,0,1,-1))
- except OverflowError:
- return 0.0
- return 0.0
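
A round-trip sketch for the default monthly period (note that
volNameToDate() deliberately maps back to the start of the volume, not to
the original date):

    import time

    # What dateToVolName() computes for 25 January 2009:
    datetuple = time.localtime(time.mktime((2009, 1, 25, 0, 0, 0, 0, 1, -1)))
    time.strftime('%Y-%B', datetuple)   # -> '2009-January'
    # volNameToDate('2009-January') then rebuilds
    # time.mktime((2009, 1, 1, 0, 0, 0, 0, 1, -1)), i.e. 1 January 2009.
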
-
- def sortarchives(self):
- def sf(a, b):
- al = self.volNameToDate(a)
- bl = self.volNameToDate(b)
- if al > bl:
- return 1
- elif al < bl:
- return -1
- else:
- return 0
- if self.ARCHIVE_PERIOD in ('month','year','quarter'):
- self.archives.sort(sf)
- else:
- self.archives.sort()
- self.archives.reverse()
-
- def message(self, msg):
- if self.VERBOSE:
- f = sys.stderr
- f.write(msg)
- if msg[-1:] != '\n':
- f.write('\n')
- f.flush()
-
- def open_new_archive(self, archive, archivedir):
- index_html = os.path.join(archivedir, 'index.html')
- try:
- os.unlink(index_html)
- except:
- pass
- os.symlink(self.DEFAULTINDEX+'.html',index_html)
-
- def write_index_header(self):
- self.depth=0
- print self.html_head()
- if not self.THREADLAZY and self.type=='Thread':
- self.message(_("Computing threaded index\n"))
- self.updateThreadedIndex()
-
- def write_index_footer(self):
- for i in range(self.depth):
- print '</UL>'
- print self.html_foot()
-
- def write_index_entry(self, article):
- subject = self.get_header("subject", article)
- author = self.get_header("author", article)
- if as_boolean(config.archiver.pipermail.obscure_email_addresses):
- try:
- author = re.sub('@', _(' at '), author)
- except UnicodeError:
- # Non-ASCII author contains '@' ... no valid email anyway
- pass
- subject = CGIescape(subject, self.lang)
- author = CGIescape(author, self.lang)
-
- d = {
- 'filename': urllib.quote(article.filename),
- 'subject': subject,
- 'sequence': article.sequence,
- 'author': author
- }
- print quick_maketext(
- 'archidxentry.html', d,
- mlist=self.maillist)
-
- def get_header(self, field, article):
- # if we have no decoded header, return the encoded one
- result = article.decoded.get(field)
- if result is None:
- return getattr(article, field)
- # otherwise, the decoded one will be Unicode
- return result
-
- def write_threadindex_entry(self, article, depth):
- if depth < 0:
- self.message('depth<0')
- depth = 0
- if depth > self.THREADLEVELS:
- depth = self.THREADLEVELS
- if depth < self.depth:
- for i in range(self.depth-depth):
- print '</UL>'
- elif depth > self.depth:
- for i in range(depth-self.depth):
- print '<UL>'
- print '<!--%i %s -->' % (depth, article.threadKey)
- self.depth = depth
- self.write_index_entry(article)
-
- def write_TOC(self):
- self.sortarchives()
- omask = os.umask(002)
- try:
- toc = open(os.path.join(self.basedir, 'index.html'), 'w')
- finally:
- os.umask(omask)
- toc.write(self.html_TOC())
- toc.close()
-
- def write_article(self, index, article, path):
- # called by add_article
- omask = os.umask(002)
- try:
- f = open(path, 'w')
- finally:
- os.umask(omask)
- f.write(article.as_html())
- f.close()
-
- # Write the text article to the text archive.
- path = os.path.join(self.basedir, "%s.txt" % index)
- omask = os.umask(002)
- try:
- f = open(path, 'a+')
- finally:
- os.umask(omask)
- f.write(article.as_text())
- f.close()
-
- def update_archive(self, archive):
- self.__super_update_archive(archive)
- # only do this if the gzip module was imported globally, and
- # gzip'ing was enabled via Defaults.GZIP_ARCHIVE_TXT_FILES. See
- # above.
- if gzip:
- archz = None
- archt = None
- txtfile = os.path.join(self.basedir, '%s.txt' % archive)
- gzipfile = os.path.join(self.basedir, '%s.txt.gz' % archive)
- oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % archive)
- try:
- # open the plain text file
- archt = open(txtfile)
- except IOError:
- return
- try:
- os.rename(gzipfile, oldgzip)
- archz = gzip.open(oldgzip)
- except (IOError, RuntimeError, os.error):
- pass
- try:
- ou = os.umask(002)
- newz = gzip.open(gzipfile, 'w')
- finally:
- # XXX why is this a finally?
- os.umask(ou)
- if archz:
- newz.write(archz.read())
- archz.close()
- os.unlink(oldgzip)
- # XXX do we really need all this in a try/except?
- try:
- newz.write(archt.read())
- newz.close()
- archt.close()
- except IOError:
- pass
- os.unlink(txtfile)
-
- _skip_attrs = ('maillist', '_lock_file', 'charset')
-
- def getstate(self):
- d={}
- for each in self.__dict__.keys():
- if not (each in self._skip_attrs
- or each.upper() == each):
- d[each] = self.__dict__[each]
- return d
-
- # Add <A HREF="..."> tags around URLs and e-mail addresses.
-
- def __processbody_URLquote(self, lines):
- # XXX a lot to do here:
- # 1. use lines directly, rather than source and dest
- # 2. make it clearer
- # 3. make it faster
- # TK: Prepare for unicode obscure.
- atmark = _(' at ')
- if lines and isinstance(lines[0], unicode):
- atmark = unicode(atmark, Utils.GetCharSet(self.lang), 'replace')
- source = lines[:]
- dest = lines
- last_line_was_quoted = 0
- for i in xrange(0, len(source)):
- Lorig = L = source[i]
- prefix = suffix = ""
- if L is None:
- continue
- # Italicise quoted text
- if self.IQUOTES:
- quoted = quotedpat.match(L)
- if quoted is None:
- last_line_was_quoted = 0
- else:
- quoted = quoted.end(0)
- prefix = CGIescape(L[:quoted], self.lang) + '<i>'
- suffix = '</I>'
- if self.SHOWHTML:
- suffix += '<BR>'
- if not last_line_was_quoted:
- prefix = '<BR>' + prefix
- L = L[quoted:]
- last_line_was_quoted = 1
- # Check for an e-mail address
- L2 = ""
- jr = emailpat.search(L)
- kr = urlpat.search(L)
- while jr is not None or kr is not None:
- if jr == None:
- j = -1
- else:
- j = jr.start(0)
- if kr is None:
- k = -1
- else:
- k = kr.start(0)
- if j != -1 and (j < k or k == -1):
- text = jr.group(1)
- length = len(text)
- if as_boolean(
- config.archiver.pipermail.obscure_email_addresses):
- text = re.sub('@', atmark, text)
- URL = self.maillist.script_url('listinfo')
- else:
- URL = 'mailto:' + text
- pos = j
- elif k != -1 and (j > k or j == -1):
- text = URL = kr.group(1)
- length = len(text)
- pos = k
- else: # j==k
- raise ValueError, "j==k: This can't happen!"
- #length = len(text)
- #self.message("URL: %s %s %s \n"
- # % (CGIescape(L[:pos]), URL, CGIescape(text)))
- L2 += '%s<A HREF="%s">%s</A>' % (
- CGIescape(L[:pos], self.lang),
- html_quote(URL), CGIescape(text, self.lang))
- L = L[pos+length:]
- jr = emailpat.search(L)
- kr = urlpat.search(L)
- if jr is None and kr is None:
- L = CGIescape(L, self.lang)
- L = prefix + L2 + L + suffix
- source[i] = None
- dest[i] = L
-
-    # Perform Hypermail-style processing of <HTML></HTML> directives
-    # in message bodies. Lines between <HTML> and </HTML> will be written
-    # out precisely as they are; other lines are passed on for further
-    # processing.
-
- def __processbody_HTML(self, lines):
- # XXX need to make this method modify in place
- source = lines[:]
- dest = lines
- l = len(source)
- i = 0
- while i < l:
- while i < l and htmlpat.match(source[i]) is None:
- i = i + 1
- if i < l:
- source[i] = None
- i = i + 1
- while i < l and nohtmlpat.match(source[i]) is None:
- dest[i], source[i] = source[i], None
- i = i + 1
- if i < l:
- source[i] = None
- i = i + 1
-
- def format_article(self, article):
- # called from add_article
- # TBD: Why do the HTML formatting here and keep it in the
- # pipermail database? It makes more sense to do the html
- # formatting as the article is being written as html and toss
- # the data after it has been written to the archive file.
- lines = filter(None, article.body)
- # Handle <HTML> </HTML> directives
- if self.ALLOWHTML:
- self.__processbody_HTML(lines)
- self.__processbody_URLquote(lines)
- if not self.SHOWHTML and lines:
- lines.insert(0, '<PRE>')
- lines.append('</PRE>')
- else:
- # Do fancy formatting here
- if self.SHOWBR:
- lines = map(lambda x:x + "<BR>", lines)
- else:
- for i in range(0, len(lines)):
- s = lines[i]
- if s[0:1] in ' \t\n':
- lines[i] = '<P>' + s
- article.html_body = lines
- return article
-
- def update_article(self, arcdir, article, prev, next):
- seq = article.sequence
- filename = os.path.join(arcdir, article.filename)
- self.message(_('Updating HTML for article %(seq)s'))
- try:
- f = open(filename)
- article.loadbody_fromHTML(f)
- f.close()
- except IOError, e:
- if e.errno <> errno.ENOENT: raise
- self.message(_('article file %(filename)s is missing!'))
- article.prev = prev
- article.next = next
- omask = os.umask(002)
- try:
- f = open(filename, 'w')
- finally:
- os.umask(omask)
- f.write(article.as_html())
- f.close()
diff --git a/mailman/Archiver/HyperDatabase.py b/mailman/Archiver/HyperDatabase.py
deleted file mode 100644
index 49928d7b3..000000000
--- a/mailman/Archiver/HyperDatabase.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-#
-# site modules
-#
-import os
-import marshal
-import time
-import errno
-
-#
-# package/project modules
-#
-import pipermail
-from locknix import lockfile
-
-CACHESIZE = pipermail.CACHESIZE
-
-try:
- import cPickle
- pickle = cPickle
-except ImportError:
- import pickle
-
-#
-# we're using a python dict in place of a bsddb.btree database, only
-# defining the parts of the interface used by class HyperDatabase.
-# only one thing can access this at a time.
-#
-class DumbBTree:
- """Stores pickles of Article objects
-
- This dictionary-like object stores pickles of all the Article
- objects. The object itself is stored using marshal. It would be
- much simpler, and probably faster, to store the actual objects in
- the DumbBTree and pickle it.
-
-    TBD: Also needs a more sensible name, like IterableDictionary or
- SortedDictionary.
- """
-
- def __init__(self, path):
- self.current_index = 0
- self.path = path
- self.lockfile = lockfile.Lock(self.path + ".lock")
- self.lock()
- self.__dirty = 0
- self.dict = {}
- self.sorted = []
- self.load()
-
- def __repr__(self):
- return "DumbBTree(%s)" % self.path
-
- def __sort(self, dirty=None):
- if self.__dirty == 1 or dirty:
- self.sorted = self.dict.keys()
- self.sorted.sort()
- self.__dirty = 0
-
- def lock(self):
- self.lockfile.lock()
-
- def unlock(self):
- try:
- self.lockfile.unlock()
- except lockfile.NotLockedError:
- pass
-
- def __delitem__(self, item):
- # if first hasn't been called, we can skip the sort
- if self.current_index == 0:
- del self.dict[item]
- self.__dirty = 1
- return
- try:
- ci = self.sorted[self.current_index]
- except IndexError:
- ci = None
- if ci == item:
- try:
- ci = self.sorted[self.current_index + 1]
- except IndexError:
- ci = None
- del self.dict[item]
- self.__sort(dirty=1)
- if ci is not None:
- self.current_index = self.sorted.index(ci)
- else:
- self.current_index = self.current_index + 1
-
- def clear(self):
- # bulk clearing much faster than deleting each item, esp. with the
- # implementation of __delitem__() above :(
- self.dict = {}
-
- def first(self):
- self.__sort() # guarantee that the list is sorted
- if not self.sorted:
- raise KeyError
- else:
- key = self.sorted[0]
- self.current_index = 1
- return key, self.dict[key]
-
- def last(self):
- if not self.sorted:
- raise KeyError
- else:
- key = self.sorted[-1]
- self.current_index = len(self.sorted) - 1
- return key, self.dict[key]
-
- def next(self):
- try:
- key = self.sorted[self.current_index]
- except IndexError:
- raise KeyError
- self.current_index = self.current_index + 1
- return key, self.dict[key]
-
- def has_key(self, key):
- return self.dict.has_key(key)
-
- def set_location(self, loc):
- if not self.dict.has_key(loc):
- raise KeyError
- self.current_index = self.sorted.index(loc)
-
- def __getitem__(self, item):
- return self.dict[item]
-
- def __setitem__(self, item, val):
- # if first hasn't been called, then we don't need to worry
- # about sorting again
- if self.current_index == 0:
- self.dict[item] = val
- self.__dirty = 1
- return
- try:
- current_item = self.sorted[self.current_index]
- except IndexError:
- current_item = item
- self.dict[item] = val
- self.__sort(dirty=1)
- self.current_index = self.sorted.index(current_item)
-
- def __len__(self):
- return len(self.sorted)
-
- def load(self):
- try:
- fp = open(self.path)
- try:
- self.dict = marshal.load(fp)
- finally:
- fp.close()
- except IOError, e:
- if e.errno <> errno.ENOENT: raise
- pass
- except EOFError:
- pass
- else:
- self.__sort(dirty=1)
-
- def close(self):
- omask = os.umask(007)
- try:
- fp = open(self.path, 'w')
- finally:
- os.umask(omask)
- fp.write(marshal.dumps(self.dict))
- fp.close()
- self.unlock()
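
A usage sketch of the sorted-dictionary interface DumbBTree exposes (the
path is hypothetical; the constructor also creates and holds path +
'.lock'):

    btree = DumbBTree('/tmp/demo-index')
    btree['b'] = 'second'
    btree['a'] = 'first'
    btree.first()   # -> ('a', 'first'), and positions the cursor
    btree.next()    # -> ('b', 'second')
    btree.close()   # marshals the dict to disk and drops the lock
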
-
-
-# this is lifted straight out of pipermail with
-# the bsddb.btree replaced with the class above.
-# didn't use inheritance because of all the
-# __internal stuff that needs to be here -scott
-#
-class HyperDatabase(pipermail.Database):
- __super_addArticle = pipermail.Database.addArticle
-
- def __init__(self, basedir, mlist):
- self.__cache = {}
- self.__currentOpenArchive = None # The currently open indices
- self._mlist = mlist
- self.basedir = os.path.expanduser(basedir)
- # Recently added articles, indexed only by message ID
- self.changed={}
-
- def firstdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- datekey, msgid = self.dateIndex.first()
- date = time.asctime(time.localtime(float(datekey[0])))
- except KeyError:
- pass
- return date
-
- def lastdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- datekey, msgid = self.dateIndex.last()
- date = time.asctime(time.localtime(float(datekey[0])))
- except KeyError:
- pass
- return date
-
- def numArticles(self, archive):
- self.__openIndices(archive)
- return len(self.dateIndex)
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- self.__openIndices(archive)
- self.__super_addArticle(archive, article, subject, author, date)
-
- def __openIndices(self, archive):
- if self.__currentOpenArchive == archive:
- return
- self.__closeIndices()
- arcdir = os.path.join(self.basedir, 'database')
- omask = os.umask(0)
- try:
- try:
- os.mkdir(arcdir, 02770)
- except OSError, e:
- if e.errno <> errno.EEXIST: raise
- finally:
- os.umask(omask)
- for i in ('date', 'author', 'subject', 'article', 'thread'):
- t = DumbBTree(os.path.join(arcdir, archive + '-' + i))
- setattr(self, i + 'Index', t)
- self.__currentOpenArchive = archive
-
- def __closeIndices(self):
- for i in ('date', 'author', 'subject', 'thread', 'article'):
- attr = i + 'Index'
- if hasattr(self, attr):
- index = getattr(self, attr)
- if i == 'article':
- if not hasattr(self, 'archive_length'):
- self.archive_length = {}
- l = len(index)
- self.archive_length[self.__currentOpenArchive] = l
- index.close()
- delattr(self, attr)
- self.__currentOpenArchive = None
-
- def close(self):
- self.__closeIndices()
-
- def hasArticle(self, archive, msgid):
- self.__openIndices(archive)
- return self.articleIndex.has_key(msgid)
-
- def setThreadKey(self, archive, key, msgid):
- self.__openIndices(archive)
- self.threadIndex[key]=msgid
-
- def getArticle(self, archive, msgid):
- self.__openIndices(archive)
- if not self.__cache.has_key(msgid):
- # get the pickled object out of the DumbBTree
- buf = self.articleIndex[msgid]
- article = self.__cache[msgid] = pickle.loads(buf)
- # For upgrading older archives
- article.setListIfUnset(self._mlist)
- else:
- article = self.__cache[msgid]
- return article
-
- def first(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index + 'Index')
- try:
- key, msgid = index.first()
- return msgid
- except KeyError:
- return None
-
- def next(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index + 'Index')
- try:
- key, msgid = index.next()
- return msgid
- except KeyError:
- return None
-
- def getOldestArticle(self, archive, subject):
- self.__openIndices(archive)
- subject = subject.lower()
- try:
- key, tempid=self.subjectIndex.set_location(subject)
- self.subjectIndex.next()
- [subject2, date]= key.split('\0')
- if subject!=subject2: return None
- return tempid
- except KeyError:
- return None
-
- def newArchive(self, archive):
- pass
-
- def clearIndex(self, archive, index):
- self.__openIndices(archive)
- if hasattr(self.threadIndex, 'clear'):
- self.threadIndex.clear()
- return
- finished=0
- try:
- key, msgid=self.threadIndex.first()
- except KeyError: finished=1
- while not finished:
- del self.threadIndex[key]
- try:
- key, msgid=self.threadIndex.next()
- except KeyError: finished=1
diff --git a/mailman/Archiver/__init__.py b/mailman/Archiver/__init__.py
deleted file mode 100644
index 322010acb..000000000
--- a/mailman/Archiver/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-from Archiver import *
diff --git a/mailman/Archiver/pipermail.py b/mailman/Archiver/pipermail.py
deleted file mode 100644
index 19bc05c3f..000000000
--- a/mailman/Archiver/pipermail.py
+++ /dev/null
@@ -1,874 +0,0 @@
-#! /usr/bin/env python
-
-import os
-import re
-import sys
-import time
-import logging
-import mailbox
-
-import cPickle as pickle
-
-from cStringIO import StringIO
-from email.Utils import parseaddr, parsedate_tz, mktime_tz, formatdate
-from string import lowercase
-
-__version__ = '0.11 (Mailman edition)'
-VERSION = __version__
-CACHESIZE = 100 # Number of slots in the cache
-
-from mailman.Mailbox import ArchiverMailbox
-from mailman.core import errors
-from mailman.i18n import _
-
-SPACE = ' '
-
-log = logging.getLogger('mailman.error')
-
-
-
-msgid_pat = re.compile(r'(<.*>)')
-def strip_separators(s):
- "Remove quotes or parenthesization from a Message-ID string"
- if not s:
- return ""
- if s[0] in '"<([' and s[-1] in '">)]':
- s = s[1:-1]
- return s
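
For example (a sketch):

    strip_separators('<20090125.1234@example.com>')  # -> '20090125.1234@example.com'
    strip_separators('"some-id"')                    # -> 'some-id'
    strip_separators('')                             # -> ''
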
-
-smallNameParts = ['van', 'von', 'der', 'de']
-
-def fixAuthor(author):
- "Canonicalize a name into Last, First format"
- # If there's a comma, guess that it's already in "Last, First" format
- if ',' in author:
- return author
- L = author.split()
- i = len(L) - 1
- if i == 0:
- return author # The string's one word--forget it
- if author.upper() == author or author.lower() == author:
- # Damn, the name is all upper- or lower-case.
- while i > 0 and L[i-1].lower() in smallNameParts:
- i = i - 1
- else:
- # Mixed case; assume that small parts of the last name will be
- # in lowercase, and check them against the list.
- while i>0 and (L[i-1][0] in lowercase or
- L[i-1].lower() in smallNameParts):
- i = i - 1
- author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i])
- return author
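
A few examples of fixAuthor()'s behavior (a sketch):

    fixAuthor('John Smith')    # -> 'Smith, John'
    fixAuthor('Smith, John')   # -> unchanged; already 'Last, First'
    fixAuthor('Cher')          # -> unchanged; a single word is left alone
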
-
-# Abstract class for databases
-
-class DatabaseInterface:
- def __init__(self): pass
- def close(self): pass
- def getArticle(self, archive, msgid): pass
- def hasArticle(self, archive, msgid): pass
- def addArticle(self, archive, article, subject=None, author=None,
- date=None): pass
- def firstdate(self, archive): pass
- def lastdate(self, archive): pass
- def first(self, archive, index): pass
- def next(self, archive, index): pass
- def numArticles(self, archive): pass
- def newArchive(self, archive): pass
- def setThreadKey(self, archive, key, msgid): pass
- def getOldestArticle(self, subject): pass
-
-class Database(DatabaseInterface):
- """Define the basic sorting logic for a database
-
- Assumes that the database internally uses dateIndex, authorIndex,
- etc.
- """
-
- # TBD Factor out more of the logic shared between BSDDBDatabase
- # and HyperDatabase and place it in this class.
-
- def __init__(self):
- # This method need not be called by subclasses that do their
- # own initialization.
- self.dateIndex = {}
- self.authorIndex = {}
- self.subjectIndex = {}
- self.articleIndex = {}
- self.changed = {}
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- # create the keys; always end w/ msgid which will be unique
- authorkey = (author or article.author, article.date,
- article.msgid)
- subjectkey = (subject or article.subject, article.date,
- article.msgid)
- datekey = date or article.date, article.msgid
-
- # Add the new article
- self.dateIndex[datekey] = article.msgid
- self.authorIndex[authorkey] = article.msgid
- self.subjectIndex[subjectkey] = article.msgid
-
- self.store_article(article)
- self.changed[archive, article.msgid] = None
-
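-        # Thread keys chain the (zero-padded) dates from the thread root
-        # down to this article, so a plain string sort of thread keys yields
-        # a depth-first, chronological traversal of each thread.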
- parentID = article.parentID
- if parentID is not None and self.articleIndex.has_key(parentID):
- parent = self.getArticle(archive, parentID)
- myThreadKey = parent.threadKey + article.date + '-'
- else:
- myThreadKey = article.date + '-'
- article.threadKey = myThreadKey
- key = myThreadKey, article.msgid
- self.setThreadKey(archive, key, article.msgid)
-
- def store_article(self, article):
- """Store article without message body to save space"""
- # TBD this is not thread safe!
- temp = article.body
- temp2 = article.html_body
- article.body = []
- del article.html_body
- self.articleIndex[article.msgid] = pickle.dumps(article)
- article.body = temp
- article.html_body = temp2
-
-
-# The Article class encapsulates a single posting. The attributes
-# are:
-#
-# sequence : Sequence number, unique for each article in a set of archives
-# subject : Subject
-# datestr : The posting date, in human-readable format
-# date : The posting date, in purely numeric format
-# headers : Any other headers of interest
-# author : The author's name (and possibly organization)
-# email : The author's e-mail address
-# msgid : A unique message ID
-# in_reply_to: If != "", this is the msgid of the article being replied to
-# references : A (possibly empty) list of msgid's of earlier articles
-# in the thread
-# body : A list of strings making up the message body
-
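-# A minimal construction sketch (illustrative message, not from the archive;
-# assumes the Python 2 email package):
-#
-#     >>> import email
-#     >>> msg = email.message_from_string(
-#     ...     'Message-Id: <1@example.com>\n'
-#     ...     'From: Jane Doe <jane@example.com>\n'
-#     ...     'Subject: hello\n'
-#     ...     'Date: Mon, 26 Jan 2009 12:00:00 -0500\n'
-#     ...     '\n'
-#     ...     'body text\n')
-#     >>> art = Article(msg, sequence=1)
-#     >>> art.msgid, art.author
-#     ('1@example.com', 'Jane Doe')
-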
-class Article:
- _last_article_time = time.time()
-
- def __init__(self, message = None, sequence = 0, keepHeaders = []):
- if message is None:
- return
- self.sequence = sequence
-
- self.parentID = None
- self.threadKey = None
-        # Use the Message-ID header as the article's unique identifier;
-        # otherwise the current sequence number is used.
-        id = strip_separators(message['Message-Id'])
-        if id == "":
-            self.msgid = str(self.sequence)
-        else:
-            self.msgid = id
-
- if message.has_key('Subject'):
- self.subject = str(message['Subject'])
- else:
- self.subject = _('No subject')
- if self.subject == "": self.subject = _('No subject')
-
- self._set_date(message)
-
- # Figure out the e-mail address and poster's name. Use the From:
- # field first, followed by Reply-To:
- self.author, self.email = parseaddr(message.get('From', ''))
- e = message['Reply-To']
- if not self.email and e is not None:
- ignoreauthor, self.email = parseaddr(e)
- self.email = strip_separators(self.email)
- self.author = strip_separators(self.author)
-
- if self.author == "":
- self.author = self.email
-
- # Save the In-Reply-To:, References:, and Message-ID: lines
- #
- # TBD: The original code does some munging on these fields, which
- # shouldn't be necessary, but changing this may break code. For
- # safety, I save the original headers on different attributes for use
- # in writing the plain text periodic flat files.
- self._in_reply_to = message['in-reply-to']
- self._references = message['references']
- self._message_id = message['message-id']
-
- i_r_t = message['In-Reply-To']
- if i_r_t is None:
- self.in_reply_to = ''
- else:
- match = msgid_pat.search(i_r_t)
-            if match is None:
-                self.in_reply_to = ''
-            else:
-                self.in_reply_to = strip_separators(match.group(1))
-
- references = message['References']
- if references is None:
- self.references = []
- else:
- self.references = map(strip_separators, references.split())
-
- # Save any other interesting headers
- self.headers = {}
- for i in keepHeaders:
- if message.has_key(i):
- self.headers[i] = message[i]
-
- # Read the message body
-        s = StringIO(message.get_payload(decode=True)
-                     or message.as_string().split('\n\n', 1)[1])
- self.body = s.readlines()
-
- def _set_date(self, message):
- def floatdate(header):
- missing = []
- datestr = message.get(header, missing)
- if datestr is missing:
- return None
- date = parsedate_tz(datestr)
- try:
- return mktime_tz(date)
- except (TypeError, ValueError, OverflowError):
- return None
- date = floatdate('date')
- if date is None:
- date = floatdate('x-list-received-date')
- if date is None:
- # What's left to try?
- date = self._last_article_time + 1
- self._last_article_time = date
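-        # Store the date as an 11-digit, zero-padded epoch string so that
-        # plain string comparison of index keys sorts chronologically
-        # (e.g. 1232908801 becomes '01232908801').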
- self.date = '%011i' % date
- self.datestr = message.get('date') \
- or message.get('x-list-received-date') \
- or formatdate(date)
-
- def __repr__(self):
- return '<Article ID = '+repr(self.msgid)+'>'
-
- def finished_update_article(self):
- pass
-
-# Pipermail formatter class
-
-class T:
- DIRMODE = 0755 # Mode to give to created directories
- FILEMODE = 0644 # Mode to give to created files
- INDEX_EXT = ".html" # Extension for indexes
-
- def __init__(self, basedir = None, reload = 1, database = None):
- # If basedir isn't provided, assume the current directory
- if basedir is None:
- self.basedir = os.getcwd()
- else:
- basedir = os.path.expanduser(basedir)
- self.basedir = basedir
- self.database = database
-
-        # If the directory doesn't exist, create it. This code shouldn't be
-        # run anymore; we create the directory in Archiver.py. It is only
-        # needed for legacy lists that are receiving their first message in
-        # the HTML archive now -- Marc
- try:
- os.stat(self.basedir)
- except os.error, errdata:
- errno, errmsg = errdata
-            if errno != 2:              # 2 == ENOENT; only 'missing' is ok
- raise os.error, errdata
- else:
- self.message(_('Creating archive directory ') + self.basedir)
- omask = os.umask(0)
- try:
- os.mkdir(self.basedir, self.DIRMODE)
- finally:
- os.umask(omask)
-
- # Try to load previously pickled state
- try:
- if not reload:
- raise IOError
- f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
- self.message(_('Reloading pickled archive state'))
- d = pickle.load(f)
- f.close()
- for key, value in d.items():
- setattr(self, key, value)
- except (IOError, EOFError):
- # No pickled version, so initialize various attributes
- self.archives = [] # Archives
- self._dirty_archives = [] # Archives that will have to be updated
- self.sequence = 0 # Sequence variable used for
- # numbering articles
- self.update_TOC = 0 # Does the TOC need updating?
- #
- # make the basedir variable work when passed in as an __init__ arg
- # and different from the one in the pickle. Let the one passed in
- # as an __init__ arg take precedence if it's stated. This way, an
- # archive can be moved from one place to another and still work.
- #
- if basedir != self.basedir:
- self.basedir = basedir
-
- def close(self):
- "Close an archive, save its state, and update any changed archives."
- self.update_dirty_archives()
- self.update_TOC = 0
- self.write_TOC()
- # Save the collective state
- self.message(_('Pickling archive state into ')
- + os.path.join(self.basedir, 'pipermail.pck'))
- self.database.close()
- del self.database
-
- omask = os.umask(007)
- try:
- f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
- finally:
- os.umask(omask)
- pickle.dump(self.getstate(), f)
- f.close()
-
- def getstate(self):
- # can override this in subclass
- return self.__dict__
-
- #
- # Private methods
- #
- # These will be neither overridden nor called by custom archivers.
- #
-
-
- # Create a dictionary of various parameters that will be passed
- # to the write_index_{header,footer} functions
- def __set_parameters(self, archive):
- # Determine the earliest and latest date in the archive
- firstdate = self.database.firstdate(archive)
- lastdate = self.database.lastdate(archive)
-
- # Get the current time
- now = time.asctime(time.localtime(time.time()))
- self.firstdate = firstdate
- self.lastdate = lastdate
- self.archivedate = now
- self.size = self.database.numArticles(archive)
- self.archive = archive
- self.version = __version__
-
- # Find the message ID of an article's parent, or return None
- # if no parent can be found.
-
- def __findParent(self, article, children = []):
- parentID = None
- if article.in_reply_to:
- parentID = article.in_reply_to
-        elif article.references:
-            # Remove article IDs that aren't in the archive
-            refs = self._remove_external_references(article.references)
-            if not refs:
-                return None
-            maxdate = self.database.getArticle(self.archive,
-                                               refs[0])
-            for ref in refs[1:]:
-                a = self.database.getArticle(self.archive, ref)
-                if a.date > maxdate.date:
-                    maxdate = a
-            parentID = maxdate.msgid
-        else:
-            # Look for the oldest article with a matching subject
-            try:
-                key, tempid = \
-                     self.database.subjectIndex.set_location(article.subject)
-                self.database.subjectIndex.next()
-                [subject, date] = key.split('\0')
-                if subject == article.subject and tempid not in children:
-                    parentID = tempid
-            except KeyError:
-                pass
- return parentID
-
- # Update the threaded index completely
- def updateThreadedIndex(self):
- # Erase the threaded index
- self.database.clearIndex(self.archive, 'thread')
-
- # Loop over all the articles
- msgid = self.database.first(self.archive, 'date')
- while msgid is not None:
- try:
- article = self.database.getArticle(self.archive, msgid)
- except KeyError:
- pass
- else:
-                if article.parentID is not None and \
-                   self.database.hasArticle(self.archive,
-                                            article.parentID):
-                    parent = self.database.getArticle(self.archive,
-                                                      article.parentID)
-                    article.threadKey = parent.threadKey + article.date + '-'
-                    self.database.setThreadKey(self.archive,
-                        (article.threadKey, article.msgid),
-                        msgid)
- msgid = self.database.next(self.archive, 'date')
-
- #
- # Public methods:
- #
- # These are part of the public interface of the T class, but will
- # never be overridden (unless you're trying to do something very new).
-
- # Update a single archive's indices, whether the archive's been
- # dirtied or not.
- def update_archive(self, archive):
- self.archive = archive
- self.message(_("Updating index files for archive [%(archive)s]"))
- arcdir = os.path.join(self.basedir, archive)
- self.__set_parameters(archive)
-
- for hdr in ('Date', 'Subject', 'Author'):
- self._update_simple_index(hdr, archive, arcdir)
-
- self._update_thread_index(archive, arcdir)
-
- def _update_simple_index(self, hdr, archive, arcdir):
- self.message(" " + hdr)
- self.type = hdr
- hdr = hdr.lower()
-
- self._open_index_file_as_stdout(arcdir, hdr)
- self.write_index_header()
- count = 0
- # Loop over the index entries
- msgid = self.database.first(archive, hdr)
- while msgid is not None:
- try:
- article = self.database.getArticle(self.archive, msgid)
- except KeyError:
- pass
- else:
- count = count + 1
- self.write_index_entry(article)
- msgid = self.database.next(archive, hdr)
- # Finish up this index
- self.write_index_footer()
- self._restore_stdout()
-
- def _update_thread_index(self, archive, arcdir):
- self.message(_(" Thread"))
- self._open_index_file_as_stdout(arcdir, "thread")
- self.type = 'Thread'
- self.write_index_header()
-
- # To handle the prev./next in thread pointers, we need to
- # track articles 5 at a time.
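-        # The window L holds five consecutive articles in thread order;
-        # L[2] is the one being written, L[1]/L[3] provide its previous and
-        # next-in-thread links, and L[0]/L[4] are needed when a neighbor
-        # must itself be rewritten.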
-
-        # Prime the sliding window with the first three articles; L[0] and
-        # L[1] stay None since the first article has no predecessors.
- L = [None] * 5
- i = 2
- msgid = self.database.first(self.archive, 'thread')
-
- while msgid is not None and i < 5:
- L[i] = self.database.getArticle(self.archive, msgid)
- i = i + 1
- msgid = self.database.next(self.archive, 'thread')
-
- while L[2] is not None:
- article = L[2]
- artkey = None
- if article is not None:
- artkey = article.threadKey
- if artkey is not None:
- self.write_threadindex_entry(article, artkey.count('-') - 1)
-                    if self.database.changed.has_key((archive, article.msgid)):
- a1 = L[1]
- a3 = L[3]
- self.update_article(arcdir, article, a1, a3)
- if a3 is not None:
- self.database.changed[(archive, a3.msgid)] = None
- if a1 is not None:
- key = archive, a1.msgid
- if not self.database.changed.has_key(key):
- self.update_article(arcdir, a1, L[0], L[2])
- else:
- del self.database.changed[key]
- if L[0]:
- L[0].finished_update_article()
- L = L[1:] # Rotate the list
- if msgid is None:
- L.append(msgid)
- else:
- L.append(self.database.getArticle(self.archive, msgid))
- msgid = self.database.next(self.archive, 'thread')
-
- self.write_index_footer()
- self._restore_stdout()
-
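-    # The write_index_* hooks in subclasses use bare print statements; these
-    # two helpers temporarily point sys.stdout at the index file being
-    # generated, and restore it afterwards.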
- def _open_index_file_as_stdout(self, arcdir, index_name):
- path = os.path.join(arcdir, index_name + self.INDEX_EXT)
- omask = os.umask(002)
- try:
- self.__f = open(path, 'w')
- finally:
- os.umask(omask)
- self.__stdout = sys.stdout
- sys.stdout = self.__f
-
- def _restore_stdout(self):
- sys.stdout = self.__stdout
- self.__f.close()
- del self.__f
- del self.__stdout
-
- # Update only archives that have been marked as "changed".
- def update_dirty_archives(self):
- for i in self._dirty_archives:
- self.update_archive(i)
- self._dirty_archives = []
-
- # Read a Unix mailbox file from the file object <input>,
- # and create a series of Article objects. Each article
- # object will then be archived.
-
- def _makeArticle(self, msg, sequence):
- return Article(msg, sequence)
-
- def processUnixMailbox(self, input, start=None, end=None):
-        # self.maillist is supplied by the concrete archiver subclass.
-        mbox = ArchiverMailbox(input, self.maillist)
- if start is None:
- start = 0
- counter = 0
- while counter < start:
- try:
- m = mbox.next()
- except errors.DiscardMessage:
- continue
- if m is None:
- return
- counter += 1
- while 1:
- try:
- pos = input.tell()
- m = mbox.next()
- except errors.DiscardMessage:
- continue
- except Exception:
- log.error('uncaught archiver exception at filepos: %s', pos)
- raise
- if m is None:
- break
- if m == '':
- # It was an unparseable message
- continue
- msgid = m.get('message-id', 'n/a')
- self.message(_('#%(counter)05d %(msgid)s'))
- a = self._makeArticle(m, self.sequence)
- self.sequence += 1
- self.add_article(a)
- if end is not None and counter >= end:
- break
- counter += 1
-
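-    # A typical driver sequence (sketch only; SomeArchiver stands in for a
-    # concrete subclass that fills in the abstract hooks defined below):
-    #
-    #     archiver = SomeArchiver(basedir='/var/archives/mylist')
-    #     fp = open('/var/archives/mylist.mbox/mylist.mbox')
-    #     archiver.processUnixMailbox(fp)
-    #     fp.close()
-    #     archiver.close()
-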
- def new_archive(self, archive, archivedir):
- self.archives.append(archive)
- self.update_TOC = 1
- self.database.newArchive(archive)
- # If the archive directory doesn't exist, create it
- try:
- os.stat(archivedir)
- except os.error, errdata:
- errno, errmsg = errdata
-            if errno == 2:              # ENOENT: the directory is missing
- omask = os.umask(0)
- try:
- os.mkdir(archivedir, self.DIRMODE)
- finally:
- os.umask(omask)
- else:
- raise os.error, errdata
- self.open_new_archive(archive, archivedir)
-
- def add_article(self, article):
- archives = self.get_archives(article)
- if not archives:
- return
-        if isinstance(archives, str):
- archives = [archives]
-
- article.filename = filename = self.get_filename(article)
- temp = self.format_article(article)
- for arch in archives:
-            # Helpers such as _remove_external_references() consult
-            # self.archive, so track the archive currently being filed.
-            self.archive = arch
- archivedir = os.path.join(self.basedir, arch)
- if arch not in self.archives:
- self.new_archive(arch, archivedir)
-
- # Write the HTML-ized article
- self.write_article(arch, temp, os.path.join(archivedir,
- filename))
-
- if article.decoded.has_key('author'):
- author = fixAuthor(article.decoded['author'])
- else:
- author = fixAuthor(article.author)
- if article.decoded.has_key('stripped'):
- subject = article.decoded['stripped'].lower()
- else:
- subject = article.subject.lower()
-
- article.parentID = parentID = self.get_parent_info(arch, article)
- if parentID:
- parent = self.database.getArticle(arch, parentID)
- article.threadKey = parent.threadKey + article.date + '-'
- else:
- article.threadKey = article.date + '-'
- key = article.threadKey, article.msgid
-
- self.database.setThreadKey(arch, key, article.msgid)
- self.database.addArticle(arch, temp, author=author,
- subject=subject)
-
- if arch not in self._dirty_archives:
- self._dirty_archives.append(arch)
-
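-    # Parent lookup precedence: an explicit In-Reply-To wins; otherwise the
-    # newest archived article named in References:; otherwise fall back to
-    # the oldest archived article with the same subject.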
- def get_parent_info(self, archive, article):
- parentID = None
- if article.in_reply_to:
- parentID = article.in_reply_to
- elif article.references:
- refs = self._remove_external_references(article.references)
- if refs:
- maxdate = self.database.getArticle(archive, refs[0])
- for ref in refs[1:]:
- a = self.database.getArticle(archive, ref)
- if a.date > maxdate.date:
- maxdate = a
- parentID = maxdate.msgid
- else:
- # Get the oldest article with a matching subject, and
- # assume this is a follow-up to that article
- parentID = self.database.getOldestArticle(archive,
- article.subject)
-
- if parentID and not self.database.hasArticle(archive, parentID):
- parentID = None
- return parentID
-
- def write_article(self, index, article, path):
- omask = os.umask(002)
- try:
- f = open(path, 'w')
- finally:
- os.umask(omask)
- temp_stdout, sys.stdout = sys.stdout, f
- self.write_article_header(article)
- sys.stdout.writelines(article.body)
- self.write_article_footer(article)
- sys.stdout = temp_stdout
- f.close()
-
- def _remove_external_references(self, refs):
- keep = []
- for ref in refs:
- if self.database.hasArticle(self.archive, ref):
- keep.append(ref)
- return keep
-
- # Abstract methods: these will need to be overridden by subclasses
- # before anything useful can be done.
-
- def get_filename(self, article):
- pass
- def get_archives(self, article):
- """Return a list of indexes where the article should be filed.
- A string can be returned if the list only contains one entry,
- and the empty list is legal."""
- pass
- def format_article(self, article):
- pass
- def write_index_header(self):
- pass
- def write_index_footer(self):
- pass
- def write_index_entry(self, article):
- pass
- def write_threadindex_entry(self, article, depth):
- pass
- def write_article_header(self, article):
- pass
- def write_article_footer(self, article):
- pass
- def write_article_entry(self, article):
- pass
- def update_article(self, archivedir, article, prev, next):
- pass
- def write_TOC(self):
- pass
- def open_new_archive(self, archive, dir):
- pass
- def message(self, msg):
- pass
-
-
-class BSDDBdatabase(Database):
- __super_addArticle = Database.addArticle
-
- def __init__(self, basedir):
- self.__cachekeys = []
- self.__cachedict = {}
- self.__currentOpenArchive = None # The currently open indices
- self.basedir = os.path.expanduser(basedir)
- self.changed = {} # Recently added articles, indexed only by
- # message ID
-
- def firstdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.first()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
- pass
- return date
-
- def lastdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.last()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
- pass
- return date
-
- def numArticles(self, archive):
- self.__openIndices(archive)
- return len(self.dateIndex)
-
- def addArticle(self, archive, article, subject=None, author=None,
- date=None):
- self.__openIndices(archive)
- self.__super_addArticle(archive, article, subject, author, date)
-
- # Open the BSDDB files that are being used as indices
- # (dateIndex, authorIndex, subjectIndex, articleIndex)
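-    # For an archive volume named e.g. '2009-January' this opens the btree
-    # files database/2009-January-date, -author, -subject, -article and
-    # -thread under the base directory.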
- def __openIndices(self, archive):
- if self.__currentOpenArchive == archive:
- return
-
- import bsddb
- self.__closeIndices()
- arcdir = os.path.join(self.basedir, 'database')
- omask = os.umask(0)
- try:
- try:
- os.mkdir(arcdir, 02775)
- except OSError:
- # BAW: Hmm...
- pass
- finally:
- os.umask(omask)
- for hdr in ('date', 'author', 'subject', 'article', 'thread'):
- path = os.path.join(arcdir, archive + '-' + hdr)
- t = bsddb.btopen(path, 'c')
- setattr(self, hdr + 'Index', t)
- self.__currentOpenArchive = archive
-
- # Close the BSDDB files that are being used as indices (if they're
- # open--this is safe to call if they're already closed)
-    def __closeIndices(self):
-        for hdr in ('date', 'author', 'subject', 'thread', 'article'):
-            attr = hdr + 'Index'
-            if hasattr(self, attr):
-                index = getattr(self, attr)
-                if hdr == 'article':
-                    if not hasattr(self, 'archive_length'):
-                        self.archive_length = {}
-                    self.archive_length[self.__currentOpenArchive] = len(index)
-                index.close()
-                delattr(self, attr)
-        self.__currentOpenArchive = None
-
- def close(self):
- self.__closeIndices()
- def hasArticle(self, archive, msgid):
- self.__openIndices(archive)
- return self.articleIndex.has_key(msgid)
- def setThreadKey(self, archive, key, msgid):
- self.__openIndices(archive)
- self.threadIndex[key] = msgid
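-    # getArticle keeps a small LRU cache of unpickled articles: a hit moves
-    # the message ID to the back of __cachekeys, and a miss at CACHESIZE
-    # entries evicts the oldest entry before unpickling from articleIndex.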
- def getArticle(self, archive, msgid):
- self.__openIndices(archive)
- if self.__cachedict.has_key(msgid):
- self.__cachekeys.remove(msgid)
- self.__cachekeys.append(msgid)
- return self.__cachedict[msgid]
- if len(self.__cachekeys) == CACHESIZE:
- delkey, self.__cachekeys = (self.__cachekeys[0],
- self.__cachekeys[1:])
- del self.__cachedict[delkey]
- s = self.articleIndex[msgid]
- article = pickle.loads(s)
- self.__cachekeys.append(msgid)
- self.__cachedict[msgid] = article
- return article
-
- def first(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.first()
- return msgid
- except KeyError:
- return None
- def next(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.next()
- except KeyError:
- return None
- else:
- return msgid
-
- def getOldestArticle(self, archive, subject):
- self.__openIndices(archive)
- subject = subject.lower()
- try:
- key, tempid = self.subjectIndex.set_location(subject)
- self.subjectIndex.next()
- [subject2, date] = key.split('\0')
- if subject != subject2:
- return None
- return tempid
-        except KeyError:
-            # set_location() or next() raises KeyError when the cursor runs
-            # off the end of the btree, i.e. no matching subject exists.
-            return None
-
- def newArchive(self, archive):
- pass
-
-    def clearIndex(self, archive, index):
-        self.__openIndices(archive)
-        index = getattr(self, index + 'Index')
-        finished = 0
-        try:
-            key, msgid = index.first()
-        except KeyError:
-            finished = 1
-        while not finished:
-            del index[key]
-            try:
-                key, msgid = index.next()
-            except KeyError:
-                finished = 1
-
-