diff options
Diffstat (limited to 'mailman/Archiver')
| -rw-r--r-- | mailman/Archiver/Archiver.py | 230 | ||||
| -rw-r--r-- | mailman/Archiver/HyperArch.py | 1237 | ||||
| -rw-r--r-- | mailman/Archiver/HyperDatabase.py | 339 | ||||
| -rw-r--r-- | mailman/Archiver/__init__.py | 18 | ||||
| -rw-r--r-- | mailman/Archiver/pipermail.py | 874 |
5 files changed, 0 insertions, 2698 deletions
diff --git a/mailman/Archiver/Archiver.py b/mailman/Archiver/Archiver.py deleted file mode 100644 index d0b9fbd1b..000000000 --- a/mailman/Archiver/Archiver.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - # USA. - -"""Mixin class for putting new messages in the right place for archival. - -Public archives are separated from private ones. An external archival -mechanism (eg, pipermail) should be pointed to the right places, to do the -archival. -""" - -import os -import errno -import logging - -from cStringIO import StringIO -from string import Template - -from mailman import Mailbox -from mailman import Utils -from mailman.config import config - -log = logging.getLogger('mailman.error') - - - -def makelink(old, new): - try: - os.symlink(old, new) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -def breaklink(link): - try: - os.unlink(link) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - - - -class Archiver: - # - # Interface to Pipermail. HyperArch.py uses this method to get the - # archive directory for the mailing list - # - def InitVars(self): - # The archive file structure by default is: - # - # archives/ - # private/ - # listname.mbox/ - # listname.mbox - # listname/ - # lots-of-pipermail-stuff - # public/ - # listname.mbox@ -> ../private/listname.mbox - # listname@ -> ../private/listname - # - # IOW, the mbox and pipermail archives are always stored in the - # private archive for the list. This is safe because archives/private - # is always set to o-rx. Public archives have a symlink to get around - # the private directory, pointing directly to the private/listname - # which has o+rx permissions. Private archives do not have the - # symbolic links. - archdir = self.archive_dir(self.fqdn_listname) - omask = os.umask(0) - try: - try: - os.mkdir(archdir+'.mbox', 02775) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - # We also create an empty pipermail archive directory into - # which we'll drop an empty index.html file into. This is so - # that lists that have not yet received a posting have - # /something/ as their index.html, and don't just get a 404. - try: - os.mkdir(archdir, 02775) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - # See if there's an index.html file there already and if not, - # write in the empty archive notice. - indexfile = os.path.join(archdir, 'index.html') - fp = None - try: - fp = open(indexfile) - except IOError, e: - if e.errno <> errno.ENOENT: - raise - omask = os.umask(002) - try: - fp = open(indexfile, 'w') - finally: - os.umask(omask) - fp.write(Utils.maketext( - 'emptyarchive.html', - {'listname': self.real_name, - 'listinfo': self.GetScriptURL('listinfo'), - }, mlist=self)) - if fp: - fp.close() - finally: - os.umask(omask) - - def ArchiveFileName(self): - """The mbox name where messages are left for archive construction.""" - return os.path.join(self.archive_dir() + '.mbox', - self.fqdn_listname + '.mbox') - - def GetBaseArchiveURL(self): - if self.archive_private: - url = self.GetScriptURL('private') + '/index.html' - else: - web_host = config.domains.get(self.host_name, self.host_name) - url = Template(config.PUBLIC_ARCHIVE_URL).safe_substitute( - listname=self.fqdn_listname, - hostname=web_host, - fqdn_listname=self.fqdn_listname, - ) - return url - - def __archive_file(self, afn): - """Open (creating, if necessary) the named archive file.""" - omask = os.umask(002) - try: - return Mailbox.Mailbox(open(afn, 'a+')) - finally: - os.umask(omask) - - # - # old ArchiveMail function, retained under a new name - # for optional archiving to an mbox - # - def __archive_to_mbox(self, post): - """Retain a text copy of the message in an mbox file.""" - try: - afn = self.ArchiveFileName() - mbox = self.__archive_file(afn) - mbox.AppendMessage(post) - mbox.fp.close() - except IOError, msg: - log.error('Archive file access failure:\n\t%s %s', afn, msg) - raise - - def ExternalArchive(self, ar, txt): - cmd = Template(ar).safe_substitute( - listname=self.fqdn_listname, - hostname=self.host_name) - extarch = os.popen(cmd, 'w') - extarch.write(txt) - status = extarch.close() - if status: - log.error('external archiver non-zero exit status: %d\n', - (status & 0xff00) >> 8) - - # - # archiving in real time this is called from list.post(msg) - # - def ArchiveMail(self, msg): - """Store postings in mbox and/or pipermail archive, depending.""" - # Fork so archival errors won't disrupt normal list delivery - if config.ARCHIVE_TO_MBOX == -1: - return - # - # We don't need an extra archiver lock here because we know the list - # itself must be locked. - if config.ARCHIVE_TO_MBOX in (1, 2): - self.__archive_to_mbox(msg) - if config.ARCHIVE_TO_MBOX == 1: - # Archive to mbox only. - return - txt = str(msg) - # should we use the internal or external archiver? - private_p = self.archive_private - if config.PUBLIC_EXTERNAL_ARCHIVER and not private_p: - self.ExternalArchive(config.PUBLIC_EXTERNAL_ARCHIVER, txt) - elif config.PRIVATE_EXTERNAL_ARCHIVER and private_p: - self.ExternalArchive(config.PRIVATE_EXTERNAL_ARCHIVER, txt) - else: - # use the internal archiver - f = StringIO(txt) - import HyperArch - h = HyperArch.HyperArchive(self) - h.processUnixMailbox(f) - h.close() - f.close() - - # - # called from MailList.MailList.Save() - # - def CheckHTMLArchiveDir(self): - # We need to make sure that the archive directory has the right perms - # for public vs private. If it doesn't exist, or some weird - # permissions errors prevent us from stating the directory, it's - # pointless to try to fix the perms, so we just return -scott - if config.ARCHIVE_TO_MBOX == -1: - # Archiving is completely disabled, don't require the skeleton. - return - pubdir = os.path.join(config.PUBLIC_ARCHIVE_FILE_DIR, - self.fqdn_listname) - privdir = self.archive_dir() - pubmbox = pubdir + '.mbox' - privmbox = privdir + '.mbox' - if self.archive_private: - breaklink(pubdir) - breaklink(pubmbox) - else: - # BAW: privdir or privmbox could be nonexistant. We'd get an - # OSError, ENOENT which should be caught and reported properly. - makelink(privdir, pubdir) - # Only make this link if the site has enabled public mbox files - if config.PUBLIC_MBOX: - makelink(privmbox, pubmbox) diff --git a/mailman/Archiver/HyperArch.py b/mailman/Archiver/HyperArch.py deleted file mode 100644 index d9477cc3f..000000000 --- a/mailman/Archiver/HyperArch.py +++ /dev/null @@ -1,1237 +0,0 @@ -# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""HyperArch: Pipermail archiving for Mailman - - - The Dragon De Monsyne <dragondm@integral.org> - - TODO: - - Should be able to force all HTML to be regenerated next time the - archive is run, in case a template is changed. - - Run a command to generate tarball of html archives for downloading - (probably in the 'update_dirty_archives' method). -""" - -import os -import re -import sys -import gzip -import time -import errno -import urllib -import logging -import weakref -import binascii - -from email.Charset import Charset -from email.Errors import HeaderParseError -from email.Header import decode_header, make_header -from lazr.config import as_boolean -from locknix.lockfile import Lock -from string import Template - -from mailman import Utils -from mailman import i18n -from mailman.Archiver import HyperDatabase -from mailman.Archiver import pipermail -from mailman.Mailbox import ArchiverMailbox -from mailman.config import config - - -log = logging.getLogger('mailman.error') - -# Set up i18n. Assume the current language has already been set in the caller. -_ = i18n._ - -EMPTYSTRING = '' -NL = '\n' - -# MacOSX has a default stack size that is too small for deeply recursive -# regular expressions. We see this as crashes in the Python test suite when -# running test_re.py and test_sre.py. The fix is to set the stack limit to -# 2048; the general recommendation is to do in the shell before running the -# test suite. But that's inconvenient for a daemon like the qrunner. -# -# AFAIK, this problem only affects the archiver, so we're adding this work -# around to this file (it'll get imported by the bundled pipermail or by the -# bin/arch script. We also only do this on darwin, a.k.a. MacOSX. -if sys.platform == 'darwin': - try: - import resource - except ImportError: - pass - else: - soft, hard = resource.getrlimit(resource.RLIMIT_STACK) - newsoft = min(hard, max(soft, 1024*2048)) - resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard)) - - - -def html_quote(s, lang=None): - repls = ( ('&', '&'), - ("<", '<'), - (">", '>'), - ('"', '"')) - for thing, repl in repls: - s = s.replace(thing, repl) - return Utils.uncanonstr(s, lang) - - -def url_quote(s): - return urllib.quote(s) - - -def null_to_space(s): - return s.replace('\000', ' ') - - -def sizeof(filename, lang): - try: - size = os.path.getsize(filename) - except OSError, e: - # ENOENT can happen if the .mbox file was moved away or deleted, and - # an explicit mbox file name was given to bin/arch. - if e.errno <> errno.ENOENT: raise - return _('size not available') - if size < 1000: - with i18n.using_language(lang): - out = _(' %(size)i bytes ') - return out - elif size < 1000000: - return ' %d KB ' % (size / 1000) - # GB?? :-) - return ' %d MB ' % (size / 1000000) - - -html_charset = '<META http-equiv="Content-Type" ' \ - 'content="text/html; charset=%s">' - -def CGIescape(arg, lang=None): - if isinstance(arg, unicode): - s = Utils.websafe(arg) - else: - s = Utils.websafe(str(arg)) - return Utils.uncanonstr(s.replace('"', '"'), lang) - -# Parenthesized human name -paren_name_pat = re.compile(r'([(].*[)])') - -# Subject lines preceded with 'Re:' -REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE) - -# E-mail addresses and URLs in text -emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)') - -# Argh! This pattern is buggy, and will choke on URLs with GET parameters. -urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text - -# Blank lines -blankpat = re.compile(r'^\s*$') - -# Starting <html> directive -htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE) -# Ending </html> directive -nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE) -# Match quoted text -quotedpat = re.compile(r'^([>|:]|>)+') - - - -# Like Utils.maketext() but with caching to improve performance. -# -# _templatefilepathcache is used to associate a (templatefile, lang, listname) -# key with the file system path to a template file. This path is the one that -# the Utils.findtext() function has computed is the one to match the values in -# the key tuple. -# -# _templatecache associate a file system path as key with the text -# returned after processing the contents of that file by Utils.findtext() -# -# We keep two caches to reduce the amount of template text kept in memory, -# since the _templatefilepathcache is a many->one mapping and _templatecache -# is a one->one mapping. Imagine 1000 lists all using the same default -# English template. - -_templatefilepathcache = {} -_templatecache = {} - -def quick_maketext(templatefile, dict=None, lang=None, mlist=None): - if mlist is None: - listname = '' - else: - listname = mlist.fqdn_listname - if lang is None: - if mlist is None: - lang = config.mailman.default_language - else: - lang = mlist.preferred_language - cachekey = (templatefile, lang, listname) - filepath = _templatefilepathcache.get(cachekey) - if filepath: - template = _templatecache.get(filepath) - if filepath is None or template is None: - # Use the basic maketext, with defaults to get the raw template - template, filepath = Utils.findtext(templatefile, lang=lang, - raw=True, mlist=mlist) - _templatefilepathcache[cachekey] = filepath - _templatecache[filepath] = template - # Copied from Utils.maketext() - text = template - if dict is not None: - try: - try: - text = Template(template).safe_substitute(**dict) - except UnicodeError: - # Try again after coercing the template to unicode - utemplate = unicode(template, - Utils.GetCharSet(lang), - 'replace') - text = Template(utemplate).safe_substitute(**dict) - except (TypeError, ValueError): - # The template is really screwed up - pass - # Make sure the text is in the given character set, or html-ify any bogus - # characters. - return Utils.uncanonstr(text, lang) - - - -# Note: I'm overriding most, if not all of the pipermail Article class -# here -ddm -# The Article class encapsulates a single posting. The attributes are: -# -# sequence : Sequence number, unique for each article in a set of archives -# subject : Subject -# datestr : The posting date, in human-readable format -# date : The posting date, in purely numeric format -# fromdate : The posting date, in `unixfrom' format -# headers : Any other headers of interest -# author : The author's name (and possibly organization) -# email : The author's e-mail address -# msgid : A unique message ID -# in_reply_to : If !="", this is the msgid of the article being replied to -# references: A (possibly empty) list of msgid's of earlier articles in -# the thread -# body : A list of strings making up the message body - -class Article(pipermail.Article): - __super_init = pipermail.Article.__init__ - __super_set_date = pipermail.Article._set_date - - _last_article_time = time.time() - - def __init__(self, message=None, sequence=0, keepHeaders=[], - lang=config.mailman.default_language, mlist=None): - self.__super_init(message, sequence, keepHeaders) - self.prev = None - self.next = None - # Trim Re: from the subject line - i = 0 - while i != -1: - result = REpat.match(self.subject) - if result: - i = result.end(0) - self.subject = self.subject[i:] - else: - i = -1 - # Useful to keep around - self._lang = lang - self._mlist = mlist - - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - # Avoid i18n side-effects. Note that the language for this - # article (for this list) could be different from the site-wide - # preferred language, so we need to ensure no side-effects will - # occur. Think what happens when executing bin/arch. - with i18n.using_language(lang): - if self.author == self.email: - self.author = self.email = re.sub('@', _(' at '), - self.email) - else: - self.email = re.sub('@', _(' at '), self.email) - # Snag the content-* headers. RFC 1521 states that their values are - # case insensitive. - ctype = message.get('Content-Type', 'text/plain') - cenc = message.get('Content-Transfer-Encoding', '') - self.ctype = ctype.lower() - self.cenc = cenc.lower() - self.decoded = {} - cset = Utils.GetCharSet(mlist.preferred_language) - cset_out = Charset(cset).output_charset or cset - charset = message.get_content_charset(cset_out) - if charset: - charset = charset.lower().strip() - if charset[0]=='"' and charset[-1]=='"': - charset = charset[1:-1] - if charset[0]=="'" and charset[-1]=="'": - charset = charset[1:-1] - try: - body = message.get_payload(decode=True) - except binascii.Error: - body = None - if body and charset != Utils.GetCharSet(self._lang): - # decode body - try: - body = unicode(body, charset) - except (UnicodeError, LookupError): - body = None - if body: - self.body = [l + "\n" for l in body.splitlines()] - - self.decode_headers() - - def __getstate__(self): - d = self.__dict__.copy() - # We definitely don't want to pickle the MailList instance, so just - # pickle a reference to it. - if d.has_key('_mlist'): - mlist = d['_mlist'] - del d['_mlist'] - else: - mlist = None - if mlist: - d['__listname'] = self._mlist.fqdn_listname - else: - d['__listname'] = None - # Delete a few other things we don't want in the pickle - for attr in ('prev', 'next', 'body'): - if d.has_key(attr): - del d[attr] - d['body'] = [] - return d - - def __setstate__(self, d): - # For loading older Articles via pickle. All this stuff was added - # when Simone Piunni and Tokio Kikuchi i18n'ified Pipermail. See SF - # patch #594771. - self.__dict__ = d - listname = d.get('__listname') - if listname: - del d['__listname'] - d['_mlist'] = config.db.list_manager.get(listname) - if not d.has_key('_lang'): - if hasattr(self, '_mlist'): - self._lang = self._mlist.preferred_language - else: - self._lang = config.mailman.default_language - if not d.has_key('cenc'): - self.cenc = None - if not d.has_key('decoded'): - self.decoded = {} - - def setListIfUnset(self, mlist): - if getattr(self, '_mlist', None) is None: - self._mlist = mlist - - def quote(self, buf): - return html_quote(buf, self._lang) - - def decode_headers(self): - """MIME-decode headers. - - If the email, subject, or author attributes contain non-ASCII - characters using the encoded-word syntax of RFC 2047, decoded versions - of those attributes are placed in the self.decoded (a dictionary). - - If the list's charset differs from the header charset, an attempt is - made to decode the headers as Unicode. If that fails, they are left - undecoded. - """ - author = self.decode_charset(self.author) - subject = self.decode_charset(self.subject) - if author: - self.decoded['author'] = author - email = self.decode_charset(self.email) - if email: - self.decoded['email'] = email - if subject: - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - with i18n.using_language(self._lang): - atmark = _(' at ') - subject = re.sub(r'([-+,.\w]+)@([-+.\w]+)', - '\g<1>' + atmark + '\g<2>', subject) - self.decoded['subject'] = subject - self.decoded['stripped'] = self.strip_subject(subject or self.subject) - - def strip_subject(self, subject): - # Strip subject_prefix and Re: for subject sorting - # This part was taken from CookHeaders.py (TK) - prefix = self._mlist.subject_prefix.strip() - if prefix: - prefix_pat = re.escape(prefix) - prefix_pat = '%'.join(prefix_pat.split(r'\%')) - prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat) - subject = re.sub(prefix_pat, '', subject) - subject = subject.lstrip() - strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I) - stripped = strip_pat.sub('', subject) - return stripped - - def decode_charset(self, field): - # TK: This function was rewritten for unifying to Unicode. - # Convert 'field' into Unicode one line string. - try: - pairs = decode_header(field) - ustr = make_header(pairs).__unicode__() - except (LookupError, UnicodeError, ValueError, HeaderParseError): - # assume list's language - cset = Utils.GetCharSet(self._mlist.preferred_language) - if cset == 'us-ascii': - cset = 'iso-8859-1' # assume this for English list - ustr = unicode(field, cset, 'replace') - return u''.join(ustr.splitlines()) - - def as_html(self): - d = self.__dict__.copy() - # avoid i18n side-effects - with i18n.using_language(self._lang): - d["prev"], d["prev_wsubj"] = self._get_prev() - d["next"], d["next_wsubj"] = self._get_next() - - d["email_html"] = self.quote(self.email) - d["title"] = self.quote(self.subject) - d["subject_html"] = self.quote(self.subject) - d["subject_url"] = url_quote(self.subject) - d["in_reply_to_url"] = url_quote(self.in_reply_to) - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - # Point the mailto url back to the list - author = re.sub('@', _(' at '), self.author) - emailurl = self._mlist.posting_address - else: - author = self.author - emailurl = self.email - d["author_html"] = self.quote(author) - d["email_url"] = url_quote(emailurl) - d["datestr_html"] = self.quote(i18n.ctime(int(self.date))) - d["body"] = self._get_body() - d['listurl'] = self._mlist.script_url('listinfo') - d['listname'] = self._mlist.real_name - d['encoding'] = '' - charset = Utils.GetCharSet(self._lang) - d["encoding"] = html_charset % charset - - self._add_decoded(d) - return quick_maketext( - 'article.html', d, - lang=self._lang, mlist=self._mlist) - - def _get_prev(self): - """Return the href and subject for the previous message""" - if self.prev: - subject = self._get_subject_enc(self.prev) - prev = ('<LINK REL="Previous" HREF="%s">' - % (url_quote(self.prev.filename))) - prev_wsubj = ('<LI>' + _('Previous message (by thread):') + - ' <A HREF="%s">%s\n</A></li>' - % (url_quote(self.prev.filename), - self.quote(subject))) - else: - prev = prev_wsubj = "" - return prev, prev_wsubj - - def _get_subject_enc(self, art): - """Return the subject of art, decoded if possible. - - If the charset of the current message and art match and the - article's subject is encoded, decode it. - """ - return art.decoded.get('subject', art.subject) - - def _get_next(self): - """Return the href and subject for the previous message""" - if self.next: - subject = self._get_subject_enc(self.next) - next = ('<LINK REL="Next" HREF="%s">' - % (url_quote(self.next.filename))) - next_wsubj = ('<LI>' + _('Next message (by thread):') + - ' <A HREF="%s">%s\n</A></li>' - % (url_quote(self.next.filename), - self.quote(subject))) - else: - next = next_wsubj = "" - return next, next_wsubj - - _rx_quote = re.compile('=([A-F0-9][A-F0-9])') - _rx_softline = re.compile('=[ \t]*$') - - def _get_body(self): - """Return the message body ready for HTML, decoded if necessary""" - try: - body = self.html_body - except AttributeError: - body = self.body - return null_to_space(EMPTYSTRING.join(body)) - - def _add_decoded(self, d): - """Add encoded-word keys to HTML output""" - for src, dst in (('author', 'author_html'), - ('email', 'email_html'), - ('subject', 'subject_html'), - ('subject', 'title')): - if self.decoded.has_key(src): - d[dst] = self.quote(self.decoded[src]) - - def as_text(self): - d = self.__dict__.copy() - # We need to guarantee a valid From_ line, even if there are - # bososities in the headers. - if not d.get('fromdate', '').strip(): - d['fromdate'] = time.ctime(time.time()) - if not d.get('email', '').strip(): - d['email'] = 'bogus@does.not.exist.com' - if not d.get('datestr', '').strip(): - d['datestr'] = time.ctime(time.time()) - # - headers = ['From %(email)s %(fromdate)s', - 'From: %(email)s (%(author)s)', - 'Date: %(datestr)s', - 'Subject: %(subject)s'] - if d['_in_reply_to']: - headers.append('In-Reply-To: %(_in_reply_to)s') - if d['_references']: - headers.append('References: %(_references)s') - if d['_message_id']: - headers.append('Message-ID: %(_message_id)s') - body = EMPTYSTRING.join(self.body) - cset = Utils.GetCharSet(self._lang) - # Coerce the body to Unicode and replace any invalid characters. - if not isinstance(body, unicode): - body = unicode(body, cset, 'replace') - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - with i18n.using_language(self._lang): - atmark = _(' at ') - body = re.sub(r'([-+,.\w]+)@([-+.\w]+)', - '\g<1>' + atmark + '\g<2>', body) - # Return body to character set of article. - body = body.encode(cset, 'replace') - return NL.join(headers) % d + '\n\n' + body + '\n' - - def _set_date(self, message): - self.__super_set_date(message) - self.fromdate = time.ctime(int(self.date)) - - def loadbody_fromHTML(self,fileobj): - self.body = [] - begin = 0 - while 1: - line = fileobj.readline() - if not line: - break - if not begin: - if line.strip() == '<!--beginarticle-->': - begin = 1 - continue - if line.strip() == '<!--endarticle-->': - break - self.body.append(line) - - def finished_update_article(self): - self.body = [] - try: - del self.html_body - except AttributeError: - pass - - -class HyperArchive(pipermail.T): - __super_init = pipermail.T.__init__ - __super_update_archive = pipermail.T.update_archive - __super_update_dirty_archives = pipermail.T.update_dirty_archives - __super_add_article = pipermail.T.add_article - - # some defaults - DIRMODE = 02775 - FILEMODE = 0660 - - VERBOSE = 0 - DEFAULTINDEX = 'thread' - ARCHIVE_PERIOD = 'month' - - THREADLAZY = 0 - THREADLEVELS = 3 - - ALLOWHTML = 1 # "Lines between <html></html>" handled as is. - SHOWHTML = 0 # Eg, nuke leading whitespace in html manner. - IQUOTES = 1 # Italicize quoted text. - SHOWBR = 0 # Add <br> onto every line - - def __init__(self, maillist): - # can't init the database while other processes are writing to it! - dir = maillist.archive_dir() - db = HyperDatabase.HyperDatabase(dir, maillist) - self.__super_init(dir, reload=1, database=db) - - self.maillist = maillist - self._lock_file = None - self.lang = maillist.preferred_language - self.charset = Utils.GetCharSet(maillist.preferred_language) - - if hasattr(self.maillist,'archive_volume_frequency'): - if self.maillist.archive_volume_frequency == 0: - self.ARCHIVE_PERIOD='year' - elif self.maillist.archive_volume_frequency == 2: - self.ARCHIVE_PERIOD='quarter' - elif self.maillist.archive_volume_frequency == 3: - self.ARCHIVE_PERIOD='week' - elif self.maillist.archive_volume_frequency == 4: - self.ARCHIVE_PERIOD='day' - else: - self.ARCHIVE_PERIOD='month' - - yre = r'(?P<year>[0-9]{4,4})' - mre = r'(?P<month>[01][0-9])' - dre = r'(?P<day>[0123][0-9])' - self._volre = { - 'year': '^' + yre + '$', - 'quarter': '^' + yre + r'q(?P<quarter>[1234])$', - 'month': '^' + yre + r'-(?P<month>[a-zA-Z]+)$', - 'week': r'^Week-of-Mon-' + yre + mre + dre, - 'day': '^' + yre + mre + dre + '$' - } - - def _makeArticle(self, msg, sequence): - return Article(msg, sequence, - lang=self.maillist.preferred_language, - mlist=self.maillist) - - def html_foot(self): - # avoid i18n side-effects - mlist = self.maillist - # Convenience - def quotetime(s): - return html_quote(i18n.ctime(s), self.lang) - with i18n.using_language(mlist.preferred_language): - d = {"lastdate": quotetime(self.lastdate), - "archivedate": quotetime(self.archivedate), - "listinfo": mlist.script_url('listinfo'), - "version": self.version, - } - i = {"thread": _("thread"), - "subject": _("subject"), - "author": _("author"), - "date": _("date") - } - for t in i.keys(): - cap = t[0].upper() + t[1:] - if self.type == cap: - d["%s_ref" % (t)] = "" - else: - d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' - % (t, i[t])) - return quick_maketext( - 'archidxfoot.html', d, - mlist=mlist) - - def html_head(self): - # avoid i18n side-effects - mlist = self.maillist - # Convenience - def quotetime(s): - return html_quote(i18n.ctime(s), self.lang) - with i18n.using_language(mlist.preferred_language): - d = {"listname": html_quote(mlist.real_name, self.lang), - "archtype": self.type, - "archive": self.volNameToDesc(self.archive), - "listinfo": mlist.script_url('listinfo'), - "firstdate": quotetime(self.firstdate), - "lastdate": quotetime(self.lastdate), - "size": self.size, - } - i = {"thread": _("thread"), - "subject": _("subject"), - "author": _("author"), - "date": _("date"), - } - for t in i.keys(): - cap = t[0].upper() + t[1:] - if self.type == cap: - d["%s_ref" % (t)] = "" - d["archtype"] = i[t] - else: - d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' - % (t, i[t])) - if self.charset: - d["encoding"] = html_charset % self.charset - else: - d["encoding"] = "" - return quick_maketext( - 'archidxhead.html', d, - mlist=mlist) - - def html_TOC(self): - mlist = self.maillist - listname = mlist.fqdn_listname - mbox = os.path.join(mlist.archive_dir()+'.mbox', listname+'.mbox') - d = {"listname": mlist.real_name, - "listinfo": mlist.script_url('listinfo'), - "fullarch": '../%s.mbox/%s.mbox' % (listname, listname), - "size": sizeof(mbox, mlist.preferred_language), - 'meta': '', - } - # Avoid i18n side-effects - with i18n.using_language(mlist.preferred_language): - if not self.archives: - d["noarchive_msg"] = _( - '<P>Currently, there are no archives. </P>') - d["archive_listing_start"] = "" - d["archive_listing_end"] = "" - d["archive_listing"] = "" - else: - d["noarchive_msg"] = "" - d["archive_listing_start"] = quick_maketext( - 'archliststart.html', - lang=mlist.preferred_language, - mlist=mlist) - d["archive_listing_end"] = quick_maketext( - 'archlistend.html', - mlist=mlist) - - accum = [] - for a in self.archives: - accum.append(self.html_TOC_entry(a)) - d["archive_listing"] = EMPTYSTRING.join(accum) - # The TOC is always in the charset of the list's preferred language - d['meta'] += html_charset % Utils.GetCharSet(mlist.preferred_language) - # The site can disable public access to the mbox file. - if as_boolean(config.archiver.pipermail.public_mbox): - template = 'archtoc.html' - else: - template = 'archtocnombox.html' - return quick_maketext(template, d, mlist=mlist) - - def html_TOC_entry(self, arch): - # Check to see if the archive is gzip'd or not - txtfile = os.path.join(self.maillist.archive_dir(), arch + '.txt') - gzfile = txtfile + '.gz' - # which exists? .txt.gz first, then .txt - if os.path.exists(gzfile): - file = gzfile - url = arch + '.txt.gz' - templ = '<td><A href="%(url)s">[ ' + _('Gzip\'d Text%(sz)s') \ - + ']</a></td>' - elif os.path.exists(txtfile): - file = txtfile - url = arch + '.txt' - templ = '<td><A href="%(url)s">[ ' + _('Text%(sz)s') + ']</a></td>' - else: - # neither found? - file = None - # in Python 1.5.2 we have an easy way to get the size - if file: - textlink = templ % { - 'url': url, - 'sz' : sizeof(file, self.maillist.preferred_language) - } - else: - # there's no archive file at all... hmmm. - textlink = '' - return quick_maketext( - 'archtocentry.html', - {'archive': arch, - 'archivelabel': self.volNameToDesc(arch), - 'textlink': textlink - }, - mlist=self.maillist) - - def GetArchLock(self): - if self._lock_file: - return 1 - self._lock_file = Lock( - os.path.join(config.LOCK_DIR, - self.maillist.fqdn_listname + '-arch.lock')) - try: - self._lock_file.lock(timeout=0.5) - except lockfile.TimeOutError: - return 0 - return 1 - - def DropArchLock(self): - if self._lock_file: - self._lock_file.unlock(unconditionally=1) - self._lock_file = None - - def processListArch(self): - name = self.maillist.ArchiveFileName() - wname= name+'.working' - ename= name+'.err_unarchived' - try: - os.stat(name) - except (IOError,os.error): - #no archive file, nothin to do -ddm - return - - #see if arch is locked here -ddm - if not self.GetArchLock(): - #another archiver is running, nothing to do. -ddm - return - - #if the working file is still here, the archiver may have - # crashed during archiving. Save it, log an error, and move on. - try: - wf = open(wname) - log.error('Archive working file %s present. ' - 'Check %s for possibly unarchived msgs', - wname, ename) - omask = os.umask(007) - try: - ef = open(ename, 'a+') - finally: - os.umask(omask) - ef.seek(1,2) - if ef.read(1) <> '\n': - ef.write('\n') - ef.write(wf.read()) - ef.close() - wf.close() - os.unlink(wname) - except IOError: - pass - os.rename(name,wname) - archfile = open(wname) - self.processUnixMailbox(archfile) - archfile.close() - os.unlink(wname) - self.DropArchLock() - - def get_filename(self, article): - return '%06i.html' % (article.sequence,) - - def get_archives(self, article): - """Return a list of indexes where the article should be filed. - A string can be returned if the list only contains one entry, - and the empty list is legal.""" - res = self.dateToVolName(float(article.date)) - self.message(_("figuring article archives\n")) - self.message(res + "\n") - return res - - def volNameToDesc(self, volname): - volname = volname.strip() - # Don't make these module global constants since we have to runtime - # translate them anyway. - monthdict = [ - '', - _('January'), _('February'), _('March'), _('April'), - _('May'), _('June'), _('July'), _('August'), - _('September'), _('October'), _('November'), _('December') - ] - for each in self._volre.keys(): - match = re.match(self._volre[each], volname) - # Let ValueErrors percolate up - if match: - year = int(match.group('year')) - if each == 'quarter': - d =["", _("First"), _("Second"), _("Third"), _("Fourth") ] - ord = d[int(match.group('quarter'))] - return _("%(ord)s quarter %(year)i") - elif each == 'month': - monthstr = match.group('month').lower() - for i in range(1, 13): - monthname = time.strftime("%B", (1999,i,1,0,0,0,0,1,0)) - if monthstr.lower() == monthname.lower(): - month = monthdict[i] - return _("%(month)s %(year)i") - raise ValueError, "%s is not a month!" % monthstr - elif each == 'week': - month = monthdict[int(match.group("month"))] - day = int(match.group("day")) - return _("The Week Of Monday %(day)i %(month)s %(year)i") - elif each == 'day': - month = monthdict[int(match.group("month"))] - day = int(match.group("day")) - return _("%(day)i %(month)s %(year)i") - else: - return match.group('year') - raise ValueError, "%s is not a valid volname" % volname - -# The following two methods should be inverses of each other. -ddm - - def dateToVolName(self,date): - datetuple=time.localtime(date) - if self.ARCHIVE_PERIOD=='year': - return time.strftime("%Y",datetuple) - elif self.ARCHIVE_PERIOD=='quarter': - if datetuple[1] in [1,2,3]: - return time.strftime("%Yq1",datetuple) - elif datetuple[1] in [4,5,6]: - return time.strftime("%Yq2",datetuple) - elif datetuple[1] in [7,8,9]: - return time.strftime("%Yq3",datetuple) - else: - return time.strftime("%Yq4",datetuple) - elif self.ARCHIVE_PERIOD == 'day': - return time.strftime("%Y%m%d", datetuple) - elif self.ARCHIVE_PERIOD == 'week': - # Reconstruct "seconds since epoch", and subtract weekday - # multiplied by the number of seconds in a day. - monday = time.mktime(datetuple) - datetuple[6] * 24 * 60 * 60 - # Build a new datetuple from this "seconds since epoch" value - datetuple = time.localtime(monday) - return time.strftime("Week-of-Mon-%Y%m%d", datetuple) - # month. -ddm - else: - return time.strftime("%Y-%B",datetuple) - - - def volNameToDate(self, volname): - volname = volname.strip() - for each in self._volre.keys(): - match = re.match(self._volre[each],volname) - if match: - year = int(match.group('year')) - month = 1 - day = 1 - if each == 'quarter': - q = int(match.group('quarter')) - month = (q * 3) - 2 - elif each == 'month': - monthstr = match.group('month').lower() - m = [] - for i in range(1,13): - m.append( - time.strftime("%B",(1999,i,1,0,0,0,0,1,0)).lower()) - try: - month = m.index(monthstr) + 1 - except ValueError: - pass - elif each == 'week' or each == 'day': - month = int(match.group("month")) - day = int(match.group("day")) - try: - return time.mktime((year,month,1,0,0,0,0,1,-1)) - except OverflowError: - return 0.0 - return 0.0 - - def sortarchives(self): - def sf(a, b): - al = self.volNameToDate(a) - bl = self.volNameToDate(b) - if al > bl: - return 1 - elif al < bl: - return -1 - else: - return 0 - if self.ARCHIVE_PERIOD in ('month','year','quarter'): - self.archives.sort(sf) - else: - self.archives.sort() - self.archives.reverse() - - def message(self, msg): - if self.VERBOSE: - f = sys.stderr - f.write(msg) - if msg[-1:] != '\n': - f.write('\n') - f.flush() - - def open_new_archive(self, archive, archivedir): - index_html = os.path.join(archivedir, 'index.html') - try: - os.unlink(index_html) - except: - pass - os.symlink(self.DEFAULTINDEX+'.html',index_html) - - def write_index_header(self): - self.depth=0 - print self.html_head() - if not self.THREADLAZY and self.type=='Thread': - self.message(_("Computing threaded index\n")) - self.updateThreadedIndex() - - def write_index_footer(self): - for i in range(self.depth): - print '</UL>' - print self.html_foot() - - def write_index_entry(self, article): - subject = self.get_header("subject", article) - author = self.get_header("author", article) - if as_boolean(config.archiver.pipermail.obscure_email_addresses): - try: - author = re.sub('@', _(' at '), author) - except UnicodeError: - # Non-ASCII author contains '@' ... no valid email anyway - pass - subject = CGIescape(subject, self.lang) - author = CGIescape(author, self.lang) - - d = { - 'filename': urllib.quote(article.filename), - 'subject': subject, - 'sequence': article.sequence, - 'author': author - } - print quick_maketext( - 'archidxentry.html', d, - mlist=self.maillist) - - def get_header(self, field, article): - # if we have no decoded header, return the encoded one - result = article.decoded.get(field) - if result is None: - return getattr(article, field) - # otherwise, the decoded one will be Unicode - return result - - def write_threadindex_entry(self, article, depth): - if depth < 0: - self.message('depth<0') - depth = 0 - if depth > self.THREADLEVELS: - depth = self.THREADLEVELS - if depth < self.depth: - for i in range(self.depth-depth): - print '</UL>' - elif depth > self.depth: - for i in range(depth-self.depth): - print '<UL>' - print '<!--%i %s -->' % (depth, article.threadKey) - self.depth = depth - self.write_index_entry(article) - - def write_TOC(self): - self.sortarchives() - omask = os.umask(002) - try: - toc = open(os.path.join(self.basedir, 'index.html'), 'w') - finally: - os.umask(omask) - toc.write(self.html_TOC()) - toc.close() - - def write_article(self, index, article, path): - # called by add_article - omask = os.umask(002) - try: - f = open(path, 'w') - finally: - os.umask(omask) - f.write(article.as_html()) - f.close() - - # Write the text article to the text archive. - path = os.path.join(self.basedir, "%s.txt" % index) - omask = os.umask(002) - try: - f = open(path, 'a+') - finally: - os.umask(omask) - f.write(article.as_text()) - f.close() - - def update_archive(self, archive): - self.__super_update_archive(archive) - # only do this if the gzip module was imported globally, and - # gzip'ing was enabled via Defaults.GZIP_ARCHIVE_TXT_FILES. See - # above. - if gzip: - archz = None - archt = None - txtfile = os.path.join(self.basedir, '%s.txt' % archive) - gzipfile = os.path.join(self.basedir, '%s.txt.gz' % archive) - oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % archive) - try: - # open the plain text file - archt = open(txtfile) - except IOError: - return - try: - os.rename(gzipfile, oldgzip) - archz = gzip.open(oldgzip) - except (IOError, RuntimeError, os.error): - pass - try: - ou = os.umask(002) - newz = gzip.open(gzipfile, 'w') - finally: - # XXX why is this a finally? - os.umask(ou) - if archz: - newz.write(archz.read()) - archz.close() - os.unlink(oldgzip) - # XXX do we really need all this in a try/except? - try: - newz.write(archt.read()) - newz.close() - archt.close() - except IOError: - pass - os.unlink(txtfile) - - _skip_attrs = ('maillist', '_lock_file', 'charset') - - def getstate(self): - d={} - for each in self.__dict__.keys(): - if not (each in self._skip_attrs - or each.upper() == each): - d[each] = self.__dict__[each] - return d - - # Add <A HREF="..."> tags around URLs and e-mail addresses. - - def __processbody_URLquote(self, lines): - # XXX a lot to do here: - # 1. use lines directly, rather than source and dest - # 2. make it clearer - # 3. make it faster - # TK: Prepare for unicode obscure. - atmark = _(' at ') - if lines and isinstance(lines[0], unicode): - atmark = unicode(atmark, Utils.GetCharSet(self.lang), 'replace') - source = lines[:] - dest = lines - last_line_was_quoted = 0 - for i in xrange(0, len(source)): - Lorig = L = source[i] - prefix = suffix = "" - if L is None: - continue - # Italicise quoted text - if self.IQUOTES: - quoted = quotedpat.match(L) - if quoted is None: - last_line_was_quoted = 0 - else: - quoted = quoted.end(0) - prefix = CGIescape(L[:quoted], self.lang) + '<i>' - suffix = '</I>' - if self.SHOWHTML: - suffix += '<BR>' - if not last_line_was_quoted: - prefix = '<BR>' + prefix - L = L[quoted:] - last_line_was_quoted = 1 - # Check for an e-mail address - L2 = "" - jr = emailpat.search(L) - kr = urlpat.search(L) - while jr is not None or kr is not None: - if jr == None: - j = -1 - else: - j = jr.start(0) - if kr is None: - k = -1 - else: - k = kr.start(0) - if j != -1 and (j < k or k == -1): - text = jr.group(1) - length = len(text) - if as_boolean( - config.archiver.pipermail.obscure_email_addresses): - text = re.sub('@', atmark, text) - URL = self.maillist.script_url('listinfo') - else: - URL = 'mailto:' + text - pos = j - elif k != -1 and (j > k or j == -1): - text = URL = kr.group(1) - length = len(text) - pos = k - else: # j==k - raise ValueError, "j==k: This can't happen!" - #length = len(text) - #self.message("URL: %s %s %s \n" - # % (CGIescape(L[:pos]), URL, CGIescape(text))) - L2 += '%s<A HREF="%s">%s</A>' % ( - CGIescape(L[:pos], self.lang), - html_quote(URL), CGIescape(text, self.lang)) - L = L[pos+length:] - jr = emailpat.search(L) - kr = urlpat.search(L) - if jr is None and kr is None: - L = CGIescape(L, self.lang) - L = prefix + L2 + L + suffix - source[i] = None - dest[i] = L - - # Perform Hypermail-style processing of <HTML></HTML> directives - # in message bodies. Lines between <HTML> and </HTML> will be written - # out precisely as they are; other lines will be passed to func2 - # for further processing . - - def __processbody_HTML(self, lines): - # XXX need to make this method modify in place - source = lines[:] - dest = lines - l = len(source) - i = 0 - while i < l: - while i < l and htmlpat.match(source[i]) is None: - i = i + 1 - if i < l: - source[i] = None - i = i + 1 - while i < l and nohtmlpat.match(source[i]) is None: - dest[i], source[i] = source[i], None - i = i + 1 - if i < l: - source[i] = None - i = i + 1 - - def format_article(self, article): - # called from add_article - # TBD: Why do the HTML formatting here and keep it in the - # pipermail database? It makes more sense to do the html - # formatting as the article is being written as html and toss - # the data after it has been written to the archive file. - lines = filter(None, article.body) - # Handle <HTML> </HTML> directives - if self.ALLOWHTML: - self.__processbody_HTML(lines) - self.__processbody_URLquote(lines) - if not self.SHOWHTML and lines: - lines.insert(0, '<PRE>') - lines.append('</PRE>') - else: - # Do fancy formatting here - if self.SHOWBR: - lines = map(lambda x:x + "<BR>", lines) - else: - for i in range(0, len(lines)): - s = lines[i] - if s[0:1] in ' \t\n': - lines[i] = '<P>' + s - article.html_body = lines - return article - - def update_article(self, arcdir, article, prev, next): - seq = article.sequence - filename = os.path.join(arcdir, article.filename) - self.message(_('Updating HTML for article %(seq)s')) - try: - f = open(filename) - article.loadbody_fromHTML(f) - f.close() - except IOError, e: - if e.errno <> errno.ENOENT: raise - self.message(_('article file %(filename)s is missing!')) - article.prev = prev - article.next = next - omask = os.umask(002) - try: - f = open(filename, 'w') - finally: - os.umask(omask) - f.write(article.as_html()) - f.close() diff --git a/mailman/Archiver/HyperDatabase.py b/mailman/Archiver/HyperDatabase.py deleted file mode 100644 index 49928d7b3..000000000 --- a/mailman/Archiver/HyperDatabase.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -# -# site modules -# -import os -import marshal -import time -import errno - -# -# package/project modules -# -import pipermail -from locknix import lockfile - -CACHESIZE = pipermail.CACHESIZE - -try: - import cPickle - pickle = cPickle -except ImportError: - import pickle - -# -# we're using a python dict in place of -# of bsddb.btree database. only defining -# the parts of the interface used by class HyperDatabase -# only one thing can access this at a time. -# -class DumbBTree: - """Stores pickles of Article objects - - This dictionary-like object stores pickles of all the Article - objects. The object itself is stored using marshal. It would be - much simpler, and probably faster, to store the actual objects in - the DumbBTree and pickle it. - - TBD: Also needs a more sensible name, like IteratableDictionary or - SortedDictionary. - """ - - def __init__(self, path): - self.current_index = 0 - self.path = path - self.lockfile = lockfile.Lock(self.path + ".lock") - self.lock() - self.__dirty = 0 - self.dict = {} - self.sorted = [] - self.load() - - def __repr__(self): - return "DumbBTree(%s)" % self.path - - def __sort(self, dirty=None): - if self.__dirty == 1 or dirty: - self.sorted = self.dict.keys() - self.sorted.sort() - self.__dirty = 0 - - def lock(self): - self.lockfile.lock() - - def unlock(self): - try: - self.lockfile.unlock() - except lockfile.NotLockedError: - pass - - def __delitem__(self, item): - # if first hasn't been called, we can skip the sort - if self.current_index == 0: - del self.dict[item] - self.__dirty = 1 - return - try: - ci = self.sorted[self.current_index] - except IndexError: - ci = None - if ci == item: - try: - ci = self.sorted[self.current_index + 1] - except IndexError: - ci = None - del self.dict[item] - self.__sort(dirty=1) - if ci is not None: - self.current_index = self.sorted.index(ci) - else: - self.current_index = self.current_index + 1 - - def clear(self): - # bulk clearing much faster than deleting each item, esp. with the - # implementation of __delitem__() above :( - self.dict = {} - - def first(self): - self.__sort() # guarantee that the list is sorted - if not self.sorted: - raise KeyError - else: - key = self.sorted[0] - self.current_index = 1 - return key, self.dict[key] - - def last(self): - if not self.sorted: - raise KeyError - else: - key = self.sorted[-1] - self.current_index = len(self.sorted) - 1 - return key, self.dict[key] - - def next(self): - try: - key = self.sorted[self.current_index] - except IndexError: - raise KeyError - self.current_index = self.current_index + 1 - return key, self.dict[key] - - def has_key(self, key): - return self.dict.has_key(key) - - def set_location(self, loc): - if not self.dict.has_key(loc): - raise KeyError - self.current_index = self.sorted.index(loc) - - def __getitem__(self, item): - return self.dict[item] - - def __setitem__(self, item, val): - # if first hasn't been called, then we don't need to worry - # about sorting again - if self.current_index == 0: - self.dict[item] = val - self.__dirty = 1 - return - try: - current_item = self.sorted[self.current_index] - except IndexError: - current_item = item - self.dict[item] = val - self.__sort(dirty=1) - self.current_index = self.sorted.index(current_item) - - def __len__(self): - return len(self.sorted) - - def load(self): - try: - fp = open(self.path) - try: - self.dict = marshal.load(fp) - finally: - fp.close() - except IOError, e: - if e.errno <> errno.ENOENT: raise - pass - except EOFError: - pass - else: - self.__sort(dirty=1) - - def close(self): - omask = os.umask(007) - try: - fp = open(self.path, 'w') - finally: - os.umask(omask) - fp.write(marshal.dumps(self.dict)) - fp.close() - self.unlock() - - -# this is lifted straight out of pipermail with -# the bsddb.btree replaced with above class. -# didn't use inheritance because of all the -# __internal stuff that needs to be here -scott -# -class HyperDatabase(pipermail.Database): - __super_addArticle = pipermail.Database.addArticle - - def __init__(self, basedir, mlist): - self.__cache = {} - self.__currentOpenArchive = None # The currently open indices - self._mlist = mlist - self.basedir = os.path.expanduser(basedir) - # Recently added articles, indexed only by message ID - self.changed={} - - def firstdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - datekey, msgid = self.dateIndex.first() - date = time.asctime(time.localtime(float(datekey[0]))) - except KeyError: - pass - return date - - def lastdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - datekey, msgid = self.dateIndex.last() - date = time.asctime(time.localtime(float(datekey[0]))) - except KeyError: - pass - return date - - def numArticles(self, archive): - self.__openIndices(archive) - return len(self.dateIndex) - - def addArticle(self, archive, article, subject=None, author=None, - date=None): - self.__openIndices(archive) - self.__super_addArticle(archive, article, subject, author, date) - - def __openIndices(self, archive): - if self.__currentOpenArchive == archive: - return - self.__closeIndices() - arcdir = os.path.join(self.basedir, 'database') - omask = os.umask(0) - try: - try: - os.mkdir(arcdir, 02770) - except OSError, e: - if e.errno <> errno.EEXIST: raise - finally: - os.umask(omask) - for i in ('date', 'author', 'subject', 'article', 'thread'): - t = DumbBTree(os.path.join(arcdir, archive + '-' + i)) - setattr(self, i + 'Index', t) - self.__currentOpenArchive = archive - - def __closeIndices(self): - for i in ('date', 'author', 'subject', 'thread', 'article'): - attr = i + 'Index' - if hasattr(self, attr): - index = getattr(self, attr) - if i == 'article': - if not hasattr(self, 'archive_length'): - self.archive_length = {} - l = len(index) - self.archive_length[self.__currentOpenArchive] = l - index.close() - delattr(self, attr) - self.__currentOpenArchive = None - - def close(self): - self.__closeIndices() - - def hasArticle(self, archive, msgid): - self.__openIndices(archive) - return self.articleIndex.has_key(msgid) - - def setThreadKey(self, archive, key, msgid): - self.__openIndices(archive) - self.threadIndex[key]=msgid - - def getArticle(self, archive, msgid): - self.__openIndices(archive) - if not self.__cache.has_key(msgid): - # get the pickled object out of the DumbBTree - buf = self.articleIndex[msgid] - article = self.__cache[msgid] = pickle.loads(buf) - # For upgrading older archives - article.setListIfUnset(self._mlist) - else: - article = self.__cache[msgid] - return article - - def first(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index + 'Index') - try: - key, msgid = index.first() - return msgid - except KeyError: - return None - - def next(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index + 'Index') - try: - key, msgid = index.next() - return msgid - except KeyError: - return None - - def getOldestArticle(self, archive, subject): - self.__openIndices(archive) - subject = subject.lower() - try: - key, tempid=self.subjectIndex.set_location(subject) - self.subjectIndex.next() - [subject2, date]= key.split('\0') - if subject!=subject2: return None - return tempid - except KeyError: - return None - - def newArchive(self, archive): - pass - - def clearIndex(self, archive, index): - self.__openIndices(archive) - if hasattr(self.threadIndex, 'clear'): - self.threadIndex.clear() - return - finished=0 - try: - key, msgid=self.threadIndex.first() - except KeyError: finished=1 - while not finished: - del self.threadIndex[key] - try: - key, msgid=self.threadIndex.next() - except KeyError: finished=1 diff --git a/mailman/Archiver/__init__.py b/mailman/Archiver/__init__.py deleted file mode 100644 index 322010acb..000000000 --- a/mailman/Archiver/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -from Archiver import * diff --git a/mailman/Archiver/pipermail.py b/mailman/Archiver/pipermail.py deleted file mode 100644 index 19bc05c3f..000000000 --- a/mailman/Archiver/pipermail.py +++ /dev/null @@ -1,874 +0,0 @@ -#! /usr/bin/env python - -import os -import re -import sys -import time -import logging -import mailbox - -import cPickle as pickle - -from cStringIO import StringIO -from email.Utils import parseaddr, parsedate_tz, mktime_tz, formatdate -from string import lowercase - -__version__ = '0.11 (Mailman edition)' -VERSION = __version__ -CACHESIZE = 100 # Number of slots in the cache - -from mailman.Mailbox import ArchiverMailbox -from mailman.core import errors -from mailman.i18n import _ - -SPACE = ' ' - -log = logging.getLogger('mailman.error') - - - -msgid_pat = re.compile(r'(<.*>)') -def strip_separators(s): - "Remove quotes or parenthesization from a Message-ID string" - if not s: - return "" - if s[0] in '"<([' and s[-1] in '">)]': - s = s[1:-1] - return s - -smallNameParts = ['van', 'von', 'der', 'de'] - -def fixAuthor(author): - "Canonicalize a name into Last, First format" - # If there's a comma, guess that it's already in "Last, First" format - if ',' in author: - return author - L = author.split() - i = len(L) - 1 - if i == 0: - return author # The string's one word--forget it - if author.upper() == author or author.lower() == author: - # Damn, the name is all upper- or lower-case. - while i > 0 and L[i-1].lower() in smallNameParts: - i = i - 1 - else: - # Mixed case; assume that small parts of the last name will be - # in lowercase, and check them against the list. - while i>0 and (L[i-1][0] in lowercase or - L[i-1].lower() in smallNameParts): - i = i - 1 - author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i]) - return author - -# Abstract class for databases - -class DatabaseInterface: - def __init__(self): pass - def close(self): pass - def getArticle(self, archive, msgid): pass - def hasArticle(self, archive, msgid): pass - def addArticle(self, archive, article, subject=None, author=None, - date=None): pass - def firstdate(self, archive): pass - def lastdate(self, archive): pass - def first(self, archive, index): pass - def next(self, archive, index): pass - def numArticles(self, archive): pass - def newArchive(self, archive): pass - def setThreadKey(self, archive, key, msgid): pass - def getOldestArticle(self, subject): pass - -class Database(DatabaseInterface): - """Define the basic sorting logic for a database - - Assumes that the database internally uses dateIndex, authorIndex, - etc. - """ - - # TBD Factor out more of the logic shared between BSDDBDatabase - # and HyperDatabase and place it in this class. - - def __init__(self): - # This method need not be called by subclasses that do their - # own initialization. - self.dateIndex = {} - self.authorIndex = {} - self.subjectIndex = {} - self.articleIndex = {} - self.changed = {} - - def addArticle(self, archive, article, subject=None, author=None, - date=None): - # create the keys; always end w/ msgid which will be unique - authorkey = (author or article.author, article.date, - article.msgid) - subjectkey = (subject or article.subject, article.date, - article.msgid) - datekey = date or article.date, article.msgid - - # Add the new article - self.dateIndex[datekey] = article.msgid - self.authorIndex[authorkey] = article.msgid - self.subjectIndex[subjectkey] = article.msgid - - self.store_article(article) - self.changed[archive, article.msgid] = None - - parentID = article.parentID - if parentID is not None and self.articleIndex.has_key(parentID): - parent = self.getArticle(archive, parentID) - myThreadKey = parent.threadKey + article.date + '-' - else: - myThreadKey = article.date + '-' - article.threadKey = myThreadKey - key = myThreadKey, article.msgid - self.setThreadKey(archive, key, article.msgid) - - def store_article(self, article): - """Store article without message body to save space""" - # TBD this is not thread safe! - temp = article.body - temp2 = article.html_body - article.body = [] - del article.html_body - self.articleIndex[article.msgid] = pickle.dumps(article) - article.body = temp - article.html_body = temp2 - - -# The Article class encapsulates a single posting. The attributes -# are: -# -# sequence : Sequence number, unique for each article in a set of archives -# subject : Subject -# datestr : The posting date, in human-readable format -# date : The posting date, in purely numeric format -# headers : Any other headers of interest -# author : The author's name (and possibly organization) -# email : The author's e-mail address -# msgid : A unique message ID -# in_reply_to: If != "", this is the msgid of the article being replied to -# references : A (possibly empty) list of msgid's of earlier articles -# in the thread -# body : A list of strings making up the message body - -class Article: - _last_article_time = time.time() - - def __init__(self, message = None, sequence = 0, keepHeaders = []): - if message is None: - return - self.sequence = sequence - - self.parentID = None - self.threadKey = None - # otherwise the current sequence number is used. - id = strip_separators(message['Message-Id']) - if id == "": - self.msgid = str(self.sequence) - else: self.msgid = id - - if message.has_key('Subject'): - self.subject = str(message['Subject']) - else: - self.subject = _('No subject') - if self.subject == "": self.subject = _('No subject') - - self._set_date(message) - - # Figure out the e-mail address and poster's name. Use the From: - # field first, followed by Reply-To: - self.author, self.email = parseaddr(message.get('From', '')) - e = message['Reply-To'] - if not self.email and e is not None: - ignoreauthor, self.email = parseaddr(e) - self.email = strip_separators(self.email) - self.author = strip_separators(self.author) - - if self.author == "": - self.author = self.email - - # Save the In-Reply-To:, References:, and Message-ID: lines - # - # TBD: The original code does some munging on these fields, which - # shouldn't be necessary, but changing this may break code. For - # safety, I save the original headers on different attributes for use - # in writing the plain text periodic flat files. - self._in_reply_to = message['in-reply-to'] - self._references = message['references'] - self._message_id = message['message-id'] - - i_r_t = message['In-Reply-To'] - if i_r_t is None: - self.in_reply_to = '' - else: - match = msgid_pat.search(i_r_t) - if match is None: self.in_reply_to = '' - else: self.in_reply_to = strip_separators(match.group(1)) - - references = message['References'] - if references is None: - self.references = [] - else: - self.references = map(strip_separators, references.split()) - - # Save any other interesting headers - self.headers = {} - for i in keepHeaders: - if message.has_key(i): - self.headers[i] = message[i] - - # Read the message body - s = StringIO(message.get_payload(decode=True)\ - or message.as_string().split('\n\n',1)[1]) - self.body = s.readlines() - - def _set_date(self, message): - def floatdate(header): - missing = [] - datestr = message.get(header, missing) - if datestr is missing: - return None - date = parsedate_tz(datestr) - try: - return mktime_tz(date) - except (TypeError, ValueError, OverflowError): - return None - date = floatdate('date') - if date is None: - date = floatdate('x-list-received-date') - if date is None: - # What's left to try? - date = self._last_article_time + 1 - self._last_article_time = date - self.date = '%011i' % date - self.datestr = message.get('date') \ - or message.get('x-list-received-date') \ - or formatdate(date) - - def __repr__(self): - return '<Article ID = '+repr(self.msgid)+'>' - - def finished_update_article(self): - pass - -# Pipermail formatter class - -class T: - DIRMODE = 0755 # Mode to give to created directories - FILEMODE = 0644 # Mode to give to created files - INDEX_EXT = ".html" # Extension for indexes - - def __init__(self, basedir = None, reload = 1, database = None): - # If basedir isn't provided, assume the current directory - if basedir is None: - self.basedir = os.getcwd() - else: - basedir = os.path.expanduser(basedir) - self.basedir = basedir - self.database = database - - # If the directory doesn't exist, create it. This code shouldn't get - # run anymore, we create the directory in Archiver.py. It should only - # get used by legacy lists created that are only receiving their first - # message in the HTML archive now -- Marc - try: - os.stat(self.basedir) - except os.error, errdata: - errno, errmsg = errdata - if errno != 2: - raise os.error, errdata - else: - self.message(_('Creating archive directory ') + self.basedir) - omask = os.umask(0) - try: - os.mkdir(self.basedir, self.DIRMODE) - finally: - os.umask(omask) - - # Try to load previously pickled state - try: - if not reload: - raise IOError - f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r') - self.message(_('Reloading pickled archive state')) - d = pickle.load(f) - f.close() - for key, value in d.items(): - setattr(self, key, value) - except (IOError, EOFError): - # No pickled version, so initialize various attributes - self.archives = [] # Archives - self._dirty_archives = [] # Archives that will have to be updated - self.sequence = 0 # Sequence variable used for - # numbering articles - self.update_TOC = 0 # Does the TOC need updating? - # - # make the basedir variable work when passed in as an __init__ arg - # and different from the one in the pickle. Let the one passed in - # as an __init__ arg take precedence if it's stated. This way, an - # archive can be moved from one place to another and still work. - # - if basedir != self.basedir: - self.basedir = basedir - - def close(self): - "Close an archive, save its state, and update any changed archives." - self.update_dirty_archives() - self.update_TOC = 0 - self.write_TOC() - # Save the collective state - self.message(_('Pickling archive state into ') - + os.path.join(self.basedir, 'pipermail.pck')) - self.database.close() - del self.database - - omask = os.umask(007) - try: - f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w') - finally: - os.umask(omask) - pickle.dump(self.getstate(), f) - f.close() - - def getstate(self): - # can override this in subclass - return self.__dict__ - - # - # Private methods - # - # These will be neither overridden nor called by custom archivers. - # - - - # Create a dictionary of various parameters that will be passed - # to the write_index_{header,footer} functions - def __set_parameters(self, archive): - # Determine the earliest and latest date in the archive - firstdate = self.database.firstdate(archive) - lastdate = self.database.lastdate(archive) - - # Get the current time - now = time.asctime(time.localtime(time.time())) - self.firstdate = firstdate - self.lastdate = lastdate - self.archivedate = now - self.size = self.database.numArticles(archive) - self.archive = archive - self.version = __version__ - - # Find the message ID of an article's parent, or return None - # if no parent can be found. - - def __findParent(self, article, children = []): - parentID = None - if article.in_reply_to: - parentID = article.in_reply_to - elif article.references: - # Remove article IDs that aren't in the archive - refs = filter(self.articleIndex.has_key, article.references) - if not refs: - return None - maxdate = self.database.getArticle(self.archive, - refs[0]) - for ref in refs[1:]: - a = self.database.getArticle(self.archive, ref) - if a.date > maxdate.date: - maxdate = a - parentID = maxdate.msgid - else: - # Look for the oldest matching subject - try: - key, tempid = \ - self.subjectIndex.set_location(article.subject) - print key, tempid - self.subjectIndex.next() - [subject, date] = key.split('\0') - print article.subject, subject, date - if subject == article.subject and tempid not in children: - parentID = tempid - except KeyError: - pass - return parentID - - # Update the threaded index completely - def updateThreadedIndex(self): - # Erase the threaded index - self.database.clearIndex(self.archive, 'thread') - - # Loop over all the articles - msgid = self.database.first(self.archive, 'date') - while msgid is not None: - try: - article = self.database.getArticle(self.archive, msgid) - except KeyError: - pass - else: - if article.parentID is None or \ - not self.database.hasArticle(self.archive, - article.parentID): - # then - pass - else: - parent = self.database.getArticle(self.archive, - article.parentID) - article.threadKey = parent.threadKey+article.date+'-' - self.database.setThreadKey(self.archive, - (article.threadKey, article.msgid), - msgid) - msgid = self.database.next(self.archive, 'date') - - # - # Public methods: - # - # These are part of the public interface of the T class, but will - # never be overridden (unless you're trying to do something very new). - - # Update a single archive's indices, whether the archive's been - # dirtied or not. - def update_archive(self, archive): - self.archive = archive - self.message(_("Updating index files for archive [%(archive)s]")) - arcdir = os.path.join(self.basedir, archive) - self.__set_parameters(archive) - - for hdr in ('Date', 'Subject', 'Author'): - self._update_simple_index(hdr, archive, arcdir) - - self._update_thread_index(archive, arcdir) - - def _update_simple_index(self, hdr, archive, arcdir): - self.message(" " + hdr) - self.type = hdr - hdr = hdr.lower() - - self._open_index_file_as_stdout(arcdir, hdr) - self.write_index_header() - count = 0 - # Loop over the index entries - msgid = self.database.first(archive, hdr) - while msgid is not None: - try: - article = self.database.getArticle(self.archive, msgid) - except KeyError: - pass - else: - count = count + 1 - self.write_index_entry(article) - msgid = self.database.next(archive, hdr) - # Finish up this index - self.write_index_footer() - self._restore_stdout() - - def _update_thread_index(self, archive, arcdir): - self.message(_(" Thread")) - self._open_index_file_as_stdout(arcdir, "thread") - self.type = 'Thread' - self.write_index_header() - - # To handle the prev./next in thread pointers, we need to - # track articles 5 at a time. - - # Get the first 5 articles - L = [None] * 5 - i = 2 - msgid = self.database.first(self.archive, 'thread') - - while msgid is not None and i < 5: - L[i] = self.database.getArticle(self.archive, msgid) - i = i + 1 - msgid = self.database.next(self.archive, 'thread') - - while L[2] is not None: - article = L[2] - artkey = None - if article is not None: - artkey = article.threadKey - if artkey is not None: - self.write_threadindex_entry(article, artkey.count('-') - 1) - if self.database.changed.has_key((archive,article.msgid)): - a1 = L[1] - a3 = L[3] - self.update_article(arcdir, article, a1, a3) - if a3 is not None: - self.database.changed[(archive, a3.msgid)] = None - if a1 is not None: - key = archive, a1.msgid - if not self.database.changed.has_key(key): - self.update_article(arcdir, a1, L[0], L[2]) - else: - del self.database.changed[key] - if L[0]: - L[0].finished_update_article() - L = L[1:] # Rotate the list - if msgid is None: - L.append(msgid) - else: - L.append(self.database.getArticle(self.archive, msgid)) - msgid = self.database.next(self.archive, 'thread') - - self.write_index_footer() - self._restore_stdout() - - def _open_index_file_as_stdout(self, arcdir, index_name): - path = os.path.join(arcdir, index_name + self.INDEX_EXT) - omask = os.umask(002) - try: - self.__f = open(path, 'w') - finally: - os.umask(omask) - self.__stdout = sys.stdout - sys.stdout = self.__f - - def _restore_stdout(self): - sys.stdout = self.__stdout - self.__f.close() - del self.__f - del self.__stdout - - # Update only archives that have been marked as "changed". - def update_dirty_archives(self): - for i in self._dirty_archives: - self.update_archive(i) - self._dirty_archives = [] - - # Read a Unix mailbox file from the file object <input>, - # and create a series of Article objects. Each article - # object will then be archived. - - def _makeArticle(self, msg, sequence): - return Article(msg, sequence) - - def processUnixMailbox(self, input, start=None, end=None): - mbox = ArchiverMailbox(input, self.maillist) - if start is None: - start = 0 - counter = 0 - while counter < start: - try: - m = mbox.next() - except errors.DiscardMessage: - continue - if m is None: - return - counter += 1 - while 1: - try: - pos = input.tell() - m = mbox.next() - except errors.DiscardMessage: - continue - except Exception: - log.error('uncaught archiver exception at filepos: %s', pos) - raise - if m is None: - break - if m == '': - # It was an unparseable message - continue - msgid = m.get('message-id', 'n/a') - self.message(_('#%(counter)05d %(msgid)s')) - a = self._makeArticle(m, self.sequence) - self.sequence += 1 - self.add_article(a) - if end is not None and counter >= end: - break - counter += 1 - - def new_archive(self, archive, archivedir): - self.archives.append(archive) - self.update_TOC = 1 - self.database.newArchive(archive) - # If the archive directory doesn't exist, create it - try: - os.stat(archivedir) - except os.error, errdata: - errno, errmsg = errdata - if errno == 2: - omask = os.umask(0) - try: - os.mkdir(archivedir, self.DIRMODE) - finally: - os.umask(omask) - else: - raise os.error, errdata - self.open_new_archive(archive, archivedir) - - def add_article(self, article): - archives = self.get_archives(article) - if not archives: - return - if type(archives) == type(''): - archives = [archives] - - article.filename = filename = self.get_filename(article) - temp = self.format_article(article) - for arch in archives: - self.archive = arch # why do this??? - archivedir = os.path.join(self.basedir, arch) - if arch not in self.archives: - self.new_archive(arch, archivedir) - - # Write the HTML-ized article - self.write_article(arch, temp, os.path.join(archivedir, - filename)) - - if article.decoded.has_key('author'): - author = fixAuthor(article.decoded['author']) - else: - author = fixAuthor(article.author) - if article.decoded.has_key('stripped'): - subject = article.decoded['stripped'].lower() - else: - subject = article.subject.lower() - - article.parentID = parentID = self.get_parent_info(arch, article) - if parentID: - parent = self.database.getArticle(arch, parentID) - article.threadKey = parent.threadKey + article.date + '-' - else: - article.threadKey = article.date + '-' - key = article.threadKey, article.msgid - - self.database.setThreadKey(arch, key, article.msgid) - self.database.addArticle(arch, temp, author=author, - subject=subject) - - if arch not in self._dirty_archives: - self._dirty_archives.append(arch) - - def get_parent_info(self, archive, article): - parentID = None - if article.in_reply_to: - parentID = article.in_reply_to - elif article.references: - refs = self._remove_external_references(article.references) - if refs: - maxdate = self.database.getArticle(archive, refs[0]) - for ref in refs[1:]: - a = self.database.getArticle(archive, ref) - if a.date > maxdate.date: - maxdate = a - parentID = maxdate.msgid - else: - # Get the oldest article with a matching subject, and - # assume this is a follow-up to that article - parentID = self.database.getOldestArticle(archive, - article.subject) - - if parentID and not self.database.hasArticle(archive, parentID): - parentID = None - return parentID - - def write_article(self, index, article, path): - omask = os.umask(002) - try: - f = open(path, 'w') - finally: - os.umask(omask) - temp_stdout, sys.stdout = sys.stdout, f - self.write_article_header(article) - sys.stdout.writelines(article.body) - self.write_article_footer(article) - sys.stdout = temp_stdout - f.close() - - def _remove_external_references(self, refs): - keep = [] - for ref in refs: - if self.database.hasArticle(self.archive, ref): - keep.append(ref) - return keep - - # Abstract methods: these will need to be overridden by subclasses - # before anything useful can be done. - - def get_filename(self, article): - pass - def get_archives(self, article): - """Return a list of indexes where the article should be filed. - A string can be returned if the list only contains one entry, - and the empty list is legal.""" - pass - def format_article(self, article): - pass - def write_index_header(self): - pass - def write_index_footer(self): - pass - def write_index_entry(self, article): - pass - def write_threadindex_entry(self, article, depth): - pass - def write_article_header(self, article): - pass - def write_article_footer(self, article): - pass - def write_article_entry(self, article): - pass - def update_article(self, archivedir, article, prev, next): - pass - def write_TOC(self): - pass - def open_new_archive(self, archive, dir): - pass - def message(self, msg): - pass - - -class BSDDBdatabase(Database): - __super_addArticle = Database.addArticle - - def __init__(self, basedir): - self.__cachekeys = [] - self.__cachedict = {} - self.__currentOpenArchive = None # The currently open indices - self.basedir = os.path.expanduser(basedir) - self.changed = {} # Recently added articles, indexed only by - # message ID - - def firstdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - date, msgid = self.dateIndex.first() - date = time.asctime(time.localtime(float(date))) - except KeyError: - pass - return date - - def lastdate(self, archive): - self.__openIndices(archive) - date = 'None' - try: - date, msgid = self.dateIndex.last() - date = time.asctime(time.localtime(float(date))) - except KeyError: - pass - return date - - def numArticles(self, archive): - self.__openIndices(archive) - return len(self.dateIndex) - - def addArticle(self, archive, article, subject=None, author=None, - date=None): - self.__openIndices(archive) - self.__super_addArticle(archive, article, subject, author, date) - - # Open the BSDDB files that are being used as indices - # (dateIndex, authorIndex, subjectIndex, articleIndex) - def __openIndices(self, archive): - if self.__currentOpenArchive == archive: - return - - import bsddb - self.__closeIndices() - arcdir = os.path.join(self.basedir, 'database') - omask = os.umask(0) - try: - try: - os.mkdir(arcdir, 02775) - except OSError: - # BAW: Hmm... - pass - finally: - os.umask(omask) - for hdr in ('date', 'author', 'subject', 'article', 'thread'): - path = os.path.join(arcdir, archive + '-' + hdr) - t = bsddb.btopen(path, 'c') - setattr(self, hdr + 'Index', t) - self.__currentOpenArchive = archive - - # Close the BSDDB files that are being used as indices (if they're - # open--this is safe to call if they're already closed) - def __closeIndices(self): - if self.__currentOpenArchive is not None: - pass - for hdr in ('date', 'author', 'subject', 'thread', 'article'): - attr = hdr + 'Index' - if hasattr(self, attr): - index = getattr(self, attr) - if hdr == 'article': - if not hasattr(self, 'archive_length'): - self.archive_length = {} - self.archive_length[self.__currentOpenArchive] = len(index) - index.close() - delattr(self,attr) - self.__currentOpenArchive = None - - def close(self): - self.__closeIndices() - def hasArticle(self, archive, msgid): - self.__openIndices(archive) - return self.articleIndex.has_key(msgid) - def setThreadKey(self, archive, key, msgid): - self.__openIndices(archive) - self.threadIndex[key] = msgid - def getArticle(self, archive, msgid): - self.__openIndices(archive) - if self.__cachedict.has_key(msgid): - self.__cachekeys.remove(msgid) - self.__cachekeys.append(msgid) - return self.__cachedict[msgid] - if len(self.__cachekeys) == CACHESIZE: - delkey, self.__cachekeys = (self.__cachekeys[0], - self.__cachekeys[1:]) - del self.__cachedict[delkey] - s = self.articleIndex[msgid] - article = pickle.loads(s) - self.__cachekeys.append(msgid) - self.__cachedict[msgid] = article - return article - - def first(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - try: - key, msgid = index.first() - return msgid - except KeyError: - return None - def next(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - try: - key, msgid = index.next() - except KeyError: - return None - else: - return msgid - - def getOldestArticle(self, archive, subject): - self.__openIndices(archive) - subject = subject.lower() - try: - key, tempid = self.subjectIndex.set_location(subject) - self.subjectIndex.next() - [subject2, date] = key.split('\0') - if subject != subject2: - return None - return tempid - except KeyError: # XXX what line raises the KeyError? - return None - - def newArchive(self, archive): - pass - - def clearIndex(self, archive, index): - self.__openIndices(archive) - index = getattr(self, index+'Index') - finished = 0 - try: - key, msgid = self.threadIndex.first() - except KeyError: - finished = 1 - while not finished: - del self.threadIndex[key] - try: - key, msgid = self.threadIndex.next() - except KeyError: - finished = 1 - - |
