summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Mailman/Archiver/pipermail.py685
1 files changed, 340 insertions, 345 deletions
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index cd6c6d522..8d6414a5f 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -44,21 +44,21 @@ def fixAuthor(author):
if i == 0:
return author # The string's one word--forget it
if author.upper() == author or author.lower() == author:
- # Damn, the name is all upper- or lower-case.
- while i > 0 and L[i-1].lower() in smallNameParts:
+ # Damn, the name is all upper- or lower-case.
+ while i > 0 and L[i-1].lower() in smallNameParts:
i = i - 1
else:
- # Mixed case; assume that small parts of the last name will be
+ # Mixed case; assume that small parts of the last name will be
# in lowercase, and check them against the list.
- while i>0 and (L[i-1][0] in lowercase or
- L[i-1].lower() in smallNameParts):
- i = i - 1
+ while i>0 and (L[i-1][0] in lowercase or
+ L[i-1].lower() in smallNameParts):
+ i = i - 1
author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i])
return author
# Abstract class for databases
-class DatabaseInterface:
+class DatabaseInterface:
def __init__(self): pass
def close(self): pass
def getArticle(self, archive, msgid): pass
@@ -83,7 +83,7 @@ class Database(DatabaseInterface):
# TBD Factor out more of the logic shared between BSDDBDatabase
# and HyperDatabase and place it in this class.
-
+
def __init__(self):
# This method need not be called by subclasses that do their
# own initialization.
@@ -92,7 +92,7 @@ class Database(DatabaseInterface):
self.subjectIndex = {}
self.articleIndex = {}
self.changed = {}
-
+
def addArticle(self, archive, article, subject=None, author=None,
date=None):
# create the keys; always end w/ msgid which will be unique
@@ -102,33 +102,33 @@ class Database(DatabaseInterface):
article.msgid)
datekey = date or article.date, article.msgid
- # Add the new article
- self.dateIndex[datekey] = article.msgid
- self.authorIndex[authorkey] = article.msgid
- self.subjectIndex[subjectkey] = article.msgid
+ # Add the new article
+ self.dateIndex[datekey] = article.msgid
+ self.authorIndex[authorkey] = article.msgid
+ self.subjectIndex[subjectkey] = article.msgid
self.store_article(article)
- self.changed[archive, article.msgid] = None
+ self.changed[archive, article.msgid] = None
- parentID = article.parentID
- if parentID is not None and self.articleIndex.has_key(parentID):
- parent = self.getArticle(archive, parentID)
- myThreadKey = parent.threadKey + article.date + '-'
- else:
+ parentID = article.parentID
+ if parentID is not None and self.articleIndex.has_key(parentID):
+ parent = self.getArticle(archive, parentID)
+ myThreadKey = parent.threadKey + article.date + '-'
+ else:
myThreadKey = article.date + '-'
- article.threadKey = myThreadKey
+ article.threadKey = myThreadKey
key = myThreadKey, article.msgid
- self.setThreadKey(archive, key, article.msgid)
+ self.setThreadKey(archive, key, article.msgid)
def store_article(self, article):
"""Store article without message body to save space"""
# TBD this is not thread safe!
- temp = article.body
+ temp = article.body
article.body = []
- self.articleIndex[article.msgid] = pickle.dumps(article)
- article.body = temp
+ self.articleIndex[article.msgid] = pickle.dumps(article)
+ article.body = temp
-# The Article class encapsulates a single posting. The attributes
+# The Article class encapsulates a single posting. The attributes
# are:
#
# sequence : Sequence number, unique for each article in a set of archives
@@ -141,46 +141,46 @@ class Database(DatabaseInterface):
# msgid : A unique message ID
# in_reply_to: If != "", this is the msgid of the article being replied to
# references : A (possibly empty) list of msgid's of earlier articles
-# in the thread
+# in the thread
# body : A list of strings making up the message body
class Article:
_last_article_time = time.time()
-
+
def __init__(self, message = None, sequence = 0, keepHeaders = []):
- if message is None:
+ if message is None:
return
- self.sequence = sequence
+ self.sequence = sequence
- self.parentID = None
+ self.parentID = None
self.threadKey = None
- # otherwise the current sequence number is used.
- id = strip_separators(message['Message-Id'])
- if id == "":
+ # otherwise the current sequence number is used.
+ id = strip_separators(message['Message-Id'])
+ if id == "":
self.msgid = str(self.sequence)
- else: self.msgid = id
+ else: self.msgid = id
- if message.has_key('Subject'):
+ if message.has_key('Subject'):
self.subject = str(message['Subject'])
- else:
+ else:
self.subject = _('No subject')
- if self.subject == "": self.subject = _('No subject')
+ if self.subject == "": self.subject = _('No subject')
self._set_date(message)
- # Figure out the e-mail address and poster's name. Use the From:
- # field first, followed by Reply-To:
- self.author, self.email = email.Utils.parseaddr(message['From'])
- e = message['Reply-To']
- if not self.email and e is not None:
+ # Figure out the e-mail address and poster's name. Use the From:
+ # field first, followed by Reply-To:
+ self.author, self.email = email.Utils.parseaddr(message['From'])
+ e = message['Reply-To']
+ if not self.email and e is not None:
ignoreauthor, self.email = email.Utils.parseaddr(e)
- self.email = strip_separators(self.email)
- self.author = strip_separators(self.author)
+ self.email = strip_separators(self.email)
+ self.author = strip_separators(self.author)
- if self.author == "":
+ if self.author == "":
self.author = self.email
- # Save the In-Reply-To:, References:, and Message-ID: lines
+ # Save the In-Reply-To:, References:, and Message-ID: lines
#
# TBD: The original code does some munging on these fields, which
# shouldn't be necessary, but changing this may break code. For
@@ -190,53 +190,53 @@ class Article:
self._references = message['references']
self._message_id = message['message-id']
- i_r_t = message['In-Reply-To']
- if i_r_t is None:
+ i_r_t = message['In-Reply-To']
+ if i_r_t is None:
self.in_reply_to = ''
- else:
- match = msgid_pat.search(i_r_t)
- if match is None: self.in_reply_to = ''
- else: self.in_reply_to = strip_separators(match.group(1))
-
- references = message['References']
- if references is None:
+ else:
+ match = msgid_pat.search(i_r_t)
+ if match is None: self.in_reply_to = ''
+ else: self.in_reply_to = strip_separators(match.group(1))
+
+ references = message['References']
+ if references is None:
self.references = []
- else:
- self.references = map(strip_separators, references.split())
+ else:
+ self.references = map(strip_separators, references.split())
- # Save any other interesting headers
- self.headers = {}
- for i in keepHeaders:
- if message.has_key(i):
+ # Save any other interesting headers
+ self.headers = {}
+ for i in keepHeaders:
+ if message.has_key(i):
self.headers[i] = message[i]
- # Read the message body
+ # Read the message body
s = StringIO(message.get_payload())
self.body = s.readlines()
def _set_date(self, message):
- if message.has_key('Date'):
- self.datestr = str(message['Date'])
- date = email.Utils.parsedate_tz(message['Date'])
- else:
- self.datestr = ''
- date = None
- if date is not None:
- date, tzoffset = date[:9], date[-1] or 0
+ if message.has_key('Date'):
+ self.datestr = str(message['Date'])
+ date = email.Utils.parsedate_tz(message['Date'])
+ else:
+ self.datestr = ''
+ date = None
+ if date is not None:
+ date, tzoffset = date[:9], date[-1] or 0
try:
date = time.mktime(date) - tzoffset
except (ValueError, OverflowError):
date = self._last_article_time + 1
#print 'Article with bad date:', self.msgid
- else:
- date = self._last_article_time + 1
+ else:
+ date = self._last_article_time + 1
#print 'Article without date:', self.msgid
-
- self._last_article_time = date
- self.date = '%011i' % date
+
+ self._last_article_time = date
+ self.date = '%011i' % date
def __repr__(self):
- return '<Article ID = '+repr(self.msgid)+'>'
+ return '<Article ID = '+repr(self.msgid)+'>'
# Pipermail formatter class
@@ -246,49 +246,49 @@ class T:
INDEX_EXT = ".html" # Extension for indexes
def __init__(self, basedir = None, reload = 1, database = None):
- # If basedir isn't provided, assume the current directory
- if basedir is None:
+ # If basedir isn't provided, assume the current directory
+ if basedir is None:
self.basedir = os.getcwd()
- else:
+ else:
basedir = os.path.expanduser(basedir)
- self.basedir = basedir
- self.database = database
+ self.basedir = basedir
+ self.database = database
- # If the directory doesn't exist, create it. This code shouldn't get
- # run anymore, we create the directory in Archiver.py. It should only
- # get used by legacy lists created that are only receiving their first
- # message in the HTML archive now -- Marc
- try:
+ # If the directory doesn't exist, create it. This code shouldn't get
+ # run anymore, we create the directory in Archiver.py. It should only
+ # get used by legacy lists created that are only receiving their first
+ # message in the HTML archive now -- Marc
+ try:
os.stat(self.basedir)
- except os.error, errdata:
- errno, errmsg = errdata
- if errno != 2:
+ except os.error, errdata:
+ errno, errmsg = errdata
+ if errno != 2:
raise os.error, errdata
- else:
- self.message(_('Creating archive directory ') + self.basedir)
+ else:
+ self.message(_('Creating archive directory ') + self.basedir)
omask = os.umask(0)
try:
os.mkdir(self.basedir, self.DIRMODE)
finally:
os.umask(omask)
- # Try to load previously pickled state
- try:
- if not reload:
+ # Try to load previously pickled state
+ try:
+ if not reload:
raise IOError
- f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
- self.message(_('Reloading pickled archive state'))
- d = pickle.load(f)
- f.close()
- for key, value in d.items():
+ f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
+ self.message(_('Reloading pickled archive state'))
+ d = pickle.load(f)
+ f.close()
+ for key, value in d.items():
setattr(self, key, value)
- except (IOError, EOFError):
- # No pickled version, so initialize various attributes
- self.archives = [] # Archives
- self._dirty_archives = [] # Archives that will have to be updated
- self.sequence = 0 # Sequence variable used for
- # numbering articles
- self.update_TOC = 0 # Does the TOC need updating?
+ except (IOError, EOFError):
+ # No pickled version, so initialize various attributes
+ self.archives = [] # Archives
+ self._dirty_archives = [] # Archives that will have to be updated
+ self.sequence = 0 # Sequence variable used for
+ # numbering articles
+ self.update_TOC = 0 # Does the TOC need updating?
#
# make the basedir variable work when passed in as an __init__ arg
# and different from the one in the pickle. Let the one passed in
@@ -299,62 +299,62 @@ class T:
self.basedir = basedir
def close(self):
- "Close an archive, save its state, and update any changed archives."
- self.update_dirty_archives()
+ "Close an archive, save its state, and update any changed archives."
+ self.update_dirty_archives()
self.update_TOC = 0
self.write_TOC()
- # Save the collective state
- self.message(_('Pickling archive state into ')
+ # Save the collective state
+ self.message(_('Pickling archive state into ')
+ os.path.join(self.basedir, 'pipermail.pck'))
- self.database.close()
- del self.database
+ self.database.close()
+ del self.database
omask = os.umask(007)
try:
f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
finally:
os.umask(omask)
- pickle.dump(self.getstate(), f)
- f.close()
+ pickle.dump(self.getstate(), f)
+ f.close()
def getstate(self):
# can override this in subclass
return self.__dict__
- #
- # Private methods
- #
+ #
+ # Private methods
+ #
# These will be neither overridden nor called by custom archivers.
#
-
- # Create a dictionary of various parameters that will be passed
+
+ # Create a dictionary of various parameters that will be passed
# to the write_index_{header,footer} functions
def __set_parameters(self, archive):
- # Determine the earliest and latest date in the archive
- firstdate = self.database.firstdate(archive)
- lastdate = self.database.lastdate(archive)
+ # Determine the earliest and latest date in the archive
+ firstdate = self.database.firstdate(archive)
+ lastdate = self.database.lastdate(archive)
- # Get the current time
- now = time.asctime(time.localtime(time.time()))
- self.firstdate = firstdate
+ # Get the current time
+ now = time.asctime(time.localtime(time.time()))
+ self.firstdate = firstdate
self.lastdate = lastdate
- self.archivedate = now
+ self.archivedate = now
self.size = self.database.numArticles(archive)
- self.archive = archive
+ self.archive = archive
self.version = __version__
# Find the message ID of an article's parent, or return None
# if no parent can be found.
def __findParent(self, article, children = []):
- parentID = None
+ parentID = None
if article.in_reply_to:
parentID = article.in_reply_to
- elif article.references:
- # Remove article IDs that aren't in the archive
- refs = filter(self.articleIndex.has_key, article.references)
- if not refs:
+ elif article.references:
+ # Remove article IDs that aren't in the archive
+ refs = filter(self.articleIndex.has_key, article.references)
+ if not refs:
return None
maxdate = self.database.getArticle(self.archive,
refs[0])
@@ -363,29 +363,29 @@ class T:
if a.date > maxdate.date:
maxdate = a
parentID = maxdate.msgid
- else:
- # Look for the oldest matching subject
- try:
- key, tempid = \
+ else:
+ # Look for the oldest matching subject
+ try:
+ key, tempid = \
self.subjectIndex.set_location(article.subject)
- print key, tempid
- self.subjectIndex.next()
- [subject, date] = key.split('\0')
- print article.subject, subject, date
- if subject == article.subject and tempid not in children:
- parentID = tempid
- except KeyError:
+ print key, tempid
+ self.subjectIndex.next()
+ [subject, date] = key.split('\0')
+ print article.subject, subject, date
+ if subject == article.subject and tempid not in children:
+ parentID = tempid
+ except KeyError:
pass
- return parentID
+ return parentID
# Update the threaded index completely
def updateThreadedIndex(self):
- # Erase the threaded index
- self.database.clearIndex(self.archive, 'thread')
-
- # Loop over all the articles
- msgid = self.database.first(self.archive, 'date')
- while msgid is not None:
+ # Erase the threaded index
+ self.database.clearIndex(self.archive, 'thread')
+
+ # Loop over all the articles
+ msgid = self.database.first(self.archive, 'date')
+ while msgid is not None:
try:
article = self.database.getArticle(self.archive, msgid)
except KeyError:
@@ -395,31 +395,31 @@ class T:
not self.database.hasArticle(self.archive,
article.parentID):
# then
- key = article.date
- else:
+ pass
+ else:
parent = self.database.getArticle(self.archive,
article.parentID)
- article.threadKey = parent.threadKey+article.date+'-'
+ article.threadKey = parent.threadKey+article.date+'-'
self.database.setThreadKey(self.archive,
(article.threadKey, article.msgid),
msgid)
- msgid = self.database.next(self.archive, 'date')
+ msgid = self.database.next(self.archive, 'date')
#
# Public methods:
#
# These are part of the public interface of the T class, but will
# never be overridden (unless you're trying to do something very new).
-
+
# Update a single archive's indices, whether the archive's been
- # dirtied or not.
- def update_archive(self, archive):
- self.archive = archive
- self.message(_("Updating index files for archive [%(archive)s]"))
- arcdir = os.path.join(self.basedir, archive)
- self.__set_parameters(archive)
+ # dirtied or not.
+ def update_archive(self, archive):
+ self.archive = archive
+ self.message(_("Updating index files for archive [%(archive)s]"))
+ arcdir = os.path.join(self.basedir, archive)
+ self.__set_parameters(archive)
- for hdr in ('Date', 'Subject', 'Author'):
+ for hdr in ('Date', 'Subject', 'Author'):
self._update_simple_index(hdr, archive, arcdir)
self._update_thread_index(archive, arcdir)
@@ -433,7 +433,6 @@ class T:
self.write_index_header()
count = 0
# Loop over the index entries
- finished = 0
msgid = self.database.first(archive, hdr)
while msgid is not None:
try:
@@ -449,52 +448,51 @@ class T:
self._restore_stdout()
def _update_thread_index(self, archive, arcdir):
- self.message(_(" Thread"))
+ self.message(_(" Thread"))
self._open_index_file_as_stdout(arcdir, "thread")
- self.type = 'Thread'
- self.write_index_header()
+ self.type = 'Thread'
+ self.write_index_header()
- # To handle the prev./next in thread pointers, we need to
- # track articles 5 at a time.
+ # To handle the prev./next in thread pointers, we need to
+ # track articles 5 at a time.
- # Get the first 5 articles
- L = [None] * 5
+ # Get the first 5 articles
+ L = [None] * 5
i = 2
- finished = 0
- msgid = self.database.first(self.archive, 'thread')
-
- while msgid is not None and i < 5:
- L[i] = self.database.getArticle(self.archive, msgid)
+ msgid = self.database.first(self.archive, 'thread')
+
+ while msgid is not None and i < 5:
+ L[i] = self.database.getArticle(self.archive, msgid)
i = i + 1
- msgid = self.database.next(self.archive, 'thread')
+ msgid = self.database.next(self.archive, 'thread')
- while L[2] is not None:
- article = L[2]
+ while L[2] is not None:
+ article = L[2]
artkey = None
- if article is not None:
+ if article is not None:
artkey = article.threadKey
- if artkey is not None:
- self.write_threadindex_entry(article, artkey.count('-') - 1)
- if self.database.changed.has_key((archive,article.msgid)):
- a1 = L[1]
+ if artkey is not None:
+ self.write_threadindex_entry(article, artkey.count('-') - 1)
+ if self.database.changed.has_key((archive,article.msgid)):
+ a1 = L[1]
a3 = L[3]
- self.update_article(arcdir, article, a1, a3)
- if a3 is not None:
+ self.update_article(arcdir, article, a1, a3)
+ if a3 is not None:
self.database.changed[(archive, a3.msgid)] = None
- if a1 is not None:
+ if a1 is not None:
key = archive, a1.msgid
- if not self.database.changed.has_key(key):
- self.update_article(arcdir, a1, L[0], L[2])
- else:
+ if not self.database.changed.has_key(key):
+ self.update_article(arcdir, a1, L[0], L[2])
+ else:
del self.database.changed[key]
- L = L[1:] # Rotate the list
- if msgid is None:
+ L = L[1:] # Rotate the list
+ if msgid is None:
L.append(msgid)
- else:
+ else:
L.append(self.database.getArticle(self.archive, msgid))
- msgid = self.database.next(self.archive, 'thread')
-
- self.write_index_footer()
+ msgid = self.database.next(self.archive, 'thread')
+
+ self.write_index_footer()
self._restore_stdout()
def _open_index_file_as_stdout(self, arcdir, index_name):
@@ -515,19 +513,19 @@ class T:
# Update only archives that have been marked as "changed".
def update_dirty_archives(self):
- for i in self._dirty_archives:
+ for i in self._dirty_archives:
self.update_archive(i)
- self._dirty_archives = []
+ self._dirty_archives = []
# Read a Unix mailbox file from the file object <input>,
# and create a series of Article objects. Each article
# object will then be archived.
-
+
def _makeArticle(self, msg, sequence):
return Article(msg, sequence)
def processUnixMailbox(self, input, start=None, end=None):
- mbox = ArchiverMailbox(input, self.maillist)
+ mbox = ArchiverMailbox(input, self.maillist)
if start is None:
start = 0
counter = 0
@@ -539,26 +537,26 @@ class T:
if m is None:
return
counter += 1
- while 1:
+ while 1:
try:
pos = input.tell()
m = mbox.next()
except Errors.DiscardMessage:
continue
- except Exception, e:
+ except Exception:
syslog('error', 'uncaught archiver exception at filepos: %s',
pos)
raise
- if m is None:
+ if m is None:
break
if m == '':
# It was an unparseable message
continue
msgid = m.get('message-id', 'n/a')
self.message(_('#%(counter)05d %(msgid)s'))
- a = self._makeArticle(m, self.sequence)
- self.sequence += 1
- self.add_article(a)
+ a = self._makeArticle(m, self.sequence)
+ self.sequence += 1
+ self.add_article(a)
if end is not None and counter >= end:
break
counter += 1
@@ -583,23 +581,23 @@ class T:
self.open_new_archive(archive, archivedir)
def add_article(self, article):
- archives = self.get_archives(article)
+ archives = self.get_archives(article)
if not archives:
return
- if type(archives) == type(''):
+ if type(archives) == type(''):
archives = [archives]
- article.filename = filename = self.get_filename(article)
- temp = self.format_article(article)
- for arch in archives:
- self.archive = arch # why do this???
- archivedir = os.path.join(self.basedir, arch)
- if arch not in self.archives:
+ article.filename = filename = self.get_filename(article)
+ temp = self.format_article(article)
+ for arch in archives:
+ self.archive = arch # why do this???
+ archivedir = os.path.join(self.basedir, arch)
+ if arch not in self.archives:
self.new_archive(arch, archivedir)
-
- # Write the HTML-ized article
+
+ # Write the HTML-ized article
self.write_article(arch, temp, os.path.join(archivedir,
- filename))
+ filename))
author = fixAuthor(article.author)
subject = article.subject.lower()
@@ -611,19 +609,19 @@ class T:
else:
article.threadKey = article.date + '-'
key = article.threadKey, article.msgid
-
- self.database.setThreadKey(arch, key, article.msgid)
- self.database.addArticle(arch, temp, author=author,
+
+ self.database.setThreadKey(arch, key, article.msgid)
+ self.database.addArticle(arch, temp, author=author,
subject=subject)
-
- if arch not in self._dirty_archives:
- self._dirty_archives.append(arch)
+
+ if arch not in self._dirty_archives:
+ self._dirty_archives.append(arch)
def get_parent_info(self, archive, article):
parentID = None
if article.in_reply_to:
parentID = article.in_reply_to
- elif article.references:
+ elif article.references:
refs = self._remove_external_references(article.references)
if refs:
maxdate = self.database.getArticle(archive, refs[0])
@@ -636,13 +634,11 @@ class T:
# Get the oldest article with a matching subject, and
# assume this is a follow-up to that article
parentID = self.database.getOldestArticle(archive,
- article.subject)
+ article.subject)
- if parentID and not self.database.hasArticle(archive, parentID):
+ if parentID and not self.database.hasArticle(archive, parentID):
parentID = None
return parentID
-
-
def write_article(self, index, article, path):
omask = os.umask(002)
@@ -667,73 +663,73 @@ class T:
# Abstract methods: these will need to be overridden by subclasses
# before anything useful can be done.
- def get_filename(self, article):
- pass
+ def get_filename(self, article):
+ pass
def get_archives(self, article):
- """Return a list of indexes where the article should be filed.
- A string can be returned if the list only contains one entry,
- and the empty list is legal."""
- pass
+ """Return a list of indexes where the article should be filed.
+ A string can be returned if the list only contains one entry,
+ and the empty list is legal."""
+ pass
def format_article(self, article):
- pass
+ pass
def write_index_header(self):
- pass
+ pass
def write_index_footer(self):
- pass
+ pass
def write_index_entry(self, article):
- pass
+ pass
def write_threadindex_entry(self, article, depth):
- pass
+ pass
def write_article_header(self, article):
- pass
+ pass
def write_article_footer(self, article):
- pass
+ pass
def write_article_entry(self, article):
- pass
+ pass
def update_article(self, archivedir, article, prev, next):
- pass
+ pass
def write_TOC(self):
- pass
+ pass
def open_new_archive(self, archive, dir):
- pass
+ pass
def message(self, msg):
- pass
+ pass
class BSDDBdatabase(Database):
__super_addArticle = Database.addArticle
-
+
def __init__(self, basedir):
- self.__cachekeys = []
+ self.__cachekeys = []
self.__cachedict = {}
- self.__currentOpenArchive = None # The currently open indices
- self.basedir = os.path.expanduser(basedir)
- self.changed = {} # Recently added articles, indexed only by
- # message ID
-
+ self.__currentOpenArchive = None # The currently open indices
+ self.basedir = os.path.expanduser(basedir)
+ self.changed = {} # Recently added articles, indexed only by
+ # message ID
+
def firstdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.first()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
+ self.__openIndices(archive)
+ date = 'None'
+ try:
+ date, msgid = self.dateIndex.first()
+ date = time.asctime(time.localtime(float(date)))
+ except KeyError:
pass
- return date
-
+ return date
+
def lastdate(self, archive):
- self.__openIndices(archive)
- date = 'None'
- try:
- date, msgid = self.dateIndex.last()
- date = time.asctime(time.localtime(float(date)))
- except KeyError:
+ self.__openIndices(archive)
+ date = 'None'
+ try:
+ date, msgid = self.dateIndex.last()
+ date = time.asctime(time.localtime(float(date)))
+ except KeyError:
pass
- return date
-
+ return date
+
def numArticles(self, archive):
- self.__openIndices(archive)
- return len(self.dateIndex)
+ self.__openIndices(archive)
+ return len(self.dateIndex)
def addArticle(self, archive, article, subject=None, author=None,
date=None):
@@ -743,12 +739,12 @@ class BSDDBdatabase(Database):
# Open the BSDDB files that are being used as indices
# (dateIndex, authorIndex, subjectIndex, articleIndex)
def __openIndices(self, archive):
- if self.__currentOpenArchive == archive:
+ if self.__currentOpenArchive == archive:
return
- import bsddb
- self.__closeIndices()
- arcdir = os.path.join(self.basedir, 'database')
+ import bsddb
+ self.__closeIndices()
+ arcdir = os.path.join(self.basedir, 'database')
omask = os.umask(0)
try:
try:
@@ -758,101 +754,100 @@ class BSDDBdatabase(Database):
pass
finally:
os.umask(omask)
- for hdr in ('date', 'author', 'subject', 'article', 'thread'):
+ for hdr in ('date', 'author', 'subject', 'article', 'thread'):
path = os.path.join(arcdir, archive + '-' + hdr)
- t = bsddb.btopen(path, 'c')
- setattr(self, hdr + 'Index', t)
- self.__currentOpenArchive = archive
+ t = bsddb.btopen(path, 'c')
+ setattr(self, hdr + 'Index', t)
+ self.__currentOpenArchive = archive
# Close the BSDDB files that are being used as indices (if they're
# open--this is safe to call if they're already closed)
def __closeIndices(self):
- if self.__currentOpenArchive is not None:
- pass
- for hdr in ('date', 'author', 'subject', 'thread', 'article'):
- attr = hdr + 'Index'
- if hasattr(self, attr):
- index = getattr(self, attr)
- if hdr == 'article':
- if not hasattr(self, 'archive_length'):
+ if self.__currentOpenArchive is not None:
+ pass
+ for hdr in ('date', 'author', 'subject', 'thread', 'article'):
+ attr = hdr + 'Index'
+ if hasattr(self, attr):
+ index = getattr(self, attr)
+ if hdr == 'article':
+ if not hasattr(self, 'archive_length'):
self.archive_length = {}
- self.archive_length[self.__currentOpenArchive] = len(index)
- index.close()
- delattr(self,attr)
- self.__currentOpenArchive = None
-
+ self.archive_length[self.__currentOpenArchive] = len(index)
+ index.close()
+ delattr(self,attr)
+ self.__currentOpenArchive = None
+
def close(self):
- self.__closeIndices()
- def hasArticle(self, archive, msgid):
- self.__openIndices(archive)
- return self.articleIndex.has_key(msgid)
+ self.__closeIndices()
+ def hasArticle(self, archive, msgid):
+ self.__openIndices(archive)
+ return self.articleIndex.has_key(msgid)
def setThreadKey(self, archive, key, msgid):
- self.__openIndices(archive)
- self.threadIndex[key] = msgid
+ self.__openIndices(archive)
+ self.threadIndex[key] = msgid
def getArticle(self, archive, msgid):
- self.__openIndices(archive)
- if self.__cachedict.has_key(msgid):
- self.__cachekeys.remove(msgid)
- self.__cachekeys.append(msgid)
- return self.__cachedict[msgid]
- if len(self.__cachekeys) == CACHESIZE:
- delkey, self.__cachekeys = (self.__cachekeys[0],
+ self.__openIndices(archive)
+ if self.__cachedict.has_key(msgid):
+ self.__cachekeys.remove(msgid)
+ self.__cachekeys.append(msgid)
+ return self.__cachedict[msgid]
+ if len(self.__cachekeys) == CACHESIZE:
+ delkey, self.__cachekeys = (self.__cachekeys[0],
self.__cachekeys[1:])
- del self.__cachedict[delkey]
- s = self.articleIndex[msgid]
- article = pickle.loads(s)
- self.__cachekeys.append(msgid)
+ del self.__cachedict[delkey]
+ s = self.articleIndex[msgid]
+ article = pickle.loads(s)
+ self.__cachekeys.append(msgid)
self.__cachedict[msgid] = article
- return article
+ return article
- def first(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.first()
- return msgid
- except KeyError:
+ def first(self, archive, index):
+ self.__openIndices(archive)
+ index = getattr(self, index+'Index')
+ try:
+ key, msgid = index.first()
+ return msgid
+ except KeyError:
return None
- def next(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- try:
- key, msgid = index.next()
- except KeyError:
+ def next(self, archive, index):
+ self.__openIndices(archive)
+ index = getattr(self, index+'Index')
+ try:
+ key, msgid = index.next()
+ except KeyError:
return None
else:
return msgid
-
def getOldestArticle(self, archive, subject):
- self.__openIndices(archive)
- subject = subject.lower()
- try:
- key, tempid = self.subjectIndex.set_location(subject)
- self.subjectIndex.next()
- [subject2, date] = key.split('\0')
- if subject != subject2:
+ self.__openIndices(archive)
+ subject = subject.lower()
+ try:
+ key, tempid = self.subjectIndex.set_location(subject)
+ self.subjectIndex.next()
+ [subject2, date] = key.split('\0')
+ if subject != subject2:
return None
- return tempid
- except KeyError: # XXX what line raises the KeyError?
- return None
+ return tempid
+ except KeyError: # XXX what line raises the KeyError?
+ return None
def newArchive(self, archive):
pass
-
+
def clearIndex(self, archive, index):
- self.__openIndices(archive)
- index = getattr(self, index+'Index')
- finished = 0
- try:
- key, msgid = self.threadIndex.first()
- except KeyError:
+ self.__openIndices(archive)
+ index = getattr(self, index+'Index')
+ finished = 0
+ try:
+ key, msgid = self.threadIndex.first()
+ except KeyError:
finished = 1
- while not finished:
- del self.threadIndex[key]
- try:
- key, msgid = self.threadIndex.next()
- except KeyError:
+ while not finished:
+ del self.threadIndex[key]
+ try:
+ key, msgid = self.threadIndex.next()
+ except KeyError:
finished = 1