diff options
| author | jhylton | 2000-09-22 18:23:37 +0000 |
|---|---|---|
| committer | jhylton | 2000-09-22 18:23:37 +0000 |
| commit | ad4ba52f694b2f026be863ab869b8d7ca83ab03c (patch) | |
| tree | 81c03da4aaa50d7c20b3f84609889dbc5dec66ed /Mailman/Archiver/pipermail.py | |
| parent | bf6d28c3c757820f428772472379cc8848948a85 (diff) | |
| download | mailman-ad4ba52f694b2f026be863ab869b8d7ca83ab03c.tar.gz mailman-ad4ba52f694b2f026be863ab869b8d7ca83ab03c.tar.zst mailman-ad4ba52f694b2f026be863ab869b8d7ca83ab03c.zip | |
Diffstat (limited to 'Mailman/Archiver/pipermail.py')
| -rw-r--r-- | Mailman/Archiver/pipermail.py | 228 |
1 files changed, 136 insertions, 92 deletions
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py index 71fb323d2..0cfe93ddd 100644 --- a/Mailman/Archiver/pipermail.py +++ b/Mailman/Archiver/pipermail.py @@ -58,12 +58,13 @@ def fixAuthor(author): # Abstract class for databases -class Database: +class DatabaseInterface: def __init__(self): pass def close(self): pass def getArticle(self, archive, msgid): pass def hasArticle(self, archive, msgid): pass - def addArticle(self, archive, article, subjectkey, authorkey): pass + def addArticle(self, archive, article, subject=None, author=None, + date=None): pass def firstdate(self, archive): pass def lastdate(self, archive): pass def first(self, archive, index): pass @@ -73,6 +74,60 @@ class Database: def setThreadKey(self, archive, key, msgid): pass def getOldestArticle(self, subject): pass +class Database(DatabaseInterface): + """Define the basic sorting logic for a database + + Assumes that the database internally uses dateIndex, authorIndex, + etc. + """ + + # TBD Factor out more of the logic shared between BSDDBDatabase + # and HyperDatabase and place it in this class. + + def __init__(self): + # This method need not be called by subclasses that do their + # own initialization. + self.dateIndex = {} + self.authorIndex = {} + self.subjectIndex = {} + self.articleIndex = {} + self.changed = {} + + def addArticle(self, archive, article, subject=None, author=None, + date=None): + # create the keys; always end w/ msgid which will be unique + authorkey = (author or article.author, article.date, + article.msgid) + subjectkey = (subject or article.subject, article.date, + article.msgid) + datekey = date or article.date, article.msgid + + # Add the new article + self.dateIndex[datekey] = article.msgid + self.authorIndex[authorkey] = article.msgid + self.subjectIndex[subjectkey] = article.msgid + + self.store_article(article) + self.changed[archive, article.msgid] = None + + parentID = article.parentID + if parentID is not None and self.articleIndex.has_key(parentID): + parent = self.getArticle(archive, parentID) + myThreadKey = parent.threadKey + article.date + '-' + else: + myThreadKey = article.date + '-' + article.threadKey = myThreadKey + key = myThreadKey, article.msgid + self.setThreadKey(archive, key, article.msgid) + + def store_article(self, article): + """Store article without message body to save space""" + # TBD this is not thread safe! + temp = article.body + article.body = [] + self.articleIndex[article.msgid] = pickle.dumps(article) + article.body = temp + # The Article class encapsulates a single posting. The attributes # are: # @@ -330,7 +385,7 @@ class T: article.parentID) article.threadKey = parent.threadKey+article.date+'-' self.database.setThreadKey(self.archive, - article.threadKey + '\000' + article.msgid, + (article.threadKey, article.msgid), msgid) msgid = self.database.next(self.archive, 'date') @@ -460,80 +515,84 @@ class T: self.sequence = self.sequence + 1 self.add_article(a) - # Archive an Article object. + def new_archive(self, archive, archivedir): + self.archives.append(archive) + self.update_TOC = 1 + self.database.newArchive(archive) + # If the archive directory doesn't exist, create it + try: + os.stat(archivedir) + except os.error, errdata: + errno, errmsg = errdata + if errno == 2: + mkdir(archivedir, self.DIRMODE) + else: + raise os.error, errdata + self.open_new_archive(archive, archivedir) + def add_article(self, article): - # Determine into what archives the article should be placed archives = self.get_archives(article) if not archives: return if type(archives) == type(''): archives = [archives] - # Add the article to each archive in turn article.filename = filename = self.get_filename(article) - temp = self.format_article(article) # Reformat the article - fmt = "Processing article #%s into archives %s" - self.message(fmt % (article.sequence, archives)) - for i in archives: - self.archive = i - archivedir = os.path.join(self.basedir, i) - # If it's a new archive, create it - if i not in self.archives: - self.archives.append(i) - self.update_TOC = 1 - self.database.newArchive(i) - # If the archive directory doesn't exist, create it - try: - os.stat(archivedir) - except os.error, errdata: - errno, errmsg = errdata - if errno == 2: - mkdir(archivedir, self.DIRMODE) - else: - raise os.error, errdata - self.open_new_archive(i, archivedir) + temp = self.format_article(article) + fmt = "Processing article #%s into archives %s: %s" + self.message(fmt % (article.sequence, archives, article.subject)) + for arch in archives: + self.archive = arch # why do this??? + archivedir = os.path.join(self.basedir, arch) + if arch not in self.archives: + self.new_archive(arch, archivedir) # Write the HTML-ized article - self.write_article(i, temp, os.path.join(archivedir, - filename)) + self.write_article(arch, temp, os.path.join(archivedir, + filename)) - authorkey = fixAuthor(article.author) + '\000' + article.date - subjectkey = string.lower(article.subject ) +'\000' + article.date - - # Update parenting info - parentID = None - if article.in_reply_to: - parentID = article.in_reply_to - elif article.references: - refs = self._remove_external_references(article.references) - if refs: - maxdate = self.database.getArticle(self.archive, - refs[0]) - for ref in refs[1:]: - a = self.database.getArticle(self.archive, ref) - if a.date > maxdate.date: - maxdate = a - parentID = maxdate.msgid - else: - # Get the oldest article with a matching subject, and - # assume this is a follow-up to that article - parentID = self.database.getOldestArticle(self.archive, - article.subject) + author = fixAuthor(article.author) + subject = string.lower(article.subject) - if parentID is not None \ - and not self.database.hasArticle(self.archive, parentID): - parentID = None - article.parentID = parentID - if parentID is not None: - parent = self.database.getArticle(self.archive, parentID) - article.threadKey = parent.threadKey + article.date + '-' - else: + article.parentID = parentID = self.get_parent_info(arch, article) + if parentID: + parent = self.database.getArticle(arch, parentID) + article.threadKey = parent.threadKey + article.date + '-' + else: article.threadKey = article.date + '-' - key = article.threadKey + '\000' + article.msgid - self.database.setThreadKey(self.archive, key, article.msgid) - self.database.addArticle(i, temp, subjectkey, authorkey) - if i not in self._dirty_archives: - self._dirty_archives.append(i) + key = article.threadKey, article.msgid + + self.database.setThreadKey(arch, key, article.msgid) + self.database.addArticle(arch, temp, author=author, + subject=subject) + + if arch not in self._dirty_archives: + self._dirty_archives.append(arch) + + def get_parent_info(self, archive, article): + parentID = None + if article.in_reply_to: + parentID = article.in_reply_to + elif article.references: + refs = self._remove_external_references(article.references) + if refs: + maxdate = self.database.getArticle(archive, refs[0]) + for ref in refs[1:]: + a = self.database.getArticle(archive, ref) + if a.date > maxdate.date: + maxdate = a + parentID = maxdate.msgid + else: + # Get the oldest article with a matching subject, and + # assume this is a follow-up to that article + parentID = self.database.getOldestArticle(archive, + article.subject) + + if parentID and not self.database.hasArticle(archive, parentID): + parentID = None + return parentID + + def write_article(self, index, article, path): f = open(path, 'w') @@ -588,13 +647,16 @@ class T: class BSDDBdatabase(Database): + __super_addArticle = Database.addArticle + def __init__(self, basedir): self.__cachekeys = [] self.__cachedict = {} self.__currentOpenArchive = None # The currently open indices self.basedir = os.path.expanduser(basedir) self.changed = {} # Recently added articles, indexed only by - # message ID + # message ID + def firstdate(self, archive): self.__openIndices(archive) date = 'None' @@ -604,6 +666,7 @@ class BSDDBdatabase(Database): except KeyError: pass return date + def lastdate(self, archive): self.__openIndices(archive) date = 'None' @@ -613,41 +676,21 @@ class BSDDBdatabase(Database): except KeyError: pass return date + def numArticles(self, archive): self.__openIndices(archive) return len(self.dateIndex) - # Add a single article to the internal indexes for an archive. - - def addArticle(self, archive, article, subjectkey, authorkey): - self.__openIndices(archive) - - # Add the new article - self.dateIndex[article.date] = article.msgid - self.authorIndex[authorkey] = article.msgid - self.subjectIndex[subjectkey] = article.msgid - # Set the 'body' attribute to empty, to avoid storing the - # whole message - temp = article.body - article.body = [] - self.articleIndex[article.msgid] = pickle.dumps(article) - article.body = temp - self.changed[archive,article.msgid] = None - - parentID = article.parentID - if parentID is not None and self.articleIndex.has_key(parentID): - parent = self.getArticle(archive, parentID) - myThreadKey = parent.threadKey+article.date + '-' - else: - myThreadKey = article.date + '-' - article.threadKey = myThreadKey - key = myThreadKey + '\000' + article.msgid - self.setThreadKey(archive, key, article.msgid) + def addArticle(self, archive, article, subject=None, author=None, + date=None): + self.__openIndices(archive) + self.__super_addArticle(archive, article, subject, author, date) # Open the BSDDB files that are being used as indices # (dateIndex, authorIndex, subjectIndex, articleIndex) def __openIndices(self, archive): - if self.__currentOpenArchive == archive: return + if self.__currentOpenArchive == archive: + return import bsddb self.__closeIndices() @@ -676,6 +719,7 @@ class BSDDBdatabase(Database): index.close() delattr(self,attr) self.__currentOpenArchive = None + def close(self): self.__closeIndices() def hasArticle(self, archive, msgid): |
