summaryrefslogtreecommitdiff
path: root/Mailman/Archiver/HyperDatabase.py
diff options
context:
space:
mode:
authorcotton1998-10-22 21:14:44 +0000
committercotton1998-10-22 21:14:44 +0000
commit73b134e83997212d049c58946d9e2d2e2b4b070c (patch)
tree9e3bd65b1ac74228498231863970337cc5098097 /Mailman/Archiver/HyperDatabase.py
parent0eb0572a6f7f521c23cd88d13b06fd8c48d15511 (diff)
downloadmailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.gz
mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.zst
mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.zip
Diffstat (limited to 'Mailman/Archiver/HyperDatabase.py')
-rw-r--r--Mailman/Archiver/HyperDatabase.py317
1 files changed, 317 insertions, 0 deletions
diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py
new file mode 100644
index 000000000..2963b0558
--- /dev/null
+++ b/Mailman/Archiver/HyperDatabase.py
@@ -0,0 +1,317 @@
+# Copyright (C) 1998 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#
+# site modules
+#
+import os
+import marshal
+import string
+
+#
+# package/project modules
+#
+import pipermail
+import Mailman.flock
+flock = Mailman.flock
+
+CACHESIZE = pipermail.CACHESIZE
+
+try:
+ import cPickle
+ pickle = cPickle
+except ImportError:
+ import pickle
+
+
+#
+# we're using a python dict in place of
+# of bsddb.btree database. only defining
+# the parts of the interface used by class HyperDatabase
+# only one thing can access this at a time.
+#
+class DumbBTree:
+
+ def __init__(self, path):
+ self.current_index = 0
+ self.path = path
+ self.lockfile = flock.FileLock(self.path + ".lock")
+ self.lock()
+ if os.path.exists(path):
+ self.dict = marshal.load(open(path))
+ else:
+ self.dict = {}
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+
+ def lock(self):
+ self.lockfile.lock()
+
+
+ def unlock(self):
+ try:
+ self.lockfile.unlock()
+ except flock.NotLockedError:
+ pass
+
+
+ def __delitem__(self, item):
+ try:
+ ci = self.sorted[self.current_index]
+ except IndexError:
+ ci = None
+ if ci == item:
+ try:
+ ci = self.sorted[self.current_index + 1]
+ except IndexError:
+ ci = None
+ del self.dict[item]
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ if ci is not None:
+ self.current_index = self.sorted.index(ci)
+ else:
+ self.current_index = self.current_index + 1
+
+
+
+
+ def first(self):
+ if not self.sorted:
+ raise KeyError
+ else:
+ sorted = self.sorted
+ res = sorted[0], self.dict[sorted[0]]
+ self.current_index = 1
+ return res
+
+ def last(self):
+ if not self.sorted:
+ raise KeyError
+ else:
+ sorted = self.sorted
+ self.current_index = len(self.sorted) - 1
+ return sorted[-1], self.dict[sorted[-1]]
+
+
+ def next(self):
+ try:
+ key = self.sorted[self.current_index]
+ except IndexError:
+ raise KeyError
+ self.current_index = self.current_index + 1
+ return key, self.dict[key]
+
+ def has_key(self, key):
+ return self.dict.has_key(key)
+
+
+ def set_location(self, loc):
+ if not self.dict.has_key(loc):
+ raise KeyError
+ self.current_index = self.sorted.index(loc)
+
+
+ def __getitem__(self, item):
+ return self.dict[item]
+
+
+ def __setitem__(self, item, val):
+ try:
+ current_item = self.sorted[self.current_index]
+ except IndexError:
+ current_item = item
+ self.dict[item] = val
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ self.current_index = self.sorted.index(current_item)
+
+ def __len__(self):
+ return len(self.sorted)
+
+
+ def close(self):
+ fp = open(self.path, "w")
+ fp.write(marshal.dumps(self.dict))
+ fp.close()
+ self.unlock()
+
+
+
+
+
+
+#
+# this is lifted straight out of pipermail with
+# the bsddb.btree replaced with above class.
+# didn't use inheritance because of all the
+# __internal stuff that needs to be here -scott
+#
+class HyperDatabase(pipermail.Database):
+ def __init__(self, basedir):
+ self.__cachekeys=[] ; self.__cachedict={}
+ self.__currentOpenArchive=None # The currently open indices
+ self.basedir=os.path.expanduser(basedir)
+ self.changed={} # Recently added articles, indexed only by message ID
+
+ def firstdate(self, archive):
+ import time
+ self.__openIndices(archive)
+ date='None'
+ try:
+ date, msgid = self.dateIndex.first()
+ date=time.asctime(time.localtime(string.atof(date)))
+ except KeyError: pass
+ return date
+
+ def lastdate(self, archive):
+ import time
+ self.__openIndices(archive)
+ date='None'
+ try:
+ date, msgid = self.dateIndex.last()
+ date=time.asctime(time.localtime(string.atof(date)))
+ except KeyError: pass
+ return date
+
+ def numArticles(self, archive):
+ self.__openIndices(archive)
+ return len(self.dateIndex)
+
+ # Add a single article to the internal indexes for an archive.
+
+ def addArticle(self, archive, article, subjectkey, authorkey):
+ self.__openIndices(archive)
+
+ # Add the new article
+ self.dateIndex[article.date]=article.msgid
+ self.authorIndex[authorkey]=article.msgid
+ self.subjectIndex[subjectkey]=article.msgid
+ # Set the 'body' attribute to empty, to avoid storing the whole message
+ temp = article.body ; article.body=[]
+ self.articleIndex[article.msgid]=pickle.dumps(article)
+ article.body=temp
+ self.changed[archive,article.msgid]=None
+
+ parentID=article.parentID
+ if parentID!=None and self.articleIndex.has_key(parentID):
+ parent=self.getArticle(archive, parentID)
+ myThreadKey=parent.threadKey+article.date+'-'
+ else: myThreadKey = article.date+'-'
+ article.threadKey=myThreadKey
+ self.setThreadKey(archive, myThreadKey+'\000'+article.msgid, article.msgid)
+
+ # Open the BSDDB files that are being used as indices
+ # (dateIndex, authorIndex, subjectIndex, articleIndex)
+ def __openIndices(self, archive):
+ if self.__currentOpenArchive==archive: return
+ self.__closeIndices()
+ arcdir=os.path.join(self.basedir, 'database')
+ try: os.mkdir(arcdir, 0700)
+ except os.error: pass
+ for i in ['date', 'author', 'subject', 'article', 'thread']:
+ t=DumbBTree(os.path.join(arcdir, archive+'-'+i))
+ setattr(self, i+'Index', t)
+ self.__currentOpenArchive=archive
+
+ # Close the BSDDB files that are being used as indices (if they're
+ # open--this is safe to call if they're already closed)
+ def __closeIndices(self):
+ if self.__currentOpenArchive!=None:
+ pass
+ for i in ['date', 'author', 'subject', 'thread', 'article']:
+ attr=i+'Index'
+ if hasattr(self, attr):
+ index=getattr(self, attr)
+ if i=='article':
+ if not hasattr(self, 'archive_length'): self.archive_length={}
+ self.archive_length[self.__currentOpenArchive]=len(index)
+ index.close()
+ delattr(self,attr)
+ self.__currentOpenArchive=None
+ def close(self):
+ self.__closeIndices()
+ def hasArticle(self, archive, msgid):
+ self.__openIndices(archive)
+ return self.articleIndex.has_key(msgid)
+ def setThreadKey(self, archive, key, msgid):
+ self.__openIndices(archive)
+ self.threadIndex[key]=msgid
+ def getArticle(self, archive, msgid):
+ self.__openIndices(archive)
+ if self.__cachedict.has_key(msgid):
+ self.__cachekeys.remove(msgid)
+ self.__cachekeys.append(msgid)
+ return self.__cachedict[msgid]
+ if len(self.__cachekeys)==CACHESIZE:
+ delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
+ del self.__cachedict[delkey]
+ s=self.articleIndex[msgid]
+ article=pickle.loads(s)
+ self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
+ return article
+
+ def first(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ try:
+ key, msgid = index.first()
+ return msgid
+ except KeyError: return None
+ def next(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ try:
+ key, msgid = index.next()
+ return msgid
+ except KeyError: return None
+
+ def getOldestArticle(self, archive, subject):
+ self.__openIndices(archive)
+ subject=string.lower(subject)
+ try:
+ key, tempid=self.subjectIndex.set_location(subject)
+ self.subjectIndex.next()
+ [subject2, date]= string.split(key, '\0')
+ if subject!=subject2: return None
+ return tempid
+ except KeyError:
+ return None
+
+ def newArchive(self, archive): pass
+ def clearIndex(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ finished=0
+ try:
+ key, msgid=self.threadIndex.first()
+ except KeyError: finished=1
+ while not finished:
+ del self.threadIndex[key]
+ try:
+ key, msgid=self.threadIndex.next()
+ except KeyError: finished=1
+
+
+
+
+
+
+
+
+
+
+