author: cotton 1998-10-22 21:14:44 +0000
committer: cotton 1998-10-22 21:14:44 +0000
commit: 73b134e83997212d049c58946d9e2d2e2b4b070c (patch)
tree: 9e3bd65b1ac74228498231863970337cc5098097 /Mailman/Archiver/HyperDatabase.py
parent: 0eb0572a6f7f521c23cd88d13b06fd8c48d15511 (diff)
download: mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.gz
mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.zst
mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.zip
1 files changed, 317 insertions, 0 deletions
diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py
new file mode 100644
index 000000000..2963b0558
--- /dev/null
+++ b/Mailman/Archiver/HyperDatabase.py
@@ -0,0 +1,317 @@
+# Copyright (C) 1998 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#
+# site modules
+#
+import os
+import marshal
+import string
+
+#
+# package/project modules
+#
+import pipermail
+import Mailman.flock
+flock = Mailman.flock
+
+CACHESIZE = pipermail.CACHESIZE
+
+try:
+    import cPickle
+    pickle = cPickle
+except ImportError:
+    import pickle
+
+
+#
+# we're using a python dict in place of
+# a bsddb.btree database.  only defining
+# the parts of the interface used by class HyperDatabase.
+# only one thing can access this at a time.
+#
+class DumbBTree:
+
+    def __init__(self, path):
+        self.current_index = 0
+        self.path = path
+        self.lockfile = flock.FileLock(self.path + ".lock")
+        self.lock()
+        if os.path.exists(path):
+            self.dict = marshal.load(open(path))
+        else:
+            self.dict = {}
+        self.sorted = self.dict.keys()
+        self.sorted.sort()
+        
+    def lock(self):
+        self.lockfile.lock()
+
+
+    def unlock(self):
+        try:
+            self.lockfile.unlock()
+        except flock.NotLockedError:
+            pass
+        
+
+    def __delitem__(self, item):
+	try:
+	    ci = self.sorted[self.current_index]
+	except IndexError:
+	    ci = None
+	if ci == item:
+	    try:
+		ci = self.sorted[self.current_index + 1]
+	    except IndexError:
+		ci = None
+	del self.dict[item]
+	self.sorted = self.dict.keys()
+	self.sorted.sort()
+	if ci is not None:
+	    self.current_index = self.sorted.index(ci)
+	else:
+	    self.current_index = self.current_index + 1
+
+	
+
+
+    def first(self):
+        if not self.sorted:
+            raise KeyError
+        else:
+	    sorted = self.sorted
+            res =  sorted[0], self.dict[sorted[0]]
+            self.current_index = 1
+	    return res
+
+    def last(self):
+        if not self.sorted:
+            raise KeyError
+        else:
+	    sorted = self.sorted
+	    self.current_index = len(self.sorted) - 1
+            return sorted[-1], self.dict[sorted[-1]]
+	
+
+    def next(self):
+        try:
+            key = self.sorted[self.current_index]
+        except IndexError:
+            raise KeyError
+	self.current_index = self.current_index + 1
+        return key, self.dict[key]
+
+    def has_key(self, key):
+        return self.dict.has_key(key)
+
+
+    def set_location(self, loc):  # put the cursor on key loc
+        val = self.dict[loc]  # exact match only; raises KeyError if loc is not a key
+        self.current_index = self.sorted.index(loc)
+        return loc, val  # (key, value) pair, the same shape first()/next() return
+
+
+    def __getitem__(self, item):
+        return self.dict[item]
+
+
+    def __setitem__(self, item, val):
+	try:
+	    current_item = self.sorted[self.current_index]
+	except IndexError:
+	    current_item = item
+        self.dict[item] = val
+        self.sorted = self.dict.keys()
+        self.sorted.sort()
+        self.current_index = self.sorted.index(current_item)
+
+    def __len__(self):
+        return len(self.sorted)
+
+
+    def close(self):
+        fp = open(self.path, "w")
+        fp.write(marshal.dumps(self.dict))
+        fp.close()
+        self.unlock()
+
+
+    
+
+
+
+#
+# this is lifted straight out of pipermail with
+# the bsddb.btree replaced with the above class.
+# the pipermail methods are copied here rather than inherited
+# because of all the __internal stuff that needs to be here -scott
+#
+class HyperDatabase(pipermail.Database):
+    def __init__(self, basedir):
+	self.__cachekeys=[] ; self.__cachedict={}
+	self.__currentOpenArchive=None   # The currently open indices
+	self.basedir=os.path.expanduser(basedir)
+	self.changed={}         # Recently added articles, indexed only by message ID
+
+    def firstdate(self, archive):
+	import time
+	self.__openIndices(archive)
+	date='None'
+	try:
+	    date, msgid = self.dateIndex.first()
+	    date=time.asctime(time.localtime(string.atof(date)))
+	except KeyError: pass
+	return date
+
+    def lastdate(self, archive):
+	import time
+	self.__openIndices(archive)
+	date='None'
+	try:
+	    date, msgid = self.dateIndex.last()
+	    date=time.asctime(time.localtime(string.atof(date)))
+	except KeyError: pass
+	return date
+
+    def numArticles(self, archive):
+	self.__openIndices(archive)
+	return len(self.dateIndex)    
+
+    # Add a single article to the internal indexes for an archive.
+
+    def addArticle(self, archive, article, subjectkey, authorkey):
+	self.__openIndices(archive)
+
+	# Add the new article
+	self.dateIndex[article.date]=article.msgid
+	self.authorIndex[authorkey]=article.msgid
+	self.subjectIndex[subjectkey]=article.msgid
+	# Set the 'body' attribute to empty, to avoid storing the whole message
+	temp = article.body ; article.body=[]
+	self.articleIndex[article.msgid]=pickle.dumps(article)
+	article.body=temp
+	self.changed[archive,article.msgid]=None
+
+	parentID=article.parentID
+	if parentID!=None and self.articleIndex.has_key(parentID): 
+	    parent=self.getArticle(archive, parentID)
+	    myThreadKey=parent.threadKey+article.date+'-'
+	else: myThreadKey = article.date+'-'
+	article.threadKey=myThreadKey
+	self.setThreadKey(archive, myThreadKey+'\000'+article.msgid, article.msgid)
+
+    # Open the DumbBTree files that are being used as indices
+    # (dateIndex, authorIndex, subjectIndex, articleIndex, threadIndex)
+    def __openIndices(self, archive):
+	if self.__currentOpenArchive==archive: return
+	self.__closeIndices()
+	arcdir=os.path.join(self.basedir, 'database')
+	try: os.mkdir(arcdir, 0700)
+	except os.error: pass
+	for i in ['date', 'author', 'subject', 'article', 'thread']:
+	    t=DumbBTree(os.path.join(arcdir, archive+'-'+i)) 
+	    setattr(self, i+'Index', t)
+	self.__currentOpenArchive=archive
+
+    # Close the DumbBTree files that are being used as indices (if they're
+    # open--this is safe to call if they're already closed)
+    def __closeIndices(self):
+        # Close every index that is currently attached to self, recording the
+        # article count of the archive being closed in self.archive_length.
+        for i in ['date', 'author', 'subject', 'thread', 'article']:
+            attr = i + 'Index'
+            if not hasattr(self, attr): continue
+            index = getattr(self, attr)
+            if i == 'article':
+                if not hasattr(self, 'archive_length'): self.archive_length = {}
+                self.archive_length[self.__currentOpenArchive] = len(index)
+            index.close()
+            delattr(self, attr)
+        self.__currentOpenArchive = None
+    def close(self):
+	self.__closeIndices()
+    def hasArticle(self, archive, msgid): 
+	self.__openIndices(archive)
+	return self.articleIndex.has_key(msgid)
+    def setThreadKey(self, archive, key, msgid):
+	self.__openIndices(archive)
+	self.threadIndex[key]=msgid
+    def getArticle(self, archive, msgid):
+	self.__openIndices(archive)
+	if self.__cachedict.has_key(msgid): 
+	    self.__cachekeys.remove(msgid)
+	    self.__cachekeys.append(msgid)
+	    return self.__cachedict[msgid]
+	if len(self.__cachekeys)==CACHESIZE: 
+	    delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
+	    del self.__cachedict[delkey]
+	s=self.articleIndex[msgid]
+	article=pickle.loads(s)
+	self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
+	return article
+
+    def first(self, archive, index): 
+	self.__openIndices(archive)
+	index=getattr(self, index+'Index')
+	try: 
+	    key, msgid = index.first()
+	    return msgid
+	except KeyError: return None
+    def next(self, archive, index): 
+	self.__openIndices(archive)
+	index=getattr(self, index+'Index')
+	try: 
+	    key, msgid = index.next()
+	    return msgid
+	except KeyError: return None
+	
+    def getOldestArticle(self, archive, subject):
+        # Return the msgid indexed under the lowercased subject, or None if absent.
+        # NOTE(review): keys split as 'subject\0date'; confirm set_location(subject) can match one.
+        self.__openIndices(archive)
+        subject = string.lower(subject)
+        try:
+            key, tempid = self.subjectIndex.set_location(subject)
+            self.subjectIndex.next()
+        except KeyError: return None
+        [subject2, date] = string.split(key, '\0')
+        if subject == subject2: return tempid
+
+    def newArchive(self, archive): pass
+    def clearIndex(self, archive, index):
+        # Delete every entry of the named index ('date', 'thread', ...) of archive.
+        # Works on the index selected by `index`, not unconditionally on threadIndex.
+        # first()/next() raise KeyError once the cursor runs out of keys.
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        try: key, msgid = index.first()
+        except KeyError: return
+        while 1:
+            del index[key]
+            try: key, msgid = index.next()
+            except KeyError: return
+
+
+
+
+
+
+
+
+
+
+
author	cotton	1998-10-22 21:14:44 +0000
committer	cotton	1998-10-22 21:14:44 +0000
commit	73b134e83997212d049c58946d9e2d2e2b4b070c (patch)
tree	9e3bd65b1ac74228498231863970337cc5098097 /Mailman/Archiver/HyperDatabase.py
parent	0eb0572a6f7f521c23cd88d13b06fd8c48d15511 (diff)
download	mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.gz mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.zst mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.zip