summaryrefslogtreecommitdiff
path: root/Mailman
diff options
context:
space:
mode:
authorbwarsaw1999-08-21 05:27:52 +0000
committerbwarsaw1999-08-21 05:27:52 +0000
commit54913081da961ddafeb2f0591850af03497dc65d (patch)
tree1e7c58888be6c29adda1e6084c03964358056e7e /Mailman
parentd32edf3685107a87a3645ca4f4df4bda165e249b (diff)
downloadmailman-54913081da961ddafeb2f0591850af03497dc65d.tar.gz
mailman-54913081da961ddafeb2f0591850af03497dc65d.tar.zst
mailman-54913081da961ddafeb2f0591850af03497dc65d.zip
Extensive changes based on Jeremy Hylton's investigations. These
should considerably help the performance of the archiver. Specifically: class DumbBTree: Don't sort the self.sorted list unless some client is actually traversing the data structure. This saves a lot of work when items are added. See also Jeremy's XXX comment for further optimization ideas. class HyperDatabase: Jeremy also has questions about the usefulness of the cache used here. Because the items are traversed in linear order, there isn't much locality of reference, so cache eviction doesn't buy you much (it's actually more expensive than just keeping everything in the cache, so that's what we do). That's a space for time trade-off that might need a re-evaluation. Clearly, more work could be done to improve the performance of the archiver, but this should improve matters significantly. Caveat: this has been only minimally tested in a production environment. I call this the Hylton Band-aid.
Diffstat (limited to 'Mailman')
-rw-r--r--Mailman/Archiver/HyperDatabase.py90
1 files changed, 49 insertions, 41 deletions
diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py
index 7a7a838c0..43d3ffcd4 100644
--- a/Mailman/Archiver/HyperDatabase.py
+++ b/Mailman/Archiver/HyperDatabase.py
@@ -25,7 +25,7 @@ import string
# package/project modules
#
import pipermail
-from Mailman import flock
+from Mailman import LockFile
from Mailman.Utils import mkdir, open_ex
# TBD: ugly, ugly, ugly -baw
open = open_ex
@@ -46,31 +46,46 @@ except ImportError:
# only one thing can access this at a time.
#
class DumbBTree:
-
+ # XXX This dictionary-like object stores pickles of all the
+ # Article objects. The object itself is stored using marshal. It
+ # would be much simpler, and probably faster, to store the actual
+ # objects in the DumbBTree and pickle it.
+ # XXX Also needs a more sensible name, like IteratableDictionary
+ # or SortedDictionary.
def __init__(self, path):
self.current_index = 0
self.path = path
- self.lockfile = flock.FileLock(self.path + ".lock")
+ self.lockfile = LockFile.LockFile(self.path + ".lock")
self.lock()
+ self.__dirty = 0
if os.path.exists(path):
self.dict = marshal.load(open(path))
+ self.__sort(dirty=1)
else:
self.dict = {}
- self.sorted = self.dict.keys()
- self.sorted.sort()
+ self.sorted = []
+
+ def __sort(self, dirty=None):
+ if self.__dirty == 1 or dirty:
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ self.__dirty = 0
def lock(self):
self.lockfile.lock()
-
def unlock(self):
try:
self.lockfile.unlock()
- except flock.NotLockedError:
+ except LockFile.NotLockedError:
pass
-
def __delitem__(self, item):
+ # if first hasn't been called, we can skip the sort
+ if self.current_index == 0:
+ del self.dict[item]
+ self.__dirty = 1
+ return
try:
ci = self.sorted[self.current_index]
except IndexError:
@@ -81,8 +96,7 @@ class DumbBTree:
except IndexError:
ci = None
del self.dict[item]
- self.sorted = self.dict.keys()
- self.sorted.sort()
+ self.__sort(dirty=1)
if ci is not None:
self.current_index = self.sorted.index(ci)
else:
@@ -94,11 +108,12 @@ class DumbBTree:
self.dict = {}
def first(self):
+ self.__sort() # guarantee that the list is sorted
if not self.sorted:
raise KeyError
else:
- sorted = self.sorted
- res = sorted[0], self.dict[sorted[0]]
+ key = self.sorted[0]
+ res = key, self.dict[key]
self.current_index = 1
return res
@@ -106,11 +121,10 @@ class DumbBTree:
if not self.sorted:
raise KeyError
else:
- sorted = self.sorted
+ key = self.sorted[-1]
self.current_index = len(self.sorted) - 1
- return sorted[-1], self.dict[sorted[-1]]
+ return key, self.dict[key]
-
def next(self):
try:
key = self.sorted[self.current_index]
@@ -122,31 +136,32 @@ class DumbBTree:
def has_key(self, key):
return self.dict.has_key(key)
-
def set_location(self, loc):
if not self.dict.has_key(loc):
raise KeyError
self.current_index = self.sorted.index(loc)
-
def __getitem__(self, item):
return self.dict[item]
-
def __setitem__(self, item, val):
+ # if first hasn't been called, then we don't need to worry
+ # about sorting again
+ if self.current_index == 0:
+ self.dict[item] = val
+ self.__dirty = 1
+ return
try:
current_item = self.sorted[self.current_index]
except IndexError:
current_item = item
self.dict[item] = val
- self.sorted = self.dict.keys()
- self.sorted.sort()
+ self.__sort(dirty=1)
self.current_index = self.sorted.index(current_item)
def __len__(self):
return len(self.sorted)
-
def close(self):
fp = open(self.path, "w")
fp.write(marshal.dumps(self.dict))
@@ -154,11 +169,7 @@ class DumbBTree:
self.unlock()
-
-
-
-
-#
+
# this is lifted straight out of pipermail with
# the bsddb.btree replaced with above class.
# didn't use inheritance because of all the
@@ -166,10 +177,11 @@ class DumbBTree:
#
class HyperDatabase(pipermail.Database):
def __init__(self, basedir):
- self.__cachekeys=[] ; self.__cachedict={}
- self.__currentOpenArchive=None # The currently open indices
- self.basedir=os.path.expanduser(basedir)
- self.changed={} # Recently added articles, indexed only by message ID
+ self.__cache = {}
+ self.__currentOpenArchive = None # The currently open indices
+ self.basedir = os.path.expanduser(basedir)
+ # Recently added articles, indexed only by message ID
+ self.changed={}
def firstdate(self, archive):
import time
@@ -256,17 +268,13 @@ class HyperDatabase(pipermail.Database):
self.threadIndex[key]=msgid
def getArticle(self, archive, msgid):
self.__openIndices(archive)
- if self.__cachedict.has_key(msgid):
- self.__cachekeys.remove(msgid)
- self.__cachekeys.append(msgid)
- return self.__cachedict[msgid]
- if len(self.__cachekeys)==CACHESIZE:
- delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
- del self.__cachedict[delkey]
- s=self.articleIndex[msgid]
- article=pickle.loads(s)
- self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
- return article
+ if not self.__cache.has_key(msgid):
+ # get the pickled object out of the DumbBTree
+ buf = self.articleIndex[msgid]
+ article = self.__cache[msgid] = pickle.loads(buf)
+ else:
+ article = self.__cache[msgid]
+ return article
def first(self, archive, index):
self.__openIndices(archive)