2 files changed, 401 insertions, 335 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 7a5c47cd7..8429b8356 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -420,13 +420,14 @@ arch_listing_end = '''\
  
 
 class HyperArchive(pipermail.T):
+    __super_init = pipermail.T.__init__
 
     # some defaults
     DIRMODE=02775
     FILEMODE=0660
     
 
-    VERBOSE=0
+    VERBOSE=1
     DEFAULTINDEX='thread'
     ARCHIVE_PERIOD='month'
  
@@ -540,23 +541,19 @@ class HyperArchive(pipermail.T):
         d["archive_listing"] = listing
         return self.html_TOC_tmpl % d
 
-    def __init__(self, maillist,unlock=1):
-        self.maillist=maillist
-        self._unlocklist=unlock
-        self._lock_file=None
+    def __init__(self, maillist, unlock=1):
+        self.maillist = maillist
+        self._unlocklist = unlock
+        self._lock_file = None
  
-
-        #
         # can't init the database while other
         # processes are writing to it!
         # XXX TODO- implement native locking
         # with mailman's LockFile module for HyperDatabase.HyperDatabase
         #
-	pipermail.T.__init__(
-            self,
-            maillist.archive_dir(),
-            reload=1,
-            database=HyperDatabase.HyperDatabase(maillist.archive_dir()))
+        dir = maillist.archive_dir()
+        db = HyperDatabase.HyperDatabase(dir)
+        self.__super_init(dir, reload=1, database=db)
 
         if hasattr(self.maillist,'archive_volume_frequency'):
             if self.maillist.archive_volume_frequency == 0:
@@ -738,8 +735,10 @@ class HyperArchive(pipermail.T):
 
     def open_new_archive(self, archive, archivedir):
 	index_html=os.path.join(archivedir, 'index.html') 
-	try: os.unlink(index_html)
-	except: pass
+	try:
+            os.unlink(index_html)
+	except:
+            pass
 	os.symlink(self.DEFAULTINDEX+'.html',index_html)
 
 
@@ -930,10 +929,10 @@ class HyperArchive(pipermail.T):
         self.database.close()
         del self.database
         f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
-        pickle.dump(self.__getstate__(), f)
+        pickle.dump(self.getstate(), f)
         f.close()
 
-    def __getstate__(self):
+    def getstate(self):
         d={}
         for each in self.__dict__.keys():
             if not (each in ['maillist','_lock_file','_unlocklist']
@@ -941,9 +940,6 @@ class HyperArchive(pipermail.T):
                 d[each] = self.__dict__[each]
         return d
 
-        
- 
-
     # Add <A HREF="..."> tags around URLs and e-mail addresses.
 
     def __processbody_URLquote(self, source, dest):
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index ccdbb7c9f..c21cad9c7 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -1,6 +1,10 @@
 #! /usr/bin/env python
 
-import os, sys, string, re
+import os
+import re
+import sys
+import string
+import time
 
 try:
     import cPickle
@@ -12,17 +16,19 @@ from Mailman.Utils import mkdir, open_ex
 # TBD: ugly, ugly, ugly -baw
 open = open_ex
 
-__version__='0.05 (Mailman edition)'
-VERSION=__version__
-CACHESIZE=100    # Number of slots in the cache
+__version__ = '0.05 (Mailman edition)'
+VERSION = __version__
+CACHESIZE = 100    # Number of slots in the cache
 
 
 
-msgid_pat=re.compile(r'(<.*>)')
+msgid_pat = re.compile(r'(<.*>)')
 def strip_separators(s):
     "Remove quotes or parenthesization from a Message-ID string"
-    if s==None or s=="": return ""
-    if s[0] in '"<([' and s[-1] in '">)]': s=s[1:-1]
+    if not s:
+        return ""
+    if s[0] in '"<([' and s[-1] in '">)]':
+        s = s[1:-1]
     return s
 
 smallNameParts = ['van', 'von', 'der', 'de']
@@ -30,20 +36,24 @@ smallNameParts = ['van', 'von', 'der', 'de']
 def fixAuthor(author):
     "Canonicalize a name into Last, First format"
     # If there's a comma, guess that it's already in "Last, First" format
-    if ',' in author: return author
-    L=string.split(author)
-    i=len(L)-1
-    if i==0: return author # The string's one word--forget it
-    if string.upper(author)==author or string.lower(author)==author:
+    if ',' in author:
+        return author
+    L = string.split(author)
+    i = len(L) - 1
+    if i == 0:
+        return author # The string's one word--forget it
+    if string.upper(author) == author or string.lower(author) == author:
 	# Damn, the name is all upper- or lower-case.  
-	while i>0 and string.lower(L[i-1]) in smallNameParts: i=i-1
+	while i > 0 and string.lower(L[i-1]) in smallNameParts:
+            i = i - 1
     else:
 	# Mixed case; assume that small parts of the last name will be
         # in lowercase, and check them against the list.
 	while i>0 and (L[i-1][0] in string.lowercase or 
 		       string.lower(L[i-1]) in smallNameParts): 
-	    i=i-1
-    author=string.join(L[-1:]+L[i:-1], ' ')+', '+string.join(L[:i], ' ')
+	    i = i - 1
+    author = string.join(L[-1:] + L[i:-1], ' ') \
+             + ', ' + string.join(L[:i], ' ')
     return author
 
 # Abstract class for databases
@@ -66,125 +76,144 @@ class Database:
 # The Article class encapsulates a single posting.  The attributes 
 # are:
 #
-#  sequence : Sequence number, unique for each article in a set of archives
-#  subject  : Subject
-#  datestr  : The posting date, in human-readable format
-#  date     : The posting date, in purely numeric format
-#  headers  : Any other headers of interest
-#  author   : The author's name (and possibly organization)
-#  email    : The author's e-mail address
-#  msgid    : A unique message ID
-#  in_reply_to : If !="", this is the msgid of the article being replied to
-#  references: A (possibly empty) list of msgid's of earlier articles in the thread
-#  body     : A list of strings making up the message body
+# sequence   : Sequence number, unique for each article in a set of archives
+# subject    : Subject
+# datestr    : The posting date, in human-readable format
+# date       : The posting date, in purely numeric format
+# headers    : Any other headers of interest
+# author     : The author's name (and possibly organization)
+# email      : The author's e-mail address
+# msgid      : A unique message ID
+# in_reply_to: If != "", this is the msgid of the article being replied to
+# references : A (possibly empty) list of msgid's of earlier articles
+#              in the thread 
+# body       : A list of strings making up the message body
 
 class Article:
-    import time
-    __last_article_time=time.time()
-    def __init__(self, message=None, sequence=0, keepHeaders=[]):
-	import time
-	if message==None: return
-	self.sequence=sequence
+    __last_article_time = time.time()
+    
+    def __init__(self, message = None, sequence = 0, keepHeaders = []):
+	if message is None:
+            return
+	self.sequence = sequence
 
-	self.parentID = None ; self.threadKey = None
+	self.parentID = None
+        self.threadKey = None
 	# otherwise the current sequence number is used.
-	id=strip_separators(message.getheader('Message-Id'))
-	if id=="": self.msgid=str(self.sequence)
-	else: self.msgid=id
+	id = strip_separators(message.getheader('Message-Id'))
+	if id == "":
+            self.msgid = str(self.sequence)
+	else: self.msgid = id
 
-	if message.has_key('Subject'): self.subject=str(message['Subject'])
-	else: self.subject='No subject'
-	if self.subject=="": self.subject='No subject'
+	if message.has_key('Subject'):
+            self.subject = str(message['Subject'])
+	else:
+            self.subject = 'No subject'
+	if self.subject == "": self.subject = 'No subject'
 
 	if message.has_key('Date'): 
-	    self.datestr=str(message['Date'])
-   	    date=message.getdate_tz('Date')
+	    self.datestr = str(message['Date'])
+   	    date = message.getdate_tz('Date')
 	else: 
-	    self.datestr='None' 
-	    date=None
-	if date!=None:
-	    date, tzoffset=date[:9], date[-1] 
-	    date=time.mktime(date)-tzoffset
+	    self.datestr = 'None' 
+	    date = None
+	if date is not None:
+	    date, tzoffset = date[:9], date[-1] 
+	    date = time.mktime(date)-tzoffset
 	else:
-	    date=self.__last_article_time+1 ; print 'Article without date:', self.msgid
+	    date = self.__last_article_time+1
+            print 'Article without date:', self.msgid
 	    
-	self.__last_article_time=date 
-	self.date='%011i' % (date,)
+	self.__last_article_time = date 
+	self.date = '%011i' % (date,)
 
 	# Figure out the e-mail address and poster's name
-	self.author, self.email=message.getaddr('From')
-	e=message.getheader('Reply-To')
-	if e!=None: self.email=e
-	self.email=strip_separators(self.email)
-	self.author=strip_separators(self.author)
+	self.author, self.email = message.getaddr('From')
+	e = message.getheader('Reply-To')
+	if e is not None:
+            self.email = e
+	self.email = strip_separators(self.email)
+	self.author = strip_separators(self.author)
 
-	if self.author=="": self.author=self.email
+	if self.author == "": self.author = self.email
 
 	# Save the 'In-Reply-To:' and 'References:' lines
-	i_r_t=message.getheader('In-Reply-To')
-	if i_r_t==None: self.in_reply_to=''
+	i_r_t = message.getheader('In-Reply-To')
+	if i_r_t is None:
+            self.in_reply_to = ''
 	else:
-	    match=msgid_pat.search(i_r_t)
-	    if match==None: self.in_reply_to=''
-	    else: self.in_reply_to=strip_separators(match.group(1))
+	    match = msgid_pat.search(i_r_t)
+	    if match is None: self.in_reply_to = ''
+	    else: self.in_reply_to = strip_separators(match.group(1))
 		
-	references=message.getheader('References')
-	if references==None: self.references=[]
-	else: self.references=map(strip_separators, string.split(references))
+	references = message.getheader('References')
+	if references is None:
+            self.references = []
+	else:
+            self.references = map(strip_separators,
+                                  string.split(references))  
 
 	# Save any other interesting headers
-	self.headers={}
+	self.headers = {}
 	for i in keepHeaders:
-	    if message.has_key(i): self.headers[i]=message[i]
+	    if message.has_key(i):
+                self.headers[i] = message[i]
 
 	# Read the message body
-	self.body=[]
+	self.body = []
 	message.rewindbody()
-	while (1):
-	    line=message.fp.readline()
-	    if line=="": break
+	while 1:
+	    line = message.fp.readline()
+	    if line == "":
+                break
 	    self.body.append(line)
     def __repr__(self):
-	return '<Article ID='+repr(self.msgid)+'>'
+	return '<Article ID=' + repr(self.msgid) + '>'
 
 # Pipermail formatter class
 
 class T:
-    DIRMODE=0755      # Mode to give to created directories
-    FILEMODE=0644     # Mode to give to created files
+    DIRMODE = 0755      # Mode to give to created directories
+    FILEMODE = 0644     # Mode to give to created files
     INDEX_EXT = ".html" # Extension for indexes
 
-    def __init__(self, basedir=None, reload=1, database=None):
+    def __init__(self, basedir = None, reload = 1, database = None):
 	# If basedir isn't provided, assume the current directory
-	if basedir==None: self.basedir=os.getcwd()
+	if basedir is None:
+            self.basedir = os.getcwd()
 	else: 
-            basedir=os.path.expanduser(basedir)
-	    self.basedir=basedir
-	self.database=database
+            basedir = os.path.expanduser(basedir)
+	    self.basedir = basedir
+	self.database = database
 
 	# If the directory doesn't exist, create it
-	try: os.stat(self.basedir)
+	try:
+            os.stat(self.basedir)
 	except os.error, errdata:
 	    errno, errmsg = errdata
-	    if errno!=2: raise os.error, errdata
+	    if errno != 2:
+                raise os.error, errdata
 	    else: 
-		self.message('Creating archive directory '+self.basedir)
+		self.message('Creating archive directory ' + self.basedir)
 		mkdir(self.basedir, self.DIRMODE)
 
 	# Try to load previously pickled state
 	try:
-	    if not reload: raise IOError
-	    f=open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
+	    if not reload:
+                raise IOError
+	    f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
 	    self.message('Reloading pickled archive state')
-	    d=pickle.load(f)
+	    d = pickle.load(f)
 	    f.close()
-	    for key, value in d.items(): setattr(self, key, value)
+	    for key, value in d.items():
+                setattr(self, key, value)
 	except IOError: 
 	    # No pickled version, so initialize various attributes
-	    self.archives=[]        # Archives 
-	    self._dirty_archives=[]  # Archives that will have to be updated
-	    self.sequence=0         # Sequence variable used for numbering articles
-	    self.update_TOC=0       # Does the TOC need updating?
+	    self.archives = []        # Archives 
+	    self._dirty_archives = [] # Archives that will have to be updated
+	    self.sequence = 0         # Sequence variable used for
+	                              #   numbering articles  
+	    self.update_TOC = 0       # Does the TOC need updating?
         #
         # make the basedir variable work when passed in as an __init__ arg
         # and different from the one in the pickle.  Let the one passed in
@@ -195,17 +224,17 @@ class T:
             self.basedir = basedir
 
     def close(self):
-	"Close an archive, saving its state and updating any changed archives."
-	self.update_dirty_archives()# Update all changed archives
-	# If required, update the table of contents
-	if self.update_TOC or 1:
-	    self.update_TOC=0
+	"Close an archive, save its state, and update any changed archives."
+	self.update_dirty_archives()
+	if self.update_TOC:
+	    self.update_TOC = 0
 	    self.write_TOC()
 	# Save the collective state 
-	self.message('Pickling archive state into '+os.path.join(self.basedir, 'pipermail.pck'))
+	self.message('Pickling archive state into ' \
+                     + os.path.join(self.basedir, 'pipermail.pck'))
 	self.database.close()
 	del self.database
-	f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+	f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
 	pickle.dump(self.__dict__, f)
 	f.close()
 
@@ -219,43 +248,51 @@ class T:
     # Create a dictionary of various parameters that will be passed 
     # to the write_index_{header,footer} functions
     def __set_parameters(self, archive):
-	import time
 	# Determine the earliest and latest date in the archive
-	firstdate=self.database.firstdate(archive)
-	lastdate=self.database.lastdate(archive)
+	firstdate = self.database.firstdate(archive)
+	lastdate = self.database.lastdate(archive)
 
 	# Get the current time
-	now=time.asctime(time.localtime(time.time()))	
-	self.firstdate=firstdate ; self.lastdate=lastdate
-	self.archivedate=now ; self.size=self.database.numArticles(archive)
-	self.archive=archive ; self.version=__version__
+	now = time.asctime(time.localtime(time.time()))	
+	self.firstdate = firstdate
+        self.lastdate = lastdate
+	self.archivedate = now
+        self.size = self.database.numArticles(archive)
+	self.archive = archive
+        self.version = __version__
 
     # Find the message ID of an article's parent, or return None
     # if no parent can be found.
 
-    def __findParent(self, article, children=[]):
-	    parentID=None
-	    if article.in_reply_to!='': parentID=article.in_reply_to
-	    elif article.references!=[]: 
+    def __findParent(self, article, children = []):
+	    parentID = None
+            if article.in_reply_to:
+                parentID = article.in_reply_to
+	    elif article.references:
 		# Remove article IDs that aren't in the archive
-		refs=filter(self.articleIndex.has_key, article.references)
-		if len(refs):
-		    refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs)
-		    maxdate=refs[0]
-		    for i in refs[1:]: 
-			if i.date>maxdate.date: maxdate=i
-		    parentID=maxdate.msgid
+		refs = filter(self.articleIndex.has_key, article.references)
+		if not refs:
+                    return None
+                maxdate = self.database.getArticle(self.archive,
+                                                   refs[0])
+                for ref in refs[1:]:
+                    a = self.database.getArticle(self.archive, ref)
+                    if a.date > maxdate.date:
+                        maxdate = a
+                parentID = maxdate.msgid
 	    else:
 		# Look for the oldest matching subject
 		try: 
-		    key, tempid=self.subjectIndex.set_location(article.subject)
+		    key, tempid = \
+                         self.subjectIndex.set_location(article.subject)
 		    print key, tempid
 		    self.subjectIndex.next()	
-		    [subject, date]= string.split(key, '\0')
+		    [subject, date] = string.split(key, '\0')
 		    print article.subject, subject, date
-		    if (subject==article.subject and tempid not in children):
-			parentID=tempid
-		except KeyError: pass
+		    if subject == article.subject and tempid not in children:
+			parentID = tempid
+		except KeyError:
+                    pass
 	    return parentID
 
     # Update the threaded index completely
@@ -264,40 +301,26 @@ class T:
 	self.database.clearIndex(self.archive, 'thread')
 	
 	# Loop over all the articles 
-	msgid=self.database.first(self.archive, 'date')
-	while (msgid != None):
+	msgid = self.database.first(self.archive, 'date')
+	while msgid is not None:
             try:
-                article=self.database.getArticle(self.archive, msgid)
+                article = self.database.getArticle(self.archive, msgid)
             except KeyError:
                 pass
             else:
-                if article.parentID==None or \
+                if article.parentID is None or \
                    not self.database.hasArticle(self.archive,
                                                 article.parentID):
                     # then
-                    key=article.date
+                    key = article.date
                 else: 
-                    parent=self.database.getArticle(self.archive,
+                    parent = self.database.getArticle(self.archive,
                                                     article.parentID)
-                    article.threadKey=parent.threadKey+article.date+'-' 
-                self.database.setThreadKey(
-                    self.archive,
-                    article.threadKey+'\000'+ article.msgid,
+                    article.threadKey = parent.threadKey+article.date+'-' 
+                self.database.setThreadKey(self.archive,
+                    article.threadKey + '\000' + article.msgid,
                     msgid)
-	    msgid=self.database.next(self.archive, 'date')
-
-## 	    L1=[] ; L2=[]
-## 	    while (1):
-## 		article=self.database.getArticle(self.archive, msgid)
-## 		L1.append('') ; L2.append(msgid) 
-## 		L1=map(lambda x, d=article.date: d+'-'+x, L1)
-## 		parentID=self.__findParent(article, L2)
-## 		if parentID==None or not self.database.hasArticle(parentID): 
-## 		    break
-## 		else: msgid=parentID
-## 	    for i in range(0, len(L1)):
-## 		self.database.setThreadKey(self.archive, L1[i], '\000'+L2[i])
-## 		self.database.setThreadKey(self.archive, '\000'+L2[i], L1[i])
+	    msgid = self.database.next(self.archive, 'date')
 
     #
     # Public methods:
@@ -308,171 +331,201 @@ class T:
     # Update a single archive's indices, whether the archive's been
     # dirtied or not. 
     def update_archive(self, archive):	
-	self.archive=archive
-	self.message("Updating index files for archive ["+archive+']')
-	arcdir=os.path.join(self.basedir, archive)
-	parameters=self.__set_parameters(archive)
-	# Handle the 3 simple indices first
-	for i in ['Date', 'Subject', 'Author']:
-	    self.message("  "+i)
-	    self.type=i
-	    # Get the right index
-	    i=string.lower(i)
+	self.archive = archive
+	self.message("Updating index files for archive [%s]" % archive)
+	arcdir = os.path.join(self.basedir, archive)
+	self.__set_parameters(archive)
 
-	    # Redirect sys.stdout
-	    import sys
-	    f=open(os.path.join(arcdir, i+self.INDEX_EXT), 'w')
-##	    os.chmod(f.name, self.FILEMODE)
-	    temp_stdout, sys.stdout=sys.stdout, f
-	    self.write_index_header()
-	    count=0
-	    # Loop over the index entries
-	    finished=0
-	    msgid=self.database.first(archive, i)
-	    while (msgid != None):
-                try:
-                    article=self.database.getArticle(self.archive, msgid)
-                except KeyError:
-                    pass
-                else:
-                    count=count+1
-                    self.write_index_entry(article)
-		msgid = self.database.next(archive, i)
-	    # Finish up this index
-	    self.write_index_footer()
-	    sys.stdout=temp_stdout
-	    f.close()
+	for hdr in ('Date', 'Subject', 'Author'):
+            self._update_simple_index(hdr, archive, arcdir)
+
+        self._update_thread_index(archive, arcdir)
+
+    def _update_simple_index(self, hdr, archive, arcdir):
+        self.message("  " + hdr)
+        self.type = hdr
+        hdr = string.lower(hdr)
+
+        self._open_index_file_as_stdout(arcdir, hdr)
+        self.write_index_header()
+        count = 0
+        # Loop over the index entries
+        finished = 0
+        msgid = self.database.first(archive, hdr)
+        while msgid is not None:
+            try:
+                article = self.database.getArticle(self.archive, msgid)
+            except KeyError:
+                pass
+            else:
+                count = count + 1
+                self.write_index_entry(article)
+            msgid = self.database.next(archive, hdr)
+        # Finish up this index
+        self.write_index_footer()
+        self._restore_stdout()
 
-	# Print the threaded index
+    def _update_thread_index(self, archive, arcdir):
 	self.message("  Thread")
- 	temp_stdout, sys.stdout=sys.stdout, open(os.path.join(arcdir, 'thread' + self.INDEX_EXT), 'w')
-##	os.chmod(os.path.join(arcdir, 'thread' + self.INDEX_EXT), self.FILEMODE)
- 	self.type='Thread'
+        self._open_index_file_as_stdout(arcdir, "thread")
+ 	self.type = 'Thread'
  	self.write_index_header()
 
 	# To handle the prev./next in thread pointers, we need to
 	# track articles 5 at a time.  
 
 	# Get the first 5 articles	
-	L=[ None ]*5 ; i=2 ; finished=0
-	msgid=self.database.first(self.archive, 'thread')
-	while msgid!=None and i<5:
-	    L[i]=self.database.getArticle(self.archive, msgid) ; i=i+1
+	L = [None] * 5
+        i = 2
+        finished = 0
+	msgid = self.database.first(self.archive, 'thread')
+        
+	while msgid is not None and i < 5:
+	    L[i] = self.database.getArticle(self.archive, msgid)
+            i = i + 1
 	    msgid = self.database.next(self.archive, 'thread')
 
-	while L[2]!=None:
- 	    article=L[2] ; artkey=None
-	    if article!=None: artkey=article.threadKey
-	    if artkey!=None: 
-		import sys
-		self.write_threadindex_entry(article, string.count(artkey, '-')-1)
-		if self.database.changed.has_key( (archive,article.msgid) ):
-		    a1=L[1] ; a3=L[3]
+	while L[2] is not None:
+ 	    article = L[2]
+            artkey = None
+	    if article is not None:
+                artkey = article.threadKey
+	    if artkey is not None: 
+		self.write_threadindex_entry(article,
+                                     string.count(artkey, '-') - 1)
+		if self.database.changed.has_key((archive,article.msgid)):
+		    a1 = L[1]
+                    a3 = L[3]
 		    self.update_article(arcdir, article, a1, a3) 
-		    if a3!=None: self.database.changed[ (archive,a3.msgid) ]=None
-		    if a1!=None:
-			if not self.database.changed.has_key( (archive,a1.msgid) ): 
+		    if a3 is not None:
+                        self.database.changed[(archive, a3.msgid)] = None
+		    if a1 is not None:
+                        key = archive, a1.msgid
+			if not self.database.changed.has_key(key): 
 			    self.update_article(arcdir, a1, L[0], L[2])
-			else: del self.database.changed[ (archive,a1.msgid) ]
-	    L=L[1:]			# Rotate the list
-	    if msgid==None: L.append(msgid)
-	    else: L.append( self.database.getArticle(self.archive, msgid) )
+			else:
+                            del self.database.changed[key]
+	    L = L[1:]			# Rotate the list
+	    if msgid is None:
+                L.append(msgid)
+	    else:
+                L.append(self.database.getArticle(self.archive, msgid))
 	    msgid = self.database.next(self.archive, 'thread')
 	    
  	self.write_index_footer()
- 	sys.stdout=temp_stdout
+        self._restore_stdout()
+
+    def _open_index_file_as_stdout(self, arcdir, index_name):
+        path = os.path.join(arcdir, index_name + self.INDEX_EXT)
+        self.__f = open(path, "w")
+        self.__stdout = sys.stdout
+        sys.stdout = self.__f
+
+    def _restore_stdout(self):
+        sys.stdout = self.__stdout
+        self.__f.close()
+        del self.__f
+        del self.__stdout
 
     # Update only archives that have been marked as "changed".
     def update_dirty_archives(self):
-	for i in self._dirty_archives: self.update_archive(i)
-	self._dirty_archives=[]
+	for i in self._dirty_archives:
+            self.update_archive(i)
+	self._dirty_archives = []
 
     # Read a Unix mailbox file from the file object <input>,
     # and create a series of Article objects.  Each article
     # object will then be archived.
     
-    def processUnixMailbox(self, input, articleClass=Article):
+    def processUnixMailbox(self, input, articleClass = Article):
 	import mailbox
-	mbox=mailbox.UnixMailbox(input)
+	mbox = mailbox.UnixMailbox(input)
 	while (1):
-	    m=mbox.next()
-	    if not m: break			# End of file reached
-	    a=articleClass(m, self.sequence) # Create an article object
-	    self.sequence=self.sequence+1  # Increment the archive's sequence number
-	    self.add_article(a)		# Add the article
+	    m = mbox.next()
+	    if not m:
+                break
+	    a = articleClass(m, self.sequence)
+	    self.sequence = self.sequence + 1
+	    self.add_article(a)
 
     # Archive an Article object.
     def add_article(self, article):
 	# Determine into what archives the article should be placed
-	archives=self.get_archives(article)
-	if archives==None: archives=[]        # If no value was returned, ignore it
-	if type(archives)==type(''): archives=[archives] 	# If a string was returned, convert to a list
-	if archives==[]: return         # Ignore the article
+	archives = self.get_archives(article)
+        if not archives:
+            return
+	if type(archives) == type(''):
+            archives = [archives]
 
 	# Add the article to each archive in turn
-	article.filename=filename=self.get_filename(article)
-	temp=self.format_article(article) # Reformat the article
-	self.message("Processing article #"+str(article.sequence)+' into archives '+str(archives))
+	article.filename = filename = self.get_filename(article)
+	temp = self.format_article(article) # Reformat the article
+	self.message("Processing article #" + str(article.sequence) + \
+                     " into archives " + str(archives))
 	for i in archives:
-	    self.archive=i
-	    archivedir=os.path.join(self.basedir, i)
+	    self.archive = i
+	    archivedir = os.path.join(self.basedir, i)
 	    # If it's a new archive, create it
 	    if i not in self.archives: 
-		self.archives.append(i) ; self.update_TOC=1
+		self.archives.append(i)
+                self.update_TOC = 1
 		self.database.newArchive(i)
 		# If the archive directory doesn't exist, create it
 		try: os.stat(archivedir)
 		except os.error, errdata:
-		    errno, errmsg=errdata
-		    if errno==2: 
+		    errno, errmsg = errdata
+		    if errno == 2: 
 			mkdir(archivedir, self.DIRMODE)
 		    else: raise os.error, errdata
 		self.open_new_archive(i, archivedir)
 		
 	    # Write the HTML-ized article
-	    f=open(os.path.join(archivedir, filename), 'w')
-##	    os.chmod(os.path.join(archivedir, filename), self.FILEMODE)
+	    f = open(os.path.join(archivedir, filename), 'w')
 	    temp_stdout, sys.stdout = sys.stdout, f
 	    self.write_article_header(temp)
 	    sys.stdout.writelines(temp.body)
 	    self.write_article_footer(temp)
-	    sys.stdout=temp_stdout
+	    sys.stdout = temp_stdout
 	    f.close()
 
-	    authorkey=fixAuthor(article.author)+'\000'+article.date
-	    subjectkey=string.lower(article.subject)+'\000'+article.date
+	    authorkey = fixAuthor(article.author)+'\000'+article.date
+	    subjectkey = string.lower(article.subject)+'\000'+article.date
 
 	    # Update parenting info
-	    parentID=None
-	    if article.in_reply_to!='': parentID=article.in_reply_to
-	    elif article.references!=[]: 
-		# Remove article IDs that aren't in the archive
-		refs=filter(lambda x, self=self: self.database.hasArticle(self.archive, x), 
-			    article.references)
-		if len(refs):
-		    refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs)
-		    maxdate=refs[0]
-		    for ref in refs[1:]: 
-			if ref.date>maxdate.date: maxdate=ref
-		    parentID=maxdate.msgid
+	    parentID = None
+	    if article.in_reply_to:
+                parentID = article.in_reply_to
+	    elif article.references: 
+		refs = self._remove_external_references(article.references)
+                if refs:  # parent is the latest-dated referenced article
+                    get = lambda r, s=self: s.database.getArticle(s.archive, r)
+                    parentID = max(map(lambda a: (a.date, a.msgid), map(get, refs)))[1]
 	    else:
-		# Get the oldest article with a matching subject, and assume this is 
-		# a follow-up to that article
-		parentID=self.database.getOldestArticle(self.archive, article.subject)
+		# Get the oldest article with a matching subject, and
+		# assume this is a follow-up to that article
+		parentID = self.database.getOldestArticle(self.archive,
+                                                          article.subject) 
 
-	    if parentID!=None and not self.database.hasArticle(self.archive, parentID): 
-		parentID=None
-	    article.parentID=parentID 
-	    if parentID!=None:
-		parent=self.database.getArticle(self.archive, parentID)
-		article.threadKey=parent.threadKey+article.date+'-'
-	    else: article.threadKey=article.date+'-'
-   	    self.database.setThreadKey(self.archive, article.threadKey+'\000'+article.msgid, article.msgid)
+	    if parentID is not None \
+               and not self.database.hasArticle(self.archive, parentID): 
+		parentID = None
+	    article.parentID = parentID 
+	    if parentID is not None:
+		parent = self.database.getArticle(self.archive, parentID)
+		article.threadKey = parent.threadKey + article.date + '-'
+	    else:
+                article.threadKey = article.date + '-'
+            key = article.threadKey + '\000' + article.msgid
+   	    self.database.setThreadKey(self.archive, key, article.msgid)
 	    self.database.addArticle(i, temp, subjectkey, authorkey)
-	    
 	    if i not in self._dirty_archives: 
 		self._dirty_archives.append(i)
-	del temp
+
+    def _remove_external_references(self, refs):
+        keep = []  # message IDs from refs found in the current archive
+        for ref in refs:
+            if self.database.hasArticle(self.archive, ref): keep.append(ref)
+        return keep
 
     # Abstract methods: these will need to be overridden by subclasses
     # before anything useful can be done.
@@ -512,27 +565,29 @@ class T:
 
 class BSDDBdatabase(Database):
     def __init__(self, basedir):
-	self.__cachekeys=[] ; self.__cachedict={}
-	self.__currentOpenArchive=None   # The currently open indices
-	self.basedir=os.path.expanduser(basedir)
-	self.changed={}         # Recently added articles, indexed only by message ID
+	self.__cachekeys = []
+        self.__cachedict = {}
+	self.__currentOpenArchive = None # The currently open indices
+	self.basedir = os.path.expanduser(basedir)
+	self.changed = {} # Recently added articles, indexed only by
+	                  # message ID 
     def firstdate(self, archive):
-	import time
 	self.__openIndices(archive)
-	date='None'
+	date = 'None'
 	try:
 	    date, msgid = self.dateIndex.first()
-	    date=time.asctime(time.localtime(string.atof(date)))
-	except KeyError: pass
+	    date = time.asctime(time.localtime(string.atof(date)))
+	except KeyError:
+            pass
 	return date
     def lastdate(self, archive):
-	import time
 	self.__openIndices(archive)
-	date='None'
+	date = 'None'
 	try:
 	    date, msgid = self.dateIndex.last()
-	    date=time.asctime(time.localtime(string.atof(date)))
-	except KeyError: pass
+	    date = time.asctime(time.localtime(string.atof(date)))
+	except KeyError:
+            pass
 	return date
     def numArticles(self, archive):
 	self.__openIndices(archive)
@@ -544,55 +599,59 @@ class BSDDBdatabase(Database):
 	self.__openIndices(archive)
 
 	# Add the new article
-	self.dateIndex[article.date]=article.msgid
-	self.authorIndex[authorkey]=article.msgid
-	self.subjectIndex[subjectkey]=article.msgid
-	# Set the 'body' attribute to empty, to avoid storing the whole message
-	temp = article.body ; article.body=[]
-	self.articleIndex[article.msgid]=pickle.dumps(article)
-	article.body=temp
-	self.changed[archive,article.msgid]=None
+	self.dateIndex[article.date] = article.msgid
+	self.authorIndex[authorkey] = article.msgid
+	self.subjectIndex[subjectkey] = article.msgid
+	# Set the 'body' attribute to empty, to avoid storing the
+	# whole message 
+	temp = article.body
+        article.body = []
+	self.articleIndex[article.msgid] = pickle.dumps(article)
+	article.body = temp
+	self.changed[archive,article.msgid] = None
 
-	parentID=article.parentID
-	if parentID!=None and self.articleIndex.has_key(parentID): 
-	    parent=self.getArticle(archive, parentID)
-	    myThreadKey=parent.threadKey+article.date+'-'
-	else: myThreadKey = article.date+'-'
-	article.threadKey=myThreadKey
-	self.setThreadKey(archive, myThreadKey+'\000'+article.msgid, article.msgid)
+	parentID = article.parentID
+	if parentID is not None and self.articleIndex.has_key(parentID): 
+	    parent = self.getArticle(archive, parentID)
+	    myThreadKey = parent.threadKey+article.date + '-'
+	else:
+            myThreadKey = article.date + '-'
+	article.threadKey = myThreadKey
+        key = myThreadKey + '\000' + article.msgid
+	self.setThreadKey(archive, key, article.msgid)
 
     # Open the BSDDB files that are being used as indices
     # (dateIndex, authorIndex, subjectIndex, articleIndex)
     def __openIndices(self, archive):
-	if self.__currentOpenArchive==archive: return
+	if self.__currentOpenArchive == archive: return
 
 	import bsddb
 	self.__closeIndices()
-#	print 'opening indices for [%s]' % (repr(archive),)
-	arcdir=os.path.join(self.basedir, 'database')
+	arcdir = os.path.join(self.basedir, 'database')
 	try: mkdir(arcdir)
 	except os.error: pass
-	for i in ['date', 'author', 'subject', 'article', 'thread']:
-	    t=bsddb.btopen(os.path.join(arcdir, archive+'-'+i), 'c') 
-	    setattr(self, i+'Index', t)
-	self.__currentOpenArchive=archive
+	for hdr in ('date', 'author', 'subject', 'article', 'thread'):
+            path = os.path.join(arcdir, archive + '-' + hdr)
+	    t = bsddb.btopen(path, 'c') 
+	    setattr(self, hdr + 'Index', t)
+	self.__currentOpenArchive = archive
 
     # Close the BSDDB files that are being used as indices (if they're
     # open--this is safe to call if they're already closed)
     def __closeIndices(self):
-	if self.__currentOpenArchive!=None: 
+	if self.__currentOpenArchive is not None: 
 	    pass
-#	    print 'closing indices for [%s]' % (repr(self.__currentOpenArchive),)
-	for i in ['date', 'author', 'subject', 'thread', 'article']:
-	    attr=i+'Index'
+	for hdr in ('date', 'author', 'subject', 'thread', 'article'):
+	    attr = hdr + 'Index'
 	    if hasattr(self, attr): 
-		index=getattr(self, attr) 
-		if i=='article': 
-	            if not hasattr(self, 'archive_length'): self.archive_length={}
-		    self.archive_length[self.__currentOpenArchive]=len(index)
+		index = getattr(self, attr) 
+		if hdr == 'article': 
+	            if not hasattr(self, 'archive_length'):
+                        self.archive_length = {}
+		    self.archive_length[self.__currentOpenArchive] = len(index)
 		index.close() 
 		delattr(self,attr)
-	self.__currentOpenArchive=None
+	self.__currentOpenArchive = None
     def close(self):
 	self.__closeIndices()
     def hasArticle(self, archive, msgid): 
@@ -600,60 +659,71 @@ class BSDDBdatabase(Database):
 	return self.articleIndex.has_key(msgid)
     def setThreadKey(self, archive, key, msgid):
 	self.__openIndices(archive)
-	self.threadIndex[key]=msgid
+	self.threadIndex[key] = msgid
     def getArticle(self, archive, msgid):
 	self.__openIndices(archive)
 	if self.__cachedict.has_key(msgid): 
 	    self.__cachekeys.remove(msgid)
 	    self.__cachekeys.append(msgid)
 	    return self.__cachedict[msgid]
-	if len(self.__cachekeys)==CACHESIZE: 
-	    delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
+	if len(self.__cachekeys) == CACHESIZE: 
+	    delkey, self.__cachekeys = (self.__cachekeys[0],
+                                        self.__cachekeys[1:])
 	    del self.__cachedict[delkey]
-	s=self.articleIndex[msgid]
-	article=pickle.loads(s)
-	self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
+	s = self.articleIndex[msgid]
+	article = pickle.loads(s)
+	self.__cachekeys.append(msgid)
+        self.__cachedict[msgid] = article
 	return article
 
     def first(self, archive, index): 
 	self.__openIndices(archive)
-	index=getattr(self, index+'Index')
+	index = getattr(self, index+'Index')
 	try: 
 	    key, msgid = index.first()
 	    return msgid
-	except KeyError: return None
+	except KeyError:
+            return None
     def next(self, archive, index): 
 	self.__openIndices(archive)
-	index=getattr(self, index+'Index')
+	index = getattr(self, index+'Index')
 	try: 
 	    key, msgid = index.next()
-	    return msgid
-	except KeyError: return None
+	except KeyError:
+            return None
+        else:
+            return msgid
+
 	
     def getOldestArticle(self, archive, subject):
 	self.__openIndices(archive)
-	subject=string.lower(subject)
+	subject = string.lower(subject)
 	try: 
-	    key, tempid=self.subjectIndex.set_location(subject)
+	    key, tempid = self.subjectIndex.set_location(subject)
 	    self.subjectIndex.next()	
-	    [subject2, date]= string.split(key, '\0')
-	    if subject!=subject2: return None
+	    [subject2, date] = string.split(key, '\0')
+	    if subject != subject2:
+                return None
 	    return tempid
-	except KeyError: 
+	except KeyError: # XXX what line raises the KeyError?
 	    return None
 
-    def newArchive(self, archive): pass
+    def newArchive(self, archive):
+        pass
+    
     def clearIndex(self, archive, index):
 	self.__openIndices(archive)
-	index=getattr(self, index+'Index')
-	finished=0
+	index = getattr(self, index+'Index')
+	finished = 0
 	try:
-	    key, msgid=self.threadIndex.first()	    		
-	except KeyError: finished=1
+	    key, msgid = self.threadIndex.first()	    		
+	except KeyError:
+            finished = 1
 	while not finished:
 	    del self.threadIndex[key]
 	    try:
-		key, msgid=self.threadIndex.next()	    		
-	    except KeyError: finished=1
+		key, msgid = self.threadIndex.next()	    		
+	    except KeyError:
+                finished = 1