author     jhylton  2000-09-21 19:31:57 +0000
committer  jhylton  2000-09-21 19:31:57 +0000
commit     35c98ba5e2018691b7262f11bcaca6de163ea027 (patch)
tree       8895fbc73979e2dfed4d63f0ec69390ec6c52d82
parent     7a0f3f0d7b7215cc29459e8cc8b0e4ba5b2ee8df (diff)
-rw-r--r--  Mailman/Archiver/HyperArch.py    34
-rw-r--r--  Mailman/Archiver/pipermail.py   702
2 files changed, 401 insertions(+), 335 deletions(-)
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 7a5c47cd7..8429b8356 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -420,13 +420,14 @@ arch_listing_end = '''\
class HyperArchive(pipermail.T):
+ __super_init = pipermail.T.__init__
# some defaults
DIRMODE=02775
FILEMODE=0660
- VERBOSE=0
+ VERBOSE=1
DEFAULTINDEX='thread'
ARCHIVE_PERIOD='month'
@@ -540,23 +541,19 @@ class HyperArchive(pipermail.T):
d["archive_listing"] = listing
return self.html_TOC_tmpl % d
- def __init__(self, maillist,unlock=1):
- self.maillist=maillist
- self._unlocklist=unlock
- self._lock_file=None
+ def __init__(self, maillist, unlock=1):
+ self.maillist = maillist
+ self._unlocklist = unlock
+ self._lock_file = None
-
- #
# can't init the database while other
# processes are writing to it!
# XXX TODO- implement native locking
# with mailman's LockFile module for HyperDatabase.HyperDatabase
#
- pipermail.T.__init__(
- self,
- maillist.archive_dir(),
- reload=1,
- database=HyperDatabase.HyperDatabase(maillist.archive_dir()))
+ dir = maillist.archive_dir()
+ db = HyperDatabase.HyperDatabase(dir)
+ self.__super_init(dir, reload=1, database=db)
if hasattr(self.maillist,'archive_volume_frequency'):
if self.maillist.archive_volume_frequency == 0:
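The __super_init alias introduced in the first hunk is the classic pre-super()
delegation idiom: the double-underscore name is mangled to
_HyperArchive__super_init, so a subclass cannot accidentally shadow it, and
looking it up through self re-binds pipermail.T.__init__ to the instance. A
minimal sketch of the pattern, with illustrative class names:

    class Base:
        def __init__(self, basedir):
            self.basedir = basedir

    class Derived(Base):
        # Mangled to _Derived__super_init; safe from subclass shadowing.
        __super_init = Base.__init__

        def __init__(self, basedir, verbose=0):
            # Attribute lookup through self binds the method, so no
            # explicit self argument is needed; the same call shape
            # as self.__super_init(dir, reload=1, database=db) above.
            self.__super_init(basedir)
            self.verbose = verbose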
@@ -738,8 +735,10 @@ class HyperArchive(pipermail.T):
def open_new_archive(self, archive, archivedir):
index_html=os.path.join(archivedir, 'index.html')
- try: os.unlink(index_html)
- except: pass
+ try:
+ os.unlink(index_html)
+ except:
+ pass
os.symlink(self.DEFAULTINDEX+'.html',index_html)
@@ -930,10 +929,10 @@ class HyperArchive(pipermail.T):
self.database.close()
del self.database
f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
- pickle.dump(self.__getstate__(), f)
+ pickle.dump(self.getstate(), f)
f.close()
- def __getstate__(self):
+ def getstate(self):
d={}
for each in self.__dict__.keys():
if not (each in ['maillist','_lock_file','_unlocklist']
@@ -941,9 +940,6 @@ class HyperArchive(pipermail.T):
d[each] = self.__dict__[each]
return d
-
-
-
# Add <A HREF="..."> tags around URLs and e-mail addresses.
def __processbody_URLquote(self, source, dest):
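The rename of __getstate__ to getstate, paired with the switch to
pickle.dump(self.getstate(), f), is worth a note: __getstate__ is a hook that
pickle itself calls when an instance is pickled, but this code pickles a plain
dictionary, so the magic name was misleading. A minimal sketch of the explicit
approach, with illustrative class and attribute names:

    import sys
    import pickle

    class Archive:
        def __init__(self):
            self.basedir = '/tmp/archive'
            self._lock_file = sys.stdout    # file objects cannot be pickled

        def getstate(self):
            # Build the picklable subset of the instance dict by hand,
            # skipping runtime-only attributes.
            d = {}
            for key in self.__dict__.keys():
                if key not in ('_lock_file',):
                    d[key] = self.__dict__[key]
            return d

    a = Archive()
    f = open('pipermail.pck', 'w')
    pickle.dump(a.getstate(), f)    # a plain dict, no pickle magic involved
    f.close()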
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index ccdbb7c9f..c21cad9c7 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -1,6 +1,10 @@
#! /usr/bin/env python
-import os, sys, string, re
+import os
+import re
+import sys
+import string
+import time
try:
import cPickle
@@ -12,17 +16,19 @@ from Mailman.Utils import mkdir, open_ex
# TBD: ugly, ugly, ugly -baw
open = open_ex
-__version__='0.05 (Mailman edition)'
-VERSION=__version__
-CACHESIZE=100 # Number of slots in the cache
+__version__ = '0.05 (Mailman edition)'
+VERSION = __version__
+CACHESIZE = 100 # Number of slots in the cache
-msgid_pat=re.compile(r'(<.*>)')
+msgid_pat = re.compile(r'(<.*>)')
def strip_separators(s):
"Remove quotes or parenthesization from a Message-ID string"
- if s==None or s=="": return ""
- if s[0] in '"<([' and s[-1] in '">)]': s=s[1:-1]
+ if not s:
+ return ""
+ if s[0] in '"<([' and s[-1] in '">)]':
+ s = s[1:-1]
return s
smallNameParts = ['van', 'von', 'der', 'de']
@@ -30,20 +36,24 @@ smallNameParts = ['van', 'von', 'der', 'de']
def fixAuthor(author):
"Canonicalize a name into Last, First format"
# If there's a comma, guess that it's already in "Last, First" format
- if ',' in author: return author
- L=string.split(author)
- i=len(L)-1
- if i==0: return author # The string's one word--forget it
- if string.upper(author)==author or string.lower(author)==author:
+ if ',' in author:
+ return author
+ L = string.split(author)
+ i = len(L) - 1
+ if i == 0:
+ return author # The string's one word--forget it
+ if string.upper(author) == author or string.lower(author) == author:
# Damn, the name is all upper- or lower-case.
- while i>0 and string.lower(L[i-1]) in smallNameParts: i=i-1
+ while i > 0 and string.lower(L[i-1]) in smallNameParts:
+ i = i - 1
else:
# Mixed case; assume that small parts of the last name will be
# in lowercase, and check them against the list.
while i>0 and (L[i-1][0] in string.lowercase or
string.lower(L[i-1]) in smallNameParts):
- i=i-1
- author=string.join(L[-1:]+L[i:-1], ' ')+', '+string.join(L[:i], ' ')
+ i = i - 1
+ author = string.join(L[-1:] + L[i:-1], ' ') \
+ + ', ' + string.join(L[:i], ' ')
return author
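Some hand-traced examples of the rewritten fixAuthor, inferred from the code
above rather than from any test suite in the patch. Note that the lower-cased
name particles end up after the surname, which is what the final join
actually produces:

    fixAuthor('Rossum, Guido')      # comma present: returned unchanged
    fixAuthor('guido')              # single word: returned unchanged
    fixAuthor('Jane Q. Public')     # -> 'Public, Jane Q.'
    fixAuthor('Guido van Rossum')   # -> 'Rossum van, Guido'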
# Abstract class for databases
@@ -66,125 +76,144 @@ class Database:
# The Article class encapsulates a single posting. The attributes
# are:
#
-# sequence : Sequence number, unique for each article in a set of archives
-# subject : Subject
-# datestr : The posting date, in human-readable format
-# date : The posting date, in purely numeric format
-# headers : Any other headers of interest
-# author : The author's name (and possibly organization)
-# email : The author's e-mail address
-# msgid : A unique message ID
-# in_reply_to : If !="", this is the msgid of the article being replied to
-# references: A (possibly empty) list of msgid's of earlier articles in the thread
-# body : A list of strings making up the message body
+# sequence : Sequence number, unique for each article in a set of archives
+# subject : Subject
+# datestr : The posting date, in human-readable format
+# date : The posting date, in purely numeric format
+# headers : Any other headers of interest
+# author : The author's name (and possibly organization)
+# email : The author's e-mail address
+# msgid : A unique message ID
+# in_reply_to: If != "", this is the msgid of the article being replied to
+# references : A (possibly empty) list of msgid's of earlier articles
+# in the thread
+# body : A list of strings making up the message body
class Article:
- import time
- __last_article_time=time.time()
- def __init__(self, message=None, sequence=0, keepHeaders=[]):
- import time
- if message==None: return
- self.sequence=sequence
+ __last_article_time = time.time()
+
+ def __init__(self, message = None, sequence = 0, keepHeaders = []):
+ if message is None:
+ return
+ self.sequence = sequence
- self.parentID = None ; self.threadKey = None
+ self.parentID = None
+ self.threadKey = None
# otherwise the current sequence number is used.
- id=strip_separators(message.getheader('Message-Id'))
- if id=="": self.msgid=str(self.sequence)
- else: self.msgid=id
+ id = strip_separators(message.getheader('Message-Id'))
+ if id == "":
+ self.msgid = str(self.sequence)
+ else: self.msgid = id
- if message.has_key('Subject'): self.subject=str(message['Subject'])
- else: self.subject='No subject'
- if self.subject=="": self.subject='No subject'
+ if message.has_key('Subject'):
+ self.subject = str(message['Subject'])
+ else:
+ self.subject = 'No subject'
+ if self.subject == "": self.subject = 'No subject'
if message.has_key('Date'):
- self.datestr=str(message['Date'])
- date=message.getdate_tz('Date')
+ self.datestr = str(message['Date'])
+ date = message.getdate_tz('Date')
else:
- self.datestr='None'
- date=None
- if date!=None:
- date, tzoffset=date[:9], date[-1]
- date=time.mktime(date)-tzoffset
+ self.datestr = 'None'
+ date = None
+ if date is not None:
+ date, tzoffset = date[:9], date[-1]
+ date = time.mktime(date)-tzoffset
else:
- date=self.__last_article_time+1 ; print 'Article without date:', self.msgid
+ date = self.__last_article_time+1
+ print 'Article without date:', self.msgid
- self.__last_article_time=date
- self.date='%011i' % (date,)
+ self.__last_article_time = date
+ self.date = '%011i' % (date,)
# Figure out the e-mail address and poster's name
- self.author, self.email=message.getaddr('From')
- e=message.getheader('Reply-To')
- if e!=None: self.email=e
- self.email=strip_separators(self.email)
- self.author=strip_separators(self.author)
+ self.author, self.email = message.getaddr('From')
+ e = message.getheader('Reply-To')
+ if e is not None:
+ self.email = e
+ self.email = strip_separators(self.email)
+ self.author = strip_separators(self.author)
- if self.author=="": self.author=self.email
+ if self.author == "": self.author = self.email
# Save the 'In-Reply-To:' and 'References:' lines
- i_r_t=message.getheader('In-Reply-To')
- if i_r_t==None: self.in_reply_to=''
+ i_r_t = message.getheader('In-Reply-To')
+ if i_r_t is None:
+ self.in_reply_to = ''
else:
- match=msgid_pat.search(i_r_t)
- if match==None: self.in_reply_to=''
- else: self.in_reply_to=strip_separators(match.group(1))
+ match = msgid_pat.search(i_r_t)
+ if match is None: self.in_reply_to = ''
+ else: self.in_reply_to = strip_separators(match.group(1))
- references=message.getheader('References')
- if references==None: self.references=[]
- else: self.references=map(strip_separators, string.split(references))
+ references = message.getheader('References')
+ if references is None:
+ self.references = []
+ else:
+ self.references = map(strip_separators,
+ string.split(references))
# Save any other interesting headers
- self.headers={}
+ self.headers = {}
for i in keepHeaders:
- if message.has_key(i): self.headers[i]=message[i]
+ if message.has_key(i):
+ self.headers[i] = message[i]
# Read the message body
- self.body=[]
+ self.body = []
message.rewindbody()
- while (1):
- line=message.fp.readline()
- if line=="": break
+ while 1:
+ line = message.fp.readline()
+ if line == "":
+ break
self.body.append(line)
def __repr__(self):
- return '<Article ID='+repr(self.msgid)+'>'
+ return '<Article ID = '+repr(self.msgid)+'>'
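The date handling in Article.__init__ is what makes the date index sortable as
plain strings: the RFC 822 date is parsed, the zone offset is subtracted after
mktime, and the result is rendered as a zero-padded eleven-digit string. The
same conversion in isolation, using the stdlib rfc822 module of this Python
vintage; the sample date string is illustrative:

    import time
    import rfc822

    datestr = 'Thu, 21 Sep 2000 19:31:57 +0200'
    parsed = rfc822.parsedate_tz(datestr)    # 10-tuple; last item is offset
    timetuple, tzoffset = parsed[:9], parsed[-1]
    # mktime treats the tuple as local time; subtracting the offset
    # normalizes it, the same (approximate) arithmetic as above.
    stamp = time.mktime(timetuple) - tzoffset
    key = '%011i' % (stamp,)    # fixed width: string order == numeric order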
# Pipermail formatter class
class T:
- DIRMODE=0755 # Mode to give to created directories
- FILEMODE=0644 # Mode to give to created files
+ DIRMODE = 0755 # Mode to give to created directories
+ FILEMODE = 0644 # Mode to give to created files
INDEX_EXT = ".html" # Extension for indexes
- def __init__(self, basedir=None, reload=1, database=None):
+ def __init__(self, basedir = None, reload = 1, database = None):
# If basedir isn't provided, assume the current directory
- if basedir==None: self.basedir=os.getcwd()
+ if basedir is None:
+ self.basedir = os.getcwd()
else:
- basedir=os.path.expanduser(basedir)
- self.basedir=basedir
- self.database=database
+ basedir = os.path.expanduser(basedir)
+ self.basedir = basedir
+ self.database = database
# If the directory doesn't exist, create it
- try: os.stat(self.basedir)
+ try:
+ os.stat(self.basedir)
except os.error, errdata:
errno, errmsg = errdata
- if errno!=2: raise os.error, errdata
+ if errno != 2:
+ raise os.error, errdata
else:
- self.message('Creating archive directory '+self.basedir)
+ self.message('Creating archive directory ' + self.basedir)
mkdir(self.basedir, self.DIRMODE)
# Try to load previously pickled state
try:
- if not reload: raise IOError
- f=open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
+ if not reload:
+ raise IOError
+ f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
self.message('Reloading pickled archive state')
- d=pickle.load(f)
+ d = pickle.load(f)
f.close()
- for key, value in d.items(): setattr(self, key, value)
+ for key, value in d.items():
+ setattr(self, key, value)
except IOError:
# No pickled version, so initialize various attributes
- self.archives=[] # Archives
- self._dirty_archives=[] # Archives that will have to be updated
- self.sequence=0 # Sequence variable used for numbering articles
- self.update_TOC=0 # Does the TOC need updating?
+ self.archives = [] # Archives
+ self._dirty_archives = [] # Archives that will have to be updated
+ self.sequence = 0 # Sequence variable used for
+ # numbering articles
+ self.update_TOC = 0 # Does the TOC need updating?
#
# make the basedir variable work when passed in as an __init__ arg
# and different from the one in the pickle. Let the one passed in
@@ -195,17 +224,17 @@ class T:
self.basedir = basedir
def close(self):
- "Close an archive, saving its state and updating any changed archives."
- self.update_dirty_archives()# Update all changed archives
- # If required, update the table of contents
- if self.update_TOC or 1:
- self.update_TOC=0
+ "Close an archive, save its state, and update any changed archives."
+ self.update_dirty_archives()
+ if self.update_TOC:
+ self.update_TOC = 0
self.write_TOC()
# Save the collective state
- self.message('Pickling archive state into '+os.path.join(self.basedir, 'pipermail.pck'))
+ self.message('Pickling archive state into ' \
+ + os.path.join(self.basedir, 'pipermail.pck'))
self.database.close()
del self.database
- f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+ f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
pickle.dump(self.__dict__, f)
f.close()
@@ -219,43 +248,51 @@ class T:
# Create a dictionary of various parameters that will be passed
# to the write_index_{header,footer} functions
def __set_parameters(self, archive):
- import time
# Determine the earliest and latest date in the archive
- firstdate=self.database.firstdate(archive)
- lastdate=self.database.lastdate(archive)
+ firstdate = self.database.firstdate(archive)
+ lastdate = self.database.lastdate(archive)
# Get the current time
- now=time.asctime(time.localtime(time.time()))
- self.firstdate=firstdate ; self.lastdate=lastdate
- self.archivedate=now ; self.size=self.database.numArticles(archive)
- self.archive=archive ; self.version=__version__
+ now = time.asctime(time.localtime(time.time()))
+ self.firstdate = firstdate
+ self.lastdate = lastdate
+ self.archivedate = now
+ self.size = self.database.numArticles(archive)
+ self.archive = archive
+ self.version = __version__
# Find the message ID of an article's parent, or return None
# if no parent can be found.
- def __findParent(self, article, children=[]):
- parentID=None
- if article.in_reply_to!='': parentID=article.in_reply_to
- elif article.references!=[]:
+ def __findParent(self, article, children = []):
+ parentID = None
+ if article.in_reply_to:
+ parentID = article.in_reply_to
+ elif article.references:
# Remove article IDs that aren't in the archive
- refs=filter(self.articleIndex.has_key, article.references)
- if len(refs):
- refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs)
- maxdate=refs[0]
- for i in refs[1:]:
- if i.date>maxdate.date: maxdate=i
- parentID=maxdate.msgid
+ refs = filter(self.articleIndex.has_key, article.references)
+ if not refs:
+ return None
+ maxdate = self.database.getArticle(self.archive,
+ refs[0])
+ for ref in refs[1:]:
+ a = self.database.getArticle(self.archive, ref)
+                if a.date > maxdate.date:
+ maxdate = a
+ parentID = maxdate.msgid
else:
# Look for the oldest matching subject
try:
- key, tempid=self.subjectIndex.set_location(article.subject)
+ key, tempid = \
+ self.subjectIndex.set_location(article.subject)
print key, tempid
self.subjectIndex.next()
- [subject, date]= string.split(key, '\0')
+ [subject, date] = string.split(key, '\0')
print article.subject, subject, date
- if (subject==article.subject and tempid not in children):
- parentID=tempid
- except KeyError: pass
+ if subject == article.subject and tempid not in children:
+ parentID = tempid
+ except KeyError:
+ pass
return parentID
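The subject-index fallback above leans on the key layout of the btree: each
key is the lower-cased subject, a NUL byte, then the zero-padded date, so
positioning a cursor at the bare subject lands on the oldest article with that
subject. A small standalone demonstration; the path and message IDs are made
up:

    import bsddb
    import string

    NUL = '\000'
    db = bsddb.btopen('/tmp/subject-index-demo', 'c')
    db['re: frobnicate' + NUL + '00966540717'] = '<msg1@example.com>'
    db['re: frobnicate' + NUL + '00966627117'] = '<msg2@example.com>'
    # set_location positions at the smallest key >= its argument,
    # i.e. the oldest date recorded for that subject.
    key, msgid = db.set_location('re: frobnicate')
    subject, date = string.split(key, NUL)
    db.close()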
# Update the threaded index completely
@@ -264,40 +301,26 @@ class T:
self.database.clearIndex(self.archive, 'thread')
# Loop over all the articles
- msgid=self.database.first(self.archive, 'date')
- while (msgid != None):
+ msgid = self.database.first(self.archive, 'date')
+ while msgid is not None:
try:
- article=self.database.getArticle(self.archive, msgid)
+ article = self.database.getArticle(self.archive, msgid)
except KeyError:
pass
else:
- if article.parentID==None or \
+ if article.parentID is None or \
not self.database.hasArticle(self.archive,
article.parentID):
# then
- key=article.date
+ key = article.date
else:
- parent=self.database.getArticle(self.archive,
+ parent = self.database.getArticle(self.archive,
article.parentID)
- article.threadKey=parent.threadKey+article.date+'-'
- self.database.setThreadKey(
- self.archive,
- article.threadKey+'\000'+ article.msgid,
+ article.threadKey = parent.threadKey+article.date+'-'
+ self.database.setThreadKey(self.archive,
+ article.threadKey + '\000' + article.msgid,
msgid)
- msgid=self.database.next(self.archive, 'date')
-
-## L1=[] ; L2=[]
-## while (1):
-## article=self.database.getArticle(self.archive, msgid)
-## L1.append('') ; L2.append(msgid)
-## L1=map(lambda x, d=article.date: d+'-'+x, L1)
-## parentID=self.__findParent(article, L2)
-## if parentID==None or not self.database.hasArticle(parentID):
-## break
-## else: msgid=parentID
-## for i in range(0, len(L1)):
-## self.database.setThreadKey(self.archive, L1[i], '\000'+L2[i])
-## self.database.setThreadKey(self.archive, '\000'+L2[i], L1[i])
+ msgid = self.database.next(self.archive, 'date')
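The thread keys being rebuilt here encode the whole ancestry in one string: a
root's key is its date plus '-', and each reply appends its own date, so a
plain lexicographic walk of the thread btree is a depth-first traversal of
every thread, and string.count(key, '-') - 1 recovers the indentation depth.
A minimal sketch:

    import string

    def thread_key(parent_key, date):
        # Pass parent_key = '' for a root article.
        return parent_key + date + '-'

    root = thread_key('', '00966540717')
    reply = thread_key(root, '00966541000')
    assert root < reply                     # a reply sorts after its parent
    depth = string.count(reply, '-') - 1    # 1: one level below the root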
#
# Public methods:
@@ -308,171 +331,201 @@ class T:
# Update a single archive's indices, whether the archive's been
# dirtied or not.
def update_archive(self, archive):
- self.archive=archive
- self.message("Updating index files for archive ["+archive+']')
- arcdir=os.path.join(self.basedir, archive)
- parameters=self.__set_parameters(archive)
- # Handle the 3 simple indices first
- for i in ['Date', 'Subject', 'Author']:
- self.message(" "+i)
- self.type=i
- # Get the right index
- i=string.lower(i)
+ self.archive = archive
+ self.message("Updating index files for archive [%s]" % archive)
+ arcdir = os.path.join(self.basedir, archive)
+ self.__set_parameters(archive)
- # Redirect sys.stdout
- import sys
- f=open(os.path.join(arcdir, i+self.INDEX_EXT), 'w')
-## os.chmod(f.name, self.FILEMODE)
- temp_stdout, sys.stdout=sys.stdout, f
- self.write_index_header()
- count=0
- # Loop over the index entries
- finished=0
- msgid=self.database.first(archive, i)
- while (msgid != None):
- try:
- article=self.database.getArticle(self.archive, msgid)
- except KeyError:
- pass
- else:
- count=count+1
- self.write_index_entry(article)
- msgid = self.database.next(archive, i)
- # Finish up this index
- self.write_index_footer()
- sys.stdout=temp_stdout
- f.close()
+ for hdr in ('Date', 'Subject', 'Author'):
+ self._update_simple_index(hdr, archive, arcdir)
+
+ self._update_thread_index(archive, arcdir)
+
+ def _update_simple_index(self, hdr, archive, arcdir):
+ self.message(" " + hdr)
+ self.type = hdr
+ hdr = string.lower(hdr)
+
+ self._open_index_file_as_stdout(arcdir, hdr)
+ self.write_index_header()
+ count = 0
+ # Loop over the index entries
+ finished = 0
+ msgid = self.database.first(archive, hdr)
+ while msgid is not None:
+ try:
+ article = self.database.getArticle(self.archive, msgid)
+ except KeyError:
+ pass
+ else:
+ count = count + 1
+ self.write_index_entry(article)
+ msgid = self.database.next(archive, hdr)
+ # Finish up this index
+ self.write_index_footer()
+ self._restore_stdout()
- # Print the threaded index
+ def _update_thread_index(self, archive, arcdir):
self.message(" Thread")
- temp_stdout, sys.stdout=sys.stdout, open(os.path.join(arcdir, 'thread' + self.INDEX_EXT), 'w')
-## os.chmod(os.path.join(arcdir, 'thread' + self.INDEX_EXT), self.FILEMODE)
- self.type='Thread'
+ self._open_index_file_as_stdout(arcdir, "thread")
+ self.type = 'Thread'
self.write_index_header()
# To handle the prev./next in thread pointers, we need to
# track articles 5 at a time.
# Get the first 5 articles
- L=[ None ]*5 ; i=2 ; finished=0
- msgid=self.database.first(self.archive, 'thread')
- while msgid!=None and i<5:
- L[i]=self.database.getArticle(self.archive, msgid) ; i=i+1
+ L = [None] * 5
+ i = 2
+ finished = 0
+ msgid = self.database.first(self.archive, 'thread')
+
+ while msgid is not None and i < 5:
+ L[i] = self.database.getArticle(self.archive, msgid)
+ i = i + 1
msgid = self.database.next(self.archive, 'thread')
- while L[2]!=None:
- article=L[2] ; artkey=None
- if article!=None: artkey=article.threadKey
- if artkey!=None:
- import sys
- self.write_threadindex_entry(article, string.count(artkey, '-')-1)
- if self.database.changed.has_key( (archive,article.msgid) ):
- a1=L[1] ; a3=L[3]
+ while L[2] is not None:
+ article = L[2]
+ artkey = None
+ if article is not None:
+ artkey = article.threadKey
+ if artkey is not None:
+ self.write_threadindex_entry(article,
+ string.count(artkey, '-') - 1)
+ if self.database.changed.has_key((archive,article.msgid)):
+ a1 = L[1]
+ a3 = L[3]
self.update_article(arcdir, article, a1, a3)
- if a3!=None: self.database.changed[ (archive,a3.msgid) ]=None
- if a1!=None:
- if not self.database.changed.has_key( (archive,a1.msgid) ):
+ if a3 is not None:
+ self.database.changed[(archive, a3.msgid)] = None
+ if a1 is not None:
+ key = archive, a1.msgid
+ if not self.database.changed.has_key(key):
self.update_article(arcdir, a1, L[0], L[2])
- else: del self.database.changed[ (archive,a1.msgid) ]
- L=L[1:] # Rotate the list
- if msgid==None: L.append(msgid)
- else: L.append( self.database.getArticle(self.archive, msgid) )
+ else:
+ del self.database.changed[key]
+ L = L[1:] # Rotate the list
+ if msgid is None:
+ L.append(msgid)
+ else:
+ L.append(self.database.getArticle(self.archive, msgid))
msgid = self.database.next(self.archive, 'thread')
self.write_index_footer()
- sys.stdout=temp_stdout
+ self._restore_stdout()
+
+ def _open_index_file_as_stdout(self, arcdir, index_name):
+ path = os.path.join(arcdir, index_name + self.INDEX_EXT)
+ self.__f = open(path, "w")
+ self.__stdout = sys.stdout
+ sys.stdout = self.__f
+
+ def _restore_stdout(self):
+ sys.stdout = self.__stdout
+ self.__f.close()
+ del self.__f
+ del self.__stdout
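The two helpers above factor out the redirect-print-restore dance that every
index writer uses. One caveat: if anything raises between the two calls,
sys.stdout stays pointed at the index file. A self-contained sketch of the
same pattern with a try/finally guard; the guard is my addition, not part of
the patch:

    import sys

    def capture_to_file(path, write_body):
        # Same redirect pattern as _open_index_file_as_stdout and
        # _restore_stdout, hardened so stdout is restored even if
        # write_body raises.
        f = open(path, 'w')
        saved = sys.stdout
        sys.stdout = f
        try:
            write_body()
        finally:
            sys.stdout = saved
            f.close()

    def body():
        print 'index contents'    # lands in the file, not on the console

    capture_to_file('/tmp/date.html', body)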
# Update only archives that have been marked as "changed".
def update_dirty_archives(self):
- for i in self._dirty_archives: self.update_archive(i)
- self._dirty_archives=[]
+ for i in self._dirty_archives:
+ self.update_archive(i)
+ self._dirty_archives = []
# Read a Unix mailbox file from the file object <input>,
# and create a series of Article objects. Each article
# object will then be archived.
- def processUnixMailbox(self, input, articleClass=Article):
+ def processUnixMailbox(self, input, articleClass = Article):
import mailbox
- mbox=mailbox.UnixMailbox(input)
+ mbox = mailbox.UnixMailbox(input)
while (1):
- m=mbox.next()
- if not m: break # End of file reached
- a=articleClass(m, self.sequence) # Create an article object
- self.sequence=self.sequence+1 # Increment the archive's sequence number
- self.add_article(a) # Add the article
+ m = mbox.next()
+ if not m:
+ break
+ a = articleClass(m, self.sequence)
+ self.sequence = self.sequence + 1
+ self.add_article(a)
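processUnixMailbox is the bulk entry point: it wraps the open file in the
stdlib mailbox.UnixMailbox and feeds each parsed message to add_article. A
minimal standalone loop over the same machinery; the mbox path is
illustrative:

    from Mailman.Archiver.pipermail import Article
    import mailbox

    fp = open('/var/mail/mylist.mbox')
    mbox = mailbox.UnixMailbox(fp)
    sequence = 0
    while 1:
        m = mbox.next()
        if not m:
            break                       # end of the mailbox
        a = Article(m, sequence)        # parses headers and body
        sequence = sequence + 1
        print a.date, a.msgid, a.subject
    fp.close()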
# Archive an Article object.
def add_article(self, article):
# Determine into what archives the article should be placed
- archives=self.get_archives(article)
- if archives==None: archives=[] # If no value was returned, ignore it
- if type(archives)==type(''): archives=[archives] # If a string was returned, convert to a list
- if archives==[]: return # Ignore the article
+ archives = self.get_archives(article)
+ if not archives:
+ return
+ if type(archives) == type(''):
+ archives = [archives]
# Add the article to each archive in turn
- article.filename=filename=self.get_filename(article)
- temp=self.format_article(article) # Reformat the article
- self.message("Processing article #"+str(article.sequence)+' into archives '+str(archives))
+ article.filename = filename = self.get_filename(article)
+ temp = self.format_article(article) # Reformat the article
+ self.message("Processing article #" + str(article.sequence)+ \
+ "into archives " + str(archives))
for i in archives:
- self.archive=i
- archivedir=os.path.join(self.basedir, i)
+ self.archive = i
+ archivedir = os.path.join(self.basedir, i)
# If it's a new archive, create it
if i not in self.archives:
- self.archives.append(i) ; self.update_TOC=1
+ self.archives.append(i)
+ self.update_TOC = 1
self.database.newArchive(i)
# If the archive directory doesn't exist, create it
try: os.stat(archivedir)
except os.error, errdata:
- errno, errmsg=errdata
- if errno==2:
+ errno, errmsg = errdata
+ if errno == 2:
mkdir(archivedir, self.DIRMODE)
else: raise os.error, errdata
self.open_new_archive(i, archivedir)
# Write the HTML-ized article
- f=open(os.path.join(archivedir, filename), 'w')
-## os.chmod(os.path.join(archivedir, filename), self.FILEMODE)
+ f = open(os.path.join(archivedir, filename), 'w')
temp_stdout, sys.stdout = sys.stdout, f
self.write_article_header(temp)
sys.stdout.writelines(temp.body)
self.write_article_footer(temp)
- sys.stdout=temp_stdout
+ sys.stdout = temp_stdout
f.close()
- authorkey=fixAuthor(article.author)+'\000'+article.date
- subjectkey=string.lower(article.subject)+'\000'+article.date
+ authorkey = fixAuthor(article.author)+'\000'+article.date
+ subjectkey = string.lower(article.subject)+'\000'+article.date
# Update parenting info
- parentID=None
- if article.in_reply_to!='': parentID=article.in_reply_to
- elif article.references!=[]:
- # Remove article IDs that aren't in the archive
- refs=filter(lambda x, self=self: self.database.hasArticle(self.archive, x),
- article.references)
- if len(refs):
- refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs)
- maxdate=refs[0]
- for ref in refs[1:]:
- if ref.date>maxdate.date: maxdate=ref
- parentID=maxdate.msgid
+ parentID = None
+ if article.in_reply_to:
+ parentID = article.in_reply_to
+ elif article.references:
+ refs = self._remove_external_references(article.references)
+ if refs:
+                # The helper returns message IDs; fetch each article and
+                # parent this one on the most recent reference.
+                articles = map(lambda r, s=self:
+                               s.database.getArticle(s.archive, r), refs)
+                maxdate = articles[0]
+                for a in articles[1:]:
+                    if a.date > maxdate.date:
+                        maxdate = a
+                parentID = maxdate.msgid
else:
- # Get the oldest article with a matching subject, and assume this is
- # a follow-up to that article
- parentID=self.database.getOldestArticle(self.archive, article.subject)
+ # Get the oldest article with a matching subject, and
+ # assume this is a follow-up to that article
+ parentID = self.database.getOldestArticle(self.archive,
+ article.subject)
- if parentID!=None and not self.database.hasArticle(self.archive, parentID):
- parentID=None
- article.parentID=parentID
- if parentID!=None:
- parent=self.database.getArticle(self.archive, parentID)
- article.threadKey=parent.threadKey+article.date+'-'
- else: article.threadKey=article.date+'-'
- self.database.setThreadKey(self.archive, article.threadKey+'\000'+article.msgid, article.msgid)
+ if parentID is not None \
+ and not self.database.hasArticle(self.archive, parentID):
+ parentID = None
+ article.parentID = parentID
+ if parentID is not None:
+ parent = self.database.getArticle(self.archive, parentID)
+ article.threadKey = parent.threadKey + article.date + '-'
+ else:
+ article.threadKey = article.date + '-'
+ key = article.threadKey + '\000' + article.msgid
+ self.database.setThreadKey(self.archive, key, article.msgid)
self.database.addArticle(i, temp, subjectkey, authorkey)
-
if i not in self._dirty_archives:
self._dirty_archives.append(i)
- del temp
+
+    def _remove_external_references(self, refs):
+        # Keep only references naming articles present in this archive.
+        keep = []
+        for ref in refs:
+            if self.database.hasArticle(self.archive, ref):
+                keep.append(ref)
+        return keep
# Abstract methods: these will need to be overridden by subclasses
# before anything useful can be done.
@@ -512,27 +565,29 @@ class T:
class BSDDBdatabase(Database):
def __init__(self, basedir):
- self.__cachekeys=[] ; self.__cachedict={}
- self.__currentOpenArchive=None # The currently open indices
- self.basedir=os.path.expanduser(basedir)
- self.changed={} # Recently added articles, indexed only by message ID
+ self.__cachekeys = []
+ self.__cachedict = {}
+ self.__currentOpenArchive = None # The currently open indices
+ self.basedir = os.path.expanduser(basedir)
+ self.changed = {} # Recently added articles, indexed only by
+ # message ID
def firstdate(self, archive):
- import time
self.__openIndices(archive)
- date='None'
+ date = 'None'
try:
date, msgid = self.dateIndex.first()
- date=time.asctime(time.localtime(string.atof(date)))
- except KeyError: pass
+ date = time.asctime(time.localtime(string.atof(date)))
+ except KeyError:
+ pass
return date
def lastdate(self, archive):
- import time
self.__openIndices(archive)
- date='None'
+ date = 'None'
try:
date, msgid = self.dateIndex.last()
- date=time.asctime(time.localtime(string.atof(date)))
- except KeyError: pass
+ date = time.asctime(time.localtime(string.atof(date)))
+ except KeyError:
+ pass
return date
def numArticles(self, archive):
self.__openIndices(archive)
@@ -544,55 +599,59 @@ class BSDDBdatabase(Database):
self.__openIndices(archive)
# Add the new article
- self.dateIndex[article.date]=article.msgid
- self.authorIndex[authorkey]=article.msgid
- self.subjectIndex[subjectkey]=article.msgid
- # Set the 'body' attribute to empty, to avoid storing the whole message
- temp = article.body ; article.body=[]
- self.articleIndex[article.msgid]=pickle.dumps(article)
- article.body=temp
- self.changed[archive,article.msgid]=None
+ self.dateIndex[article.date] = article.msgid
+ self.authorIndex[authorkey] = article.msgid
+ self.subjectIndex[subjectkey] = article.msgid
+ # Set the 'body' attribute to empty, to avoid storing the
+ # whole message
+ temp = article.body
+ article.body = []
+ self.articleIndex[article.msgid] = pickle.dumps(article)
+ article.body = temp
+ self.changed[archive,article.msgid] = None
- parentID=article.parentID
- if parentID!=None and self.articleIndex.has_key(parentID):
- parent=self.getArticle(archive, parentID)
- myThreadKey=parent.threadKey+article.date+'-'
- else: myThreadKey = article.date+'-'
- article.threadKey=myThreadKey
- self.setThreadKey(archive, myThreadKey+'\000'+article.msgid, article.msgid)
+ parentID = article.parentID
+ if parentID is not None and self.articleIndex.has_key(parentID):
+ parent = self.getArticle(archive, parentID)
+ myThreadKey = parent.threadKey+article.date + '-'
+ else:
+ myThreadKey = article.date + '-'
+ article.threadKey = myThreadKey
+ key = myThreadKey + '\000' + article.msgid
+ self.setThreadKey(archive, key, article.msgid)
# Open the BSDDB files that are being used as indices
# (dateIndex, authorIndex, subjectIndex, articleIndex)
def __openIndices(self, archive):
- if self.__currentOpenArchive==archive: return
+ if self.__currentOpenArchive == archive: return
import bsddb
self.__closeIndices()
-# print 'opening indices for [%s]' % (repr(archive),)
- arcdir=os.path.join(self.basedir, 'database')
+ arcdir = os.path.join(self.basedir, 'database')
try: mkdir(arcdir)
except os.error: pass
- for i in ['date', 'author', 'subject', 'article', 'thread']:
- t=bsddb.btopen(os.path.join(arcdir, archive+'-'+i), 'c')
- setattr(self, i+'Index', t)
- self.__currentOpenArchive=archive
+ for hdr in ('date', 'author', 'subject', 'article', 'thread'):
+ path = os.path.join(arcdir, archive + '-' + hdr)
+ t = bsddb.btopen(path, 'c')
+ setattr(self, hdr + 'Index', t)
+ self.__currentOpenArchive = archive
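Each archive volume gets five btree files under basedir/database, one per
index (archive-date, archive-author, archive-subject, archive-article,
archive-thread); btopen(path, 'c') creates them on first use. A read-only
probe of one of them; the directory and volume names are illustrative:

    import bsddb
    import os

    arcdir = os.path.join('/var/archives/mylist', 'database')
    idx = bsddb.btopen(os.path.join(arcdir, '2000-September-date'), 'r')
    key, msgid = idx.first()    # oldest article in the volume
    idx.close()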
# Close the BSDDB files that are being used as indices (if they're
# open--this is safe to call if they're already closed)
def __closeIndices(self):
- if self.__currentOpenArchive!=None:
+ if self.__currentOpenArchive is not None:
pass
-# print 'closing indices for [%s]' % (repr(self.__currentOpenArchive),)
- for i in ['date', 'author', 'subject', 'thread', 'article']:
- attr=i+'Index'
+ for hdr in ('date', 'author', 'subject', 'thread', 'article'):
+ attr = hdr + 'Index'
if hasattr(self, attr):
- index=getattr(self, attr)
- if i=='article':
- if not hasattr(self, 'archive_length'): self.archive_length={}
- self.archive_length[self.__currentOpenArchive]=len(index)
+ index = getattr(self, attr)
+ if hdr == 'article':
+ if not hasattr(self, 'archive_length'):
+ self.archive_length = {}
+ self.archive_length[self.__currentOpenArchive] = len(index)
index.close()
delattr(self,attr)
- self.__currentOpenArchive=None
+ self.__currentOpenArchive = None
def close(self):
self.__closeIndices()
def hasArticle(self, archive, msgid):
@@ -600,60 +659,71 @@ class BSDDBdatabase(Database):
return self.articleIndex.has_key(msgid)
def setThreadKey(self, archive, key, msgid):
self.__openIndices(archive)
- self.threadIndex[key]=msgid
+ self.threadIndex[key] = msgid
def getArticle(self, archive, msgid):
self.__openIndices(archive)
if self.__cachedict.has_key(msgid):
self.__cachekeys.remove(msgid)
self.__cachekeys.append(msgid)
return self.__cachedict[msgid]
- if len(self.__cachekeys)==CACHESIZE:
- delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
+ if len(self.__cachekeys) == CACHESIZE:
+ delkey, self.__cachekeys = (self.__cachekeys[0],
+ self.__cachekeys[1:])
del self.__cachedict[delkey]
- s=self.articleIndex[msgid]
- article=pickle.loads(s)
- self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
+ s = self.articleIndex[msgid]
+ article = pickle.loads(s)
+ self.__cachekeys.append(msgid)
+ self.__cachedict[msgid] = article
return article
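getArticle above keeps a bounded most-recently-used cache: __cachekeys holds
message IDs in recency order (least recent first) and __cachedict the
unpickled articles; a hit moves its key to the back, and once CACHESIZE
entries are held the front key is evicted. The same mechanism in isolation:

    CACHESIZE = 3                   # tiny, for demonstration

    cachekeys = []                  # recency order, least recent first
    cachedict = {}

    def cache_get(key, load):
        if cachedict.has_key(key):
            cachekeys.remove(key)   # move to the most-recent position
            cachekeys.append(key)
            return cachedict[key]
        if len(cachekeys) == CACHESIZE:
            delkey = cachekeys[0]   # evict the least recently used
            del cachekeys[0]
            del cachedict[delkey]
        value = load(key)
        cachekeys.append(key)
        cachedict[key] = value
        return value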
def first(self, archive, index):
self.__openIndices(archive)
- index=getattr(self, index+'Index')
+ index = getattr(self, index+'Index')
try:
key, msgid = index.first()
return msgid
- except KeyError: return None
+ except KeyError:
+ return None
def next(self, archive, index):
self.__openIndices(archive)
- index=getattr(self, index+'Index')
+ index = getattr(self, index+'Index')
try:
key, msgid = index.next()
- return msgid
- except KeyError: return None
+ except KeyError:
+ return None
+ else:
+ return msgid
+
def getOldestArticle(self, archive, subject):
self.__openIndices(archive)
- subject=string.lower(subject)
+ subject = string.lower(subject)
try:
- key, tempid=self.subjectIndex.set_location(subject)
+ key, tempid = self.subjectIndex.set_location(subject)
self.subjectIndex.next()
- [subject2, date]= string.split(key, '\0')
- if subject!=subject2: return None
+ [subject2, date] = string.split(key, '\0')
+ if subject != subject2:
+ return None
return tempid
- except KeyError:
+ except KeyError: # XXX what line raises the KeyError?
return None
- def newArchive(self, archive): pass
+ def newArchive(self, archive):
+ pass
+
def clearIndex(self, archive, index):
self.__openIndices(archive)
- index=getattr(self, index+'Index')
- finished=0
+ index = getattr(self, index+'Index')
+ finished = 0
try:
- key, msgid=self.threadIndex.first()
- except KeyError: finished=1
+ key, msgid = self.threadIndex.first()
+ except KeyError:
+ finished = 1
while not finished:
del self.threadIndex[key]
try:
- key, msgid=self.threadIndex.next()
- except KeyError: finished=1
+ key, msgid = self.threadIndex.next()
+ except KeyError:
+ finished = 1