summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Mailman/Archiver/HyperArch.py121
-rw-r--r--Mailman/Archiver/pipermail.py32
2 files changed, 39 insertions, 114 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 4ed0987c9..c4aadf55f 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -47,8 +47,6 @@ from Mailman import mm_cfg
from Mailman.Logging.Syslog import syslog
from Mailman.Utils import mkdir, open_ex
-# TBD: ugly, ugly, ugly -baw
-open = open_ex
gzip = None
if mm_cfg.GZIP_ARCHIVE_TXT_FILES:
@@ -553,11 +551,11 @@ class HyperArchive(pipermail.T):
#if the working file is still here, the archiver may have
# crashed during archiving. Save it, log an error, and move on.
try:
- wf=open(wname,'r')
+ wf=open_ex(wname,'r')
syslog("error","Archive working file %s present. "
"Check %s for possibly unarchived msgs"
% (wname,ename))
- ef=open(ename, 'a+')
+ ef=open_ex(ename, 'a+')
ef.seek(1,2)
if ef.read(1) <> '\n':
ef.write('\n')
@@ -570,7 +568,7 @@ class HyperArchive(pipermail.T):
os.rename(name,wname)
if self._unlocklist:
self.maillist.Unlock()
- archfile=open(wname,'r')
+ archfile=open_ex(wname,'r')
self.processUnixMailbox(archfile, Article)
archfile.close()
os.unlink(wname)
@@ -722,104 +720,21 @@ class HyperArchive(pipermail.T):
def write_TOC(self):
self.sortarchives()
- toc=open(os.path.join(self.basedir, 'index.html'), 'w')
+ toc=open_ex(os.path.join(self.basedir, 'index.html'), 'w')
toc.write(self.html_TOC())
toc.close()
+ def write_article(self, index, article, path):
+ f = open_ex(path, 'w')
+ f.write(article.as_html())
+ f.close()
- # Archive an Article object.
- def add_article(self, article):
- # Determine into what archives the article should be placed
- archives=self.get_archives(article)
- # If no value was returned, ignore it:
- if archives==None: archives=[]
- # If a string was returned, convert to a list:
- if type(archives)==type(''): archives=[archives]
- if archives==[]: return # Ignore the article
-
- # Add the article to each archive in turn
- article.filename=filename=self.get_filename(article)
- article_text=article.as_text()
- temp=self.format_article(article) # Reformat the article
- self.message("Processing article #"
- + str(article.sequence)
- + ' into archives '
- + str(archives))
- for i in archives:
- self.archive=i
- archivedir=os.path.join(self.basedir, i)
- # If it's a new archive, create it
- if i not in self.archives:
- self.archives.append(i)
- self.update_TOC=1
- self.database.newArchive(i)
- # If the archive directory doesn't exist, create it
- try: os.stat(archivedir)
- except os.error, errdata:
- errno, errmsg=errdata
- if errno==2:
- mkdir(archivedir)
- else: raise os.error, errdata
- self.open_new_archive(i, archivedir)
-
- # Write the HTML-ized article to the html archive.
- f=open(os.path.join(archivedir, filename), 'w')
-
- f.write(temp.as_html())
- f.close()
-
- # Write the text article to the text archive.
- archivetextfile=os.path.join(self.basedir,"%s.txt" % i)
- f=open(archivetextfile, 'a+')
-
- f.write(article_text)
- f.close()
-
- authorkey=pipermail.fixAuthor(article.author)+'\000'+article.date
- subjectkey=string.lower(article.subject)+'\000'+article.date
-
- # Update parenting info
- parentID=None
- if article.in_reply_to!='': parentID=article.in_reply_to
- elif article.references!=[]:
- # Remove article IDs that aren't in the archive
- refs=filter(
- lambda x, self=self: self.database.hasArticle(self.archive,
- x),
- article.references)
- if len(refs):
- refs=map(
- lambda x, s=self: s.database.getArticle(s.archive, x),
- refs)
- maxdate=refs[0]
- for ref in refs[1:]:
- if ref.date>maxdate.date: maxdate=ref
- parentID=maxdate.msgid
- else:
- # Get the oldest article with a matching subject, and assume
- # this is a follow-up to that article
- parentID=self.database.getOldestArticle(self.archive,
- article.subject)
-
- if parentID!=None and not self.database.hasArticle(self.archive,
- parentID):
- parentID=None
- article.parentID=parentID
- if parentID!=None:
- parent=self.database.getArticle(self.archive, parentID)
- article.threadKey=parent.threadKey+article.date+'-'
- else: article.threadKey=article.date+'-'
- self.database.setThreadKey(self.archive,
- article.threadKey
- + '\000' + article.msgid,
- article.msgid)
- self.database.addArticle(i, temp, subjectkey, authorkey)
-
- if i not in self._dirty_archives:
- self._dirty_archives.append(i)
- del temp
-
-
+ # Write the text article to the text archive.
+ path = os.path.join(self.basedir, "%s.txt" % index)
+ f =open_ex(path, 'a+')
+ f.write(article.as_text())
+ f.close()
+
# Update only archives that have been marked as "changed".
def update_dirty_archives(self):
for i in self._dirty_archives:
@@ -835,7 +750,7 @@ class HyperArchive(pipermail.T):
gzipfile = os.path.join(self.basedir, '%s.txt.gz' % i)
oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % i)
# open the plain text file
- archt = open(txtfile, 'r')
+ archt = open_ex(txtfile, 'r')
try:
os.rename(gzipfile, oldgzip)
archz = gzip.open(oldgzip)
@@ -870,7 +785,7 @@ class HyperArchive(pipermail.T):
+ os.path.join(self.basedir, 'pipermail.pck'))
self.database.close()
del self.database
- f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+ f=open_ex(os.path.join(self.basedir, 'pipermail.pck'), 'w')
pickle.dump(self.getstate(), f)
f.close()
@@ -985,7 +900,7 @@ class HyperArchive(pipermail.T):
def update_article(self, arcdir, article, prev, next):
self.message('Updating HTML for article '+str(article.sequence))
try:
- f=open(os.path.join(arcdir, article.filename), 'r')
+ f=open_ex(os.path.join(arcdir, article.filename), 'r')
article.loadbody_fromHTML(f)
f.close()
except IOError:
@@ -993,6 +908,6 @@ class HyperArchive(pipermail.T):
% os.path.join(arcdir, article.filename))
article.prev=prev
article.next=next
- f=open(os.path.join(arcdir, article.filename), 'w')
+ f=open_ex(os.path.join(arcdir, article.filename), 'w')
f.write(article.as_html())
f.close()
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index 2ef5eb355..8d0937793 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -468,8 +468,8 @@ class T:
# Add the article to each archive in turn
article.filename = filename = self.get_filename(article)
temp = self.format_article(article) # Reformat the article
- self.message("Processing article #" + str(article.sequence)+ \
- "into archives " + str(archives))
+ fmt = "Processing article #%s into archives %s"
+ self.message(fmt % (article.sequence, archives))
for i in archives:
self.archive = i
archivedir = os.path.join(self.basedir, i)
@@ -488,13 +488,8 @@ class T:
self.open_new_archive(i, archivedir)
# Write the HTML-ized article
- f = open(os.path.join(archivedir, filename), 'w')
- temp_stdout, sys.stdout = sys.stdout, f
- self.write_article_header(temp)
- sys.stdout.writelines(temp.body)
- self.write_article_footer(temp)
- sys.stdout = temp_stdout
- f.close()
+ self.write_article(i, temp, os.path.join(archivedir,
+ filename))
authorkey = fixAuthor(article.author)+'\000'+article.date
subjectkey = string.lower(article.subject)+'\000'+article.date
@@ -506,7 +501,12 @@ class T:
elif article.references:
refs = self._remove_external_references(article.references)
if refs:
- maxdata = max(map(lambda ref:ref.data, refs))
+ maxdate = self.database.getArticle(self.archive,
+ refs[0])
+ for ref in refs[1:]:
+ a = self.database.getArticle(self.archive, ref)
+ if a.date > maxdate.date:
+ maxdate = a
parentID = maxdate.msgid
else:
# Get the oldest article with a matching subject, and
@@ -529,11 +529,21 @@ class T:
if i not in self._dirty_archives:
self._dirty_archives.append(i)
+ def write_article(self, index, article, path):
+ f = open(path, 'w')
+ temp_stdout, sys.stdout = sys.stdout, f
+ self.write_article_header(article)
+ sys.stdout.writelines(article.body)
+ self.write_article_footer(article)
+ sys.stdout = temp_stdout
+ f.close()
+
def _remove_external_references(self, refs):
keep = []
for ref in refs:
if self.database.hasArticle(self.archive, ref):
- kepp.append(ref)
+ keep.append(ref)
+ return keep
# Abstract methods: these will need to be overridden by subclasses
# before anything useful can be done.