diff options
Diffstat (limited to '')
| -rw-r--r-- | Mailman/Archiver/HyperArch.py | 121 | ||||
| -rw-r--r-- | Mailman/Archiver/pipermail.py | 32 |
2 files changed, 39 insertions, 114 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py index 4ed0987c9..c4aadf55f 100644 --- a/Mailman/Archiver/HyperArch.py +++ b/Mailman/Archiver/HyperArch.py @@ -47,8 +47,6 @@ from Mailman import mm_cfg from Mailman.Logging.Syslog import syslog from Mailman.Utils import mkdir, open_ex -# TBD: ugly, ugly, ugly -baw -open = open_ex gzip = None if mm_cfg.GZIP_ARCHIVE_TXT_FILES: @@ -553,11 +551,11 @@ class HyperArchive(pipermail.T): #if the working file is still here, the archiver may have # crashed during archiving. Save it, log an error, and move on. try: - wf=open(wname,'r') + wf=open_ex(wname,'r') syslog("error","Archive working file %s present. " "Check %s for possibly unarchived msgs" % (wname,ename)) - ef=open(ename, 'a+') + ef=open_ex(ename, 'a+') ef.seek(1,2) if ef.read(1) <> '\n': ef.write('\n') @@ -570,7 +568,7 @@ class HyperArchive(pipermail.T): os.rename(name,wname) if self._unlocklist: self.maillist.Unlock() - archfile=open(wname,'r') + archfile=open_ex(wname,'r') self.processUnixMailbox(archfile, Article) archfile.close() os.unlink(wname) @@ -722,104 +720,21 @@ class HyperArchive(pipermail.T): def write_TOC(self): self.sortarchives() - toc=open(os.path.join(self.basedir, 'index.html'), 'w') + toc=open_ex(os.path.join(self.basedir, 'index.html'), 'w') toc.write(self.html_TOC()) toc.close() + def write_article(self, index, article, path): + f = open_ex(path, 'w') + f.write(article.as_html()) + f.close() - # Archive an Article object. - def add_article(self, article): - # Determine into what archives the article should be placed - archives=self.get_archives(article) - # If no value was returned, ignore it: - if archives==None: archives=[] - # If a string was returned, convert to a list: - if type(archives)==type(''): archives=[archives] - if archives==[]: return # Ignore the article - - # Add the article to each archive in turn - article.filename=filename=self.get_filename(article) - article_text=article.as_text() - temp=self.format_article(article) # Reformat the article - self.message("Processing article #" - + str(article.sequence) - + ' into archives ' - + str(archives)) - for i in archives: - self.archive=i - archivedir=os.path.join(self.basedir, i) - # If it's a new archive, create it - if i not in self.archives: - self.archives.append(i) - self.update_TOC=1 - self.database.newArchive(i) - # If the archive directory doesn't exist, create it - try: os.stat(archivedir) - except os.error, errdata: - errno, errmsg=errdata - if errno==2: - mkdir(archivedir) - else: raise os.error, errdata - self.open_new_archive(i, archivedir) - - # Write the HTML-ized article to the html archive. - f=open(os.path.join(archivedir, filename), 'w') - - f.write(temp.as_html()) - f.close() - - # Write the text article to the text archive. - archivetextfile=os.path.join(self.basedir,"%s.txt" % i) - f=open(archivetextfile, 'a+') - - f.write(article_text) - f.close() - - authorkey=pipermail.fixAuthor(article.author)+'\000'+article.date - subjectkey=string.lower(article.subject)+'\000'+article.date - - # Update parenting info - parentID=None - if article.in_reply_to!='': parentID=article.in_reply_to - elif article.references!=[]: - # Remove article IDs that aren't in the archive - refs=filter( - lambda x, self=self: self.database.hasArticle(self.archive, - x), - article.references) - if len(refs): - refs=map( - lambda x, s=self: s.database.getArticle(s.archive, x), - refs) - maxdate=refs[0] - for ref in refs[1:]: - if ref.date>maxdate.date: maxdate=ref - parentID=maxdate.msgid - else: - # Get the oldest article with a matching subject, and assume - # this is a follow-up to that article - parentID=self.database.getOldestArticle(self.archive, - article.subject) - - if parentID!=None and not self.database.hasArticle(self.archive, - parentID): - parentID=None - article.parentID=parentID - if parentID!=None: - parent=self.database.getArticle(self.archive, parentID) - article.threadKey=parent.threadKey+article.date+'-' - else: article.threadKey=article.date+'-' - self.database.setThreadKey(self.archive, - article.threadKey - + '\000' + article.msgid, - article.msgid) - self.database.addArticle(i, temp, subjectkey, authorkey) - - if i not in self._dirty_archives: - self._dirty_archives.append(i) - del temp - - + # Write the text article to the text archive. + path = os.path.join(self.basedir, "%s.txt" % index) + f =open_ex(path, 'a+') + f.write(article.as_text()) + f.close() + # Update only archives that have been marked as "changed". def update_dirty_archives(self): for i in self._dirty_archives: @@ -835,7 +750,7 @@ class HyperArchive(pipermail.T): gzipfile = os.path.join(self.basedir, '%s.txt.gz' % i) oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % i) # open the plain text file - archt = open(txtfile, 'r') + archt = open_ex(txtfile, 'r') try: os.rename(gzipfile, oldgzip) archz = gzip.open(oldgzip) @@ -870,7 +785,7 @@ class HyperArchive(pipermail.T): + os.path.join(self.basedir, 'pipermail.pck')) self.database.close() del self.database - f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w') + f=open_ex(os.path.join(self.basedir, 'pipermail.pck'), 'w') pickle.dump(self.getstate(), f) f.close() @@ -985,7 +900,7 @@ class HyperArchive(pipermail.T): def update_article(self, arcdir, article, prev, next): self.message('Updating HTML for article '+str(article.sequence)) try: - f=open(os.path.join(arcdir, article.filename), 'r') + f=open_ex(os.path.join(arcdir, article.filename), 'r') article.loadbody_fromHTML(f) f.close() except IOError: @@ -993,6 +908,6 @@ class HyperArchive(pipermail.T): % os.path.join(arcdir, article.filename)) article.prev=prev article.next=next - f=open(os.path.join(arcdir, article.filename), 'w') + f=open_ex(os.path.join(arcdir, article.filename), 'w') f.write(article.as_html()) f.close() diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py index 2ef5eb355..8d0937793 100644 --- a/Mailman/Archiver/pipermail.py +++ b/Mailman/Archiver/pipermail.py @@ -468,8 +468,8 @@ class T: # Add the article to each archive in turn article.filename = filename = self.get_filename(article) temp = self.format_article(article) # Reformat the article - self.message("Processing article #" + str(article.sequence)+ \ - "into archives " + str(archives)) + fmt = "Processing article #%s into archives %s" + self.message(fmt % (article.sequence, archives)) for i in archives: self.archive = i archivedir = os.path.join(self.basedir, i) @@ -488,13 +488,8 @@ class T: self.open_new_archive(i, archivedir) # Write the HTML-ized article - f = open(os.path.join(archivedir, filename), 'w') - temp_stdout, sys.stdout = sys.stdout, f - self.write_article_header(temp) - sys.stdout.writelines(temp.body) - self.write_article_footer(temp) - sys.stdout = temp_stdout - f.close() + self.write_article(i, temp, os.path.join(archivedir, + filename)) authorkey = fixAuthor(article.author)+'\000'+article.date subjectkey = string.lower(article.subject)+'\000'+article.date @@ -506,7 +501,12 @@ class T: elif article.references: refs = self._remove_external_references(article.references) if refs: - maxdata = max(map(lambda ref:ref.data, refs)) + maxdate = self.database.getArticle(self.archive, + refs[0]) + for ref in refs[1:]: + a = self.database.getArticle(self.archive, ref) + if a.date > maxdate.date: + maxdate = a parentID = maxdate.msgid else: # Get the oldest article with a matching subject, and @@ -529,11 +529,21 @@ class T: if i not in self._dirty_archives: self._dirty_archives.append(i) + def write_article(self, index, article, path): + f = open(path, 'w') + temp_stdout, sys.stdout = sys.stdout, f + self.write_article_header(article) + sys.stdout.writelines(article.body) + self.write_article_footer(article) + sys.stdout = temp_stdout + f.close() + def _remove_external_references(self, refs): keep = [] for ref in refs: if self.database.hasArticle(self.archive, ref): - kepp.append(ref) + keep.append(ref) + return keep # Abstract methods: these will need to be overridden by subclasses # before anything useful can be done. |
