diff options
| author | cotton | 1998-10-22 21:14:44 +0000 |
|---|---|---|
| committer | cotton | 1998-10-22 21:14:44 +0000 |
| commit | 73b134e83997212d049c58946d9e2d2e2b4b070c (patch) | |
| tree | 9e3bd65b1ac74228498231863970337cc5098097 /Mailman/Archiver/HyperArch.py | |
| parent | 0eb0572a6f7f521c23cd88d13b06fd8c48d15511 (diff) | |
| download | mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.gz mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.tar.zst mailman-73b134e83997212d049c58946d9e2d2e2b4b070c.zip | |
Diffstat (limited to 'Mailman/Archiver/HyperArch.py')
| -rw-r--r-- | Mailman/Archiver/HyperArch.py | 967 |
1 files changed, 967 insertions, 0 deletions
# Copyright (C) 1998 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""HyperArch: Pipermail archiving for MailMan

     - The Dragon De Monsyne <dragondm@integral.org>

   TODO:
     - The templates should be files in Mailman's Template dir, instead
       of static strings.
     - Each list should be able to have its own templates.
       Also, it should automatically fall back to the default template in
       case of an error in the list-specific template.
     - Should be able to force all HTML to be regenerated the next time the
       archive is run, in case a template is changed.
     - Run a command to generate a tarball of the html archives for
       downloading (probably in the 'update_dirty_archives' method)

"""

import re, cgi, urllib, string
import time, pickle, os, posixfile
import HyperDatabase
import pipermail
import Mailman.mm_cfg
mm_cfg = Mailman.mm_cfg


def html_quote(s):
    """Return s with the HTML metacharacters & < > " replaced by entities."""
    # '&' has to be replaced first, otherwise the ampersands introduced by
    # the later replacements would themselves be escaped again.
    repls = (('&', '&amp;'),
             ('<', '&lt;'),
             ('>', '&gt;'),
             ('"', '&quot;'))
    for thing, repl in repls:
        s = string.replace(s, thing, repl)
    return s


def url_quote(s):
    """Return s percent-encoded for use inside a URL (urllib.quote)."""
    return urllib.quote(s)


# Plain-text rendering of one article, as appended to <volume>.txt.
article_text_template = """\
From %(email)s %(datestr)s
Date: %(datestr)s
From: %(author)s %(email)s
Subject: %(subject)s

%(body)s

"""

# HTML page for a single article.  The <!--beginarticle--> and
# <!--endarticle--> markers delimit the body so that
# Article.loadbody_fromHTML() can read it back out of the page.
article_template = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
 <HEAD>
   <TITLE> %(subject_html)s</TITLE>
   <LINK REL="Index" HREF="index.html" >
   <LINK REL="made" HREF="mailto:%(email_url)s">
   %(prev)s
   %(next)s
 </HEAD>
 <BODY BGCOLOR="#ffffff">
   <H1>%(subject_html)s</H1>
    <B>%(author_html)s</B>
    <A HREF="mailto:%(email_url)s" TITLE="%(subject_html)s">%(email_html)s</A><BR>
    <I>%(datestr_html)s</I>
    <P><UL>
        %(prev_wsubj)s
        %(next_wsubj)s
         <LI> <B>Messages sorted by:</B>
              <a href="date.html#%(sequence)s">[ date ]</a>
              <a href="thread.html#%(sequence)s">[ thread ]</a>
              <a href="subject.html#%(sequence)s">[ subject ]</a>
              <a href="author.html#%(sequence)s">[ author ]</a>
         </LI>
      </UL>
    <HR>
<!--beginarticle-->
%(body)s

<!--endarticle-->
    <HR>
    <P><UL>
        <!--threads-->
        %(prev_wsubj)s
        %(next_wsubj)s
         <LI> <B>Messages sorted by:</B>
              <a href="date.html#%(sequence)s">[ date ]</a>
              <a href="thread.html#%(sequence)s">[ thread ]</a>
              <a href="subject.html#%(sequence)s">[ subject ]</a>
              <a href="author.html#%(sequence)s">[ author ]</a>
         </LI>
     </UL>
</body></html>
"""


def CGIescape(arg):
    """Return str(arg) with & < > and the double quote replaced by entities."""
    s = cgi.escape(str(arg))
    # cgi.escape() without its quote flag leaves '"' alone; escape it too so
    # the result is safe inside a double-quoted attribute value.
    s = re.sub('"', '&quot;', s)
    return s


# Parenthesized human name
paren_name_pat = re.compile(r'([(].*[)])')
# Subject lines preceded with 'Re:'
REpat = re.compile(r"\s*RE\s*:\s*", re.IGNORECASE)
# E-mail addresses and URLs in text
emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
# Argh!  This pattern is buggy, and will choke on URLs with GET parameters.
urlpat = re.compile(r'(\w+://[^>)\s]+)')         # URLs in text
# Blank lines
blankpat = re.compile(r'^\s*$')

#
# Starting <html> directive
htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE)
# Ending </html> directive
nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE)
# Match quoted text
# NOTE(review): the second alternative duplicates '>' from the character
# class; it may originally have been the entity form of '>' -- confirm
# against the upstream file before changing the pattern.
quotedpat = re.compile(r'^([>|:]|>)+')


# Note: I'm overriding most, if not all of the pipermail Article class here -ddm
# The Article class encapsulates a single posting.  The attributes
# are:
#
#  sequence : Sequence number, unique for each article in a set of archives
#  subject  : Subject
#  datestr  : The posting date, in human-readable format
#  date     : The posting date, in purely numeric format
#  headers  : Any other headers of interest
#  author   : The author's name (and possibly organization)
#  email    : The author's e-mail address
#  msgid    : A unique message ID
#  in_reply_to : If !="", this is the msgid of the article being replied to
#  references: A (possibly empty) list of msgid's of earlier articles in the thread
#  body     : A list of strings making up the message body

class Article(pipermail.Article):
    # Fallback timestamp for articles that carry no parseable Date: header.
    # NOTE(review): __init__ assigns the updated value on the instance, so
    # this class-level value itself never advances -- confirm whether that
    # is intended.
    __last_article_time = time.time()

    html_tmpl = article_template
    text_tmpl = article_text_template

    def as_html(self):
        """Return the article rendered through html_tmpl.

        The template dictionary is a copy of the instance attributes plus
        escaped variants of the header fields and the previous/next
        navigation snippets (empty strings when there is no neighbour).
        """
        d = self.__dict__.copy()
        if self.prev:
            prev_url = url_quote(self.prev.filename)
            d["prev"] = '<LINK REL="Previous" HREF="%s">' % prev_url
            d["prev_wsubj"] = \
                '<LI> Previous message: <A HREF="%s">%s</A></li>' % \
                (prev_url, html_quote(self.prev.subject))
        else:
            d["prev"] = d["prev_wsubj"] = ""

        if self.next:
            # The file name goes into a URL, so it is URL-quoted exactly
            # like the "previous" link above.
            next_url = url_quote(self.next.filename)
            d["next"] = '<LINK REL="Next" HREF="%s">' % next_url
            d["next_wsubj"] = \
                '<LI> Next message: <A HREF="%s">%s</A></li>' % \
                (next_url, html_quote(self.next.subject))
        else:
            d["next"] = d["next_wsubj"] = ""

        d["email_html"] = html_quote(self.email)
        d["subject_html"] = html_quote(self.subject)
        d["author_html"] = html_quote(self.author)
        d["email_url"] = url_quote(self.email)
        d["datestr_html"] = html_quote(self.datestr)
        d["body"] = string.join(self.body, "")
        return self.html_tmpl % d

    def as_text(self):
        """Return the article rendered through text_tmpl."""
        d = self.__dict__.copy()
        d["body"] = string.join(self.body, "")
        return self.text_tmpl % d

    def __init__(self, message=None, sequence=0, keepHeaders=[]):
        """Build an article from a parsed mail message.

        message     -- message object providing getheader(), has_key(),
                       item access, getdate_tz(), getaddr(), rewindbody()
                       and an fp attribute; None creates an empty shell
                       with no attributes set
        sequence    -- sequence number of the article
        keepHeaders -- names of additional headers to copy into self.headers
                       (only iterated, never modified)
        """
        if message is None:
            return
        self.sequence = sequence

        self.parentID = None
        self.threadKey = None
        self.prev = None
        self.next = None

        # Use the Message-Id when there is one;
        # otherwise the current sequence number is used.
        msgid = pipermail.strip_separators(message.getheader('Message-Id'))
        if msgid == "":
            self.msgid = str(self.sequence)
        else:
            self.msgid = msgid

        if message.has_key('Subject'):
            self.subject = str(message['Subject'])
        else:
            self.subject = 'No subject'
        # Strip every leading "Re:" so replies share the original subject.
        while 1:
            result = REpat.match(self.subject)
            if not result:
                break
            self.subject = self.subject[result.end(0):]
        if self.subject == "":
            self.subject = 'No subject'

        if message.has_key('Date'):
            self.datestr = str(message['Date'])
            date = message.getdate_tz('Date')
        else:
            self.datestr = 'None'
            date = None
        if date is not None:
            date, tzoffset = date[:9], date[-1]
            if not tzoffset:
                tzoffset = 0
            date = time.mktime(date) - tzoffset
        else:
            # No usable date: place the article just after the last known one.
            date = self.__last_article_time + 1

        self.__last_article_time = date
        self.date = '%011i' % (date,)

        # Figure out the e-mail address and poster's name
        self.author, self.email = message.getaddr('From')
        self.email = pipermail.strip_separators(self.email)
        self.author = pipermail.strip_separators(self.author)

        if self.author == "":
            self.author = self.email

        # Save the 'In-Reply-To:' and 'References:' lines
        self.in_reply_to = ''
        i_r_t = message.getheader('In-Reply-To')
        if i_r_t is not None:
            match = pipermail.msgid_pat.search(i_r_t)
            if match is not None:
                self.in_reply_to = pipermail.strip_separators(match.group(1))

        references = message.getheader('References')
        if references is None:
            self.references = []
        else:
            self.references = map(pipermail.strip_separators,
                                  string.split(references))

        # Save any other interesting headers
        self.headers = {}
        for name in keepHeaders:
            if message.has_key(name):
                self.headers[name] = message[name]

        # Read the message body
        self.body = []
        message.rewindbody()
        while 1:
            line = message.fp.readline()
            if line == "":
                break
            self.body.append(line)

    def loadbody_fromHTML(self, fileobj):
        """Reload self.body from a previously generated article page.

        Only the lines strictly between the <!--beginarticle--> and
        <!--endarticle--> marker lines are kept.
        """
        self.body = []
        begin = 0
        while 1:
            line = fileobj.readline()
            if not line:
                break
            stripped = string.strip(line)
            if (not begin) and stripped == '<!--beginarticle-->':
                begin = 1
                continue
            if stripped == '<!--endarticle-->':
                break
            if begin:
                self.body.append(line)

    def __getstate__(self):
        """Return the picklable state: no list/neighbour links, empty body."""
        d = {}
        for each in self.__dict__.keys():
            if each in ['maillist', 'prev', 'next', 'body']:
                d[each] = None
            else:
                d[each] = self.__dict__[each]
        # The body is not stored; update_article() re-reads it from the
        # HTML file through loadbody_fromHTML().
        d['body'] = []
        return d


#
# Archive class specific stuff
#
index_header_template = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
  <HEAD>
     <title>The %(listname)s %(archive)s Archive by %(archtype)s</title>
  </HEAD>
  <BODY BGCOLOR="#ffffff">
      <a name="start"></A>
      <h1>%(archive)s Archives by %(archtype)s</h1>
      <ul>
         <li> <b>Messages sorted by:</b>
                %(thread_ref)s
                %(subject_ref)s
                %(author_ref)s
                %(date_ref)s

             <li><b><a href="%(listinfo)s">More info on this list...</a></b></li>
      </ul>
      <p><b>Starting:</b> <i>%(firstdate)s</i><br>
         <b>Ending:</b> <i>%(lastdate)s</i><br>
         <b>Messages:</b> %(size)s<p>
     <ul>
"""

index_footer_template = """\
    </ul>
    <p>
      <a name="end"><b>Last message date:</b></a>
       <i>%(lastdate)s</i><br>
    <b>Archived on:</b> <i><!--#var archivedate --></i>
    <p>
   <ul>
         <li> <b>Messages sorted by:</b>
                %(thread_ref)s
                %(subject_ref)s
                %(author_ref)s
                %(date_ref)s
             <li><b><a href="%(listinfo)s">More info on this list...</a></b></li>
     </ul>
     <p>
     <hr>
     <i>This archive was generated by
     <a href="http://starship.skyport.net/crew/amk/maintained/pipermail.html">
     Pipermail %(version)s</a>.</i>
  </BODY>
</HTML>
"""
list...</a></b></li> + </ul> + <p> + <hr> + <i>This archive was generated by + <a href="http://starship.skyport.net/crew/amk/maintained/pipermail.html"> + Pipermail %(version)s</a>.</i> + </BODY> +</HTML> +""" + +TOC_template="""\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> +<HTML> + <HEAD> + <title>The %(listname)s Archives</title> + </HEAD> + <BODY BGCOLOR="#ffffff"> + <h1>The %(listname)s Archives </h1> + <p> + <a href="%(listinfo)s">More info on this list...</a> + </p> + %(noarchive_msg)s + %(archive_listing_start)s + %(archive_listing)s + %(archive_listing_end)s + </BODY> + </HTML> +""" + +TOC_entry_template = """\ + + <tr> + <td>%(archive)s:</td> + <td> + <A href="%(archive)s/thread.html">[ Thread ]</a> + <A href="%(archive)s/subject.html">[ Subject ]</a> + <A href="%(archive)s/author.html">[ Author ]</a> + <A href="%(archive)s/date.html">[ Date ]</a> + </td> + <td><A href="%(archive)s.txt">[ Text ]</a></td> + </tr> + +""" +arch_listing_start = """\ + <table border=3> + <tr><td>Archive</td> <td>View by:</td> <td>Downloadable version</td></tr> +""" + +arch_listing_end = """\ + </table> +""" + + +class HyperArchive(pipermail.T): + + # some defaults + DIRMODE=0775 + FILEMODE=0664 + + + VERBOSE=0 + DEFAULTINDEX='thread' + ARCHIVE_PERIOD='month' + + THREADLAZY=0 + THREADLEVELS=3 + + ALLOWHTML=1 + SHOWHTML=1 + IQUOTES=1 + SHOWBR=1 + + html_hdr_tmpl=index_header_template + html_foot_tmpl=index_footer_template + html_TOC_tmpl=TOC_template + TOC_entry_tmpl = TOC_entry_template + arch_listing_start = arch_listing_start + arch_listing_end = arch_listing_end + + def html_foot(self): + d = {"lastdate": html_quote(self.lastdate), + "archivedate": html_quote(self.archivedate), + "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo'), + "version": self.version} + for t in ("thread", "subject", "author", "date"): + cap = string.upper(t[0]) + t[1:] + if self.type == cap: + d["%s_ref" % (t)] = "" + else: + d["%s_ref" % (t)] = '<a href="%s.html#start">[ %s ]</a>' % 
(t, t) + return self.html_foot_tmpl % d + + + def html_head(self): + d = {"listname": html_quote(self.maillist.real_name), + "archtype": self.type, + "archive": self.archive, + "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo'), + "firstdate": html_quote(self.firstdate), + "lastdate": html_quote(self.lastdate), + "size": self.size, + } + for t in ("thread", "subject", "author", "date"): + cap = string.upper(t[0]) + t[1:] + if self.type == cap: + d["%s_ref" % (t)] = "" + else: + d["%s_ref" % (t)] = '<a href="%s.html#start">[ %s ]</a>' % (t, t) + return self.html_hdr_tmpl % d + + + + def html_TOC(self): + d = {"listname": self.maillist.real_name, + "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo') } + listing = "" + if not self.archives: + d["noarchive_msg"] = '<P>Currently, there are no archives. </P>' + d["archive_listing_start"] = "" + d["archive_listing_end"] = "" + d["archive_listing"] = "" + else: + d["noarchive_msg"] = "" + d["archive_listing_start"] = self.arch_listing_start + d["archive_listing_end"] = self.arch_listing_end + for a in self.archives: + listing = listing + self.TOC_entry_tmpl % {"archive": a} + d["archive_listing"] = listing + return self.html_TOC_tmpl % d + + + + def __init__(self, maillist,unlock=1): + self.maillist=maillist + self._unlocklist=unlock + self._lock_file=None + + + # + # this is always called from inside it's own forked + # process, and access is protected via list.Save() + # so we're leavin' the perms wide open from here on out + # + ou = os.umask(0) + # + # can't init the database while other + # processes are writing to it! 
+ # XXX TODO- implement native locking + # with mailman's flock module for HyperDatabase.HyperDatabase + # + pipermail.T.__init__(self, + maillist.archive_directory, + reload=1, + database=HyperDatabase.HyperDatabase(maillist.archive_directory)) + + if hasattr(self.maillist,'archive_volume_frequency'): + if self.maillist.archive_volume_frequency == 0: + self.ARCHIVE_PERIOD='year' + elif self.maillist.archive_volume_frequency == 2: + self.ARCHIVE_PERIOD='quarter' + elif self.maillist.archive_volume_frequency == 3: + self.ARCHIVE_PERIOD='week' + elif self.maillist.archive_volume_frequency == 4: + self.ARCHIVE_PERIOD='day' + else: + self.ARCHIVE_PERIOD='month' + + def GetArchLock(self): + if self._lock_file: + return 1 + ou = os.umask(0) + try: + self._lock_file = posixfile.open( + os.path.join(mm_cfg.LOCK_DIR, '%s@arch.lock' % + self.maillist._internal_name), 'a+') + finally: + os.umask(ou) + # minor race condition here, there is no way to atomicly + # check & get a lock. That shouldn't matter here tho' -ddm + if not self._lock_file.lock('w?', 1): + self._lock_file.lock('w|', 1) + else: + return 0 + return 1 + + def DropArchLock(self): + if self._lock_file: + self._lock_file.lock('u') + self._lock_file.close() + self._lock_file = None + + def processListArch(self): + name = self.maillist.ArchiveFileName() + wname= name+'.working' + ename= name+'.err_unarchived' + try: + os.stat(name) + except (IOError,os.error): + #no archive file, nothin to do -ddm + return + + #see if arch is locked here -ddm + if not self.GetArchLock(): + #another archiver is running, nothing to do. -ddm + return + + #if the working file is still here, the archiver may have + # crashed during archiving. Save it, log an error, and move on. + try: + wf=open(wname,'r') + self.maillist.LogMsg("error","Archive working file %s present. 
" + "Check %s for possibly unarchived msgs" % + (wname,ename) ) + ef=open(ename, 'a+') + ef.seek(1,2) + if ef.read(1) <> '\n': + ef.write('\n') + ef.write(wf.read()) + ef.close() + wf.close() + os.unlink(wname) + except IOError: + pass + os.rename(name,wname) + if self._unlocklist: + self.maillist.Unlock() + archfile=open(wname,'r') + self.processUnixMailbox(archfile, Article) + archfile.close() + os.unlink(wname) + self.DropArchLock() + + def get_filename(self, article): + return '%06i.html' % (article.sequence,) + + def get_archives(self, article): + """Return a list of indexes where the article should be filed. + A string can be returned if the list only contains one entry, + and the empty list is legal.""" + if article.subject in ['subscribe', 'unsubscribe']: return None + res = self.dateToVolName(string.atof(article.date)) + import sys + sys.stderr.write("figuring article archives\n") + sys.stderr.write(res + "\n") + return res + + + +# The following two methods should be inverses of each other. -ddm + + def dateToVolName(self,date): + datetuple=time.gmtime(date) + if self.ARCHIVE_PERIOD=='year': + return time.strftime("%Y",datetuple) + elif self.ARCHIVE_PERIOD=='quarter': + if datetuple[1] in [1,2,3]: + return time.strftime("%Yq1",datetuple) + elif datetuple[1] in [4,5,6]: + return time.strftime("%Yq2",datetuple) + elif datetuple[1] in [7,8,9]: + return time.strftime("%Yq3",datetuple) + else: + return time.strftime("%Yq4",datetuple) + elif self.ARCHIVE_PERIOD == 'day': + return time.strftime("%Y%m%d", datetuple) + elif self.ARCHIVE_PERIOD == 'week': + datetuple = list(datetuple) + datetuple[2] = datetuple[2] - datetuple[6] # subtract week day + # + # even if the the day of the month counter is negative, + # we still get the right thing from strftime! -scott + # + return time.strftime("Week-of-Mon-%Y%m%d", tuple(datetuple)) + # month. 
-ddm + else: + return time.strftime("%Y-%B",datetuple) + + + def volNameToDate(self,volname): + volname=string.strip(volname) + volre= { 'year' : r'^(?P<year>[0-9]{4,4})$', + 'quarter' : r'^(?P<year>[0-9]{4,4})q(?P<quarter>[1234])$', + 'month' : r'^(?P<year>[0-9]{4,4})-(?P<month>[a-zA-Z]+)$', + 'week': r'^Week-of-Mon-(?P<year>[0-9]{4,4})(?P<month>[01][0-9])(?P<day>[0123][0-9])', + 'day': r'^(?P<year>[0-9]{4,4})(?P<month>[01][0-9])(?P<day>[0123][0-9])$'} + for each in volre.keys(): + match=re.match(volre[each],volname) + if match: + year=string.atoi(match.group('year')) + month=1 + day = 1 + if each == 'quarter': + q=string.atoi(match.group('quarter')) + month=(q*3)-2 + elif each == 'month': + monthstr=string.lower(match.group('month')) + m=[] + for i in range(1,13): + m.append(string.lower( + time.strftime("%B",(1999,i,1,0,0,0,0,1,0)))) + try: + month=m.index(monthstr)+1 + except ValueError: + pass + elif each == 'week' or each == 'day': + month = string.atoi(match.group("month")) + day = string.atoi(match.group("day")) + return time.mktime((year,month,1,0,0,0,0,1,-1)) + return 0.0 + + def sortarchives(self): + def sf(a,b,s=self): + al=s.volNameToDate(a) + bl=s.volNameToDate(b) + if al>bl: + return 1 + elif al<bl: + return -1 + else: + return 0 + if self.ARCHIVE_PERIOD in ('month','year','quarter'): + self.archives.sort(sf) + else: + self.archives.sort() + + def message(self, msg): + if self.VERBOSE: + import sys + f = sys.stderr + f.write(msg) + if msg[-1:]!='\n': f.write('\n') + f.flush() + + def open_new_archive(self, archive, archivedir): + import os + index_html=os.path.join(archivedir, 'index.html') + try: os.unlink(index_html) + except: pass + os.symlink(self.DEFAULTINDEX+'.html',index_html) + + + def write_index_header(self): + self.depth=0 + print self.html_head() + + if not self.THREADLAZY and self.type=='Thread': + # Update the threaded index + self.message("Computing threaded index\n") + self.updateThreadedIndex() + + + def write_index_footer(self): + 
import string + for i in range(self.depth): print '</UL>' + print self.html_foot() + + def write_index_entry(self, article): + print '<LI> <A HREF="%s">%s</A> <A NAME="%i"></A><I>%s</I>' % (urllib.quote(article.filename), + CGIescape(article.subject), article.sequence, + CGIescape(article.author)) + + def write_threadindex_entry(self, article, depth): + if depth<0: + sys.stderr.write('depth<0') ; depth=0 + if depth>self.THREADLEVELS: depth=self.THREADLEVELS + if depth<self.depth: + for i in range(self.depth-depth): print '</UL>' + elif depth>self.depth: + for i in range(depth-self.depth): print '<UL>' + print '<!--%i %s -->' % (depth, article.threadKey) + self.depth=depth + print '<LI> <A HREF="%s">%s</A> <A NAME="%i"></A><I>%s</I>' % (CGIescape(urllib.quote(article.filename)), + CGIescape(article.subject), article.sequence+910, + CGIescape(article.author)) + + def write_TOC(self): + self.sortarchives() + toc=open(os.path.join(self.basedir, 'index.html'), 'w') + toc.write(self.html_TOC()) + toc.close() + + + # Archive an Article object. 
+ def add_article(self, article): + # Determine into what archives the article should be placed + archives=self.get_archives(article) + if archives==None: archives=[] # If no value was returned, ignore it + if type(archives)==type(''): archives=[archives] # If a string was returned, convert to a list + if archives==[]: return # Ignore the article + + # Add the article to each archive in turn + article.filename=filename=self.get_filename(article) + article_text=article.as_text() + temp=self.format_article(article) # Reformat the article + self.message("Processing article #"+str(article.sequence)+' into archives '+str(archives)) + for i in archives: + self.archive=i + archivedir=os.path.join(self.basedir, i) + # If it's a new archive, create it + if i not in self.archives: + self.archives.append(i) ; self.update_TOC=1 + self.database.newArchive(i) + # If the archive directory doesn't exist, create it + try: os.stat(archivedir) + except os.error, errdata: + errno, errmsg=errdata + if errno==2: + os.mkdir(archivedir) + else: raise os.error, errdata + self.open_new_archive(i, archivedir) + + # Write the HTML-ized article to the html archive. + f=open(os.path.join(archivedir, filename), 'w') + + f.write(temp.as_html()) + f.close() + + # Write the text article to the text archive. 
+ archivetextfile=os.path.join(self.basedir,"%s.txt" % i) + f=open(archivetextfile, 'a+') + + f.write(article_text) + f.close() + + authorkey=pipermail.fixAuthor(article.author)+'\000'+article.date + subjectkey=string.lower(article.subject)+'\000'+article.date + + # Update parenting info + parentID=None + if article.in_reply_to!='': parentID=article.in_reply_to + elif article.references!=[]: + # Remove article IDs that aren't in the archive + refs=filter(lambda x, self=self: self.database.hasArticle(self.archive, x), + article.references) + if len(refs): + refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs) + maxdate=refs[0] + for ref in refs[1:]: + if ref.date>maxdate.date: maxdate=ref + parentID=maxdate.msgid + else: + # Get the oldest article with a matching subject, and assume this is + # a follow-up to that article + parentID=self.database.getOldestArticle(self.archive, article.subject) + + if parentID!=None and not self.database.hasArticle(self.archive, parentID): + parentID=None + article.parentID=parentID + if parentID!=None: + parent=self.database.getArticle(self.archive, parentID) + article.threadKey=parent.threadKey+article.date+'-' + else: article.threadKey=article.date+'-' + self.database.setThreadKey(self.archive, article.threadKey+'\000'+article.msgid, article.msgid) + self.database.addArticle(i, temp, subjectkey, authorkey) + + if i not in self._dirty_archives: + self._dirty_archives.append(i) + del temp + + + # Update only archives that have been marked as "changed". 
+ def update_dirty_archives(self): + for i in self._dirty_archives: + self.update_archive(i) + archz=None + archt=None + try: + import gzip + try: + archt=open(os.path.join(self.basedir,"%s.txt" % i),"r") + try: + os.rename(os.path.join(self.basedir,"%s.txt.gz" % i), + os.path.join(self.basedir,"%s.old.txt.gz" % i)) + archz=gzip.open(os.path.join(self.basedir,"%s.old.txt.gz" % i),"r") + except (IOError, RuntimeError, os.error): + pass + newz=gzip.open(os.path.join(self.basedir,"%s.txt.gz" % i),"w") + if archz : + newz.write(archz.read()) + archz.close() + os.unlink(os.path.join(self.basedir,"%s.old.txt.gz" % i)) + newz.write(archt.read()) + newz.close() + archt.close() + os.unlink(os.path.join(self.basedir,"%s.txt" % i)) + except IOError: + pass + except ImportError: + pass + self._dirty_archives=[] + + def close(self): + "Close an archive, saving its state and updating any changed archives." + self.update_dirty_archives()# Update all changed archives + # If required, update the table of contents + if self.update_TOC or 1: + self.update_TOC=0 + self.write_TOC() + # Save the collective state + self.message('Pickling archive state into '+os.path.join(self.basedir, 'pipermail.pck')) + self.database.close() + del self.database + f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w') + pickle.dump(self.__getstate__(), f) + f.close() + + def __getstate__(self): + d={} + for each in self.__dict__.keys(): + if not (each in ['maillist','_lock_file','_unlocklist']): + d[each] = self.__dict__[each] + return d + + + + + # Add <A HREF="..."> tags around URLs and e-mail addresses. 
+ + def __processbody_URLquote(self, source, dest): + body2=[] + last_line_was_quoted=0 + for i in xrange(0, len(source)): + Lorig=L=source[i] ; prefix=suffix="" + if L==None: continue + # Italicise quoted text + if self.IQUOTES: + quoted=quotedpat.match(L) + if quoted==None: last_line_was_quoted=0 + else: + quoted = quoted.end(0) + prefix=CGIescape(L[:quoted]) + '<i>' + suffix='</I>' + if self.SHOWHTML: suffix=suffix+'<BR>' + if not last_line_was_quoted: prefix='<BR>'+prefix + L= L[quoted:] + last_line_was_quoted=1 + # Check for an e-mail address + L2="" ; jr=emailpat.search(L) ; kr=urlpat.search(L) + while jr!=None or kr!=None: + if jr==None: j=-1 + else: j = jr.start(0) + if kr==None: k=-1 + else: k = kr.start(0) + if j!=-1 and (j<k or k==-1): text=jr.group(1) ; URL='mailto:'+text ; pos=j + elif k!=-1 and (j>k or j==-1): text=URL=kr.group(1) ; pos=k + else: # j==k + raise ValueError, "j==k: This can't happen!" + length=len(text) +# sys.stderr.write("URL: %s %s %s \n" % (CGIescape(L[:pos]), URL, CGIescape(text))) + L2=L2+'%s<A HREF="%s">%s</A>' % (CGIescape(L[:pos]), URL, CGIescape(text)) + L=L[pos+length:] + jr=emailpat.search(L) ; kr=urlpat.search(L) + if jr==None and kr==None: L=CGIescape(L) + L=prefix+L2+L+suffix + if L!=Lorig: source[i], dest[i]=None, L + + # Escape all special characters + def __processbody_CGIescape(self, source, dest): + import cgi + for i in xrange(0, len(source)): + if source[i]!=None: + dest[i]=cgi.escape(source[i]) ; source[i]=None + + # Perform Hypermail-style processing of <HTML></HTML> directives + # in message bodies. Lines between <HTML> and </HTML> will be written + # out precisely as they are; other lines will be passed to func2 + # for further processing . 
+ + def __processbody_HTML(self, source, dest): + l=len(source) ; i=0 + while i<l: + while i<l and htmlpat.match(source[i])==None: i=i+1 + if i<l: source[i]=None ; i=i+1 + while i<l and nohtmlpat.match(source[i])==None: + dest[i], source[i] = source[i], None + i=i+1 + if i<l: source[i]=None ; i=i+1 + + def format_article(self, article): + source=article.body ; dest=[None]*len(source) + # Handle <HTML> </HTML> directives + if self.ALLOWHTML: + self.__processbody_HTML(source, dest) + self.__processbody_URLquote(source, dest) + if not self.SHOWHTML: + # Do simple formatting here: <PRE>..</PRE> + for i in range(0, len(source)): + s=source[i] + if s==None: continue + dest[i]=CGIescape(s) ; source[i]=None + if len(dest) > 0: + dest[0]='<PRE>'+dest[0] ; dest[-1]=dest[-1]+'</PRE>' + else: + # Do fancy formatting here + if self.SHOWBR: + # Add <BR> onto every line + for i in range(0, len(source)): + s=source[i] + if s==None: continue + s=CGIescape(s) +'<BR>' + dest[i]=s ; source[i]=None + else: + for i in range(0, len(source)): + s=source[i] + if s==None: continue + s=CGIescape(s) + if s[0:1] in ' \t\n': s='<P>'+s + dest[i]=s ; source[i]=None + article.body=filter(lambda x: x!=None, dest) + return article + + def update_article(self, arcdir, article, prev, next): + import os + self.message('Updating HTML for article '+str(article.sequence)) + try: + f=open(os.path.join(arcdir, article.filename), 'r') + article.loadbody_fromHTML(f) + f.close() + except IOError: + self.message("article file %s is missing!" % os.path.join(arcdir, article.filename)) + article.prev=prev + article.next=next + f=open(os.path.join(arcdir, article.filename), 'w') + f.write(article.as_html()) + f.close() + + + + + + + + + + |
