summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcotton1998-10-09 14:14:30 +0000
committercotton1998-10-09 14:14:30 +0000
commit8aa10fbdc0e464bfe1df5f244b7a2576dcc72afe (patch)
treef4bf432ea0f5ad6a6aae9a222c5abfdcaa59149a
parent13ca1d9ac610544b62b5b578676dc5ba9de32985 (diff)
downloadmailman-8aa10fbdc0e464bfe1df5f244b7a2576dcc72afe.tar.gz
mailman-8aa10fbdc0e464bfe1df5f244b7a2576dcc72afe.tar.zst
mailman-8aa10fbdc0e464bfe1df5f244b7a2576dcc72afe.zip
All these changes are for implementing integrated pipermail-based archives.
Original patches are from The Dragon de Monsyne with the following changes: -added support for private archives as well as public -added support for archiving daily and weekly -made archiving happen in real time -replaced use of pipermail's BSDBDatabase with homegrown python version -took out the need for DocumentTemplate here's a listing of changed files and relevant changes: Makefile.in - added public_html/archives to installdirs Mailman/Archiver.py - changed ArchiveMail to do real time archiving Mailman/Defaults.py.in - added archive frequency and archive url extension variables Mailman/MailList.py - changed .Save() to alter perms on public vs. private archives Mailman/htmlformat.py - changes directly from The Dragon de Monsyne's patches. I don't know what they are exactly, but all the cgi's seem to work fine, so I assume they are OK. Mailman/versions.py - changes to add archiving based variables back to the list Mailman/Cgi/private - changed to make it work with default installation and made background white on login page src/Makefile.in - changes to make all wrappers setuid mailman: since various processes may access an archive, and the archiving mechanism uses "chmod", all archives must be owned by mailman, so all wrappers need to be owned by and setuid mailman added files: Mailman/HyperArch.py - from The Dragon de Monsyne with the changes noted above Mailman/HyperDatabase.py - the replacement for pipermail.BSDBDatabase scott
-rw-r--r--Mailman/Archiver.py83
-rw-r--r--Mailman/Cgi/private.py154
-rw-r--r--Mailman/Defaults.py.in10
-rw-r--r--Mailman/HyperArch.py944
-rw-r--r--Mailman/HyperDatabase.py276
-rw-r--r--Mailman/MailList.py44
-rw-r--r--Mailman/versions.py7
-rw-r--r--Makefile.in2
-rw-r--r--src/Makefile.in6
9 files changed, 1374 insertions, 152 deletions
diff --git a/Mailman/Archiver.py b/Mailman/Archiver.py
index 50555f0c2..c3f1009b8 100644
--- a/Mailman/Archiver.py
+++ b/Mailman/Archiver.py
@@ -27,25 +27,17 @@ import sys, os, string
import Utils
import Mailbox
import mm_cfg
+import sys
-## ARCHIVE_PENDING = "to-archive.mail"
-## # ARCHIVE_RETAIN will be ignored, below, in our hook up with andrew's new
-## # pipermail.
-## ARCHIVE_RETAIN = "retained.mail"
-
class Archiver:
def InitVars(self):
# Configurable
self.archive = 1
# 0=public, 1=private:
self.archive_private = mm_cfg.DEFAULT_ARCHIVE_PRIVATE
-## self.archive_update_frequency = \
-## mm_cfg.DEFAULT_ARCHIVE_UPDATE_FREQUENCY
-## self.archive_volume_frequency = \
-## mm_cfg.DEFAULT_ARCHIVE_VOLUME_FREQUENCY
-## self.archive_retain_text_copy = \
-## mm_cfg.DEFAULT_ARCHIVE_RETAIN_TEXT_COPY
+ self.archive_volume_frequency = \
+ mm_cfg.DEFAULT_ARCHIVE_VOLUME_FREQUENCY
# Not configurable
self.clobber_date = 0
@@ -62,10 +54,10 @@ class Archiver:
def GetBaseArchiveURL(self):
if self.archive_private:
return os.path.join(mm_cfg.PRIVATE_ARCHIVE_URL,
- self._internal_name + ".html")
+ self._internal_name + mm_cfg.PRIVATE_ARCHIVE_URL_EXT)
else:
return os.path.join(mm_cfg.PUBLIC_ARCHIVE_URL,
- self._internal_name + ".html")
+ self._internal_name + mm_cfg.PRIVATE_ARCHIVE_URL_EXT)
def GetConfigInfo(self):
return [
@@ -81,17 +73,10 @@ class Archiver:
'Set date in archive to when the mail is claimed to have been '
'sent, or to the time we resend it?'),
-## ('archive_update_frequency', mm_cfg.Number, 3, 0,
-## "How often should new messages be incorporated? "
-## "0 for no archival, 1 for daily, 2 for hourly"),
-
-## ('archive_volume_frequency', mm_cfg.Radio, ('Yearly', 'Monthly'),
-## 0,
-## 'How often should a new archive volume be started?'),
+ ('archive_volume_frequency', mm_cfg.Radio,
+ ('Yearly', 'Monthly','Quarterly', 'Weekly', 'Daily'), 0,
+ 'How often should a new archive volume be started?'),
-## ('archive_retain_text_copy', mm_cfg.Toggle, ('No', 'Yes'),
-## 0,
-## 'Retain plain text copy of archive?'),
]
def UpdateArchive(self):
@@ -123,26 +108,35 @@ class Archiver:
f.truncate(0)
f.close()
-# Internal function, don't call this.
- def ArchiveMail(self, post):
- """Retain a text copy of the message in an mbox file."""
- if self.clobber_date:
- import time
- olddate = post.getheader('date')
- post.SetHeader('Date', time.ctime(time.time()))
+
+ #
+ # archiving in real time this is called from list.post(msg)
+ #
+ def ArchiveMail(self, msg):
+ #
+ # first we fork so that errors here won't
+ # disrupt normal list delivery -scott
+ #
+ if os.fork():
+ return
try:
- afn = self.ArchiveFileName()
- mbox = self.ArchiveFile(afn)
- mbox.AppendMessage(post)
- mbox.fp.close()
- except IOError, msg:
- self.LogMsg("error", ("Archive file access failure:\n"
- "\t%s %s"
- % (afn, `msg[1]`)))
- if self.clobber_date:
- # Resurrect original date setting.
- post.SetHeader('Date', olddate)
- self.Save ()
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
+ txt = msg.unixfrom
+ for h in msg.headers:
+ txt = txt + h
+ if msg.body[0] != '\n':
+ txt = txt + "\n"
+ txt = txt + msg.body
+ f = StringIO(txt)
+ import HyperArch
+ h = HyperArch.HyperArchive(self)
+ h.processUnixMailbox(f, HyperArch.Article)
+ h.close()
+ f.close()
+ os._exit(0)
+
def ArchiveFileName(self):
"""The mbox name where messages are left for archive construction."""
@@ -152,6 +146,7 @@ class Archiver:
else:
return os.path.join(self.public_archive_file_dir,
self._internal_name)
+
def ArchiveFile(self, afn):
"""Open (creating, if necessary) the named archive file."""
ou = os.umask(002)
@@ -162,3 +157,7 @@ class Archiver:
raise IOError, msg
finally:
os.umask(ou)
+
+
+
+
diff --git a/Mailman/Cgi/private.py b/Mailman/Cgi/private.py
index d3cd16fa5..47a341782 100644
--- a/Mailman/Cgi/private.py
+++ b/Mailman/Cgi/private.py
@@ -27,11 +27,16 @@ subscribers.
executables are).
"""
-import sys, os, string, re
+import sys, os, string
from Mailman import MailList, Errors
from Mailman import Cookie
+from Mailman.Logging.Utils import LogStdErr
-ROOT = "/local/pipermail/private/"
+LogStdErr("error", "private")
+
+
+
+ROOT = "/home/mailman/public_html/archives"
SECRET = "secret" # XXX used for hashing
PAGE = '''
@@ -39,11 +44,11 @@ PAGE = '''
<head>
<title>%(listname)s Private Archives Authentication</title>
</head>
-<body>
-<FORM METHOD=POST ACTION="%(basepath)s/%(path)s">
- <TABLE WIDTH="100%" BORDER="0" CELLSPACING="4" CELLPADDING="5">
+<body bgcolor="#ffffff">
+<FORM METHOD=POST ACTION="%(basepath)s/">
+ <TABLE WIDTH="100%%" BORDER="0" CELLSPACING="4" CELLPADDING="5">
<TR>
- <TD COLSPAN="2" WIDTH="100%" BGCOLOR="#99CCFF" ALIGN="CENTER">
+ <TD COLSPAN="2" WIDTH="100%%" BGCOLOR="#99CCFF" ALIGN="CENTER">
<B><FONT COLOR="#000000" SIZE="+1">%(listname)s Private Archives
Authentication</FONT></B>
</TD>
@@ -68,29 +73,12 @@ PAGE = '''
</FORM>
'''
+
login_attempted = 0
_list = None
-name_pat = re.compile(
- r'(?: ' # Being first alternative...
- r'/ (?: \d{4} q \d\. )?' # Match "/", and, optionally, 1998q1.
- r'( [^/]* ) /?' # The list name
- r'/[^/]*$' # The trailing 12345.html portion
- r')' # End first alternative
- r' | '
- r'(?:' # Begin second alternative...
- r'/ ( [^/.]* )' # Match matrix-sig
- r'(?:\.html)?' # Optionally match .html
- r'/?' # Optionally match a trailing slash
- r'$' # Must match to end of string
- r')' # And close the second alternate.
- , re.VERBOSE)
def getListName(path):
- match = name_pat.search(path)
- if match is None: return
- if match.group(1): return match.group(1)
- if match.group(2): return match.group(2)
- raise ValueError, "Can't identify SIG name"
+ return string.split(path, os.sep)[1]
def GetListobj(list_name):
@@ -109,16 +97,8 @@ def isAuthenticated(list_name):
if os.environ.has_key('HTTP_COOKIE'):
c = Cookie.Cookie( os.environ['HTTP_COOKIE'] )
if c.has_key(list_name):
- # The user has a token like 'c++-sig=AE23446AB...'; verify
- # that it's correct.
- token = string.replace(c[list_name].value,"@","\n")
- import base64, md5
- if base64.decodestring(token) != md5.new(SECRET
- + list_name
- + SECRET).digest():
- return 0
- return 1
-
+ if c[list_name].value == `hash(list_name)`:
+ return 1
# No corresponding cookie. OK, then check for username, password
# CGI variables
import cgi
@@ -139,21 +119,16 @@ def isAuthenticated(list_name):
# be displayed with an appropriate message.
global login_attempted
login_attempted=1
-
listobj = GetListobj(list_name)
if not listobj:
print '\n<P>A list named,', repr(list_name), "was not found."
return 0
-
try:
listobj.ConfirmUserPassword( username, password)
except (Errors.MMBadUserError, Errors.MMBadPasswordError):
return 0
- import base64, md5
- token = md5.new(SECRET + list_name + SECRET).digest()
- token = base64.encodestring(token)
- token = string.replace(token, "\n", "@")
+ token = `hash(list_name)`
c = Cookie.Cookie()
c[list_name] = token
print c # Output the cookie
@@ -162,66 +137,49 @@ def isAuthenticated(list_name):
def true_path(path):
"Ensure that the path is safe by removing .."
- path = string.split(path, '/')
- for i in range(len(path)):
- if path[i] == ".": path[i] = "" # ./ is just redundant
- elif path[i] == "..":
- # Remove any .. components
- path[i] = ""
- j=i-1
- while j>0 and path[j] == "": j=j-1
- path[j] = ""
-
- path = filter(None, path)
- return string.join(path, '/')
-
-def processPage(page):
- """Change any URLs that start with ../ to work properly when output from
- /cgi-bin/private"""
- # Escape any % signs not followed by (
- page = re.sub('%([^(])', r'%%\1', page)
+ path = string.replace(path, "../", "")
+ path = string.replace(path, "./", "")
+ return path[1:]
- # Convert references like HREF="../doc" to just /doc.
- page = re.sub('([\'="])../', r'\1/', page)
-
- return page
def main():
- print 'Content-type: text/html\n'
- path = os.environ.get('PATH_INFO', "/index.html")
- true_filename = os.path.join(ROOT, true_path(path) )
- list_name = getListName(path)
-
- if os.path.isdir(true_filename):
- true_filename = true_filename + '/index.html'
+ path = os.environ.get('PATH_INFO', "/index.html")
+ true_filename = os.path.join(ROOT, true_path(path) )
+ list_name = getListName(path)
+ if os.path.isdir(true_filename):
+ true_filename = true_filename + '/index.html'
- if not isAuthenticated(list_name):
- # Output the password form
- page = processPage( PAGE )
+ if not isAuthenticated(list_name):
+ # Output the password form
+ print 'Content-type: text/html\n'
+ page = PAGE
- listobj = GetListobj(list_name)
- if login_attempted:
- message = ("Your email address or password were incorrect."
- " Please try again.")
- else:
- message = ("Please enter your %s subscription email address"
- " and password." % listobj.real_name)
- while path and path[0] == '/': path=path[1:] # Remove leading /'s
- basepath = os.path.split(listobj.GetBaseArchiveURL())[0]
- listname = listobj.real_name
- print '\n\n', page % vars()
- sys.exit(0)
-
- print '\n\n'
- # Authorization confirmed... output the desired file
- try:
- f = open(true_filename, 'r')
- except IOError:
- print "<H3>Archive File Not Found</H3>"
- print "No file", path
+ listobj = GetListobj(list_name)
+ if login_attempted:
+ message = ("Your email address or password were incorrect."
+ " Please try again.")
else:
- while (1):
- data = f.read(16384)
- if data == "": break
- sys.stdout.write(data)
- f.close()
+ message = ("Please enter your %s subscription email address"
+ " and password." % listobj.real_name)
+ while path and path[0] == '/': path=path[1:] # Remove leading /'s
+ basepath = os.path.split(listobj.GetBaseArchiveURL())[0]
+ listname = listobj.real_name
+ print '\n\n', page % vars()
+ sys.exit(0)
+ print 'Content-type: text/html\n'
+
+ print '\n\n'
+ # Authorization confirmed... output the desired file
+ try:
+ f = open(true_filename, 'r')
+ except IOError:
+ print "<H3>Archive File Not Found</H3>"
+ print "No file", path
+ else:
+ while (1):
+ data = f.read(16384)
+ if data == "": break
+ sys.stdout.write(data)
+ f.close()
+
+
diff --git a/Mailman/Defaults.py.in b/Mailman/Defaults.py.in
index b9b2a005d..780d06fc6 100644
--- a/Mailman/Defaults.py.in
+++ b/Mailman/Defaults.py.in
@@ -38,6 +38,16 @@ PUBLIC_ARCHIVE_URL = 'http://www.OVERRIDE.WITH.YOUR.PUBLIC.ARCHIVE.URL/'
PRIVATE_ARCHIVE_URL = 'http://www.OVERRIDE.WITH.YOUR.PRIVATE.ARCHIVE.URL/'
DEFAULT_ARCHIVE_PRIVATE = 0 # 0=public, 1=private
+# 0 - yearly
+# 1 - month
+# 2 - quarter
+# 3 - week
+# 4 - day
+DEFAULT_ARCHIVE_VOLUME_FREQUENCY = 1
+
+PUBLIC_ARCHIVE_URL_EXT = ''
+PRIVATE_ARCHIVE_URL_EXT = '/'
+
HOME_PAGE = 'index.html'
MAILMAN_OWNER = 'mailman-owner@%s' % DEFAULT_HOST_NAME
diff --git a/Mailman/HyperArch.py b/Mailman/HyperArch.py
new file mode 100644
index 000000000..11a6611b0
--- /dev/null
+++ b/Mailman/HyperArch.py
@@ -0,0 +1,944 @@
+"""HyperArch: Pipermail archiving for MailMan
+
+ - The Dragon De Monsyne <dragondm@integral.org>
+
+ TODO:
+ - The templates should be be files in Mailman's Template dir, instead
+ of static strings.
+ - Each list should be able to have it's own templates.
+ Also, it should automatically fall back to default template in case
+ of error in list specific template.
+ - Should be able to force all HTML to be regenerated next time the archive
+ is run, incase a template is changed.
+ - Run a command to generate tarball of html archives for downloading
+ (prolly in the 'update_dirty_archives' method )
+
+"""
+
+import re, cgi, urllib, string
+import time, pickle, os, posixfile
+import HyperDatabase
+import pipermail
+import mm_cfg
+
+
+def html_quote(s):
+ repls = ( ('&', '&amp;'),
+ ("<", '&lt;'),
+ (">", '&gt;'),
+ ('"', '&quot;'))
+ for thing, repl in repls:
+ s = string.replace(s, thing, repl)
+ return s
+
+def url_quote(s):
+ return urllib.quote(s)
+
+
+article_text_template="""\
+From %(email)s %(datestr)s
+Date: %(datestr)s
+From: %(author)s %(email)s
+Subject: %(subject)s
+
+%(body)s
+
+"""
+
+article_template="""\
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<HTML>
+ <HEAD>
+ <TITLE> %(subject_html)s</TITLE>
+ <LINK REL="Index" HREF="index.html" >
+ <LINK REL="made" HREF="mailto:%(email_url)s">
+ %(prev)s
+ %(next)s
+ </HEAD>
+ <BODY BGCOLOR="#ffffff">
+ <H1>%(subject_html)s</H1>
+ <B>%(author_html)s</B>
+ <A HREF="mailto:%(email_url)s" TITLE="%(subject_html)s">%(email_html)s</A><BR>
+ <I>%(datestr_html)s</I>
+ <P><UL>
+ %(prev_wsubj)s
+ %(next_wsubj)s
+ <LI> <B>Messages sorted by:</B>
+ <a href="date.html#%(sequence)s">[ date ]</a>
+ <a href="thread.html#%(sequence)s">[ thread ]</a>
+ <a href="subject.html#%(sequence)s">[ subject ]</a>
+ <a href="author.html#%(sequence)s">[ author ]</a>
+ </LI>
+ </UL>
+ <HR>
+<!--beginarticle-->
+%(body)s
+
+<!--endarticle-->
+ <HR>
+ <P><UL>
+ <!--threads-->
+ %(prev_wsubj)s
+ %(next_wsubj)s
+ <LI> <B>Messages sorted by:</B>
+ <a href="date.html#%(sequence)s">[ date ]</a>
+ <a href="thread.html#%(sequence)s">[ thread ]</a>
+ <a href="subject.html#%(sequence)s">[ subject ]</a>
+ <a href="author.html#%(sequence)s">[ author ]</a>
+ </LI>
+ </UL>
+</body></html>
+"""
+
+
+
+def CGIescape(arg):
+ s=cgi.escape(str(arg))
+ s=re.sub('"', '&quot;', s)
+ return s
+
+# Parenthesized human name
+paren_name_pat=re.compile(r'([(].*[)])')
+# Subject lines preceded with 'Re:'
+REpat=re.compile( r"\s*RE\s*:\s*",
+ re.IGNORECASE)
+# E-mail addresses and URLs in text
+emailpat=re.compile(r'([-+,.\w]+@[-+.\w]+)')
+# Argh! This pattern is buggy, and will choke on URLs with GET parameters.
+urlpat=re.compile(r'(\w+://[^>)\s]+)') # URLs in text
+# Blank lines
+blankpat=re.compile(r'^\s*$')
+
+#
+# Starting <html> directive
+htmlpat=re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE)
+# Ending </html> directive
+nohtmlpat=re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE)
+# Match quoted text
+quotedpat=re.compile(r'^([>|:]|&gt;)+')
+
+
+# Note: I'm overriding most, if not all of the pipermail Article class here -ddm
+# The Article class encapsulates a single posting. The attributes
+# are:
+#
+# sequence : Sequence number, unique for each article in a set of archives
+# subject : Subject
+# datestr : The posting date, in human-readable format
+# date : The posting date, in purely numeric format
+# headers : Any other headers of interest
+# author : The author's name (and possibly organization)
+# email : The author's e-mail address
+# msgid : A unique message ID
+# in_reply_to : If !="", this is the msgid of the article being replied to
+# references: A (possibly empty) list of msgid's of earlier articles in the thread
+# body : A list of strings making up the message body
+
+class Article(pipermail.Article):
+ __last_article_time=time.time()
+
+ html_tmpl=article_template
+ text_tmpl=article_text_template
+
+
+ def as_html(self):
+ d = self.__dict__.copy()
+ if self.prev:
+ d["prev"] = '<LINK REL="Previous" HREF="%s">' % \
+ (url_quote(self.prev.filename))
+ d["prev_wsubj"] = '<LI> Previous message: <A HREF="%s">%s</A></li>' % \
+ (url_quote(self.prev.filename), html_quote(self.prev.subject))
+ else:
+ d["prev"] = d["prev_wsubj"] = ""
+
+ if self.next:
+ d["next"] = '<LI> Next message: <A HREF="%s"></A></li>' % \
+ (html_quote(self.next.filename))
+ d["next_wsubj"] = '<LI> Next message: <A HREF="%s">%s</A></li>' % \
+ (url_quote(self.next.filename), html_quote(self.next.subject))
+ else:
+ d["next"] = d["next_wsubj"] = ""
+
+ d["email_html"] = html_quote(self.email)
+ d["subject_html"] = html_quote(self.subject)
+ d["author_html"] = html_quote(self.author)
+ d["email_url"] = url_quote(self.email)
+ d["datestr_html"] = html_quote(self.datestr)
+ d["body"] = string.join(self.body, "")
+ return self.html_tmpl % d
+
+ def as_text(self):
+ d = self.__dict__.copy()
+ d["body"] = string.join(self.body, "")
+ return self.text_tmpl % d
+
+
+ def __init__(self, message=None, sequence=0, keepHeaders=[]):
+ import time
+ if message==None: return
+ self.sequence=sequence
+
+ self.parentID = None
+ self.threadKey = None
+ self.prev=None
+ self.next=None
+ # otherwise the current sequence number is used.
+ id=pipermail.strip_separators(message.getheader('Message-Id'))
+ if id=="": self.msgid=str(self.sequence)
+ else: self.msgid=id
+
+ if message.has_key('Subject'): self.subject=str(message['Subject'])
+ else: self.subject='No subject'
+ i=0
+ while (i!=-1):
+ result=REpat.match(self.subject)
+ if result:
+ i = result.end(0)
+ self.subject=self.subject[i:]
+ else: i=-1
+ if self.subject=="": self.subject='No subject'
+
+ if message.has_key('Date'):
+ self.datestr=str(message['Date'])
+ date=message.getdate_tz('Date')
+ else:
+ self.datestr='None'
+ date=None
+ if date!=None:
+ date, tzoffset=date[:9], date[-1]
+ if not tzoffset:
+ tzoffset = 0
+ date=time.mktime(date)-tzoffset
+ else:
+ date=self.__last_article_time+1
+
+ self.__last_article_time=date
+ self.date='%011i' % (date,)
+
+ # Figure out the e-mail address and poster's name
+ self.author, self.email=message.getaddr('From')
+ self.email=pipermail.strip_separators(self.email)
+ self.author=pipermail.strip_separators(self.author)
+
+ if self.author=="": self.author=self.email
+
+ # Save the 'In-Reply-To:' and 'References:' lines
+ i_r_t=message.getheader('In-Reply-To')
+ if i_r_t==None: self.in_reply_to=''
+ else:
+ match=pipermail.msgid_pat.search(i_r_t)
+ if match==None: self.in_reply_to=''
+ else: self.in_reply_to=pipermail.strip_separators(match.group(1))
+
+ references=message.getheader('References')
+ if references==None: self.references=[]
+ else: self.references=map(pipermail.strip_separators, string.split(references))
+
+ # Save any other interesting headers
+ self.headers={}
+ for i in keepHeaders:
+ if message.has_key(i): self.headers[i]=message[i]
+
+ # Read the message body
+ self.body=[]
+ message.rewindbody()
+ while (1):
+ line=message.fp.readline()
+ if line=="": break
+ self.body.append(line)
+
+ def loadbody_fromHTML(self,fileobj):
+ self.body=[]
+ begin=0
+ while(1):
+ line=fileobj.readline()
+ if not line:
+ break
+ if (not begin) and string.strip(line)=='<!--beginarticle-->':
+ begin=1
+ continue
+ if string.strip(line)=='<!--endarticle-->':
+ break
+ if begin:
+ self.body.append(line)
+
+ def __getstate__(self):
+ d={}
+ for each in self.__dict__.keys():
+ if each in ['maillist','prev','next','body']:
+ d[each] = None
+ else:
+ d[each] = self.__dict__[each]
+ d['body']=[]
+ return d
+
+
+#
+# Archive class specific stuff
+#
+index_header_template="""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<HTML>
+ <HEAD>
+ <title>The %(listname)s %(archive)s Archive by %(archtype)s</title>
+ </HEAD>
+ <BODY BGCOLOR="#ffffff">
+ <a name="start"></A>
+ <h1>%(archive)s Archives by %(archtype)s</h1>
+ <ul>
+ <li> <b>Messages sorted by:</b>
+ %(thread_ref)s
+ %(subject_ref)s
+ %(author_ref)s
+ %(date_ref)s
+
+ <li><b><a href="%(listinfo)s">More info on this list...</a></b></li>
+ </ul>
+ <p><b>Starting:</b> <i>%(firstdate)s</i><br>
+ <b>Ending:</b> <i>%(lastdate)s</i><br>
+ <b>Messages:</b> %(size)s<p>
+ <ul>
+"""
+
+index_footer_template="""\
+ </ul>
+ <p>
+ <a name="end"><b>Last message date:</b></a>
+ <i>%(lastdate)s</i><br>
+ <b>Archived on:</b> <i><!--#var archivedate --></i>
+ <p>
+ <ul>
+ <li> <b>Messages sorted by:</b>
+ %(thread_ref)s
+ %(subject_ref)s
+ %(author_ref)s
+ %(date_ref)s
+ <li><b><a href="%(listinfo)s">More info on this list...</a></b></li>
+ </ul>
+ <p>
+ <hr>
+ <i>This archive was generated by
+ <a href="http://starship.skyport.net/crew/amk/maintained/pipermail.html">
+ Pipermail %(version)s</a>.</i>
+ </BODY>
+</HTML>
+"""
+
+TOC_template="""\
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<HTML>
+ <HEAD>
+ <title>The %(listname)s Archives</title>
+ </HEAD>
+ <BODY BGCOLOR="#ffffff">
+ <h1>The %(listname)s Archives </h1>
+ <p>
+ <a href="%(listinfo)s">More info on this list...</a>
+ </p>
+ %(noarchive_msg)s
+ %(archive_listing_start)s
+ %(archive_listing)s
+ %(archive_listing_end)s
+ </BODY>
+ </HTML>
+"""
+
+TOC_entry_template = """\
+
+ <tr>
+ <td>%(archive)s:</td>
+ <td>
+ <A href="%(archive)s/thread.html">[ Thread ]</a>
+ <A href="%(archive)s/subject.html">[ Subject ]</a>
+ <A href="%(archive)s/author.html">[ Author ]</a>
+ <A href="%(archive)s/date.html">[ Date ]</a>
+ </td>
+ <td><A href="%(archive)s.txt">[ Text ]</a></td>
+ </tr>
+
+"""
+arch_listing_start = """\
+ <table border=3>
+ <tr><td>Archive</td> <td>View by:</td> <td>Downloadable version</td></tr>
+"""
+
+arch_listing_end = """\
+ </table>
+"""
+
+
+class HyperArchive(pipermail.T):
+
+ # some defaults
+ DIRMODE=0775
+ FILEMODE=0664
+
+
+ VERBOSE=0
+ DEFAULTINDEX='thread'
+ ARCHIVE_PERIOD='month'
+
+ THREADLAZY=0
+ THREADLEVELS=3
+
+ ALLOWHTML=1
+ SHOWHTML=1
+ IQUOTES=1
+ SHOWBR=1
+
+ html_hdr_tmpl=index_header_template
+ html_foot_tmpl=index_footer_template
+ html_TOC_tmpl=TOC_template
+ TOC_entry_tmpl = TOC_entry_template
+ arch_listing_start = arch_listing_start
+ arch_listing_end = arch_listing_end
+
+ def html_foot(self):
+ d = {"lastdate": html_quote(self.lastdate),
+ "archivedate": html_quote(self.archivedate),
+ "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo'),
+ "version": self.version}
+ for t in ("thread", "subject", "author", "date"):
+ cap = string.upper(t[0]) + t[1:]
+ if self.type == cap:
+ d["%s_ref" % (t)] = ""
+ else:
+ d["%s_ref" % (t)] = '<a href="%s.html#start">[ %s ]</a>' % (t, t)
+ return self.html_foot_tmpl % d
+
+
+ def html_head(self):
+ d = {"listname": html_quote(self.maillist.real_name),
+ "archtype": self.type,
+ "archive": self.archive,
+ "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo'),
+ "firstdate": html_quote(self.firstdate),
+ "lastdate": html_quote(self.lastdate),
+ "size": self.size,
+ }
+ for t in ("thread", "subject", "author", "date"):
+ cap = string.upper(t[0]) + t[1:]
+ if self.type == cap:
+ d["%s_ref" % (t)] = ""
+ else:
+ d["%s_ref" % (t)] = '<a href="%s.html#start">[ %s ]</a>' % (t, t)
+ return self.html_hdr_tmpl % d
+
+
+
+ def html_TOC(self):
+ d = {"listname": self.maillist.real_name,
+ "listinfo": self.maillist.GetAbsoluteScriptURL('listinfo') }
+ listing = ""
+ if not self.archives:
+ d["noarchive_msg"] = '<P>Currently, there are no archives. </P>'
+ d["archive_listing_start"] = ""
+ d["archive_listing_end"] = ""
+ d["archive_listing"] = ""
+ else:
+ d["noarchive_msg"] = ""
+ d["archive_listing_start"] = self.arch_listing_start
+ d["archive_listing_end"] = self.arch_listing_end
+ for a in self.archives:
+ listing = listing + self.TOC_entry_tmpl % {"archive": a}
+ d["archive_listing"] = listing
+ return self.html_TOC_tmpl % d
+
+
+
+ def __init__(self, maillist,unlock=1):
+ self.maillist=maillist
+ self._unlocklist=unlock
+ self._lock_file=None
+
+
+ #
+ # this is always called from inside it's own forked
+ # process, and access is protected via list.Save()
+ # so we're leavin' the perms wide open from here on out
+ #
+ ou = os.umask(0)
+ pipermail.T.__init__(self,
+ maillist.archive_directory,
+ reload=1,
+ database=HyperDatabase.HyperDatabase(maillist.archive_directory))
+
+ if hasattr(self.maillist,'archive_volume_frequency'):
+ if self.maillist.archive_volume_frequency == 0:
+ self.ARCHIVE_PERIOD='year'
+ elif self.maillist.archive_volume_frequency == 2:
+ self.ARCHIVE_PERIOD='quarter'
+ elif self.maillist.archive_volume_frequency == 3:
+ self.ARCHIVE_PERIOD='week'
+ elif self.maillist.archive_volume_frequency == 4:
+ self.ARCHIVE_PERIOD='day'
+ else:
+ self.ARCHIVE_PERIOD='month'
+
+ def GetArchLock(self):
+ if self._lock_file:
+ return 1
+ ou = os.umask(0)
+ try:
+ self._lock_file = posixfile.open(
+ os.path.join(mm_cfg.LOCK_DIR, '%s@arch.lock' %
+ self.maillist._internal_name), 'a+')
+ finally:
+ os.umask(ou)
+ # minor race condition here, there is no way to atomicly
+ # check & get a lock. That shouldn't matter here tho' -ddm
+ if not self._lock_file.lock('w?', 1):
+ self._lock_file.lock('w|', 1)
+ else:
+ return 0
+ return 1
+
+ def DropArchLock(self):
+ if self._lock_file:
+ self._lock_file.lock('u')
+ self._lock_file.close()
+ self._lock_file = None
+
+ def processListArch(self):
+ name = self.maillist.ArchiveFileName()
+ wname= name+'.working'
+ ename= name+'.err_unarchived'
+ try:
+ os.stat(name)
+ except (IOError,os.error):
+ #no archive file, nothin to do -ddm
+ return
+
+ #see if arch is locked here -ddm
+ if not self.GetArchLock():
+ #another archiver is running, nothing to do. -ddm
+ return
+
+ #if the working file is still here, the archiver may have
+ # crashed during archiving. Save it, log an error, and move on.
+ try:
+ wf=open(wname,'r')
+ self.maillist.LogMsg("error","Archive working file %s present. "
+ "Check %s for possibly unarchived msgs" %
+ (wname,ename) )
+ ef=open(ename, 'a+')
+ ef.seek(1,2)
+ if ef.read(1) <> '\n':
+ ef.write('\n')
+ ef.write(wf.read())
+ ef.close()
+ wf.close()
+ os.unlink(wname)
+ except IOError:
+ pass
+ os.rename(name,wname)
+ if self._unlocklist:
+ self.maillist.Unlock()
+ archfile=open(wname,'r')
+ self.processUnixMailbox(archfile, Article)
+ archfile.close()
+ os.unlink(wname)
+ self.DropArchLock()
+
+ def get_filename(self, article):
+ return '%06i.html' % (article.sequence,)
+
+ def get_archives(self, article):
+ """Return a list of indexes where the article should be filed.
+ A string can be returned if the list only contains one entry,
+ and the empty list is legal."""
+ if article.subject in ['subscribe', 'unsubscribe']: return None
+ res = self.dateToVolName(string.atof(article.date))
+ import sys
+ sys.stderr.write("figuring article archives\n")
+ sys.stderr.write(res + "\n")
+ return res
+
+
+
+# The following two methods should be inverses of each other. -ddm
+
+ def dateToVolName(self,date):
+ datetuple=time.gmtime(date)
+ if self.ARCHIVE_PERIOD=='year':
+ return time.strftime("%Y",datetuple)
+ elif self.ARCHIVE_PERIOD=='quarter':
+ if datetuple[1] in [1,2,3]:
+ return time.strftime("%Yq1",datetuple)
+ elif datetuple[1] in [4,5,6]:
+ return time.strftime("%Yq2",datetuple)
+ elif datetuple[1] in [7,8,9]:
+ return time.strftime("%Yq3",datetuple)
+ else:
+ return time.strftime("%Yq4",datetuple)
+ elif self.ARCHIVE_PERIOD == 'day':
+ return time.strftime("%Y%m%d", datetuple)
+ elif self.ARCHIVE_PERIOD == 'week':
+ datetuple = list(datetuple)
+ datetuple[2] = datetuple[2] - datetuple[6] # subtract week day
+ #
+ # even if the the day of the month counter is negative,
+ # we still get the right thing from strftime! -scott
+ #
+ return time.strftime("Week-of-Mon-%Y%m%d", tuple(datetuple))
+ # month. -ddm
+ else:
+ return time.strftime("%Y-%B",datetuple)
+
+
+ def volNameToDate(self,volname):
+ volname=string.strip(volname)
+ volre= { 'year' : r'^(?P<year>[0-9]{4,4})$',
+ 'quarter' : r'^(?P<year>[0-9]{4,4})q(?P<quarter>[1234])$',
+ 'month' : r'^(?P<year>[0-9]{4,4})-(?P<month>[a-zA-Z]+)$',
+ 'week': r'^Week-of-Mon-(?P<year>[0-9]{4,4})(?P<month>[01][0-9])(?P<day>[0123][0-9])',
+ 'day': r'^(?P<year>[0-9]{4,4})(?P<month>[01][0-9])(?P<day>[0123][0-9])$'}
+ for each in volre.keys():
+ match=re.match(volre[each],volname)
+ if match:
+ year=string.atoi(match.group('year'))
+ month=1
+ day = 1
+ if each == 'quarter':
+ q=string.atoi(match.group('quarter'))
+ month=(q*3)-2
+ elif each == 'month':
+ monthstr=string.lower(match.group('month'))
+ m=[]
+ for i in range(1,13):
+ m.append(string.lower(
+ time.strftime("%B",(1999,i,1,0,0,0,0,1,0))))
+ try:
+ month=m.index(monthstr)+1
+ except ValueError:
+ pass
+ elif each == 'week' or each == 'day':
+ month = string.atoi(match.group("month"))
+ day = string.atoi(match.group("day"))
+ return time.mktime((year,month,1,0,0,0,0,1,-1))
+ return 0.0
+
+ def sortarchives(self):
+ def sf(a,b,s=self):
+ al=s.volNameToDate(a)
+ bl=s.volNameToDate(b)
+ if al>bl:
+ return 1
+ elif al<bl:
+ return -1
+ else:
+ return 0
+ if self.ARCHIVE_PERIOD in ('month','year','quarter'):
+ self.archives.sort(sf)
+ else:
+ self.archives.sort()
+
+ def message(self, msg):
+ if self.VERBOSE:
+ import sys
+ f = sys.stderr
+ f.write(msg)
+ if msg[-1:]!='\n': f.write('\n')
+ f.flush()
+
+ def open_new_archive(self, archive, archivedir):
+ import os
+ index_html=os.path.join(archivedir, 'index.html')
+ try: os.unlink(index_html)
+ except: pass
+ os.symlink(self.DEFAULTINDEX+'.html',index_html)
+
+
+ def write_index_header(self):
+ self.depth=0
+ print self.html_head()
+
+ if not self.THREADLAZY and self.type=='Thread':
+ # Update the threaded index
+ self.message("Computing threaded index\n")
+ self.updateThreadedIndex()
+
+
+ def write_index_footer(self):
+ import string
+ for i in range(self.depth): print '</UL>'
+ print self.html_foot()
+
+ def write_index_entry(self, article):
+ print '<LI> <A HREF="%s">%s</A> <A NAME="%i"></A><I>%s</I>' % (urllib.quote(article.filename),
+ CGIescape(article.subject), article.sequence,
+ CGIescape(article.author))
+
+ def write_threadindex_entry(self, article, depth):
+ if depth<0:
+ sys.stderr.write('depth<0') ; depth=0
+ if depth>self.THREADLEVELS: depth=self.THREADLEVELS
+ if depth<self.depth:
+ for i in range(self.depth-depth): print '</UL>'
+ elif depth>self.depth:
+ for i in range(depth-self.depth): print '<UL>'
+ print '<!--%i %s -->' % (depth, article.threadKey)
+ self.depth=depth
+ print '<LI> <A HREF="%s">%s</A> <A NAME="%i"></A><I>%s</I>' % (CGIescape(urllib.quote(article.filename)),
+ CGIescape(article.subject), article.sequence+910,
+ CGIescape(article.author))
+
+ def write_TOC(self):
+ self.sortarchives()
+ toc=open(os.path.join(self.basedir, 'index.html'), 'w')
+ toc.write(self.html_TOC())
+ toc.close()
+
+
+ # Archive an Article object.
+ def add_article(self, article):
+ # Determine into what archives the article should be placed
+ archives=self.get_archives(article)
+ if archives==None: archives=[] # If no value was returned, ignore it
+ if type(archives)==type(''): archives=[archives] # If a string was returned, convert to a list
+ if archives==[]: return # Ignore the article
+
+ # Add the article to each archive in turn
+ article.filename=filename=self.get_filename(article)
+ article_text=article.as_text()
+ temp=self.format_article(article) # Reformat the article
+ self.message("Processing article #"+str(article.sequence)+' into archives '+str(archives))
+ for i in archives:
+ self.archive=i
+ archivedir=os.path.join(self.basedir, i)
+ # If it's a new archive, create it
+ if i not in self.archives:
+ self.archives.append(i) ; self.update_TOC=1
+ self.database.newArchive(i)
+ # If the archive directory doesn't exist, create it
+ try: os.stat(archivedir)
+ except os.error, errdata:
+ errno, errmsg=errdata
+ if errno==2:
+ os.mkdir(archivedir)
+ else: raise os.error, errdata
+ self.open_new_archive(i, archivedir)
+
+ # Write the HTML-ized article to the html archive.
+ f=open(os.path.join(archivedir, filename), 'w')
+
+ f.write(temp.as_html())
+ f.close()
+
+ # Write the text article to the text archive.
+ archivetextfile=os.path.join(self.basedir,"%s.txt" % i)
+ f=open(archivetextfile, 'a+')
+
+ f.write(article_text)
+ f.close()
+
+ authorkey=pipermail.fixAuthor(article.author)+'\000'+article.date
+ subjectkey=string.lower(article.subject)+'\000'+article.date
+
+ # Update parenting info
+ parentID=None
+ if article.in_reply_to!='': parentID=article.in_reply_to
+ elif article.references!=[]:
+ # Remove article IDs that aren't in the archive
+ refs=filter(lambda x, self=self: self.database.hasArticle(self.archive, x),
+ article.references)
+ if len(refs):
+ refs=map(lambda x, s=self: s.database.getArticle(s.archive, x), refs)
+ maxdate=refs[0]
+ for ref in refs[1:]:
+ if ref.date>maxdate.date: maxdate=ref
+ parentID=maxdate.msgid
+ else:
+ # Get the oldest article with a matching subject, and assume this is
+ # a follow-up to that article
+ parentID=self.database.getOldestArticle(self.archive, article.subject)
+
+ if parentID!=None and not self.database.hasArticle(self.archive, parentID):
+ parentID=None
+ article.parentID=parentID
+ if parentID!=None:
+ parent=self.database.getArticle(self.archive, parentID)
+ article.threadKey=parent.threadKey+article.date+'-'
+ else: article.threadKey=article.date+'-'
+ self.database.setThreadKey(self.archive, article.threadKey+'\000'+article.msgid, article.msgid)
+ self.database.addArticle(i, temp, subjectkey, authorkey)
+
+ if i not in self._dirty_archives:
+ self._dirty_archives.append(i)
+ del temp
+
+
+ # Update only archives that have been marked as "changed".
+ def update_dirty_archives(self):
+ for i in self._dirty_archives:
+ self.update_archive(i)
+ archz=None
+ archt=None
+ try:
+ import gzip
+ try:
+ archt=open(os.path.join(self.basedir,"%s.txt" % i),"r")
+ try:
+ os.rename(os.path.join(self.basedir,"%s.txt.gz" % i),
+ os.path.join(self.basedir,"%s.old.txt.gz" % i))
+ archz=gzip.open(os.path.join(self.basedir,"%s.old.txt.gz" % i),"r")
+ except (IOError, RuntimeError, os.error):
+ pass
+ newz=gzip.open(os.path.join(self.basedir,"%s.txt.gz" % i),"w")
+ if archz :
+ newz.write(archz.read())
+ archz.close()
+ os.unlink(os.path.join(self.basedir,"%s.old.txt.gz" % i))
+ newz.write(archt.read())
+ newz.close()
+ archt.close()
+ os.unlink(os.path.join(self.basedir,"%s.txt" % i))
+ except IOError:
+ pass
+ except ImportError:
+ pass
+ self._dirty_archives=[]
+
+ def close(self):
+ "Close an archive, saving its state and updating any changed archives."
+ self.update_dirty_archives()# Update all changed archives
+ # If required, update the table of contents
+ if self.update_TOC or 1:
+ self.update_TOC=0
+ self.write_TOC()
+ # Save the collective state
+ self.message('Pickling archive state into '+os.path.join(self.basedir, 'pipermail.pck'))
+ self.database.close()
+ del self.database
+ f=open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+ pickle.dump(self.__getstate__(), f)
+ f.close()
+
+ def __getstate__(self):
+ d={}
+ for each in self.__dict__.keys():
+ if not (each in ['maillist','_lock_file','_unlocklist']):
+ d[each] = self.__dict__[each]
+ return d
+
+
+
+
+ # Add <A HREF="..."> tags around URLs and e-mail addresses.
+
+ def __processbody_URLquote(self, source, dest):
+ body2=[]
+ last_line_was_quoted=0
+ for i in xrange(0, len(source)):
+ Lorig=L=source[i] ; prefix=suffix=""
+ if L==None: continue
+ # Italicise quoted text
+ if self.IQUOTES:
+ quoted=quotedpat.match(L)
+ if quoted==None: last_line_was_quoted=0
+ else:
+ quoted = quoted.end(0)
+ prefix=CGIescape(L[:quoted]) + '<i>'
+ suffix='</I>'
+ if self.SHOWHTML: suffix=suffix+'<BR>'
+ if not last_line_was_quoted: prefix='<BR>'+prefix
+ L= L[quoted:]
+ last_line_was_quoted=1
+ # Check for an e-mail address
+ L2="" ; jr=emailpat.search(L) ; kr=urlpat.search(L)
+ while jr!=None or kr!=None:
+ if jr==None: j=-1
+ else: j = jr.start(0)
+ if kr==None: k=-1
+ else: k = kr.start(0)
+ if j!=-1 and (j<k or k==-1): text=jr.group(1) ; URL='mailto:'+text ; pos=j
+ elif k!=-1 and (j>k or j==-1): text=URL=kr.group(1) ; pos=k
+ else: # j==k
+ raise ValueError, "j==k: This can't happen!"
+ length=len(text)
+# sys.stderr.write("URL: %s %s %s \n" % (CGIescape(L[:pos]), URL, CGIescape(text)))
+ L2=L2+'%s<A HREF="%s">%s</A>' % (CGIescape(L[:pos]), URL, CGIescape(text))
+ L=L[pos+length:]
+ jr=emailpat.search(L) ; kr=urlpat.search(L)
+ if jr==None and kr==None: L=CGIescape(L)
+ L=prefix+L2+L+suffix
+ if L!=Lorig: source[i], dest[i]=None, L
+
+ # Escape all special characters
+ def __processbody_CGIescape(self, source, dest):
+ import cgi
+ for i in xrange(0, len(source)):
+ if source[i]!=None:
+ dest[i]=cgi.escape(source[i]) ; source[i]=None
+
+ # Perform Hypermail-style processing of <HTML></HTML> directives
+ # in message bodies. Lines between <HTML> and </HTML> will be written
+    # out precisely as they are; other lines remain in `source'
+    # for further processing by later formatting passes.
+
+ def __processbody_HTML(self, source, dest):
+ l=len(source) ; i=0
+ while i<l:
+ while i<l and htmlpat.match(source[i])==None: i=i+1
+ if i<l: source[i]=None ; i=i+1
+ while i<l and nohtmlpat.match(source[i])==None:
+ dest[i], source[i] = source[i], None
+ i=i+1
+ if i<l: source[i]=None ; i=i+1
+
+ def format_article(self, article):
+ source=article.body ; dest=[None]*len(source)
+ # Handle <HTML> </HTML> directives
+ if self.ALLOWHTML:
+ self.__processbody_HTML(source, dest)
+ self.__processbody_URLquote(source, dest)
+ if not self.SHOWHTML:
+ # Do simple formatting here: <PRE>..</PRE>
+ for i in range(0, len(source)):
+ s=source[i]
+ if s==None: continue
+ dest[i]=CGIescape(s) ; source[i]=None
+ if len(dest) > 0:
+ dest[0]='<PRE>'+dest[0] ; dest[-1]=dest[-1]+'</PRE>'
+ else:
+ # Do fancy formatting here
+ if self.SHOWBR:
+ # Add <BR> onto every line
+ for i in range(0, len(source)):
+ s=source[i]
+ if s==None: continue
+ s=CGIescape(s) +'<BR>'
+ dest[i]=s ; source[i]=None
+ else:
+ for i in range(0, len(source)):
+ s=source[i]
+ if s==None: continue
+ s=CGIescape(s)
+ if s[0:1] in ' \t\n': s='<P>'+s
+ dest[i]=s ; source[i]=None
+ article.body=filter(lambda x: x!=None, dest)
+ return article
+
+ def update_article(self, arcdir, article, prev, next):
+ import os
+ self.message('Updating HTML for article '+str(article.sequence))
+ try:
+ f=open(os.path.join(arcdir, article.filename), 'r')
+ article.loadbody_fromHTML(f)
+ f.close()
+ except IOError:
+ self.message("article file %s is missing!" % os.path.join(arcdir, article.filename))
+ article.prev=prev
+ article.next=next
+ f=open(os.path.join(arcdir, article.filename), 'w')
+ f.write(article.as_html())
+ f.close()
+
+
+
+
+
+
+
+
+
+
diff --git a/Mailman/HyperDatabase.py b/Mailman/HyperDatabase.py
new file mode 100644
index 000000000..33e3773d5
--- /dev/null
+++ b/Mailman/HyperDatabase.py
@@ -0,0 +1,276 @@
+
+import os
+import marshal
+import string
+
+import pipermail
+CACHESIZE = pipermail.CACHESIZE
+
+try:
+ import cPickle
+ pickle = cPickle
+except ImportError:
+ import pickle
+
+
+#
+# we're using a python dict in place of
+# a bsddb.btree database. only defining
+# the parts of the interface used by class HyperDatabase
+#
+class DumbBTree:
+
+ def __init__(self, path):
+ if os.path.exists(path):
+ self.dict = marshal.load(open(path))
+ else:
+ self.dict = {}
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ self.current_index = 0
+ self.path = path
+
+ def __delitem__(self, item):
+ try:
+ ci = self.sorted[self.current_index]
+ except IndexError:
+ ci = None
+ if ci == item:
+ try:
+ ci = self.sorted[self.current_index + 1]
+ except IndexError:
+ ci = None
+ del self.dict[item]
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ if ci is not None:
+ self.current_index = self.sorted.index(ci)
+ else:
+ self.current_index = self.current_index + 1
+
+
+
+
+ def first(self):
+ if not self.sorted:
+ raise KeyError
+ else:
+ sorted = self.sorted
+ res = sorted[0], self.dict[sorted[0]]
+ self.current_index = 1
+ return res
+
+ def last(self):
+ if not self.sorted:
+ raise KeyError
+ else:
+ sorted = self.sorted
+ self.current_index = len(self.sorted) - 1
+ return sorted[-1], self.dict[sorted[-1]]
+
+
+ def next(self):
+ try:
+ key = self.sorted[self.current_index]
+ except IndexError:
+ raise KeyError
+ self.current_index = self.current_index + 1
+ return key, self.dict[key]
+
+ def has_key(self, key):
+ return self.dict.has_key(key)
+
+
+ def set_location(self, loc):
+ if not self.dict.has_key(loc):
+ raise KeyError
+ self.current_index = self.sorted.index(loc)
+
+
+ def __getitem__(self, item):
+ return self.dict[item]
+
+
+ def __setitem__(self, item, val):
+ try:
+ current_item = self.sorted[self.current_index]
+ except IndexError:
+ current_item = item
+ self.dict[item] = val
+ self.sorted = self.dict.keys()
+ self.sorted.sort()
+ self.current_index = self.sorted.index(current_item)
+
+ def __len__(self):
+ return len(self.sorted)
+
+ def close(self):
+ fp = open(self.path, "w")
+ fp.write(marshal.dumps(self.dict))
+ fp.close()
+
+
+
+
+#
+# this is lifted straight out of pipermail with
+# the bsddb.btree replaced with above class.
+# didn't use inheritance because of all the
+# __internal stuff that needs to be here -scott
+#
+class HyperDatabase(pipermail.Database):
+ def __init__(self, basedir):
+ self.__cachekeys=[] ; self.__cachedict={}
+ self.__currentOpenArchive=None # The currently open indices
+ self.basedir=os.path.expanduser(basedir)
+ self.changed={} # Recently added articles, indexed only by message ID
+
+ def firstdate(self, archive):
+ import time
+ self.__openIndices(archive)
+ date='None'
+ try:
+ date, msgid = self.dateIndex.first()
+ date=time.asctime(time.localtime(string.atof(date)))
+ except KeyError: pass
+ return date
+
+ def lastdate(self, archive):
+ import time
+ self.__openIndices(archive)
+ date='None'
+ try:
+ date, msgid = self.dateIndex.last()
+ date=time.asctime(time.localtime(string.atof(date)))
+ except KeyError: pass
+ return date
+
+ def numArticles(self, archive):
+ self.__openIndices(archive)
+ return len(self.dateIndex)
+
+ # Add a single article to the internal indexes for an archive.
+
+ def addArticle(self, archive, article, subjectkey, authorkey):
+ self.__openIndices(archive)
+
+ # Add the new article
+ self.dateIndex[article.date]=article.msgid
+ self.authorIndex[authorkey]=article.msgid
+ self.subjectIndex[subjectkey]=article.msgid
+ # Set the 'body' attribute to empty, to avoid storing the whole message
+ temp = article.body ; article.body=[]
+ self.articleIndex[article.msgid]=pickle.dumps(article)
+ article.body=temp
+ self.changed[archive,article.msgid]=None
+
+ parentID=article.parentID
+ if parentID!=None and self.articleIndex.has_key(parentID):
+ parent=self.getArticle(archive, parentID)
+ myThreadKey=parent.threadKey+article.date+'-'
+ else: myThreadKey = article.date+'-'
+ article.threadKey=myThreadKey
+ self.setThreadKey(archive, myThreadKey+'\000'+article.msgid, article.msgid)
+
+    # Open the DumbBTree files that are being used as indices
+    # (dateIndex, authorIndex, subjectIndex, articleIndex)
+ def __openIndices(self, archive):
+ if self.__currentOpenArchive==archive: return
+ self.__closeIndices()
+ arcdir=os.path.join(self.basedir, 'database')
+ try: os.mkdir(arcdir, 0700)
+ except os.error: pass
+ for i in ['date', 'author', 'subject', 'article', 'thread']:
+ t=DumbBTree(os.path.join(arcdir, archive+'-'+i))
+ setattr(self, i+'Index', t)
+ self.__currentOpenArchive=archive
+
+    # Close the DumbBTree files that are being used as indices (if they're
+    # open--this is safe to call if they're already closed)
+ def __closeIndices(self):
+ if self.__currentOpenArchive!=None:
+ pass
+# print 'closing indices for [%s]' % (repr(self.__currentOpenArchive),)
+ for i in ['date', 'author', 'subject', 'thread', 'article']:
+ attr=i+'Index'
+ if hasattr(self, attr):
+ index=getattr(self, attr)
+ if i=='article':
+ if not hasattr(self, 'archive_length'): self.archive_length={}
+ self.archive_length[self.__currentOpenArchive]=len(index)
+ index.close()
+ delattr(self,attr)
+ self.__currentOpenArchive=None
+ def close(self):
+ self.__closeIndices()
+ def hasArticle(self, archive, msgid):
+ self.__openIndices(archive)
+ return self.articleIndex.has_key(msgid)
+ def setThreadKey(self, archive, key, msgid):
+ self.__openIndices(archive)
+ self.threadIndex[key]=msgid
+ def getArticle(self, archive, msgid):
+ self.__openIndices(archive)
+ if self.__cachedict.has_key(msgid):
+ self.__cachekeys.remove(msgid)
+ self.__cachekeys.append(msgid)
+ return self.__cachedict[msgid]
+ if len(self.__cachekeys)==CACHESIZE:
+ delkey, self.__cachekeys = self.__cachekeys[0], self.__cachekeys[1:]
+ del self.__cachedict[delkey]
+ s=self.articleIndex[msgid]
+ article=pickle.loads(s)
+ self.__cachekeys.append(msgid) ; self.__cachedict[msgid]=article
+ return article
+
+ def first(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ try:
+ key, msgid = index.first()
+ return msgid
+ except KeyError: return None
+ def next(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ try:
+ key, msgid = index.next()
+ return msgid
+ except KeyError: return None
+
+ def getOldestArticle(self, archive, subject):
+ self.__openIndices(archive)
+ subject=string.lower(subject)
+ try:
+ key, tempid=self.subjectIndex.set_location(subject)
+ self.subjectIndex.next()
+ [subject2, date]= string.split(key, '\0')
+ if subject!=subject2: return None
+ return tempid
+ except KeyError:
+ return None
+
+ def newArchive(self, archive): pass
+ def clearIndex(self, archive, index):
+ self.__openIndices(archive)
+ index=getattr(self, index+'Index')
+ finished=0
+ try:
+ key, msgid=self.threadIndex.first()
+ except KeyError: finished=1
+ while not finished:
+ del self.threadIndex[key]
+ try:
+ key, msgid=self.threadIndex.next()
+ except KeyError: finished=1
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Mailman/MailList.py b/Mailman/MailList.py
index 476cbd1ed..6f686717c 100644
--- a/Mailman/MailList.py
+++ b/Mailman/MailList.py
@@ -533,11 +533,6 @@ class MailList(MailCommandHandler, HTMLFormatter, Deliverer, ListAdmin,
# A "just-in-case" thing. This shouldn't have to be here.
ou = os.umask(002)
try:
-## import mm_archive
-## open(os.path.join(self._full_path,
-## mm_archive.ARCHIVE_PENDING), "a+").close()
-## open(os.path.join(self._full_path,
-## mm_archive.ARCHIVE_RETAIN), "a+").close()
open(os.path.join(mm_cfg.LOCK_DIR, '%s.lock' %
self._internal_name), 'a+').close()
open(os.path.join(self._full_path, "next-digest"), "a+").close()
@@ -569,6 +564,38 @@ class MailList(MailCommandHandler, HTMLFormatter, Deliverer, ListAdmin,
dict[key] = value
marshal.dump(dict, file)
file.close()
+ #
+ # we need to make sure that the archive
+ # directory has the right perms for public vs
+ # private. If it doesn't exist, or some weird
+ # permissions errors prevent us from stating
+ # the directory, it's pointless to try to
+ # fix the perms, so we just return -scott
+ #
+ try:
+ st = os.stat(self.archive_directory)
+ except os.error, rest:
+ sys.stderr.write("MailList.Save(): error getting archive mode "
+ "for %s!: %s\n" % (self.real_name, str(rest)))
+ return
+ import stat
+ mode = st[stat.ST_MODE]
+ if self.archive_private:
+ if mode != 0770:
+ try:
+ ou = os.umask(0)
+ os.chmod(self.archive_directory, 0770)
+ except os.error, rest:
+ sys.stderr.write("MailList.Save(): error setting archive mode "
+ "to private for %s!: %s\n" % (self.real_name, str(rest)))
+ else:
+ if mode != 0775:
+ try:
+ os.chmod(self.archive_directory, 0775)
+ except os.error, rest:
+ sys.stderr.write("MailList.Save(): error setting archive mode "
+ "to public for %s!: %s\n" % (self.real_name, str(rest)))
+
def Load(self, check_version = 1):
if self._tmp_lock:
@@ -942,3 +969,10 @@ class MailList(MailCommandHandler, HTMLFormatter, Deliverer, ListAdmin,
return ("<%s.%s %s%s at %s>"
% (self.__module__, self.__class__.__name__,
`self._internal_name`, status, hex(id(self))[2:]))
+
+
+
+
+
+
+
diff --git a/Mailman/versions.py b/Mailman/versions.py
index 4546fd1bb..94ece1ad6 100644
--- a/Mailman/versions.py
+++ b/Mailman/versions.py
@@ -62,10 +62,9 @@ def UpdateOldVars(l, stored_state):
PreferStored('bad_posters', 'forbidden_posters')
PreferStored('automatically_remove', 'automatic_bounce_action')
# - dropped vars:
- for a in ['archive_retain_text_copy',
- 'archive_update_frequency',
- 'archive_volume_frequency']:
- if hasattr(l, a): delattr(l, a)
+# for a in ['archive_retain_text_copy',
+# 'archive_update_frequency']:
+# if hasattr(l, a): delattr(l, a)
def UpdateOldUsers(l):
"""Transform sense of changed user options."""
diff --git a/Makefile.in b/Makefile.in
index 28ddb206e..7f45fad9b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -39,7 +39,7 @@ DEFS= @DEFS@
OPT= @OPT@
CFLAGS= $(OPT) $(DEFS)
-ARCH_INDEP_DIRS= public_html logs archives bin \
+ARCH_INDEP_DIRS= public_html public_html/archives logs archives bin \
archives/private archives/public lists locks templates scripts filters \
cron data Mailman Mailman/Cgi Mailman/Logging
ARCH_DEP_DIRS= cgi-bin mail
diff --git a/src/Makefile.in b/src/Makefile.in
index 363aed1b5..1e488a325 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -101,12 +101,14 @@ install: all
do \
exe=$(CGIDIR)/$$f$(CGIEXT); \
$(INSTALL_PROGRAM) $$f $$exe; \
- chmod g+s $$exe; \
+ chown mailman $$exe; \
+ chmod ug+s $$exe; \
done
for f in $(MAIL_PROGS); \
do \
$(INSTALL_PROGRAM) $$f $(MAILDIR); \
- chmod g+s $(MAILDIR)/$$f; \
+ chown mailman $(MAILDIR)/$$f; \
+ chmod ug+s $(MAILDIR)/$$f; \
done
# @for f in $(ALIAS_PROGS); \
# do \