#!/usr/local/bin/python # Hey Emacs, this is -*-Python-*- code! # # Pipermail 0.0.2-mm # # **NOTE** # # This internal version of pipermail has been deprecated in favor of use of # an external version of pipermail, available from: # http://starship.skyport.net/crew/amk/maintained/pipermail.html # The external version should be pointed at the created archive files. # # # Some minor mods have been made for use with the Mailman mailing list manager. # All changes will have JV by them. # # (C) Copyright 1996, A.M. Kuchling (amk@magnet.com) # Home page at http://amarok.magnet.com/python/pipermail.html # # HTML code for frames courtesy of Scott Hassan (hassan@cs.stanford.edu) # # TODO: # * Prev. article, next. article pointers in each article # * I suspect there may be problems with rfc822.py's getdate() method; # take a look at the threads "Greenaway and the net (fwd)" or # "Pillow Book pictures". To be looked into... # * Anything else Hypermail can do that we can't? # * General code cleanups # * Profiling & optimization # * Should there be an option to enable/disable frames? # * Like any truly useful program, Pipermail should have an ILU interface. # * There's now an option to keep from preserving line breaks, # so paragraphs in messages would be reflowed by the browser. # Unfortunately, this mangles .sigs horribly, and pipermail doesn't yet # put in paragraph breaks. Putting in the breaks will only require a # half hour or so; I have no clue as to how to preserve .sigs. # * Outside URLs shouldn't appear in the display frame. How to fix? # VERSION = "0.0.2.mm" import posixpath, time, os, string, sys, rfc822 # JV -- to get HOME_PAGE import mm_cfg class ListDict: def __init__(self): self.dict={} def keys(self): return self.dict.keys() def __setitem__(self, key, value): "Add the value to a list for the key, creating the list if needed." 
if not self.dict.has_key(key): self.dict[key]=[value] else: self.dict[key].append(value) def __getitem__(self, key): "Return the list matching a key" return self.dict[key] def PrintUsage(): print """Pipermail %s usage: pipermail [options] options: -a URL : URL to other archives -b URL : URL to archive information -c file : name of configuration file (default: ~/.pmrc) -d dir : directory where the output files will be placed (default: archive/) -l name : name of the output archive -m file : name of input file -s file : name where the archive state is stored (default: +'.pipermail' -u : Select 'update' mode -v : verbose mode of operation """ % (VERSION,) sys.exit(0) # Compile various important regexp patterns import regex, regsub # Starting directive htmlpat=regex.compile('^[ \t]*[ \t]*$') # Ending directive nohtmlpat=regex.compile('^[ \t]*[ \t]*$') # Match quoted text quotedpat=regex.compile('^[>|:]+') # Parenthesized human name paren_name_pat=regex.compile('.*$[(].*[)]$.*') # Subject lines preceded with 'Re:' REpat=regex.compile('[ \t]*[Rr][Ee][ \t]*:[ \t]*') # Lines in the configuration file: set pm_XXX = cfg_line_pat=regex.compile('^[ \t]*[sS][eE][tT][ \t]*[Pp][Mm]_$[a-zA-Z0-9]*$' '[ \t]*=[ \t]*$.*$[ \t\n]*$') # E-mail addresses and URLs in text emailpat=regex.compile('$[-+,.a-zA-Z0-9]*@[-+.a-zA-Z0-9]*$') urlpat=regex.compile('$[a-zA-Z0-9]+://[^ \t\n]+$') # URLs in text # Blank lines blankpat=regex.compile('^[ \t\n]*$') def ReadCfgFile(prefs): import posixpath try: f=open(posixpath.expanduser(prefs['CONFIGFILE']), 'r') except IOError, (num, msg): if num==2: return else: raise IOError, (num, msg) line=0 while(1): L=f.readline() ; line=line+1 if L=="": break if string.strip(L)=="": continue # Skip blank lines match=cfg_line_pat.match(L) if match==-1: print "Syntax error in line %i of %s" %(line, prefs['CONFIGFILE']) print L else: varname, value=cfg_line_pat.group(1,2) varname=string.upper(varname) if not prefs.has_key(varname): print ("Unknown variable name %s in 
line %i of %s" %(varname, line, prefs['CONFIGFILE'])) print L else: prefs[varname]=eval(value) f.close() def ReadEnvironment(prefs): import sys, os for key in prefs.keys(): envvar=string.upper('PM_'+key) if os.environ.has_key(envvar): if type(prefs[key])==type(''): prefs[key]=os.environ[envvar] else: prefs[key]=string.atoi(os.environ[envvar]) def UpdateMsgHeaders(prefs, filename, L): """Update the next/previous message information in a message header. The message is scanned for and comments, and new pointers are written. Otherwise, the text is simply copied without any processing.""" pass def ProcessMsgBody(prefs, msg, filename, articles): """Transform one mail message from plain text to HTML. This involves writing an HTML header, scanning through the text looking for directives, e-mail addresses, and URLs, and finishing off with a footer.""" import cgi, posixpath outputname=posixpath.join(prefs['DIR'], filename) output=open(outputname, 'w') os.chmod(outputname, prefs['FILEMODE']) subject, email, poster, date, datestr, parent, id = articles[filename] # JV if not email: email = '' if not subject: subject = '' if not poster: poster = '*Unknown*' if not datestr: datestr = '' output.write('' "%s Mailing List: %s" "

%s

" "%s (%s)
%s

" % (prefs['LABEL'], cgi.escape(subject),cgi.escape(subject), cgi.escape(poster),cgi.escape(email), cgi.escape(datestr))) output.write('

Messages sorted by:' '[ date ]' '[ thread ]' '[ subject ]' '[ author ]

\n') html_mode=0 if prefs['SHOWHR']: output.write('

') output.write('

') if not prefs['SHOWHTML']: output.write('

\n')
    # Convert the message body to HTML, one input line per iteration.
    #
    # Each line is either copied verbatim (while inside an HTML-mode
    # section delimited by htmlpat / nohtmlpat) or escaped with
    # cgi.escape() and then decorated: quoted text is italicised, and
    # e-mail addresses and URLs are turned into links.
    #
    # The patterns come from the old `regex` module, whose match() returns
    # the length of the match and whose search() returns the start index;
    # both return -1 on failure, hence the comparisons against -1 below.
    #
    # NOTE(review): the <I>, <BR>, <P> and <A HREF=...> markup in the
    # literals below was reconstructed from the surrounding comments and
    # the pref names (IQUOTES, SHOWBR, MAILCOMMAND) -- confirm against the
    # upstream pipermail source.
    msg.rewindbody()                    # Seek to start of message body
    quoted=-1                           # -1 means "current line is not quoted"
    while (1):
        L=msg.fp.readline()
        if L=="": break                 # End of the message body
        if html_mode:
            # If in HTML mode, check for ending tag; otherwise, we
            # copy the line without any changes.
            if nohtmlpat.match(L)==-1:
                output.write(L) ; continue
            else:
                html_mode=0
                # NOTE(review): only a newline is written here; markup may
                # be missing from this literal -- confirm against upstream.
                if not prefs['SHOWHTML']: output.write('\n')
                continue
        # Check for the opening HTML-mode directive (see htmlpat)
        elif htmlpat.match(L)!=-1:
            html_mode=1
            # NOTE(review): only a newline is written here; markup may
            # be missing from this literal -- confirm against upstream.
            if not prefs['SHOWHTML']: output.write('\n')
            continue
        if prefs['SHOWHTML'] and prefs['IQUOTES']:
            # Check for a line of quoted text and italicise it
            # (We have to do this before escaping HTML special
            # characters because '>' is commonly used.)
            quoted=quotedpat.match(L)
            if quoted!=-1:
                # L[:quoted] is the quote prefix, L[quoted:] the quoted text.
                L=cgi.escape(L[:quoted]) + '<I>' + cgi.escape(L[quoted:]) + '</I>'
                # If we're flowing the message text together, quoted lines
                # need explicit breaks, no matter what mode we're in.
                if prefs['SHOWHTML']: L=L+'<BR>'
            else: L=cgi.escape(L)
        else: L=cgi.escape(L)

        # Check for an e-mail address.  L2 accumulates the text that has
        # already been processed; L shrinks to the unprocessed remainder.
        L2="" ; i=emailpat.search(L)
        while i!=-1:
            length=len(emailpat.group(1))
            # MAILCOMMAND is a %-template with a %(TO)s slot.
            mailcmd=prefs['MAILCOMMAND'] % {'TO':L[i:i+length]}
            L2=L2+'%s<A HREF="%s">%s</A>' % (L[:i],
                 mailcmd, L[i:i+length])
            L=L[i+length:]
            i=emailpat.search(L)
        # Same scan again for URLs, over the result of the e-mail pass.
        # NOTE(review): cgi.escape() as called above does not escape '"',
        # so a double quote inside a matched URL ends the HREF attribute
        # early -- consider quoting the attribute value.
        L=L2+L ; L2=""; i=urlpat.search(L)
        while i!=-1:
            length=len(urlpat.group(1))
            L2=L2+'%s<A HREF="%s">%s</A>' % (L[:i],
                 L[i:i+length], L[i:i+length])
            L=L[i+length:]
            i=urlpat.search(L)
        L=L2+L
        if prefs['SHOWHTML']:
            # Strip trailing CR/LF characters; one '\n' is re-added below.
            while (L!="" and L[-1] in '\015\012'): L=L[:-1]
            if prefs['SHOWBR']:
                # We don't want to flow quoted passages; quoted lines
                # already received their break above.
                if quoted==-1: L=L+'<BR>'
            else:
                # If we're not adding <BR> to each line, we'll need to
                # insert <P> markup on blank lines to separate paragraphs.
                if blankpat.match(L)!=-1: L=L+'<P>'
            L=L+'\n'
        output.write(L)
	
    if not prefs['SHOWHTML'] and not html_mode: output.write('

') if prefs['SHOWHR']: output.write('

') output.write('\n\n') output.close() def WriteHTMLIndex(prefs, fp, L, articles, indexname): """Process a list L into an HTML index, written to fp. L is processed from left to right, and contains a list of 2-tuples; an integer of 1 or more giving the depth of indentation, and a list of strings which are used to reference the 'articles' dictionary. Most of the time the lists contain only 1 element.""" fp.write('\n' "" "%s Mailing List Archive by %s\n" % (prefs['LABEL'], indexname)) fp.write('

%s Mailing List Archive by %s

' '

Most recent messages' '
Messages sorted by:' % (prefs['LABEL'], indexname)) if indexname!='Date': fp.write('[ date ]') if indexname!='Subject': fp.write('[ subject ]') if indexname!='Author': fp.write('[ author ]') if indexname!='Thread': fp.write('[ thread ]') if prefs['ARCHIVES']!='NONE': fp.write('
Other mail archives' % (prefs['ARCHIVES'],)) # This doesn't look professional. -- JV # mailcmd=prefs['MAILCOMMAND'] % {'TO':'amk@magnet.com'} # fp.write('

Please inform amk@magnet.com if any of the messages are formatted incorrectly.' % (mailcmd,) ) fp.write("

Starting: %s
" "Ending: %s
Messages: %i

" % (prefs['firstDate'], prefs['endDate'], len(L)) ) # Write the index level=1 fp.write('

'+'\n') level=indent for j in keys: subj, email, poster, date, datestr, parent, id=articles[j] # XXX Should we put a mailto URL in here? fp.write('

%s %s\n' % (j, subj, poster) ) for i in range(0, indent): fp.write('

') # Write the footer import time now=time.asctime(time.localtime(time.time())) # JV -- Fixed a bug here. if prefs['ARCHIVES'] <> 'NONE': otherstr=('

Other mail archives' % (prefs['ARCHIVES'],) ) else: otherstr="" fp.write('Last message date: %s
' 'Archived on: %s
- Messages sorted by:' '[ date ]' '[ subject ]' '[ author ]' '[ thread ]' '%s
' % (prefs['endDate'], now, otherstr)) fp.write('
This archive was generated by ' # JV Updated the URL. '' 'Pipermail %s.' % (VERSION,)) # Set the hard-wired preferences first # JV Changed the SHOWHTML pref default to 0 because 1 looks bad. prefs={'CONFIGFILE':'~/.pmrc', 'MBOX':'mbox', 'ARCHIVES': 'NONE', 'ABOUT':'NONE', 'REVERSE':0, 'SHOWHEADERS':0, 'SHOWHTML':0, 'LABEL':"", 'DIR':'archive', 'DIRMODE':0755, 'FILEMODE':0644, 'OVERWRITE':0, 'VERBOSE':0, 'THRDLEVELS':3, 'SHOWBR':0, 'IQUOTES':1, 'SHOWHR':1, 'MAILCOMMAND':'mailto:%(TO)s', 'INDEXFILE':'NONE' } # Read the ~/.pmrc file ReadCfgFile(prefs) # Read environment variables ReadEnvironment(prefs) # Parse command-line options import getopt options, params=getopt.getopt(sys.argv[1:], 'a:b:c:d:l:m:s:uipvxzh?') for option, value in options: if option=='-a': prefs['ARCHIVES']=value if option=='-b': prefs['ABOUT']=value if option=='-c': prefs['CONFIGFILE']=value if option=='-d': prefs['DIR']=value # if option=='-f': prefs.frames=1 if option=='-i': prefs['MBOX']='-' if option=='-l': prefs['LABEL']=value if option=='-m': prefs['MBOX']=value if option=='-s': prefs['INDEXFILE']=value if option=='-p' or option=='-v': prefs['VERBOSE']=1 if option=='-x': prefs['OVERWRITE']=1 if option=='-z' or option=='-h' or option=='-?': PrintUsage() # Set up various variables articles={} ; sequence=0 for key in ['INDEXFILE', 'MBOX', 'CONFIGFILE', 'DIR']: prefs[key]=posixpath.expanduser(prefs[key]) if prefs['INDEXFILE']=='NONE': if prefs['MBOX']!='-': prefs['INDEXFILE']=prefs['MBOX']+'.pipermail' else: prefs['INDEXFILE']='mbox.pipermail' # Read an index file, if one can be found if not prefs['OVERWRITE']: # Look for a file contained pickled state import pickle try: if prefs['VERBOSE']: print 'Attempting to read index file', prefs['INDEXFILE'] f=open(prefs['INDEXFILE'], 'r') articles, sequence =pickle.load(f) f.close() except IOError: if prefs['VERBOSE']: print 'No index file found.' 
pass # Ignore errors # Open the input file if prefs['MBOX']=='-': prefs['MBOX']=sys.stdin else: if prefs['VERBOSE']: print 'Opening input file', prefs['MBOX'] prefs['MBOX']=open(prefs['MBOX'], 'r') # Create the destination directory; if it already exists, we don't care try: os.mkdir(prefs['DIR'], prefs['DIRMODE']) if prefs['VERBOSE']: print 'Directory %s created'%(prefs['DIR'],) except os.error, (errno, errmsg): pass # Create various data structures: # msgids maps Message-IDs to filenames. # roots maps Subject lines to (date, filename) tuples, and is used to # identify the oldest article with a given subject line for threading. msgids={} ; roots={} for i in articles.keys(): subject, email, poster, date, datestr, parent, id =articles[i] if id: msgids[id]=i if not roots.has_key(subject) or roots[subject]', '', L)) # Check if there's a name in parentheses i=paren_name_pat.match(poster) if i!=-1: poster=paren_name_pat.group(1)[1:-1] datestr=m.getheader('Date') # JV -- Hacks to make the getdate work. # These hacks might skew the post time a bit. # Crude, but so far, effective. words = string.split(datestr) if ((len(words[-1]) == 4) and (len(words) == 5) and (words[-1][:-1] == '199')): try: date = time.mktime(rfc822.parsedate('%s, %s %s %s %s' % (words[0], words[2], words[1], words[4], words[3]))) except: date = time.mktime(m.getdate('Date')) # Odd elif len(words) > 4 and words[4][-1] == ',': try: date = time.mktime(rfc822.parsedate('%s, %s %s %s %s' % (words[0], words[1], words[2], words[3], words[4][:-1]))) except: date = time.mktime(m.getdate('Date')) # Hmm else: try: date=time.mktime(m.getdate('Date')) except: print 'Error getting date!' 
print 'Subject = ', m.getheader('subject') print 'Date = ', m.getheader('date') id=m.getheader('Message-Id') if id: id=id[1:-1] ; msgids[id]=filename parent=None in_reply_to=m.getheader('In-Reply-To') if in_reply_to: in_reply_to=in_reply_to[1:-1] if msgids.has_key(in_reply_to): parent=msgids[in_reply_to] elif roots.has_key(subj) and roots[subj][0]") ; f.close() # JV changed... f=open(posixpath.join(prefs['DIR'], mm_cfg.HOME_PAGE), 'w') f.write("""%s Pipermail Archive """ % (prefs['LABEL'],prefs['LABEL']) ) import pickle if prefs['VERBOSE']: print 'Writing index file', prefs['INDEXFILE'] f=open(prefs['INDEXFILE'], 'w') pickle.dump( (articles, sequence), f ) f.close()