diff options
| author | bwarsaw | 2002-02-23 05:58:53 +0000 |
|---|---|---|
| committer | bwarsaw | 2002-02-23 05:58:53 +0000 |
| commit | d26184863640945f34f72f52cb5d3973721a0fcb (patch) | |
| tree | 2837c59f7a9e2735546f02c9769c91594c23f358 | |
| parent | e9bd5dcabb14254f5bf6b3809755f148726cd3fd (diff) | |
| download | mailman-d26184863640945f34f72f52cb5d3973721a0fcb.tar.gz mailman-d26184863640945f34f72f52cb5d3973721a0fcb.tar.zst mailman-d26184863640945f34f72f52cb5d3973721a0fcb.zip | |
| -rw-r--r-- | Mailman/pythonlib/.cvsignore | 3 | ||||
| -rw-r--r-- | Mailman/pythonlib/Makefile.in | 70 | ||||
| -rw-r--r-- | Mailman/pythonlib/StringIO.py | 22 | ||||
| -rw-r--r-- | Mailman/pythonlib/__init__.py | 15 | ||||
| -rwxr-xr-x | Mailman/pythonlib/cgi.py | 1040 | ||||
| -rwxr-xr-x | Mailman/pythonlib/mailbox.py | 312 | ||||
| -rw-r--r-- | Mailman/pythonlib/rfc822.py | 1002 |
7 files changed, 0 insertions, 2464 deletions
diff --git a/Mailman/pythonlib/.cvsignore b/Mailman/pythonlib/.cvsignore deleted file mode 100644 index 4ef7207b0..000000000 --- a/Mailman/pythonlib/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -mm_cfg.py.dist -Defaults.py diff --git a/Mailman/pythonlib/Makefile.in b/Mailman/pythonlib/Makefile.in deleted file mode 100644 index 48a02783a..000000000 --- a/Mailman/pythonlib/Makefile.in +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# NOTE: Makefile.in is converted into Makefile by the configure script -# in the parent directory. Once configure has run, you can recreate -# the Makefile by running just config.status. - -# Variables set by configure - -VPATH= @srcdir@ -srcdir= @srcdir@ -bindir= @bindir@ -prefix= @prefix@ -exec_prefix= @exec_prefix@ - -CC= @CC@ -CHMOD= @CHMOD@ -INSTALL= @INSTALL@ - -DEFS= @DEFS@ - -# Customizable but not set by configure - -OPT= @OPT@ -CFLAGS= $(OPT) $(DEFS) -PACKAGEDIR= $(prefix)/Mailman/pythonlib -SHELL= /bin/sh - -MODULES= *.py -SUBDIRS= - -# Modes for directories and executables created by the install -# process. Default to group-writable directories but -# user-only-writable for executables. -DIRMODE= 775 -EXEMODE= 755 -FILEMODE= 644 -INSTALL_PROGRAM=$(INSTALL) -m $(EXEMODE) - - -# Rules - -all: - -install: - for f in $(MODULES); \ - do \ - $(INSTALL) -m $(FILEMODE) $$f $(PACKAGEDIR); \ - done - -finish: - -clean: - -distclean: - -rm Makefile - -rm *.pyc diff --git a/Mailman/pythonlib/StringIO.py b/Mailman/pythonlib/StringIO.py deleted file mode 100644 index b8a2dc15d..000000000 --- a/Mailman/pythonlib/StringIO.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -"""Convenience wrapper which tries to load cStringIO, followed by StringIO.""" - -try: - from cStringIO import * -except ImportError: - from StringIO import * diff --git a/Mailman/pythonlib/__init__.py b/Mailman/pythonlib/__init__.py deleted file mode 100644 index a46d82f55..000000000 --- a/Mailman/pythonlib/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. diff --git a/Mailman/pythonlib/cgi.py b/Mailman/pythonlib/cgi.py deleted file mode 100755 index 19304af9e..000000000 --- a/Mailman/pythonlib/cgi.py +++ /dev/null @@ -1,1040 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. -""" - -# XXX Perhaps there should be a slimmed version that doesn't contain -# all those backwards compatible and debugging classes and functions? - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -import sys -import os -import urllib -import mimetools -import rfc822 -import UserDict -from StringIO import StringIO - -__all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict", - "SvFormContentDict", "InterpFormContentDict", "FormContent", - "parse", "parse_qs", "parse_qsl", "parse_multipart", - "parse_header", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage", "escape"] - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. - - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global logfp, log - if logfile and not logfp: - try: - logfp = open(logfile, "a") - except IOError: - pass - if not logfp: - log = nolog - else: - log = dolog - apply(log, allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - URL encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - """ - if not fp: - fp = sys.stdin - if not environ.has_key('REQUEST_METHOD'): - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError, 'Maximum content length exceeded' - qs = fp.read(clength) - else: - qs = '' # Unknown content-type - if environ.has_key('QUERY_STRING'): - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif environ.has_key('QUERY_STRING'): - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return parse_qs(qs, keep_blank_values, strict_parsing) - - -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - """ - dict = {} - for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): - if dict.has_key(name): - dict[name].append(value) - else: - dict[name] = [value] - return dict - -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - Returns a list, as G-d intended. - """ - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError, "bad query field: %s" % `name_value` - continue - if len(nv[1]) or keep_blank_values: - name = urllib.unquote(nv[0].replace('+', ' ')) - value = urllib.unquote(nv[1].replace('+', ' ')) - r.append((name, value)) - - return r - - -def parse_multipart(fp, pdict): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of conten-type header - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. This is easy to use but not - much good if you are expecting megabytes to be uploaded -- in that case, - use the FieldStorage class instead which is much more flexible. Note - that content-type is the raw, unparsed contents of the content-type - header. - - XXX This does not parse nested multipart parts -- use FieldStorage for - that. - - XXX This should really be subsumed by FieldStorage altogether -- no - point in having two implementations of the same parsing algorithm. - - """ - boundary = "" - if pdict.has_key('boundary'): - boundary = pdict['boundary'] - if not valid_boundary(boundary): - raise ValueError, ('Invalid boundary in multipart form: %s' - % `ib`) - - nextpart = "--" + boundary - lastpart = "--" + boundary + "--" - partdict = {} - terminator = "" - - while terminator != lastpart: - bytes = -1 - data = None - if terminator: - # At start of next part. Read headers first. - headers = mimetools.Message(fp) - clength = headers.getheader('content-length') - if clength: - try: - bytes = int(clength) - except ValueError: - pass - if bytes > 0: - if maxlen and bytes > maxlen: - raise ValueError, 'Maximum content length exceeded' - data = fp.read(bytes) - else: - data = "" - # Read lines until end of part. - lines = [] - while 1: - line = fp.readline() - if not line: - terminator = lastpart # End outer loop - break - if line[:2] == "--": - terminator = line.strip() - if terminator in (nextpart, lastpart): - break - lines.append(line) - # Done with part. - if data is None: - continue - if bytes < 0: - if lines: - # Strip final line terminator - line = lines[-1] - if line[-2:] == "\r\n": - line = line[:-2] - elif line[-1:] == "\n": - line = line[:-1] - lines[-1] = line - data = "".join(lines) - line = headers['content-disposition'] - if not line: - continue - key, params = parse_header(line) - if key != 'form-data': - continue - if params.has_key('name'): - name = params['name'] - else: - continue - if partdict.has_key(name): - partdict[name].append(data) - else: - partdict[name] = [data] - - return partdict - - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. - - """ - plist = map(lambda x: x.strip(), line.split(';')) - key = plist[0].lower() - del plist[0] - pdict = {} - for p in plist: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%s, %s)" % (`self.name`, `self.value`) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. - - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - - file: the file(-like) object from which you can read the data; - None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes rfc822.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - - def __init__(self, fp=None, headers=None, outerboundary="", - environ=os.environ, keep_blank_values=0, strict_parsing=0): - """Constructor. Read multipart/* until last part. - - Arguments, all optional: - - fp : file pointer; default: sys.stdin - (not used when the request method is GET) - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - URL encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - if environ.has_key('REQUEST_METHOD'): - method = environ['REQUEST_METHOD'].upper() - if method == 'GET' or method == 'HEAD': - if environ.has_key('QUERY_STRING'): - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - fp = StringIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if environ.has_key('CONTENT_TYPE'): - headers['content-type'] = environ['CONTENT_TYPE'] - if environ.has_key('CONTENT_LENGTH'): - headers['content-length'] = environ['CONTENT_LENGTH'] - self.fp = fp or sys.stdin - self.headers = headers - self.outerboundary = outerboundary - - # Process content-disposition header - cdisp, pdict = "", {} - if self.headers.has_key('content-disposition'): - cdisp, pdict = parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if pdict.has_key('name'): - self.name = pdict['name'] - self.filename = None - if pdict.has_key('filename'): - self.filename = pdict['filename'] - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. This should catch - # bogus clients which erroneously forget to include a content-type - # header. - # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if self.headers.has_key('content-type'): - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - self.innerboundary = "" - if pdict.has_key('boundary'): - self.innerboundary = pdict['boundary'] - clen = -1 - if self.headers.has_key('content-length'): - try: - clen = int(self.headers['content-length']) - except: - pass - if maxlen and clen > maxlen: - raise ValueError, 'Maximum content length exceeded' - self.length = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%s, %s, %s)" % ( - `self.name`, `self.filename`, `self.value`) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError, name - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError, "not indexable" - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError, key - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if self.has_key(key): - value = self[key] - if type(value) is type([]): - return map(lambda v: v.value, value) - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if self.has_key(key): - value = self[key] - if type(value) is type([]): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if self.has_key(key): - value = self[key] - if type(value) is type([]): - return map(lambda v: v.value, value) - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError, "not indexable" - keys = [] - for item in self.list: - if item.name not in keys: keys.append(item.name) - return keys - - def has_key(self, key): - """Dictionary style has_key() method.""" - if self.list is None: - raise TypeError, "not indexable" - for item in self.list: - if item.name == key: return 1 - return 0 - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = self.fp.read(self.length) - self.list = list = [] - for key, value in parse_qsl(qs, self.keep_blank_values, - self.strict_parsing): - list.append(MiniFieldStorage(key, value)) - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError, ('Invalid boundary in multipart form: %s' - % `ib`) - self.list = [] - klass = self.FieldStorageClass or self.__class__ - part = klass(self.fp, {}, ib, - environ, keep_blank_values, strict_parsing) - # Throw first part away - while not part.done: - headers = rfc822.Message(self.fp) - part = klass(self.fp, headers, ib, - environ, keep_blank_values, strict_parsing) - self.list.append(part) - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file('b') - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - self.file = self.__file = StringIO() - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file('') - self.file.write(self.__file.getvalue()) - self.__file = None - self.file.write(line) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline() - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary.""" - next = "--" + self.outerboundary - last = next + "--" - delim = "" - while 1: - line = self.fp.readline() - if not line: - self.done = -1 - break - if line[:2] == "--": - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - odelim = delim - if line[-2:] == "\r\n": - delim = "\r\n" - line = line[:-2] - elif line[-1] == "\n": - delim = "\n" - line = line[:-1] - else: - delim = "" - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next = "--" + self.outerboundary - last = next + "--" - while 1: - line = self.fp.readline() - if not line: - self.done = -1 - break - if line[:2] == "--": - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - - def make_file(self, binary=None): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The 'binary' argument is unused -- the file is always opened - in binary mode. - - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. - - """ - import tempfile - return tempfile.TemporaryFile("w+b") - - - -# Backwards Compatibility Classes -# =============================== - -class FormContentDict(UserDict.UserDict): - """Form content as dictionary with a list of values per field. - - form = FormContentDict() - - form[key] -> [value, value, ...] - form.has_key(key) -> Boolean - form.keys() -> [key, key, ...] - form.values() -> [[val, val, ...], [val, val, ...], ...] - form.items() -> [(key, [val, val, ...]), (key, [val, val, ...]), ...] - form.dict == {key: [val, val, ...], ...} - - """ - def __init__(self, environ=os.environ): - self.dict = self.data = parse(environ=environ) - self.query_string = environ['QUERY_STRING'] - - -class SvFormContentDict(FormContentDict): - """Form content as dictionary expecting a single value per field. - - If you only expect a single value for each field, then form[key] - will return that single value. It will raise an IndexError if - that expectation is not true. If you expect a field to have - possible multiple values, than you can use form.getlist(key) to - get all of the values. values() and items() are a compromise: - they return single strings where there is a single value, and - lists of strings otherwise. - - """ - def __getitem__(self, key): - if len(self.dict[key]) > 1: - raise IndexError, 'expecting a single value' - return self.dict[key][0] - def getlist(self, key): - return self.dict[key] - def values(self): - result = [] - for value in self.dict.values(): - if len(value) == 1: - result.append(value[0]) - else: result.append(value) - return result - def items(self): - result = [] - for key, value in self.dict.items(): - if len(value) == 1: - result.append((key, value[0])) - else: result.append((key, value)) - return result - - -class InterpFormContentDict(SvFormContentDict): - """This class is present for backwards compatibility only.""" - def __getitem__(self, key): - v = SvFormContentDict.__getitem__(self, key) - if v[0] in '0123456789+-.': - try: return int(v) - except ValueError: - try: return float(v) - except ValueError: pass - return v.strip() - def values(self): - result = [] - for key in self.keys(): - try: - result.append(self[key]) - except IndexError: - result.append(self.dict[key]) - return result - def items(self): - result = [] - for key in self.keys(): - try: - result.append((key, self[key])) - except IndexError: - result.append((key, self.dict[key])) - return result - - -class FormContent(FormContentDict): - """This class is present for backwards compatibility only.""" - def values(self, key): - if self.dict.has_key(key) :return self.dict[key] - else: return None - def indexed_value(self, key, location): - if self.dict.has_key(key): - if len(self.dict[key]) > location: - return self.dict[key][location] - else: return None - else: return None - def value(self, key): - if self.dict.has_key(key): return self.dict[key][0] - else: return None - def length(self, key): - return len(self.dict[key]) - def stripped(self, key): - if self.dict.has_key(key): return self.dict[key][0].strip() - else: return None - def pars(self): - return self.dict - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. - - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - import traceback - print "Content-type: text/html" - print - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec "testing print_exception() -- <I>italics?</I>" - def g(f=f): - f() - print "<H3>What follows is a test, not an actual exception:</H3>" - g() - except: - print_exception() - - print "<H1>Second try with a small maxlen...</H1>" - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print - print "<H3>Traceback (most recent call last):</H3>" - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print "<PRE>%s<B>%s</B></PRE>" % ( - escape("".join(list[:-1])), - escape(list[-1]), - ) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = environ.keys() - keys.sort() - print - print "<H3>Shell Environment:</H3>" - print "<DL>" - for key in keys: - print "<DT>", escape(key), "<DD>", escape(environ[key]) - print "</DL>" - print - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = form.keys() - keys.sort() - print - print "<H3>Form Contents:</H3>" - if not keys: - print "<P>No form fields." - print "<DL>" - for key in keys: - print "<DT>" + escape(key) + ":", - value = form[key] - print "<i>" + escape(`type(value)`) + "</i>" - print "<DD>" + escape(`value`) - print "</DL>" - print - -def print_directory(): - """Dump the current directory as HTML.""" - print - print "<H3>Current Working Directory:</H3>" - try: - pwd = os.getcwd() - except os.error, msg: - print "os.error:", escape(str(msg)) - else: - print escape(pwd) - print - -def print_arguments(): - print - print "<H3>Command Line Arguments:</H3>" - print - print sys.argv - print - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print """ -<H3>These environment variables could have been set:</H3> -<UL> -<LI>AUTH_TYPE -<LI>CONTENT_LENGTH -<LI>CONTENT_TYPE -<LI>DATE_GMT -<LI>DATE_LOCAL -<LI>DOCUMENT_NAME -<LI>DOCUMENT_ROOT -<LI>DOCUMENT_URI -<LI>GATEWAY_INTERFACE -<LI>LAST_MODIFIED -<LI>PATH -<LI>PATH_INFO -<LI>PATH_TRANSLATED -<LI>QUERY_STRING -<LI>REMOTE_ADDR -<LI>REMOTE_HOST -<LI>REMOTE_IDENT -<LI>REMOTE_USER -<LI>REQUEST_METHOD -<LI>SCRIPT_NAME -<LI>SERVER_NAME -<LI>SERVER_PORT -<LI>SERVER_PROTOCOL -<LI>SERVER_ROOT -<LI>SERVER_SOFTWARE -</UL> -In addition, HTTP headers sent by the server may be passed in the -environment as well. Here are some common variable names: -<UL> -<LI>HTTP_ACCEPT -<LI>HTTP_CONNECTION -<LI>HTTP_HOST -<LI>HTTP_PRAGMA -<LI>HTTP_REFERER -<LI>HTTP_USER_AGENT -</UL> -""" - - -# Utilities -# ========= - -def escape(s, quote=None): - """Replace special characters '&', '<' and '>' by SGML entities.""" - s = s.replace("&", "&") # Must be done first! - s = s.replace("<", "<") - s = s.replace(">", ">") - if quote: - s = s.replace('"', """) - return s - -def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): - import re - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/Mailman/pythonlib/mailbox.py b/Mailman/pythonlib/mailbox.py deleted file mode 100755 index 2f96106c6..000000000 --- a/Mailman/pythonlib/mailbox.py +++ /dev/null @@ -1,312 +0,0 @@ -#! /usr/bin/env python - -"""Classes to handle Unix style, MMDF style, and MH style mailboxes.""" - - -import rfc822 -import os - -__all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"] - -class _Mailbox: - def __init__(self, fp, factory=rfc822.Message): - self.fp = fp - self.seekp = 0 - self.factory = factory - - def seek(self, pos, whence=0): - if whence==1: # Relative to current position - self.pos = self.pos + pos - if whence==2: # Relative to file's end - self.pos = self.stop + pos - else: # Default - absolute position - self.pos = self.start + pos - - def next(self): - while 1: - self.fp.seek(self.seekp) - try: - self._search_start() - except EOFError: - self.seekp = self.fp.tell() - return None - start = self.fp.tell() - self._search_end() - self.seekp = stop = self.fp.tell() - if start != stop: - break - return self.factory(_Subfile(self.fp, start, stop)) - - -class _Subfile: - def __init__(self, fp, start, stop): - self.fp = fp - self.start = start - self.stop = stop - self.pos = self.start - - def read(self, length = None): - if self.pos >= self.stop: - return '' - remaining = self.stop - self.pos - if length is None or length < 0: - length = remaining - elif length > remaining: - length = remaining - self.fp.seek(self.pos) - data = self.fp.read(length) - self.pos = self.fp.tell() - return data - - def readline(self, length = None): - if self.pos >= self.stop: - return '' - if length is None: - length = self.stop - self.pos - self.fp.seek(self.pos) - data = self.fp.readline(length) - self.pos = self.fp.tell() - return data - - def readlines(self, sizehint = -1): - lines = [] - while 1: - line = self.readline() - if not line: - break - lines.append(line) - if sizehint >= 0: - sizehint = sizehint - len(line) - if sizehint <= 0: - break - return lines - - def tell(self): - return self.pos - self.start - - def seek(self, pos, whence=0): - if whence == 0: - self.pos = self.start + pos - elif whence == 1: - self.pos = self.pos + pos - elif whence == 2: - self.pos = self.stop + pos - - def close(self): - del self.fp - - -class UnixMailbox(_Mailbox): - def _search_start(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - raise EOFError - if line[:5] == 'From ' and self._isrealfromline(line): - self.fp.seek(pos) - return - - def _search_end(self): - self.fp.readline() # Throw away header line - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line[:5] == 'From ' and self._isrealfromline(line): - self.fp.seek(pos) - return - - # An overridable mechanism to test for From-line-ness. You can either - # specify a different regular expression or define a whole new - # _isrealfromline() method. Note that this only gets called for lines - # starting with the 5 characters "From ". - # - # BAW: According to - #http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html - # the only portable, reliable way to find message delimiters in a BSD (i.e - # Unix mailbox) style folder is to search for "\n\nFrom .*\n", or at the - # beginning of the file, "^From .*\n". While _fromlinepattern below seems - # like a good idea, in practice, there are too many variations for more - # strict parsing of the line to be completely accurate. - # - # _strict_isrealfromline() is the old version which tries to do stricter - # parsing of the From_ line. _portable_isrealfromline() simply returns - # true, since it's never called if the line doesn't already start with - # "From ". - # - # This algorithm, and the way it interacts with _search_start() and - # _search_end() may not be completely correct, because it doesn't check - # that the two characters preceding "From " are \n\n or the beginning of - # the file. Fixing this would require a more extensive rewrite than is - # necessary. For convenience, we've added a StrictUnixMailbox class which - # uses the older, more strict _fromlinepattern regular expression. - - _fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \ - r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$" - _regexp = None - - def _strict_isrealfromline(self, line): - if not self._regexp: - import re - self._regexp = re.compile(self._fromlinepattern) - return self._regexp.match(line) - - def _portable_isrealfromline(self, line): - return 1 - - _isrealfromline = _strict_isrealfromline - - -class PortableUnixMailbox(UnixMailbox): - _isrealfromline = UnixMailbox._portable_isrealfromline - - -class MmdfMailbox(_Mailbox): - def _search_start(self): - while 1: - line = self.fp.readline() - if not line: - raise EOFError - if line[:5] == '\001\001\001\001\n': - return - - def _search_end(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line == '\001\001\001\001\n': - self.fp.seek(pos) - return - - -class MHMailbox: - def __init__(self, dirname, factory=rfc822.Message): - import re - pat = re.compile('^[1-9][0-9]*$') - self.dirname = dirname - # the three following lines could be combined into: - # list = map(long, filter(pat.match, os.listdir(self.dirname))) - list = os.listdir(self.dirname) - list = filter(pat.match, list) - list = map(long, list) - list.sort() - # This only works in Python 1.6 or later; - # before that str() added 'L': - self.boxes = map(str, list) - self.factory = factory - - def next(self): - if not self.boxes: - return None - fn = self.boxes[0] - del self.boxes[0] - fp = open(os.path.join(self.dirname, fn)) - return self.factory(fp) - - -class Maildir: - # Qmail directory mailbox - - def __init__(self, dirname, factory=rfc822.Message): - self.dirname = dirname - self.factory = factory - - # check for new mail - newdir = os.path.join(self.dirname, 'new') - boxes = [os.path.join(newdir, f) - for f in os.listdir(newdir) if f[0] != '.'] - - # Now check for current mail in this maildir - curdir = os.path.join(self.dirname, 'cur') - boxes += [os.path.join(curdir, f) - for f in os.listdir(curdir) if f[0] != '.'] - - self.boxes = boxes - - def next(self): - if not self.boxes: - return None - fn = self.boxes[0] - del self.boxes[0] - fp = open(fn) - return self.factory(fp) - - -class BabylMailbox(_Mailbox): - def _search_start(self): - while 1: - line = self.fp.readline() - if not line: - raise EOFError - if line == '*** EOOH ***\n': - return - - def _search_end(self): - while 1: - pos = self.fp.tell() - line = self.fp.readline() - if not line: - return - if line == '\037\014\n': - self.fp.seek(pos) - return - - -def _test(): - import time - import sys - import os - - args = sys.argv[1:] - if not args: - for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER': - if os.environ.has_key(key): - mbox = os.environ[key] - break - else: - print "$MAIL, $LOGNAME nor $USER set -- who are you?" - return - else: - mbox = args[0] - if mbox[:1] == '+': - mbox = os.environ['HOME'] + '/Mail/' + mbox[1:] - elif not '/' in mbox: - mbox = '/usr/mail/' + mbox - if os.path.isdir(mbox): - if os.path.isdir(os.path.join(mbox, 'cur')): - mb = Maildir(mbox) - else: - mb = MHMailbox(mbox) - else: - fp = open(mbox, 'r') - mb = UnixMailbox(fp) - - msgs = [] - while 1: - msg = mb.next() - if msg is None: - break - msgs.append(msg) - if len(args) <= 1: - msg.fp = None - if len(args) > 1: - num = int(args[1]) - print 'Message %d body:'%num - msg = msgs[num-1] - msg.rewindbody() - sys.stdout.write(msg.fp.read()) - else: - print 'Mailbox',mbox,'has',len(msgs),'messages:' - for msg in msgs: - f = msg.getheader('from') or "" - s = msg.getheader('subject') or "" - d = msg.getheader('date') or "" - print '-%20.20s %20.20s %-30.30s'%(f, d[5:], s) - - -if __name__ == '__main__': - _test() diff --git a/Mailman/pythonlib/rfc822.py b/Mailman/pythonlib/rfc822.py deleted file mode 100644 index 09a2fd90a..000000000 --- a/Mailman/pythonlib/rfc822.py +++ /dev/null @@ -1,1002 +0,0 @@ -"""RFC 2822 message manipulation. - -Note: This is only a very rough sketch of a full RFC-822 parser; in particular -the tokenizing of addresses does not adhere to all the quoting rules. - -Note: RFC 2822 is a long awaited update to RFC 822. This module should -conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some -effort at RFC 2822 updates have been made, but a thorough audit has not been -performed. Consider any RFC 2822 non-conformance to be a bug. - - RFC 2822: http://www.faqs.org/rfcs/rfc2822.html - RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete) - -Directions for use: - -To create a Message object: first open a file, e.g.: - - fp = open(file, 'r') - -You can use any other legal way of getting an open file object, e.g. use -sys.stdin or call os.popen(). Then pass the open file object to the Message() -constructor: - - m = Message(fp) - -This class can work with any input object that supports a readline method. If -the input object has seek and tell capability, the rewindbody method will -work; also illegal lines will be pushed back onto the input stream. If the -input object lacks seek but has an `unread' method that can push back a line -of input, Message will use that to push back illegal lines. Thus this class -can be used to parse messages coming from a buffered stream. - -The optional `seekable' argument is provided as a workaround for certain stdio -libraries in which tell() discards buffered data before discovering that the -lseek() system call doesn't work. For maximum portability, you should set the -seekable argument to zero to prevent that initial \code{tell} when passing in -an unseekable object such as a a file object created from a socket object. If -it is 1 on entry -- which it is by default -- the tell() method of the open -file object is called once; if this raises an exception, seekable is reset to -0. For other nonzero values of seekable, this test is not made. - -To get the text of a particular header there are several methods: - - str = m.getheader(name) - str = m.getrawheader(name) - -where name is the name of the header, e.g. 'Subject'. The difference is that -getheader() strips the leading and trailing whitespace, while getrawheader() -doesn't. Both functions retain embedded whitespace (including newlines) -exactly as they are specified in the header, and leave the case of the text -unchanged. - -For addresses and address lists there are functions - - realname, mailaddress = m.getaddr(name) - list = m.getaddrlist(name) - -where the latter returns a list of (realname, mailaddr) tuples. - -There is also a method - - time = m.getdate(name) - -which parses a Date-like field and returns a time-compatible tuple, -i.e. a tuple such as returned by time.localtime() or accepted by -time.mktime(). - -See the class definition for lower level access methods. - -There are also some utility functions here. -""" -# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com> - -import time - -__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"] - -_blanklines = ('\r\n', '\n') # Optimization for islast() - - -class Message: - """Represents a single RFC 2822-compliant message.""" - - def __init__(self, fp, seekable = 1): - """Initialize the class instance and read the headers.""" - if seekable == 1: - # Exercise tell() to make sure it works - # (and then assume seek() works, too) - try: - fp.tell() - except (AttributeError, IOError): - seekable = 0 - else: - seekable = 1 - self.fp = fp - self.seekable = seekable - self.startofheaders = None - self.startofbody = None - # - if self.seekable: - try: - self.startofheaders = self.fp.tell() - except IOError: - self.seekable = 0 - # - self.readheaders() - # - if self.seekable: - try: - self.startofbody = self.fp.tell() - except IOError: - self.seekable = 0 - - def rewindbody(self): - """Rewind the file to the start of the body (if seekable).""" - if not self.seekable: - raise IOError, "unseekable file" - self.fp.seek(self.startofbody) - - def readheaders(self): - """Read header lines. - - Read header lines up to the entirely blank line that terminates them. - The (normally blank) line that ends the headers is skipped, but not - included in the returned list. If a non-header line ends the headers, - (which is an error), an attempt is made to backspace over it; it is - never included in the returned list. - - The variable self.status is set to the empty string if all went well, - otherwise it is an error message. The variable self.headers is a - completely uninterpreted list of lines contained in the header (so - printing them will reproduce the header exactly as it appears in the - file). - """ - self.dict = {} - self.unixfrom = '' - self.headers = list = [] - self.status = '' - headerseen = "" - firstline = 1 - startofline = unread = tell = None - if hasattr(self.fp, 'unread'): - unread = self.fp.unread - elif self.seekable: - tell = self.fp.tell - while 1: - if tell: - try: - startofline = tell() - except IOError: - startofline = tell = None - self.seekable = 0 - line = self.fp.readline() - if not line: - self.status = 'EOF in headers' - break - # Skip unix From name time lines - if firstline and line.startswith('From '): - self.unixfrom = self.unixfrom + line - continue - firstline = 0 - if headerseen and line[0] in ' \t': - # It's a continuation line. - list.append(line) - x = (self.dict[headerseen] + "\n " + line.strip()) - self.dict[headerseen] = x.strip() - continue - elif self.iscomment(line): - # It's a comment. Ignore it. - continue - elif self.islast(line): - # Note! No pushback here! The delimiter line gets eaten. - break - headerseen = self.isheader(line) - if headerseen: - # It's a legal header line, save it. - list.append(line) - self.dict[headerseen] = line[len(headerseen)+1:].strip() - continue - else: - # It's not a header line; throw it back and stop here. - if not self.dict: - self.status = 'No headers' - else: - self.status = 'Non-header line where header expected' - # Try to undo the read. - if unread: - unread(line) - elif tell: - self.fp.seek(startofline) - else: - self.status = self.status + '; bad seek' - break - - def isheader(self, line): - """Determine whether a given line is a legal header. - - This method should return the header name, suitably canonicalized. - You may override this method in order to use Message parsing on tagged - data in RFC 2822-like formats with special header formats. - """ - i = line.find(':') - if i > 0: - return line[:i].lower() - else: - return None - - def islast(self, line): - """Determine whether a line is a legal end of RFC 2822 headers. - - You may override this method if your application wants to bend the - rules, e.g. to strip trailing whitespace, or to recognize MH template - separators ('--------'). For convenience (e.g. for code reading from - sockets) a line consisting of \r\n also matches. - """ - return line in _blanklines - - def iscomment(self, line): - """Determine whether a line should be skipped entirely. - - You may override this method in order to use Message parsing on tagged - data in RFC 2822-like formats that support embedded comments or - free-text data. - """ - return None - - def getallmatchingheaders(self, name): - """Find all header lines matching a given header name. - - Look through the list of headers and find all lines matching a given - header name (and their continuation lines). A list of the lines is - returned, without interpretation. If the header does not occur, an - empty list is returned. If the header occurs multiple times, all - occurrences are returned. Case is not important in the header name. - """ - name = name.lower() + ':' - n = len(name) - list = [] - hit = 0 - for line in self.headers: - if line[:n].lower() == name: - hit = 1 - elif not line[:1].isspace(): - hit = 0 - if hit: - list.append(line) - return list - - def getfirstmatchingheader(self, name): - """Get the first header line matching name. - - This is similar to getallmatchingheaders, but it returns only the - first matching header (and its continuation lines). - """ - name = name.lower() + ':' - n = len(name) - list = [] - hit = 0 - for line in self.headers: - if hit: - if not line[:1].isspace(): - break - elif line[:n].lower() == name: - hit = 1 - if hit: - list.append(line) - return list - - def getrawheader(self, name): - """A higher-level interface to getfirstmatchingheader(). - - Return a string containing the literal text of the header but with the - keyword stripped. All leading, trailing and embedded whitespace is - kept in the string, however. Return None if the header does not - occur. - """ - - list = self.getfirstmatchingheader(name) - if not list: - return None - list[0] = list[0][len(name) + 1:] - return ''.join(list) - - def getheader(self, name, default=None): - """Get the header value for a name. - - This is the normal interface: it returns a stripped version of the - header value for a given header name, or None if it doesn't exist. - This uses the dictionary version which finds the *last* such header. - """ - try: - return self.dict[name.lower()] - except KeyError: - return default - get = getheader - - def getheaders(self, name): - """Get all values for a header. - - This returns a list of values for headers given more than once; each - value in the result list is stripped in the same way as the result of - getheader(). If the header is not given, return an empty list. - """ - result = [] - current = '' - have_header = 0 - for s in self.getallmatchingheaders(name): - if s[0].isspace(): - if current: - current = "%s\n %s" % (current, s.strip()) - else: - current = s.strip() - else: - if have_header: - result.append(current) - current = s[s.find(":") + 1:].strip() - have_header = 1 - if have_header: - result.append(current) - return result - - def getaddr(self, name): - """Get a single address from a header, as a tuple. - - An example return value: - ('Guido van Rossum', 'guido@cwi.nl') - """ - # New, by Ben Escoto - alist = self.getaddrlist(name) - if alist: - return alist[0] - else: - return (None, None) - - def getaddrlist(self, name): - """Get a list of addresses from a header. - - Retrieves a list of addresses from a header, where each address is a - tuple as returned by getaddr(). Scans all named headers, so it works - properly with multiple To: or Cc: headers for example. - """ - raw = [] - for h in self.getallmatchingheaders(name): - if h[0] in ' \t': - raw.append(h) - else: - if raw: - raw.append(', ') - i = h.find(':') - if i > 0: - addr = h[i+1:] - raw.append(addr) - alladdrs = ''.join(raw) - a = AddrlistClass(alladdrs) - return a.getaddrlist() - - def getdate(self, name): - """Retrieve a date field from a header. - - Retrieves a date field from the named header, returning a tuple - compatible with time.mktime(). - """ - try: - data = self[name] - except KeyError: - return None - return parsedate(data) - - def getdate_tz(self, name): - """Retrieve a date field from a header as a 10-tuple. - - The first 9 elements make up a tuple compatible with time.mktime(), - and the 10th is the offset of the poster's time zone from GMT/UTC. - """ - try: - data = self[name] - except KeyError: - return None - return parsedate_tz(data) - - - # Access as a dictionary (only finds *last* header of each type): - - def __len__(self): - """Get the number of headers in a message.""" - return len(self.dict) - - def __getitem__(self, name): - """Get a specific header, as from a dictionary.""" - return self.dict[name.lower()] - - def __setitem__(self, name, value): - """Set the value of a header. - - Note: This is not a perfect inversion of __getitem__, because any - changed headers get stuck at the end of the raw-headers list rather - than where the altered header was. - """ - del self[name] # Won't fail if it doesn't exist - self.dict[name.lower()] = value - text = name + ": " + value - lines = text.split("\n") - for line in lines: - self.headers.append(line + "\n") - - def __delitem__(self, name): - """Delete all occurrences of a specific header, if it is present.""" - name = name.lower() - if not self.dict.has_key(name): - return - del self.dict[name] - name = name + ':' - n = len(name) - list = [] - hit = 0 - for i in range(len(self.headers)): - line = self.headers[i] - if line[:n].lower() == name: - hit = 1 - elif not line[:1].isspace(): - hit = 0 - if hit: - list.append(i) - list.reverse() - for i in list: - del self.headers[i] - - def get(self, name, default=""): - name = name.lower() - if self.dict.has_key(name): - return self.dict[name] - else: - return default - - def setdefault(self, name, default=""): - lowername = name.lower() - if self.dict.has_key(lowername): - return self.dict[lowername] - else: - text = name + ": " + default - lines = text.split("\n") - for line in lines: - self.headers.append(line + "\n") - self.dict[lowername] = default - return default - - def has_key(self, name): - """Determine whether a message contains the named header.""" - return self.dict.has_key(name.lower()) - - def keys(self): - """Get all of a message's header field names.""" - return self.dict.keys() - - def values(self): - """Get all of a message's header field values.""" - return self.dict.values() - - def items(self): - """Get all of a message's headers. - - Returns a list of name, value tuples. - """ - return self.dict.items() - - def __str__(self): - str = '' - for hdr in self.headers: - str = str + hdr - return str - - -# Utility functions -# ----------------- - -# XXX Should fix unquote() and quote() to be really conformant. -# XXX The inverses of the parse functions may also be useful. - - -def unquote(str): - """Remove quotes from a string.""" - if len(str) > 1: - if str[0] == '"' and str[-1:] == '"': - return str[1:-1] - if str[0] == '<' and str[-1:] == '>': - return str[1:-1] - return str - - -def quote(str): - """Add quotes around a string.""" - return str.replace('\\', '\\\\').replace('"', '\\"') - - -def parseaddr(address): - """Parse an address into a (realname, mailaddr) tuple.""" - a = AddrlistClass(address) - list = a.getaddrlist() - if not list: - return (None, None) - else: - return list[0] - - -class AddrlistClass: - """Address parser class by Ben Escoto. - - To understand what this class does, it helps to have a copy of - RFC 2822 in front of you. - - http://www.faqs.org/rfcs/rfc2822.html - - Note: this class interface is deprecated and may be removed in the future. - Use rfc822.AddressList instead. - """ - - def __init__(self, field): - """Initialize a new instance. - - `field' is an unparsed address header field, containing one or more - addresses. - """ - self.specials = '()<>@,:;.\"[]' - self.pos = 0 - self.LWS = ' \t' - self.CR = '\r\n' - self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. - self.phraseends = self.atomends.replace('.', '') - self.field = field - self.commentlist = [] - - def gotonext(self): - """Parse up to the start of the next address.""" - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS + '\n\r': - self.pos = self.pos + 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - else: break - - def getaddrlist(self): - """Parse all addresses. - - Returns a list containing all of the addresses. - """ - ad = self.getaddress() - if ad: - return ad + self.getaddrlist() - else: return [] - - def getaddress(self): - """Parse the next address.""" - self.commentlist = [] - self.gotonext() - - oldpos = self.pos - oldcl = self.commentlist - plist = self.getphraselist() - - self.gotonext() - returnlist = [] - - if self.pos >= len(self.field): - # Bad email address technically, no domain. - if plist: - returnlist = [(' '.join(self.commentlist), plist[0])] - - elif self.field[self.pos] in '.@': - # email address is just an addrspec - # this isn't very efficient since we start over - self.pos = oldpos - self.commentlist = oldcl - addrspec = self.getaddrspec() - returnlist = [(' '.join(self.commentlist), addrspec)] - - elif self.field[self.pos] == ':': - # address is a group - returnlist = [] - - fieldlen = len(self.field) - self.pos = self.pos + 1 - while self.pos < len(self.field): - self.gotonext() - if self.pos < fieldlen and self.field[self.pos] == ';': - self.pos = self.pos + 1 - break - returnlist = returnlist + self.getaddress() - - elif self.field[self.pos] == '<': - # Address is a phrase then a route addr - routeaddr = self.getrouteaddr() - - if self.commentlist: - returnlist = [(' '.join(plist) + ' (' + \ - ' '.join(self.commentlist) + ')', routeaddr)] - else: returnlist = [(' '.join(plist), routeaddr)] - - else: - if plist: - returnlist = [(' '.join(self.commentlist), plist[0])] - elif self.field[self.pos] in self.specials: - self.pos = self.pos + 1 - - self.gotonext() - if self.pos < len(self.field) and self.field[self.pos] == ',': - self.pos = self.pos + 1 - return returnlist - - def getrouteaddr(self): - """Parse a route address (Return-path value). - - This method just skips all the route stuff and returns the addrspec. - """ - if self.field[self.pos] != '<': - return - - expectroute = 0 - self.pos = self.pos + 1 - self.gotonext() - adlist = None - while self.pos < len(self.field): - if expectroute: - self.getdomain() - expectroute = 0 - elif self.field[self.pos] == '>': - self.pos = self.pos + 1 - break - elif self.field[self.pos] == '@': - self.pos = self.pos + 1 - expectroute = 1 - elif self.field[self.pos] == ':': - self.pos = self.pos + 1 - expectaddrspec = 1 - else: - adlist = self.getaddrspec() - self.pos = self.pos + 1 - break - self.gotonext() - - return adlist - - def getaddrspec(self): - """Parse an RFC 2822 addr-spec.""" - aslist = [] - - self.gotonext() - while self.pos < len(self.field): - if self.field[self.pos] == '.': - aslist.append('.') - self.pos = self.pos + 1 - elif self.field[self.pos] == '"': - aslist.append('"%s"' % self.getquote()) - elif self.field[self.pos] in self.atomends: - break - else: aslist.append(self.getatom()) - self.gotonext() - - if self.pos >= len(self.field) or self.field[self.pos] != '@': - return ''.join(aslist) - - aslist.append('@') - self.pos = self.pos + 1 - self.gotonext() - return ''.join(aslist) + self.getdomain() - - def getdomain(self): - """Get the complete domain name from an address.""" - sdlist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: - self.pos = self.pos + 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] == '[': - sdlist.append(self.getdomainliteral()) - elif self.field[self.pos] == '.': - self.pos = self.pos + 1 - sdlist.append('.') - elif self.field[self.pos] in self.atomends: - break - else: sdlist.append(self.getatom()) - return ''.join(sdlist) - - def getdelimited(self, beginchar, endchars, allowcomments = 1): - """Parse a header fragment delimited by special characters. - - `beginchar' is the start character for the fragment. If self is not - looking at an instance of `beginchar' then getdelimited returns the - empty string. - - `endchars' is a sequence of allowable end-delimiting characters. - Parsing stops when one of these is encountered. - - If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed - within the parsed fragment. - """ - if self.field[self.pos] != beginchar: - return '' - - slist = [''] - quote = 0 - self.pos = self.pos + 1 - while self.pos < len(self.field): - if quote == 1: - slist.append(self.field[self.pos]) - quote = 0 - elif self.field[self.pos] in endchars: - self.pos = self.pos + 1 - break - elif allowcomments and self.field[self.pos] == '(': - slist.append(self.getcomment()) - elif self.field[self.pos] == '\\': - quote = 1 - else: - slist.append(self.field[self.pos]) - self.pos = self.pos + 1 - - return ''.join(slist) - - def getquote(self): - """Get a quote-delimited fragment from self's field.""" - return self.getdelimited('"', '"\r', 0) - - def getcomment(self): - """Get a parenthesis-delimited fragment from self's field.""" - return self.getdelimited('(', ')\r', 1) - - def getdomainliteral(self): - """Parse an RFC 2822 domain-literal.""" - return '[%s]' % self.getdelimited('[', ']\r', 0) - - def getatom(self, atomends=None): - """Parse an RFC 2822 atom. - - Optional atomends specifies a different set of end token delimiters - (the default is to use self.atomends). This is used e.g. in - getphraselist() since phrase endings must not include the `.' (which - is legal in phrases).""" - atomlist = [''] - if atomends is None: - atomends = self.atomends - - while self.pos < len(self.field): - if self.field[self.pos] in atomends: - break - else: atomlist.append(self.field[self.pos]) - self.pos = self.pos + 1 - - return ''.join(atomlist) - - def getphraselist(self): - """Parse a sequence of RFC 2822 phrases. - - A phrase is a sequence of words, which are in turn either RFC 2822 - atoms or quoted-strings. Phrases are canonicalized by squeezing all - runs of continuous whitespace into one space. - """ - plist = [] - - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: - self.pos = self.pos + 1 - elif self.field[self.pos] == '"': - plist.append(self.getquote()) - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] in self.phraseends: - break - else: - plist.append(self.getatom(self.phraseends)) - - return plist - -class AddressList(AddrlistClass): - """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" - def __init__(self, field): - AddrlistClass.__init__(self, field) - if field: - self.addresslist = self.getaddrlist() - else: - self.addresslist = [] - - def __len__(self): - return len(self.addresslist) - - def __str__(self): - return ", ".join(map(dump_address_pair, self.addresslist)) - - def __add__(self, other): - # Set union - newaddr = AddressList(None) - newaddr.addresslist = self.addresslist[:] - for x in other.addresslist: - if not x in self.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __iadd__(self, other): - # Set union, in-place - for x in other.addresslist: - if not x in self.addresslist: - self.addresslist.append(x) - return self - - def __sub__(self, other): - # Set difference - newaddr = AddressList(None) - for x in self.addresslist: - if not x in other.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __isub__(self, other): - # Set difference, in-place - for x in other.addresslist: - if x in self.addresslist: - self.addresslist.remove(x) - return self - - def __getitem__(self, index): - # Make indexing, slices, and 'in' work - return self.addresslist[index] - -def dump_address_pair(pair): - """Dump a (name, address) pair in a canonicalized form.""" - if pair[0]: - return '"' + pair[0] + '" <' + pair[1] + '>' - else: - return pair[1] - -# Parse a date field - -_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', - 'aug', 'sep', 'oct', 'nov', 'dec', - 'january', 'february', 'march', 'april', 'may', 'june', 'july', - 'august', 'september', 'october', 'november', 'december'] -_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - -# The timezone table does not include the military time zones defined -# in RFC822, other than Z. According to RFC1123, the description in -# RFC822 gets the signs wrong, so we can't rely on any such time -# zones. RFC1123 recommends that numeric timezone indicators be used -# instead of timezone names. - -_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, - 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) - 'EST': -500, 'EDT': -400, # Eastern - 'CST': -600, 'CDT': -500, # Central - 'MST': -700, 'MDT': -600, # Mountain - 'PST': -800, 'PDT': -700 # Pacific - } - - -def parsedate_tz(data): - """Convert a date string to a time tuple. - - Accounts for military timezones. - """ - data = data.split() - if data[0][-1] in (',', '.') or data[0].lower() in _daynames: - # There's a dayname here. Skip it - del data[0] - if len(data) == 3: # RFC 850 date, deprecated - stuff = data[0].split('-') - if len(stuff) == 3: - data = stuff + data[1:] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i > 0: - data[3:] = [s[:i], s[i+1:]] - else: - data.append('') # Dummy tz - if len(data) < 5: - return None - data = data[:5] - [dd, mm, yy, tm, tz] = data - mm = mm.lower() - if not mm in _monthnames: - dd, mm = mm, dd.lower() - if not mm in _monthnames: - return None - mm = _monthnames.index(mm)+1 - if mm > 12: mm = mm - 12 - if dd[-1] == ',': - dd = dd[:-1] - i = yy.find(':') - if i > 0: - yy, tm = tm, yy - if yy[-1] == ',': - yy = yy[:-1] - if not yy[0].isdigit(): - yy, tz = tz, yy - if tm[-1] == ',': - tm = tm[:-1] - tm = tm.split(':') - if len(tm) == 2: - [thh, tmm] = tm - tss = '0' - elif len(tm) == 3: - [thh, tmm, tss] = tm - else: - return None - try: - yy = int(yy) - dd = int(dd) - thh = int(thh) - tmm = int(tmm) - tss = int(tss) - except ValueError: - return None - tzoffset = None - tz = tz.upper() - if _timezones.has_key(tz): - tzoffset = _timezones[tz] - else: - try: - tzoffset = int(tz) - except ValueError: - pass - # Convert a timezone offset into seconds ; -0500 -> -18000 - if tzoffset: - if tzoffset < 0: - tzsign = -1 - tzoffset = -tzoffset - else: - tzsign = 1 - tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) - tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset) - return tuple - - -def parsedate(data): - """Convert a time string to a time tuple.""" - t = parsedate_tz(data) - if type(t) == type( () ): - return t[:9] - else: return t - - -def mktime_tz(data): - """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.""" - if data[9] is None: - # No zone info, so localtime is better assumption than GMT - return time.mktime(data[:8] + (-1,)) - else: - t = time.mktime(data[:8] + (0,)) - return t - data[9] - time.timezone - -def formatdate(timeval=None): - """Returns time format preferred for Internet standards. - - Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 - """ - if timeval is None: - timeval = time.time() - return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(timeval)) - - -# When used as script, run a small test program. -# The first command line argument must be a filename containing one -# message in RFC-822 format. - -if __name__ == '__main__': - import sys, os - file = os.path.join(os.environ['HOME'], 'Mail/inbox/1') - if sys.argv[1:]: file = sys.argv[1] - f = open(file, 'r') - m = Message(f) - print 'From:', m.getaddr('from') - print 'To:', m.getaddrlist('to') - print 'Subject:', m.getheader('subject') - print 'Date:', m.getheader('date') - date = m.getdate_tz('date') - tz = date[-1] - date = time.localtime(mktime_tz(date)) - if date: - print 'ParsedDate:', time.asctime(date), - hhmmss = tz - hhmm, ss = divmod(hhmmss, 60) - hh, mm = divmod(hhmm, 60) - print "%+03d%02d" % (hh, mm), - if ss: print ".%02d" % ss, - print - else: - print 'ParsedDate:', None - m.rewindbody() - n = 0 - while f.readline(): - n = n + 1 - print 'Lines:', n - print '-'*70 - print 'len =', len(m) - if m.has_key('Date'): print 'Date =', m['Date'] - if m.has_key('X-Nonsense'): pass - print 'keys =', m.keys() - print 'values =', m.values() - print 'items =', m.items() |
