1 files changed, 702 insertions, 0 deletions
diff --git a/src/mailman/Utils.py b/src/mailman/Utils.py
new file mode 100644
index 000000000..9946273c9
--- /dev/null
+++ b/src/mailman/Utils.py
@@ -0,0 +1,702 @@
+# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Miscellaneous essential routines.
+
+This includes actual message transmission routines, address checking and
+message and address munging, a handy-dandy routine to map a function on all
+the mailing lists, and whatever else doesn't belong elsewhere.
+"""
+
+import os
+import re
+import cgi
+import time
+import errno
+import base64
+import random
+import logging
+import htmlentitydefs
+import email.Header
+import email.Iterators
+
+from email.Errors import HeaderParseError
+from lazr.config import as_boolean
+from string import ascii_letters, digits, whitespace
+
+import mailman.templates
+
+from mailman import passwords
+from mailman.config import config
+from mailman.core import errors
+from mailman.utilities.string import expand
+
+
+AT = '@'
+CR = '\r'
+DOT = '.'
+EMPTYSTRING = ''
+IDENTCHARS = ascii_letters + digits + '_'
+NL = '\n'
+UEMPTYSTRING = u''
+TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__)
+
+# Search for $(identifier)s strings, except that the trailing s is optional,
+# since that's a common mistake
+cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE)
+# Search for $$, $identifier, or ${identifier}
+dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE)
+
+log = logging.getLogger('mailman.error')
+
+
+
+# a much more naive implementation than say, Emacs's fill-paragraph!
+def wrap(text, column=70, honor_leading_ws=True):
+    """Wrap and fill the text to the specified column.
+
+    Wrapping is always in effect, although if it is not possible to wrap a
+    line (because some word is longer than `column' characters) the line is
+    broken at the next available whitespace boundary.  Paragraphs are also
+    always filled, unless honor_leading_ws is true and the line begins with
+    whitespace.  This is the algorithm that the Python FAQ wizard uses, and
+    seems like a good compromise.
+
+    """
+    wrapped = ''
+    # first split the text into paragraphs, defined as a blank line
+    paras = re.split('\n\n', text)
+    for para in paras:
+        # fill
+        lines = []
+        fillprev = False
+        for line in para.split(NL):
+            if not line:
+                lines.append(line)
+                continue
+            if honor_leading_ws and line[0] in whitespace:
+                fillthis = False
+            else:
+                fillthis = True
+            if fillprev and fillthis:
+                # if the previous line should be filled, then just append a
+                # single space, and the rest of the current line
+                lines[-1] = lines[-1].rstrip() + ' ' + line
+            else:
+                # no fill, i.e. retain newline
+                lines.append(line)
+            fillprev = fillthis
+        # wrap each line
+        for text in lines:
+            while text:
+                if len(text) <= column:
+                    line = text
+                    text = ''
+                else:
+                    bol = column
+                    # find the last whitespace character
+                    while bol > 0 and text[bol] not in whitespace:
+                        bol -= 1
+                    # now find the last non-whitespace character
+                    eol = bol
+                    while eol > 0 and text[eol] in whitespace:
+                        eol -= 1
+                    # watch out for text that's longer than the column width
+                    if eol == 0:
+                        # break on whitespace after column
+                        eol = column
+                        while eol < len(text) and text[eol] not in whitespace:
+                            eol += 1
+                        bol = eol
+                        while bol < len(text) and text[bol] in whitespace:
+                            bol += 1
+                        bol -= 1
+                    line = text[:eol+1] + '\n'
+                    # find the next non-whitespace character
+                    bol += 1
+                    while bol < len(text) and text[bol] in whitespace:
+                        bol += 1
+                    text = text[bol:]
+                wrapped += line
+            wrapped += '\n'
+            # end while text
+        wrapped += '\n'
+        # end for text in lines
+    # the last two newlines are bogus
+    return wrapped[:-2]
+
+
+
+def QuotePeriods(text):
+    JOINER = '\n .\n'
+    SEP = '\n.\n'
+    return JOINER.join(text.split(SEP))
+
+
+# This takes an email address, and returns a tuple containing (user,host)
+def ParseEmail(email):
+    user = None
+    domain = None
+    email = email.lower()
+    at_sign = email.find('@')
+    if at_sign < 1:
+        return email, None
+    user = email[:at_sign]
+    rest = email[at_sign+1:]
+    domain = rest.split('.')
+    return user, domain
+
+
+def LCDomain(addr):
+    "returns the address with the domain part lowercased"
+    atind = addr.find('@')
+    if atind == -1: # no domain part
+        return addr
+    return addr[:atind] + '@' + addr[atind+1:].lower()
+
+
+# TBD: what other characters should be disallowed?
+_badchars = re.compile(r'[][()<>|;^,\000-\037\177-\377]')
+
+def ValidateEmail(s):
+    """Verify that the an email address isn't grossly evil."""
+    # Pretty minimal, cheesy check.  We could do better...
+    if not s or ' ' in s:
+        raise errors.InvalidEmailAddress(repr(s))
+    if _badchars.search(s) or s[0] == '-':
+        raise errors.InvalidEmailAddress(repr(s))
+    user, domain_parts = ParseEmail(s)
+    # Local, unqualified addresses are not allowed.
+    if not domain_parts:
+        raise errors.InvalidEmailAddress(repr(s))
+    if len(domain_parts) < 2:
+        raise errors.InvalidEmailAddress(repr(s))
+
+
+
+# Patterns which may be used to form malicious path to inject a new
+# line in the mailman error log. (TK: advisory by Moritz Naumann)
+CRNLpat = re.compile(r'[^\x21-\x7e]')
+
+def GetPathPieces(envar='PATH_INFO'):
+    path = os.environ.get(envar)
+    if path:
+        if CRNLpat.search(path):
+            path = CRNLpat.split(path)[0]
+            log.error('Warning: Possible malformed path attack.')
+        return [p for p in path.split('/') if p]
+    return []
+
+
+
+def ScriptURL(target):
+    up = '../' * len(GetPathPieces())
+    return '%s%s' % (up, target + config.CGIEXT)
+
+
+
+def GetPossibleMatchingAddrs(name):
+    """returns a sorted list of addresses that could possibly match
+    a given name.
+
+    For Example, given scott@pobox.com, return ['scott@pobox.com'],
+    given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com',
+                                           'scott@pobox.com']"""
+
+    name = name.lower()
+    user, domain = ParseEmail(name)
+    res = [name]
+    if domain:
+        domain = domain[1:]
+        while len(domain) >= 2:
+            res.append("%s@%s" % (user, DOT.join(domain)))
+            domain = domain[1:]
+    return res
+
+
+
+def List2Dict(L, foldcase=False):
+    """Return a dict keyed by the entries in the list passed to it."""
+    d = {}
+    if foldcase:
+        for i in L:
+            d[i.lower()] = True
+    else:
+        for i in L:
+            d[i] = True
+    return d
+
+
+
+_vowels = ('a', 'e', 'i', 'o', 'u')
+_consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
+               'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
+_syllables = []
+
+for v in _vowels:
+    for c in _consonants:
+        _syllables.append(c+v)
+        _syllables.append(v+c)
+del c, v
+
+def UserFriendly_MakeRandomPassword(length):
+    syls = []
+    while len(syls) * 2 < length:
+        syls.append(random.choice(_syllables))
+    return EMPTYSTRING.join(syls)[:length]
+
+
+def Secure_MakeRandomPassword(length):
+    bytesread = 0
+    bytes = []
+    fd = None
+    try:
+        while bytesread < length:
+            try:
+                # Python 2.4 has this on available systems.
+                newbytes = os.urandom(length - bytesread)
+            except (AttributeError, NotImplementedError):
+                if fd is None:
+                    try:
+                        fd = os.open('/dev/urandom', os.O_RDONLY)
+                    except OSError, e:
+                        if e.errno <> errno.ENOENT:
+                            raise
+                        # We have no available source of cryptographically
+                        # secure random characters.  Log an error and fallback
+                        # to the user friendly passwords.
+                        log.error(
+                            'urandom not available, passwords not secure')
+                        return UserFriendly_MakeRandomPassword(length)
+                newbytes = os.read(fd, length - bytesread)
+            bytes.append(newbytes)
+            bytesread += len(newbytes)
+        s = base64.encodestring(EMPTYSTRING.join(bytes))
+        # base64 will expand the string by 4/3rds
+        return s.replace('\n', '')[:length]
+    finally:
+        if fd is not None:
+            os.close(fd)
+
+
+def MakeRandomPassword(length=None):
+    if length is None:
+        length = int(config.passwords.member_password_length)
+    if as_boolean(config.passwords.user_friendly_passwords):
+        password = UserFriendly_MakeRandomPassword(length)
+    else:
+        password = Secure_MakeRandomPassword(length)
+    return password.decode('ascii')
+
+
+def GetRandomSeed():
+    chr1 = int(random.random() * 52)
+    chr2 = int(random.random() * 52)
+    def mkletter(c):
+        if 0 <= c < 26:
+            c += 65
+        if 26 <= c < 52:
+            #c = c - 26 + 97
+            c += 71
+        return c
+    return "%c%c" % tuple(map(mkletter, (chr1, chr2)))
+
+
+
+def set_global_password(pw, siteadmin=True, scheme=None):
+    if scheme is None:
+        scheme = passwords.Schemes.ssha
+    if siteadmin:
+        filename = config.SITE_PW_FILE
+    else:
+        filename = config.LISTCREATOR_PW_FILE
+    try:
+        fp = open(filename, 'w')
+        print >> fp, passwords.make_secret(pw, scheme)
+    finally:
+        fp.close()
+
+
+def get_global_password(siteadmin=True):
+    if siteadmin:
+        filename = config.SITE_PW_FILE
+    else:
+        filename = config.LISTCREATOR_PW_FILE
+    try:
+        fp = open(filename)
+        challenge = fp.read()[:-1]                # strip off trailing nl
+        fp.close()
+    except IOError, e:
+        if e.errno <> errno.ENOENT:
+            raise
+        # It's okay not to have a site admin password
+        return None
+    return challenge
+
+
+def check_global_password(response, siteadmin=True):
+    challenge = get_global_password(siteadmin)
+    if challenge is None:
+        return False
+    return passwords.check_response(challenge, response)
+
+
+
+def websafe(s):
+    return cgi.escape(s, quote=True)
+
+
+def nntpsplit(s):
+    parts = s.split(':', 1)
+    if len(parts) == 2:
+        try:
+            return parts[0], int(parts[1])
+        except ValueError:
+            pass
+    # Use the defaults
+    return s, 119
+
+
+
+# Just changing these two functions should be enough to control the way
+# that email address obscuring is handled.
+def ObscureEmail(addr, for_text=False):
+    """Make email address unrecognizable to web spiders, but invertable.
+
+    When for_text option is set (not default), make a sentence fragment
+    instead of a token."""
+    if for_text:
+        return addr.replace('@', ' at ')
+    else:
+        return addr.replace('@', '--at--')
+
+def UnobscureEmail(addr):
+    """Invert ObscureEmail() conversion."""
+    # Contrived to act as an identity operation on already-unobscured
+    # emails, so routines expecting obscured ones will accept both.
+    return addr.replace('--at--', '@')
+
+
+
+class OuterExit(Exception):
+    pass
+
+def findtext(templatefile, raw_dict=None, raw=False, lang=None, mlist=None):
+    # Make some text from a template file.  The order of searches depends on
+    # whether mlist and lang are provided.  Once the templatefile is found,
+    # string substitution is performed by interpolation in `dict'.  If `raw'
+    # is false, the resulting text is wrapped/filled by calling wrap().
+    #
+    # When looking for a template in a specific language, there are 4 places
+    # that are searched, in this order:
+    #
+    # 1. the list-specific language directory
+    #    lists/<listname>/<language>
+    #
+    # 2. the domain-specific language directory
+    #    templates/<list.host_name>/<language>
+    #
+    # 3. the site-wide language directory
+    #    templates/site/<language>
+    #
+    # 4. the global default language directory
+    #    templates/<language>
+    #
+    # The first match found stops the search.  In this way, you can specialize
+    # templates at the desired level, or, if you use only the default
+    # templates, you don't need to change anything.  You should never modify
+    # files in the templates/<language> subdirectory, since Mailman will
+    # overwrite these when you upgrade.  That's what the templates/site
+    # language directories are for.
+    #
+    # A further complication is that the language to search for is determined
+    # by both the `lang' and `mlist' arguments.  The search order there is
+    # that if lang is given, then the 4 locations above are searched,
+    # substituting lang for <language>.  If no match is found, and mlist is
+    # given, then the 4 locations are searched using the list's preferred
+    # language.  After that, the server default language is used for
+    # <language>.  If that still doesn't yield a template, then the standard
+    # distribution's English language template is used as an ultimate
+    # fallback, and when lang is not 'en', the resulting template is passed
+    # through the translation service.  If this template is missing you've got
+    # big problems. ;)
+    #
+    # A word on backwards compatibility: Mailman versions prior to 2.1 stored
+    # templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}.
+    # Those directories are no longer searched so if you've got customizations
+    # in those files, you should move them to the appropriate directory based
+    # on the above description.  Mailman's upgrade script cannot do this for
+    # you.
+    #
+    # The function has been revised and renamed as it now returns both the
+    # template text and the path from which it retrieved the template. The
+    # original function is now a wrapper which just returns the template text
+    # as before, by calling this renamed function and discarding the second
+    # item returned.
+    #
+    # Calculate the languages to scan
+    languages = set()
+    if lang is not None:
+        languages.add(lang)
+    if mlist is not None:
+        languages.add(mlist.preferred_language)
+    languages.add(config.mailman.default_language)
+    assert None not in languages, 'None in languages'
+    # Calculate the locations to scan
+    searchdirs = []
+    if mlist is not None:
+        searchdirs.append(mlist.data_path)
+        searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name))
+    searchdirs.append(os.path.join(TEMPLATE_DIR, 'site'))
+    searchdirs.append(TEMPLATE_DIR)
+    # Start scanning
+    fp = None
+    try:
+        for lang in languages:
+            for dir in searchdirs:
+                filename = os.path.join(dir, lang, templatefile)
+                try:
+                    fp = open(filename)
+                    raise OuterExit
+                except IOError, e:
+                    if e.errno <> errno.ENOENT:
+                        raise
+                    # Okay, it doesn't exist, keep looping
+                    fp = None
+    except OuterExit:
+        pass
+    if fp is None:
+        # Try one last time with the distro English template, which, unless
+        # you've got a really broken installation, must be there.
+        try:
+            filename = os.path.join(TEMPLATE_DIR, 'en', templatefile)
+            fp = open(filename)
+        except IOError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+            # We never found the template.  BAD!
+            raise IOError(errno.ENOENT, 'No template file found', templatefile)
+        else:
+            from mailman.i18n import get_translation
+            # XXX BROKEN HACK
+            data = fp.read()[:-1]
+            template = get_translation().ugettext(data)
+            fp.close()
+    else:
+        template = fp.read()
+        fp.close()
+        template = unicode(template, GetCharSet(lang), 'replace')
+    text = template
+    if raw_dict is not None:
+        text = expand(template, raw_dict)
+    if raw:
+        return text, filename
+    return wrap(text), filename
+
+
+def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
+    return findtext(templatefile, dict, raw, lang, mlist)[0]
+
+
+
+def GetRequestURI(fallback=None, escape=True):
+    """Return the full virtual path this CGI script was invoked with.
+
+    Newer web servers seems to supply this info in the REQUEST_URI
+    environment variable -- which isn't part of the CGI/1.1 spec.
+    Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME
+    and PATH_INFO, both of which are part of CGI/1.1.
+
+    Optional argument `fallback' (default `None') is returned if both of
+    the above methods fail.
+
+    The url will be cgi escaped to prevent cross-site scripting attacks,
+    unless `escape' is set to 0.
+    """
+    url = fallback
+    if 'REQUEST_URI' in os.environ:
+        url = os.environ['REQUEST_URI']
+    elif 'SCRIPT_NAME' in os.environ and 'PATH_INFO' in os.environ:
+        url = os.environ['SCRIPT_NAME'] + os.environ['PATH_INFO']
+    if escape:
+        return websafe(url)
+    return url
+
+
+
+# XXX Replace this with direct calls.  For now, existing uses of GetCharSet()
+# are too numerous to change.
+def GetCharSet(lang):
+    return config.languages.get_charset(lang)
+
+
+
+def get_request_domain():
+    host = os.environ.get('HTTP_HOST', os.environ.get('SERVER_NAME'))
+    port = os.environ.get('SERVER_PORT')
+    # Strip off the port if there is one
+    if port and host.endswith(':' + port):
+        host = host[:-len(port)-1]
+    return host.lower()
+
+
+def get_site_noreply():
+    return '%s@%s' % (config.NO_REPLY_ADDRESS, config.DEFAULT_EMAIL_HOST)
+
+
+
+# Figure out epoch seconds of midnight at the start of today (or the given
+# 3-tuple date of (year, month, day).
+def midnight(date=None):
+    if date is None:
+        date = time.localtime()[:3]
+    # -1 for dst flag tells the library to figure it out
+    return time.mktime(date + (0,)*5 + (-1,))
+
+
+
+# The opposite of canonstr() -- sorta.  I.e. it attempts to encode s in the
+# charset of the given language, which is the character set that the page will
+# be rendered in, and failing that, replaces non-ASCII characters with their
+# html references.  It always returns a byte string.
+def uncanonstr(s, lang=None):
+    if s is None:
+        s = u''
+    if lang is None:
+        charset = 'us-ascii'
+    else:
+        charset = GetCharSet(lang)
+    # See if the string contains characters only in the desired character
+    # set.  If so, return it unchanged, except for coercing it to a byte
+    # string.
+    try:
+        if isinstance(s, unicode):
+            return s.encode(charset)
+        else:
+            u = unicode(s, charset)
+            return s
+    except UnicodeError:
+        # Nope, it contains funny characters, so html-ref it
+        return uquote(s)
+
+
+def uquote(s):
+    a = []
+    for c in s:
+        o = ord(c)
+        if o > 127:
+            a.append('&#%3d;' % o)
+        else:
+            a.append(c)
+    # Join characters together and coerce to byte string
+    return str(EMPTYSTRING.join(a))
+
+
+def oneline(s, cset='us-ascii', in_unicode=False):
+    # Decode header string in one line and convert into specified charset
+    try:
+        h = email.Header.make_header(email.Header.decode_header(s))
+        ustr = h.__unicode__()
+        line = UEMPTYSTRING.join(ustr.splitlines())
+        if in_unicode:
+            return line
+        else:
+            return line.encode(cset, 'replace')
+    except (LookupError, UnicodeError, ValueError, HeaderParseError):
+        # possibly charset problem. return with undecoded string in one line.
+        return EMPTYSTRING.join(s.splitlines())
+
+
+def strip_verbose_pattern(pattern):
+    # Remove white space and comments from a verbose pattern and return a
+    # non-verbose, equivalent pattern.  Replace CR and NL in the result
+    # with '\\r' and '\\n' respectively to avoid multi-line results.
+    if not isinstance(pattern, str):
+        return pattern
+    newpattern = ''
+    i = 0
+    inclass = False
+    skiptoeol = False
+    copynext = False
+    while i < len(pattern):
+        c = pattern[i]
+        if copynext:
+            if c == NL:
+                newpattern += '\\n'
+            elif c == CR:
+                newpattern += '\\r'
+            else:
+                newpattern += c
+            copynext = False
+        elif skiptoeol:
+            if c == NL:
+                skiptoeol = False
+        elif c == '#' and not inclass:
+            skiptoeol = True
+        elif c == '[' and not inclass:
+            inclass = True
+            newpattern += c
+            copynext = True
+        elif c == ']' and inclass:
+            inclass = False
+            newpattern += c
+        elif re.search('\s', c):
+            if inclass:
+                if c == NL:
+                    newpattern += '\\n'
+                elif c == CR:
+                    newpattern += '\\r'
+                else:
+                    newpattern += c
+        elif c == '\\' and not inclass:
+            newpattern += c
+            copynext = True
+        else:
+            if c == NL:
+                newpattern += '\\n'
+            elif c == CR:
+                newpattern += '\\r'
+            else:
+                newpattern += c
+        i += 1
+    return newpattern
+
+
+
+def get_pattern(email, pattern_list):
+    """Returns matched entry in pattern_list if email matches.
+    Otherwise returns None.
+    """
+    if not pattern_list:
+        return None
+    matched = None
+    for pattern in pattern_list:
+        if pattern.startswith('^'):
+            # This is a regular expression match
+            try:
+                if re.search(pattern, email, re.IGNORECASE):
+                    matched = pattern
+                    break
+            except re.error:
+                # BAW: we should probably remove this pattern
+                pass
+        else:
+            # Do the comparison case insensitively
+            if pattern.lower() == email.lower():
+                matched = pattern
+                break
+    return matched