summaryrefslogtreecommitdiff
path: root/mailman/Utils.py
diff options
context:
space:
mode:
authorBarry Warsaw2009-01-25 13:01:41 -0500
committerBarry Warsaw2009-01-25 13:01:41 -0500
commiteefd06f1b88b8ecbb23a9013cd223b72ca85c20d (patch)
tree72c947fe16fce0e07e996ee74020b26585d7e846 /mailman/Utils.py
parent07871212f74498abd56bef3919bf3e029eb8b930 (diff)
downloadmailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.gz
mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.zst
mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.zip
Diffstat (limited to 'mailman/Utils.py')
-rw-r--r--mailman/Utils.py702
1 files changed, 0 insertions, 702 deletions
diff --git a/mailman/Utils.py b/mailman/Utils.py
deleted file mode 100644
index 9946273c9..000000000
--- a/mailman/Utils.py
+++ /dev/null
@@ -1,702 +0,0 @@
-# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""Miscellaneous essential routines.
-
-This includes actual message transmission routines, address checking and
-message and address munging, a handy-dandy routine to map a function on all
-the mailing lists, and whatever else doesn't belong elsewhere.
-"""
-
-import os
-import re
-import cgi
-import time
-import errno
-import base64
-import random
-import logging
-import htmlentitydefs
-import email.Header
-import email.Iterators
-
-from email.Errors import HeaderParseError
-from lazr.config import as_boolean
-from string import ascii_letters, digits, whitespace
-
-import mailman.templates
-
-from mailman import passwords
-from mailman.config import config
-from mailman.core import errors
-from mailman.utilities.string import expand
-
-
-AT = '@'
-CR = '\r'
-DOT = '.'
-EMPTYSTRING = ''
-IDENTCHARS = ascii_letters + digits + '_'
-NL = '\n'
-UEMPTYSTRING = u''
-TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__)
-
-# Search for $(identifier)s strings, except that the trailing s is optional,
-# since that's a common mistake
-cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE)
-# Search for $$, $identifier, or ${identifier}
-dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE)
-
-log = logging.getLogger('mailman.error')
-
-
-
-# a much more naive implementation than say, Emacs's fill-paragraph!
-def wrap(text, column=70, honor_leading_ws=True):
- """Wrap and fill the text to the specified column.
-
- Wrapping is always in effect, although if it is not possible to wrap a
- line (because some word is longer than `column' characters) the line is
- broken at the next available whitespace boundary. Paragraphs are also
- always filled, unless honor_leading_ws is true and the line begins with
- whitespace. This is the algorithm that the Python FAQ wizard uses, and
- seems like a good compromise.
-
- """
- wrapped = ''
- # first split the text into paragraphs, defined as a blank line
- paras = re.split('\n\n', text)
- for para in paras:
- # fill
- lines = []
- fillprev = False
- for line in para.split(NL):
- if not line:
- lines.append(line)
- continue
- if honor_leading_ws and line[0] in whitespace:
- fillthis = False
- else:
- fillthis = True
- if fillprev and fillthis:
- # if the previous line should be filled, then just append a
- # single space, and the rest of the current line
- lines[-1] = lines[-1].rstrip() + ' ' + line
- else:
- # no fill, i.e. retain newline
- lines.append(line)
- fillprev = fillthis
- # wrap each line
- for text in lines:
- while text:
- if len(text) <= column:
- line = text
- text = ''
- else:
- bol = column
- # find the last whitespace character
- while bol > 0 and text[bol] not in whitespace:
- bol -= 1
- # now find the last non-whitespace character
- eol = bol
- while eol > 0 and text[eol] in whitespace:
- eol -= 1
- # watch out for text that's longer than the column width
- if eol == 0:
- # break on whitespace after column
- eol = column
- while eol < len(text) and text[eol] not in whitespace:
- eol += 1
- bol = eol
- while bol < len(text) and text[bol] in whitespace:
- bol += 1
- bol -= 1
- line = text[:eol+1] + '\n'
- # find the next non-whitespace character
- bol += 1
- while bol < len(text) and text[bol] in whitespace:
- bol += 1
- text = text[bol:]
- wrapped += line
- wrapped += '\n'
- # end while text
- wrapped += '\n'
- # end for text in lines
- # the last two newlines are bogus
- return wrapped[:-2]
-
-
-
-def QuotePeriods(text):
- JOINER = '\n .\n'
- SEP = '\n.\n'
- return JOINER.join(text.split(SEP))
-
-
-# This takes an email address, and returns a tuple containing (user,host)
-def ParseEmail(email):
- user = None
- domain = None
- email = email.lower()
- at_sign = email.find('@')
- if at_sign < 1:
- return email, None
- user = email[:at_sign]
- rest = email[at_sign+1:]
- domain = rest.split('.')
- return user, domain
-
-
-def LCDomain(addr):
- "returns the address with the domain part lowercased"
- atind = addr.find('@')
- if atind == -1: # no domain part
- return addr
- return addr[:atind] + '@' + addr[atind+1:].lower()
-
-
-# TBD: what other characters should be disallowed?
-_badchars = re.compile(r'[][()<>|;^,\000-\037\177-\377]')
-
-def ValidateEmail(s):
- """Verify that the an email address isn't grossly evil."""
- # Pretty minimal, cheesy check. We could do better...
- if not s or ' ' in s:
- raise errors.InvalidEmailAddress(repr(s))
- if _badchars.search(s) or s[0] == '-':
- raise errors.InvalidEmailAddress(repr(s))
- user, domain_parts = ParseEmail(s)
- # Local, unqualified addresses are not allowed.
- if not domain_parts:
- raise errors.InvalidEmailAddress(repr(s))
- if len(domain_parts) < 2:
- raise errors.InvalidEmailAddress(repr(s))
-
-
-
-# Patterns which may be used to form malicious path to inject a new
-# line in the mailman error log. (TK: advisory by Moritz Naumann)
-CRNLpat = re.compile(r'[^\x21-\x7e]')
-
-def GetPathPieces(envar='PATH_INFO'):
- path = os.environ.get(envar)
- if path:
- if CRNLpat.search(path):
- path = CRNLpat.split(path)[0]
- log.error('Warning: Possible malformed path attack.')
- return [p for p in path.split('/') if p]
- return []
-
-
-
-def ScriptURL(target):
- up = '../' * len(GetPathPieces())
- return '%s%s' % (up, target + config.CGIEXT)
-
-
-
-def GetPossibleMatchingAddrs(name):
- """returns a sorted list of addresses that could possibly match
- a given name.
-
- For Example, given scott@pobox.com, return ['scott@pobox.com'],
- given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com',
- 'scott@pobox.com']"""
-
- name = name.lower()
- user, domain = ParseEmail(name)
- res = [name]
- if domain:
- domain = domain[1:]
- while len(domain) >= 2:
- res.append("%s@%s" % (user, DOT.join(domain)))
- domain = domain[1:]
- return res
-
-
-
-def List2Dict(L, foldcase=False):
- """Return a dict keyed by the entries in the list passed to it."""
- d = {}
- if foldcase:
- for i in L:
- d[i.lower()] = True
- else:
- for i in L:
- d[i] = True
- return d
-
-
-
-_vowels = ('a', 'e', 'i', 'o', 'u')
-_consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
- 'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
-_syllables = []
-
-for v in _vowels:
- for c in _consonants:
- _syllables.append(c+v)
- _syllables.append(v+c)
-del c, v
-
-def UserFriendly_MakeRandomPassword(length):
- syls = []
- while len(syls) * 2 < length:
- syls.append(random.choice(_syllables))
- return EMPTYSTRING.join(syls)[:length]
-
-
-def Secure_MakeRandomPassword(length):
- bytesread = 0
- bytes = []
- fd = None
- try:
- while bytesread < length:
- try:
- # Python 2.4 has this on available systems.
- newbytes = os.urandom(length - bytesread)
- except (AttributeError, NotImplementedError):
- if fd is None:
- try:
- fd = os.open('/dev/urandom', os.O_RDONLY)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- # We have no available source of cryptographically
- # secure random characters. Log an error and fallback
- # to the user friendly passwords.
- log.error(
- 'urandom not available, passwords not secure')
- return UserFriendly_MakeRandomPassword(length)
- newbytes = os.read(fd, length - bytesread)
- bytes.append(newbytes)
- bytesread += len(newbytes)
- s = base64.encodestring(EMPTYSTRING.join(bytes))
- # base64 will expand the string by 4/3rds
- return s.replace('\n', '')[:length]
- finally:
- if fd is not None:
- os.close(fd)
-
-
-def MakeRandomPassword(length=None):
- if length is None:
- length = int(config.passwords.member_password_length)
- if as_boolean(config.passwords.user_friendly_passwords):
- password = UserFriendly_MakeRandomPassword(length)
- else:
- password = Secure_MakeRandomPassword(length)
- return password.decode('ascii')
-
-
-def GetRandomSeed():
- chr1 = int(random.random() * 52)
- chr2 = int(random.random() * 52)
- def mkletter(c):
- if 0 <= c < 26:
- c += 65
- if 26 <= c < 52:
- #c = c - 26 + 97
- c += 71
- return c
- return "%c%c" % tuple(map(mkletter, (chr1, chr2)))
-
-
-
-def set_global_password(pw, siteadmin=True, scheme=None):
- if scheme is None:
- scheme = passwords.Schemes.ssha
- if siteadmin:
- filename = config.SITE_PW_FILE
- else:
- filename = config.LISTCREATOR_PW_FILE
- try:
- fp = open(filename, 'w')
- print >> fp, passwords.make_secret(pw, scheme)
- finally:
- fp.close()
-
-
-def get_global_password(siteadmin=True):
- if siteadmin:
- filename = config.SITE_PW_FILE
- else:
- filename = config.LISTCREATOR_PW_FILE
- try:
- fp = open(filename)
- challenge = fp.read()[:-1] # strip off trailing nl
- fp.close()
- except IOError, e:
- if e.errno <> errno.ENOENT:
- raise
- # It's okay not to have a site admin password
- return None
- return challenge
-
-
-def check_global_password(response, siteadmin=True):
- challenge = get_global_password(siteadmin)
- if challenge is None:
- return False
- return passwords.check_response(challenge, response)
-
-
-
-def websafe(s):
- return cgi.escape(s, quote=True)
-
-
-def nntpsplit(s):
- parts = s.split(':', 1)
- if len(parts) == 2:
- try:
- return parts[0], int(parts[1])
- except ValueError:
- pass
- # Use the defaults
- return s, 119
-
-
-
-# Just changing these two functions should be enough to control the way
-# that email address obscuring is handled.
-def ObscureEmail(addr, for_text=False):
- """Make email address unrecognizable to web spiders, but invertable.
-
- When for_text option is set (not default), make a sentence fragment
- instead of a token."""
- if for_text:
- return addr.replace('@', ' at ')
- else:
- return addr.replace('@', '--at--')
-
-def UnobscureEmail(addr):
- """Invert ObscureEmail() conversion."""
- # Contrived to act as an identity operation on already-unobscured
- # emails, so routines expecting obscured ones will accept both.
- return addr.replace('--at--', '@')
-
-
-
-class OuterExit(Exception):
- pass
-
-def findtext(templatefile, raw_dict=None, raw=False, lang=None, mlist=None):
- # Make some text from a template file. The order of searches depends on
- # whether mlist and lang are provided. Once the templatefile is found,
- # string substitution is performed by interpolation in `dict'. If `raw'
- # is false, the resulting text is wrapped/filled by calling wrap().
- #
- # When looking for a template in a specific language, there are 4 places
- # that are searched, in this order:
- #
- # 1. the list-specific language directory
- # lists/<listname>/<language>
- #
- # 2. the domain-specific language directory
- # templates/<list.host_name>/<language>
- #
- # 3. the site-wide language directory
- # templates/site/<language>
- #
- # 4. the global default language directory
- # templates/<language>
- #
- # The first match found stops the search. In this way, you can specialize
- # templates at the desired level, or, if you use only the default
- # templates, you don't need to change anything. You should never modify
- # files in the templates/<language> subdirectory, since Mailman will
- # overwrite these when you upgrade. That's what the templates/site
- # language directories are for.
- #
- # A further complication is that the language to search for is determined
- # by both the `lang' and `mlist' arguments. The search order there is
- # that if lang is given, then the 4 locations above are searched,
- # substituting lang for <language>. If no match is found, and mlist is
- # given, then the 4 locations are searched using the list's preferred
- # language. After that, the server default language is used for
- # <language>. If that still doesn't yield a template, then the standard
- # distribution's English language template is used as an ultimate
- # fallback, and when lang is not 'en', the resulting template is passed
- # through the translation service. If this template is missing you've got
- # big problems. ;)
- #
- # A word on backwards compatibility: Mailman versions prior to 2.1 stored
- # templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}.
- # Those directories are no longer searched so if you've got customizations
- # in those files, you should move them to the appropriate directory based
- # on the above description. Mailman's upgrade script cannot do this for
- # you.
- #
- # The function has been revised and renamed as it now returns both the
- # template text and the path from which it retrieved the template. The
- # original function is now a wrapper which just returns the template text
- # as before, by calling this renamed function and discarding the second
- # item returned.
- #
- # Calculate the languages to scan
- languages = set()
- if lang is not None:
- languages.add(lang)
- if mlist is not None:
- languages.add(mlist.preferred_language)
- languages.add(config.mailman.default_language)
- assert None not in languages, 'None in languages'
- # Calculate the locations to scan
- searchdirs = []
- if mlist is not None:
- searchdirs.append(mlist.data_path)
- searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name))
- searchdirs.append(os.path.join(TEMPLATE_DIR, 'site'))
- searchdirs.append(TEMPLATE_DIR)
- # Start scanning
- fp = None
- try:
- for lang in languages:
- for dir in searchdirs:
- filename = os.path.join(dir, lang, templatefile)
- try:
- fp = open(filename)
- raise OuterExit
- except IOError, e:
- if e.errno <> errno.ENOENT:
- raise
- # Okay, it doesn't exist, keep looping
- fp = None
- except OuterExit:
- pass
- if fp is None:
- # Try one last time with the distro English template, which, unless
- # you've got a really broken installation, must be there.
- try:
- filename = os.path.join(TEMPLATE_DIR, 'en', templatefile)
- fp = open(filename)
- except IOError, e:
- if e.errno <> errno.ENOENT:
- raise
- # We never found the template. BAD!
- raise IOError(errno.ENOENT, 'No template file found', templatefile)
- else:
- from mailman.i18n import get_translation
- # XXX BROKEN HACK
- data = fp.read()[:-1]
- template = get_translation().ugettext(data)
- fp.close()
- else:
- template = fp.read()
- fp.close()
- template = unicode(template, GetCharSet(lang), 'replace')
- text = template
- if raw_dict is not None:
- text = expand(template, raw_dict)
- if raw:
- return text, filename
- return wrap(text), filename
-
-
-def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
- return findtext(templatefile, dict, raw, lang, mlist)[0]
-
-
-
-def GetRequestURI(fallback=None, escape=True):
- """Return the full virtual path this CGI script was invoked with.
-
- Newer web servers seems to supply this info in the REQUEST_URI
- environment variable -- which isn't part of the CGI/1.1 spec.
- Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME
- and PATH_INFO, both of which are part of CGI/1.1.
-
- Optional argument `fallback' (default `None') is returned if both of
- the above methods fail.
-
- The url will be cgi escaped to prevent cross-site scripting attacks,
- unless `escape' is set to 0.
- """
- url = fallback
- if 'REQUEST_URI' in os.environ:
- url = os.environ['REQUEST_URI']
- elif 'SCRIPT_NAME' in os.environ and 'PATH_INFO' in os.environ:
- url = os.environ['SCRIPT_NAME'] + os.environ['PATH_INFO']
- if escape:
- return websafe(url)
- return url
-
-
-
-# XXX Replace this with direct calls. For now, existing uses of GetCharSet()
-# are too numerous to change.
-def GetCharSet(lang):
- return config.languages.get_charset(lang)
-
-
-
-def get_request_domain():
- host = os.environ.get('HTTP_HOST', os.environ.get('SERVER_NAME'))
- port = os.environ.get('SERVER_PORT')
- # Strip off the port if there is one
- if port and host.endswith(':' + port):
- host = host[:-len(port)-1]
- return host.lower()
-
-
-def get_site_noreply():
- return '%s@%s' % (config.NO_REPLY_ADDRESS, config.DEFAULT_EMAIL_HOST)
-
-
-
-# Figure out epoch seconds of midnight at the start of today (or the given
-# 3-tuple date of (year, month, day).
-def midnight(date=None):
- if date is None:
- date = time.localtime()[:3]
- # -1 for dst flag tells the library to figure it out
- return time.mktime(date + (0,)*5 + (-1,))
-
-
-
-# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
-# charset of the given language, which is the character set that the page will
-# be rendered in, and failing that, replaces non-ASCII characters with their
-# html references. It always returns a byte string.
-def uncanonstr(s, lang=None):
- if s is None:
- s = u''
- if lang is None:
- charset = 'us-ascii'
- else:
- charset = GetCharSet(lang)
- # See if the string contains characters only in the desired character
- # set. If so, return it unchanged, except for coercing it to a byte
- # string.
- try:
- if isinstance(s, unicode):
- return s.encode(charset)
- else:
- u = unicode(s, charset)
- return s
- except UnicodeError:
- # Nope, it contains funny characters, so html-ref it
- return uquote(s)
-
-
-def uquote(s):
- a = []
- for c in s:
- o = ord(c)
- if o > 127:
- a.append('&#%3d;' % o)
- else:
- a.append(c)
- # Join characters together and coerce to byte string
- return str(EMPTYSTRING.join(a))
-
-
-def oneline(s, cset='us-ascii', in_unicode=False):
- # Decode header string in one line and convert into specified charset
- try:
- h = email.Header.make_header(email.Header.decode_header(s))
- ustr = h.__unicode__()
- line = UEMPTYSTRING.join(ustr.splitlines())
- if in_unicode:
- return line
- else:
- return line.encode(cset, 'replace')
- except (LookupError, UnicodeError, ValueError, HeaderParseError):
- # possibly charset problem. return with undecoded string in one line.
- return EMPTYSTRING.join(s.splitlines())
-
-
-def strip_verbose_pattern(pattern):
- # Remove white space and comments from a verbose pattern and return a
- # non-verbose, equivalent pattern. Replace CR and NL in the result
- # with '\\r' and '\\n' respectively to avoid multi-line results.
- if not isinstance(pattern, str):
- return pattern
- newpattern = ''
- i = 0
- inclass = False
- skiptoeol = False
- copynext = False
- while i < len(pattern):
- c = pattern[i]
- if copynext:
- if c == NL:
- newpattern += '\\n'
- elif c == CR:
- newpattern += '\\r'
- else:
- newpattern += c
- copynext = False
- elif skiptoeol:
- if c == NL:
- skiptoeol = False
- elif c == '#' and not inclass:
- skiptoeol = True
- elif c == '[' and not inclass:
- inclass = True
- newpattern += c
- copynext = True
- elif c == ']' and inclass:
- inclass = False
- newpattern += c
- elif re.search('\s', c):
- if inclass:
- if c == NL:
- newpattern += '\\n'
- elif c == CR:
- newpattern += '\\r'
- else:
- newpattern += c
- elif c == '\\' and not inclass:
- newpattern += c
- copynext = True
- else:
- if c == NL:
- newpattern += '\\n'
- elif c == CR:
- newpattern += '\\r'
- else:
- newpattern += c
- i += 1
- return newpattern
-
-
-
-def get_pattern(email, pattern_list):
- """Returns matched entry in pattern_list if email matches.
- Otherwise returns None.
- """
- if not pattern_list:
- return None
- matched = None
- for pattern in pattern_list:
- if pattern.startswith('^'):
- # This is a regular expression match
- try:
- if re.search(pattern, email, re.IGNORECASE):
- matched = pattern
- break
- except re.error:
- # BAW: we should probably remove this pattern
- pass
- else:
- # Do the comparison case insensitively
- if pattern.lower() == email.lower():
- matched = pattern
- break
- return matched