diff options
| author | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
| commit | eefd06f1b88b8ecbb23a9013cd223b72ca85c20d (patch) | |
| tree | 72c947fe16fce0e07e996ee74020b26585d7e846 /mailman/Utils.py | |
| parent | 07871212f74498abd56bef3919bf3e029eb8b930 (diff) | |
| download | mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.gz mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.zst mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.zip | |
Diffstat (limited to 'mailman/Utils.py')
| -rw-r--r-- | mailman/Utils.py | 702 |
1 files changed, 0 insertions, 702 deletions
diff --git a/mailman/Utils.py b/mailman/Utils.py deleted file mode 100644 index 9946273c9..000000000 --- a/mailman/Utils.py +++ /dev/null @@ -1,702 +0,0 @@ -# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Miscellaneous essential routines. - -This includes actual message transmission routines, address checking and -message and address munging, a handy-dandy routine to map a function on all -the mailing lists, and whatever else doesn't belong elsewhere. -""" - -import os -import re -import cgi -import time -import errno -import base64 -import random -import logging -import htmlentitydefs -import email.Header -import email.Iterators - -from email.Errors import HeaderParseError -from lazr.config import as_boolean -from string import ascii_letters, digits, whitespace - -import mailman.templates - -from mailman import passwords -from mailman.config import config -from mailman.core import errors -from mailman.utilities.string import expand - - -AT = '@' -CR = '\r' -DOT = '.' -EMPTYSTRING = '' -IDENTCHARS = ascii_letters + digits + '_' -NL = '\n' -UEMPTYSTRING = u'' -TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__) - -# Search for $(identifier)s strings, except that the trailing s is optional, -# since that's a common mistake -cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE) -# Search for $$, $identifier, or ${identifier} -dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE) - -log = logging.getLogger('mailman.error') - - - -# a much more naive implementation than say, Emacs's fill-paragraph! -def wrap(text, column=70, honor_leading_ws=True): - """Wrap and fill the text to the specified column. - - Wrapping is always in effect, although if it is not possible to wrap a - line (because some word is longer than `column' characters) the line is - broken at the next available whitespace boundary. Paragraphs are also - always filled, unless honor_leading_ws is true and the line begins with - whitespace. This is the algorithm that the Python FAQ wizard uses, and - seems like a good compromise. - - """ - wrapped = '' - # first split the text into paragraphs, defined as a blank line - paras = re.split('\n\n', text) - for para in paras: - # fill - lines = [] - fillprev = False - for line in para.split(NL): - if not line: - lines.append(line) - continue - if honor_leading_ws and line[0] in whitespace: - fillthis = False - else: - fillthis = True - if fillprev and fillthis: - # if the previous line should be filled, then just append a - # single space, and the rest of the current line - lines[-1] = lines[-1].rstrip() + ' ' + line - else: - # no fill, i.e. retain newline - lines.append(line) - fillprev = fillthis - # wrap each line - for text in lines: - while text: - if len(text) <= column: - line = text - text = '' - else: - bol = column - # find the last whitespace character - while bol > 0 and text[bol] not in whitespace: - bol -= 1 - # now find the last non-whitespace character - eol = bol - while eol > 0 and text[eol] in whitespace: - eol -= 1 - # watch out for text that's longer than the column width - if eol == 0: - # break on whitespace after column - eol = column - while eol < len(text) and text[eol] not in whitespace: - eol += 1 - bol = eol - while bol < len(text) and text[bol] in whitespace: - bol += 1 - bol -= 1 - line = text[:eol+1] + '\n' - # find the next non-whitespace character - bol += 1 - while bol < len(text) and text[bol] in whitespace: - bol += 1 - text = text[bol:] - wrapped += line - wrapped += '\n' - # end while text - wrapped += '\n' - # end for text in lines - # the last two newlines are bogus - return wrapped[:-2] - - - -def QuotePeriods(text): - JOINER = '\n .\n' - SEP = '\n.\n' - return JOINER.join(text.split(SEP)) - - -# This takes an email address, and returns a tuple containing (user,host) -def ParseEmail(email): - user = None - domain = None - email = email.lower() - at_sign = email.find('@') - if at_sign < 1: - return email, None - user = email[:at_sign] - rest = email[at_sign+1:] - domain = rest.split('.') - return user, domain - - -def LCDomain(addr): - "returns the address with the domain part lowercased" - atind = addr.find('@') - if atind == -1: # no domain part - return addr - return addr[:atind] + '@' + addr[atind+1:].lower() - - -# TBD: what other characters should be disallowed? -_badchars = re.compile(r'[][()<>|;^,\000-\037\177-\377]') - -def ValidateEmail(s): - """Verify that the an email address isn't grossly evil.""" - # Pretty minimal, cheesy check. We could do better... - if not s or ' ' in s: - raise errors.InvalidEmailAddress(repr(s)) - if _badchars.search(s) or s[0] == '-': - raise errors.InvalidEmailAddress(repr(s)) - user, domain_parts = ParseEmail(s) - # Local, unqualified addresses are not allowed. - if not domain_parts: - raise errors.InvalidEmailAddress(repr(s)) - if len(domain_parts) < 2: - raise errors.InvalidEmailAddress(repr(s)) - - - -# Patterns which may be used to form malicious path to inject a new -# line in the mailman error log. (TK: advisory by Moritz Naumann) -CRNLpat = re.compile(r'[^\x21-\x7e]') - -def GetPathPieces(envar='PATH_INFO'): - path = os.environ.get(envar) - if path: - if CRNLpat.search(path): - path = CRNLpat.split(path)[0] - log.error('Warning: Possible malformed path attack.') - return [p for p in path.split('/') if p] - return [] - - - -def ScriptURL(target): - up = '../' * len(GetPathPieces()) - return '%s%s' % (up, target + config.CGIEXT) - - - -def GetPossibleMatchingAddrs(name): - """returns a sorted list of addresses that could possibly match - a given name. - - For Example, given scott@pobox.com, return ['scott@pobox.com'], - given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com', - 'scott@pobox.com']""" - - name = name.lower() - user, domain = ParseEmail(name) - res = [name] - if domain: - domain = domain[1:] - while len(domain) >= 2: - res.append("%s@%s" % (user, DOT.join(domain))) - domain = domain[1:] - return res - - - -def List2Dict(L, foldcase=False): - """Return a dict keyed by the entries in the list passed to it.""" - d = {} - if foldcase: - for i in L: - d[i.lower()] = True - else: - for i in L: - d[i] = True - return d - - - -_vowels = ('a', 'e', 'i', 'o', 'u') -_consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n', - 'p', 'r', 's', 't', 'v', 'w', 'x', 'z') -_syllables = [] - -for v in _vowels: - for c in _consonants: - _syllables.append(c+v) - _syllables.append(v+c) -del c, v - -def UserFriendly_MakeRandomPassword(length): - syls = [] - while len(syls) * 2 < length: - syls.append(random.choice(_syllables)) - return EMPTYSTRING.join(syls)[:length] - - -def Secure_MakeRandomPassword(length): - bytesread = 0 - bytes = [] - fd = None - try: - while bytesread < length: - try: - # Python 2.4 has this on available systems. - newbytes = os.urandom(length - bytesread) - except (AttributeError, NotImplementedError): - if fd is None: - try: - fd = os.open('/dev/urandom', os.O_RDONLY) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - # We have no available source of cryptographically - # secure random characters. Log an error and fallback - # to the user friendly passwords. - log.error( - 'urandom not available, passwords not secure') - return UserFriendly_MakeRandomPassword(length) - newbytes = os.read(fd, length - bytesread) - bytes.append(newbytes) - bytesread += len(newbytes) - s = base64.encodestring(EMPTYSTRING.join(bytes)) - # base64 will expand the string by 4/3rds - return s.replace('\n', '')[:length] - finally: - if fd is not None: - os.close(fd) - - -def MakeRandomPassword(length=None): - if length is None: - length = int(config.passwords.member_password_length) - if as_boolean(config.passwords.user_friendly_passwords): - password = UserFriendly_MakeRandomPassword(length) - else: - password = Secure_MakeRandomPassword(length) - return password.decode('ascii') - - -def GetRandomSeed(): - chr1 = int(random.random() * 52) - chr2 = int(random.random() * 52) - def mkletter(c): - if 0 <= c < 26: - c += 65 - if 26 <= c < 52: - #c = c - 26 + 97 - c += 71 - return c - return "%c%c" % tuple(map(mkletter, (chr1, chr2))) - - - -def set_global_password(pw, siteadmin=True, scheme=None): - if scheme is None: - scheme = passwords.Schemes.ssha - if siteadmin: - filename = config.SITE_PW_FILE - else: - filename = config.LISTCREATOR_PW_FILE - try: - fp = open(filename, 'w') - print >> fp, passwords.make_secret(pw, scheme) - finally: - fp.close() - - -def get_global_password(siteadmin=True): - if siteadmin: - filename = config.SITE_PW_FILE - else: - filename = config.LISTCREATOR_PW_FILE - try: - fp = open(filename) - challenge = fp.read()[:-1] # strip off trailing nl - fp.close() - except IOError, e: - if e.errno <> errno.ENOENT: - raise - # It's okay not to have a site admin password - return None - return challenge - - -def check_global_password(response, siteadmin=True): - challenge = get_global_password(siteadmin) - if challenge is None: - return False - return passwords.check_response(challenge, response) - - - -def websafe(s): - return cgi.escape(s, quote=True) - - -def nntpsplit(s): - parts = s.split(':', 1) - if len(parts) == 2: - try: - return parts[0], int(parts[1]) - except ValueError: - pass - # Use the defaults - return s, 119 - - - -# Just changing these two functions should be enough to control the way -# that email address obscuring is handled. -def ObscureEmail(addr, for_text=False): - """Make email address unrecognizable to web spiders, but invertable. - - When for_text option is set (not default), make a sentence fragment - instead of a token.""" - if for_text: - return addr.replace('@', ' at ') - else: - return addr.replace('@', '--at--') - -def UnobscureEmail(addr): - """Invert ObscureEmail() conversion.""" - # Contrived to act as an identity operation on already-unobscured - # emails, so routines expecting obscured ones will accept both. - return addr.replace('--at--', '@') - - - -class OuterExit(Exception): - pass - -def findtext(templatefile, raw_dict=None, raw=False, lang=None, mlist=None): - # Make some text from a template file. The order of searches depends on - # whether mlist and lang are provided. Once the templatefile is found, - # string substitution is performed by interpolation in `dict'. If `raw' - # is false, the resulting text is wrapped/filled by calling wrap(). - # - # When looking for a template in a specific language, there are 4 places - # that are searched, in this order: - # - # 1. the list-specific language directory - # lists/<listname>/<language> - # - # 2. the domain-specific language directory - # templates/<list.host_name>/<language> - # - # 3. the site-wide language directory - # templates/site/<language> - # - # 4. the global default language directory - # templates/<language> - # - # The first match found stops the search. In this way, you can specialize - # templates at the desired level, or, if you use only the default - # templates, you don't need to change anything. You should never modify - # files in the templates/<language> subdirectory, since Mailman will - # overwrite these when you upgrade. That's what the templates/site - # language directories are for. - # - # A further complication is that the language to search for is determined - # by both the `lang' and `mlist' arguments. The search order there is - # that if lang is given, then the 4 locations above are searched, - # substituting lang for <language>. If no match is found, and mlist is - # given, then the 4 locations are searched using the list's preferred - # language. After that, the server default language is used for - # <language>. If that still doesn't yield a template, then the standard - # distribution's English language template is used as an ultimate - # fallback, and when lang is not 'en', the resulting template is passed - # through the translation service. If this template is missing you've got - # big problems. ;) - # - # A word on backwards compatibility: Mailman versions prior to 2.1 stored - # templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}. - # Those directories are no longer searched so if you've got customizations - # in those files, you should move them to the appropriate directory based - # on the above description. Mailman's upgrade script cannot do this for - # you. - # - # The function has been revised and renamed as it now returns both the - # template text and the path from which it retrieved the template. The - # original function is now a wrapper which just returns the template text - # as before, by calling this renamed function and discarding the second - # item returned. - # - # Calculate the languages to scan - languages = set() - if lang is not None: - languages.add(lang) - if mlist is not None: - languages.add(mlist.preferred_language) - languages.add(config.mailman.default_language) - assert None not in languages, 'None in languages' - # Calculate the locations to scan - searchdirs = [] - if mlist is not None: - searchdirs.append(mlist.data_path) - searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name)) - searchdirs.append(os.path.join(TEMPLATE_DIR, 'site')) - searchdirs.append(TEMPLATE_DIR) - # Start scanning - fp = None - try: - for lang in languages: - for dir in searchdirs: - filename = os.path.join(dir, lang, templatefile) - try: - fp = open(filename) - raise OuterExit - except IOError, e: - if e.errno <> errno.ENOENT: - raise - # Okay, it doesn't exist, keep looping - fp = None - except OuterExit: - pass - if fp is None: - # Try one last time with the distro English template, which, unless - # you've got a really broken installation, must be there. - try: - filename = os.path.join(TEMPLATE_DIR, 'en', templatefile) - fp = open(filename) - except IOError, e: - if e.errno <> errno.ENOENT: - raise - # We never found the template. BAD! - raise IOError(errno.ENOENT, 'No template file found', templatefile) - else: - from mailman.i18n import get_translation - # XXX BROKEN HACK - data = fp.read()[:-1] - template = get_translation().ugettext(data) - fp.close() - else: - template = fp.read() - fp.close() - template = unicode(template, GetCharSet(lang), 'replace') - text = template - if raw_dict is not None: - text = expand(template, raw_dict) - if raw: - return text, filename - return wrap(text), filename - - -def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None): - return findtext(templatefile, dict, raw, lang, mlist)[0] - - - -def GetRequestURI(fallback=None, escape=True): - """Return the full virtual path this CGI script was invoked with. - - Newer web servers seems to supply this info in the REQUEST_URI - environment variable -- which isn't part of the CGI/1.1 spec. - Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME - and PATH_INFO, both of which are part of CGI/1.1. - - Optional argument `fallback' (default `None') is returned if both of - the above methods fail. - - The url will be cgi escaped to prevent cross-site scripting attacks, - unless `escape' is set to 0. - """ - url = fallback - if 'REQUEST_URI' in os.environ: - url = os.environ['REQUEST_URI'] - elif 'SCRIPT_NAME' in os.environ and 'PATH_INFO' in os.environ: - url = os.environ['SCRIPT_NAME'] + os.environ['PATH_INFO'] - if escape: - return websafe(url) - return url - - - -# XXX Replace this with direct calls. For now, existing uses of GetCharSet() -# are too numerous to change. -def GetCharSet(lang): - return config.languages.get_charset(lang) - - - -def get_request_domain(): - host = os.environ.get('HTTP_HOST', os.environ.get('SERVER_NAME')) - port = os.environ.get('SERVER_PORT') - # Strip off the port if there is one - if port and host.endswith(':' + port): - host = host[:-len(port)-1] - return host.lower() - - -def get_site_noreply(): - return '%s@%s' % (config.NO_REPLY_ADDRESS, config.DEFAULT_EMAIL_HOST) - - - -# Figure out epoch seconds of midnight at the start of today (or the given -# 3-tuple date of (year, month, day). -def midnight(date=None): - if date is None: - date = time.localtime()[:3] - # -1 for dst flag tells the library to figure it out - return time.mktime(date + (0,)*5 + (-1,)) - - - -# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the -# charset of the given language, which is the character set that the page will -# be rendered in, and failing that, replaces non-ASCII characters with their -# html references. It always returns a byte string. -def uncanonstr(s, lang=None): - if s is None: - s = u'' - if lang is None: - charset = 'us-ascii' - else: - charset = GetCharSet(lang) - # See if the string contains characters only in the desired character - # set. If so, return it unchanged, except for coercing it to a byte - # string. - try: - if isinstance(s, unicode): - return s.encode(charset) - else: - u = unicode(s, charset) - return s - except UnicodeError: - # Nope, it contains funny characters, so html-ref it - return uquote(s) - - -def uquote(s): - a = [] - for c in s: - o = ord(c) - if o > 127: - a.append('&#%3d;' % o) - else: - a.append(c) - # Join characters together and coerce to byte string - return str(EMPTYSTRING.join(a)) - - -def oneline(s, cset='us-ascii', in_unicode=False): - # Decode header string in one line and convert into specified charset - try: - h = email.Header.make_header(email.Header.decode_header(s)) - ustr = h.__unicode__() - line = UEMPTYSTRING.join(ustr.splitlines()) - if in_unicode: - return line - else: - return line.encode(cset, 'replace') - except (LookupError, UnicodeError, ValueError, HeaderParseError): - # possibly charset problem. return with undecoded string in one line. - return EMPTYSTRING.join(s.splitlines()) - - -def strip_verbose_pattern(pattern): - # Remove white space and comments from a verbose pattern and return a - # non-verbose, equivalent pattern. Replace CR and NL in the result - # with '\\r' and '\\n' respectively to avoid multi-line results. - if not isinstance(pattern, str): - return pattern - newpattern = '' - i = 0 - inclass = False - skiptoeol = False - copynext = False - while i < len(pattern): - c = pattern[i] - if copynext: - if c == NL: - newpattern += '\\n' - elif c == CR: - newpattern += '\\r' - else: - newpattern += c - copynext = False - elif skiptoeol: - if c == NL: - skiptoeol = False - elif c == '#' and not inclass: - skiptoeol = True - elif c == '[' and not inclass: - inclass = True - newpattern += c - copynext = True - elif c == ']' and inclass: - inclass = False - newpattern += c - elif re.search('\s', c): - if inclass: - if c == NL: - newpattern += '\\n' - elif c == CR: - newpattern += '\\r' - else: - newpattern += c - elif c == '\\' and not inclass: - newpattern += c - copynext = True - else: - if c == NL: - newpattern += '\\n' - elif c == CR: - newpattern += '\\r' - else: - newpattern += c - i += 1 - return newpattern - - - -def get_pattern(email, pattern_list): - """Returns matched entry in pattern_list if email matches. - Otherwise returns None. - """ - if not pattern_list: - return None - matched = None - for pattern in pattern_list: - if pattern.startswith('^'): - # This is a regular expression match - try: - if re.search(pattern, email, re.IGNORECASE): - matched = pattern - break - except re.error: - # BAW: we should probably remove this pattern - pass - else: - # Do the comparison case insensitively - if pattern.lower() == email.lower(): - matched = pattern - break - return matched |
