# Copyright (C) 1998-2009 by the Free Software Foundation, Inc. # # This file is part of GNU Mailman. # # GNU Mailman is free software: you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # GNU Mailman. If not, see . """Miscellaneous essential routines. This includes actual message transmission routines, address checking and message and address munging, a handy-dandy routine to map a function on all the mailing lists, and whatever else doesn't belong elsewhere. """ from __future__ import absolute_import, unicode_literals __metaclass__ = type __all__ = [ ] import os import re import cgi import time import errno import base64 import random import logging import htmlentitydefs # pylint: disable-msg=E0611,W0403 from email.errors import HeaderParseError from email.header import decode_header, make_header from lazr.config import as_boolean from string import ascii_letters, digits, whitespace from zope.component import getUtility import mailman.templates from mailman import passwords from mailman.config import config from mailman.core import errors from mailman.core.i18n import _ from mailman.interfaces.languages import ILanguageManager from mailman.utilities.string import expand AT = '@' CR = '\r' DOT = '.' EMPTYSTRING = '' IDENTCHARS = ascii_letters + digits + '_' NL = '\n' UEMPTYSTRING = u'' TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__) # Search for $(identifier)s strings, except that the trailing s is optional, # since that's a common mistake cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE) # Search for $$, $identifier, or ${identifier} dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE) log = logging.getLogger('mailman.error') # A much more naive implementation than say, Emacs's fill-paragraph! # pylint: disable-msg=R0912 def wrap(text, column=70, honor_leading_ws=True): """Wrap and fill the text to the specified column. Wrapping is always in effect, although if it is not possible to wrap a line (because some word is longer than `column' characters) the line is broken at the next available whitespace boundary. Paragraphs are also always filled, unless honor_leading_ws is true and the line begins with whitespace. This is the algorithm that the Python FAQ wizard uses, and seems like a good compromise. """ wrapped = '' # first split the text into paragraphs, defined as a blank line paras = re.split('\n\n', text) for para in paras: # fill lines = [] fillprev = False for line in para.split(NL): if not line: lines.append(line) continue if honor_leading_ws and line[0] in whitespace: fillthis = False else: fillthis = True if fillprev and fillthis: # if the previous line should be filled, then just append a # single space, and the rest of the current line lines[-1] = lines[-1].rstrip() + ' ' + line else: # no fill, i.e. retain newline lines.append(line) fillprev = fillthis # wrap each line for text in lines: while text: if len(text) <= column: line = text text = '' else: bol = column # find the last whitespace character while bol > 0 and text[bol] not in whitespace: bol -= 1 # now find the last non-whitespace character eol = bol while eol > 0 and text[eol] in whitespace: eol -= 1 # watch out for text that's longer than the column width if eol == 0: # break on whitespace after column eol = column while eol < len(text) and text[eol] not in whitespace: eol += 1 bol = eol while bol < len(text) and text[bol] in whitespace: bol += 1 bol -= 1 line = text[:eol+1] + '\n' # find the next non-whitespace character bol += 1 while bol < len(text) and text[bol] in whitespace: bol += 1 text = text[bol:] wrapped += line wrapped += '\n' # end while text wrapped += '\n' # end for text in lines # the last two newlines are bogus return wrapped[:-2] _vowels = ('a', 'e', 'i', 'o', 'u') _consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n', 'p', 'r', 's', 't', 'v', 'w', 'x', 'z') _syllables = [] for v in _vowels: for c in _consonants: _syllables.append(c+v) _syllables.append(v+c) del c, v def UserFriendly_MakeRandomPassword(length): syls = [] while len(syls) * 2 < length: syls.append(random.choice(_syllables)) return EMPTYSTRING.join(syls)[:length] def Secure_MakeRandomPassword(length): bytesread = 0 bytes = [] fd = None try: while bytesread < length: try: # Python 2.4 has this on available systems. newbytes = os.urandom(length - bytesread) except (AttributeError, NotImplementedError): if fd is None: try: fd = os.open('/dev/urandom', os.O_RDONLY) except OSError, e: if e.errno != errno.ENOENT: raise # We have no available source of cryptographically # secure random characters. Log an error and fallback # to the user friendly passwords. log.error( 'urandom not available, passwords not secure') return UserFriendly_MakeRandomPassword(length) newbytes = os.read(fd, length - bytesread) bytes.append(newbytes) bytesread += len(newbytes) s = base64.encodestring(EMPTYSTRING.join(bytes)) # base64 will expand the string by 4/3rds return s.replace('\n', '')[:length] finally: if fd is not None: os.close(fd) def MakeRandomPassword(length=None): if length is None: length = int(config.passwords.member_password_length) if as_boolean(config.passwords.user_friendly_passwords): password = UserFriendly_MakeRandomPassword(length) else: password = Secure_MakeRandomPassword(length) return password.decode('ascii') def GetRandomSeed(): chr1 = int(random.random() * 52) chr2 = int(random.random() * 52) def mkletter(c): if 0 <= c < 26: c += 65 if 26 <= c < 52: #c = c - 26 + 97 c += 71 return c return "%c%c" % tuple(map(mkletter, (chr1, chr2))) def set_global_password(pw, siteadmin=True, scheme=None): if scheme is None: scheme = passwords.Schemes.ssha if siteadmin: filename = config.SITE_PW_FILE else: filename = config.LISTCREATOR_PW_FILE try: fp = open(filename, 'w') print >> fp, passwords.make_secret(pw, scheme) finally: fp.close() def get_global_password(siteadmin=True): if siteadmin: filename = config.SITE_PW_FILE else: filename = config.LISTCREATOR_PW_FILE try: fp = open(filename) challenge = fp.read()[:-1] # strip off trailing nl fp.close() except IOError, e: if e.errno != errno.ENOENT: raise # It's okay not to have a site admin password return None return challenge def check_global_password(response, siteadmin=True): challenge = get_global_password(siteadmin) if challenge is None: return False return passwords.check_response(challenge, response) def websafe(s): return cgi.escape(s, quote=True) def nntpsplit(s): parts = s.split(':', 1) if len(parts) == 2: try: return parts[0], int(parts[1]) except ValueError: pass # Use the defaults return s, 119 # Just changing these two functions should be enough to control the way # that email address obscuring is handled. def ObscureEmail(addr, for_text=False): """Make email address unrecognizable to web spiders, but invertable. When for_text option is set (not default), make a sentence fragment instead of a token.""" if for_text: return addr.replace('@', ' at ') else: return addr.replace('@', '--at--') def UnobscureEmail(addr): """Invert ObscureEmail() conversion.""" # Contrived to act as an identity operation on already-unobscured # emails, so routines expecting obscured ones will accept both. return addr.replace('--at--', '@') class OuterExit(Exception): pass def findtext(templatefile, raw_dict=None, raw=False, lang=None, mlist=None): # Make some text from a template file. The order of searches depends on # whether mlist and lang are provided. Once the templatefile is found, # string substitution is performed by interpolation in `dict'. If `raw' # is false, the resulting text is wrapped/filled by calling wrap(). # # When looking for a template in a specific language, there are 4 places # that are searched, in this order: # # 1. the list-specific language directory # lists// # # 2. the domain-specific language directory # templates// # # 3. the site-wide language directory # templates/site/ # # 4. the global default language directory # templates/ # # The first match found stops the search. In this way, you can specialize # templates at the desired level, or, if you use only the default # templates, you don't need to change anything. You should never modify # files in the templates/ subdirectory, since Mailman will # overwrite these when you upgrade. That's what the templates/site # language directories are for. # # A further complication is that the language to search for is determined # by both the `lang' and `mlist' arguments. The search order there is # that if lang is given, then the 4 locations above are searched, # substituting lang for . If no match is found, and mlist is # given, then the 4 locations are searched using the list's preferred # language. After that, the server default language is used for # . If that still doesn't yield a template, then the standard # distribution's English language template is used as an ultimate # fallback, and when lang is not 'en', the resulting template is passed # through the translation service. If this template is missing you've got # big problems. ;) # # A word on backwards compatibility: Mailman versions prior to 2.1 stored # templates in templates/*.{html,txt} and lists//*.{html,txt}. # Those directories are no longer searched so if you've got customizations # in those files, you should move them to the appropriate directory based # on the above description. Mailman's upgrade script cannot do this for # you. # # The function has been revised and renamed as it now returns both the # template text and the path from which it retrieved the template. The # original function is now a wrapper which just returns the template text # as before, by calling this renamed function and discarding the second # item returned. # # Calculate the languages to scan languages = set() if lang is not None: languages.add(lang) if mlist is not None: languages.add(mlist.preferred_language.code) languages.add(config.mailman.default_language) assert None not in languages, 'None in languages' # Calculate the locations to scan searchdirs = [] if mlist is not None: searchdirs.append(mlist.data_path) searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name)) searchdirs.append(os.path.join(TEMPLATE_DIR, 'site')) searchdirs.append(TEMPLATE_DIR) # Start scanning fp = None try: for lang in languages: for dir in searchdirs: filename = os.path.join(dir, lang, templatefile) try: fp = open(filename) raise OuterExit except IOError, e: if e.errno != errno.ENOENT: raise # Okay, it doesn't exist, keep looping fp = None except OuterExit: pass if fp is None: # Try one last time with the distro English template, which, unless # you've got a really broken installation, must be there. try: filename = os.path.join(TEMPLATE_DIR, 'en', templatefile) fp = open(filename) except IOError, e: if e.errno != errno.ENOENT: raise # We never found the template. BAD! raise IOError(errno.ENOENT, 'No template file found', templatefile) else: # XXX BROKEN HACK data = fp.read()[:-1] template = _(data) fp.close() else: template = fp.read() fp.close() charset = getUtility(ILanguageManager)[lang].charset template = unicode(template, charset, 'replace') text = template if raw_dict is not None: text = expand(template, raw_dict) if raw: return text, filename return wrap(text), filename def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None): return findtext(templatefile, dict, raw, lang, mlist)[0] # The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the # charset of the given language, which is the character set that the page will # be rendered in, and failing that, replaces non-ASCII characters with their # html references. It always returns a byte string. def uncanonstr(s, lang=None): if s is None: s = u'' if lang is None: charset = 'us-ascii' else: charset = getUtility(ILanguageManager)[lang].charset # See if the string contains characters only in the desired character # set. If so, return it unchanged, except for coercing it to a byte # string. try: if isinstance(s, unicode): return s.encode(charset) else: u = unicode(s, charset) return s except UnicodeError: # Nope, it contains funny characters, so html-ref it return uquote(s) def uquote(s): a = [] for c in s: o = ord(c) if o > 127: a.append('&#%3d;' % o) else: a.append(c) # Join characters together and coerce to byte string return str(EMPTYSTRING.join(a)) def oneline(s, cset='us-ascii', in_unicode=False): # Decode header string in one line and convert into specified charset try: h = make_header(decode_header(s)) ustr = h.__unicode__() line = UEMPTYSTRING.join(ustr.splitlines()) if in_unicode: return line else: return line.encode(cset, 'replace') except (LookupError, UnicodeError, ValueError, HeaderParseError): # possibly charset problem. return with undecoded string in one line. return EMPTYSTRING.join(s.splitlines()) def strip_verbose_pattern(pattern): # Remove white space and comments from a verbose pattern and return a # non-verbose, equivalent pattern. Replace CR and NL in the result # with '\\r' and '\\n' respectively to avoid multi-line results. if not isinstance(pattern, str): return pattern newpattern = '' i = 0 inclass = False skiptoeol = False copynext = False while i < len(pattern): c = pattern[i] if copynext: if c == NL: newpattern += '\\n' elif c == CR: newpattern += '\\r' else: newpattern += c copynext = False elif skiptoeol: if c == NL: skiptoeol = False elif c == '#' and not inclass: skiptoeol = True elif c == '[' and not inclass: inclass = True newpattern += c copynext = True elif c == ']' and inclass: inclass = False newpattern += c elif re.search('\s', c): if inclass: if c == NL: newpattern += '\\n' elif c == CR: newpattern += '\\r' else: newpattern += c elif c == '\\' and not inclass: newpattern += c copynext = True else: if c == NL: newpattern += '\\n' elif c == CR: newpattern += '\\r' else: newpattern += c i += 1 return newpattern def get_pattern(email, pattern_list): """Returns matched entry in pattern_list if email matches. Otherwise returns None. """ if not pattern_list: return None matched = None for pattern in pattern_list: if pattern.startswith('^'): # This is a regular expression match try: if re.search(pattern, email, re.IGNORECASE): matched = pattern break except re.error: # BAW: we should probably remove this pattern pass else: # Do the comparison case insensitively if pattern.lower() == email.lower(): matched = pattern break return matched