# Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
"""Miscellaneous essential routines.
This includes actual message transmission routines, address checking and
message and address munging, a handy-dandy routine to map a function on all
the mailing lists, and whatever else doesn't belong elsewhere.
"""
from __future__ import absolute_import, unicode_literals
__metaclass__ = type
__all__ = [
]
import os
import re
import cgi
import time
import errno
import base64
import random
import logging
import htmlentitydefs
# pylint: disable-msg=E0611,W0403
from email.errors import HeaderParseError
from email.header import decode_header, make_header
from lazr.config import as_boolean
from string import ascii_letters, digits, whitespace
from zope.component import getUtility
import mailman.templates
from mailman import passwords
from mailman.config import config
from mailman.core import errors
from mailman.core.i18n import _
from mailman.interfaces.languages import ILanguageManager
from mailman.utilities.string import expand
# Frequently used single-character and empty-string constants.
AT = '@'
CR = '\r'
DOT = '.'
EMPTYSTRING = ''
# ASCII letters, digits and the underscore.
IDENTCHARS = ascii_letters + digits + '_'
NL = '\n'
UEMPTYSTRING = u''
# Directory in which the mailman.templates package lives.
TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__)
# Search for %(identifier)s strings, except that the trailing s is optional,
# since that's a common mistake
cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE)
# Search for $$, $identifier, or ${identifier}
dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE)
# Logger for the 'mailman.error' channel.
log = logging.getLogger('mailman.error')
# A much more naive implementation than say, Emacs's fill-paragraph!
# pylint: disable-msg=R0912
def wrap(text, column=70, honor_leading_ws=True):
    """Fill and wrap `text' so that its lines fit in `column' characters.

    Paragraphs are separated by a blank line.  Inside a paragraph, adjacent
    lines are filled, i.e. joined with a single space, unless
    honor_leading_ws is true and the line begins with whitespace, in which
    case that line is kept on a line of its own.  Every resulting line is
    then broken at whitespace so that it is at most `column' characters
    long.  When that is impossible because a word is longer than `column',
    the line is broken at the next whitespace following that word.

    Returns the wrapped text as a single string.
    """
    output = []
    for paragraph in re.split('\n\n', text):
        # Pass 1: fill the paragraph.
        filled = []
        previous_fillable = False
        for raw_line in paragraph.split('\n'):
            if not raw_line:
                # Empty lines are kept verbatim and leave the fill state of
                # the previous line untouched.
                filled.append(raw_line)
                continue
            fillable = not (honor_leading_ws and raw_line[0] in whitespace)
            if previous_fillable and fillable:
                # Glue this line onto the previous one with a single space.
                filled[-1] = filled[-1].rstrip() + ' ' + raw_line
            else:
                # No fill, i.e. the newline is retained.
                filled.append(raw_line)
            previous_fillable = fillable
        # Pass 2: wrap each filled line.
        for remaining in filled:
            while remaining:
                size = len(remaining)
                if size <= column:
                    piece = remaining
                    remaining = ''
                else:
                    # Walk back from the column to the last whitespace...
                    cut = column
                    while cut > 0 and remaining[cut] not in whitespace:
                        cut -= 1
                    # ...and further back to the last non-whitespace.
                    last = cut
                    while last > 0 and remaining[last] in whitespace:
                        last -= 1
                    if last == 0:
                        # Nothing to break on before the column, so break
                        # on the first whitespace after it instead.
                        last = column
                        while (last < size
                               and remaining[last] not in whitespace):
                            last += 1
                        cut = last
                        while cut < size and remaining[cut] in whitespace:
                            cut += 1
                        cut -= 1
                    piece = remaining[:last + 1] + '\n'
                    # Resume at the next non-whitespace character.
                    cut += 1
                    while cut < size and remaining[cut] in whitespace:
                        cut += 1
                    remaining = remaining[cut:]
                output.append(piece)
            # Terminate the (possibly wrapped) line.
            output.append('\n')
        # Separate paragraphs with a blank line.
        output.append('\n')
    # The last two newlines are bogus.
    return ''.join(output)[:-2]
# Building blocks for pronounceable passwords: every consonant/vowel pair,
# in both orders.
_vowels = ('a', 'e', 'i', 'o', 'u')
_consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
               'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
_syllables = []
for v in _vowels:
    for c in _consonants:
        _syllables.extend((c + v, v + c))
# Do not leave the loop variables behind in the module namespace.
del c, v


def UserFriendly_MakeRandomPassword(length):
    """Return a pronounceable password of exactly `length' characters.

    The password is assembled from randomly chosen two-letter syllables and
    truncated to the requested length.  Syllables are picked with the
    `random' module.
    """
    # Two characters per syllable; round up so odd lengths are covered.
    needed = (length + 1) // 2
    chosen = [random.choice(_syllables) for _unused in range(needed)]
    return ''.join(chosen)[:length]
def Secure_MakeRandomPassword(length):
    """Return a random password of `length' characters.

    Random bytes are taken from os.urandom() or, where that is missing or
    not implemented, read directly from /dev/urandom.  The bytes are base64
    encoded and the result is cut to `length' characters, so the password
    consists of characters from the base64 alphabet.

    If /dev/urandom does not exist either, an error is logged and the
    result of UserFriendly_MakeRandomPassword(length) is returned instead.
    Any other OSError from opening /dev/urandom is propagated.
    """
    remaining = length
    chunks = []
    fd = None
    try:
        while remaining > 0:
            try:
                chunk = os.urandom(remaining)
            except (AttributeError, NotImplementedError):
                if fd is None:
                    try:
                        fd = os.open('/dev/urandom', os.O_RDONLY)
                    except OSError as e:
                        if e.errno != errno.ENOENT:
                            raise
                        # We have no available source of cryptographically
                        # secure random characters.  Log an error and fall
                        # back to the user friendly passwords.
                        log.error(
                            'urandom not available, passwords not secure')
                        return UserFriendly_MakeRandomPassword(length)
                # A read may return fewer bytes than requested, hence the
                # enclosing loop.
                chunk = os.read(fd, remaining)
            chunks.append(chunk)
            remaining -= len(chunk)
        # The chunks are raw bytes and must be joined with a byte string: a
        # unicode separator would try to decode them as ASCII and fail.
        # b64encode() never inserts newlines and expands the data by 4/3rds,
        # so there are always at least `length' characters to slice from.
        encoded = base64.b64encode(b''.join(chunks))
        return encoded.decode('ascii')[:length]
    finally:
        if fd is not None:
            os.close(fd)
def MakeRandomPassword(length=None):
    """Generate a random password according to the site configuration.

    `length' defaults to config.passwords.member_password_length.  The
    pronounceable generator is used when
    config.passwords.user_friendly_passwords is true, the secure generator
    otherwise.  The generated password is decoded as ASCII before it is
    returned.
    """
    if length is None:
        length = int(config.passwords.member_password_length)
    if as_boolean(config.passwords.user_friendly_passwords):
        generate = UserFriendly_MakeRandomPassword
    else:
        generate = Secure_MakeRandomPassword
    return generate(length).decode('ascii')
def GetRandomSeed():
    """Return a string of two random ASCII letters."""
    def letter_code(index):
        # 0..25 map to 'A'..'Z' and 26..51 map to 'a'..'z'.
        if index < 26:
            return ord('A') + index
        return ord('a') + index - 26

    first = int(random.random() * 52)
    second = int(random.random() * 52)
    return "%c%c" % (letter_code(first), letter_code(second))
def set_global_password(pw, siteadmin=True, scheme=None):
    """Hash `pw' and store it as one of the global passwords.

    The secret is written, followed by a newline, to config.SITE_PW_FILE
    when `siteadmin' is true and to config.LISTCREATOR_PW_FILE otherwise.
    `scheme' selects the hashing scheme handed to passwords.make_secret()
    and defaults to passwords.Schemes.ssha.

    Errors from opening or writing the file are propagated to the caller.
    """
    if scheme is None:
        scheme = passwords.Schemes.ssha
    if siteadmin:
        filename = config.SITE_PW_FILE
    else:
        filename = config.LISTCREATOR_PW_FILE
    # Compute the secret before opening the file so that a failure here
    # cannot leave a truncated password file behind.
    secret = passwords.make_secret(pw, scheme)
    # The with statement only closes the file if it was actually opened; a
    # failing open() is reported as such instead of being masked by an
    # unbound `fp' in a finally clause.
    with open(filename, 'w') as fp:
        fp.write(secret)
        fp.write('\n')
def get_global_password(siteadmin=True):
    """Return the stored global password secret, or None if there is none.

    The secret is read from config.SITE_PW_FILE when `siteadmin' is true
    and from config.LISTCREATOR_PW_FILE otherwise.  A single trailing
    newline, if present, is removed.

    A missing file is not an error and yields None; any other IOError is
    propagated.
    """
    if siteadmin:
        filename = config.SITE_PW_FILE
    else:
        filename = config.LISTCREATOR_PW_FILE
    try:
        # The with statement closes the file even if read() fails.
        with open(filename) as fp:
            challenge = fp.read()
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        # It's okay not to have a site admin password
        return None
    # Strip off the trailing newline, but only if there really is one, so
    # that a file without it does not lose the secret's last character.
    if challenge[-1:] == '\n':
        challenge = challenge[:-1]
    return challenge
def check_global_password(response, siteadmin=True):
    """Check `response' against the stored global password.

    Returns False when no password is stored, otherwise the result of
    passwords.check_response() for the stored secret and `response'.
    """
    challenge = get_global_password(siteadmin)
    return (challenge is not None
            and passwords.check_response(challenge, response))
def websafe(s):
    """Return `s' escaped with cgi.escape(), double quotes included."""
    return cgi.escape(s, quote=True)
def nntpsplit(s):
    """Split a 'host:port' string into a (host, port) tuple.

    When there is no colon, or what follows the first colon is not an
    integer, the whole string is returned as the host together with the
    default port 119.
    """
    host, colon, port = s.partition(':')
    if colon:
        try:
            return host, int(port)
        except ValueError:
            # Not a usable port number; fall through to the defaults.
            pass
    # Use the defaults
    return s, 119
# Just changing these two functions should be enough to control the way
# that email address obscuring is handled.
def ObscureEmail(addr, for_text=False):
    """Make an email address unrecognizable to web spiders, but invertible.

    By default every '@' becomes the token '--at--'.  When `for_text' is
    true every '@' becomes ' at ' instead, which gives a sentence fragment
    rather than a token.
    """
    replacement = ' at ' if for_text else '--at--'
    return addr.replace('@', replacement)
def UnobscureEmail(addr):
    """Undo the token form of ObscureEmail().

    Every '--at--' is turned back into '@'.  An address that contains no
    '--at--' is returned unchanged, so callers may pass either obscured or
    plain addresses.
    """
    obscured_at, plain_at = '--at--', '@'
    return addr.replace(obscured_at, plain_at)
class OuterExit(Exception):
    """Control-flow exception for leaving nested loops in a single step."""
def findtext(templatefile, raw_dict=None, raw=False, lang=None, mlist=None):
    """Make some text from a template file; return (text, filename).

    `templatefile' is the name of the template to look for.  If `raw_dict'
    is not None, the template is interpolated with it through expand().
    Unless `raw' is true, the resulting text is wrapped/filled by calling
    wrap().  `filename' is the path the template was actually read from.

    For one language, up to 4 places are searched, in this order:

    1. the list-specific language directory
       <mlist.data_path>/<language>/

    2. the domain-specific language directory
       templates/<mlist.host_name>/<language>/

    3. the site-wide language directory
       templates/site/<language>/

    4. the global default language directory
       templates/<language>/

    Places 1 and 2 are only searched when `mlist' is given.  The first
    match found stops the search.

    The languages are tried in this order: `lang' if given, then the list's
    preferred language if `mlist' is given, then the server default language
    (config.mailman.default_language).  If that still doesn't yield a
    template, templates/en/<templatefile> is used as the ultimate fallback
    and its text is passed through the translation function _().

    Raises IOError with errno ENOENT when not even the English template
    exists.  Any IOError other than ENOENT hit while probing is propagated.
    """
    # Calculate the languages to scan.  This must be an ordered sequence and
    # not a set, otherwise the search order described above is not honored.
    candidates = []
    if lang is not None:
        candidates.append(lang)
    if mlist is not None:
        candidates.append(mlist.preferred_language.code)
    candidates.append(config.mailman.default_language)
    assert None not in candidates, 'None in languages'
    languages = []
    for code in candidates:
        if code not in languages:
            languages.append(code)
    # Calculate the locations to scan
    searchdirs = []
    if mlist is not None:
        searchdirs.append(mlist.data_path)
        searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name))
    searchdirs.append(os.path.join(TEMPLATE_DIR, 'site'))
    searchdirs.append(TEMPLATE_DIR)
    # Start scanning.  `lang' is deliberately rebound by the loop: after a
    # hit it names the language the template was found for, and its charset
    # is used for decoding below.
    fp = None
    for lang in languages:
        for directory in searchdirs:
            filename = os.path.join(directory, lang, templatefile)
            try:
                fp = open(filename)
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                # Okay, it doesn't exist, keep looping
            else:
                break
        if fp is not None:
            break
    if fp is None:
        # Try one last time with the distro English template, which, unless
        # you've got a really broken installation, must be there.
        filename = os.path.join(TEMPLATE_DIR, 'en', templatefile)
        try:
            fp = open(filename)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # We never found the template.  BAD!
            raise IOError(errno.ENOENT, 'No template file found',
                          templatefile)
        try:
            # XXX BROKEN HACK: drops the last character of the file before
            # handing the text to the translation function.
            data = fp.read()[:-1]
        finally:
            fp.close()
        template = _(data)
    else:
        try:
            template = fp.read()
        finally:
            fp.close()
    if not isinstance(template, unicode):
        # unicode() refuses to decode an object that already is unicode,
        # which the translated fallback template may be, so only byte
        # strings are decoded.
        charset = getUtility(ILanguageManager)[lang].charset
        template = unicode(template, charset, 'replace')
    text = template
    if raw_dict is not None:
        text = expand(template, raw_dict)
    if raw:
        return text, filename
    return wrap(text), filename
def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
    """Return only the text produced by findtext() for the same arguments.

    The path of the template file, which findtext() returns as well, is
    discarded.
    """
    text, unused_filename = findtext(
        templatefile, raw_dict=dict, raw=raw, lang=lang, mlist=mlist)
    return text
# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
# charset of the given language, which is the character set that the page will
# be rendered in, and failing that, replaces non-ASCII characters with their
# html references. It always returns a byte string.
def uncanonstr(s, lang=None):
    """Encode `s' for the character set of language `lang'.

    The character set is 'us-ascii' when `lang' is None, otherwise it is
    looked up through the language manager.  None is treated as the empty
    string.  A unicode string is returned encoded in that character set; a
    byte string that is valid in that character set is returned unchanged.
    If `s' contains characters outside the character set, the result of
    uquote(s) is returned instead.
    """
    if s is None:
        s = u''
    charset = ('us-ascii' if lang is None
               else getUtility(ILanguageManager)[lang].charset)
    try:
        if not isinstance(s, unicode):
            # Decoding serves purely as a validity check; the decoded value
            # is not needed and the byte string is handed back as is.
            unicode(s, charset)
            return s
        return s.encode(charset)
    except UnicodeError:
        # Nope, it contains funny characters, so html-ref it
        return uquote(s)
def uquote(s):
    """Replace the non-ASCII characters of `s' with HTML references.

    Every character whose ordinal is above 127 is replaced by its decimal
    numeric character reference, e.g. '&#233;'; all other characters are
    copied unchanged.  The result is coerced with str().
    """
    pieces = []
    for ch in s:
        code = ord(ch)
        if code > 127:
            # A numeric character reference needs the leading '&#'; without
            # it only the bare number and a semicolon would be emitted.
            pieces.append('&#%d;' % code)
        else:
            pieces.append(ch)
    # Join characters together and coerce to byte string
    return str(''.join(pieces))
def oneline(s, cset='us-ascii', in_unicode=False):
    """Decode the header string `s' and return it as a single line.

    The decoded header is returned as unicode when `in_unicode' is true,
    otherwise encoded in `cset' with unencodable characters replaced.  If
    decoding or encoding fails, `s' itself is returned undecoded, with its
    line breaks removed.
    """
    try:
        header = make_header(decode_header(s))
        flattened = UEMPTYSTRING.join(header.__unicode__().splitlines())
        if not in_unicode:
            return flattened.encode(cset, 'replace')
        return flattened
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # Possibly a charset problem.  Return the undecoded string in one
        # line.
        return EMPTYSTRING.join(s.splitlines())
def strip_verbose_pattern(pattern):
    """Return a non-verbose equivalent of a verbose regular expression.

    White space and '#' comments outside of character classes are removed.
    Any CR or NL that is kept is replaced by the two-character sequences
    '\\r' and '\\n' respectively, to avoid multi-line results.  A `pattern'
    that is not a str is returned unchanged.
    """
    if not isinstance(pattern, str):
        return pattern
    # Line-break characters are emitted in their escaped spelling.
    escapes = {'\n': '\\n', '\r': '\\r'}
    out = []
    in_class = False        # inside [...]
    in_comment = False      # between '#' and the end of the line
    literal_next = False    # the next character is copied unconditionally
    for ch in pattern:
        if literal_next:
            out.append(escapes.get(ch, ch))
            literal_next = False
        elif in_comment:
            # A comment lasts until the end of the line.
            in_comment = ch != '\n'
        elif ch == '#' and not in_class:
            in_comment = True
        elif ch == '[' and not in_class:
            # The character right after '[' is always taken literally.
            in_class = True
            out.append(ch)
            literal_next = True
        elif ch == ']' and in_class:
            in_class = False
            out.append(ch)
        elif re.search(r'\s', ch):
            # White space only survives inside a character class.
            if in_class:
                out.append(escapes.get(ch, ch))
        elif ch == '\\' and not in_class:
            # Keep the backslash and whatever it escapes.
            out.append(ch)
            literal_next = True
        else:
            out.append(escapes.get(ch, ch))
    return ''.join(out)
def get_pattern(email, pattern_list):
    """Return the first entry of pattern_list that matches email.

    An entry starting with '^' is used as a regular expression and searched
    for in `email', ignoring case; an entry that is not a valid regular
    expression is skipped.  Any other entry matches when it equals `email',
    ignoring case.  Returns None when nothing matches or when pattern_list
    is empty.
    """
    if not pattern_list:
        return None
    for pattern in pattern_list:
        if pattern.startswith('^'):
            # This is a regular expression match
            try:
                found = re.search(pattern, email, re.IGNORECASE)
            except re.error:
                # BAW: we should probably remove this pattern
                continue
            if found:
                return pattern
        elif pattern.lower() == email.lower():
            # Literal addresses are compared case insensitively.
            return pattern
    return None