summary | refs | log | tree | commit | diff
path: root/Mailman/Handlers
diff options
context:
space:
mode:
author tkikuchi 2007-03-25 02:57:18 +0000
committer tkikuchi 2007-03-25 02:57:18 +0000
commit 864162b05e64a351d17e45fd888fbaa822db93b2 (patch)
tree 3f7878840504950fb119fb06c2d7a4a31c4d169b /Mailman/Handlers
parent a8b5ce78a7c5ec7c4e9dabfef37f83c153b53d36 (diff)
download mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.gz
mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.zst
mailman-864162b05e64a351d17e45fd888fbaa822db93b2.zip
passwords.py: It looks like we still need unicode checking.
Decorate.py, Scrubber.py: Mark Sapiro's patch for the 'format' parameter. Scrubber.py: Further code cleanup; note that 'Content-Transfer-Encoding' is not updated by msg.set_payload(); normalize 'Url:' to 'URL:'. test_handlers.py: Tests for Decorate.py and Scrubber.py.
Diffstat (limited to 'Mailman/Handlers')
-rw-r--r-- Mailman/Handlers/Decorate.py 30
-rw-r--r-- Mailman/Handlers/Scrubber.py 74
2 files changed, 61 insertions, 43 deletions
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py
index 23826cdd1..2f4aceb51 100644
--- a/Mailman/Handlers/Decorate.py
+++ b/Mailman/Handlers/Decorate.py
@@ -17,6 +17,7 @@
"""Decorate a message by sticking the header and footer around it."""
+import re
import logging
from email.MIMEText import MIMEText
@@ -84,9 +85,15 @@ def process(mlist, msg, msgdata):
# MIME multipart chroming the message?
wrap = True
if not msg.is_multipart() and msgtype == 'text/plain':
+ # Save the RFC-3676 format parameters.
+ format = msg.get_param('format')
+ delsp = msg.get_param('delsp')
+ # Save 'Content-Transfer-Encoding' header in case decoration fails.
+ cte = msg.get('content-transfer-encoding')
# header/footer is now in unicode (2.2)
try:
oldpayload = unicode(msg.get_payload(decode=True), mcset)
+ del msg['content-transfer-encoding']
frontsep = endsep = u''
if header and not header.endswith('\n'):
frontsep = u'\n'
@@ -99,18 +106,21 @@ def process(mlist, msg, msgdata):
# charset, then utf-8. It's okay if some of these are duplicates.
for cset in (lcset, mcset, 'utf-8'):
try:
- pld = payload.encode(cset)
- del msg['content-transfer-encoding']
- del msg['content-type']
- msg.set_payload(pld, cset)
- wrap = False
- break
- # 'except' should be here because set_payload() may fail for
- # 'euc-jp' which re-encode to 'iso-2022-jp'. :(
+ msg.set_payload(payload.encode(cset), cset)
except UnicodeError:
pass
+ else:
+ if format:
+ msg.set_param('format', format)
+ if delsp:
+ msg.set_param('delsp', delsp)
+ wrap = False
+ break
except (LookupError, UnicodeError):
- pass
+ if cte:
+ # Restore the original c-t-e.
+ del msg['content-transfer-encoding']
+ msg['Content-Transfer-Encoding'] = cte
elif msg.get_content_type() == 'multipart/mixed':
# The next easiest thing to do is just prepend the header and append
# the footer as additional subparts
@@ -201,7 +211,7 @@ def decorate(mlist, template, what, extradict={}):
template = Utils.to_percent(template)
# Interpolate into the template
try:
- text = (template % d).replace('\r\n', '\n')
+ text = re.sub(r' *\r?\n', r'\n', template % d)
except (ValueError, TypeError), e:
log.exception('Exception while calculating %s:\n%s', what, e)
what = what.upper()
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index e14f9a549..a7a825852 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -144,6 +144,10 @@ def replace_payload_by_text(msg, text, charset):
# message by a text (scrubbing).
del msg['content-type']
del msg['content-transfer-encoding']
+ if isinstance(text, unicode):
+ text = text.encode(charset)
+ if not isinstance(charset, str):
+ charset = str(charset)
msg.set_payload(text, charset)
@@ -160,7 +164,7 @@ def process(mlist, msg, msgdata=None):
if not mlist.scrub_nondigest:
return
dir = calculate_attachments_dir(mlist, msg, msgdata)
- charset = None
+ charset = format = delsp = None
lcset = Utils.GetCharSet(mlist.preferred_language)
lcset_out = Charset(lcset).output_charset or lcset
# Now walk over all subparts of this message and scrub out various types
@@ -170,9 +174,11 @@ def process(mlist, msg, msgdata=None):
if ctype == 'text/plain':
# We need to choose a charset for the scrubbed message, so we'll
# arbitrarily pick the charset of the first text/plain part in the
- # message.
+ # message. Also get the RFC 3676 stuff from this part.
if charset is None:
charset = part.get_content_charset(lcset)
+ format = part.get_param('format')
+ delsp = part.get_param('delsp')
# TK: if part is attached then check charset and scrub if none
if part.get('content-disposition') and \
not part.get_content_charset():
@@ -182,7 +188,7 @@ def process(mlist, msg, msgdata=None):
replace_payload_by_text(part, _("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
-Url: %(url)s
+URL: %(url)s
"""), lcset)
elif ctype == 'text/html' and isinstance(sanitize, int):
if sanitize == 0:
@@ -240,7 +246,7 @@ From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
-Url: %(url)s
+URL: %(url)s
"""), lcset)
# If the message isn't a multipart, then we'll strip it out as an
# attachment that would have to be separately downloaded. Pipermail
@@ -267,7 +273,7 @@ Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
-Url : %(url)s
+URL: %(url)s
"""), lcset)
outer = False
# We still have to sanitize multipart messages to flat text because
@@ -289,6 +295,7 @@ Url : %(url)s
# BAW: Martin's original patch suggested we might want to try
# generalizing to utf-8, and that's probably a good idea (eventually).
text = []
+ charsets = []
for part in msg.walk():
# TK: bug-id 1099138 and multipart
if not part or part.is_multipart():
@@ -307,37 +314,38 @@ Url : %(url)s
# null body. See bug 1430236.
except (binascii.Error, TypeError):
t = part.get_payload()
- # TK: get_content_charset() returns 'iso-2022-jp' for internally
- # crafted (scrubbed) 'euc-jp' text part. So, first try
- # get_charset(), then get_content_charset() for the parts
- # which are already embeded in the incoming message.
- partcharset = part.get_charset()
- if partcharset:
- partcharset = str(partcharset)
- else:
- partcharset = part.get_content_charset()
- if partcharset and partcharset <> charset:
- try:
- t = unicode(t, partcharset, 'replace')
- except (UnicodeError, LookupError, ValueError):
- # Replace funny characters. We use errors='replace' for
- # both calls since the first replace will leave U+FFFD,
- # which isn't ASCII encodeable.
- u = unicode(t, 'ascii', 'replace')
- t = u.encode('ascii', 'replace')
- try:
- # Should use HTML-Escape, or try generalizing to UTF-8
- t = t.encode(charset, 'replace')
- except (UnicodeError, LookupError, ValueError):
- t = t.encode(lcset, 'replace')
+ # Email problem was solved by Mark Sapiro. (TK)
+ partcharset = part.get_content_charset('us-ascii')
+ try:
+ t = unicode(t, partcharset, 'replace')
+ except (UnicodeError, LookupError, ValueError, TypeError):
+ # What is the cause to come this exception now ?
+ # Replace funny characters. We use errors='replace'.
+ u = unicode(t, 'ascii', 'replace')
# Separation is useful
- if isinstance(t, str):
+ if isinstance(t, basestring):
if not t.endswith('\n'):
t += '\n'
text.append(t)
+ if partcharset not in charsets:
+ charsets.append(partcharset)
# Now join the text and set the payload
sep = _('-------------- next part --------------\n')
- replace_payload_by_text(msg, sep.join(text), charset)
+ rept = sep.join(text)
+ # Replace entire message with text and scrubbed notice.
+ # Try with message charsets and utf-8
+ if 'utf-8' not in charsets:
+ charsets.append('utf-8')
+ for charset in charsets:
+ try:
+ replace_payload_by_text(msg, rept, charset)
+ break
+ except UnicodeError:
+ pass
+ if format:
+ msg.set_param('format', format)
+ if delsp:
+ msg.set_param('delsp', delsp)
return msg
@@ -467,7 +475,7 @@ def save_attachment(mlist, msg, dir, filter_html=True):
# Private archives will likely have a trailing slash. Normalize.
if baseurl[-1] <> '/':
baseurl += '/'
- # A trailing space in url string may save users who are using
- # RFC-1738 compliant MUA (Not Mozilla).
- url = baseurl + '%s/%s%s%s ' % (dir, filebase, extra, ext)
+ # Trailing space will definitely be a problem with format=flowed.
+ # Bracket the URL instead.
+ url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
return url