summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Mailman/Handlers/Decorate.py 30
-rw-r--r-- Mailman/Handlers/Scrubber.py 74
-rw-r--r-- Mailman/passwords.py 3
-rw-r--r-- Mailman/testing/test_handlers.py 136
4 files changed, 199 insertions, 44 deletions
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py
index 23826cdd1..2f4aceb51 100644
--- a/Mailman/Handlers/Decorate.py
+++ b/Mailman/Handlers/Decorate.py
@@ -17,6 +17,7 @@
"""Decorate a message by sticking the header and footer around it."""
+import re
import logging
from email.MIMEText import MIMEText
@@ -84,9 +85,15 @@ def process(mlist, msg, msgdata):
# MIME multipart chroming the message?
wrap = True
if not msg.is_multipart() and msgtype == 'text/plain':
+ # Save the RFC-3676 format parameters.
+ format = msg.get_param('format')
+ delsp = msg.get_param('delsp')
+ # Save 'Content-Transfer-Encoding' header in case decoration fails.
+ cte = msg.get('content-transfer-encoding')
# header/footer is now in unicode (2.2)
try:
oldpayload = unicode(msg.get_payload(decode=True), mcset)
+ del msg['content-transfer-encoding']
frontsep = endsep = u''
if header and not header.endswith('\n'):
frontsep = u'\n'
@@ -99,18 +106,21 @@ def process(mlist, msg, msgdata):
# charset, then utf-8. It's okay if some of these are duplicates.
for cset in (lcset, mcset, 'utf-8'):
try:
- pld = payload.encode(cset)
- del msg['content-transfer-encoding']
- del msg['content-type']
- msg.set_payload(pld, cset)
- wrap = False
- break
- # 'except' should be here because set_payload() may fail for
- # 'euc-jp' which re-encode to 'iso-2022-jp'. :(
+ msg.set_payload(payload.encode(cset), cset)
except UnicodeError:
pass
+ else:
+ if format:
+ msg.set_param('format', format)
+ if delsp:
+ msg.set_param('delsp', delsp)
+ wrap = False
+ break
except (LookupError, UnicodeError):
- pass
+ if cte:
+ # Restore the original c-t-e.
+ del msg['content-transfer-encoding']
+ msg['Content-Transfer-Encoding'] = cte
elif msg.get_content_type() == 'multipart/mixed':
# The next easiest thing to do is just prepend the header and append
# the footer as additional subparts
@@ -201,7 +211,7 @@ def decorate(mlist, template, what, extradict={}):
template = Utils.to_percent(template)
# Interpolate into the template
try:
- text = (template % d).replace('\r\n', '\n')
+ text = re.sub(r' *\r?\n', r'\n', template % d)
except (ValueError, TypeError), e:
log.exception('Exception while calculating %s:\n%s', what, e)
what = what.upper()
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index e14f9a549..a7a825852 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -144,6 +144,10 @@ def replace_payload_by_text(msg, text, charset):
# message by a text (scrubbing).
del msg['content-type']
del msg['content-transfer-encoding']
+ if isinstance(text, unicode):
+ text = text.encode(charset)
+ if not isinstance(charset, str):
+ charset = str(charset)
msg.set_payload(text, charset)
@@ -160,7 +164,7 @@ def process(mlist, msg, msgdata=None):
if not mlist.scrub_nondigest:
return
dir = calculate_attachments_dir(mlist, msg, msgdata)
- charset = None
+ charset = format = delsp = None
lcset = Utils.GetCharSet(mlist.preferred_language)
lcset_out = Charset(lcset).output_charset or lcset
# Now walk over all subparts of this message and scrub out various types
@@ -170,9 +174,11 @@ def process(mlist, msg, msgdata=None):
if ctype == 'text/plain':
# We need to choose a charset for the scrubbed message, so we'll
# arbitrarily pick the charset of the first text/plain part in the
- # message.
+ # message. Also get the RFC 3676 stuff from this part.
if charset is None:
charset = part.get_content_charset(lcset)
+ format = part.get_param('format')
+ delsp = part.get_param('delsp')
# TK: if part is attached then check charset and scrub if none
if part.get('content-disposition') and \
not part.get_content_charset():
@@ -182,7 +188,7 @@ def process(mlist, msg, msgdata=None):
replace_payload_by_text(part, _("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
-Url: %(url)s
+URL: %(url)s
"""), lcset)
elif ctype == 'text/html' and isinstance(sanitize, int):
if sanitize == 0:
@@ -240,7 +246,7 @@ From: %(who)s
Subject: %(subject)s
Date: %(date)s
Size: %(size)s
-Url: %(url)s
+URL: %(url)s
"""), lcset)
# If the message isn't a multipart, then we'll strip it out as an
# attachment that would have to be separately downloaded. Pipermail
@@ -267,7 +273,7 @@ Name: %(filename)s
Type: %(ctype)s
Size: %(size)d bytes
Desc: %(desc)s
-Url : %(url)s
+URL: %(url)s
"""), lcset)
outer = False
# We still have to sanitize multipart messages to flat text because
@@ -289,6 +295,7 @@ Url : %(url)s
# BAW: Martin's original patch suggested we might want to try
# generalizing to utf-8, and that's probably a good idea (eventually).
text = []
+ charsets = []
for part in msg.walk():
# TK: bug-id 1099138 and multipart
if not part or part.is_multipart():
@@ -307,37 +314,38 @@ Url : %(url)s
# null body. See bug 1430236.
except (binascii.Error, TypeError):
t = part.get_payload()
- # TK: get_content_charset() returns 'iso-2022-jp' for internally
- # crafted (scrubbed) 'euc-jp' text part. So, first try
- # get_charset(), then get_content_charset() for the parts
- # which are already embeded in the incoming message.
- partcharset = part.get_charset()
- if partcharset:
- partcharset = str(partcharset)
- else:
- partcharset = part.get_content_charset()
- if partcharset and partcharset <> charset:
- try:
- t = unicode(t, partcharset, 'replace')
- except (UnicodeError, LookupError, ValueError):
- # Replace funny characters. We use errors='replace' for
- # both calls since the first replace will leave U+FFFD,
- # which isn't ASCII encodeable.
- u = unicode(t, 'ascii', 'replace')
- t = u.encode('ascii', 'replace')
- try:
- # Should use HTML-Escape, or try generalizing to UTF-8
- t = t.encode(charset, 'replace')
- except (UnicodeError, LookupError, ValueError):
- t = t.encode(lcset, 'replace')
+ # Email problem was solved by Mark Sapiro. (TK)
+ partcharset = part.get_content_charset('us-ascii')
+ try:
+ t = unicode(t, partcharset, 'replace')
+ except (UnicodeError, LookupError, ValueError, TypeError):
+ # It is unclear what could still cause this exception now.
+ # Replace funny characters. We use errors='replace'.
+ u = unicode(t, 'ascii', 'replace')
# Separation is useful
- if isinstance(t, str):
+ if isinstance(t, basestring):
if not t.endswith('\n'):
t += '\n'
text.append(t)
+ if partcharset not in charsets:
+ charsets.append(partcharset)
# Now join the text and set the payload
sep = _('-------------- next part --------------\n')
- replace_payload_by_text(msg, sep.join(text), charset)
+ rept = sep.join(text)
+ # Replace entire message with text and scrubbed notice.
+ # Try with message charsets and utf-8
+ if 'utf-8' not in charsets:
+ charsets.append('utf-8')
+ for charset in charsets:
+ try:
+ replace_payload_by_text(msg, rept, charset)
+ break
+ except UnicodeError:
+ pass
+ if format:
+ msg.set_param('format', format)
+ if delsp:
+ msg.set_param('delsp', delsp)
return msg
@@ -467,7 +475,7 @@ def save_attachment(mlist, msg, dir, filter_html=True):
# Private archives will likely have a trailing slash. Normalize.
if baseurl[-1] <> '/':
baseurl += '/'
- # A trailing space in url string may save users who are using
- # RFC-1738 compliant MUA (Not Mozilla).
- url = baseurl + '%s/%s%s%s ' % (dir, filebase, extra, ext)
+ # Trailing space will definitely be a problem with format=flowed.
+ # Bracket the URL instead.
+ url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
return url
diff --git a/Mailman/passwords.py b/Mailman/passwords.py
index a46c11a16..d84ab9f48 100644
--- a/Mailman/passwords.py
+++ b/Mailman/passwords.py
@@ -240,6 +240,9 @@ def check_response(challenge, response):
scheme = scheme_parts[0].lower()
scheme_enum = _SCHEMES_BY_TAG.get(scheme, _DEFAULT_SCHEME)
scheme_class = _SCHEMES_BY_ENUM[scheme_enum]
+ if isinstance(rest_group, unicode):
+ # decode() fails. (challenge is from database)
+ rest_group = str(rest_group)
return scheme_class.check_response(rest_group, response, *scheme_parts[1:])
diff --git a/Mailman/testing/test_handlers.py b/Mailman/testing/test_handlers.py
index 59fd4de08..f963fdf24 100644
--- a/Mailman/testing/test_handlers.py
+++ b/Mailman/testing/test_handlers.py
@@ -48,6 +48,7 @@ from Mailman.Handlers import Hold
from Mailman.Handlers import MimeDel
from Mailman.Handlers import Moderate
from Mailman.Handlers import Replybot
+from Mailman.Handlers import Scrubber
# Don't test handlers such as SMTPDirect and Sendmail here
from Mailman.Handlers import SpamDetect
from Mailman.Handlers import Tagger
@@ -955,7 +956,7 @@ IMAGEDATAIMAGEDATAIMAGEDATA
mlist.description = u'\u65e5\u672c\u8a9e'
msg = Message.Message()
msg.set_payload('Fran\xe7aise', 'iso-8859-1')
- Decorate.process(self._mlist, msg, {})
+ Decorate.process(mlist, msg, {})
self.assertEqual(msg.as_string(unixfrom=0), """\
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
@@ -964,6 +965,46 @@ Content-Transfer-Encoding: base64
5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI=
""")
+ def test_no_multipart_unknown_charset(self):
+ mlist = self._mlist
+ mlist.msg_header = 'header'
+ mlist.msg_footer = 'footer'
+ msg = email.message_from_string("""\
+From: aperson@example.org
+Content-Type: text/plain; charset=unknown
+Content-Transfer-Encoding: 7bit
+
+Here is a message.
+""")
+ Decorate.process(mlist, msg, {})
+ self.assertEqual(len(msg.get_payload()), 3)
+ self.assertEqual(msg.get_payload()[1].as_string(unixfrom=0),"""\
+Content-Type: text/plain; charset=unknown
+Content-Transfer-Encoding: 7bit
+
+Here is a message.
+""")
+
+ def test_no_multipart_flowed(self):
+ mlist = self._mlist
+ mlist.msg_header = 'header'
+ mlist.msg_footer = 'footer'
+ msg = email.message_from_string("""\
+From: aperson@example.org
+Content-Type: text/plain; format=flowed; delsp=no
+
+Here is a message
+with soft line break.
+""")
+ Decorate.process(mlist, msg, {})
+ self.assertEqual(msg.get_param('format'), 'flowed')
+ self.assertEqual(msg.get_param('delsp'), 'no')
+ self.assertEqual(msg.get_payload(), """\
+header
+Here is a message
+with soft line break.
+footer""")
+
class TestFileRecips(TestBase):
@@ -1391,6 +1432,98 @@ class TestReplybot(TestBase):
+class TestScrubber(TestBase):
+ def test_save_attachment(self):
+ mlist = self._mlist
+ msg = email.message_from_string("""\
+Content-Type: image/gif; name="xtest.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="xtest.gif"
+
+R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+""")
+ Scrubber.save_attachment(mlist, msg, '')
+ f = open(os.path.join(mlist.archive_dir(), 'attachment.gif'))
+ img = f.read()
+ self.assertEqual(img.startswith('GIF87a'), True)
+ self.assertEqual(len(img), 34)
+
+ def _saved_file(self, s):
+ # a convenient function to get the saved attachment file
+ for i in s.splitlines():
+ if i.startswith('URL: '):
+ f = i.replace(
+ 'URL: <' + self._mlist.GetBaseArchiveURL() + '/' , '')
+ f = os.path.join(self._mlist.archive_dir(), f.rstrip('>'))
+ return f
+
+ def test_scrub_image(self):
+ mlist = self._mlist
+ msg = email.message_from_string("""\
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+--BOUNDARY
+Content-type: text/plain; charset=us-ascii
+
+This is a message.
+--BOUNDARY
+Content-Type: image/gif; name="xtest.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="xtest.gif"
+
+R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+--BOUNDARY--
+""")
+ Scrubber.process(mlist, msg, {})
+ # saved file
+ img = open(self._saved_file(msg.get_payload())).read()
+ self.assertEqual(img.startswith('GIF87a'), True)
+ self.assertEqual(len(img), 34)
+ # scrubbed message
+ s = '\n'.join([l for l in msg.get_payload().splitlines()
+ if not l.startswith('URL: ')])
+ self.assertEqual(s, """\
+This is a message.
+-------------- next part --------------
+A non-text attachment was scrubbed...
+Name: xtest.gif
+Type: image/gif
+Size: 34 bytes
+Desc: not available""")
+
+ def test_scrub_text(self):
+ mlist = self._mlist
+ msg = email.message_from_string("""\
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+--BOUNDARY
+Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no
+
+This is a message.
+--BOUNDARY
+Content-type: text/plain; name="xtext.txt"
+Content-Disposition: attachment; filename="xtext.txt"
+
+This is a text attachment.
+--BOUNDARY--
+""")
+ Scrubber.process(mlist, msg, {})
+ self.assertEqual(msg.get_param('format'), 'flowed')
+ self.assertEqual(msg.get_param('delsp'), 'no')
+ txt = open(self._saved_file(msg.get_payload())).read()
+ self.assertEqual(txt, 'This is a text attachment.')
+ s = '\n'.join([l for l in msg.get_payload().splitlines()
+ if not l.startswith('URL: ')])
+ self.assertEqual(s, """\
+This is a message.
+-------------- next part --------------
+An embedded and charset-unspecified text was scrubbed...
+Name: xtext.txt""")
+
+
+
class TestSpamDetect(TestBase):
def test_short_circuit(self):
msgdata = {'approved': 1}
@@ -1808,6 +1941,7 @@ def test_suite():
suite.addTest(unittest.makeSuite(TestMimeDel))
suite.addTest(unittest.makeSuite(TestModerate))
suite.addTest(unittest.makeSuite(TestReplybot))
+ suite.addTest(unittest.makeSuite(TestScrubber))
suite.addTest(unittest.makeSuite(TestSpamDetect))
suite.addTest(unittest.makeSuite(TestTagger))
suite.addTest(unittest.makeSuite(TestToArchive))