diff options
| -rw-r--r-- | Mailman/Handlers/Decorate.py | 30 | ||||
| -rw-r--r-- | Mailman/Handlers/Scrubber.py | 74 | ||||
| -rw-r--r-- | Mailman/passwords.py | 3 | ||||
| -rw-r--r-- | Mailman/testing/test_handlers.py | 136 |
4 files changed, 199 insertions, 44 deletions
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py index 23826cdd1..2f4aceb51 100644 --- a/Mailman/Handlers/Decorate.py +++ b/Mailman/Handlers/Decorate.py @@ -17,6 +17,7 @@ """Decorate a message by sticking the header and footer around it.""" +import re import logging from email.MIMEText import MIMEText @@ -84,9 +85,15 @@ def process(mlist, msg, msgdata): # MIME multipart chroming the message? wrap = True if not msg.is_multipart() and msgtype == 'text/plain': + # Save the RFC-3676 format parameters. + format = msg.get_param('format') + delsp = msg.get_param('delsp') + # Save 'Content-Transfer-Encoding' header in case decoration fails. + cte = msg.get('content-transfer-encoding') # header/footer is now in unicode (2.2) try: oldpayload = unicode(msg.get_payload(decode=True), mcset) + del msg['content-transfer-encoding'] frontsep = endsep = u'' if header and not header.endswith('\n'): frontsep = u'\n' @@ -99,18 +106,21 @@ def process(mlist, msg, msgdata): # charset, then utf-8. It's okay if some of these are duplicates. for cset in (lcset, mcset, 'utf-8'): try: - pld = payload.encode(cset) - del msg['content-transfer-encoding'] - del msg['content-type'] - msg.set_payload(pld, cset) - wrap = False - break - # 'except' should be here because set_payload() may fail for - # 'euc-jp' which re-encode to 'iso-2022-jp'. :( + msg.set_payload(payload.encode(cset), cset) except UnicodeError: pass + else: + if format: + msg.set_param('format', format) + if delsp: + msg.set_param('delsp', delsp) + wrap = False + break except (LookupError, UnicodeError): - pass + if cte: + # Restore the original c-t-e. + del msg['content-transfer-encoding'] + msg['Content-Transfer-Encoding'] = cte elif msg.get_content_type() == 'multipart/mixed': # The next easiest thing to do is just prepend the header and append # the footer as additional subparts @@ -201,7 +211,7 @@ def decorate(mlist, template, what, extradict={}): template = Utils.to_percent(template) # Interpolate into the template try: - text = (template % d).replace('\r\n', '\n') + text = re.sub(r' *\r?\n', r'\n', template % d) except (ValueError, TypeError), e: log.exception('Exception while calculating %s:\n%s', what, e) what = what.upper() diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py index e14f9a549..a7a825852 100644 --- a/Mailman/Handlers/Scrubber.py +++ b/Mailman/Handlers/Scrubber.py @@ -144,6 +144,10 @@ def replace_payload_by_text(msg, text, charset): # message by a text (scrubbing). del msg['content-type'] del msg['content-transfer-encoding'] + if isinstance(text, unicode): + text = text.encode(charset) + if not isinstance(charset, str): + charset = str(charset) msg.set_payload(text, charset) @@ -160,7 +164,7 @@ def process(mlist, msg, msgdata=None): if not mlist.scrub_nondigest: return dir = calculate_attachments_dir(mlist, msg, msgdata) - charset = None + charset = format = delsp = None lcset = Utils.GetCharSet(mlist.preferred_language) lcset_out = Charset(lcset).output_charset or lcset # Now walk over all subparts of this message and scrub out various types @@ -170,9 +174,11 @@ def process(mlist, msg, msgdata=None): if ctype == 'text/plain': # We need to choose a charset for the scrubbed message, so we'll # arbitrarily pick the charset of the first text/plain part in the - # message. + # message. Also get the RFC 3676 stuff from this part. if charset is None: charset = part.get_content_charset(lcset) + format = part.get_param('format') + delsp = part.get_param('delsp') # TK: if part is attached then check charset and scrub if none if part.get('content-disposition') and \ not part.get_content_charset(): @@ -182,7 +188,7 @@ def process(mlist, msg, msgdata=None): replace_payload_by_text(part, _("""\ An embedded and charset-unspecified text was scrubbed... Name: %(filename)s -Url: %(url)s +URL: %(url)s """), lcset) elif ctype == 'text/html' and isinstance(sanitize, int): if sanitize == 0: @@ -240,7 +246,7 @@ From: %(who)s Subject: %(subject)s Date: %(date)s Size: %(size)s -Url: %(url)s +URL: %(url)s """), lcset) # If the message isn't a multipart, then we'll strip it out as an # attachment that would have to be separately downloaded. Pipermail @@ -267,7 +273,7 @@ Name: %(filename)s Type: %(ctype)s Size: %(size)d bytes Desc: %(desc)s -Url : %(url)s +URL: %(url)s """), lcset) outer = False # We still have to sanitize multipart messages to flat text because @@ -289,6 +295,7 @@ Url : %(url)s # BAW: Martin's original patch suggested we might want to try # generalizing to utf-8, and that's probably a good idea (eventually). text = [] + charsets = [] for part in msg.walk(): # TK: bug-id 1099138 and multipart if not part or part.is_multipart(): @@ -307,37 +314,38 @@ Url : %(url)s # null body. See bug 1430236. except (binascii.Error, TypeError): t = part.get_payload() - # TK: get_content_charset() returns 'iso-2022-jp' for internally - # crafted (scrubbed) 'euc-jp' text part. So, first try - # get_charset(), then get_content_charset() for the parts - # which are already embeded in the incoming message. - partcharset = part.get_charset() - if partcharset: - partcharset = str(partcharset) - else: - partcharset = part.get_content_charset() - if partcharset and partcharset <> charset: - try: - t = unicode(t, partcharset, 'replace') - except (UnicodeError, LookupError, ValueError): - # Replace funny characters. We use errors='replace' for - # both calls since the first replace will leave U+FFFD, - # which isn't ASCII encodeable. - u = unicode(t, 'ascii', 'replace') - t = u.encode('ascii', 'replace') - try: - # Should use HTML-Escape, or try generalizing to UTF-8 - t = t.encode(charset, 'replace') - except (UnicodeError, LookupError, ValueError): - t = t.encode(lcset, 'replace') + # Email problem was solved by Mark Sapiro. (TK) + partcharset = part.get_content_charset('us-ascii') + try: + t = unicode(t, partcharset, 'replace') + except (UnicodeError, LookupError, ValueError, TypeError): + # What is the cause to come this exception now ? + # Replace funny characters. We use errors='replace'. + u = unicode(t, 'ascii', 'replace') # Separation is useful - if isinstance(t, str): + if isinstance(t, basestring): if not t.endswith('\n'): t += '\n' text.append(t) + if partcharset not in charsets: + charsets.append(partcharset) # Now join the text and set the payload sep = _('-------------- next part --------------\n') - replace_payload_by_text(msg, sep.join(text), charset) + rept = sep.join(text) + # Replace entire message with text and scrubbed notice. + # Try with message charsets and utf-8 + if 'utf-8' not in charsets: + charsets.append('utf-8') + for charset in charsets: + try: + replace_payload_by_text(msg, rept, charset) + break + except UnicodeError: + pass + if format: + msg.set_param('format', format) + if delsp: + msg.set_param('delsp', delsp) return msg @@ -467,7 +475,7 @@ def save_attachment(mlist, msg, dir, filter_html=True): # Private archives will likely have a trailing slash. Normalize. if baseurl[-1] <> '/': baseurl += '/' - # A trailing space in url string may save users who are using - # RFC-1738 compliant MUA (Not Mozilla). - url = baseurl + '%s/%s%s%s ' % (dir, filebase, extra, ext) + # Trailing space will definitely be a problem with format=flowed. + # Bracket the URL instead. + url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext) return url diff --git a/Mailman/passwords.py b/Mailman/passwords.py index a46c11a16..d84ab9f48 100644 --- a/Mailman/passwords.py +++ b/Mailman/passwords.py @@ -240,6 +240,9 @@ def check_response(challenge, response): scheme = scheme_parts[0].lower() scheme_enum = _SCHEMES_BY_TAG.get(scheme, _DEFAULT_SCHEME) scheme_class = _SCHEMES_BY_ENUM[scheme_enum] + if isinstance(rest_group, unicode): + # decode() fails. (challenge is from database) + rest_group = str(rest_group) return scheme_class.check_response(rest_group, response, *scheme_parts[1:]) diff --git a/Mailman/testing/test_handlers.py b/Mailman/testing/test_handlers.py index 59fd4de08..f963fdf24 100644 --- a/Mailman/testing/test_handlers.py +++ b/Mailman/testing/test_handlers.py @@ -48,6 +48,7 @@ from Mailman.Handlers import Hold from Mailman.Handlers import MimeDel from Mailman.Handlers import Moderate from Mailman.Handlers import Replybot +from Mailman.Handlers import Scrubber # Don't test handlers such as SMTPDirect and Sendmail here from Mailman.Handlers import SpamDetect from Mailman.Handlers import Tagger @@ -955,7 +956,7 @@ IMAGEDATAIMAGEDATAIMAGEDATA mlist.description = u'\u65e5\u672c\u8a9e' msg = Message.Message() msg.set_payload('Fran\xe7aise', 'iso-8859-1') - Decorate.process(self._mlist, msg, {}) + Decorate.process(mlist, msg, {}) self.assertEqual(msg.as_string(unixfrom=0), """\ MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" @@ -964,6 +965,46 @@ Content-Transfer-Encoding: base64 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI= """) + def test_no_multipart_unknown_charset(self): + mlist = self._mlist + mlist.msg_header = 'header' + mlist.msg_footer = 'footer' + msg = email.message_from_string("""\ +From: aperson@example.org +Content-Type: text/plain; charset=unknown +Content-Transfer-Encoding: 7bit + +Here is a message. +""") + Decorate.process(mlist, msg, {}) + self.assertEqual(len(msg.get_payload()), 3) + self.assertEqual(msg.get_payload()[1].as_string(unixfrom=0),"""\ +Content-Type: text/plain; charset=unknown +Content-Transfer-Encoding: 7bit + +Here is a message. +""") + + def test_no_multipart_flowed(self): + mlist = self._mlist + mlist.msg_header = 'header' + mlist.msg_footer = 'footer' + msg = email.message_from_string("""\ +From: aperson@example.org +Content-Type: text/plain; format=flowed; delsp=no + +Here is a message +with soft line break. +""") + Decorate.process(mlist, msg, {}) + self.assertEqual(msg.get_param('format'), 'flowed') + self.assertEqual(msg.get_param('delsp'), 'no') + self.assertEqual(msg.get_payload(), """\ +header +Here is a message +with soft line break. +footer""") + class TestFileRecips(TestBase): @@ -1391,6 +1432,98 @@ class TestReplybot(TestBase): +class TestScrubber(TestBase): + def test_save_attachment(self): + mlist = self._mlist + msg = email.message_from_string("""\ +Content-Type: image/gif; name="xtest.gif" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="xtest.gif" + +R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== +""") + Scrubber.save_attachment(mlist, msg, '') + f = open(os.path.join(mlist.archive_dir(), 'attachment.gif')) + img = f.read() + self.assertEqual(img.startswith('GIF87a'), True) + self.assertEqual(len(img), 34) + + def _saved_file(self, s): + # a convenient function to get the saved attachment file + for i in s.splitlines(): + if i.startswith('URL: '): + f = i.replace( + 'URL: <' + self._mlist.GetBaseArchiveURL() + '/' , '') + f = os.path.join(self._mlist.archive_dir(), f.rstrip('>')) + return f + + def test_scrub_image(self): + mlist = self._mlist + msg = email.message_from_string("""\ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-type: text/plain; charset=us-ascii + +This is a message. +--BOUNDARY +Content-Type: image/gif; name="xtest.gif" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="xtest.gif" + +R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== +--BOUNDARY-- +""") + Scrubber.process(mlist, msg, {}) + # saved file + img = open(self._saved_file(msg.get_payload())).read() + self.assertEqual(img.startswith('GIF87a'), True) + self.assertEqual(len(img), 34) + # scrubbed message + s = '\n'.join([l for l in msg.get_payload().splitlines() + if not l.startswith('URL: ')]) + self.assertEqual(s, """\ +This is a message. +-------------- next part -------------- +A non-text attachment was scrubbed... +Name: xtest.gif +Type: image/gif +Size: 34 bytes +Desc: not available""") + + def test_scrub_text(self): + mlist = self._mlist + msg = email.message_from_string("""\ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no + +This is a message. +--BOUNDARY +Content-type: text/plain; name="xtext.txt" +Content-Disposition: attachment; filename="xtext.txt" + +This is a text attachment. +--BOUNDARY-- +""") + Scrubber.process(mlist, msg, {}) + self.assertEqual(msg.get_param('format'), 'flowed') + self.assertEqual(msg.get_param('delsp'), 'no') + txt = open(self._saved_file(msg.get_payload())).read() + self.assertEqual(txt, 'This is a text attachment.') + s = '\n'.join([l for l in msg.get_payload().splitlines() + if not l.startswith('URL: ')]) + self.assertEqual(s, """\ +This is a message. +-------------- next part -------------- +An embedded and charset-unspecified text was scrubbed... +Name: xtext.txt""") + + + class TestSpamDetect(TestBase): def test_short_circuit(self): msgdata = {'approved': 1} @@ -1808,6 +1941,7 @@ def test_suite(): suite.addTest(unittest.makeSuite(TestMimeDel)) suite.addTest(unittest.makeSuite(TestModerate)) suite.addTest(unittest.makeSuite(TestReplybot)) + suite.addTest(unittest.makeSuite(TestScrubber)) suite.addTest(unittest.makeSuite(TestSpamDetect)) suite.addTest(unittest.makeSuite(TestTagger)) suite.addTest(unittest.makeSuite(TestToArchive)) |
