4 files changed, 199 insertions, 44 deletions
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py
index 23826cdd1..2f4aceb51 100644
--- a/Mailman/Handlers/Decorate.py
+++ b/Mailman/Handlers/Decorate.py
@@ -17,6 +17,7 @@
 
 """Decorate a message by sticking the header and footer around it."""
 
+import re
 import logging
 
 from email.MIMEText import MIMEText
@@ -84,9 +85,15 @@ def process(mlist, msg, msgdata):
     # MIME multipart chroming the message?
     wrap = True
     if not msg.is_multipart() and msgtype == 'text/plain':
+        # Save the RFC-3676 format parameters.
+        format = msg.get_param('format')
+        delsp = msg.get_param('delsp')
+        # Save 'Content-Transfer-Encoding' header in case decoration fails.
+        cte = msg.get('content-transfer-encoding')
         # header/footer is now in unicode (2.2)
         try:
             oldpayload = unicode(msg.get_payload(decode=True), mcset)
+            del msg['content-transfer-encoding']
             frontsep = endsep = u''
             if header and not header.endswith('\n'):
                 frontsep = u'\n'
@@ -99,18 +106,21 @@ def process(mlist, msg, msgdata):
             # charset, then utf-8.  It's okay if some of these are duplicates.
             for cset in (lcset, mcset, 'utf-8'):
                 try:
-                    pld = payload.encode(cset)
-                    del msg['content-transfer-encoding']
-                    del msg['content-type']
-                    msg.set_payload(pld, cset)
-                    wrap = False
-                    break
-                # 'except' should be here because set_payload() may fail for
-                # 'euc-jp' which re-encode to 'iso-2022-jp'. :(
+                    msg.set_payload(payload.encode(cset), cset)
                 except UnicodeError:
                     pass
+                else:
+                    if format:
+                        msg.set_param('format', format)
+                    if delsp:
+                        msg.set_param('delsp', delsp)
+                    wrap = False
+                    break
         except (LookupError, UnicodeError):
-            pass
+            if cte:
+                # Restore the original c-t-e.
+                del msg['content-transfer-encoding']
+                msg['Content-Transfer-Encoding'] = cte
     elif msg.get_content_type() == 'multipart/mixed':
         # The next easiest thing to do is just prepend the header and append
         # the footer as additional subparts
@@ -201,7 +211,7 @@ def decorate(mlist, template, what, extradict={}):
         template = Utils.to_percent(template)
     # Interpolate into the template
     try:
-        text = (template % d).replace('\r\n', '\n')
+        text = re.sub(r' *\r?\n', r'\n', template % d)
     except (ValueError, TypeError), e:
         log.exception('Exception while calculating %s:\n%s', what, e)
         what = what.upper()
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index e14f9a549..a7a825852 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -144,6 +144,10 @@ def replace_payload_by_text(msg, text, charset):
     # message by a text (scrubbing).
     del msg['content-type']
     del msg['content-transfer-encoding']
+    # Normalize charset first: unicode.encode() needs a plain string name.
+    charset = str(charset)
+    if isinstance(text, unicode):
+        text = text.encode(charset)
     msg.set_payload(text, charset)
 
 
@@ -160,7 +164,7 @@ def process(mlist, msg, msgdata=None):
         if not mlist.scrub_nondigest:
             return
     dir = calculate_attachments_dir(mlist, msg, msgdata)
-    charset = None
+    charset = format = delsp = None
     lcset = Utils.GetCharSet(mlist.preferred_language)
     lcset_out = Charset(lcset).output_charset or lcset
     # Now walk over all subparts of this message and scrub out various types
@@ -170,9 +174,11 @@ def process(mlist, msg, msgdata=None):
         if ctype == 'text/plain':
             # We need to choose a charset for the scrubbed message, so we'll
             # arbitrarily pick the charset of the first text/plain part in the
-            # message.
+            # message.  Also get the RFC 3676 stuff from this part.
             if charset is None:
                 charset = part.get_content_charset(lcset)
+                format = part.get_param('format')
+                delsp = part.get_param('delsp')
             # TK: if part is attached then check charset and scrub if none
             if part.get('content-disposition') and \
                not part.get_content_charset():
@@ -182,7 +188,7 @@ def process(mlist, msg, msgdata=None):
                 replace_payload_by_text(part, _("""\
 An embedded and charset-unspecified text was scrubbed...
 Name: %(filename)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         elif ctype == 'text/html' and isinstance(sanitize, int):
             if sanitize == 0:
@@ -240,7 +246,7 @@ From: %(who)s
 Subject: %(subject)s
 Date: %(date)s
 Size: %(size)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         # If the message isn't a multipart, then we'll strip it out as an
         # attachment that would have to be separately downloaded.  Pipermail
@@ -267,7 +273,7 @@ Name: %(filename)s
 Type: %(ctype)s
 Size: %(size)d bytes
 Desc: %(desc)s
-Url : %(url)s
+URL: %(url)s
 """), lcset)
         outer = False
     # We still have to sanitize multipart messages to flat text because
@@ -289,6 +295,7 @@ Url : %(url)s
         # BAW: Martin's original patch suggested we might want to try
         # generalizing to utf-8, and that's probably a good idea (eventually).
         text = []
+        charsets = []
         for part in msg.walk():
             # TK: bug-id 1099138 and multipart
             if not part or part.is_multipart():
@@ -307,37 +314,38 @@ Url : %(url)s
             # null body. See bug 1430236.
             except (binascii.Error, TypeError):
                 t = part.get_payload()
-            # TK: get_content_charset() returns 'iso-2022-jp' for internally
-            # crafted (scrubbed) 'euc-jp' text part. So, first try
-            # get_charset(), then get_content_charset() for the parts
-            # which are already embeded in the incoming message.
-            partcharset = part.get_charset()
-            if partcharset:
-                partcharset = str(partcharset)
-            else:
-                partcharset = part.get_content_charset()
-            if partcharset and partcharset <> charset:
-                try:
-                    t = unicode(t, partcharset, 'replace')
-                except (UnicodeError, LookupError, ValueError):
-                    # Replace funny characters.  We use errors='replace' for
-                    # both calls since the first replace will leave U+FFFD,
-                    # which isn't ASCII encodeable.
-                    u = unicode(t, 'ascii', 'replace')
-                    t = u.encode('ascii', 'replace')
-                try:
-                    # Should use HTML-Escape, or try generalizing to UTF-8
-                    t = t.encode(charset, 'replace')
-                except (UnicodeError, LookupError, ValueError):
-                    t = t.encode(lcset, 'replace')
+            # Decode each part to unicode; parts without a charset are ascii.
+            partcharset = part.get_content_charset('us-ascii')
+            try:
+                t = unicode(t, partcharset, 'replace')
+            except (UnicodeError, LookupError, ValueError, TypeError):
+                # Bogus charset or non-str payload: keep t, decoding as ascii.
+                if isinstance(t, str):
+                    t = unicode(t, 'ascii', 'replace')
             # Separation is useful
-            if isinstance(t, str):
+            if isinstance(t, basestring):
                 if not t.endswith('\n'):
                     t += '\n'
                 text.append(t)
+            if partcharset not in charsets:
+                charsets.append(partcharset)
         # Now join the text and set the payload
         sep = _('-------------- next part --------------\n')
-        replace_payload_by_text(msg, sep.join(text), charset)
+        rept = sep.join(text)
+        # Replace the whole message with the joined text.  Try each part
+        # charset, then utf-8; skip bogus (LookupError) or lossy charsets.
+        if 'utf-8' not in charsets:
+            charsets.append('utf-8')
+        for charset in charsets:
+            try:
+                replace_payload_by_text(msg, rept, charset)
+                break
+            except (UnicodeError, LookupError):
+                pass
+        if format:
+            msg.set_param('format', format)
+        if delsp:
+            msg.set_param('delsp', delsp)
     return msg
 
 
@@ -467,7 +475,7 @@ def save_attachment(mlist, msg, dir, filter_html=True):
     # Private archives will likely have a trailing slash.  Normalize.
     if baseurl[-1] <> '/':
         baseurl += '/'
-    # A trailing space in url string may save users who are using
-    # RFC-1738 compliant MUA (Not Mozilla).
-    url = baseurl + '%s/%s%s%s ' % (dir, filebase, extra, ext)
+    # Trailing space will definitely be a problem with format=flowed.
+    # Bracket the URL instead.
+    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
     return url
diff --git a/Mailman/passwords.py b/Mailman/passwords.py
index a46c11a16..d84ab9f48 100644
--- a/Mailman/passwords.py
+++ b/Mailman/passwords.py
@@ -240,6 +240,9 @@ def check_response(challenge, response):
     scheme       = scheme_parts[0].lower()
     scheme_enum  = _SCHEMES_BY_TAG.get(scheme, _DEFAULT_SCHEME)
     scheme_class = _SCHEMES_BY_ENUM[scheme_enum]
+    if isinstance(rest_group, unicode):
+        # decode() fails. (challenge is from database)
+        rest_group = str(rest_group)
     return scheme_class.check_response(rest_group, response, *scheme_parts[1:])
 
 
diff --git a/Mailman/testing/test_handlers.py b/Mailman/testing/test_handlers.py
index 59fd4de08..f963fdf24 100644
--- a/Mailman/testing/test_handlers.py
+++ b/Mailman/testing/test_handlers.py
@@ -48,6 +48,7 @@ from Mailman.Handlers import Hold
 from Mailman.Handlers import MimeDel
 from Mailman.Handlers import Moderate
 from Mailman.Handlers import Replybot
+from Mailman.Handlers import Scrubber
 # Don't test handlers such as SMTPDirect and Sendmail here
 from Mailman.Handlers import SpamDetect
 from Mailman.Handlers import Tagger
@@ -955,7 +956,7 @@ IMAGEDATAIMAGEDATAIMAGEDATA
         mlist.description = u'\u65e5\u672c\u8a9e'
         msg = Message.Message()
         msg.set_payload('Fran\xe7aise', 'iso-8859-1')
-        Decorate.process(self._mlist, msg, {})
+        Decorate.process(mlist, msg, {})
         self.assertEqual(msg.as_string(unixfrom=0), """\
 MIME-Version: 1.0
 Content-Type: text/plain; charset="utf-8"
@@ -964,6 +965,46 @@ Content-Transfer-Encoding: base64
 5pel5pys6KqeIGhlYWRlcgpGcmFuw6dhaXNlCuaXpeacrOiqniBmb290ZXI=
 """)
 
+    def test_no_multipart_unknown_charset(self):
+        mlist = self._mlist
+        mlist.msg_header = 'header'
+        mlist.msg_footer = 'footer'
+        msg = email.message_from_string("""\
+From: aperson@example.org
+Content-Type: text/plain; charset=unknown
+Content-Transfer-Encoding: 7bit
+
+Here is a message.
+""")
+        Decorate.process(mlist, msg, {})
+        self.assertEqual(len(msg.get_payload()), 3)
+        self.assertEqual(msg.get_payload()[1].as_string(unixfrom=0),"""\
+Content-Type: text/plain; charset=unknown
+Content-Transfer-Encoding: 7bit
+
+Here is a message.
+""")
+
+    def test_no_multipart_flowed(self):
+        mlist = self._mlist
+        mlist.msg_header = 'header'
+        mlist.msg_footer = 'footer'
+        msg = email.message_from_string("""\
+From: aperson@example.org
+Content-Type: text/plain; format=flowed; delsp=no
+
+Here is a message 
+with soft line break.
+""")
+        Decorate.process(mlist, msg, {})
+        self.assertEqual(msg.get_param('format'), 'flowed')
+        self.assertEqual(msg.get_param('delsp'), 'no')
+        self.assertEqual(msg.get_payload(), """\
+header
+Here is a message 
+with soft line break.
+footer""")
+
 
 
 class TestFileRecips(TestBase):
@@ -1391,6 +1432,98 @@ class TestReplybot(TestBase):
 
 
 
+class TestScrubber(TestBase):
+    def test_save_attachment(self):
+        mlist = self._mlist
+        msg = email.message_from_string("""\
+Content-Type: image/gif; name="xtest.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="xtest.gif"
+
+R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+""")
+        Scrubber.save_attachment(mlist, msg, '')
+        f = open(os.path.join(mlist.archive_dir(), 'attachment.gif'))
+        img = f.read()
+        self.assertEqual(img.startswith('GIF87a'), True)
+        self.assertEqual(len(img), 34)
+
+    def _saved_file(self, s):
+        # Return the archive path of the file named by the 'URL: <...>' line.
+        prefix = 'URL: <' + self._mlist.GetBaseArchiveURL() + '/'
+        urls = [i for i in s.splitlines() if i.startswith('URL: ')]
+        self.failUnless(urls, 'no URL line in scrubbed text')
+        # The last URL line wins, matching a single-attachment message.
+        f = urls[-1].replace(prefix, '').rstrip('>')
+        return os.path.join(self._mlist.archive_dir(), f)
+
+    def test_scrub_image(self):
+        mlist = self._mlist
+        msg = email.message_from_string("""\
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+--BOUNDARY
+Content-type: text/plain; charset=us-ascii
+
+This is a message.
+--BOUNDARY
+Content-Type: image/gif; name="xtest.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="xtest.gif"
+
+R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
+--BOUNDARY--
+""")
+        Scrubber.process(mlist, msg, {})
+        # saved file
+        img = open(self._saved_file(msg.get_payload())).read()
+        self.assertEqual(img.startswith('GIF87a'), True)
+        self.assertEqual(len(img), 34)
+        # scrubbed message
+        s = '\n'.join([l for l in msg.get_payload().splitlines()
+                               if not l.startswith('URL: ')])
+        self.assertEqual(s, """\
+This is a message.
+-------------- next part --------------
+A non-text attachment was scrubbed...
+Name: xtest.gif
+Type: image/gif
+Size: 34 bytes
+Desc: not available""")
+
+    def test_scrub_text(self):
+        mlist = self._mlist
+        msg = email.message_from_string("""\
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+--BOUNDARY
+Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no
+
+This is a message.
+--BOUNDARY
+Content-type: text/plain; name="xtext.txt"
+Content-Disposition: attachment; filename="xtext.txt"
+
+This is a text attachment.
+--BOUNDARY--
+""")
+        Scrubber.process(mlist, msg, {})
+        self.assertEqual(msg.get_param('format'), 'flowed')
+        self.assertEqual(msg.get_param('delsp'), 'no')
+        txt = open(self._saved_file(msg.get_payload())).read()
+        self.assertEqual(txt, 'This is a text attachment.')
+        s = '\n'.join([l for l in msg.get_payload().splitlines()
+                               if not l.startswith('URL: ')])
+        self.assertEqual(s, """\
+This is a message.
+-------------- next part --------------
+An embedded and charset-unspecified text was scrubbed...
+Name: xtext.txt""")
+
+
+
 class TestSpamDetect(TestBase):
     def test_short_circuit(self):
         msgdata = {'approved': 1}
@@ -1808,6 +1941,7 @@ def test_suite():
     suite.addTest(unittest.makeSuite(TestMimeDel))
     suite.addTest(unittest.makeSuite(TestModerate))
     suite.addTest(unittest.makeSuite(TestReplybot))
+    suite.addTest(unittest.makeSuite(TestScrubber))
     suite.addTest(unittest.makeSuite(TestSpamDetect))
     suite.addTest(unittest.makeSuite(TestTagger))
     suite.addTest(unittest.makeSuite(TestToArchive))