passwords.py: Looks like we still need unicode checking.

Mark Sapiro's patch for the 'format' parameter (Decorate.py, Scrubber.py). Scrubber.py: further code cleanup ... 'Content-Transfer-Encoding' is not updated by msg.set_payload(). Normalize 'Url:' to 'URL:'. test_handlers.py: tests for Decorate.py and Scrubber.py.
author: tkikuchi 2007-03-25 02:57:18 +0000
committer: tkikuchi 2007-03-25 02:57:18 +0000
commit: 864162b05e64a351d17e45fd888fbaa822db93b2 (patch)
tree: 3f7878840504950fb119fb06c2d7a4a31c4d169b /Mailman/Handlers
parent: a8b5ce78a7c5ec7c4e9dabfef37f83c153b53d36 (diff)
download: mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.gz
mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.zst
mailman-864162b05e64a351d17e45fd888fbaa822db93b2.zip
2 files changed, 61 insertions, 43 deletions
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py
index 23826cdd1..2f4aceb51 100644
--- a/Mailman/Handlers/Decorate.py
+++ b/Mailman/Handlers/Decorate.py
@@ -17,6 +17,7 @@
 
 """Decorate a message by sticking the header and footer around it."""
 
+import re
 import logging
 
 from email.MIMEText import MIMEText
@@ -84,9 +85,15 @@ def process(mlist, msg, msgdata):
     # MIME multipart chroming the message?
     wrap = True
     if not msg.is_multipart() and msgtype == 'text/plain':
+        # Save the RFC-3676 format parameters.
+        format = msg.get_param('format')
+        delsp = msg.get_param('delsp')
+        # Save 'Content-Transfer-Encoding' header in case decoration fails.
+        cte = msg.get('content-transfer-encoding')
         # header/footer is now in unicode (2.2)
         try:
             oldpayload = unicode(msg.get_payload(decode=True), mcset)
+            del msg['content-transfer-encoding']
             frontsep = endsep = u''
             if header and not header.endswith('\n'):
                 frontsep = u'\n'
@@ -99,18 +106,21 @@ def process(mlist, msg, msgdata):
             # charset, then utf-8.  It's okay if some of these are duplicates.
             for cset in (lcset, mcset, 'utf-8'):
                 try:
-                    pld = payload.encode(cset)
-                    del msg['content-transfer-encoding']
-                    del msg['content-type']
-                    msg.set_payload(pld, cset)
-                    wrap = False
-                    break
-                # 'except' should be here because set_payload() may fail for
-                # 'euc-jp' which re-encode to 'iso-2022-jp'. :(
+                    msg.set_payload(payload.encode(cset), cset)
                 except UnicodeError:
                     pass
+                else:
+                    if format:
+                        msg.set_param('format', format)
+                    if delsp:
+                        msg.set_param('delsp', delsp)
+                    wrap = False
+                    break
         except (LookupError, UnicodeError):
-            pass
+            if cte:
+                # Restore the original c-t-e.
+                del msg['content-transfer-encoding']
+                msg['Content-Transfer-Encoding'] = cte
     elif msg.get_content_type() == 'multipart/mixed':
         # The next easiest thing to do is just prepend the header and append
         # the footer as additional subparts
@@ -201,7 +211,7 @@ def decorate(mlist, template, what, extradict={}):
         template = Utils.to_percent(template)
     # Interpolate into the template
     try:
-        text = (template % d).replace('\r\n', '\n')
+        text = re.sub(r' *\r?\n', r'\n', template % d)
     except (ValueError, TypeError), e:
         log.exception('Exception while calculating %s:\n%s', what, e)
         what = what.upper()
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index e14f9a549..a7a825852 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -144,6 +144,10 @@ def replace_payload_by_text(msg, text, charset):
     # message by a text (scrubbing).
     del msg['content-type']
     del msg['content-transfer-encoding']
+    if isinstance(text, unicode):
+        text = text.encode(charset)
+    if not isinstance(charset, str):
+        charset = str(charset)
     msg.set_payload(text, charset)
 
 
@@ -160,7 +164,7 @@ def process(mlist, msg, msgdata=None):
         if not mlist.scrub_nondigest:
             return
     dir = calculate_attachments_dir(mlist, msg, msgdata)
-    charset = None
+    charset = format = delsp = None
     lcset = Utils.GetCharSet(mlist.preferred_language)
     lcset_out = Charset(lcset).output_charset or lcset
     # Now walk over all subparts of this message and scrub out various types
@@ -170,9 +174,11 @@ def process(mlist, msg, msgdata=None):
         if ctype == 'text/plain':
             # We need to choose a charset for the scrubbed message, so we'll
             # arbitrarily pick the charset of the first text/plain part in the
-            # message.
+            # message.  Also get the RFC 3676 stuff from this part.
             if charset is None:
                 charset = part.get_content_charset(lcset)
+                format = part.get_param('format')
+                delsp = part.get_param('delsp')
             # TK: if part is attached then check charset and scrub if none
             if part.get('content-disposition') and \
                not part.get_content_charset():
@@ -182,7 +188,7 @@ def process(mlist, msg, msgdata=None):
                 replace_payload_by_text(part, _("""\
 An embedded and charset-unspecified text was scrubbed...
 Name: %(filename)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         elif ctype == 'text/html' and isinstance(sanitize, int):
             if sanitize == 0:
@@ -240,7 +246,7 @@ From: %(who)s
 Subject: %(subject)s
 Date: %(date)s
 Size: %(size)s
-Url: %(url)s
+URL: %(url)s
 """), lcset)
         # If the message isn't a multipart, then we'll strip it out as an
         # attachment that would have to be separately downloaded.  Pipermail
@@ -267,7 +273,7 @@ Name: %(filename)s
 Type: %(ctype)s
 Size: %(size)d bytes
 Desc: %(desc)s
-Url : %(url)s
+URL: %(url)s
 """), lcset)
         outer = False
     # We still have to sanitize multipart messages to flat text because
@@ -289,6 +295,7 @@ Url : %(url)s
         # BAW: Martin's original patch suggested we might want to try
         # generalizing to utf-8, and that's probably a good idea (eventually).
         text = []
+        charsets = []
         for part in msg.walk():
             # TK: bug-id 1099138 and multipart
             if not part or part.is_multipart():
@@ -307,37 +314,38 @@ Url : %(url)s
             # null body. See bug 1430236.
             except (binascii.Error, TypeError):
                 t = part.get_payload()
-            # TK: get_content_charset() returns 'iso-2022-jp' for internally
-            # crafted (scrubbed) 'euc-jp' text part. So, first try
-            # get_charset(), then get_content_charset() for the parts
-            # which are already embeded in the incoming message.
-            partcharset = part.get_charset()
-            if partcharset:
-                partcharset = str(partcharset)
-            else:
-                partcharset = part.get_content_charset()
-            if partcharset and partcharset <> charset:
-                try:
-                    t = unicode(t, partcharset, 'replace')
-                except (UnicodeError, LookupError, ValueError):
-                    # Replace funny characters.  We use errors='replace' for
-                    # both calls since the first replace will leave U+FFFD,
-                    # which isn't ASCII encodeable.
-                    u = unicode(t, 'ascii', 'replace')
-                    t = u.encode('ascii', 'replace')
-                try:
-                    # Should use HTML-Escape, or try generalizing to UTF-8
-                    t = t.encode(charset, 'replace')
-                except (UnicodeError, LookupError, ValueError):
-                    t = t.encode(lcset, 'replace')
+            # Email problem was solved by Mark Sapiro. (TK)
+            partcharset = part.get_content_charset('us-ascii')
+            try:
+                t = unicode(t, partcharset, 'replace')
+            except (UnicodeError, LookupError, ValueError, TypeError):
+                # What is the cause to come this exception now ?
+                # Replace funny characters.  We use errors='replace'.
+                u = unicode(t, 'ascii', 'replace')
             # Separation is useful
-            if isinstance(t, str):
+            if isinstance(t, basestring):
                 if not t.endswith('\n'):
                     t += '\n'
                 text.append(t)
+            if partcharset not in charsets:
+                charsets.append(partcharset)
         # Now join the text and set the payload
         sep = _('-------------- next part --------------\n')
-        replace_payload_by_text(msg, sep.join(text), charset)
+        rept = sep.join(text)
+        # Replace entire message with text and scrubbed notice.
+        # Try with message charsets and utf-8
+        if 'utf-8' not in charsets:
+            charsets.append('utf-8')
+        for charset in charsets:
+            try:
+                replace_payload_by_text(msg, rept, charset)
+                break
+            except UnicodeError:
+                pass
+        if format:
+            msg.set_param('format', format)
+        if delsp:
+            msg.set_param('delsp', delsp)
     return msg
 
 
@@ -467,7 +475,7 @@ def save_attachment(mlist, msg, dir, filter_html=True):
     # Private archives will likely have a trailing slash.  Normalize.
     if baseurl[-1] <> '/':
         baseurl += '/'
-    # A trailing space in url string may save users who are using
-    # RFC-1738 compliant MUA (Not Mozilla).
-    url = baseurl + '%s/%s%s%s ' % (dir, filebase, extra, ext)
+    # Trailing space will definitely be a problem with format=flowed.
+    # Bracket the URL instead.
+    url = '<' + baseurl + '%s/%s%s%s>' % (dir, filebase, extra, ext)
     return url
author	tkikuchi	2007-03-25 02:57:18 +0000
committer	tkikuchi	2007-03-25 02:57:18 +0000
commit	864162b05e64a351d17e45fd888fbaa822db93b2 (patch)
tree	3f7878840504950fb119fb06c2d7a4a31c4d169b /Mailman/Handlers
parent	a8b5ce78a7c5ec7c4e9dabfef37f83c153b53d36 (diff)
download	mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.gz mailman-864162b05e64a351d17e45fd888fbaa822db93b2.tar.zst mailman-864162b05e64a351d17e45fd888fbaa822db93b2.zip