From 2aece1de2b702626c57a34381bc631f92a2bb024 Mon Sep 17 00:00:00 2001
From: bwarsaw
Date: Sat, 13 Sep 2003 06:00:43 +0000
Subject: process(): In the msg.is_multipart() clause, inside the clause that
 tries to convert t to something reasonable <wink>, we need to use
 errors='replace' when we encode from unicode to string.  This is because the
 preceding unicode(t, 'ascii', 'replace') could end up inserting U+FFFD, which
 can't be encoded to ascii.

---
 Mailman/Handlers/Scrubber.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'Mailman')
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index b5be73dfc..7bc5f510d 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -301,8 +301,11 @@ Url : %(url)s
                 try:
                     t = unicode(t, partcharset, 'replace')
                 except (UnicodeError, LookupError):
-                    # Replace funny characters
-                    t = unicode(t, 'ascii', 'replace').encode('ascii')
+                    # Replace funny characters.  We use errors='replace' for
+                    # both calls since the first replace will leave U+FFFD,
+                    # which isn't ASCII encodeable.
+                    u = unicode(t, 'ascii', 'replace')
+                    t = u.encode('ascii', 'replace')
                 try:
                     # Should use HTML-Escape, or try generalizing to UTF-8
                     t = t.encode(charset, 'replace')
-- 
cgit v1.2.3-70-g09d2