From 2aece1de2b702626c57a34381bc631f92a2bb024 Mon Sep 17 00:00:00 2001 From: bwarsaw Date: Sat, 13 Sep 2003 06:00:43 +0000 Subject: process(): In the msg.is_multipart() clause, inside the clause that tries to convert t to something reasonable, we need to use errors='replace' when we encode from unicode to string. This is because the preceding unicode(t, 'ascii', 'replace') could end up inserting U+FFFD, which can't be encoded to ASCII. --- Mailman/Handlers/Scrubber.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'Mailman') diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py index b5be73dfc..7bc5f510d 100644 --- a/Mailman/Handlers/Scrubber.py +++ b/Mailman/Handlers/Scrubber.py @@ -301,8 +301,11 @@ Url : %(url)s try: t = unicode(t, partcharset, 'replace') except (UnicodeError, LookupError): - # Replace funny characters - t = unicode(t, 'ascii', 'replace').encode('ascii') + # Replace funny characters. We use errors='replace' for + # both calls since the first replace will leave U+FFFD, + # which isn't ASCII encodeable. + u = unicode(t, 'ascii', 'replace') + t = u.encode('ascii', 'replace') try: # Should use HTML-Escape, or try generalizing to UTF-8 t = t.encode(charset, 'replace') -- cgit v1.2.3-70-g09d2