author: bwarsaw 2002-11-12 22:35:28 +0000
committer: bwarsaw 2002-11-12 22:35:28 +0000
commit: 7a7e1b3737ba80727d1f3f03a57e937a73b67cd7 (patch)
tree: 771798cd624721ccc1b7a8bcc30d24af523976b9
parent: 42bc9e48b8e5b163458cdf504ff705857d0e5844 (diff)
download: mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.gz
mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.zst
mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.zip
1 files changed, 23 insertions, 16 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 554fca558..7586a3635 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -35,12 +35,12 @@ import HyperDatabase
 import pipermail
 import weakref
 
+from email.Header import decode_header, make_header
+
 from Mailman import mm_cfg
 from Mailman import Utils
 from Mailman import LockFile
 from Mailman import MailList
-from Mailman import EncWord
-from Mailman import Errors
 from Mailman import i18n
 from Mailman.SafeDict import SafeDict
 from Mailman.Logging.Syslog import syslog
@@ -367,20 +367,27 @@ class Article(pipermail.Article):
     def decode_charset(self, field):
         if field.find("=?") == -1:
             return None, None
-        try:
-            s, c = EncWord.decode(field)
-        except ValueError:
-            return None, None
-        c = c.lower()
-        # If the charset of the header matches the article charset,
-        # leave it as encoded. Otherwise, try Unicode decoding
-        if c.lower() == self.charset:
-            return s, c
-        try:
-            return unicode(s, c), None
-        except (UnicodeError, LookupError):
-            # Unknown encoding
-            return None, None
+        # Get the decoded header as a list of (s, charset) tuples
+        pairs = decode_header(field)
+        mustunicode = 0
+        for s, c in pairs:
+            # If the charset of all the header parts match the article's
+            # charset, leave it as encoded, otherwise try converting to
+            # Unicode.
+            if c <> self.charset:
+                mustunicode = 1
+                break
+        if mustunicode:
+            # Use a large number for maxlinelen so it won't get wrapped
+            h = make_header(pairs, 99999)
+            # Use __unicode__() until we can guarantee Python 2.2
+            try:
+                return h.__unicode__(), None
+            except (UnicodeError, LookupError):
+                # Unknown encoding
+                return None, None
+        # The last value for c will have the proper charset in it
+        return EMPTYSTRING.join([s for s, c in pairs]), c
 
     def as_html(self):
         d = self.__dict__.copy()
author	bwarsaw	2002-11-12 22:35:28 +0000
committer	bwarsaw	2002-11-12 22:35:28 +0000
commit	7a7e1b3737ba80727d1f3f03a57e937a73b67cd7 (patch)
tree	771798cd624721ccc1b7a8bcc30d24af523976b9
parent	42bc9e48b8e5b163458cdf504ff705857d0e5844 (diff)
download	mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.gz mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.zst mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.zip