diff options
| author | bwarsaw | 2002-11-12 22:35:28 +0000 |
|---|---|---|
| committer | bwarsaw | 2002-11-12 22:35:28 +0000 |
| commit | 7a7e1b3737ba80727d1f3f03a57e937a73b67cd7 (patch) | |
| tree | 771798cd624721ccc1b7a8bcc30d24af523976b9 | |
| parent | 42bc9e48b8e5b163458cdf504ff705857d0e5844 (diff) | |
| download | mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.gz mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.tar.zst mailman-7a7e1b3737ba80727d1f3f03a57e937a73b67cd7.zip | |
| -rw-r--r-- | Mailman/Archiver/HyperArch.py | 39 |
1 file changed, 23 insertions, 16 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 554fca558..7586a3635 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -35,12 +35,12 @@ import HyperDatabase
 import pipermail
 import weakref
 
+from email.Header import decode_header, make_header
+
 from Mailman import mm_cfg
 from Mailman import Utils
 from Mailman import LockFile
 from Mailman import MailList
-from Mailman import EncWord
-from Mailman import Errors
 from Mailman import i18n
 from Mailman.SafeDict import SafeDict
 from Mailman.Logging.Syslog import syslog
@@ -367,20 +367,27 @@ class Article(pipermail.Article):
     def decode_charset(self, field):
         if field.find("=?") == -1:
             return None, None
-        try:
-            s, c = EncWord.decode(field)
-        except ValueError:
-            return None, None
-        c = c.lower()
-        # If the charset of the header matches the article charset,
-        # leave it as encoded. Otherwise, try Unicode decoding
-        if c.lower() == self.charset:
-            return s, c
-        try:
-            return unicode(s, c), None
-        except (UnicodeError, LookupError):
-            # Unknown encoding
-            return None, None
+        # Get the decoded header as a list of (s, charset) tuples
+        pairs = decode_header(field)
+        mustunicode = 0
+        for s, c in pairs:
+            # If the charset of all the header parts match the article's
+            # charset, leave it as encoded, otherwise try converting to
+            # Unicode.
+            if c <> self.charset:
+                mustunicode = 1
+                break
+        if mustunicode:
+            # Use a large number for maxlinelen so it won't get wrapped
+            h = make_header(pairs, 99999)
+            # Use __unicode__() until we can guarantee Python 2.2
+            try:
+                return h.__unicode__(), None
+            except (UnicodeError, LookupError):
+                # Unknown encoding
+                return None, None
+        # The last value for c will have the proper charset in it
+        return EMPTYSTRING.join([s for s, c in pairs]), c
 
     def as_html(self):
         d = self.__dict__.copy()
