The Python interpreter has become strict about the ASCII character range.

Subject manipulation should be done in Unicode string mode. (This is a temporary change in MAIN, because a higher version of Python should allow simpler algorithms.)
author: tkikuchi 2005-12-26 07:24:02 +0000
committer: tkikuchi 2005-12-26 07:24:02 +0000
commit: 618aedcb8061b5563e1eb78241a0fcc1f17128af (patch)
tree: 7d81040fdbc1f4a897fd62bbc649f68af9f60547 /Mailman
parent: 3662785f5471b88c9d9fa4ded0da851c23697cd6 (diff)
download: mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.tar.gz
mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.tar.zst
mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.zip
1 files changed, 13 insertions, 20 deletions
diff --git a/Mailman/Handlers/CookHeaders.py b/Mailman/Handlers/CookHeaders.py
index 0bd58ec12..eee5991ed 100644
--- a/Mailman/Handlers/CookHeaders.py
+++ b/Mailman/Handlers/CookHeaders.py
@@ -253,6 +253,11 @@ def prefix_subject(mlist, msg, msgdata):
     # subject is mime-encoded and cset is set as us-ascii. See detail
     # for ch_oneline() (CookHeaders one line function).
     subject, cset = ch_oneline(subject)
+    # TK: The Python interpreter has evolved to be strict about the ASCII
+    # code range. It is safe to use unicode strings when manipulating
+    # header contents with the re module. It would be best to return
+    # unicode from ch_oneline(), but this is a temporary solution.
+    subject = unicode(subject, cset)
     # If the subject_prefix contains '%d', it is replaced with the
     # mailing list sequential number.  Sequential number format allows
     # '%d' or '%05d' like pattern.
@@ -279,6 +284,7 @@ def prefix_subject(mlist, msg, msgdata):
     if subject.strip() == '':
         subject = _('(no subject)')
         cset = Utils.GetCharSet(mlist.preferred_language)
+        subject = unicode(subject, cset)
     # and substitute %d in prefix with post_id
     try:
         prefix = prefix % mlist.post_id
@@ -289,21 +295,15 @@ def prefix_subject(mlist, msg, msgdata):
     if cset == 'us-ascii':
         try:
             if old_style:
-                h = ' '.join([recolon, prefix, subject])
+                h = u' '.join([recolon, prefix, subject])
             else:
-                h = ' '.join([prefix, recolon, subject])
-            if type(h) == UnicodeType:
-                h = h.encode('us-ascii')
-            else:
-                h = unicode(h, 'us-ascii').encode('us-ascii')
+                h = u' '.join([prefix, recolon, subject])
+            h = h.encode('us-ascii')
             h = uheader(mlist, h, 'Subject', continuation_ws=ws)
             del msg['subject']
             msg['Subject'] = h
-            ss = ' '.join([recolon, subject])
-            if _isunicode(ss):
-                ss = ss.encode('us-ascii')
-            else:
-                ss = unicode(ss, 'us-ascii').encode('us-ascii')
+            ss = u' '.join([recolon, subject])
+            ss = ss.encode('us-ascii')
             ss = uheader(mlist, ss, 'Subject', continuation_ws=ws)
             msgdata['stripped_subject'] = ss
             return
@@ -316,15 +316,8 @@ def prefix_subject(mlist, msg, msgdata):
     else:
         h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
         h.append(recolon)
-    # in seq version, subject header is already concatnated
-    if not _isunicode(subject):
-        try:
-            subject = unicode(subject, cset, 'replace')
-        except (LookupError, TypeError):
-            # unknown codec
-            cset = Utils.GetCharSet(mlist.preferred_language)
-            subject = unicode(subject, cset, 'replace')
-    subject = subject.encode(cset,'replace')
+    # TK: Subject is concatenated and is a unicode string.
+    subject = subject.encode(cset, 'replace')
     h.append(subject, cset)
     del msg['subject']
     msg['Subject'] = h
author	tkikuchi	2005-12-26 07:24:02 +0000
committer	tkikuchi	2005-12-26 07:24:02 +0000
commit	618aedcb8061b5563e1eb78241a0fcc1f17128af (patch)
tree	7d81040fdbc1f4a897fd62bbc649f68af9f60547 /Mailman
parent	3662785f5471b88c9d9fa4ded0da851c23697cd6 (diff)
download	mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.tar.gz mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.tar.zst mailman-618aedcb8061b5563e1eb78241a0fcc1f17128af.zip