author: bwarsaw 2002-09-18 05:38:51 +0000
committer: bwarsaw 2002-09-18 05:38:51 +0000
commit: 0efe92058114a180ea14d254049e79b4f4bea057 (patch)
tree: 914723f33325a999f9d0644cbdf63c9c9aaac5f8 /Mailman/Utils.py
parent: da20177dfc0769529cfdff0752c098c8e0608eaf (diff)
download: mailman-0efe92058114a180ea14d254049e79b4f4bea057.tar.gz
mailman-0efe92058114a180ea14d254049e79b4f4bea057.tar.zst
mailman-0efe92058114a180ea14d254049e79b4f4bea057.zip
1 files changed, 13 insertions, 3 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index 12141c2c8..3443393fa 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -23,6 +23,8 @@ the mailing lists, and whatever else doesn't belong elsewhere.
 
 """
 
+from __future__ import nested_scopes
+
 import os
 import re
 import random
@@ -33,6 +35,7 @@ import time
 import cgi
 import htmlentitydefs
 import email.Iterators
+from types import UnicodeType
 from string import whitespace, digits
 try:
     # Python 2.2
@@ -703,20 +706,27 @@ def percent_identifiers(s):
 def canonstr(s, lang=None):
     newparts = []
     parts = re.split(r'&(?P<ref>[^;]+);', s)
+    def appchr(i):
+        if i < 256:
+            newparts.append(chr(i))
+        else:
+            newparts.append(unichr(i))
     while 1:
         newparts.append(parts.pop(0))
         if not parts:
             break
         ref = parts.pop(0)
         if ref.startswith('#'):
-            newparts.append(chr(int(ref[1:])))
+            appchr(int(ref[1:]))
         else:
             c = htmlentitydefs.entitydefs.get(ref, '?')
             if c.startswith('#') and c.endswith(';'):
-                newparts.append(chr(ref[1:-1]))
+                appchr(int(ref[1:-1]))
             else:
                 newparts.append(c)
     newstr = EMPTYSTRING.join(newparts)
+    if isinstance(newstr, UnicodeType):
+        return newstr
     # We want the default fallback to be iso-8859-1 even if the language is
     # English (us-ascii).  This seems like a practical compromise so that
     # non-ASCII characters in names can be used in English lists w/o having to
@@ -736,7 +746,7 @@ def canonstr(s, lang=None):
 # characters with their html references.
 def uncanonstr(s, lang=None):
     if s is None:
-        s = ''
+        s = u''
     if lang is None:
         charset = 'us-ascii'
     else:
author	bwarsaw	2002-09-18 05:38:51 +0000
committer	bwarsaw	2002-09-18 05:38:51 +0000
commit	0efe92058114a180ea14d254049e79b4f4bea057 (patch)
tree	914723f33325a999f9d0644cbdf63c9c9aaac5f8 /Mailman/Utils.py
parent	da20177dfc0769529cfdff0752c098c8e0608eaf (diff)
download	mailman-0efe92058114a180ea14d254049e79b4f4bea057.tar.gz mailman-0efe92058114a180ea14d254049e79b4f4bea057.tar.zst mailman-0efe92058114a180ea14d254049e79b4f4bea057.zip