summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bwarsaw 2002-10-04 20:45:53 +0000
committer: bwarsaw 2002-10-04 20:45:53 +0000
commit: 9447ff68aa82ea696844e3a73f5ae79373dd5a9d (patch)
tree: 4d1f09040a2bfa3901a894ac61683548539f44c2
parent: b1f0a5b8f5108fea2466ed04a4de9286b05198b9 (diff)
download: mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.tar.gz
mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.tar.zst
mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.zip
-rw-r--r-- Mailman/Utils.py 22
1 files changed, 17 insertions, 5 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index e5be24c0c..267f85f71 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -721,7 +721,11 @@ def canonstr(s, lang=None):
break
ref = parts.pop(0)
if ref.startswith('#'):
- appchr(int(ref[1:]))
+ try:
+ appchr(int(ref[1:]))
+ except ValueError:
+ # Non-convertable, stick with what we got
+ newparts.append('&'+ref+';')
else:
c = htmlentitydefs.entitydefs.get(ref, '?')
if c.startswith('#') and c.endswith(';'):
@@ -746,8 +750,9 @@ def canonstr(s, lang=None):
# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
-# charset of the given language, and failing that, replaces non-ASCII
-# characters with their html references.
+# charset of the given language, which is the character set that the page will
+# be rendered in, and failing that, replaces non-ASCII characters with their
+# html references. It always returns a byte string.
def uncanonstr(s, lang=None):
if s is None:
s = u''
@@ -755,10 +760,17 @@ def uncanonstr(s, lang=None):
charset = 'us-ascii'
else:
charset = GetCharSet(lang)
- # BAW should change this to a type types of s
+ # See if the string contains characters only in the desired character
+ # set. If so, return it unchanged, except for coercing it to a byte
+ # string.
try:
- return s.encode(charset, 'strict')
+ if isinstance(s, UnicodeType):
+ return s.encode(charset)
+ else:
+ u = unicode(s, charset)
+ return s
except UnicodeError:
+ # Nope, it contains funny characters, so html-ref it
a = []
for c in s:
o = ord(c)