diff options
| author | bwarsaw | 2002-10-04 20:45:53 +0000 |
|---|---|---|
| committer | bwarsaw | 2002-10-04 20:45:53 +0000 |
| commit | 9447ff68aa82ea696844e3a73f5ae79373dd5a9d (patch) | |
| tree | 4d1f09040a2bfa3901a894ac61683548539f44c2 | |
| parent | b1f0a5b8f5108fea2466ed04a4de9286b05198b9 (diff) | |
| download | mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.tar.gz mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.tar.zst mailman-9447ff68aa82ea696844e3a73f5ae79373dd5a9d.zip | |
| -rw-r--r-- | Mailman/Utils.py | 22 |
1 files changed, 17 insertions, 5 deletions
```diff
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index e5be24c0c..267f85f71 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -721,7 +721,11 @@ def canonstr(s, lang=None):
             break
         ref = parts.pop(0)
         if ref.startswith('#'):
-            appchr(int(ref[1:]))
+            try:
+                appchr(int(ref[1:]))
+            except ValueError:
+                # Non-convertable, stick with what we got
+                newparts.append('&'+ref+';')
         else:
             c = htmlentitydefs.entitydefs.get(ref, '?')
             if c.startswith('#') and c.endswith(';'):
@@ -746,8 +750,9 @@ def canonstr(s, lang=None):
 
 
 # The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
-# charset of the given language, and failing that, replaces non-ASCII
-# characters with their html references.
+# charset of the given language, which is the character set that the page will
+# be rendered in, and failing that, replaces non-ASCII characters with their
+# html references. It always returns a byte string.
 def uncanonstr(s, lang=None):
     if s is None:
         s = u''
@@ -755,10 +760,17 @@ def uncanonstr(s, lang=None):
         charset = 'us-ascii'
     else:
         charset = GetCharSet(lang)
-    # BAW should change this to a type types of s
+    # See if the string contains characters only in the desired character
+    # set. If so, return it unchanged, except for coercing it to a byte
+    # string.
     try:
-        return s.encode(charset, 'strict')
+        if isinstance(s, UnicodeType):
+            return s.encode(charset)
+        else:
+            u = unicode(s, charset)
+            return s
     except UnicodeError:
+        # Nope, it contains funny characters, so html-ref it
         a = []
         for c in s:
             o = ord(c)
```
