summaryrefslogtreecommitdiff
path: root/src/mailman/utilities/string.py
diff options
context:
space:
mode:
authorBarry Warsaw2011-02-26 21:27:25 -0500
committerBarry Warsaw2011-02-26 21:27:25 -0500
commit2c562fd0191b0af04511dad2b0d0ae57b31198e7 (patch)
treecb4676ece952846f2015c72e2e40c67c8b8c7398 /src/mailman/utilities/string.py
parentca35b80852bd04d5cd39bf494554f8b15e2de048 (diff)
downloadmailman-2c562fd0191b0af04511dad2b0d0ae57b31198e7.tar.gz
mailman-2c562fd0191b0af04511dad2b0d0ae57b31198e7.tar.zst
mailman-2c562fd0191b0af04511dad2b0d0ae57b31198e7.zip
Diffstat (limited to 'src/mailman/utilities/string.py')
-rw-r--r--src/mailman/utilities/string.py39
1 files changed, 39 insertions, 0 deletions
diff --git a/src/mailman/utilities/string.py b/src/mailman/utilities/string.py
index 113403c96..3eda0dc39 100644
--- a/src/mailman/utilities/string.py
+++ b/src/mailman/utilities/string.py
@@ -23,6 +23,7 @@ __metaclass__ = type
__all__ = [
'expand',
'oneline',
+ 'uncanonstr',
'websafe',
]
@@ -33,6 +34,10 @@ import logging
from email.errors import HeaderParseError
from email.header import decode_header, make_header
from string import Template
+from zope.component import getUtility
+
+from mailman.interfaces.languages import ILanguageManager
+
EMPTYSTRING = ''
UEMPTYSTRING = u''
@@ -99,3 +104,37 @@ def oneline(s, cset='us-ascii', in_unicode=False):
def websafe(s):
    """Return `s` with HTML-special characters escaped.

    This is a thin wrapper around `cgi.escape()`.  Because `quote=True` is
    passed, the double-quote character is escaped in addition to `&`, `<`
    and `>`, so the result can also be placed inside a double-quoted
    attribute value.
    """
    return cgi.escape(s, quote=True)
+
+
+
+# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
+# charset of the given language, which is the character set that the page will
+# be rendered in, and failing that, replaces non-ASCII characters with their
+# html references. It always returns a byte string.
+def uncanonstr(s, lang=None):
+ if s is None:
+ s = u''
+ if lang is None:
+ charset = 'us-ascii'
+ else:
+ charset = getUtility(ILanguageManager)[lang].charset
+ # See if the string contains characters only in the desired character
+ # set. If so, return it unchanged, except for coercing it to a byte
+ # string.
+ try:
+ if isinstance(s, unicode):
+ return s.encode(charset)
+ else:
+ unicode(s, charset)
+ return s
+ except UnicodeError:
+ # Nope, it contains funny characters, so html-ref it
+ a = []
+ for c in s:
+ o = ord(c)
+ if o > 127:
+ a.append('&#%3d;' % o)
+ else:
+ a.append(c)
+ # Join characters together and coerce to byte string
+ return str(EMPTYSTRING.join(a))