Trunk merge

author: Barry Warsaw 2011-04-10 18:03:37 -0400
committer: Barry Warsaw 2011-04-10 18:03:37 -0400
commit: 37038a683cd909438a6dee43beb9b258ef4e4313 (patch)
tree: df50eabfcc890f01203d90c453dc1b1dbde65d2c /src/mailman/utilities/string.py
parent: cce9729cac32b6c5fe2acc77b2bfb6b7c545711f (diff)
parent: ef3a4a87e2c0f4b640e31afc4828d2edbd005846 (diff)
download: mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.gz
mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.zst
mailman-37038a683cd909438a6dee43beb9b258ef4e4313.zip
1 files changed, 170 insertions, 2 deletions
diff --git a/src/mailman/utilities/string.py b/src/mailman/utilities/string.py
index 44b99876e..9054ed076 100644
--- a/src/mailman/utilities/string.py
+++ b/src/mailman/utilities/string.py
@@ -21,12 +21,28 @@ from __future__ import absolute_import, unicode_literals
 
 __metaclass__ = type
 __all__ = [
-    'expand'
+    'expand',
+    'oneline',
+    'uncanonstr',
+    'websafe',
+    'wrap',
     ]
 
 
+import cgi
 import logging
-from string import Template
+
+from email.errors import HeaderParseError
+from email.header import decode_header, make_header
+from string import Template, whitespace
+from textwrap import TextWrapper, dedent
+from zope.component import getUtility
+
+from mailman.interfaces.languages import ILanguageManager
+
+
+EMPTYSTRING = ''
+NL = '\n'
 
 log = logging.getLogger('mailman.error')
 
@@ -57,3 +73,155 @@ def expand(template, substitutions, template_class=Template):
     except (TypeError, ValueError):
         # The template is really screwed up.
         log.exception('broken template: %s', template)
+
+
+
+def oneline(s, cset='us-ascii', in_unicode=False):
+    """Decode a header string in one line and convert into specified charset.
+
+    :param s: The header string
+    :type s: string
+    :param cset: The character set (encoding) to use.
+    :type cset: string
+    :param in_unicode: Flag specifying whether to return the converted string
+        as a unicode (True) or an 8-bit string (False, the default).
+    :type in_unicode: bool
+    :return: The decoded header string.  If an error occurs while converting
+        the input string, return it undecoded with its line breaks removed.
+    :rtype: string
+    """
+    try:
+        h = make_header(decode_header(s))
+        ustr = h.__unicode__()
+        line = EMPTYSTRING.join(ustr.splitlines())
+        if in_unicode:
+            return line
+        else:
+            return line.encode(cset, 'replace')
+    except (LookupError, UnicodeError, ValueError, HeaderParseError):
+        # Possibly a charset problem; return the undecoded string on one line.
+        return EMPTYSTRING.join(s.splitlines())
+
+
+
+def websafe(s):
+    return cgi.escape(s, quote=True)  # Also escapes '"', not just &, < and >.
+
+
+
+# The opposite of canonstr() -- sorta.  I.e. it attempts to encode s in the
+# charset of the given language, which is the character set that the page will
+# be rendered in, and failing that, replaces non-ASCII characters with their
+# numeric HTML references.  It always returns a byte string.
+def uncanonstr(s, lang=None):
+    if s is None:
+        s = u''
+    if lang is None:
+        charset = 'us-ascii'  # No language given: fall back to plain ASCII.
+    else:
+        charset = getUtility(ILanguageManager)[lang].charset
+    # See if the string contains characters only in the desired character
+    # set.  If so, return it unchanged, except for coercing it to a byte
+    # string.
+    try:
+        if isinstance(s, unicode):
+            return s.encode(charset)
+        else:
+            unicode(s, charset)  # Decoded only to validate; result is unused.
+            return s
+    except UnicodeError:
+        # Not representable in charset, so html-ref each character above 127.
+        a = []
+        for c in s:
+            o = ord(c)
+            if o > 127:
+                a.append('&#%3d;' % o)
+            else:
+                a.append(c)
+        # Join characters together and coerce to byte string
+        return str(EMPTYSTRING.join(a))
+
+
+
+def wrap(text, column=70, honor_leading_ws=True):
+    """Wrap and fill the text to the specified column.
+
+    The input text is wrapped and filled as done by the standard library
+    textwrap module.  The differences here being that this function is capable
+    of filling multiple paragraphs (as defined by text separated by blank
+    lines).  Also, when `honor_leading_ws` is True (the default), paragraphs
+    that begin with whitespace are not wrapped.  This is the algorithm that
+    the Python FAQ wizard used.
+
+    :param text: The text to wrap; may be empty or end with blank lines.
+    :param column: The maximum width of each wrapped line.
+    :param honor_leading_ws: Whether to leave indented paragraphs unwrapped.
+    :return: The wrapped text as a single string.
+    """
+    # First, split the original text into paragraphs, keeping all blank lines
+    # between them.
+    paragraphs = []
+    paragraph = []
+    last_indented = False
+    for line in text.splitlines(True):
+        is_indented = (len(line) > 0 and line[0] in whitespace)
+        if line == NL:
+            if len(paragraph) > 0:
+                paragraphs.append(EMPTYSTRING.join(paragraph))
+            paragraphs.append(line)
+            last_indented = False
+            paragraph = []
+        elif last_indented != is_indented:
+            # The indentation level changed.  We treat this as a paragraph
+            # break but no blank line will be issued between paragraphs.
+            if len(paragraph) > 0:
+                paragraphs.append(EMPTYSTRING.join(paragraph))
+            # The next paragraph starts with this line.
+            paragraph = [line]
+            last_indented = is_indented
+        else:
+            # This line does not constitute a paragraph break.
+            paragraph.append(line)
+    # All lines are consumed.  Transfer the last paragraph only if it is not
+    # empty; an empty one would make the paragraph[0] test below raise.
+    if len(paragraph) > 0:
+        paragraphs.append(EMPTYSTRING.join(paragraph))
+    # Now iterate through all paragraphs, wrapping as necessary.
+    wrapped_paragraphs = []
+    # Two wrappers: one for dedented text, and one for indented text whose
+    # initial_indent and subsequent_indent are clobbered per indented chunk.
+    wrapper = TextWrapper(width=column, fix_sentence_endings=True)
+    iwrapper = TextWrapper(width=column, fix_sentence_endings=True)
+    add_paragraph_break = False
+    for paragraph in paragraphs:
+        if add_paragraph_break:
+            wrapped_paragraphs.append(NL)
+            add_paragraph_break = False
+        paragraph_text = EMPTYSTRING.join(paragraph)
+        # Just copy the blank lines to the final set of paragraphs.
+        if paragraph == NL:
+            wrapped_paragraphs.append(NL)
+        # Choose the wrapper based on whether the paragraph is indented.
+        # Do not wrap indented paragraphs if honor_leading_ws is set.
+        elif paragraph[0] in whitespace:
+            if honor_leading_ws:
+                # Leave the indented paragraph verbatim.
+                wrapped_paragraphs.append(paragraph_text)
+            else:
+                # The paragraph must be dedented before it is wrapped.  The
+                # leading whitespace on its first line becomes the indentation
+                # of every line in the wrapped text.
+                for i, ch in enumerate(paragraph_text):
+                    if ch not in whitespace:
+                        break
+                leading_ws = paragraph_text[:i]
+                iwrapper.initial_indent = leading_ws
+                iwrapper.subsequent_indent = leading_ws
+                paragraph_text = dedent(paragraph_text)
+                wrapped_paragraphs.append(iwrapper.fill(paragraph_text))
+                add_paragraph_break = True
+        else:
+            # Fill this paragraph.  fill() consumes the trailing newline.
+            wrapped_paragraphs.append(wrapper.fill(paragraph_text))
+            add_paragraph_break = True
+    return EMPTYSTRING.join(wrapped_paragraphs)
author	Barry Warsaw	2011-04-10 18:03:37 -0400
committer	Barry Warsaw	2011-04-10 18:03:37 -0400
commit	37038a683cd909438a6dee43beb9b258ef4e4313 (patch)
tree	df50eabfcc890f01203d90c453dc1b1dbde65d2c /src/mailman/utilities/string.py
parent	cce9729cac32b6c5fe2acc77b2bfb6b7c545711f (diff)
parent	ef3a4a87e2c0f4b640e31afc4828d2edbd005846 (diff)
download	mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.gz mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.zst mailman-37038a683cd909438a6dee43beb9b258ef4e4313.zip