diff options
| author | Barry Warsaw | 2011-04-10 18:03:37 -0400 |
|---|---|---|
| committer | Barry Warsaw | 2011-04-10 18:03:37 -0400 |
| commit | 37038a683cd909438a6dee43beb9b258ef4e4313 (patch) | |
| tree | df50eabfcc890f01203d90c453dc1b1dbde65d2c /src/mailman/utilities/string.py | |
| parent | cce9729cac32b6c5fe2acc77b2bfb6b7c545711f (diff) | |
| parent | ef3a4a87e2c0f4b640e31afc4828d2edbd005846 (diff) | |
| download | mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.gz mailman-37038a683cd909438a6dee43beb9b258ef4e4313.tar.zst mailman-37038a683cd909438a6dee43beb9b258ef4e4313.zip | |
Trunk merge
Diffstat (limited to 'src/mailman/utilities/string.py')
| -rw-r--r-- | src/mailman/utilities/string.py | 172 |
1 files changed, 170 insertions, 2 deletions
# Post-patch content of src/mailman/utilities/string.py for the region covered
# by this diff.  Anything outside the diff hunks (the file header and most of
# expand()) is not reproduced here.

from __future__ import absolute_import, unicode_literals

__metaclass__ = type
__all__ = [
    'expand',
    'oneline',
    'uncanonstr',
    'websafe',
    'wrap',
    ]


import cgi
import logging

from email.errors import HeaderParseError
from email.header import decode_header, make_header
from string import Template, whitespace
from textwrap import TextWrapper, dedent
from zope.component import getUtility

from mailman.interfaces.languages import ILanguageManager


EMPTYSTRING = ''
NL = '\n'

log = logging.getLogger('mailman.error')


# NOTE: expand(template, substitutions, template_class=Template) is defined at
# this point in the file.  The diff shows only its final handler as context
# (`except (TypeError, ValueError):`, which logs 'broken template: %s'), so it
# is not reproduced here.


def oneline(s, cset='us-ascii', in_unicode=False):
    """Decode a header string in one line and convert into specified charset.

    :param s: The header string
    :type s: string
    :param cset: The character set (encoding) to use.
    :type cset: string
    :param in_unicode: Flag specifying whether to return the converted string
        as a unicode (True) or an 8-bit string (False, the default).
    :type in_unicode: bool
    :return: The decoded header string with its line breaks removed.  If a
        lookup, Unicode, value, or header parsing error occurs during the
        conversion, the undecoded input is returned instead, also with its
        line breaks removed.
    :rtype: string
    """
    try:
        h = make_header(decode_header(s))
        ustr = h.__unicode__()
        line = EMPTYSTRING.join(ustr.splitlines())
        if in_unicode:
            return line
        else:
            # Characters that `cset` cannot represent are replaced rather
            # than raising.
            return line.encode(cset, 'replace')
    except (LookupError, UnicodeError, ValueError, HeaderParseError):
        # Possibly a charset problem.  Return the undecoded string in one
        # line.
        return EMPTYSTRING.join(s.splitlines())


def websafe(s):
    """Return `s` escaped for inclusion in HTML.

    This is a thin wrapper around `cgi.escape()` with `quote=True`, so the
    double-quote character is escaped in addition to `&`, `<` and `>`.

    :param s: The string to escape.
    :type s: string
    :return: The escaped string.
    :rtype: string
    """
    return cgi.escape(s, quote=True)


# The opposite of canonstr() -- sorta.  I.e. it attempts to encode s in the
# charset of the given language, which is the character set that the page will
# be rendered in, and failing that, replaces non-ASCII characters with their
# html references.  It always returns a byte string.
def uncanonstr(s, lang=None):
    """Encode `s` for a language's charset, falling back to HTML references.

    :param s: The string to encode.  None is treated as the empty string.
    :type s: string or None
    :param lang: The language code whose charset should be used, as looked up
        through the `ILanguageManager` utility.  When None (the default),
        'us-ascii' is used.
    :type lang: string or None
    :return: `s` encoded in the charset when that is possible; otherwise `s`
        with every character whose ordinal is above 127 replaced by a
        numeric character reference.
    :rtype: string
    """
    if s is None:
        s = u''
    if lang is None:
        charset = 'us-ascii'
    else:
        charset = getUtility(ILanguageManager)[lang].charset
    # See if the string contains characters only in the desired character
    # set.  If so, return it unchanged, except for coercing it to a byte
    # string.
    try:
        if isinstance(s, unicode):
            return s.encode(charset)
        else:
            # The decode is done only for validation; its result is
            # discarded.
            unicode(s, charset)
            return s
    except UnicodeError:
        # Nope, it contains funny characters, so html-ref it.
        pieces = [('&#%3d;' % ord(c)) if ord(c) > 127 else c for c in s]
        # Join characters together and coerce to byte string.
        return str(EMPTYSTRING.join(pieces))


def wrap(text, column=70, honor_leading_ws=True):
    """Wrap and fill the text to the specified column.

    The input text is wrapped and filled as done by the standard library
    textwrap module.  The differences here being that this function is capable
    of filling multiple paragraphs (as defined by text separated by blank
    lines).  Also, when `honor_leading_ws` is True (the default), paragraphs
    that begin with whitespace are not wrapped.  This is the algorithm that
    the Python FAQ wizard used.

    :param text: The text to wrap.  Empty text yields the empty string.
    :type text: string
    :param column: The maximum width of the wrapped lines.
    :type column: int
    :param honor_leading_ws: When True, paragraphs starting with whitespace
        are copied verbatim.  When False they are dedented, wrapped, and
        re-indented with the leading whitespace of their first line.
    :type honor_leading_ws: bool
    :return: The wrapped text.
    :rtype: string
    """
    # First, split the original text into paragraphs, keeping all blank lines
    # between them.
    paragraphs = []
    paragraph = []
    last_indented = False
    for line in text.splitlines(True):
        is_indented = (len(line) > 0 and line[0] in whitespace)
        if line == NL:
            if paragraph:
                paragraphs.append(EMPTYSTRING.join(paragraph))
            paragraphs.append(line)
            last_indented = False
            paragraph = []
        elif last_indented != is_indented:
            # The indentation level changed.  We treat this as a paragraph
            # break but no blank line will be issued between paragraphs.
            if paragraph:
                paragraphs.append(EMPTYSTRING.join(paragraph))
            # The next paragraph starts with this line.
            paragraph = [line]
            last_indented = is_indented
        else:
            # This line does not constitute a paragraph break.
            paragraph.append(line)
    # We've consumed all the lines in the original text.  Transfer the last
    # paragraph we were collecting to the full set of paragraphs, but only if
    # it is not empty.  An empty entry (from empty input, or input ending in
    # a blank line) would make the `paragraph[0]` test below raise
    # IndexError.
    if paragraph:
        paragraphs.append(EMPTYSTRING.join(paragraph))
    # Now iterate through all paragraphs, wrapping as necessary.
    wrapped_paragraphs = []
    # The dedented wrapper.
    wrapper = TextWrapper(width=column,
                          fix_sentence_endings=True)
    # The indented wrapper.  For this one, we'll clobber initial_indent and
    # subsequent_indent as needed per indented chunk of text.
    iwrapper = TextWrapper(width=column,
                           fix_sentence_endings=True,
                           )
    add_paragraph_break = False
    for paragraph in paragraphs:
        if add_paragraph_break:
            # fill() dropped the previous paragraph's trailing newline, so
            # put one back before emitting anything else.
            wrapped_paragraphs.append(NL)
            add_paragraph_break = False
        # Just copy the blank lines to the final set of paragraphs.
        if paragraph == NL:
            wrapped_paragraphs.append(NL)
        # Choose the wrapper based on whether the paragraph is indented or
        # not.  Also, do not wrap indented paragraphs if honor_leading_ws is
        # set.
        elif paragraph[0] in whitespace:
            if honor_leading_ws:
                # Leave the indented paragraph verbatim.
                wrapped_paragraphs.append(paragraph)
            else:
                # The paragraph should be wrapped, but it must first be
                # dedented.  The leading whitespace on the first line of the
                # original text will be used as the indentation for all lines
                # in the wrapped text.
                for i, ch in enumerate(paragraph):
                    if ch not in whitespace:
                        break
                leading_ws = paragraph[:i]
                iwrapper.initial_indent = leading_ws
                iwrapper.subsequent_indent = leading_ws
                wrapped_paragraphs.append(iwrapper.fill(dedent(paragraph)))
                add_paragraph_break = True
        else:
            # Fill this paragraph.  fill() consumes the trailing newline.
            wrapped_paragraphs.append(wrapper.fill(paragraph))
            add_paragraph_break = True
    return EMPTYSTRING.join(wrapped_paragraphs)
