diff options
| author | jhylton | 2000-09-22 14:49:10 +0000 |
|---|---|---|
| committer | jhylton | 2000-09-22 14:49:10 +0000 |
| commit | 78215c286405f9d4a40b6694f90cfd8c058a2371 (patch) | |
| tree | 42bb91c99d809d9b0b15a35d45ff9c89c8736da7 | |
| parent | eaf390e11544cacf1779f136457a6e0863bb7b79 (diff) | |
| download | mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.gz mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.zst mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.zip | |
Decode quoted-printable message bodies.

Keep the _charsets dictionary in the pickled representation of the
archive; this allows the charset for an index page to be set based on
the total count of charsets in all messages.
Diffstat (limited to '')
| -rw-r--r-- | Mailman/Archiver/HyperArch.py | 42 |
1 files changed, 38 insertions, 4 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index f6cdfd112..d1683ce68 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -193,6 +193,7 @@ class Article(pipermail.Article):
 
     # for compatibility with old archives loaded via pickle
     charset = None
+    cenc = None
     decoded = {}
 
     def __init__(self, message=None, sequence=0, keepHeaders=[]):
@@ -216,6 +217,7 @@ class Article(pipermail.Article):
 
         # snag the content-type
         self.ctype = message.getheader('Content-Type') or "text/plain"
+        self.cenc = message.getheader('Content-Transfer-Encoding')
         self.decoded = {}
         mo = rx_charset.search(self.ctype)
         if mo:
@@ -293,7 +295,7 @@ class Article(pipermail.Article):
         d["author_html"] = html_quote(self.author)
         d["email_url"] = url_quote(self.email)
         d["datestr_html"] = html_quote(self.datestr)
-        d["body"] = string.join(self.body, "")
+        d["body"] = self._get_body()
 
         if self.charset is not None:
             d["encoding"] = html_charset % self.charset
@@ -304,6 +306,39 @@ class Article(pipermail.Article):
 
         return self.html_tmpl % d
 
+    _rx_quote = re.compile('=([A-Z0-9][A-Z0-9])')
+    _rx_softline = re.compile('=[ \t]*$')
+
+    def _get_body(self):
+        """Return the message body ready for HTML, decoded if necessary"""
+        if self.charset is None or self.cenc != "quoted-printable":
+            return string.join(self.body, "")
+        # the charset is specified and the body is quoted-printable
+        # first get rid of soft line breaks, then decode literals
+        lines = []
+        rx = self._rx_softline
+        for line in self.body:
+            mo = rx.search(line)
+            if mo:
+                i = string.rfind(line, "=")
+                line = line[:i]
+            lines.append(line)
+        buf = string.join(lines, "")
+
+        chunks = []
+        offset = 0
+        rx = self._rx_quote
+        while 1:
+            mo = rx.search(buf, offset)
+            if not mo:
+                chunks.append(buf[offset:])
+                break
+            i = mo.start()
+            chunks.append(buf[offset:i])
+            offset = i + 3
+            chunks.append(chr(int(mo.group(1), 16)))
+        return string.join(chunks, "")
+
     def _add_decoded(self, d):
         """Add encoded-word keys to HTML output"""
         for src, dst in (('author', 'author_html'),
@@ -473,8 +508,7 @@ class HyperArchive(pipermail.T):
     SHOWBR = 0                          # Add <br> onto every line
 
     def __init__(self, maillist, unlock=1):
-        # can't init the database while other
-        # processes are writing to it!
+        # can't init the database while other processes are writing to it!
         # XXX TODO- implement native locking
         # with mailman's LockFile module for HyperDatabase.HyperDatabase
         #
@@ -904,7 +938,7 @@ class HyperArchive(pipermail.T):
             os.unlink(txtfile)
 
     _skip_attrs = ('maillist', '_lock_file', '_unlocklist',
-                   '_charsets', 'charset')
+                   'charset')
 
     def getstate(self):
         d={}
