decode quoted-printable message bodies

Keep the _charsets dictionary in the pickled representation of the archive; this allows the charset for an index page to be set based on the total count of charsets in all messages.
author: jhylton 2000-09-22 14:49:10 +0000
committer: jhylton 2000-09-22 14:49:10 +0000
commit: 78215c286405f9d4a40b6694f90cfd8c058a2371 (patch)
tree: 42bb91c99d809d9b0b15a35d45ff9c89c8736da7
parent: eaf390e11544cacf1779f136457a6e0863bb7b79 (diff)
download: mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.gz
mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.zst
mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.zip
1 files changed, 38 insertions, 4 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index f6cdfd112..d1683ce68 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -193,6 +193,7 @@ class Article(pipermail.Article):
 
     # for compatibility with old archives loaded via pickle
     charset = None
+    cenc = None
     decoded = {}
 
     def __init__(self, message=None, sequence=0, keepHeaders=[]):
@@ -216,6 +217,7 @@ class Article(pipermail.Article):
 
         # snag the content-type
         self.ctype = message.getheader('Content-Type') or "text/plain"
+        self.cenc = message.getheader('Content-Transfer-Encoding')
         self.decoded = {}
         mo = rx_charset.search(self.ctype)
         if mo:
@@ -293,7 +295,7 @@ class Article(pipermail.Article):
 	d["author_html"] = html_quote(self.author)
 	d["email_url"] = url_quote(self.email)
 	d["datestr_html"] = html_quote(self.datestr)
-	d["body"] = string.join(self.body, "")
+        d["body"] = self._get_body()
 
         if self.charset is not None:
             d["encoding"] = html_charset % self.charset
@@ -304,6 +306,39 @@ class Article(pipermail.Article):
             
         return self.html_tmpl % d
 
+    # "=XX" escapes; RFC 2045 allows only uppercase hex digits here.  Matching
+    # [A-Z0-9] would let "=ZZ" through and int(..., 16) would raise ValueError.
+    _rx_quote = re.compile('=([0-9A-F][0-9A-F])')
+    # soft line break: "=", optional padding (and a stray CR) at end of line
+    _rx_softline = re.compile('=[ \t\r]*$')
+
+    def _get_body(self):
+        """Return the message body as one string, decoded if necessary.
+
+        The body is decoded only when a charset was declared and the
+        Content-Transfer-Encoding is quoted-printable; otherwise the lines
+        of self.body are joined unchanged.  Decoding yields the raw
+        characters of the declared charset; no charset conversion or HTML
+        quoting is done here.
+        """
+        # Encoding names are case-insensitive (RFC 2045, section 6.1), and
+        # cenc is None when the header is absent.
+        cenc = string.lower(string.strip(self.cenc or ""))
+        if self.charset is None or cenc != "quoted-printable":
+            return string.join(self.body, "")
+        # First drop soft line breaks (the "=", any padding and the newline
+        # that follows it), so the two physical lines become one.
+        lines = []
+        for line in self.body:
+            mo = self._rx_softline.search(line)
+            if mo:
+                line = line[:mo.start()]
+            lines.append(line)
+        buf = string.join(lines, "")
+        # Then turn every "=XX" escape into the character it encodes;
+        # anything that is not a valid escape is left as it is.
+        return self._rx_quote.sub(lambda mo: chr(int(mo.group(1), 16)), buf)
+
     def _add_decoded(self, d):
         """Add encoded-word keys to HTML output"""
         for src, dst in (('author', 'author_html'),
@@ -473,8 +508,7 @@ class HyperArchive(pipermail.T):
     SHOWBR = 0                # Add <br> onto every line
 
     def __init__(self, maillist, unlock=1):
-        # can't init the database while other
-        # processes are writing to it!
+        # can't init the database while other processes are writing to it!
         # XXX TODO- implement native locking
         # with mailman's LockFile module for HyperDatabase.HyperDatabase
         #
@@ -904,7 +938,7 @@ class HyperArchive(pipermail.T):
             os.unlink(txtfile)
 
     _skip_attrs = ('maillist', '_lock_file', '_unlocklist',
-                   '_charsets', 'charset')
+                   'charset')
     
     def getstate(self):
         d={}
author	jhylton	2000-09-22 14:49:10 +0000
committer	jhylton	2000-09-22 14:49:10 +0000
commit	78215c286405f9d4a40b6694f90cfd8c058a2371 (patch)
tree	42bb91c99d809d9b0b15a35d45ff9c89c8736da7
parent	eaf390e11544cacf1779f136457a6e0863bb7b79 (diff)
download	mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.gz mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.tar.zst mailman-78215c286405f9d4a40b6694f90cfd8c058a2371.zip