summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Mailman/Archiver/HyperArch.py44
-rw-r--r--Mailman/Defaults.py.in16
2 files changed, 45 insertions, 15 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 10307f416..a2345082f 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -194,7 +194,7 @@ class Article(pipermail.Article):
text_tmpl = article_text_template
# for compatibility with old archives loaded via pickle
- charset = None
+ charset = mm_cfg.DEFAULT_CHARSET
cenc = None
decoded = {}
@@ -226,9 +226,11 @@ class Article(pipermail.Article):
self.check_header_charsets(string.lower(mo.group(1)))
else:
self.check_header_charsets()
- if self.charset:
- assert self.charset == string.lower(self.charset), \
- self.charset
+ if self.charset and self.charset in mm_cfg.VERBATIM_ENCODING:
+ self.quote = lambda x:x
+
+ def quote(self, buf):
+ return html_quote(buf)
def check_header_charsets(self, msg_charset=None):
"""Check From and Subject for encoded-words
@@ -276,12 +278,12 @@ class Article(pipermail.Article):
d["prev"], d["prev_wsubj"] = self._get_prev()
d["next"], d["next_wsubj"] = self._get_next()
- d["email_html"] = html_quote(self.email)
- d["title"] = html_quote(self.subject)
- d["subject_html"] = html_quote(self.subject)
- d["author_html"] = html_quote(self.author)
+ d["email_html"] = self.quote(self.email)
+ d["title"] = self.quote(self.subject)
+ d["subject_html"] = self.quote(self.subject)
+ d["author_html"] = self.quote(self.author)
d["email_url"] = url_quote(self.email)
- d["datestr_html"] = html_quote(self.datestr)
+ d["datestr_html"] = self.quote(self.datestr)
d["body"] = self._get_body()
if self.charset is not None:
@@ -302,7 +304,7 @@ class Article(pipermail.Article):
prev_wsubj = ('<LI> Previous message:'
' <A HREF="%s">%s\n</A></li>'
% (url_quote(self.prev.filename),
- html_quote(subject)))
+ self.quote(subject)))
else:
prev = prev_wsubj = ""
return prev, prev_wsubj
@@ -328,7 +330,7 @@ class Article(pipermail.Article):
next_wsubj = ('<LI> Next message:'
' <A HREF="%s">%s\n</A></li>'
% (url_quote(self.next.filename),
- html_quote(subject)))
+ self.quote(subject)))
else:
next = next_wsubj = ""
return next, next_wsubj
@@ -338,13 +340,17 @@ class Article(pipermail.Article):
def _get_body(self):
"""Return the message body ready for HTML, decoded if necessary"""
+ try:
+ body = self.html_body
+ except AttributeError:
+ body = self.body
if self.charset is None or self.cenc != "quoted-printable":
- return null_to_space(string.join(self.body, ""))
+ return null_to_space(string.join(body, ""))
# the charset is specified and the body is quoted-printable
# first get rid of soft line breaks, then decode literals
lines = []
rx = self._rx_softline
- for line in self.body:
+ for line in body:
mo = rx.search(line)
if mo:
i = string.rfind(line, "=")
@@ -401,6 +407,8 @@ class Article(pipermail.Article):
def __getstate__(self):
d={}
for each in self.__dict__.keys():
+ if each == "quote":
+ continue
if each in ['maillist','prev','next','body']:
d[each] = None
else:
@@ -888,6 +896,7 @@ class HyperArchive(pipermail.T):
toc.close()
def write_article(self, index, article, path):
+ # called by add_article
f = open_ex(path, 'w')
f.write(article.as_html())
f.close()
@@ -1049,9 +1058,14 @@ class HyperArchive(pipermail.T):
i = i + 1
def format_article(self, article):
+ # called from add_article
+ # TBD: Why do the HTML formatting here and keep it in the
+ # pipermail database? It makes more sense to do the html
+ # formatting as the article is being written as html and toss
+ # the data after it has been written to the archive file.
lines = filter(None, article.body)
# Handle <HTML> </HTML> directives
- if self.ALLOWHTML:
+ if self.ALLOWHTML:
self.__processbody_HTML(lines)
self.__processbody_URLquote(lines)
if not self.SHOWHTML and lines:
@@ -1066,7 +1080,7 @@ class HyperArchive(pipermail.T):
s = lines[i]
if s[0:1] in ' \t\n':
lines[i] = '<P>' + s
- article.body = lines
+ article.html_body = lines
return article
def update_article(self, arcdir, article, prev, next):
diff --git a/Mailman/Defaults.py.in b/Mailman/Defaults.py.in
index b70bbe81e..036da76f3 100644
--- a/Mailman/Defaults.py.in
+++ b/Mailman/Defaults.py.in
@@ -117,6 +117,22 @@ GZIP_ARCHIVE_TXT_FILES = 0
# in the archives too.
ARCHIVER_OBSCURES_EMAILADDRS = 0
+# Pipermail assumes that message bodies contain US-ASCII text.
+# Change this option to define a different character set to be used as
+# the default character set for the archive. The term "character set"
+# is used in MIME to refer to a method of converting a sequence of
+# octets into a sequence of characters. If you change the default
+# charset, you might need to add it to VERBATIM_ENCODING below.
+DEFAULT_CHARSET = None
+
+# Most character set encodings require special HTML entity characters
+# to be quoted, otherwise they won't look right in the Pipermail
+# archives. However some character sets must not quote these
+# characters so that they can be rendered properly in the browsers.
+# The primary issue is multi-byte encodings where the octet 0x26 does
+# not always represent the & character. This variable contains a list
+# of such character sets which are not HTML-quoted in the archives.
+VERBATIM_ENCODING = ['iso-2022-jp']
#####