summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbwarsaw2001-10-27 03:32:56 +0000
committerbwarsaw2001-10-27 03:32:56 +0000
commitd420c4b18a4e2e15ecb46ab56e752d6453077fac (patch)
tree7a6e9bffbbb38899268b3d38b34897944f2ffd29
parentac79a2601e268a91ec76837527f2014b9759447a (diff)
downloadmailman-d420c4b18a4e2e15ecb46ab56e752d6453077fac.tar.gz
mailman-d420c4b18a4e2e15ecb46ab56e752d6453077fac.tar.zst
mailman-d420c4b18a4e2e15ecb46ab56e752d6453077fac.zip
process(): Implement ARCHIVE_HTML_SANITIZER == 2, meaning "leave it
inline but HTML-escape it. Also, expand on the == 1 value (HTML-escape an attachment) a bit so the output looks a little nicer. Pipermail actually does a better job here, but we can't use it. save_attachment(): Grows a filter_html option which says whether to filter text/html parts or not. Default is 1, but if ARCHIVE_HTML_SANITIZER == 2 above, we don't want to filter it through the program.
-rw-r--r--Mailman/Handlers/Scrubber.py37
1 files changed, 31 insertions, 6 deletions
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index 24a3c2b48..9dd37b55b 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -20,6 +20,7 @@
import os
import re
import sha
+import cgi
import errno
import mimetypes
import tempfile
@@ -43,6 +44,8 @@ pre = re.compile(r'[/\\:]')
# (essentially anything that isn't an alphanum, dot, slash, or underscore.
sre = re.compile(r'[^-\w.]')
+BR = '<br>\n'
+
# We're using a subclass of the standard Generator because we want to suppress
@@ -66,20 +69,42 @@ class ScrubberGenerator(Generator):
def process(mlist, msg, msgdata=None):
+ sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER
outer = 1
for part in msg.walk():
# If the part is text/plain, we leave it alone
if part.get_type('text/plain') == 'text/plain':
pass
- elif part.get_type() == 'text/html' and \
- not isinstance(mm_cfg.ARCHIVE_HTML_SANITIZER, StringType):
- if mm_cfg.ARCHIVE_HTML_SANITIZER == 0:
+ elif part.get_type() == 'text/html' and sanitize in (0, 1, 2):
+ if sanitize == 0:
if outer:
raise DiscardMessage
part.set_payload(_('HTML attachment scrubbed and removed'))
- else:
+ elif sanitize == 2:
# By leaving it alone, Pipermail will automatically escape it
pass
+ else:
+ # HTML-escape it and store it as an attachment, but make it
+ # look a /little/ bit prettier. :(
+ payload = cgi.escape(part.get_payload())
+ # For whitespace in the margin, change spaces into
+ # non-breaking spaces, and tabs into 8 of those. Then use a
+ # mono-space font. Still looks hideous to me, but then I'd
+ # just as soon discard them.
+ def doreplace(s):
+ return s.replace(' ', '&nbsp;').replace('\t', '&nbsp'*8)
+ lines = [doreplace(s) for s in payload.split('\n')]
+ payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
+ part.set_payload(payload)
+ omask = os.umask(002)
+ try:
+ url = save_attachment(mlist, part, filter_html=0)
+ finally:
+ os.umask(omask)
+ part.set_payload(_("""\
+An HTML attachment was scrubbed.
+URL: %(url)s
+"""))
# If the message isn't a multipart, then we'll strip it out as an
# attachment that would have to be separately downloaded. Pipermail
# will transform the url into a hyperlink.
@@ -124,7 +149,7 @@ Url : %(url)s
-def save_attachment(mlist, msg):
+def save_attachment(mlist, msg, filter_html=1):
# The directory to store the attachment in
dir = os.path.join(mlist.archive_dir(), 'attachments')
try:
@@ -203,7 +228,7 @@ def save_attachment(mlist, msg):
# ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
# here), then send the attachment through the filter program for
# sanitization
- if msg.get_type() == 'text/html':
+ if filter_html and msg.get_type() == 'text/html':
base, ext = os.path.splitext(path)
tmppath = base + '-tmp' + ext
fp = open(tmppath, 'w')