diff options
| author | Barry Warsaw | 2015-06-02 13:51:58 +0000 |
|---|---|---|
| committer | Barry Warsaw | 2015-06-02 10:09:24 -0400 |
| commit | e57787d8f6ff0cc8b1e30f2531a56eaf9a28511a (patch) | |
| tree | 8deb9773fe8e3871b5859a866a70019d92a9a3cb /src | |
| parent | f0f13923af208d2eab97b6b304b77e9d5a55cc5a (diff) | |
| download | mailman-e57787d8f6ff0cc8b1e30f2531a56eaf9a28511a.tar.gz mailman-e57787d8f6ff0cc8b1e30f2531a56eaf9a28511a.tar.zst mailman-e57787d8f6ff0cc8b1e30f2531a56eaf9a28511a.zip | |
Diffstat (limited to 'src')
| -rw-r--r-- | src/mailman/config/schema.cfg | 7 | ||||
| -rw-r--r-- | src/mailman/docs/NEWS.rst | 4 | ||||
| -rw-r--r-- | src/mailman/handlers/docs/filtering.rst | 27 | ||||
| -rw-r--r-- | src/mailman/handlers/mime_delete.py | 58 | ||||
| -rw-r--r-- | src/mailman/handlers/tests/test_mimedel.py | 66 | ||||
| -rw-r--r-- | src/mailman/rest/docs/systemconf.rst | 1 | ||||
| -rw-r--r-- | src/mailman/rest/tests/test_systemconf.py | 13 |
7 files changed, 121 insertions, 55 deletions
diff --git a/src/mailman/config/schema.cfg b/src/mailman/config/schema.cfg index af13beb52..376600fc0 100644 --- a/src/mailman/config/schema.cfg +++ b/src/mailman/config/schema.cfg @@ -64,6 +64,13 @@ layout: here # Can MIME filtered messages be preserved by list owners? filtered_messages_are_preservable: no +# How should text/html parts be converted to text/plain when the mailing list +# is set to convert HTML to plaintext? This names a command to be called, +# where the substitution variable $filename is filled in by Mailman, and +# contains the path to the temporary file that the command should read from. +# The command should print the converted text to stdout. +html_to_plain_text_command: /usr/bin/lynx -dump $filename + [shell] # `mailman shell` (also `withlist`) gives you an interactive prompt that you diff --git a/src/mailman/docs/NEWS.rst b/src/mailman/docs/NEWS.rst index 5702f0125..de34c01d8 100644 --- a/src/mailman/docs/NEWS.rst +++ b/src/mailman/docs/NEWS.rst @@ -19,6 +19,10 @@ Bugs * When `pip` installing Mailman into `/usr/local`, the `master` script is searched for relative to several landmark locations. Originally given by Sambuddha Basu, adapted by Barry Warsaw. (Closes: #3) + * Fix the traceback that occurred when trying to convert a `text/html` + subpart to plaintext via the `mimedel` handler. Now, a configuration + variable `[mailman]html_to_plain_text_command` in the `mailman.cfg` file + defines the command to use. It defaults to `lynx`. (Closes: #109) Configuration ------------- diff --git a/src/mailman/handlers/docs/filtering.rst b/src/mailman/handlers/docs/filtering.rst index 582211d54..427db4273 100644 --- a/src/mailman/handlers/docs/filtering.rst +++ b/src/mailman/handlers/docs/filtering.rst @@ -208,7 +208,7 @@ Clean up. Conversion to plain text ======================== -Many mailing lists prohibit HTML email, and in fact, such email can be a +Some mailing lists prohibit HTML email, and in fact, such email can be a phishing or spam vector. 
However, many mail readers will send HTML email by default because users think it looks pretty. One approach to handling this would be to filter out ``text/html`` parts and rely on @@ -228,20 +228,8 @@ By default, Mailman sends the message through lynx, but since this program is not guaranteed to exist, we'll craft a simple, but stupid script to simulate the conversion process. The script expects a single argument, which is the name of the file containing the message payload to filter. +:: - >>> import os, sys - >>> script_path = os.path.join(config.DATA_DIR, 'filter.py') - >>> fp = open(script_path, 'w') - >>> try: - ... print("""\ - ... import sys - ... print('Converted text/html to text/plain') - ... print('Filename:', sys.argv[1]) - ... """, file=fp) - ... finally: - ... fp.close() - >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % ( - ... sys.executable, script_path) >>> msg = message_from_string("""\ ... From: aperson@example.com ... Content-Type: text/html @@ -250,7 +238,10 @@ name of the file containing the message payload to filter. ... <html><head></head> ... <body></body></html> ... """) - >>> process(mlist, msg, {}) + + >>> from mailman.handlers.tests.test_mimedel import dummy_script + >>> with dummy_script(): + ... process(mlist, msg, {}) >>> print(msg.as_string()) From: aperson@example.com MIME-Version: 1.0 @@ -269,6 +260,7 @@ Similarly, if after filtering a multipart section ends up empty, then the entire multipart is discarded. For example, here's a message where an inner ``multipart/mixed`` contains two jpeg subparts. Both jpegs are filtered out, so the entire inner ``multipart/mixed`` is discarded. +:: >>> msg = message_from_string("""\ ... From: aperson@example.com @@ -309,7 +301,10 @@ so the entire inner ``multipart/mixed`` is discarded. ... aaa ... --AAA-- ... """) - >>> process(mlist, msg, {}) + + >>> with dummy_script(): + ... 
process(mlist, msg, {}) + >>> print(msg.as_string()) From: aperson@example.com Content-Type: multipart/mixed; boundary=AAA diff --git a/src/mailman/handlers/mime_delete.py b/src/mailman/handlers/mime_delete.py index 41bb468ea..a5659b33c 100644 --- a/src/mailman/handlers/mime_delete.py +++ b/src/mailman/handlers/mime_delete.py @@ -30,13 +30,16 @@ __all__ = [ import os -import errno +import shutil import logging import tempfile +import subprocess +from contextlib import ExitStack from email.iterators import typed_subpart_iterator from email.mime.message import MIMEMessage from email.mime.text import MIMEText +from itertools import count from lazr.config import as_boolean from mailman.config import config from mailman.core import errors @@ -46,7 +49,7 @@ from mailman.interfaces.action import FilterAction from mailman.interfaces.handler import IHandler from mailman.utilities.string import oneline from mailman.version import VERSION -from os.path import splitext +from string import Template from zope.interface import implementer @@ -144,8 +147,8 @@ def process(mlist, msg, msgdata): changedp = 0 if numparts != len([subpart for subpart in msg.walk()]): changedp = 1 - # Now perhaps convert all text/html to text/plain - if mlist.convert_html_to_plaintext and config.HTML_TO_PLAIN_TEXT_COMMAND: + # Now perhaps convert all text/html to text/plain. 
+ if mlist.convert_html_to_plaintext: changedp += to_plaintext(msg) # If we're left with only two parts, an empty body and one attachment, # recast the message to one of just that part @@ -236,30 +239,29 @@ def collapse_multipart_alternatives(msg): def to_plaintext(msg): - changedp = False - for subpart in typed_subpart_iterator(msg, 'text', 'html'): - filename = tempfile.mktemp('.html') - fp = open(filename, 'w') - try: - fp.write(subpart.get_payload()) - fp.close() - cmd = os.popen(config.HTML_TO_PLAIN_TEXT_COMMAND % - {'filename': filename}) - plaintext = cmd.read() - rtn = cmd.close() - if rtn: - log.error('HTML->text/plain error: %s', rtn) - finally: + changedp = 0 + counter = count() + with ExitStack() as resources: + tempdir = tempfile.mkdtemp() + resources.callback(shutil.rmtree, tempdir) + for subpart in typed_subpart_iterator(msg, 'text', 'html'): + filename = os.path.join(tempdir, '{}.html'.format(next(counter))) + with open(filename, 'w', encoding='utf-8') as fp: + fp.write(subpart.get_payload()) + template = Template(config.mailman.html_to_plain_text_command) + command = template.safe_substitute(filename=filename).split() try: - os.unlink(filename) - except OSError as e: - if e.errno != errno.ENOENT: - raise - # Now replace the payload of the subpart and twiddle the Content-Type: - del subpart['content-transfer-encoding'] - subpart.set_payload(plaintext) - subpart.set_type('text/plain') - changedp = True + stdout = subprocess.check_output( + command, universal_newlines=True) + except subprocess.CalledProcessError: + log.exception('HTML -> text/plain command error') + else: + # Replace the payload of the subpart with the converted text + # and tweak the content type. 
+ del subpart['content-transfer-encoding'] + subpart.set_payload(stdout) + subpart.set_type('text/plain') + changedp += 1 return changedp @@ -272,7 +274,7 @@ def get_file_ext(m): fext = '' filename = m.get_filename('') or m.get_param('name', '') if filename: - fext = splitext(oneline(filename,'utf-8'))[1] + fext = os.path.splitext(oneline(filename,'utf-8'))[1] if len(fext) > 1: fext = fext[1:] else: diff --git a/src/mailman/handlers/tests/test_mimedel.py b/src/mailman/handlers/tests/test_mimedel.py index cd80b47ab..a82190065 100644 --- a/src/mailman/handlers/tests/test_mimedel.py +++ b/src/mailman/handlers/tests/test_mimedel.py @@ -19,11 +19,18 @@ __all__ = [ 'TestDispose', + 'TestHTMLFilter', + 'dummy_script', ] +import os +import sys +import shutil +import tempfile import unittest +from contextlib import ExitStack, contextmanager from mailman.app.lifecycle import create_list from mailman.config import config from mailman.core import errors @@ -39,10 +46,32 @@ from zope.component import getUtility +@contextmanager +def dummy_script(): + with ExitStack() as resources: + tempdir = tempfile.mkdtemp() + resources.callback(shutil.rmtree, tempdir) + filter_path = os.path.join(tempdir, 'filter.py') + with open(filter_path, 'w', encoding='utf-8') as fp: + print("""\ +import sys +print('Converted text/html to text/plain') +print('Filename:', sys.argv[1]) +""", file=fp) + config.push('dummy script', """\ +[mailman] +html_to_plain_text_command = {exe} {script} $filename +""".format(exe=sys.executable, script=filter_path)) + resources.callback(config.pop, 'dummy script') + yield + + + class TestDispose(unittest.TestCase): """Test the mime_delete handler.""" layer = ConfigLayer + maxDiff = None def setUp(self): self._mlist = create_list('test@example.com') @@ -57,11 +86,7 @@ Message-ID: <ant> [mailman] site_owner: noreply@example.com """) - # Let assertMultiLineEqual work without bounds. 
- self.maxDiff = None - - def tearDown(self): - config.pop('dispose') + self.addCleanup(config.pop, 'dispose') def test_dispose_discard(self): self._mlist.filter_action = FilterAction.discard @@ -171,3 +196,34 @@ message. self.assertTrue(line.endswith( '{0} invalid FilterAction: test@example.com. ' 'Treating as discard'.format(action.name))) + + + +class TestHTMLFilter(unittest.TestCase): + """Test the conversion of HTML to plaintext.""" + + layer = ConfigLayer + + def setUp(self): + self._mlist = create_list('test@example.com') + self._mlist.convert_html_to_plaintext = True + self._mlist.filter_content = True + + def test_convert_html_to_plaintext(self): + # Converting to plain text calls a command line script. + msg = mfs("""\ +From: aperson@example.com +Content-Type: text/html +MIME-Version: 1.0 + +<html><head></head> +<body></body></html> +""") + process = config.handlers['mime-delete'].process + with dummy_script(): + process(self._mlist, msg, {}) + self.assertEqual(msg.get_content_type(), 'text/plain') + self.assertTrue( + msg['x-content-filtered-by'].startswith('Mailman/MimeDel')) + payload_lines = msg.get_payload().splitlines() + self.assertEqual(payload_lines[0], 'Converted text/html to text/plain') diff --git a/src/mailman/rest/docs/systemconf.rst b/src/mailman/rest/docs/systemconf.rst index 66953f4ba..fa8b7384b 100644 --- a/src/mailman/rest/docs/systemconf.rst +++ b/src/mailman/rest/docs/systemconf.rst @@ -15,6 +15,7 @@ You can also get all the values for a particular section. default_language: en email_commands_max_lines: 10 filtered_messages_are_preservable: no + html_to_plain_text_command: /usr/bin/lynx -dump $filename http_etag: ... 
layout: testing noreply_address: noreply diff --git a/src/mailman/rest/tests/test_systemconf.py b/src/mailman/rest/tests/test_systemconf.py index b8f14125b..4aa0dc920 100644 --- a/src/mailman/rest/tests/test_systemconf.py +++ b/src/mailman/rest/tests/test_systemconf.py @@ -43,16 +43,17 @@ class TestSystemConfiguration(unittest.TestCase): self.assertIn('http_etag', json) del json['http_etag'] self.assertEqual(json, dict( - site_owner='noreply@example.com', - noreply_address='noreply', default_language='en', - sender_headers='from from_ reply-to sender', email_commands_max_lines='10', + filtered_messages_are_preservable='no', + html_to_plain_text_command='/usr/bin/lynx -dump $filename', + layout='testing', + noreply_address='noreply', pending_request_life='3d', - pre_hook='', post_hook='', - layout='testing', - filtered_messages_are_preservable='no', + pre_hook='', + sender_headers='from from_ reply-to sender', + site_owner='noreply@example.com', )) def test_dotted_section(self): |
