summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mailman/config/schema.cfg | 7
-rw-r--r-- src/mailman/docs/NEWS.rst | 4
-rw-r--r-- src/mailman/handlers/docs/filtering.rst | 27
-rw-r--r-- src/mailman/handlers/mime_delete.py | 58
-rw-r--r-- src/mailman/handlers/tests/test_mimedel.py | 66
-rw-r--r-- src/mailman/rest/docs/systemconf.rst | 1
-rw-r--r-- src/mailman/rest/tests/test_systemconf.py | 13
7 files changed, 121 insertions, 55 deletions
diff --git a/src/mailman/config/schema.cfg b/src/mailman/config/schema.cfg
index af13beb52..376600fc0 100644
--- a/src/mailman/config/schema.cfg
+++ b/src/mailman/config/schema.cfg
@@ -64,6 +64,13 @@ layout: here
# Can MIME filtered messages be preserved by list owners?
filtered_messages_are_preservable: no
+# How should text/html parts be converted to text/plain when the mailing list
+# is set to convert HTML to plaintext? This names the command to run; Mailman
+# replaces the substitution variable $filename with the path to the temporary
+# file that the command should read from. The command is split on whitespace
+# and run without a shell, and should print the converted text to stdout.
+html_to_plain_text_command: /usr/bin/lynx -dump $filename
+
[shell]
# `mailman shell` (also `withlist`) gives you an interactive prompt that you
diff --git a/src/mailman/docs/NEWS.rst b/src/mailman/docs/NEWS.rst
index 5702f0125..de34c01d8 100644
--- a/src/mailman/docs/NEWS.rst
+++ b/src/mailman/docs/NEWS.rst
@@ -19,6 +19,10 @@ Bugs
* When `pip` installing Mailman into `/usr/local`, the `master` script is
searched for relative to several landmark locations. Originally given by
Sambuddha Basu, adapted by Barry Warsaw. (Closes: #3)
+ * Fix the traceback that occurred when trying to convert a `text/html`
+ subpart to plaintext via the `mimedel` handler. Now, a configuration
+ variable `[mailman]html_to_plain_text_command` in the `mailman.cfg` file
+ defines the command to use. It defaults to `lynx`. (Closes: #109)
Configuration
-------------
diff --git a/src/mailman/handlers/docs/filtering.rst b/src/mailman/handlers/docs/filtering.rst
index 582211d54..427db4273 100644
--- a/src/mailman/handlers/docs/filtering.rst
+++ b/src/mailman/handlers/docs/filtering.rst
@@ -208,7 +208,7 @@ Clean up.
Conversion to plain text
========================
-Many mailing lists prohibit HTML email, and in fact, such email can be a
+Some mailing lists prohibit HTML email, and in fact, such email can be a
phishing or spam vector. However, many mail readers will send HTML email by
default because users think it looks pretty. One approach to handling this
would be to filter out ``text/html`` parts and rely on
@@ -228,20 +228,8 @@ By default, Mailman sends the message through lynx, but since this program is
not guaranteed to exist, we'll craft a simple, but stupid script to simulate
the conversion process. The script expects a single argument, which is the
name of the file containing the message payload to filter.
+::
- >>> import os, sys
- >>> script_path = os.path.join(config.DATA_DIR, 'filter.py')
- >>> fp = open(script_path, 'w')
- >>> try:
- ... print("""\
- ... import sys
- ... print('Converted text/html to text/plain')
- ... print('Filename:', sys.argv[1])
- ... """, file=fp)
- ... finally:
- ... fp.close()
- >>> config.HTML_TO_PLAIN_TEXT_COMMAND = '%s %s %%(filename)s' % (
- ... sys.executable, script_path)
>>> msg = message_from_string("""\
... From: aperson@example.com
... Content-Type: text/html
@@ -250,7 +238,10 @@ name of the file containing the message payload to filter.
... <html><head></head>
... <body></body></html>
... """)
- >>> process(mlist, msg, {})
+
+ >>> from mailman.handlers.tests.test_mimedel import dummy_script
+ >>> with dummy_script():
+ ... process(mlist, msg, {})
>>> print(msg.as_string())
From: aperson@example.com
MIME-Version: 1.0
@@ -269,6 +260,7 @@ Similarly, if after filtering a multipart section ends up empty, then the
entire multipart is discarded. For example, here's a message where an inner
``multipart/mixed`` contains two jpeg subparts. Both jpegs are filtered out,
so the entire inner ``multipart/mixed`` is discarded.
+::
>>> msg = message_from_string("""\
... From: aperson@example.com
@@ -309,7 +301,10 @@ so the entire inner ``multipart/mixed`` is discarded.
... aaa
... --AAA--
... """)
- >>> process(mlist, msg, {})
+
+ >>> with dummy_script():
+ ... process(mlist, msg, {})
+
>>> print(msg.as_string())
From: aperson@example.com
Content-Type: multipart/mixed; boundary=AAA
diff --git a/src/mailman/handlers/mime_delete.py b/src/mailman/handlers/mime_delete.py
index 41bb468ea..a5659b33c 100644
--- a/src/mailman/handlers/mime_delete.py
+++ b/src/mailman/handlers/mime_delete.py
@@ -30,13 +30,16 @@ __all__ = [
import os
-import errno
+import shutil
import logging
import tempfile
+import subprocess
+from contextlib import ExitStack
from email.iterators import typed_subpart_iterator
from email.mime.message import MIMEMessage
from email.mime.text import MIMEText
+from itertools import count
from lazr.config import as_boolean
from mailman.config import config
from mailman.core import errors
@@ -46,7 +49,7 @@ from mailman.interfaces.action import FilterAction
from mailman.interfaces.handler import IHandler
from mailman.utilities.string import oneline
from mailman.version import VERSION
-from os.path import splitext
+from string import Template
from zope.interface import implementer
@@ -144,8 +147,8 @@ def process(mlist, msg, msgdata):
changedp = 0
if numparts != len([subpart for subpart in msg.walk()]):
changedp = 1
- # Now perhaps convert all text/html to text/plain
- if mlist.convert_html_to_plaintext and config.HTML_TO_PLAIN_TEXT_COMMAND:
+ # Now perhaps convert all text/html to text/plain.
+ if mlist.convert_html_to_plaintext:
changedp += to_plaintext(msg)
# If we're left with only two parts, an empty body and one attachment,
# recast the message to one of just that part
@@ -236,30 +239,29 @@ def collapse_multipart_alternatives(msg):
def to_plaintext(msg):
- changedp = False
- for subpart in typed_subpart_iterator(msg, 'text', 'html'):
- filename = tempfile.mktemp('.html')
- fp = open(filename, 'w')
- try:
- fp.write(subpart.get_payload())
- fp.close()
- cmd = os.popen(config.HTML_TO_PLAIN_TEXT_COMMAND %
- {'filename': filename})
- plaintext = cmd.read()
- rtn = cmd.close()
- if rtn:
- log.error('HTML->text/plain error: %s', rtn)
- finally:
+ changedp = 0
+ counter = count()
+ with ExitStack() as resources:
+ tempdir = tempfile.mkdtemp()
+ resources.callback(shutil.rmtree, tempdir)
+ for subpart in typed_subpart_iterator(msg, 'text', 'html'):
+ filename = os.path.join(tempdir, '{}.html'.format(next(counter)))
+ with open(filename, 'w', encoding='utf-8') as fp:
+ fp.write(subpart.get_payload())
+ template = Template(config.mailman.html_to_plain_text_command)
+ command = template.safe_substitute(filename=filename).split()
try:
- os.unlink(filename)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
- # Now replace the payload of the subpart and twiddle the Content-Type:
- del subpart['content-transfer-encoding']
- subpart.set_payload(plaintext)
- subpart.set_type('text/plain')
- changedp = True
+ stdout = subprocess.check_output(
+ command, universal_newlines=True)
+ except subprocess.CalledProcessError:
+ log.exception('HTML -> text/plain command error')
+ else:
+ # Replace the payload of the subpart with the converted text
+ # and tweak the content type.
+ del subpart['content-transfer-encoding']
+ subpart.set_payload(stdout)
+ subpart.set_type('text/plain')
+ changedp += 1
return changedp
@@ -272,7 +274,7 @@ def get_file_ext(m):
fext = ''
filename = m.get_filename('') or m.get_param('name', '')
if filename:
- fext = splitext(oneline(filename,'utf-8'))[1]
+ fext = os.path.splitext(oneline(filename,'utf-8'))[1]
if len(fext) > 1:
fext = fext[1:]
else:
diff --git a/src/mailman/handlers/tests/test_mimedel.py b/src/mailman/handlers/tests/test_mimedel.py
index cd80b47ab..a82190065 100644
--- a/src/mailman/handlers/tests/test_mimedel.py
+++ b/src/mailman/handlers/tests/test_mimedel.py
@@ -19,11 +19,18 @@
__all__ = [
'TestDispose',
+ 'TestHTMLFilter',
+ 'dummy_script',
]
+import os
+import sys
+import shutil
+import tempfile
import unittest
+from contextlib import ExitStack, contextmanager
from mailman.app.lifecycle import create_list
from mailman.config import config
from mailman.core import errors
@@ -39,10 +46,32 @@ from zope.component import getUtility
+@contextmanager
+def dummy_script():
+ with ExitStack() as resources:
+ tempdir = tempfile.mkdtemp()
+ resources.callback(shutil.rmtree, tempdir)
+ filter_path = os.path.join(tempdir, 'filter.py')
+ with open(filter_path, 'w', encoding='utf-8') as fp:
+ print("""\
+import sys
+print('Converted text/html to text/plain')
+print('Filename:', sys.argv[1])
+""", file=fp)
+ config.push('dummy script', """\
+[mailman]
+html_to_plain_text_command = {exe} {script} $filename
+""".format(exe=sys.executable, script=filter_path))
+ resources.callback(config.pop, 'dummy script')
+ yield
+
+
+
class TestDispose(unittest.TestCase):
"""Test the mime_delete handler."""
layer = ConfigLayer
+ maxxDiff = None
def setUp(self):
self._mlist = create_list('test@example.com')
@@ -57,11 +86,7 @@ Message-ID: <ant>
[mailman]
site_owner: noreply@example.com
""")
- # Let assertMultiLineEqual work without bounds.
- self.maxDiff = None
-
- def tearDown(self):
- config.pop('dispose')
+ self.addCleanup(config.pop, 'dispose')
def test_dispose_discard(self):
self._mlist.filter_action = FilterAction.discard
@@ -171,3 +196,34 @@ message.
self.assertTrue(line.endswith(
'{0} invalid FilterAction: test@example.com. '
'Treating as discard'.format(action.name)))
+
+
+
+class TestHTMLFilter(unittest.TestCase):
+ """Test the conversion of HTML to plaintext."""
+
+ layer = ConfigLayer
+
+ def setUp(self):
+ self._mlist = create_list('test@example.com')
+ self._mlist.convert_html_to_plaintext = True
+ self._mlist.filter_content = True
+
+ def test_convert_html_to_plaintext(self):
+ # Converting to plain text calls a command line script.
+ msg = mfs("""\
+From: aperson@example.com
+Content-Type: text/html
+MIME-Version: 1.0
+
+<html><head></head>
+<body></body></html>
+""")
+ process = config.handlers['mime-delete'].process
+ with dummy_script():
+ process(self._mlist, msg, {})
+ self.assertEqual(msg.get_content_type(), 'text/plain')
+ self.assertTrue(
+ msg['x-content-filtered-by'].startswith('Mailman/MimeDel'))
+ payload_lines = msg.get_payload().splitlines()
+ self.assertEqual(payload_lines[0], 'Converted text/html to text/plain')
diff --git a/src/mailman/rest/docs/systemconf.rst b/src/mailman/rest/docs/systemconf.rst
index 66953f4ba..fa8b7384b 100644
--- a/src/mailman/rest/docs/systemconf.rst
+++ b/src/mailman/rest/docs/systemconf.rst
@@ -15,6 +15,7 @@ You can also get all the values for a particular section.
default_language: en
email_commands_max_lines: 10
filtered_messages_are_preservable: no
+ html_to_plain_text_command: /usr/bin/lynx -dump $filename
http_etag: ...
layout: testing
noreply_address: noreply
diff --git a/src/mailman/rest/tests/test_systemconf.py b/src/mailman/rest/tests/test_systemconf.py
index b8f14125b..4aa0dc920 100644
--- a/src/mailman/rest/tests/test_systemconf.py
+++ b/src/mailman/rest/tests/test_systemconf.py
@@ -43,16 +43,17 @@ class TestSystemConfiguration(unittest.TestCase):
self.assertIn('http_etag', json)
del json['http_etag']
self.assertEqual(json, dict(
- site_owner='noreply@example.com',
- noreply_address='noreply',
default_language='en',
- sender_headers='from from_ reply-to sender',
email_commands_max_lines='10',
+ filtered_messages_are_preservable='no',
+ html_to_plain_text_command='/usr/bin/lynx -dump $filename',
+ layout='testing',
+ noreply_address='noreply',
pending_request_life='3d',
- pre_hook='',
post_hook='',
- layout='testing',
- filtered_messages_are_preservable='no',
+ pre_hook='',
+ sender_headers='from from_ reply-to sender',
+ site_owner='noreply@example.com',
))
def test_dotted_section(self):