diff options
| -rw-r--r-- | src/mailman/app/bounces.py | 11 | ||||
| -rw-r--r-- | src/mailman/config/schema.cfg | 60 | ||||
| -rw-r--r-- | src/mailman/core/errors.py | 11 | ||||
| -rw-r--r-- | src/mailman/core/pipelines.py | 24 | ||||
| -rw-r--r-- | src/mailman/core/tests/test_pipelines.py | 92 | ||||
| -rw-r--r-- | src/mailman/database/schema/postgres.sql | 1 | ||||
| -rw-r--r-- | src/mailman/database/schema/sqlite.sql | 1 | ||||
| -rw-r--r-- | src/mailman/docs/NEWS.rst | 9 | ||||
| -rw-r--r-- | src/mailman/interfaces/action.py | 6 | ||||
| -rw-r--r-- | src/mailman/interfaces/handler.py | 2 | ||||
| -rw-r--r-- | src/mailman/interfaces/mailinglist.py | 6 | ||||
| -rw-r--r-- | src/mailman/model/mailinglist.py | 3 | ||||
| -rw-r--r-- | src/mailman/pipeline/docs/filtering.rst | 2 | ||||
| -rw-r--r-- | src/mailman/pipeline/docs/scrubber.rst | 230 | ||||
| -rw-r--r-- | src/mailman/pipeline/mime_delete.py | 71 | ||||
| -rw-r--r-- | src/mailman/pipeline/scrubber.py | 502 | ||||
| -rw-r--r-- | src/mailman/pipeline/tests/test_mimedel.py | 213 | ||||
| -rw-r--r-- | src/mailman/pipeline/tests/test_scrubber.py | 45 | ||||
| -rw-r--r-- | src/mailman/runners/digest.py | 7 | ||||
| -rw-r--r-- | src/mailman/runners/docs/digester.rst | 6 | ||||
| -rw-r--r-- | src/mailman/styles/default.py | 3 |
21 files changed, 397 insertions, 908 deletions
diff --git a/src/mailman/app/bounces.py b/src/mailman/app/bounces.py index a9bed97ac..5d3a0521a 100644 --- a/src/mailman/app/bounces.py +++ b/src/mailman/app/bounces.py @@ -59,15 +59,16 @@ DOT = '.' -def bounce_message(mlist, msg, e=None): +def bounce_message(mlist, msg, error=None): """Bounce the message back to the original author. :param mlist: The mailing list that the message was posted to. :type mlist: `IMailingList` :param msg: The original message. :type msg: `email.message.Message` - :param e: Optional exception causing the bounce. - :type e: Exception + :param error: Optional exception causing the bounce. The exception + instance must have a `.message` attribute. + :type error: Exception """ # Bounce a message back to the sender, with an error message if provided # in the exception argument. @@ -77,10 +78,10 @@ def bounce_message(mlist, msg, e=None): return subject = msg.get('subject', _('(no subject)')) subject = oneline(subject, mlist.preferred_language.charset) - if e is None: + if error is None: notice = _('[No bounce details are available]') else: - notice = _(e.notice) + notice = _(error.message) # Currently we always craft bounces as MIME messages. bmsg = UserNotification(msg.sender, mlist.owner_address, subject, lang=mlist.preferred_language) diff --git a/src/mailman/config/schema.cfg b/src/mailman/config/schema.cfg index 8b5aa690a..6556eea4e 100644 --- a/src/mailman/config/schema.cfg +++ b/src/mailman/config/schema.cfg @@ -62,6 +62,10 @@ post_hook: # Which paths.* file system layout to use. layout: dev +# Can MIME filtered messages be preserved by list owners? +filtered_messages_are_preservable: no + + [shell] # `bin/mailman shell` (also `withlist`) gives you an interactive prompt that # you can use to interact with an initialized and configured Mailman system. 
@@ -226,12 +230,11 @@ migrations_path: mailman.database.schema # - http -- Internal wsgi-based web interface # - locks -- Lock state changes # - mischief -- Various types of hostile activity -# - post -- Information about messages posted to mailing lists # - runner -- Runner process start/stops # - smtp -- Successful SMTP activity # - smtp-failure -- Unsuccessful SMTP activity # - subscribe -- Information about leaves/joins -# - vette -- Information related to admindb activity +# - vette -- Message vetting information format: %(asctime)s (%(process)d) %(message)s datefmt: %b %d %H:%M:%S %Y propagate: no @@ -560,59 +563,6 @@ priority: 0 class: mailman.styles.default.DefaultStyle -[scrubber] -# A filter that converts from multipart messages to "flat" messages -# (i.e. containing a single payload). This is required for Pipermail, and you -# may want to set it to 0 for external archivers. You can also replace it -# with your own module as long as it contains a process() function that takes -# a MailList object and a Message object. It should raise -# Errors.DiscardMessage if it wants to throw the message away. Otherwise it -# should modify the Message object as necessary. -archive_scrubber: mailman.archiving.prototype.Prototype - -# This variable defines what happens to text/html subparts. They can be -# stripped completely, escaped, or filtered through an external program. The -# legal values are: -# 0 - Strip out text/html parts completely, leaving a notice of the removal in -# the message. If the outer part is text/html, the entire message is -# discarded. -# 1 - Remove any embedded text/html parts, leaving them as HTML-escaped -# attachments which can be separately viewed. Outer text/html parts are -# simply HTML-escaped. -# 2 - Leave it inline, but HTML-escape it -# 3 - Remove text/html as attachments but don't HTML-escape them. 
Note: this -# is very dangerous because it essentially means anybody can send an HTML -# email to your site containing evil JavaScript or web bugs, or other -# nasty things, and folks viewing your archives will be susceptible. You -# should only consider this option if you do heavy moderation of your list -# postings. -# -# Note: given the current archiving code, it is not possible to leave -# text/html parts inline and un-escaped. I wouldn't think it'd be a good idea -# to do anyway. -# -# The value can also be a string, in which case it is the name of a command to -# filter the HTML page through. The resulting output is left in an attachment -# or as the entirety of the message when the outer part is text/html. The -# format of the string must include a $filename substitution variable which -# will contain the name of the temporary file that the program should operate -# on. It should write the processed message to stdout. Set this to -# HTML_TO_PLAIN_TEXT_COMMAND to specify an HTML to plain text conversion -# program. -archive_html_sanitizer: 1 - -# Control parameter whether the scrubber should use the message attachment's -# filename as is indicated by the filename parameter or use 'attachement-xxx' -# instead. The default is set 'no' because the applications on PC and Mac -# begin to use longer non-ascii filenames. -use_attachment_filename: no - -# Use of attachment filename extension per se is may be dangerous because -# viruses fakes it. You can set this 'yes' if you filter the attachment by -# filename extension. -use_attachment_filename_extension: no - - [digests] # Headers which should be kept in both RFC 1153 (plain) and MIME digests. RFC # 1153 also specifies these headers in this exact order, so order matters. 
diff --git a/src/mailman/core/errors.py b/src/mailman/core/errors.py index ea1c78967..529ac86fe 100644 --- a/src/mailman/core/errors.py +++ b/src/mailman/core/errors.py @@ -110,17 +110,6 @@ class DiscardMessage(HandlerError): class RejectMessage(HandlerError): """The message will be bounced back to the sender""" - def __init__(self, notice=None): - super(RejectMessage, self).__init__() - if notice is None: - notice = _('Your message was rejected') - if notice.endswith('\n\n'): - pass - elif notice.endswith('\n'): - notice += '\n' - else: - notice += '\n\n' - self.notice = notice diff --git a/src/mailman/core/pipelines.py b/src/mailman/core/pipelines.py index bd709f41e..d5cee588b 100644 --- a/src/mailman/core/pipelines.py +++ b/src/mailman/core/pipelines.py @@ -31,13 +31,16 @@ import logging from zope.interface import implements from zope.interface.verify import verifyObject +from mailman.app.bounces import bounce_message from mailman.app.finder import find_components from mailman.config import config +from mailman.core import errors from mailman.core.i18n import _ from mailman.interfaces.handler import IHandler from mailman.interfaces.pipeline import IPipeline -log = logging.getLogger('mailman.debug') +dlog = logging.getLogger('mailman.debug') +vlog = logging.getLogger('mailman.vette') @@ -52,9 +55,19 @@ def process(mlist, msg, msgdata, pipeline_name='built-in'): message_id = msg.get('message-id', 'n/a') pipeline = config.pipelines[pipeline_name] for handler in pipeline: - log.debug('[pipeline] processing {0}: {1}'.format( - handler.name, message_id)) - handler.process(mlist, msg, msgdata) + dlog.debug('{0} pipeline {1} processing: {2}'.format( + message_id, pipeline_name, handler.name)) + try: + handler.process(mlist, msg, msgdata) + except errors.DiscardMessage as error: + vlog.info( + '{0} discarded by "{1}" pipeline handler "{2}": {3}'.format( + message_id, pipeline_name, handler.name, error.message)) + except errors.RejectMessage as error: + vlog.info( + '{0} 
rejected by "{1}" pipeline handler "{2}": {3}'.format( + message_id, pipeline_name, handler.name, error.message)) + bounce_message(mlist, msg, error) @@ -84,7 +97,6 @@ class BuiltInPipeline(BasePipeline): _default_handlers = ( 'mime-delete', - 'scrubber', 'tagger', 'calculate-recipients', 'avoid-duplicates', @@ -92,8 +104,8 @@ class BuiltInPipeline(BasePipeline): 'cleanse-dkim', 'cook-headers', 'rfc-2369', - 'to-digest', 'to-archive', + 'to-digest', 'to-usenet', 'after-delivery', 'acknowledge', diff --git a/src/mailman/core/tests/test_pipelines.py b/src/mailman/core/tests/test_pipelines.py index 363587d3b..0cf3732c9 100644 --- a/src/mailman/core/tests/test_pipelines.py +++ b/src/mailman/core/tests/test_pipelines.py @@ -26,16 +26,58 @@ __all__ = [ import unittest +from zope.interface import implements from mailman.app.lifecycle import create_list +from mailman.config import config +from mailman.core.errors import DiscardMessage, RejectMessage from mailman.core.pipelines import process +from mailman.interfaces.handler import IHandler +from mailman.interfaces.pipeline import IPipeline from mailman.testing.helpers import ( + LogFileMark, + get_queue_messages, reset_the_world, specialized_message_from_string as mfs) from mailman.testing.layers import ConfigLayer +class DiscardingHandler: + implements(IHandler) + name = 'discarding' + + def process(self, mlist, msg, msgdata): + raise DiscardMessage('by test handler') + + +class RejectHandler: + implements(IHandler) + name = 'rejecting' + + def process(self, mlist, msg, msgdata): + raise RejectMessage('by test handler') + + +class DiscardingPipeline: + implements(IPipeline) + name = 'test-discarding' + description = 'Discarding test pipeline' + + def __iter__(self): + yield DiscardingHandler() + + +class RejectingPipeline: + implements(IPipeline) + name = 'test-rejecting' + description = 'Rejectinging test pipeline' + + def __iter__(self): + yield RejectHandler() + + + class TestBuiltinPipeline(unittest.TestCase): """Test 
various aspects of the built-in postings pipeline.""" @@ -43,21 +85,51 @@ class TestBuiltinPipeline(unittest.TestCase): def setUp(self): self._mlist = create_list('test@example.com') - - def tearDown(self): - reset_the_world() - - def test_rfc2369_headers(self): - # Ensure that RFC 2369 List-* headers are added. - msg = mfs("""\ + config.pipelines['test-discarding'] = DiscardingPipeline() + config.pipelines['test-rejecting'] = RejectingPipeline() + self._msg = mfs("""\ From: Anne Person <anne@example.org> To: test@example.com Subject: a test +Message-ID: <ant> testing """) + + def tearDown(self): + reset_the_world() + del config.pipelines['test-discarding'] + del config.pipelines['test-rejecting'] + + def test_rfc2369_headers(self): + # Ensure that RFC 2369 List-* headers are added. msgdata = {} - process(self._mlist, msg, msgdata, + process(self._mlist, self._msg, msgdata, pipeline_name='default-posting-pipeline') - self.assertEqual(msg['list-id'], '<test.example.com>') - self.assertEqual(msg['list-post'], '<mailto:test@example.com>') + self.assertEqual(self._msg['list-id'], '<test.example.com>') + self.assertEqual(self._msg['list-post'], '<mailto:test@example.com>') + + def test_discarding_pipeline(self): + # If a handler in the pipeline raises DiscardMessage, the message will + # be thrown away, but with a log message. + mark = LogFileMark('mailman.vette') + process(self._mlist, self._msg, {}, 'test-discarding') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '<ant> discarded by "test-discarding" pipeline handler ' + '"discarding": by test handler')) + + def test_rejecting_pipeline(self): + # If a handler in the pipeline raises RejectMessage, the message will + # be bounced back to the sender, with a log message.
+ mark = LogFileMark('mailman.vette') + process(self._mlist, self._msg, {}, 'test-rejecting') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '<ant> rejected by "test-rejecting" pipeline handler ' + '"rejecting": by test handler')) + # In the rejection case, the original message will also be in the + # virgin queue. + messages = get_queue_messages('virgin') + self.assertEqual(len(messages), 1) + self.assertEqual(str(messages[0].msg['subject']), 'a test') diff --git a/src/mailman/database/schema/postgres.sql b/src/mailman/database/schema/postgres.sql index 713d6d1a3..10b318276 100644 --- a/src/mailman/database/schema/postgres.sql +++ b/src/mailman/database/schema/postgres.sql @@ -42,6 +42,7 @@ CREATE TABLE mailinglist ( bounce_you_are_disabled_warnings INTEGER, bounce_you_are_disabled_warnings_interval TEXT, -- Content filtering. + filter_action INTEGER, filter_content BOOLEAN, collapse_alternatives BOOLEAN, convert_html_to_plaintext BOOLEAN, diff --git a/src/mailman/database/schema/sqlite.sql b/src/mailman/database/schema/sqlite.sql index f835a8d84..ac081037d 100644 --- a/src/mailman/database/schema/sqlite.sql +++ b/src/mailman/database/schema/sqlite.sql @@ -138,6 +138,7 @@ CREATE TABLE mailinglist ( bounce_you_are_disabled_warnings INTEGER, bounce_you_are_disabled_warnings_interval TEXT, -- Content filtering. + filter_action INTEGER, filter_content BOOLEAN, collapse_alternatives BOOLEAN, convert_html_to_plaintext BOOLEAN, diff --git a/src/mailman/docs/NEWS.rst b/src/mailman/docs/NEWS.rst index 6869e2889..a2cff94b3 100644 --- a/src/mailman/docs/NEWS.rst +++ b/src/mailman/docs/NEWS.rst @@ -46,6 +46,13 @@ Architecture attribute on the message object, instead of trusting a possibly incorrect value if it's already set. The individual `IArchiver` implementations no longer set the `X-Message-ID-Hash` header. + * Pipermail has been eradicated. 
+ * Configuration variable `[mailman]filtered_messages_are_preservable` + controls whether messages which have their top-level `Content-Type` + filtered out can be preserved in the `bad` queue by list owners. + * Configuration section `[scrubber]` removed, as is the scrubber handler. + This handler was essentially incompatible with Mailman 3 since it required + coordination with Pipermail to store attachments on disk. Database -------- @@ -60,6 +67,8 @@ Database - digest_footer -> digest_footer_uri - start_chain -> posting_chain - pipeline -> posting_pipeline + * Schema additions: + - mailinglist.filter_action REST ---- diff --git a/src/mailman/interfaces/action.py b/src/mailman/interfaces/action.py index c7c79f7d8..9b3c1fbcc 100644 --- a/src/mailman/interfaces/action.py +++ b/src/mailman/interfaces/action.py @@ -20,6 +20,7 @@ __metaclass__ = type __all__ = [ 'Action', + 'FilterAction', ] @@ -33,3 +34,8 @@ class Action(Enum): discard = 2 accept = 3 defer = 4 + + +class FilterAction(Action): + forward = 5 + preserve = 6 diff --git a/src/mailman/interfaces/handler.py b/src/mailman/interfaces/handler.py index f9a075b8c..9007e8490 100644 --- a/src/mailman/interfaces/handler.py +++ b/src/mailman/interfaces/handler.py @@ -17,7 +17,7 @@ """Interface describing a pipeline handler.""" -from __future__ import absolute_import, unicode_literals +from __future__ import absolute_import, print_function, unicode_literals __metaclass__ = type __all__ = [ diff --git a/src/mailman/interfaces/mailinglist.py b/src/mailman/interfaces/mailinglist.py index a3e6e443a..843430caf 100644 --- a/src/mailman/interfaces/mailinglist.py +++ b/src/mailman/interfaces/mailinglist.py @@ -423,6 +423,12 @@ class IMailingList(Interface): Filtering is performed on MIME type and file name extension. """) + filter_action = Attribute( + """Action to take when the top-level content-type is filtered. + + The value is a `FilterAction` enum. 
+ """) + convert_html_to_plaintext = Attribute( """Flag specifying whether text/html parts should be converted. diff --git a/src/mailman/model/mailinglist.py b/src/mailman/model/mailinglist.py index 4a6b000ec..3424b7ec9 100644 --- a/src/mailman/model/mailinglist.py +++ b/src/mailman/model/mailinglist.py @@ -38,7 +38,7 @@ from zope.interface import implements from mailman.config import config from mailman.database.model import Model from mailman.database.types import Enum -from mailman.interfaces.action import Action +from mailman.interfaces.action import Action, FilterAction from mailman.interfaces.address import IAddress from mailman.interfaces.autorespond import ResponseAction from mailman.interfaces.bounce import UnrecognizedBounceDisposition @@ -115,6 +115,7 @@ class MailingList(Model): autorespond_requests = Enum(ResponseAction) autoresponse_request_text = Unicode() # Content filters. + filter_action = Enum(FilterAction) filter_content = Bool() collapse_alternatives = Bool() convert_html_to_plaintext = Bool() diff --git a/src/mailman/pipeline/docs/filtering.rst b/src/mailman/pipeline/docs/filtering.rst index 5b54424e4..fd0b33d3b 100644 --- a/src/mailman/pipeline/docs/filtering.rst +++ b/src/mailman/pipeline/docs/filtering.rst @@ -45,7 +45,7 @@ content type matches the filter, the entire message will be discarded. >>> process(mlist, msg, {}) Traceback (most recent call last): ... - DiscardMessage + DiscardMessage: The message's content type was explicitly disallowed However, if we turn off content filtering altogether, then the handler short-circuits. diff --git a/src/mailman/pipeline/docs/scrubber.rst b/src/mailman/pipeline/docs/scrubber.rst deleted file mode 100644 index 86a8161a7..000000000 --- a/src/mailman/pipeline/docs/scrubber.rst +++ /dev/null @@ -1,230 +0,0 @@ -============ -The scrubber -============ - -The scrubber is an integral part of Mailman, both in the normal delivery of -messages and in components such as the archiver. 
Its primary purpose is to -scrub attachments from messages so that binary goop doesn't end up in an -archive message. - - >>> mlist = create_list('_xtest@example.com') - >>> mlist.preferred_language = 'en' - -Helper functions for getting the attachment data. -:: - - >>> import os, re - >>> def read_attachment(filename, remove=True): - ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, - ... mlist.fqdn_listname, filename) - ... fp = open(path) - ... try: - ... data = fp.read() - ... finally: - ... fp.close() - ... if remove: - ... os.unlink(path) - ... return data - - >>> from urlparse import urlparse - >>> def read_url_from_message(msg): - ... url = None - ... for line in msg.get_payload().splitlines(): - ... mo = re.match('URL: <(?P<url>[^>]+)>', line) - ... if mo: - ... url = mo.group('url') - ... break - ... path = '/'.join(urlparse(url).path.split('/')[3:]) - ... return read_attachment(path) - - -Saving attachments -================== - -The Scrubber handler exposes a function called ``save_attachment()`` which can -be used to strip various types of attachments and store them in the archive -directory. This is a public interface used by components outside the normal -processing pipeline. - -Site administrators can decide whether the scrubber should use the attachment -filename suggested in the message's ``Content-Disposition:`` header or not. -If enabled, the filename will be used when this header attribute is present -(yes, this is an unfortunate double negative). -:: - - >>> config.push('test config', """ - ... [scrubber] - ... use_attachment_filename: yes - ... """) - >>> msg = message_from_string("""\ - ... Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... 
""") - - >>> from mailman.pipeline.scrubber import save_attachment - >>> print save_attachment(mlist, msg, 'dir') - <http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif> - >>> data = read_attachment('dir/xtest.gif') - >>> print data[:6] - GIF87a - >>> len(data) - 34 - -Saving the attachment does not alter the original message. - - >>> print msg.as_string() - Content-Type: image/gif; name="xtest.gif" - Content-Transfer-Encoding: base64 - Content-Disposition: attachment; filename="xtest.gif" - <BLANKLINE> - R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - -The site administrator can also configure Mailman to ignore the -``Content-Disposition:`` filename. This is the default. - - >>> config.pop('test config') - >>> config.push('test config', """ - ... [scrubber] - ... use_attachment_filename: no - ... """) - >>> msg = message_from_string("""\ - ... Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... """) - >>> print save_attachment(mlist, msg, 'dir') - <http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif> - >>> data = read_attachment('dir/xtest.gif') - Traceback (most recent call last): - IOError: [Errno ...] No such file or directory: - u'.../archives/private/_xtest@example.com/dir/xtest.gif' - >>> data = read_attachment('dir/attachment.gif') - >>> print data[:6] - GIF87a - >>> len(data) - 34 - - -Scrubbing image attachments -=========================== - -When scrubbing image attachments, the original message is modified to include -a reference to the attachment file as available through the on-line archive. - - >>> msg = message_from_string("""\ - ... MIME-Version: 1.0 - ... Content-Type: multipart/mixed; boundary="BOUNDARY" - ... - ... --BOUNDARY - ... Content-type: text/plain; charset=us-ascii - ... - ... This is a message. - ... --BOUNDARY - ... 
Content-Type: image/gif; name="xtest.gif" - ... Content-Transfer-Encoding: base64 - ... Content-Disposition: attachment; filename="xtest.gif" - ... - ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== - ... --BOUNDARY-- - ... """) - >>> msgdata = {} - -The ``Scrubber.process()`` function is different than other handler process -functions in that it returns the scrubbed message. - - >>> from mailman.pipeline.scrubber import process - >>> scrubbed_msg = process(mlist, msg, msgdata) - >>> scrubbed_msg is msg - True - >>> print scrubbed_msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Type: text/plain; charset="us-ascii" - Content-Transfer-Encoding: 7bit - <BLANKLINE> - This is a message. - -------------- next part -------------- - A non-text attachment was scrubbed... - Name: xtest.gif - Type: image/gif - Size: 34 bytes - Desc: not available - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> - <BLANKLINE> - -This is the same as the transformed message originally passed in. - - >>> print msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Type: text/plain; charset="us-ascii" - Content-Transfer-Encoding: 7bit - <BLANKLINE> - This is a message. - -------------- next part -------------- - A non-text attachment was scrubbed... - Name: xtest.gif - Type: image/gif - Size: 34 bytes - Desc: not available - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> - <BLANKLINE> - >>> msgdata - {} - -The URL will point to the attachment sitting in the archive. - - >>> data = read_url_from_message(msg) - >>> data[:6] - 'GIF87a' - >>> len(data) - 34 - - -Scrubbing text attachments -========================== - -Similar to image attachments, text attachments will also be scrubbed, but the -placeholder will be slightly different. - - >>> msg = message_from_string("""\ - ... MIME-Version: 1.0 - ... Content-Type: multipart/mixed; boundary="BOUNDARY" - ... - ... --BOUNDARY - ... 
Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no - ... - ... This is a message. - ... --BOUNDARY - ... Content-type: text/plain; name="xtext.txt" - ... Content-Disposition: attachment; filename="xtext.txt" - ... - ... This is a text attachment. - ... --BOUNDARY-- - ... """) - >>> scrubbed_msg = process(mlist, msg, {}) - >>> print scrubbed_msg.as_string() - MIME-Version: 1.0 - Message-ID: ... - Content-Transfer-Encoding: 7bit - Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no" - <BLANKLINE> - This is a message. - -------------- next part -------------- - An embedded and charset-unspecified text was scrubbed... - Name: xtext.txt - URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt> - <BLANKLINE> - >>> read_url_from_message(msg) - 'This is a text attachment.' - - -Clean up -======== - - >>> config.pop('test config') diff --git a/src/mailman/pipeline/mime_delete.py b/src/mailman/pipeline/mime_delete.py index 402d13714..99fdc3ede 100644 --- a/src/mailman/pipeline/mime_delete.py +++ b/src/mailman/pipeline/mime_delete.py @@ -37,14 +37,18 @@ import errno import logging import tempfile -from email.Iterators import typed_subpart_iterator +from email.iterators import typed_subpart_iterator +from email.mime.message import MIMEMessage +from email.mime.text import MIMEText +from lazr.config import as_boolean from os.path import splitext from zope.interface import implements from mailman.config import config from mailman.core import errors from mailman.core.i18n import _ -from mailman.core.switchboard import Switchboard +from mailman.email.message import OwnerNotification +from mailman.interfaces.action import FilterAction from mailman.interfaces.handler import IHandler from mailman.utilities.string import oneline from mailman.version import VERSION @@ -54,6 +58,44 @@ log = logging.getLogger('mailman.error') +def dispose(mlist, msg, msgdata, why): + if mlist.filter_action is FilterAction.reject: + # 
Bounce the message to the original author. + raise errors.RejectMessage(why) + elif mlist.filter_action is FilterAction.forward: + # Forward it on to the list moderators. + # FIXME 2012-03-16 BAW: Trunk uses .display_name + text=_("""\ +The attached message matched the $mlist.real_name mailing list's content +filtering rules and was prevented from being forwarded on to the list +membership. You are receiving the only remaining copy of the discarded +message. + +""") + subject=_('Content filter message notification') + notice = OwnerNotification(mlist, subject, roster=mlist.moderators) + notice.set_type('multipart/mixed') + notice.attach(MIMEText(text)) + notice.attach(MIMEMessage(msg)) + notice.send(mlist) + # Let this fall through so the original message gets discarded. + elif mlist.filter_action is FilterAction.preserve: + if as_boolean(config.mailman.filtered_messages_are_preservable): + # This is just like discarding the message except that a copy is + # placed in the 'bad' queue should the site administrator want to + # inspect the message. + filebase = config.switchboards['bad'].enqueue(msg, msgdata) + log.info('{0} preserved in file base {1}'.format( + msg.get('message-id', 'n/a'), filebase)) + else: + log.error( + '{1} invalid FilterAction: {0}. 
Treating as discard'.format( + mlist.fqdn_listname, mlist.filter_action.name)) + # Most cases also discard the message + raise errors.DiscardMessage(why) + + + def process(mlist, msg, msgdata): # We also don't care about our own digests or plaintext ctype = msg.get_content_type() @@ -227,31 +269,6 @@ def to_plaintext(msg): -def dispose(mlist, msg, msgdata, why): - # filter_action == 0 just discards, see below - if mlist.filter_action == 1: - # Bounce the message to the original author - raise errors.RejectMessage, why - if mlist.filter_action == 2: - # Forward it on to the list owner - listname = mlist.internal_name() - mlist.ForwardMessage( - msg, - text=_("""\ -The attached message matched the $listname mailing list's content filtering -rules and was prevented from being forwarded on to the list membership. You -are receiving the only remaining copy of the discarded message. - -"""), - subject=_('Content filtered message notification')) - if mlist.filter_action == 3 and \ - config.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES: - badq = Switchboard('bad', config.BADQUEUE_DIR) - badq.enqueue(msg, msgdata) - # Most cases also discard the message - raise errors.DiscardMessage - - def get_file_ext(m): """ Get filename extension. Caution: some virus don't put filename diff --git a/src/mailman/pipeline/scrubber.py b/src/mailman/pipeline/scrubber.py deleted file mode 100644 index 76d10427e..000000000 --- a/src/mailman/pipeline/scrubber.py +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright (C) 2001-2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. -# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. 
-# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Cleanse a message for archiving.""" - -from __future__ import absolute_import, unicode_literals - -__metaclass__ = type -__all__ = [ - 'Scrubber', - ] - - -import os -import re -import time -import hashlib -import logging -import binascii - -from email.charset import Charset -from email.utils import make_msgid, parsedate -from flufl.lock import Lock -from lazr.config import as_boolean -from mimetypes import guess_all_extensions -from string import Template -from zope.interface import implements - -from mailman.config import config -from mailman.core.errors import DiscardMessage -from mailman.core.i18n import _ -from mailman.interfaces.handler import IHandler -from mailman.utilities.filesystem import makedirs -from mailman.utilities.modules import find_name -from mailman.utilities.string import oneline, websafe - - -# Path characters for common platforms -pre = re.compile(r'[/\\:]') -# All other characters to strip out of Content-Disposition: filenames -# (essentially anything that isn't an alphanum, dot, dash, or underscore). -sre = re.compile(r'[^-\w.]') -# Regexp to strip out leading dots -dre = re.compile(r'^\.*') - -BR = '<br>\n' -SPACE = ' ' - -log = logging.getLogger('mailman.error') - - - -def guess_extension(ctype, ext): - """Find the extension mapped to the given content-type. - - mimetypes maps multiple extensions to the same type, e.g. .doc, .dot, and - .wiz are all mapped to application/msword. This sucks for finding the - best reverse mapping. If the extension is one of the giving mappings, - we'll trust that, otherwise we'll just guess. 
:/ - """ - all_extensions = guess_all_extensions(ctype, strict=False) - if ext in all_extensions: - return ext - return (all_extensions[0] if len(all_extensions) > 0 else None) - - - -def safe_strftime(fmt, t): - """A time.strftime() that eats exceptions, returning None instead.""" - try: - return time.strftime(fmt, t) - except (TypeError, ValueError, OverflowError): - return None - - -def calculate_attachments_dir(msg, msgdata): - """Calculate the directory for attachements. - - Calculate the directory that attachments for this message will go under. - To avoid inode limitations, the scheme will be: - archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files> - Start by calculating the date-based and msgid-hash components. - """ - fmt = '%Y%m%d' - datestr = msg.get('Date') - if datestr: - now = parsedate(datestr) - else: - now = time.gmtime(msgdata.get('received_time', time.time())) - datedir = safe_strftime(fmt, now) - if not datedir: - datestr = msgdata.get('X-List-Received-Date') - if datestr: - datedir = safe_strftime(fmt, datestr) - if not datedir: - # What next? Unixfrom, I guess. - parts = msg.get_unixfrom().split() - try: - month = {'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, - 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12, - }.get(parts[3], 0) - day = int(parts[4]) - year = int(parts[6]) - except (IndexError, ValueError): - # Best we can do I think - month = day = year = 0 - datedir = '%04d%02d%02d' % (year, month, day) - assert datedir - # As for the msgid hash, we'll base this part on the Message-ID: so that - # all attachments for the same message end up in the same directory (we'll - # uniquify the filenames in that directory as needed). We use the first 2 - # and last 2 bytes of the SHA1 hash of the message id as the basis of the - # directory name. Clashes here don't really matter too much, and that - # still gives us a 32-bit space to work with. 
- msgid = msg['message-id'] - if msgid is None: - msgid = msg['Message-ID'] = make_msgid() - # We assume that the message id actually /is/ unique! - digest = hashlib.sha1(msgid).hexdigest() - return os.path.join('attachments', datedir, digest[:4] + digest[-4:]) - - -def replace_payload_by_text(msg, text, charset): - """Replace the payload of the message with some text.""" - # TK: This is a common function in replacing the attachment and the main - # message by a text (scrubbing). - del msg['content-type'] - del msg['content-transfer-encoding'] - if isinstance(text, unicode): - text = text.encode(charset) - if not isinstance(charset, str): - charset = str(charset) - msg.set_payload(text, charset) - - - -def process(mlist, msg, msgdata=None): - """Process the message through the scrubber.""" - sanitize = int(config.scrubber.archive_html_sanitizer) - outer = True - if msgdata is None: - msgdata = {} - if msgdata: - # msgdata is available if it is in GLOBAL_PIPELINE - # ie. not in digest or archiver - # check if the list owner want to scrub regular delivery - if not mlist.scrub_nondigest: - return - attachments_dir = calculate_attachments_dir(msg, msgdata) - charset = format_param = delsp = None - lcset = mlist.preferred_language.charset - lcset_out = Charset(lcset).output_charset or lcset - # Now walk over all subparts of this message and scrub out various types - for part in msg.walk(): - ctype = part.get_content_type() - # If the part is text/plain, we leave it alone - if ctype == 'text/plain': - # We need to choose a charset for the scrubbed message, so we'll - # arbitrarily pick the charset of the first text/plain part in the - # message. - # - # Also get the RFC 3676 stuff from this part. This seems to - # work okay for scrub_nondigest. It will also work as far as - # scrubbing messages for the archive is concerned. The plain - # format digest is going to be a disaster in any case as some of - # messages will be format="flowed" and some not. 
ToDigest creates - # its own Content-Type: header for the plain digest which won't - # have RFC 3676 parameters. If the message Content-Type: headers - # are retained for display in the digest, the parameters will be - # there for information, but not for the MUA. This is the best we - # can do without having get_payload() process the parameters. - if charset is None: - charset = part.get_content_charset(lcset) - format_param = part.get_param('format') - delsp = part.get_param('delsp') - # TK: if part is attached then check charset and scrub if none - if part.get('content-disposition') and \ - not part.get_content_charset(): - url = save_attachment(mlist, part, attachments_dir) - filename = part.get_filename(_('not available')) - filename = oneline(filename, lcset) - replace_payload_by_text(part, _("""\ -An embedded and charset-unspecified text was scrubbed... -Name: $filename -URL: $url -"""), lcset) - elif ctype == 'text/html' and isinstance(sanitize, int): - if sanitize == 0: - if outer: - raise DiscardMessage - replace_payload_by_text(part, - _('HTML attachment scrubbed and removed'), - # Adding charset arg and removing content-type - # sets content-type to text/plain - lcset) - elif sanitize == 2: - # By leaving it alone, Pipermail will automatically escape it. - # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do - # we even need this? - pass - elif sanitize == 3: - # Pull it out as an attachment but leave it unescaped. This - # is dangerous, but perhaps useful for heavily moderated - # lists. - url = save_attachment(mlist, part, attachments_dir, - filter_html=False) - replace_payload_by_text(part, _("""\ -An HTML attachment was scrubbed... -URL: $url -"""), lcset) - else: - # HTML-escape it and store it as an attachment, but make it - # look a /little/ bit prettier. :( - payload = websafe(part.get_payload(decode=True)) - # For whitespace in the margin, change spaces into - # non-breaking spaces, and tabs into 8 of those. 
Then use a - # mono-space font. Still looks hideous to me, but then I'd - # just as soon discard them. - lines = [s.replace(' ', ' ').replace('\t', ' ' * 8) - for s in payload.split('\n')] - payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n' - part.set_payload(payload) - # We're replacing the payload with the decoded payload so this - # will just get in the way. - del part['content-transfer-encoding'] - url = save_attachment(mlist, part, attachments_dir, - filter_html=False) - replace_payload_by_text(part, _("""\ -An HTML attachment was scrubbed... -URL: $url -"""), lcset) - elif ctype == 'message/rfc822': - # This part contains a submessage, so it too needs scrubbing - submsg = part.get_payload(0) - url = save_attachment(mlist, part, attachments_dir) - subject = submsg.get('subject', _('no subject')) - date = submsg.get('date', _('no date')) - who = submsg.get('from', _('unknown sender')) - size = len(str(submsg)) - replace_payload_by_text(part, _("""\ -An embedded message was scrubbed... -From: $who -Subject: $subject -Date: $date -Size: $size -URL: $url -"""), lcset) - # If the message isn't a multipart, then we'll strip it out as an - # attachment that would have to be separately downloaded. - elif part._payload and not part.is_multipart(): - payload = part.get_payload(decode=True) - ctype = part.get_content_type() - # XXX Under email 2.5, it is possible that payload will be None. - # This can happen when you have a Content-Type: multipart/* with - # only one part and that part has two blank lines between the - # first boundary and the end boundary. In email 3.0 you end up - # with a string in the payload. I think in this case it's safe to - # ignore the part. 
- if payload is None: - continue - size = len(payload) - url = save_attachment(mlist, part, attachments_dir) - desc = part.get('content-description', _('not available')) - desc = oneline(desc, lcset) - filename = part.get_filename(_('not available')) - filename = oneline(filename, lcset) - replace_payload_by_text(part, _("""\ -A non-text attachment was scrubbed... -Name: $filename -Type: $ctype -Size: $size bytes -Desc: $desc -URL: $url -"""), lcset) - outer = False - # We still have to sanitize multipart messages to flat text because - # Pipermail can't handle messages with list payloads. This is a kludge; - # def (n) clever hack ;). - # - # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do we even need - # this code? - if msg.is_multipart() and sanitize != 2: - # By default we take the charset of the first text/plain part in the - # message, but if there was none, we'll use the list's preferred - # language's charset. - if not charset or charset == 'us-ascii': - charset = lcset_out - else: - # normalize to the output charset if input/output are different - charset = Charset(charset).output_charset or charset - # We now want to concatenate all the parts which have been scrubbed to - # text/plain, into a single text/plain payload. We need to make sure - # all the characters in the concatenated string are in the same - # encoding, so we'll use the 'replace' key in the coercion call. - # BAW: Martin's original patch suggested we might want to try - # generalizing to utf-8, and that's probably a good idea (eventually). - text = [] - charsets = [] - for part in msg.walk(): - # TK: bug-id 1099138 and multipart - # MAS test payload - if part may fail if there are no headers. - if not part._payload or part.is_multipart(): - continue - # All parts should be scrubbed to text/plain by now. 
- partctype = part.get_content_type() - if partctype != 'text/plain': - text.append(_('Skipped content of type $partctype\n')) - continue - try: - t = part.get_payload(decode=True) or '' - # MAS: TypeError exception can occur if payload is None. This - # was observed with a message that contained an attached - # message/delivery-status part. Because of the special parsing - # of this type, this resulted in a text/plain sub-part with a - # null body. See bug 1430236. - except (binascii.Error, TypeError): - t = part.get_payload() or '' - # Email problem was solved by Mark Sapiro. (TK) - partcharset = part.get_content_charset('us-ascii') - try: - t = unicode(t, partcharset, 'replace') - except (UnicodeError, LookupError, ValueError, TypeError, - AssertionError): - # We can get here if partcharset is bogus in come way. - # Replace funny characters. We use errors='replace'. - t = unicode(t, 'ascii', 'replace') - # Separation is useful - if isinstance(t, basestring): - if not t.endswith('\n'): - t += '\n' - text.append(t) - if partcharset not in charsets: - charsets.append(partcharset) - # Now join the text and set the payload - sep = _('-------------- next part --------------\n') - assert isinstance(sep, unicode), ( - 'Expected a unicode separator, got %s' % type(sep)) - rept = sep.join(text) - # Replace entire message with text and scrubbed notice. 
- # Try with message charsets and utf-8 - if 'utf-8' not in charsets: - charsets.append('utf-8') - for charset in charsets: - try: - replace_payload_by_text(msg, rept, charset) - break - # Bogus charset can throw several exceptions - except (UnicodeError, LookupError, ValueError, TypeError, - AssertionError): - pass - if format_param: - msg.set_param('format', format_param) - if delsp: - msg.set_param('delsp', delsp) - return msg - - - -def save_attachment(mlist, msg, attachments_dir, filter_html=True): - fsdir = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, - mlist.fqdn_listname, attachments_dir) - makedirs(fsdir) - # Figure out the attachment type and get the decoded data - decodedpayload = msg.get_payload(decode=True) - # BAW: mimetypes ought to handle non-standard, but commonly found types, - # e.g. image/jpg (should be image/jpeg). For now we just store such - # things as application/octet-streams since that seems the safest. - ctype = msg.get_content_type() - # i18n file name is encoded - lcset = mlist.preferred_language.charset - filename = oneline(msg.get_filename(''), lcset) - filename, fnext = os.path.splitext(filename) - # For safety, we should confirm this is valid ext for content-type - # but we can use fnext if we introduce fnext filtering - if as_boolean(config.scrubber.use_attachment_filename_extension): - # HTML message doesn't have filename :-( - ext = fnext or guess_extension(ctype, fnext) - else: - ext = guess_extension(ctype, fnext) - if not ext: - # We don't know what it is, so assume it's just a shapeless - # application/octet-stream, unless the Content-Type: is - # message/rfc822, in which case we know we'll coerce the type to - # text/plain below. 
- if ctype == 'message/rfc822': - ext = '.txt' - else: - ext = '.bin' - # Allow only alphanumerics, dash, underscore, and dot - ext = sre.sub('', ext) - path = None - # We need a lock to calculate the next attachment number - with Lock(os.path.join(fsdir, 'attachments.lock')): - # Now base the filename on what's in the attachment, uniquifying it if - # necessary. - if (not filename or - not as_boolean(config.scrubber.use_attachment_filename)): - filebase = 'attachment' - else: - # Sanitize the filename given in the message headers - parts = pre.split(filename) - filename = parts[-1] - # Strip off leading dots - filename = dre.sub('', filename) - # Allow only alphanumerics, dash, underscore, and dot - filename = sre.sub('', filename) - # If the filename's extension doesn't match the type we guessed, - # which one should we go with? For now, let's go with the one we - # guessed so attachments can't lie about their type. Also, if the - # filename /has/ no extension, then tack on the one we guessed. - # The extension was removed from the name above. - filebase = filename - # Now we're looking for a unique name for this file on the file - # system. If msgdir/filebase.ext isn't unique, we'll add a counter - # after filebase, e.g. msgdir/filebase-cnt.ext - counter = 0 - extra = '' - while True: - path = os.path.join(fsdir, filebase + extra + ext) - # Generally it is not a good idea to test for file existance - # before just trying to create it, but the alternatives aren't - # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't - # NFS-safe). Besides, we have an exclusive lock now, so we're - # guaranteed that no other process will be racing with us. - if os.path.exists(path): - counter += 1 - extra = '-%04d' % counter - else: - break - # `path' now contains the unique filename for the attachment. There's - # just one more step we need to do. 
If the part is text/html and - # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be - # here), then send the attachment through the filter program for - # sanitization - if filter_html and ctype == 'text/html': - base, ext = os.path.splitext(path) - tmppath = base + '-tmp' + ext - fp = open(tmppath, 'w') - try: - fp.write(decodedpayload) - fp.close() - cmd = Template(config.mta.archive_html_sanitizer).safe_substitue( - filename=tmppath) - progfp = os.popen(cmd, 'r') - decodedpayload = progfp.read() - status = progfp.close() - if status: - log.error('HTML sanitizer exited with non-zero status: %s', - status) - finally: - os.unlink(tmppath) - # BAW: Since we've now sanitized the document, it should be plain - # text. Blarg, we really want the sanitizer to tell us what the type - # if the return data is. :( - ext = '.txt' - path = base + '.txt' - # Is it a message/rfc822 attachment? - elif ctype == 'message/rfc822': - submsg = msg.get_payload() - # BAW: I'm sure we can eventually do better than this. :( - decodedpayload = websafe(str(submsg)) - fp = open(path, 'w') - fp.write(decodedpayload) - fp.close() - # Now calculate the url to the list's archive. - scrubber_path = config.scrubber.archive_scrubber - base_url = find_name(scrubber_path).list_url(mlist) - if not base_url.endswith('/'): - base_url += '/' - # Trailing space will definitely be a problem with format=flowed. - # Bracket the URL instead. 
- url = '<' + base_url + '%s/%s%s%s>' % ( - attachments_dir, filebase, extra, ext) - return url - - - -class Scrubber: - """Cleanse a message for archiving.""" - - implements(IHandler) - - name = 'scrubber' - description = _('Cleanse a message for archiving.') - - def process(self, mlist, msg, msgdata): - """See `IHandler`.""" - process(mlist, msg, msgdata) diff --git a/src/mailman/pipeline/tests/test_mimedel.py b/src/mailman/pipeline/tests/test_mimedel.py new file mode 100644 index 000000000..566c1a40c --- /dev/null +++ b/src/mailman/pipeline/tests/test_mimedel.py @@ -0,0 +1,213 @@ +# Copyright (C) 2012 by the Free Software Foundation, Inc. +# +# This file is part of GNU Mailman. +# +# GNU Mailman is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Test the mime_delete handler.""" + +from __future__ import absolute_import, print_function, unicode_literals + +__metaclass__ = type +__all__ = [ + 'TestDispose', + ] + + +import unittest + +from zope.component import getUtility + +from mailman.app.lifecycle import create_list +from mailman.config import config +from mailman.core import errors +from mailman.interfaces.action import FilterAction +from mailman.interfaces.member import MemberRole +from mailman.interfaces.usermanager import IUserManager +from mailman.pipeline import mime_delete +from mailman.testing.helpers import ( + LogFileMark, + get_queue_messages, + specialized_message_from_string as mfs) +from mailman.testing.layers import ConfigLayer + + + +class TestDispose(unittest.TestCase): + """Test the mime_delete handler.""" + + layer = ConfigLayer + + def setUp(self): + self._mlist = create_list('test@example.com') + self._msg = mfs("""\ +From: anne@example.com +To: test@example.com +Subject: A disposable message +Message-ID: <ant> + +""") + # Python 2.7 has assertMultiLineEqual. Let this work without bounds. + self.maxDiff = None + self.eq = getattr(self, 'assertMultiLineEqual', self.assertEqual) + config.push('dispose', """ + [mailman] + site_owner: noreply@example.com + """) + + def tearDown(self): + config.pop('dispose') + + def test_dispose_discard(self): + self._mlist.filter_action = FilterAction.discard + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'discarding') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'discarding') + # There should be no messages in the 'bad' queue. 
+ self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_bounce(self): + self._mlist.filter_action = FilterAction.reject + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'rejecting') + except errors.RejectMessage as error: + pass + else: + raise AssertionError('RejectMessage exception expected') + self.assertEqual(error.message, 'rejecting') + # There should be no messages in the 'bad' queue. + self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_forward(self): + # The disposed message gets forwarded to the list moderators. So + # first add some moderators. + user_manager = getUtility(IUserManager) + anne = user_manager.create_address('anne@example.com') + bart = user_manager.create_address('bart@example.com') + self._mlist.subscribe(anne, MemberRole.moderator) + self._mlist.subscribe(bart, MemberRole.moderator) + # Now set the filter action and dispose the message. + self._mlist.filter_action = FilterAction.forward + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'forwarding') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'forwarding') + # There should now be a multipart message in the virgin queue destined + # for the mailing list owners. + messages = get_queue_messages('virgin') + self.assertEqual(len(messages), 1) + message = messages[0].msg + self.assertEqual(message.get_content_type(), 'multipart/mixed') + # Anne and Bart should be recipients of the message, but it will look + # like the message is going to the list owners. + self.assertEqual(message['to'], 'test-owner@example.com') + self.assertEqual(message.recipients, + set(['anne@example.com', 'bart@example.com'])) + # The list owner should be the sender. 
+ self.assertEqual(message['from'], 'noreply@example.com') + self.assertEqual(message['subject'], + 'Content filter message notification') + # The body of the first part provides the moderators some details. + part0 = message.get_payload(0) + self.assertEqual(part0.get_content_type(), 'text/plain') + self.eq(part0.get_payload(), """\ +The attached message matched the Test mailing list's content +filtering rules and was prevented from being forwarded on to the list +membership. You are receiving the only remaining copy of the discarded +message. + +""") + # The second part is the container for the original message. + part1 = message.get_payload(1) + self.assertEqual(part1.get_content_type(), 'message/rfc822') + # And the first part of *that* message will be the original message. + original = part1.get_payload(0) + self.assertEqual(original['subject'], 'A disposable message') + self.assertEqual(original['message-id'], '<ant>') + + def test_dispose_non_preservable(self): + # Two actions can happen here, depending on a site-wide setting. If + # the site owner has indicated that filtered messages cannot be + # preserved, then this is the same as discarding them. + self._mlist.filter_action = FilterAction.preserve + config.push('non-preservable', """ + [mailman] + filtered_messages_are_preservable: no + """) + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'not preserved') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + finally: + config.pop('non-preservable') + self.assertEqual(error.message, 'not preserved') + # There should be no messages in the 'bad' queue. + self.assertEqual(len(get_queue_messages('bad')), 0) + + def test_dispose_preservable(self): + # Two actions can happen here, depending on a site-wide setting. 
If + # the site owner has indicated that filtered messages can be + # preserved, then this is similar to discarding the message except + # that a copy is preserved in the 'bad' queue. + self._mlist.filter_action = FilterAction.preserve + config.push('preservable', """ + [mailman] + filtered_messages_are_preservable: yes + """) + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'preserved') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + finally: + config.pop('preservable') + self.assertEqual(error.message, 'preserved') + # There should be no messages in the 'bad' queue. + messages = get_queue_messages('bad') + self.assertEqual(len(messages), 1) + message = messages[0].msg + self.assertEqual(message['subject'], 'A disposable message') + self.assertEqual(message['message-id'], '<ant>') + + def test_bad_action(self): + # This should never happen, but what if it does? + # FilterAction.accept, FilterAction.hold, and FilterAction.defer are + # not valid. They are treated as discard actions, but the problem is + # also logged. + for action in (FilterAction.accept, + FilterAction.hold, + FilterAction.defer): + self._mlist.filter_action = action + mark = LogFileMark('mailman.error') + try: + mime_delete.dispose(self._mlist, self._msg, {}, 'bad action') + except errors.DiscardMessage as error: + pass + else: + raise AssertionError('DiscardMessage exception expected') + self.assertEqual(error.message, 'bad action') + line = mark.readline()[:-1] + self.assertTrue(line.endswith( + '{0} invalid FilterAction: test@example.com. ' + 'Treating as discard'.format(action.name))) diff --git a/src/mailman/pipeline/tests/test_scrubber.py b/src/mailman/pipeline/tests/test_scrubber.py deleted file mode 100644 index 7ac5eb855..000000000 --- a/src/mailman/pipeline/tests/test_scrubber.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2012 by the Free Software Foundation, Inc. -# -# This file is part of GNU Mailman. 
-# -# GNU Mailman is free software: you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# -# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# GNU Mailman. If not, see <http://www.gnu.org/licenses/>. - -"""Scrubber module tests.""" - -from __future__ import absolute_import, print_function, unicode_literals - -__metaclass__ = type -__all__ = [ - 'TestScrubber', - ] - - -import unittest - -from mailman.pipeline import scrubber - - - -class TestScrubber(unittest.TestCase): - """Scrubber module tests.""" - - def test_guess_extension(self): - # A known extension should be found. - extension = scrubber.guess_extension('application/msword', '.doc') - self.assertEqual(extension, '.doc') - - def test_guess_missing_extension(self): - # Maybe some other extension is better. 
- extension = scrubber.guess_extension('application/msword', '.xxx') - self.assertEqual(extension, '.doc') diff --git a/src/mailman/runners/digest.py b/src/mailman/runners/digest.py index 2730fc427..5ad82c469 100644 --- a/src/mailman/runners/digest.py +++ b/src/mailman/runners/digest.py @@ -46,7 +46,6 @@ from mailman.core.i18n import _ from mailman.core.runner import Runner from mailman.interfaces.member import DeliveryMode, DeliveryStatus from mailman.pipeline.decorate import decorate -from mailman.pipeline.scrubber import process as scrubber from mailman.utilities.i18n import make from mailman.utilities.mailbox import Mailbox from mailman.utilities.string import oneline, wrap @@ -253,12 +252,6 @@ class RFC1153Digester(Digester): if count > 1: print >> self._text, self._separator30 print >> self._text - # Scrub attachements. - try: - msg = scrubber(self._mlist, msg) - except DiscardMessage: - print >> self._text, _('[Message discarded by content filter]') - return # Each message section contains a few headers. for header in config.digests.plain_digest_keep_headers.split(): if header in msg: diff --git a/src/mailman/runners/docs/digester.rst b/src/mailman/runners/docs/digester.rst index 5a20db556..70c5cc587 100644 --- a/src/mailman/runners/docs/digester.rst +++ b/src/mailman/runners/docs/digester.rst @@ -223,7 +223,6 @@ The RFC 1153 contains the digest in a single plain text message. When replying, please edit your Subject line so it is more specific than "Re: Contents of Test digest..." <BLANKLINE> - <BLANKLINE> Today's Topics: <BLANKLINE> 1. Test message 1 (aperson@example.com) @@ -237,7 +236,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 1 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 1 <BLANKLINE> @@ -246,7 +244,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 2 To: xtest@example.com - Message-ID: ... 
<BLANKLINE> Here is message 2 <BLANKLINE> @@ -255,7 +252,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 3 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 3 <BLANKLINE> @@ -264,7 +260,6 @@ The RFC 1153 contains the digest in a single plain text message. From: aperson@example.com Subject: Test message 4 To: xtest@example.com - Message-ID: ... <BLANKLINE> Here is message 4 <BLANKLINE> @@ -458,7 +453,6 @@ The content can be decoded to see the actual digest text. "'From: aperson@example.org'", "'Subject: \\xe4\\xb8\\x80\\xe7\\x95\\xaa'", "'To: test@example.com'", - "'Message-ID: ... "'Content-Type: text/plain; charset=iso-2022-jp'", "''", "'\\xe4\\xb8\\x80\\xe7\\x95\\xaa'", diff --git a/src/mailman/styles/default.py b/src/mailman/styles/default.py index 95672c62c..b4064e0e9 100644 --- a/src/mailman/styles/default.py +++ b/src/mailman/styles/default.py @@ -32,7 +32,7 @@ import datetime from zope.interface import implements from mailman.core.i18n import _ -from mailman.interfaces.action import Action +from mailman.interfaces.action import Action, FilterAction from mailman.interfaces.bounce import UnrecognizedBounceDisposition from mailman.interfaces.digests import DigestFrequency from mailman.interfaces.autorespond import ResponseAction @@ -99,6 +99,7 @@ from: .*@uplinkpro.com mlist.preferred_language = 'en' mlist.collapse_alternatives = True mlist.convert_html_to_plaintext = False + mlist.filter_action = FilterAction.discard mlist.filter_content = False # Digest related variables mlist.digestable = True |
