summary refs log tree commit diff
path: root/src/mailman/pipeline
diff options
context:
space:
mode:
author Barry Warsaw 2012-03-16 15:58:17 -0700
committer Barry Warsaw 2012-03-16 15:58:17 -0700
commit 24991d17919f2715a7f2e875d2fb7fe72e53efcf (patch)
tree 80624655bf271f103f76377d9b09ce2ff0562dbb /src/mailman/pipeline
parent 44ec37f890c7d4d35504d8f2e56c01abe8c60940 (diff)
download mailman-24991d17919f2715a7f2e875d2fb7fe72e53efcf.tar.gz
mailman-24991d17919f2715a7f2e875d2fb7fe72e53efcf.tar.zst
mailman-24991d17919f2715a7f2e875d2fb7fe72e53efcf.zip
* Pipermail has been eradicated.
* Configuration variable `[mailman]filtered_messages_are_preservable` controls whether messages which have their top-level `Content-Type` filtered out can be preserved in the `bad` queue by list owners.
* Configuration section `[scrubber]` removed, as is the scrubber handler. This handler was essentially incompatible with Mailman 3 since it required coordination with Pipermail to store attachments on disk.
* Schema additions:
  - mailinglist.filter_action
Diffstat (limited to 'src/mailman/pipeline')
-rw-r--r-- src/mailman/pipeline/docs/filtering.rst 2
-rw-r--r-- src/mailman/pipeline/docs/scrubber.rst 230
-rw-r--r-- src/mailman/pipeline/mime_delete.py 71
-rw-r--r-- src/mailman/pipeline/scrubber.py 502
-rw-r--r-- src/mailman/pipeline/tests/test_mimedel.py 213
-rw-r--r-- src/mailman/pipeline/tests/test_scrubber.py 45
6 files changed, 258 insertions, 805 deletions
diff --git a/src/mailman/pipeline/docs/filtering.rst b/src/mailman/pipeline/docs/filtering.rst
index 5b54424e4..fd0b33d3b 100644
--- a/src/mailman/pipeline/docs/filtering.rst
+++ b/src/mailman/pipeline/docs/filtering.rst
@@ -45,7 +45,7 @@ content type matches the filter, the entire message will be discarded.
>>> process(mlist, msg, {})
Traceback (most recent call last):
...
- DiscardMessage
+ DiscardMessage: The message's content type was explicitly disallowed
However, if we turn off content filtering altogether, then the handler
short-circuits.
diff --git a/src/mailman/pipeline/docs/scrubber.rst b/src/mailman/pipeline/docs/scrubber.rst
deleted file mode 100644
index 86a8161a7..000000000
--- a/src/mailman/pipeline/docs/scrubber.rst
+++ /dev/null
@@ -1,230 +0,0 @@
-============
-The scrubber
-============
-
-The scrubber is an integral part of Mailman, both in the normal delivery of
-messages and in components such as the archiver. Its primary purpose is to
-scrub attachments from messages so that binary goop doesn't end up in an
-archive message.
-
- >>> mlist = create_list('_xtest@example.com')
- >>> mlist.preferred_language = 'en'
-
-Helper functions for getting the attachment data.
-::
-
- >>> import os, re
- >>> def read_attachment(filename, remove=True):
- ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR,
- ... mlist.fqdn_listname, filename)
- ... fp = open(path)
- ... try:
- ... data = fp.read()
- ... finally:
- ... fp.close()
- ... if remove:
- ... os.unlink(path)
- ... return data
-
- >>> from urlparse import urlparse
- >>> def read_url_from_message(msg):
- ... url = None
- ... for line in msg.get_payload().splitlines():
- ... mo = re.match('URL: <(?P<url>[^>]+)>', line)
- ... if mo:
- ... url = mo.group('url')
- ... break
- ... path = '/'.join(urlparse(url).path.split('/')[3:])
- ... return read_attachment(path)
-
-
-Saving attachments
-==================
-
-The Scrubber handler exposes a function called ``save_attachment()`` which can
-be used to strip various types of attachments and store them in the archive
-directory. This is a public interface used by components outside the normal
-processing pipeline.
-
-Site administrators can decide whether the scrubber should use the attachment
-filename suggested in the message's ``Content-Disposition:`` header or not.
-If enabled, the filename will be used when this header attribute is present
-(yes, this is an unfortunate double negative).
-::
-
- >>> config.push('test config', """
- ... [scrubber]
- ... use_attachment_filename: yes
- ... """)
- >>> msg = message_from_string("""\
- ... Content-Type: image/gif; name="xtest.gif"
- ... Content-Transfer-Encoding: base64
- ... Content-Disposition: attachment; filename="xtest.gif"
- ...
- ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
- ... """)
-
- >>> from mailman.pipeline.scrubber import save_attachment
- >>> print save_attachment(mlist, msg, 'dir')
- <http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif>
- >>> data = read_attachment('dir/xtest.gif')
- >>> print data[:6]
- GIF87a
- >>> len(data)
- 34
-
-Saving the attachment does not alter the original message.
-
- >>> print msg.as_string()
- Content-Type: image/gif; name="xtest.gif"
- Content-Transfer-Encoding: base64
- Content-Disposition: attachment; filename="xtest.gif"
- <BLANKLINE>
- R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
-
-The site administrator can also configure Mailman to ignore the
-``Content-Disposition:`` filename. This is the default.
-
- >>> config.pop('test config')
- >>> config.push('test config', """
- ... [scrubber]
- ... use_attachment_filename: no
- ... """)
- >>> msg = message_from_string("""\
- ... Content-Type: image/gif; name="xtest.gif"
- ... Content-Transfer-Encoding: base64
- ... Content-Disposition: attachment; filename="xtest.gif"
- ...
- ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
- ... """)
- >>> print save_attachment(mlist, msg, 'dir')
- <http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif>
- >>> data = read_attachment('dir/xtest.gif')
- Traceback (most recent call last):
- IOError: [Errno ...] No such file or directory:
- u'.../archives/private/_xtest@example.com/dir/xtest.gif'
- >>> data = read_attachment('dir/attachment.gif')
- >>> print data[:6]
- GIF87a
- >>> len(data)
- 34
-
-
-Scrubbing image attachments
-===========================
-
-When scrubbing image attachments, the original message is modified to include
-a reference to the attachment file as available through the on-line archive.
-
- >>> msg = message_from_string("""\
- ... MIME-Version: 1.0
- ... Content-Type: multipart/mixed; boundary="BOUNDARY"
- ...
- ... --BOUNDARY
- ... Content-type: text/plain; charset=us-ascii
- ...
- ... This is a message.
- ... --BOUNDARY
- ... Content-Type: image/gif; name="xtest.gif"
- ... Content-Transfer-Encoding: base64
- ... Content-Disposition: attachment; filename="xtest.gif"
- ...
- ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw==
- ... --BOUNDARY--
- ... """)
- >>> msgdata = {}
-
-The ``Scrubber.process()`` function is different than other handler process
-functions in that it returns the scrubbed message.
-
- >>> from mailman.pipeline.scrubber import process
- >>> scrubbed_msg = process(mlist, msg, msgdata)
- >>> scrubbed_msg is msg
- True
- >>> print scrubbed_msg.as_string()
- MIME-Version: 1.0
- Message-ID: ...
- Content-Type: text/plain; charset="us-ascii"
- Content-Transfer-Encoding: 7bit
- <BLANKLINE>
- This is a message.
- -------------- next part --------------
- A non-text attachment was scrubbed...
- Name: xtest.gif
- Type: image/gif
- Size: 34 bytes
- Desc: not available
- URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif>
- <BLANKLINE>
-
-This is the same as the transformed message originally passed in.
-
- >>> print msg.as_string()
- MIME-Version: 1.0
- Message-ID: ...
- Content-Type: text/plain; charset="us-ascii"
- Content-Transfer-Encoding: 7bit
- <BLANKLINE>
- This is a message.
- -------------- next part --------------
- A non-text attachment was scrubbed...
- Name: xtest.gif
- Type: image/gif
- Size: 34 bytes
- Desc: not available
- URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif>
- <BLANKLINE>
- >>> msgdata
- {}
-
-The URL will point to the attachment sitting in the archive.
-
- >>> data = read_url_from_message(msg)
- >>> data[:6]
- 'GIF87a'
- >>> len(data)
- 34
-
-
-Scrubbing text attachments
-==========================
-
-Similar to image attachments, text attachments will also be scrubbed, but the
-placeholder will be slightly different.
-
- >>> msg = message_from_string("""\
- ... MIME-Version: 1.0
- ... Content-Type: multipart/mixed; boundary="BOUNDARY"
- ...
- ... --BOUNDARY
- ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no
- ...
- ... This is a message.
- ... --BOUNDARY
- ... Content-type: text/plain; name="xtext.txt"
- ... Content-Disposition: attachment; filename="xtext.txt"
- ...
- ... This is a text attachment.
- ... --BOUNDARY--
- ... """)
- >>> scrubbed_msg = process(mlist, msg, {})
- >>> print scrubbed_msg.as_string()
- MIME-Version: 1.0
- Message-ID: ...
- Content-Transfer-Encoding: 7bit
- Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no"
- <BLANKLINE>
- This is a message.
- -------------- next part --------------
- An embedded and charset-unspecified text was scrubbed...
- Name: xtext.txt
- URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt>
- <BLANKLINE>
- >>> read_url_from_message(msg)
- 'This is a text attachment.'
-
-
-Clean up
-========
-
- >>> config.pop('test config')
diff --git a/src/mailman/pipeline/mime_delete.py b/src/mailman/pipeline/mime_delete.py
index 402d13714..99fdc3ede 100644
--- a/src/mailman/pipeline/mime_delete.py
+++ b/src/mailman/pipeline/mime_delete.py
@@ -37,14 +37,18 @@ import errno
import logging
import tempfile
-from email.Iterators import typed_subpart_iterator
+from email.iterators import typed_subpart_iterator
+from email.mime.message import MIMEMessage
+from email.mime.text import MIMEText
+from lazr.config import as_boolean
from os.path import splitext
from zope.interface import implements
from mailman.config import config
from mailman.core import errors
from mailman.core.i18n import _
-from mailman.core.switchboard import Switchboard
+from mailman.email.message import OwnerNotification
+from mailman.interfaces.action import FilterAction
from mailman.interfaces.handler import IHandler
from mailman.utilities.string import oneline
from mailman.version import VERSION
@@ -54,6 +58,44 @@ log = logging.getLogger('mailman.error')
+def dispose(mlist, msg, msgdata, why):
+ if mlist.filter_action is FilterAction.reject:
+ # Bounce the message to the original author.
+ raise errors.RejectMessage(why)
+ elif mlist.filter_action is FilterAction.forward:
+ # Forward it on to the list moderators.
+ # FIXME 2012-03-16 BAW: Trunk uses .display_name
+ text=_("""\
+The attached message matched the $mlist.real_name mailing list's content
+filtering rules and was prevented from being forwarded on to the list
+membership. You are receiving the only remaining copy of the discarded
+message.
+
+""")
+ subject=_('Content filter message notification')
+ notice = OwnerNotification(mlist, subject, roster=mlist.moderators)
+ notice.set_type('multipart/mixed')
+ notice.attach(MIMEText(text))
+ notice.attach(MIMEMessage(msg))
+ notice.send(mlist)
+ # Let this fall through so the original message gets discarded.
+ elif mlist.filter_action is FilterAction.preserve:
+ if as_boolean(config.mailman.filtered_messages_are_preservable):
+ # This is just like discarding the message except that a copy is
+ # placed in the 'bad' queue should the site administrator want to
+ # inspect the message.
+ filebase = config.switchboards['bad'].enqueue(msg, msgdata)
+ log.info('{0} preserved in file base {1}'.format(
+ msg.get('message-id', 'n/a'), filebase))
+ else:
+ log.error(
+ '{1} invalid FilterAction: {0}. Treating as discard'.format(
+ mlist.fqdn_listname, mlist.filter_action.name))
+ # Most cases also discard the message
+ raise errors.DiscardMessage(why)
+
+
+
def process(mlist, msg, msgdata):
# We also don't care about our own digests or plaintext
ctype = msg.get_content_type()
@@ -227,31 +269,6 @@ def to_plaintext(msg):
-def dispose(mlist, msg, msgdata, why):
- # filter_action == 0 just discards, see below
- if mlist.filter_action == 1:
- # Bounce the message to the original author
- raise errors.RejectMessage, why
- if mlist.filter_action == 2:
- # Forward it on to the list owner
- listname = mlist.internal_name()
- mlist.ForwardMessage(
- msg,
- text=_("""\
-The attached message matched the $listname mailing list's content filtering
-rules and was prevented from being forwarded on to the list membership. You
-are receiving the only remaining copy of the discarded message.
-
-"""),
- subject=_('Content filtered message notification'))
- if mlist.filter_action == 3 and \
- config.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
- badq = Switchboard('bad', config.BADQUEUE_DIR)
- badq.enqueue(msg, msgdata)
- # Most cases also discard the message
- raise errors.DiscardMessage
-
-
def get_file_ext(m):
"""
Get filename extension. Caution: some virus don't put filename
diff --git a/src/mailman/pipeline/scrubber.py b/src/mailman/pipeline/scrubber.py
deleted file mode 100644
index 76d10427e..000000000
--- a/src/mailman/pipeline/scrubber.py
+++ /dev/null
@@ -1,502 +0,0 @@
-# Copyright (C) 2001-2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""Cleanse a message for archiving."""
-
-from __future__ import absolute_import, unicode_literals
-
-__metaclass__ = type
-__all__ = [
- 'Scrubber',
- ]
-
-
-import os
-import re
-import time
-import hashlib
-import logging
-import binascii
-
-from email.charset import Charset
-from email.utils import make_msgid, parsedate
-from flufl.lock import Lock
-from lazr.config import as_boolean
-from mimetypes import guess_all_extensions
-from string import Template
-from zope.interface import implements
-
-from mailman.config import config
-from mailman.core.errors import DiscardMessage
-from mailman.core.i18n import _
-from mailman.interfaces.handler import IHandler
-from mailman.utilities.filesystem import makedirs
-from mailman.utilities.modules import find_name
-from mailman.utilities.string import oneline, websafe
-
-
-# Path characters for common platforms
-pre = re.compile(r'[/\\:]')
-# All other characters to strip out of Content-Disposition: filenames
-# (essentially anything that isn't an alphanum, dot, dash, or underscore).
-sre = re.compile(r'[^-\w.]')
-# Regexp to strip out leading dots
-dre = re.compile(r'^\.*')
-
-BR = '<br>\n'
-SPACE = ' '
-
-log = logging.getLogger('mailman.error')
-
-
-
-def guess_extension(ctype, ext):
- """Find the extension mapped to the given content-type.
-
- mimetypes maps multiple extensions to the same type, e.g. .doc, .dot, and
- .wiz are all mapped to application/msword. This sucks for finding the
- best reverse mapping. If the extension is one of the given mappings,
- we'll trust that, otherwise we'll just guess. :/
- """
- all_extensions = guess_all_extensions(ctype, strict=False)
- if ext in all_extensions:
- return ext
- return (all_extensions[0] if len(all_extensions) > 0 else None)
-
-
-
-def safe_strftime(fmt, t):
- """A time.strftime() that eats exceptions, returning None instead."""
- try:
- return time.strftime(fmt, t)
- except (TypeError, ValueError, OverflowError):
- return None
-
-
-def calculate_attachments_dir(msg, msgdata):
- """Calculate the directory for attachments.
-
- Calculate the directory that attachments for this message will go under.
- To avoid inode limitations, the scheme will be:
- archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files>
- Start by calculating the date-based and msgid-hash components.
- """
- fmt = '%Y%m%d'
- datestr = msg.get('Date')
- if datestr:
- now = parsedate(datestr)
- else:
- now = time.gmtime(msgdata.get('received_time', time.time()))
- datedir = safe_strftime(fmt, now)
- if not datedir:
- datestr = msgdata.get('X-List-Received-Date')
- if datestr:
- datedir = safe_strftime(fmt, datestr)
- if not datedir:
- # What next? Unixfrom, I guess.
- parts = msg.get_unixfrom().split()
- try:
- month = {'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6,
- 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12,
- }.get(parts[3], 0)
- day = int(parts[4])
- year = int(parts[6])
- except (IndexError, ValueError):
- # Best we can do I think
- month = day = year = 0
- datedir = '%04d%02d%02d' % (year, month, day)
- assert datedir
- # As for the msgid hash, we'll base this part on the Message-ID: so that
- # all attachments for the same message end up in the same directory (we'll
- # uniquify the filenames in that directory as needed). We use the first 2
- # and last 2 bytes of the SHA1 hash of the message id as the basis of the
- # directory name. Clashes here don't really matter too much, and that
- # still gives us a 32-bit space to work with.
- msgid = msg['message-id']
- if msgid is None:
- msgid = msg['Message-ID'] = make_msgid()
- # We assume that the message id actually /is/ unique!
- digest = hashlib.sha1(msgid).hexdigest()
- return os.path.join('attachments', datedir, digest[:4] + digest[-4:])
-
-
-def replace_payload_by_text(msg, text, charset):
- """Replace the payload of the message with some text."""
- # TK: This is a common function in replacing the attachment and the main
- # message by a text (scrubbing).
- del msg['content-type']
- del msg['content-transfer-encoding']
- if isinstance(text, unicode):
- text = text.encode(charset)
- if not isinstance(charset, str):
- charset = str(charset)
- msg.set_payload(text, charset)
-
-
-
-def process(mlist, msg, msgdata=None):
- """Process the message through the scrubber."""
- sanitize = int(config.scrubber.archive_html_sanitizer)
- outer = True
- if msgdata is None:
- msgdata = {}
- if msgdata:
- # msgdata is available if it is in GLOBAL_PIPELINE
- # ie. not in digest or archiver
- # check if the list owner want to scrub regular delivery
- if not mlist.scrub_nondigest:
- return
- attachments_dir = calculate_attachments_dir(msg, msgdata)
- charset = format_param = delsp = None
- lcset = mlist.preferred_language.charset
- lcset_out = Charset(lcset).output_charset or lcset
- # Now walk over all subparts of this message and scrub out various types
- for part in msg.walk():
- ctype = part.get_content_type()
- # If the part is text/plain, we leave it alone
- if ctype == 'text/plain':
- # We need to choose a charset for the scrubbed message, so we'll
- # arbitrarily pick the charset of the first text/plain part in the
- # message.
- #
- # Also get the RFC 3676 stuff from this part. This seems to
- # work okay for scrub_nondigest. It will also work as far as
- # scrubbing messages for the archive is concerned. The plain
- # format digest is going to be a disaster in any case as some of
- # messages will be format="flowed" and some not. ToDigest creates
- # its own Content-Type: header for the plain digest which won't
- # have RFC 3676 parameters. If the message Content-Type: headers
- # are retained for display in the digest, the parameters will be
- # there for information, but not for the MUA. This is the best we
- # can do without having get_payload() process the parameters.
- if charset is None:
- charset = part.get_content_charset(lcset)
- format_param = part.get_param('format')
- delsp = part.get_param('delsp')
- # TK: if part is attached then check charset and scrub if none
- if part.get('content-disposition') and \
- not part.get_content_charset():
- url = save_attachment(mlist, part, attachments_dir)
- filename = part.get_filename(_('not available'))
- filename = oneline(filename, lcset)
- replace_payload_by_text(part, _("""\
-An embedded and charset-unspecified text was scrubbed...
-Name: $filename
-URL: $url
-"""), lcset)
- elif ctype == 'text/html' and isinstance(sanitize, int):
- if sanitize == 0:
- if outer:
- raise DiscardMessage
- replace_payload_by_text(part,
- _('HTML attachment scrubbed and removed'),
- # Adding charset arg and removing content-type
- # sets content-type to text/plain
- lcset)
- elif sanitize == 2:
- # By leaving it alone, Pipermail will automatically escape it.
- # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do
- # we even need this?
- pass
- elif sanitize == 3:
- # Pull it out as an attachment but leave it unescaped. This
- # is dangerous, but perhaps useful for heavily moderated
- # lists.
- url = save_attachment(mlist, part, attachments_dir,
- filter_html=False)
- replace_payload_by_text(part, _("""\
-An HTML attachment was scrubbed...
-URL: $url
-"""), lcset)
- else:
- # HTML-escape it and store it as an attachment, but make it
- # look a /little/ bit prettier. :(
- payload = websafe(part.get_payload(decode=True))
- # For whitespace in the margin, change spaces into
- # non-breaking spaces, and tabs into 8 of those. Then use a
- # mono-space font. Still looks hideous to me, but then I'd
- # just as soon discard them.
- lines = [s.replace(' ', '&nbsp;').replace('\t', '&nbsp' * 8)
- for s in payload.split('\n')]
- payload = '<tt>\n' + BR.join(lines) + '\n</tt>\n'
- part.set_payload(payload)
- # We're replacing the payload with the decoded payload so this
- # will just get in the way.
- del part['content-transfer-encoding']
- url = save_attachment(mlist, part, attachments_dir,
- filter_html=False)
- replace_payload_by_text(part, _("""\
-An HTML attachment was scrubbed...
-URL: $url
-"""), lcset)
- elif ctype == 'message/rfc822':
- # This part contains a submessage, so it too needs scrubbing
- submsg = part.get_payload(0)
- url = save_attachment(mlist, part, attachments_dir)
- subject = submsg.get('subject', _('no subject'))
- date = submsg.get('date', _('no date'))
- who = submsg.get('from', _('unknown sender'))
- size = len(str(submsg))
- replace_payload_by_text(part, _("""\
-An embedded message was scrubbed...
-From: $who
-Subject: $subject
-Date: $date
-Size: $size
-URL: $url
-"""), lcset)
- # If the message isn't a multipart, then we'll strip it out as an
- # attachment that would have to be separately downloaded.
- elif part._payload and not part.is_multipart():
- payload = part.get_payload(decode=True)
- ctype = part.get_content_type()
- # XXX Under email 2.5, it is possible that payload will be None.
- # This can happen when you have a Content-Type: multipart/* with
- # only one part and that part has two blank lines between the
- # first boundary and the end boundary. In email 3.0 you end up
- # with a string in the payload. I think in this case it's safe to
- # ignore the part.
- if payload is None:
- continue
- size = len(payload)
- url = save_attachment(mlist, part, attachments_dir)
- desc = part.get('content-description', _('not available'))
- desc = oneline(desc, lcset)
- filename = part.get_filename(_('not available'))
- filename = oneline(filename, lcset)
- replace_payload_by_text(part, _("""\
-A non-text attachment was scrubbed...
-Name: $filename
-Type: $ctype
-Size: $size bytes
-Desc: $desc
-URL: $url
-"""), lcset)
- outer = False
- # We still have to sanitize multipart messages to flat text because
- # Pipermail can't handle messages with list payloads. This is a kludge;
- # def (n) clever hack ;).
- #
- # XXX 2012-03-13 BAW: Now that Pipermail has been removed, do we even need
- # this code?
- if msg.is_multipart() and sanitize != 2:
- # By default we take the charset of the first text/plain part in the
- # message, but if there was none, we'll use the list's preferred
- # language's charset.
- if not charset or charset == 'us-ascii':
- charset = lcset_out
- else:
- # normalize to the output charset if input/output are different
- charset = Charset(charset).output_charset or charset
- # We now want to concatenate all the parts which have been scrubbed to
- # text/plain, into a single text/plain payload. We need to make sure
- # all the characters in the concatenated string are in the same
- # encoding, so we'll use the 'replace' key in the coercion call.
- # BAW: Martin's original patch suggested we might want to try
- # generalizing to utf-8, and that's probably a good idea (eventually).
- text = []
- charsets = []
- for part in msg.walk():
- # TK: bug-id 1099138 and multipart
- # MAS test payload - if part may fail if there are no headers.
- if not part._payload or part.is_multipart():
- continue
- # All parts should be scrubbed to text/plain by now.
- partctype = part.get_content_type()
- if partctype != 'text/plain':
- text.append(_('Skipped content of type $partctype\n'))
- continue
- try:
- t = part.get_payload(decode=True) or ''
- # MAS: TypeError exception can occur if payload is None. This
- # was observed with a message that contained an attached
- # message/delivery-status part. Because of the special parsing
- # of this type, this resulted in a text/plain sub-part with a
- # null body. See bug 1430236.
- except (binascii.Error, TypeError):
- t = part.get_payload() or ''
- # Email problem was solved by Mark Sapiro. (TK)
- partcharset = part.get_content_charset('us-ascii')
- try:
- t = unicode(t, partcharset, 'replace')
- except (UnicodeError, LookupError, ValueError, TypeError,
- AssertionError):
- # We can get here if partcharset is bogus in some way.
- # Replace funny characters. We use errors='replace'.
- t = unicode(t, 'ascii', 'replace')
- # Separation is useful
- if isinstance(t, basestring):
- if not t.endswith('\n'):
- t += '\n'
- text.append(t)
- if partcharset not in charsets:
- charsets.append(partcharset)
- # Now join the text and set the payload
- sep = _('-------------- next part --------------\n')
- assert isinstance(sep, unicode), (
- 'Expected a unicode separator, got %s' % type(sep))
- rept = sep.join(text)
- # Replace entire message with text and scrubbed notice.
- # Try with message charsets and utf-8
- if 'utf-8' not in charsets:
- charsets.append('utf-8')
- for charset in charsets:
- try:
- replace_payload_by_text(msg, rept, charset)
- break
- # Bogus charset can throw several exceptions
- except (UnicodeError, LookupError, ValueError, TypeError,
- AssertionError):
- pass
- if format_param:
- msg.set_param('format', format_param)
- if delsp:
- msg.set_param('delsp', delsp)
- return msg
-
-
-
-def save_attachment(mlist, msg, attachments_dir, filter_html=True):
- fsdir = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR,
- mlist.fqdn_listname, attachments_dir)
- makedirs(fsdir)
- # Figure out the attachment type and get the decoded data
- decodedpayload = msg.get_payload(decode=True)
- # BAW: mimetypes ought to handle non-standard, but commonly found types,
- # e.g. image/jpg (should be image/jpeg). For now we just store such
- # things as application/octet-streams since that seems the safest.
- ctype = msg.get_content_type()
- # i18n file name is encoded
- lcset = mlist.preferred_language.charset
- filename = oneline(msg.get_filename(''), lcset)
- filename, fnext = os.path.splitext(filename)
- # For safety, we should confirm this is valid ext for content-type
- # but we can use fnext if we introduce fnext filtering
- if as_boolean(config.scrubber.use_attachment_filename_extension):
- # HTML message doesn't have filename :-(
- ext = fnext or guess_extension(ctype, fnext)
- else:
- ext = guess_extension(ctype, fnext)
- if not ext:
- # We don't know what it is, so assume it's just a shapeless
- # application/octet-stream, unless the Content-Type: is
- # message/rfc822, in which case we know we'll coerce the type to
- # text/plain below.
- if ctype == 'message/rfc822':
- ext = '.txt'
- else:
- ext = '.bin'
- # Allow only alphanumerics, dash, underscore, and dot
- ext = sre.sub('', ext)
- path = None
- # We need a lock to calculate the next attachment number
- with Lock(os.path.join(fsdir, 'attachments.lock')):
- # Now base the filename on what's in the attachment, uniquifying it if
- # necessary.
- if (not filename or
- not as_boolean(config.scrubber.use_attachment_filename)):
- filebase = 'attachment'
- else:
- # Sanitize the filename given in the message headers
- parts = pre.split(filename)
- filename = parts[-1]
- # Strip off leading dots
- filename = dre.sub('', filename)
- # Allow only alphanumerics, dash, underscore, and dot
- filename = sre.sub('', filename)
- # If the filename's extension doesn't match the type we guessed,
- # which one should we go with? For now, let's go with the one we
- # guessed so attachments can't lie about their type. Also, if the
- # filename /has/ no extension, then tack on the one we guessed.
- # The extension was removed from the name above.
- filebase = filename
- # Now we're looking for a unique name for this file on the file
- # system. If msgdir/filebase.ext isn't unique, we'll add a counter
- # after filebase, e.g. msgdir/filebase-cnt.ext
- counter = 0
- extra = ''
- while True:
- path = os.path.join(fsdir, filebase + extra + ext)
- # Generally it is not a good idea to test for file existence
- # before just trying to create it, but the alternatives aren't
- # wonderful (i.e. os.open(..., O_CREAT | O_EXCL) isn't
- # NFS-safe). Besides, we have an exclusive lock now, so we're
- # guaranteed that no other process will be racing with us.
- if os.path.exists(path):
- counter += 1
- extra = '-%04d' % counter
- else:
- break
- # `path' now contains the unique filename for the attachment. There's
- # just one more step we need to do. If the part is text/html and
- # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be
- # here), then send the attachment through the filter program for
- # sanitization
- if filter_html and ctype == 'text/html':
- base, ext = os.path.splitext(path)
- tmppath = base + '-tmp' + ext
- fp = open(tmppath, 'w')
- try:
- fp.write(decodedpayload)
- fp.close()
- cmd = Template(config.mta.archive_html_sanitizer).safe_substitue(
- filename=tmppath)
- progfp = os.popen(cmd, 'r')
- decodedpayload = progfp.read()
- status = progfp.close()
- if status:
- log.error('HTML sanitizer exited with non-zero status: %s',
- status)
- finally:
- os.unlink(tmppath)
- # BAW: Since we've now sanitized the document, it should be plain
- # text. Blarg, we really want the sanitizer to tell us what the type
- # of the return data is. :(
- ext = '.txt'
- path = base + '.txt'
- # Is it a message/rfc822 attachment?
- elif ctype == 'message/rfc822':
- submsg = msg.get_payload()
- # BAW: I'm sure we can eventually do better than this. :(
- decodedpayload = websafe(str(submsg))
- fp = open(path, 'w')
- fp.write(decodedpayload)
- fp.close()
- # Now calculate the url to the list's archive.
- scrubber_path = config.scrubber.archive_scrubber
- base_url = find_name(scrubber_path).list_url(mlist)
- if not base_url.endswith('/'):
- base_url += '/'
- # Trailing space will definitely be a problem with format=flowed.
- # Bracket the URL instead.
- url = '<' + base_url + '%s/%s%s%s>' % (
- attachments_dir, filebase, extra, ext)
- return url
-
-
-
-class Scrubber:
- """Cleanse a message for archiving."""
-
- implements(IHandler)
-
- name = 'scrubber'
- description = _('Cleanse a message for archiving.')
-
- def process(self, mlist, msg, msgdata):
- """See `IHandler`."""
- process(mlist, msg, msgdata)
diff --git a/src/mailman/pipeline/tests/test_mimedel.py b/src/mailman/pipeline/tests/test_mimedel.py
new file mode 100644
index 000000000..566c1a40c
--- /dev/null
+++ b/src/mailman/pipeline/tests/test_mimedel.py
@@ -0,0 +1,213 @@
+# Copyright (C) 2012 by the Free Software Foundation, Inc.
+#
+# This file is part of GNU Mailman.
+#
+# GNU Mailman is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test the mime_delete handler."""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+ 'TestDispose',
+ ]
+
+
+import unittest
+
+from zope.component import getUtility
+
+from mailman.app.lifecycle import create_list
+from mailman.config import config
+from mailman.core import errors
+from mailman.interfaces.action import FilterAction
+from mailman.interfaces.member import MemberRole
+from mailman.interfaces.usermanager import IUserManager
+from mailman.pipeline import mime_delete
+from mailman.testing.helpers import (
+ LogFileMark,
+ get_queue_messages,
+ specialized_message_from_string as mfs)
+from mailman.testing.layers import ConfigLayer
+
+
+
+class TestDispose(unittest.TestCase):
+ """Test the mime_delete handler."""
+
+ layer = ConfigLayer
+
+ def setUp(self):
+ self._mlist = create_list('test@example.com')
+ self._msg = mfs("""\
+From: anne@example.com
+To: test@example.com
+Subject: A disposable message
+Message-ID: <ant>
+
+""")
+ # Python 2.7 has assertMultiLineEqual. Let this work without bounds.
+ self.maxDiff = None
+ self.eq = getattr(self, 'assertMultiLineEqual', self.assertEqual)
+ config.push('dispose', """
+ [mailman]
+ site_owner: noreply@example.com
+ """)
+
+ def tearDown(self):
+ config.pop('dispose')
+
+ def test_dispose_discard(self):
+ self._mlist.filter_action = FilterAction.discard
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'discarding')
+ except errors.DiscardMessage as error:
+ pass
+ else:
+ raise AssertionError('DiscardMessage exception expected')
+ self.assertEqual(error.message, 'discarding')
+ # There should be no messages in the 'bad' queue.
+ self.assertEqual(len(get_queue_messages('bad')), 0)
+
+ def test_dispose_bounce(self):
+ self._mlist.filter_action = FilterAction.reject
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'rejecting')
+ except errors.RejectMessage as error:
+ pass
+ else:
+ raise AssertionError('RejectMessage exception expected')
+ self.assertEqual(error.message, 'rejecting')
+ # There should be no messages in the 'bad' queue.
+ self.assertEqual(len(get_queue_messages('bad')), 0)
+
+ def test_dispose_forward(self):
+ # The disposed message gets forwarded to the list moderators. So
+ # first add some moderators.
+ user_manager = getUtility(IUserManager)
+ anne = user_manager.create_address('anne@example.com')
+ bart = user_manager.create_address('bart@example.com')
+ self._mlist.subscribe(anne, MemberRole.moderator)
+ self._mlist.subscribe(bart, MemberRole.moderator)
+ # Now set the filter action and dispose the message.
+ self._mlist.filter_action = FilterAction.forward
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'forwarding')
+ except errors.DiscardMessage as error:
+ pass
+ else:
+ raise AssertionError('DiscardMessage exception expected')
+ self.assertEqual(error.message, 'forwarding')
+ # There should now be a multipart message in the virgin queue destined
+ # for the mailing list owners.
+ messages = get_queue_messages('virgin')
+ self.assertEqual(len(messages), 1)
+ message = messages[0].msg
+ self.assertEqual(message.get_content_type(), 'multipart/mixed')
+ # Anne and Bart should be recipients of the message, but it will look
+ # like the message is going to the list owners.
+ self.assertEqual(message['to'], 'test-owner@example.com')
+ self.assertEqual(message.recipients,
+ set(['anne@example.com', 'bart@example.com']))
+ # The site owner should be the sender.
+ self.assertEqual(message['from'], 'noreply@example.com')
+ self.assertEqual(message['subject'],
+ 'Content filter message notification')
+ # The body of the first part provides the moderators some details.
+ part0 = message.get_payload(0)
+ self.assertEqual(part0.get_content_type(), 'text/plain')
+ self.eq(part0.get_payload(), """\
+The attached message matched the Test mailing list's content
+filtering rules and was prevented from being forwarded on to the list
+membership. You are receiving the only remaining copy of the discarded
+message.
+
+""")
+ # The second part is the container for the original message.
+ part1 = message.get_payload(1)
+ self.assertEqual(part1.get_content_type(), 'message/rfc822')
+ # And the first part of *that* message will be the original message.
+ original = part1.get_payload(0)
+ self.assertEqual(original['subject'], 'A disposable message')
+ self.assertEqual(original['message-id'], '<ant>')
+
+ def test_dispose_non_preservable(self):
+ # Two actions can happen here, depending on a site-wide setting. If
+ # the site owner has indicated that filtered messages cannot be
+ # preserved, then this is the same as discarding them.
+ self._mlist.filter_action = FilterAction.preserve
+ config.push('non-preservable', """
+ [mailman]
+ filtered_messages_are_preservable: no
+ """)
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'not preserved')
+ except errors.DiscardMessage as error:
+ pass
+ else:
+ raise AssertionError('DiscardMessage exception expected')
+ finally:
+ config.pop('non-preservable')
+ self.assertEqual(error.message, 'not preserved')
+ # There should be no messages in the 'bad' queue.
+ self.assertEqual(len(get_queue_messages('bad')), 0)
+
+ def test_dispose_preservable(self):
+ # Two actions can happen here, depending on a site-wide setting. If
+ # the site owner has indicated that filtered messages can be
+ # preserved, then this is similar to discarding the message except
+ # that a copy is preserved in the 'bad' queue.
+ self._mlist.filter_action = FilterAction.preserve
+ config.push('preservable', """
+ [mailman]
+ filtered_messages_are_preservable: yes
+ """)
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'preserved')
+ except errors.DiscardMessage as error:
+ pass
+ else:
+ raise AssertionError('DiscardMessage exception expected')
+ finally:
+ config.pop('preservable')
+ self.assertEqual(error.message, 'preserved')
+ # There should be exactly one message in the 'bad' queue.
+ messages = get_queue_messages('bad')
+ self.assertEqual(len(messages), 1)
+ message = messages[0].msg
+ self.assertEqual(message['subject'], 'A disposable message')
+ self.assertEqual(message['message-id'], '<ant>')
+
+ def test_bad_action(self):
+ # This should never happen, but what if it does?
+ # FilterAction.accept, FilterAction.hold, and FilterAction.defer are
+ # not valid. They are treated as discard actions, but the problem is
+ # also logged.
+ for action in (FilterAction.accept,
+ FilterAction.hold,
+ FilterAction.defer):
+ self._mlist.filter_action = action
+ mark = LogFileMark('mailman.error')
+ try:
+ mime_delete.dispose(self._mlist, self._msg, {}, 'bad action')
+ except errors.DiscardMessage as error:
+ pass
+ else:
+ raise AssertionError('DiscardMessage exception expected')
+ self.assertEqual(error.message, 'bad action')
+ line = mark.readline()[:-1]
+ self.assertTrue(line.endswith(
+ '{0} invalid FilterAction: test@example.com. '
+ 'Treating as discard'.format(action.name)))
diff --git a/src/mailman/pipeline/tests/test_scrubber.py b/src/mailman/pipeline/tests/test_scrubber.py
deleted file mode 100644
index 7ac5eb855..000000000
--- a/src/mailman/pipeline/tests/test_scrubber.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (C) 2012 by the Free Software Foundation, Inc.
-#
-# This file is part of GNU Mailman.
-#
-# GNU Mailman is free software: you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free
-# Software Foundation, either version 3 of the License, or (at your option)
-# any later version.
-#
-# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
-
-"""Scrubber module tests."""
-
-from __future__ import absolute_import, print_function, unicode_literals
-
-__metaclass__ = type
-__all__ = [
- 'TestScrubber',
- ]
-
-
-import unittest
-
-from mailman.pipeline import scrubber
-
-
-
-class TestScrubber(unittest.TestCase):
- """Scrubber module tests."""
-
- def test_guess_extension(self):
- # A known extension should be found.
- extension = scrubber.guess_extension('application/msword', '.doc')
- self.assertEqual(extension, '.doc')
-
- def test_guess_missing_extension(self):
- # Maybe some other extension is better.
- extension = scrubber.guess_extension('application/msword', '.xxx')
- self.assertEqual(extension, '.doc')