diff options
| author | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
| commit | eefd06f1b88b8ecbb23a9013cd223b72ca85c20d (patch) | |
| tree | 72c947fe16fce0e07e996ee74020b26585d7e846 /src/mailman/pipeline/docs/scrubber.txt | |
| parent | 07871212f74498abd56bef3919bf3e029eb8b930 (diff) | |
| download | mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.gz mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.zst mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.zip | |
Diffstat (limited to 'src/mailman/pipeline/docs/scrubber.txt')
| -rw-r--r-- | src/mailman/pipeline/docs/scrubber.txt | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/src/mailman/pipeline/docs/scrubber.txt b/src/mailman/pipeline/docs/scrubber.txt new file mode 100644 index 000000000..dec1c1f64 --- /dev/null +++ b/src/mailman/pipeline/docs/scrubber.txt @@ -0,0 +1,225 @@ +The scrubber +============ + +The scrubber is an integral part of Mailman, both in the normal delivery of +messages and in components such as the archiver. Its primary purpose is to +scrub attachments from messages so that binary goop doesn't end up in an +archive message. + + >>> from mailman.pipeline.scrubber import process, save_attachment + >>> mlist = config.db.list_manager.create(u'_xtest@example.com') + >>> mlist.preferred_language = u'en' + +Helper functions for getting the attachment data. + + >>> import os, re + >>> def read_attachment(filename, remove=True): + ... path = os.path.join(config.PRIVATE_ARCHIVE_FILE_DIR, + ... mlist.fqdn_listname, filename) + ... fp = open(path) + ... try: + ... data = fp.read() + ... finally: + ... fp.close() + ... if remove: + ... os.unlink(path) + ... return data + + >>> from urlparse import urlparse + >>> def read_url_from_message(msg): + ... url = None + ... for line in msg.get_payload().splitlines(): + ... mo = re.match('URL: <(?P<url>[^>]+)>', line) + ... if mo: + ... url = mo.group('url') + ... break + ... path = '/'.join(urlparse(url).path.split('/')[3:]) + ... return read_attachment(path) + + +Saving attachments +------------------ + +The Scrubber handler exposes a function called save_attachment() which can be +used to strip various types of attachments and store them in the archive +directory. This is a public interface used by components outside the normal +processing pipeline. + +Site administrators can decide whether the scrubber should use the attachment +filename suggested in the message's Content-Disposition: header or not. If +enabled, the filename will be used when this header attribute is present (yes, +this is an unfortunate double negative). + + >>> config.push('test config', """ + ... 
[scrubber] + ... use_attachment_filename: yes + ... """) + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... """) + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/xtest.gif>' + >>> data = read_attachment('dir/xtest.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + +Saving the attachment does not alter the original message. + + >>> print msg.as_string() + Content-Type: image/gif; name="xtest.gif" + Content-Transfer-Encoding: base64 + Content-Disposition: attachment; filename="xtest.gif" + <BLANKLINE> + R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + +The site administrator can also configure Mailman to ignore the +Content-Disposition: filename. This is the default. + + >>> config.pop('test config') + >>> config.push('test config', """ + ... [scrubber] + ... use_attachment_filename: no + ... """) + >>> msg = message_from_string("""\ + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... """) + >>> save_attachment(mlist, msg, 'dir') + u'<http://www.example.com/pipermail/_xtest@example.com/dir/attachment.gif>' + >>> data = read_attachment('dir/xtest.gif') + Traceback (most recent call last): + IOError: [Errno ...] No such file or directory: + u'.../archives/private/_xtest@example.com/dir/xtest.gif' + >>> data = read_attachment('dir/attachment.gif') + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing image attachments +--------------------------- + +When scrubbing image attachments, the original message is modified to include +a reference to the attachment file as available through the on-line archive. + + >>> msg = message_from_string("""\ + ... 
MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii + ... + ... This is a message. + ... --BOUNDARY + ... Content-Type: image/gif; name="xtest.gif" + ... Content-Transfer-Encoding: base64 + ... Content-Disposition: attachment; filename="xtest.gif" + ... + ... R0lGODdhAQABAIAAAAAAAAAAACwAAAAAAQABAAACAQUAOw== + ... --BOUNDARY-- + ... """) + >>> msgdata = {} + +The Scrubber.process() function is different than other handler process +functions in that it returns the scrubbed message. + + >>> scrubbed_msg = process(mlist, msg, msgdata) + >>> scrubbed_msg is msg + True + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... + Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + +This is the same as the transformed message originally passed in. + + >>> print msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 7bit + <BLANKLINE> + This is a message. + -------------- next part -------------- + A non-text attachment was scrubbed... + Name: xtest.gif + Type: image/gif + Size: 34 bytes + Desc: not available + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.gif> + <BLANKLINE> + >>> msgdata + {} + +The URL will point to the attachment sitting in the archive. + + >>> data = read_url_from_message(msg) + >>> data[:6] + 'GIF87a' + >>> len(data) + 34 + + +Scrubbing text attachments +-------------------------- + +Similar to image attachments, text attachments will also be scrubbed, but the +placeholder will be slightly different. 
+ + >>> msg = message_from_string("""\ + ... MIME-Version: 1.0 + ... Content-Type: multipart/mixed; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... Content-type: text/plain; charset=us-ascii; format=flowed; delsp=no + ... + ... This is a message. + ... --BOUNDARY + ... Content-type: text/plain; name="xtext.txt" + ... Content-Disposition: attachment; filename="xtext.txt" + ... + ... This is a text attachment. + ... --BOUNDARY-- + ... """) + >>> scrubbed_msg = process(mlist, msg, {}) + >>> print scrubbed_msg.as_string() + MIME-Version: 1.0 + Message-ID: ... + Content-Transfer-Encoding: 7bit + Content-Type: text/plain; charset="us-ascii"; format="flowed"; delsp="no" + <BLANKLINE> + This is a message. + -------------- next part -------------- + An embedded and charset-unspecified text was scrubbed... + Name: xtext.txt + URL: <http://www.example.com/pipermail/_xtest@example.com/attachments/.../attachment.txt> + <BLANKLINE> + >>> read_url_from_message(msg) + 'This is a text attachment.' + + +Clean up +-------- + + >>> config.pop('test config') |
