diff options
| author | Barry Warsaw | 2007-07-04 00:16:48 -0400 |
|---|---|---|
| committer | Barry Warsaw | 2007-07-04 00:16:48 -0400 |
| commit | 7c32d18612ce6bdc2b8a10a307df70f60013cbdb (patch) | |
| tree | 1abcdc2adb683e4273c74fd5b51b94820e5dbf75 | |
| parent | 11dfa4f53db6416d2adba0506d4af4d25e490e6e (diff) | |
| download | mailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.tar.gz mailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.tar.zst mailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.zip | |
Convert the TestTagger to a doctest. No other cleaning up of the handler
module is done.
| -rw-r--r-- | Mailman/docs/tagger.txt | 244 | ||||
| -rw-r--r-- | Mailman/testing/test_handlers.py | 117 |
2 files changed, 244 insertions, 117 deletions
diff --git a/Mailman/docs/tagger.txt b/Mailman/docs/tagger.txt new file mode 100644 index 000000000..1d576b9ae --- /dev/null +++ b/Mailman/docs/tagger.txt @@ -0,0 +1,244 @@ +Message tagger +============== + +Mailman has a topics system which works like this: a mailing list +administrator sets up one or more topics, each of which is essentially a named regular +expression. The topic name can be any arbitrary string, and the name serves +double duty as the 'topic tag'. Each message that flows through the mailing list has +its Subject: and Keywords: headers compared against these regular +expressions. The message then gets tagged with the topic names of each hit. + + >>> from Mailman.Handlers.Tagger import process + >>> from Mailman.Message import Message + >>> from Mailman.Queue.Switchboard import Switchboard + >>> from Mailman.configuration import config + >>> from Mailman.database import flush + >>> from email import message_from_string + >>> mlist = config.list_manager.create('_xtest@example.com') + +Topics must be enabled for Mailman to do any topic matching, even if topics +are defined. + + >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)] + >>> mlist.topics_enabled = False + >>> mlist.topics_bodylines_limit = 0 + >>> flush() + + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + <BLANKLINE> + <BLANKLINE> + >>> msgdata + {} + +However, once topics are enabled, messages will be tagged. There are two +artifacts of tagging: an X-Topics: header is added with the topic name, and +the message metadata gets a key with a list of matching topic names. + + >>> mlist.topics_enabled = True + >>> flush() + >>> msg = message_from_string("""\ + ... Subject: foobar + ... Keywords: barbaz + ... + ... 
""", Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: foobar + Keywords: barbaz + X-Topics: bar fight + <BLANKLINE> + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning body lines +------------------- + +The tagger can also look at a certain number of body lines, but only for +Subject: and Keyword: header-like lines. When set to zero, no body lines are +scanned. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata + {} + +But let the tagger scan a few body lines and the matching headers will be +found. + + >>> mlist.topics_bodylines_limit = 5 + >>> flush() + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... X-Ignore: something else + ... Subject: foobar + ... Keywords: barbaz + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + X-Topics: bar fight + <BLANKLINE> + X-Ignore: something else + Subject: foobar + Keywords: barbaz + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +However, scanning stops at the first body line that doesn't look like a +header. + + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... This is not a header + ... Subject: foobar + ... Keywords: barbaz + ... 
""", Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + From: aperson@example.com + Subject: nothing + Keywords: at all + <BLANKLINE> + This is not a header + Subject: foobar + Keywords: barbaz + >>> msgdata + {} + +When set to a negative number, all body lines will be scanned. + + >>> mlist.topics_bodylines_limit = -1 + >>> flush() + >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100) + >>> msg = message_from_string("""\ + ... From: aperson@example.com + ... Subject: nothing + ... Keywords: at all + ... + ... %s + ... Subject: foobar + ... Keywords: barbaz + ... """ % lots_of_headers, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> # Rather than print out 100 X-Ignore: headers, let's just prove that + >>> # the X-Topics: header exists, meaning that the tagger did its job. + >>> msg['x-topics'] + 'bar fight' + >>> msgdata['topichits'] + ['bar fight'] + + +Scanning sub-parts +------------------ + +The tagger will also scan the body lines of text subparts in a multipart +message, using the same rules as if all those body lines lived in a single +text payload. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... Content-Type: multipart/alternative; boundary="BOUNDARY" + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg.as_string() + Subject: Was + Keywords: Raw + Content-Type: multipart/alternative; boundary="BOUNDARY" + X-Topics: bar fight + <BLANKLINE> + --BOUNDARY + From: sabo + To: obas + <BLANKLINE> + Subject: farbaw + Keywords: barbaz + <BLANKLINE> + --BOUNDARY-- + <BLANKLINE> + >>> msgdata['topichits'] + ['bar fight'] + +But the tagger will not descend into non-text parts. + + >>> msg = message_from_string("""\ + ... Subject: Was + ... Keywords: Raw + ... 
Content-Type: multipart/alternative; boundary=BOUNDARY + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY + ... From: sabo + ... To: obas + ... Content-Type: message/rfc822 + ... + ... Subject: farbaw + ... Keywords: barbaz + ... + ... --BOUNDARY-- + ... """, Message) + >>> msgdata = {} + >>> process(mlist, msg, msgdata) + >>> print msg['x-topics'] + None + >>> msgdata + {} diff --git a/Mailman/testing/test_handlers.py b/Mailman/testing/test_handlers.py index 840d62e55..8f9dbedb8 100644 --- a/Mailman/testing/test_handlers.py +++ b/Mailman/testing/test_handlers.py @@ -44,7 +44,6 @@ from Mailman.Handlers import Moderate from Mailman.Handlers import Scrubber # Don't test handlers such as SMTPDirect and Sendmail here from Mailman.Handlers import SpamDetect -from Mailman.Handlers import Tagger from Mailman.Handlers import ToArchive from Mailman.Handlers import ToDigest from Mailman.Handlers import ToOutgoing @@ -434,121 +433,6 @@ A message. 
-class TestTagger(TestBase): - def setUp(self): - TestBase.setUp(self) - self._mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', 1)] - self._mlist.topics_enabled = 1 - - def test_short_circuit(self): - self._mlist.topics_enabled = 0 - rtn = Tagger.process(self._mlist, None, {}) - # Not really a great test, but there's little else to assert - self.assertEqual(rtn, None) - - def test_simple(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = 0 - msg = email.message_from_string("""\ -Subject: foobar -Keywords: barbaz - -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_all_body_lines_plain_text(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw - -Subject: farbaw -Keywords: barbaz -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_no_body_lines(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = 0 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw - -Subject: farbaw -Keywords: barbaz -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], None) - eq(msgdata.get('topichits'), None) - - def test_body_lines_in_multipart(self): - eq = self.assertEqual - mlist = self._mlist - mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw -Content-Type: multipart/alternative; boundary="BOUNDARY" - ---BOUNDARY -From: sabo -To: obas - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY-- -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], 'bar fight') - eq(msgdata.get('topichits'), ['bar fight']) - - def test_body_lines_no_part(self): - eq = self.assertEqual - mlist = self._mlist - 
mlist.topics_bodylines_limit = -1 - msg = email.message_from_string("""\ -Subject: Was -Keywords: Raw -Content-Type: multipart/alternative; boundary=BOUNDARY - ---BOUNDARY -From: sabo -To: obas -Content-Type: message/rfc822 - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY -From: sabo -To: obas -Content-Type: message/rfc822 - -Subject: farbaw -Keywords: barbaz - ---BOUNDARY-- -""") - msgdata = {} - Tagger.process(mlist, msg, msgdata) - eq(msg['x-topics'], None) - eq(msgdata.get('topichits'), None) - - - class TestToArchive(TestBase): def setUp(self): TestBase.setUp(self) @@ -813,7 +697,6 @@ def test_suite(): suite.addTest(unittest.makeSuite(TestModerate)) suite.addTest(unittest.makeSuite(TestScrubber)) suite.addTest(unittest.makeSuite(TestSpamDetect)) - suite.addTest(unittest.makeSuite(TestTagger)) suite.addTest(unittest.makeSuite(TestToArchive)) suite.addTest(unittest.makeSuite(TestToDigest)) suite.addTest(unittest.makeSuite(TestToOutgoing)) |
