summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBarry Warsaw2007-07-04 00:16:48 -0400
committerBarry Warsaw2007-07-04 00:16:48 -0400
commit7c32d18612ce6bdc2b8a10a307df70f60013cbdb (patch)
tree1abcdc2adb683e4273c74fd5b51b94820e5dbf75
parent11dfa4f53db6416d2adba0506d4af4d25e490e6e (diff)
downloadmailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.tar.gz
mailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.tar.zst
mailman-7c32d18612ce6bdc2b8a10a307df70f60013cbdb.zip
Convert the TestTagger to a doctest. No other cleaning up of the handler
module is done.
-rw-r--r--Mailman/docs/tagger.txt244
-rw-r--r--Mailman/testing/test_handlers.py117
2 files changed, 244 insertions, 117 deletions
diff --git a/Mailman/docs/tagger.txt b/Mailman/docs/tagger.txt
new file mode 100644
index 000000000..1d576b9ae
--- /dev/null
+++ b/Mailman/docs/tagger.txt
@@ -0,0 +1,244 @@
+Message tagger
+==============
+
+Mailman has a topics system which works like this: a mailing list
+administrator sets up one or more topics, each essentially a named regular
+expression. The topic name can be any arbitrary string, and the name serves
+double duty as the 'topic tag'. Each message that flows through the mailing
+list has its Subject: and Keywords: headers compared against these regular
+expressions. The message then gets tagged with the topic names of each hit.
+
+ >>> from Mailman.Handlers.Tagger import process
+ >>> from Mailman.Message import Message
+ >>> from Mailman.Queue.Switchboard import Switchboard
+ >>> from Mailman.configuration import config
+ >>> from Mailman.database import flush
+ >>> from email import message_from_string
+ >>> mlist = config.list_manager.create('_xtest@example.com')
+
+Topics must be enabled for Mailman to do any topic matching, even if topics
+are defined.
+
+ >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)]
+ >>> mlist.topics_enabled = False
+ >>> mlist.topics_bodylines_limit = 0
+ >>> flush()
+
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ <BLANKLINE>
+ >>> msgdata
+ {}
+
+However, once topics are enabled, messages will be tagged. There are two
+artifacts of tagging: an X-Topics: header is added with the topic name, and
+the message metadata gets a key with a list of matching topic names.
+
+ >>> mlist.topics_enabled = True
+ >>> flush()
+ >>> msg = message_from_string("""\
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ...
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: foobar
+ Keywords: barbaz
+ X-Topics: bar fight
+ <BLANKLINE>
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+
+Scanning body lines
+-------------------
+
+The tagger can also look at a certain number of body lines, but only for
+Subject: and Keywords: header-like lines. When the limit is set to zero, no
+body lines are scanned.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... X-Ignore: something else
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ <BLANKLINE>
+ X-Ignore: something else
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ >>> msgdata
+ {}
+
+But let the tagger scan a few body lines and the matching headers will be
+found.
+
+ >>> mlist.topics_bodylines_limit = 5
+ >>> flush()
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... X-Ignore: something else
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ X-Topics: bar fight
+ <BLANKLINE>
+ X-Ignore: something else
+ Subject: foobar
+ Keywords: barbaz
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+However, scanning stops at the first body line that doesn't look like a
+header.
+
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... This is not a header
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ From: aperson@example.com
+ Subject: nothing
+ Keywords: at all
+ <BLANKLINE>
+ This is not a header
+ Subject: foobar
+ Keywords: barbaz
+ >>> msgdata
+ {}
+
+When the limit is set to a negative number, all body lines will be scanned.
+
+ >>> mlist.topics_bodylines_limit = -1
+ >>> flush()
+ >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100)
+ >>> msg = message_from_string("""\
+ ... From: aperson@example.com
+ ... Subject: nothing
+ ... Keywords: at all
+ ...
+ ... %s
+ ... Subject: foobar
+ ... Keywords: barbaz
+ ... """ % lots_of_headers, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> # Rather than print out 100 X-Ignore: headers, let's just prove that
+ >>> # the X-Topics: header exists, meaning that the tagger did its job.
+ >>> msg['x-topics']
+ 'bar fight'
+ >>> msgdata['topichits']
+ ['bar fight']
+
+
+Scanning sub-parts
+------------------
+
+The tagger will also scan the body lines of text subparts in a multipart
+message, using the same rules as if all those body lines lived in a single
+text payload.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Was
+ ... Keywords: Raw
+ ... Content-Type: multipart/alternative; boundary="BOUNDARY"
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY--
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg.as_string()
+ Subject: Was
+ Keywords: Raw
+ Content-Type: multipart/alternative; boundary="BOUNDARY"
+ X-Topics: bar fight
+ <BLANKLINE>
+ --BOUNDARY
+ From: sabo
+ To: obas
+ <BLANKLINE>
+ Subject: farbaw
+ Keywords: barbaz
+ <BLANKLINE>
+ --BOUNDARY--
+ <BLANKLINE>
+ >>> msgdata['topichits']
+ ['bar fight']
+
+But the tagger will not descend into non-text parts.
+
+ >>> msg = message_from_string("""\
+ ... Subject: Was
+ ... Keywords: Raw
+ ... Content-Type: multipart/alternative; boundary=BOUNDARY
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ... Content-Type: message/rfc822
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY
+ ... From: sabo
+ ... To: obas
+ ... Content-Type: message/rfc822
+ ...
+ ... Subject: farbaw
+ ... Keywords: barbaz
+ ...
+ ... --BOUNDARY--
+ ... """, Message)
+ >>> msgdata = {}
+ >>> process(mlist, msg, msgdata)
+ >>> print msg['x-topics']
+ None
+ >>> msgdata
+ {}
diff --git a/Mailman/testing/test_handlers.py b/Mailman/testing/test_handlers.py
index 840d62e55..8f9dbedb8 100644
--- a/Mailman/testing/test_handlers.py
+++ b/Mailman/testing/test_handlers.py
@@ -44,7 +44,6 @@ from Mailman.Handlers import Moderate
from Mailman.Handlers import Scrubber
# Don't test handlers such as SMTPDirect and Sendmail here
from Mailman.Handlers import SpamDetect
-from Mailman.Handlers import Tagger
from Mailman.Handlers import ToArchive
from Mailman.Handlers import ToDigest
from Mailman.Handlers import ToOutgoing
@@ -434,121 +433,6 @@ A message.
-class TestTagger(TestBase):
- def setUp(self):
- TestBase.setUp(self)
- self._mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', 1)]
- self._mlist.topics_enabled = 1
-
- def test_short_circuit(self):
- self._mlist.topics_enabled = 0
- rtn = Tagger.process(self._mlist, None, {})
- # Not really a great test, but there's little else to assert
- self.assertEqual(rtn, None)
-
- def test_simple(self):
- eq = self.assertEqual
- mlist = self._mlist
- mlist.topics_bodylines_limit = 0
- msg = email.message_from_string("""\
-Subject: foobar
-Keywords: barbaz
-
-""")
- msgdata = {}
- Tagger.process(mlist, msg, msgdata)
- eq(msg['x-topics'], 'bar fight')
- eq(msgdata.get('topichits'), ['bar fight'])
-
- def test_all_body_lines_plain_text(self):
- eq = self.assertEqual
- mlist = self._mlist
- mlist.topics_bodylines_limit = -1
- msg = email.message_from_string("""\
-Subject: Was
-Keywords: Raw
-
-Subject: farbaw
-Keywords: barbaz
-""")
- msgdata = {}
- Tagger.process(mlist, msg, msgdata)
- eq(msg['x-topics'], 'bar fight')
- eq(msgdata.get('topichits'), ['bar fight'])
-
- def test_no_body_lines(self):
- eq = self.assertEqual
- mlist = self._mlist
- mlist.topics_bodylines_limit = 0
- msg = email.message_from_string("""\
-Subject: Was
-Keywords: Raw
-
-Subject: farbaw
-Keywords: barbaz
-""")
- msgdata = {}
- Tagger.process(mlist, msg, msgdata)
- eq(msg['x-topics'], None)
- eq(msgdata.get('topichits'), None)
-
- def test_body_lines_in_multipart(self):
- eq = self.assertEqual
- mlist = self._mlist
- mlist.topics_bodylines_limit = -1
- msg = email.message_from_string("""\
-Subject: Was
-Keywords: Raw
-Content-Type: multipart/alternative; boundary="BOUNDARY"
-
---BOUNDARY
-From: sabo
-To: obas
-
-Subject: farbaw
-Keywords: barbaz
-
---BOUNDARY--
-""")
- msgdata = {}
- Tagger.process(mlist, msg, msgdata)
- eq(msg['x-topics'], 'bar fight')
- eq(msgdata.get('topichits'), ['bar fight'])
-
- def test_body_lines_no_part(self):
- eq = self.assertEqual
- mlist = self._mlist
- mlist.topics_bodylines_limit = -1
- msg = email.message_from_string("""\
-Subject: Was
-Keywords: Raw
-Content-Type: multipart/alternative; boundary=BOUNDARY
-
---BOUNDARY
-From: sabo
-To: obas
-Content-Type: message/rfc822
-
-Subject: farbaw
-Keywords: barbaz
-
---BOUNDARY
-From: sabo
-To: obas
-Content-Type: message/rfc822
-
-Subject: farbaw
-Keywords: barbaz
-
---BOUNDARY--
-""")
- msgdata = {}
- Tagger.process(mlist, msg, msgdata)
- eq(msg['x-topics'], None)
- eq(msgdata.get('topichits'), None)
-
-
-
class TestToArchive(TestBase):
def setUp(self):
TestBase.setUp(self)
@@ -813,7 +697,6 @@ def test_suite():
suite.addTest(unittest.makeSuite(TestModerate))
suite.addTest(unittest.makeSuite(TestScrubber))
suite.addTest(unittest.makeSuite(TestSpamDetect))
- suite.addTest(unittest.makeSuite(TestTagger))
suite.addTest(unittest.makeSuite(TestToArchive))
suite.addTest(unittest.makeSuite(TestToDigest))
suite.addTest(unittest.makeSuite(TestToOutgoing))