diff options
| author | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2009-01-25 13:01:41 -0500 |
| commit | eefd06f1b88b8ecbb23a9013cd223b72ca85c20d (patch) | |
| tree | 72c947fe16fce0e07e996ee74020b26585d7e846 /mailman/pipeline/docs/tagger.txt | |
| parent | 07871212f74498abd56bef3919bf3e029eb8b930 (diff) | |
| download | mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.gz mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.tar.zst mailman-eefd06f1b88b8ecbb23a9013cd223b72ca85c20d.zip | |
Diffstat (limited to 'mailman/pipeline/docs/tagger.txt')
| -rw-r--r-- | mailman/pipeline/docs/tagger.txt | 235 |
1 files changed, 0 insertions, 235 deletions
diff --git a/mailman/pipeline/docs/tagger.txt b/mailman/pipeline/docs/tagger.txt deleted file mode 100644 index 9f0bcd4b2..000000000 --- a/mailman/pipeline/docs/tagger.txt +++ /dev/null @@ -1,235 +0,0 @@ -Message tagger -============== - -Mailman has a topics system which works like this: a mailing list -administrator sets up one or more topics, each of which is essentially a named regular -expression. The topic name can be any arbitrary string, and the name serves -double duty as the 'topic tag'. Each message that flows through the mailing list has -its Subject: and Keywords: headers compared against these regular -expressions. The message then gets tagged with the topic names of each hit. - - >>> from mailman.pipeline.tagger import process - >>> mlist = config.db.list_manager.create(u'_xtest@example.com') - -Topics must be enabled for Mailman to do any topic matching, even if topics -are defined. - - >>> mlist.topics = [('bar fight', '.*bar.*', 'catch any bars', False)] - >>> mlist.topics_enabled = False - >>> mlist.topics_bodylines_limit = 0 - - >>> msg = message_from_string("""\ - ... Subject: foobar - ... Keywords: barbaz - ... - ... """) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - Subject: foobar - Keywords: barbaz - <BLANKLINE> - <BLANKLINE> - >>> msgdata - {} - -However, once topics are enabled, messages will be tagged. There are two -artifacts of tagging: an X-Topics: header is added with the topic name, and -the message metadata gets a key with a list of matching topic names. - - >>> mlist.topics_enabled = True - >>> msg = message_from_string("""\ - ... Subject: foobar - ... Keywords: barbaz - ... - ... 
""") - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - Subject: foobar - Keywords: barbaz - X-Topics: bar fight - <BLANKLINE> - <BLANKLINE> - >>> msgdata['topichits'] - ['bar fight'] - - -Scanning body lines -------------------- - -The tagger can also look at a certain number of body lines, but only for -Subject: and Keyword: header-like lines. When set to zero, no body lines are -scanned. - - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... Subject: nothing - ... Keywords: at all - ... - ... X-Ignore: something else - ... Subject: foobar - ... Keywords: barbaz - ... """) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - From: aperson@example.com - Subject: nothing - Keywords: at all - <BLANKLINE> - X-Ignore: something else - Subject: foobar - Keywords: barbaz - <BLANKLINE> - >>> msgdata - {} - -But let the tagger scan a few body lines and the matching headers will be -found. - - >>> mlist.topics_bodylines_limit = 5 - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... Subject: nothing - ... Keywords: at all - ... - ... X-Ignore: something else - ... Subject: foobar - ... Keywords: barbaz - ... """) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - From: aperson@example.com - Subject: nothing - Keywords: at all - X-Topics: bar fight - <BLANKLINE> - X-Ignore: something else - Subject: foobar - Keywords: barbaz - <BLANKLINE> - >>> msgdata['topichits'] - ['bar fight'] - -However, scanning stops at the first body line that doesn't look like a -header. - - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... Subject: nothing - ... Keywords: at all - ... - ... This is not a header - ... Subject: foobar - ... Keywords: barbaz - ... 
""") - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - From: aperson@example.com - Subject: nothing - Keywords: at all - <BLANKLINE> - This is not a header - Subject: foobar - Keywords: barbaz - >>> msgdata - {} - -When set to a negative number, all body lines will be scanned. - - >>> mlist.topics_bodylines_limit = -1 - >>> lots_of_headers = '\n'.join(['X-Ignore: zip'] * 100) - >>> msg = message_from_string("""\ - ... From: aperson@example.com - ... Subject: nothing - ... Keywords: at all - ... - ... %s - ... Subject: foobar - ... Keywords: barbaz - ... """ % lots_of_headers) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> # Rather than print out 100 X-Ignore: headers, let's just prove that - >>> # the X-Topics: header exists, meaning that the tagger did its job. - >>> msg['x-topics'] - u'bar fight' - >>> msgdata['topichits'] - ['bar fight'] - - -Scanning sub-parts ------------------- - -The tagger will also scan the body lines of text subparts in a multipart -message, using the same rules as if all those body lines lived in a single -text payload. - - >>> msg = message_from_string("""\ - ... Subject: Was - ... Keywords: Raw - ... Content-Type: multipart/alternative; boundary="BOUNDARY" - ... - ... --BOUNDARY - ... From: sabo - ... To: obas - ... - ... Subject: farbaw - ... Keywords: barbaz - ... - ... --BOUNDARY-- - ... """) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg.as_string() - Subject: Was - Keywords: Raw - Content-Type: multipart/alternative; boundary="BOUNDARY" - X-Topics: bar fight - <BLANKLINE> - --BOUNDARY - From: sabo - To: obas - <BLANKLINE> - Subject: farbaw - Keywords: barbaz - <BLANKLINE> - --BOUNDARY-- - <BLANKLINE> - >>> msgdata['topichits'] - ['bar fight'] - -But the tagger will not descend into non-text parts. - - >>> msg = message_from_string("""\ - ... Subject: Was - ... Keywords: Raw - ... Content-Type: multipart/alternative; boundary=BOUNDARY - ... - ... --BOUNDARY - ... 
From: sabo - ... To: obas - ... Content-Type: message/rfc822 - ... - ... Subject: farbaw - ... Keywords: barbaz - ... - ... --BOUNDARY - ... From: sabo - ... To: obas - ... Content-Type: message/rfc822 - ... - ... Subject: farbaw - ... Keywords: barbaz - ... - ... --BOUNDARY-- - ... """) - >>> msgdata = {} - >>> process(mlist, msg, msgdata) - >>> print msg['x-topics'] - None - >>> msgdata - {} |
