author: bwarsaw 2001-07-19 06:35:22 +0000
committer: bwarsaw 2001-07-19 06:35:22 +0000
commit: 9d7862db08e3017e6cca5ef8a0f96c8759ca1a44 (patch)
tree: b9abe2e5e4c9deb25b46cd9ec8aa3602779573f3 /Mailman/Handlers/Tagger.py
parent: f55c95e2c9bfc437a72d5040fbdda521c9557ebe (diff)
download: mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.tar.gz
mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.tar.zst
mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.zip
1 files changed, 101 insertions, 0 deletions
diff --git a/Mailman/Handlers/Tagger.py b/Mailman/Handlers/Tagger.py
new file mode 100644
index 000000000..27c178751
--- /dev/null
+++ b/Mailman/Handlers/Tagger.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2001 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software 
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Extract topics from the original mail message.
+"""
+
+import re
+from mimelib.Parser import Parser
+from mimelib.MsgReader import MsgReader
+
+from Mailman.Logging.Syslog import syslog
+
+CRNL = '\r\n'
+NL = '\n'
+NLTAB = '\n\t'
+
+
+
+def process(mlist, msg, msgdata):
+    if not mlist.topics:
+        return
+    # Extract the Subject:, Keywords:, and possibly body text
+    matchlines = []
+    matchlines.append(msg.get('subject', None))
+    matchlines.append(msg.get('keywords', None))
+    if mlist.topics_bodylines_limit == 0:
+        # Don't scan any body lines
+        pass
+    elif mlist.topics_bodylines_limit < 0:
+        # Scan all body lines
+        matchlines.extend(scanbody(msg))
+    else:
+        # Scan just some of the body lines
+        matchlines.extend(scanbody(msg, mlist.topics_bodylines_limit))
+    matchlines = filter(None, matchlines)
+    # For each regular expression in the topics list, see if any of the lines
+    # of interest from the message match the regexp.  If so, the message gets
+    # added to the specific topics bucket.
+    hits = {}
+    for name, pattern, desc, emptyflag in mlist.topics:
+        cre = re.compile(pattern, re.IGNORECASE | re.VERBOSE)
+        for line in matchlines:
+            if cre.search(line):
+                hits[name] = 1
+                break
+    if hits:
+        msgdata['topichits'] = hits.keys()
+        msg['X-Topics'] = NLTAB.join(hits.keys())
+    
+
+
+def scanbody(msg, numlines=None):
+    # We only scan the body of the message if it is of MIME type text/plain,
+    # or if the outer type is multipart/alternative and there is a text/plain
+    # part.  Anything else, and the body is ignored for header-scan purposes.
+    found = None
+    if msg.gettype('text/plain') == 'text/plain':
+        found = msg
+    elif msg.ismultipart() and msg.gettype() == 'multipart/alternative':
+        for found in msg.get_payload():
+            if found.gettype('text/plain') == 'text/plain':
+                break
+        else:
+            found = None
+    if not found:
+        return []
+    # Now that we have a Message object that meets our criteria, let's extract
+    # the first numlines of body text.
+    lines = []
+    reader = MsgReader(msg)
+    lineno = 0
+    while numlines is None or lineno < numlines:
+        line = reader.readline()
+        if not line:
+            break
+        # Blank lines don't count
+        if not line.strip():
+            continue
+        lineno += 1
+        # Stop scanning if we find a line that would not be recognized as
+        # either a header or a continuation line
+        if line[0] not in ' \t' and line.find(':') < 0:
+            break
+        lines.append(line)
+    # Concatenate those body text lines with newlines, and then feed it to the
+    # mimelib message Parser
+    msg = Parser().parsestr(NL.join(lines))
+    return msg.getall('subject', []) + msg.getall('keywords', [])
author	bwarsaw	2001-07-19 06:35:22 +0000
committer	bwarsaw	2001-07-19 06:35:22 +0000
commit	9d7862db08e3017e6cca5ef8a0f96c8759ca1a44 (patch)
tree	b9abe2e5e4c9deb25b46cd9ec8aa3602779573f3 /Mailman/Handlers/Tagger.py
parent	f55c95e2c9bfc437a72d5040fbdda521c9557ebe (diff)
download	mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.tar.gz mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.tar.zst mailman-9d7862db08e3017e6cca5ef8a0f96c8759ca1a44.zip