diff options
| author | tkikuchi | 2005-08-28 05:31:27 +0000 |
|---|---|---|
| committer | tkikuchi | 2005-08-28 05:31:27 +0000 |
| commit | 067dc15b2432bb285ab5e4a3eac6f4dddd67ed19 (patch) | |
| tree | ceac72251ee33742bfff7626c99dde163d3da946 /Mailman/Handlers/SpamDetect.py | |
| parent | bc1dad4f90a26ade7c4dd6d2863de88856e8b4b6 (diff) | |
| download | mailman-067dc15b2432bb285ab5e4a3eac6f4dddd67ed19.tar.gz mailman-067dc15b2432bb285ab5e4a3eac6f4dddd67ed19.tar.zst mailman-067dc15b2432bb285ab5e4a3eac6f4dddd67ed19.zip | |
Diffstat (limited to 'Mailman/Handlers/SpamDetect.py')
| -rw-r--r-- | Mailman/Handlers/SpamDetect.py | 112 |
1 files changed, 101 insertions, 11 deletions
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py index 6a67f3410..b5f9d0b6a 100644 --- a/Mailman/Handlers/SpamDetect.py +++ b/Mailman/Handlers/SpamDetect.py @@ -1,17 +1,17 @@ -# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2004 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software +# along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """Do more detailed spam detection. @@ -25,26 +25,116 @@ TBD: This needs to be made more configurable and robust. """ import re +from cStringIO import StringIO + +from email.Generator import Generator from Mailman import mm_cfg from Mailman import Errors +from Mailman import i18n +from Mailman.Handlers.Hold import hold_for_approval + +try: + True, False +except NameError: + True = 1 + False = 0 + +# First, play footsie with _ so that the following are marked as translated, +# but aren't actually translated until we need the text later on. +def _(s): + return s class SpamDetected(Errors.DiscardMessage): """The message contains known spam""" +class HeaderMatchHold(Errors.HoldMessage): + reason = _('The message headers matched a filter rule') + + +# And reset the translator +_ = i18n._ + + + +class Tee: + def __init__(self, outfp_a, outfp_b): + self._outfp_a = outfp_a + self._outfp_b = outfp_b + + def write(self, s): + self._outfp_a.write(s) + self._outfp_b.write(s) + + +# Class to capture the headers separate from the message body +class HeaderGenerator(Generator): + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + self._headertxt = '' + + def _write_headers(self, msg): + sfp = StringIO() + oldfp = self._fp + self._fp = Tee(oldfp, sfp) + try: + Generator._write_headers(self, msg) + finally: + self._fp = oldfp + self._headertxt = sfp.getvalue() + + def header_text(self): + return self._headertxt + def process(mlist, msg, msgdata): - if msgdata.get('approved'): + # Don't check if the message has been approved OR it is generated + # internally for administration because holding '-owner' notification + # may cause infinite loop of checking. (Actually, it is stopped + # elsewhere.) + if msgdata.get('approved') or msg.get('x-list-administrivia'): return + # First do site hard coded header spam checks for header, regex in mm_cfg.KNOWN_SPAMMERS: cre = re.compile(regex, re.IGNORECASE) - value = msg[header] - if not value: + for value in msg.get_all(header, []): + mo = cre.search(value) + if mo: + # we've detected spam, so throw the message away + raise SpamDetected + # Now do header_filter_rules + # TK: Collect headers in sub-parts because attachment filename + # extension may be a clue to possible virus/spam. + if msg.is_multipart(): + headers = '' + for p in msg.walk(): + g = HeaderGenerator(StringIO()) + g.flatten(p) + headers += g.header_text() + else: + # Only the top level header should be checked. + g = HeaderGenerator(StringIO()) + g.flatten(msg) + headers = g.header_text() + # Now reshape headers (remove extra CR and connect multiline). + headers = re.sub('\n+', '\n', headers) + headers = re.sub('\n\s', ' ', headers) + for patterns, action, empty in mlist.header_filter_rules: + if action == mm_cfg.DEFER: continue - mo = cre.search(value) - if mo: - # we've detected spam, so throw the message away - raise SpamDetected + for pattern in patterns.splitlines(): + if pattern.startswith('#'): + continue + if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE): + if action == mm_cfg.DISCARD: + raise Errors.DiscardMessage + if action == mm_cfg.REJECT: + raise Errors.RejectMessage( + _('Message rejected by filter rule match')) + if action == mm_cfg.HOLD: + hold_for_approval(mlist, msg, msgdata, HeaderMatchHold) + if action == mm_cfg.ACCEPT: + return |
