diff options
Diffstat (limited to 'Mailman/EncWord.py')
| -rw-r--r-- | Mailman/EncWord.py | 92 |
1 files changed, 58 insertions, 34 deletions
diff --git a/Mailman/EncWord.py b/Mailman/EncWord.py index fdcd2ff4b..929768e84 100644 --- a/Mailman/EncWord.py +++ b/Mailman/EncWord.py @@ -1,65 +1,85 @@ +# Copyright (C) 1998,1999,2000 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + """Decode encoded-words as defined by RFC 2047""" +import sys +import string import base64 + + class DecodeError(ValueError): __super_init = ValueError.__init__ def __init__(self, msg): - self.__super_init("invalid encoded-word: %s" % msg) + self.__super_init('invalid encoded-word: %s' % msg) + + class Decoder: """Decode mail header encoded-word format defined by RFC 2047""" offset = 0 def decode(self, s): - """Decode an encoded-word + """Decode an encoded-word. - Returns the charset of the encoded-word, the decoded text, and - the position of the first character following the - encoded-word. + Returns the charset of the encoded-word, the decoded text, and the + position of the first character following the encoded-word. - The first position of the input string must by the first - character of the encoded-word. + The first position of the input string must by the first character of + the encoded-word. """ - if not s.startswith('=?'): - raise DecodeError("must start with '=?', not %s" % repr(s[:2])) + if s[:2] <> '=?': + raise DecodeError('must start with "=?", not %s' % repr(s[:2])) charset = self._get_charset(s) encoding = self._get_encoding(s) _text = self._get_text(s) - + # encoding must be either 'q' or 'b', ensured by _get_encoding() if encoding == 'q': text = self._decode_q(_text) else: text = self._decode_b(_text) - return charset, text, self.offset - # XXX technically the charset and encoding can't contain SPACE, - # CTLs, or especials; do not currently check this + # TBD: Technically the charset and encoding can't contain SPACE, CTLs, or + # especials; do not currently check this. def _get_charset(self, s): - i = s.find('?', 2) + i = string.find(s, '?', 2) if i == -1: - raise DecodeError("can't find of charset") + raise DecodeError("can't find end of charset") self.offset = i + 1 return s[2:i] _valid_encodings = ('q', 'b') def _get_encoding(self, s): - i = s.find('?', self.offset) - if i == -1: + i = string.find(s, '?', self.offset) + if i < 0: raise DecodeError("can't find encoding") - enc = s[self.offset:i].lower() + enc = string.lower(s[self.offset:i]) self.offset = i + 1 if enc not in Decoder._valid_encodings: - raise DecodeError("'%s' is not a valid encoding" % enc) + raise DecodeError('not a valid encoding: %s' % enc) return enc def _get_text(self, s): - i = s.find('?=', self.offset) - if i == -1: + i = string.find(s, '?=', self.offset) + if i < 0: raise DecodeError("can't find end of encoded text") text = s[self.offset:i] self.offset = i + 2 @@ -72,29 +92,29 @@ class Decoder: chunks = [] offset = 0 end = len(s) - import sys while offset < end: - i = s.find('=', offset) - j = s.find('_', offset) - if j == i == -1: + i = string.find(s, '=', offset) + j = string.find(s, '_', offset) + if i < 0 and j < 0: chunks.append(s[offset:]) break - if (j < i and j != -1) or i == -1: + if (j < i and j >= 0) or i < 0: chunks.append(s[offset:j]) chunks.append(Decoder.SPACE) offset = j + 1 else: chunks.append(s[offset:i]) hexdig = s[i+1:i+3] - chunks.append(chr(int(hexdig, 16))) + chunks.append(chr(string.atoi(hexdig, 16))) offset = i + 3 - return "".join(chunks) + return string.join(chunks, '') def _decode_b(self, s): """B encoding == base64 encoding defined by RFC 2045""" - import sys return base64.decodestring(s) + + def decode(s): """Decode a string containing encoded words""" _decode = Decoder().decode @@ -103,8 +123,8 @@ def decode(s): offset = 0 charset = None while 1: - i = s.find('=?', offset) - if i == -1: + i = string.find(s, '=?', offset) + if i < 0: chunks.append(s[offset:]) break chunks.append(s[offset:i]) @@ -112,11 +132,13 @@ def decode(s): offset = offset + i if charset is None: charset = _charset - elif charset != _charset: - raise ValueError, "can not decode string with multiple charsets" + elif charset <> _charset: + raise ValueError("can't decode string with multiple charsets") chunks.append(text) - return "".join(chunks), charset + return string.join(chunks, ''), charset + + def test(): examples = [ # valid @@ -142,6 +164,8 @@ def test(): else: print text, charset + + if __name__ == "__main__": test() |
