diff options
| author | bwarsaw | 2002-10-02 14:01:55 +0000 |
|---|---|---|
| committer | bwarsaw | 2002-10-02 14:01:55 +0000 |
| commit | 341386bbfb78fb23d09cfbc280b32d14a80e32e3 (patch) | |
| tree | e91faad3e15b95fd988d74fa51fac1706941bca6 /Mailman | |
| parent | 6b457c295f03acbd4df4a2f730ca94601936467e (diff) | |
| download | mailman-341386bbfb78fb23d09cfbc280b32d14a80e32e3.tar.gz mailman-341386bbfb78fb23d09cfbc280b32d14a80e32e3.tar.zst mailman-341386bbfb78fb23d09cfbc280b32d14a80e32e3.zip | |
Better support for "funny" characters in subject prefixes.
encode_p(): Removed.
_isunicode(): Helper for unicode type testing.
prefix_subject(): Better algorithm for handling unicode subject
prefixes. Now we always set the Subject header to a Header instance
if we're adding the prefix. Always convert the prefix and all
previous Subject header bits to unicode (possibly with character
replacements) so that the Header class does the right thing, i.e. does
the us-ascii, charset hint, utf-8 encoding. Because of this, if the
list's charset is us-ascii, we'll substitute iso-8859-1 for a slightly
wider character coverage.
Diffstat (limited to 'Mailman')
| -rw-r--r-- | Mailman/Handlers/CookHeaders.py | 74 |
1 files changed, 32 insertions, 42 deletions
diff --git a/Mailman/Handlers/CookHeaders.py b/Mailman/Handlers/CookHeaders.py index 55ed5ac4d..c7e0f74a6 100644 --- a/Mailman/Handlers/CookHeaders.py +++ b/Mailman/Handlers/CookHeaders.py @@ -19,6 +19,7 @@ from __future__ import nested_scopes import re +from types import UnicodeType from email.Charset import Charset from email.Header import Header, decode_header @@ -35,6 +36,11 @@ MAXLINELEN = 78 +def _isunicode(s): + return isinstance(s, UnicodeType) + + + def process(mlist, msg, msgdata): # Set the "X-Ack: no" header if noack flag is set. if msgdata.get('noack'): @@ -163,29 +169,6 @@ def process(mlist, msg, msgdata): -def encode_p(mlist, subject, prefix): - # Decide whether we're going to encode the prefix or not - if mlist.encode_ascii_prefixes == 1: - # Always encode - return 1 - try: - prefix.encode('us-ascii') - except UnicodeError: - # There are non-ASCII characters in the prefix, so we must encode it - return 1 - if mlist.encode_ascii_prefixes == 0: - # Never encode if the prefix is ASCII - return 0 - # What's left is `As needed' encoding. Meaning, we'll only encode the - # prefix if the Subject: header contains non-ASCII characters. Note that - # subject might be a Header instance, so str()-ify it first. - try: - str(subject).encode('us-ascii') - except UnicodeError: - return 1 - return 0 - - def prefix_subject(mlist, msg, msgdata): # Add the subject prefix unless the message is a digest or is being fast # tracked (e.g. internally crafted, delivered to a single user such as the @@ -198,27 +181,34 @@ def prefix_subject(mlist, msg, msgdata): # and each word of the prefix is encoded in a different chunk in the # header, we won't find it. I think in practice that's unlikely though. headerbits = decode_header(subject) - has_prefix = 0 if prefix and subject: pattern = re.escape(prefix.strip()) for decodedsubj, charset in headerbits: if re.search(pattern, decodedsubj, re.IGNORECASE): - has_prefix = 1 - charset = Charset(Utils.GetCharSet(mlist.preferred_language)) - # We purposefully leave no space b/w prefix and subject! + # The subject's already got the prefix, so don't change it + return + del msg['subject'] if not subject: - del msg['subject'] - h = Header(prefix, charset, header_name='Subject') - h.append(_('(no subject)'), charset) - msg['Subject'] = h - elif prefix and not has_prefix: - del msg['subject'] - if encode_p(mlist, subject, prefix): - # We'll encode the new prefix (just in case) but leave the old - # subject alone, in case it was already encoded. - h = Header(prefix, charset, 128, header_name='Subject') - else: - h = Header(prefix, header_name='Subject') - for s, c in headerbits: - h.append(s, c) - msg['Subject'] = h + subject = _('(no subject)') + # Get the charset to encode the prefix in. If this is us-ascii, we'll use + # iso-8859-1 instead, just to get a little extra coverage, and because the + # Header class tries us-ascii first anyway. + charset = Utils.GetCharSet(mlist.preferred_language) + if charset == 'us-ascii': + charset = 'iso-8859-1' + charset = Charset(charset) + # Convert the prefix to unicode so Header will do the 3-charset encoding. + # If prefix is a byte string and there are funky characters in it that + # don't match the charset, we might as well replace them now. + if not _isunicode(prefix): + prefix = unicode(prefix, charset.get_output_charset(), 'replace') + # We purposefully leave no space b/w prefix and subject! + h = Header(prefix, charset, header_name='Subject') + for s, c in headerbits: + # Once again, convert the string to unicode. + if c is None: + c = 'iso-8859-1' + if not _isunicode(s): + s = unicode(s, c, 'replace') + h.append(s, c) + msg['Subject'] = h |
