# Copyright (C) 2014-2017 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Subject header prefix munging."""

import re

from contextlib import suppress
from email.header import Header, decode_header, make_header
from mailman.core.i18n import _
from mailman.interfaces.handler import IHandler
from public import public
from zope.interface import implementer


# Matches one or more leading reply markers (RE, AW, SV or VS, each optionally
# followed by a bracketed counter such as "[2]") up to and including the
# colon and any surrounding whitespace.  Callers match it case-insensitively.
# This is a raw string so that the regexp escapes are not interpreted as
# (invalid) string escape sequences.
RE_PATTERN = r'\s*((RE|AW|SV|VS)(\[\d+\])?\s*:\s*)+'
# Charset labels that are treated as plain ASCII; None is what
# decode_header() reports for text that was not RFC 2047 encoded.
ASCII_CHARSETS = (None, 'ascii', 'us-ascii')
EMPTYSTRING = ''


def ascii_header(mlist, msgdata, subject, prefix, prefix_pattern, ws):
    """Build the prefixed Subject when everything involved is ASCII.

    :param mlist: The mailing list; its preferred language supplies the
        charset and the language code used for the '(no subject)' text.
    :param msgdata: The message metadata dictionary.  The subject with the
        old prefix removed is stored under 'stripped_subject'.
    :param subject: The original subject as a `Header` instance.
    :param prefix: The prefix text to prepend (may be empty).
    :param prefix_pattern: Regular expression matching an existing prefix.
    :param ws: The continuation whitespace for the new header.
    :return: The new `Header`, or None when the list charset or any chunk
        of the subject is not ASCII, so that another strategy can be tried.
    """
    if mlist.preferred_language.charset not in ASCII_CHARSETS:
        return None
    for chunk, charset in decode_header(subject.encode()):
        if charset not in ASCII_CHARSETS:
            return None
    # Unfold the subject into a single line.
    subject_text = EMPTYSTRING.join(str(subject).splitlines())
    if subject_text.strip() == '':
        # The message was posted without any subject text.
        with _.using(mlist.preferred_language.code):
            subject_text = _('(no subject)')
    else:
        # Remove every occurrence of an already present prefix.
        subject_text = re.sub(prefix_pattern, '', subject_text)
    msgdata['stripped_subject'] = subject_text
    # Collapse any run of leading reply markers into a single 'Re: ', which
    # is re-inserted after the prefix below.
    rematch = re.match(RE_PATTERN, subject_text, re.I)
    if rematch:
        subject_text = subject_text[rematch.end():]
        recolon = 'Re: '
    else:
        recolon = ''
    lines = subject_text.splitlines()
    if not lines:
        # The subject may become null here if someone posted mail with
        # "Subject: [subject prefix]" or just "Subject: Re:".  Without this
        # guard, lines[0] below would raise an IndexError.
        with _.using(mlist.preferred_language.code):
            lines = [_('(no subject)')]
    first_line = [lines[0]]
    if recolon:
        first_line.insert(0, recolon)
    if prefix:
        first_line.insert(0, prefix)
    subject_text = EMPTYSTRING.join(first_line)
    return Header(subject_text, continuation_ws=ws)


def all_same_charset(mlist, msgdata, subject, prefix, prefix_pattern, ws):
    """Build the prefixed Subject when it is entirely in the list's charset.

    :param mlist: The mailing list; its preferred language supplies the
        charset and the language code used for the '(no subject)' text.
    :param msgdata: The message metadata dictionary.  The subject with the
        old prefix removed is stored under 'stripped_subject'.
    :param subject: The original subject as a `Header` instance.
    :param prefix: The prefix text to prepend (may be empty).
    :param prefix_pattern: Regular expression matching an existing prefix.
    :param ws: The continuation whitespace for the new header.
    :return: The new `Header` in the list's charset, or None when any chunk
        of the subject uses a different charset, so that another strategy
        can be tried.
    """
    list_charset = mlist.preferred_language.charset
    chunks = []
    for chunk, charset in decode_header(subject.encode()):
        if charset is None:
            charset = 'us-ascii'
        # Compare before decoding: a chunk labelled with a charset Python
        # has no codec for (e.g. 'unknown-8bit') must make us give up on
        # this strategy rather than raise LookupError from decode().
        if charset != list_charset:
            return None
        if isinstance(chunk, str):
            chunks.append(chunk)
        else:
            chunks.append(chunk.decode(charset))
    subject_text = EMPTYSTRING.join(chunks)
    if subject_text.strip() == '':
        # The message was posted without any subject text.  _.using() is
        # the context manager form; _.push() must not be used in a `with`
        # statement.
        with _.using(mlist.preferred_language.code):
            subject_text = _('(no subject)')
    else:
        # Remove every occurrence of an already present prefix.
        subject_text = re.sub(prefix_pattern, '', subject_text)
    msgdata['stripped_subject'] = subject_text
    # Collapse any run of leading reply markers into a single 'Re: ', which
    # is re-inserted after the prefix below.
    rematch = re.match(RE_PATTERN, subject_text, re.I)
    if rematch:
        subject_text = subject_text[rematch.end():]
        recolon = 'Re: '
    else:
        recolon = ''
    lines = subject_text.splitlines()
    if not lines:
        # The subject may become null here if someone posted mail with
        # "Subject: [subject prefix]" or just "Subject: Re:".  Without this
        # guard, lines[0] below would raise an IndexError.
        with _.using(mlist.preferred_language.code):
            lines = [_('(no subject)')]
    first_line = [lines[0]]
    if recolon:
        first_line.insert(0, recolon)
    if prefix:
        first_line.insert(0, prefix)
    subject_text = EMPTYSTRING.join(first_line)
    return Header(subject_text, charset=list_charset, continuation_ws=ws)


def mixed_charsets(mlist, msgdata, subject, prefix, prefix_pattern, ws):
    """Build the prefixed Subject when its chunks use differing charsets.

    Only the first decoded chunk is searched for an existing prefix and
    leading reply markers; the remaining chunks are passed through as-is.

    :param mlist: The mailing list; its preferred language supplies the
        charset used for the prefix chunk.
    :param msgdata: The message metadata dictionary.  The subject with the
        old prefix removed is stored under 'stripped_subject'.
    :param subject: The original subject as a `Header` instance.
    :param prefix: The prefix text to prepend.
    :param prefix_pattern: Regular expression matching an existing prefix.
    :param ws: The continuation whitespace for the new header.
    :return: The new `Header`.
    """
    list_charset = mlist.preferred_language.charset
    chunks = decode_header(subject.encode())
    if len(chunks) == 0:
        # _.using() is the context manager form; _.push() must not be used
        # in a `with` statement.
        with _.using(mlist.preferred_language.code):
            subject_text = _('(no subject)')
        chunks = [(prefix, list_charset),
                  (subject_text, list_charset),
                  ]
        return make_header(chunks, continuation_ws=ws)
    # Only search the first chunk for Re and existing prefix.
    chunk_text, chunk_charset = chunks[0]
    if chunk_charset is None:
        chunk_charset = 'us-ascii'
    if isinstance(chunk_text, str):
        first_text = chunk_text
    else:
        try:
            first_text = chunk_text.decode(chunk_charset)
        except LookupError:
            # The chunk is labelled with a charset Python has no codec for
            # (e.g. 'unknown-8bit'), so it cannot be searched.  Leave it
            # untouched and just prepend the prefix.
            first_text = None
    if first_text is not None:
        first_text = re.sub(prefix_pattern, '', first_text).lstrip()
        rematch = re.match(RE_PATTERN, first_text, re.I)
        if rematch:
            # Collapse any run of reply markers into a single 'Re: '.
            first_text = 'Re: ' + first_text[rematch.end():]
        chunks[0] = (first_text, chunk_charset)
    # The subject text stripped of the prefix, for use in the NNTP gateway.
    msgdata['stripped_subject'] = str(make_header(chunks, continuation_ws=ws))
    chunks.insert(0, (prefix, list_charset))
    return make_header(chunks, continuation_ws=ws)


@public
@implementer(IHandler)
class SubjectPrefix:
    """Add a list-specific prefix to the Subject header value."""

    name = 'subject-prefix'
    description = _('Add a list-specific prefix to the Subject header value.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Does nothing for digests, fast-tracked messages, or lists whose
        prefix is blank.  Otherwise the original subject is saved in
        msgdata['original_subject'] and the Subject header is replaced with
        the value produced by the first strategy (ASCII only, list charset
        only, mixed charsets) that returns a header.
        """
        if msgdata.get('isdigest') or msgdata.get('_fasttrack'):
            return
        prefix = mlist.subject_prefix
        if not prefix.strip():
            return
        subject = msg.get('subject', '')
        # Turn the value into a Header instance and try to figure out what
        # continuation whitespace is being used.
        # Save the original Subject.
        msgdata['original_subject'] = subject
        if isinstance(subject, Header):
            subject_text = str(subject)
        else:
            subject = make_header(decode_header(subject))
            subject_text = str(subject)
        lines = subject_text.splitlines()
        # Default to a tab unless the second line of a folded subject shows
        # which whitespace character the sender used.
        ws = '\t'
        if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
            ws = lines[1][0]
        # If the subject_prefix contains '%d', it is replaced with the mailing
        # list's sequence number.  The sequential number format allows '%d' or
        # '%05d' like pattern.
        prefix_pattern = re.escape(prefix)
        # Unescape '%'.
        prefix_pattern = '%'.join(prefix_pattern.split(r'\%'))
        # Raw string, so that \d is a regexp escape and not an (invalid)
        # string escape sequence.
        p = re.compile(r'%\d*d')
        # NOTE: the search starts at index 1, so a number format at the very
        # start of the prefix is not detected here.
        if p.search(prefix, 1):
            # The prefix has number, so we should search prefix w/number in
            # subject.  Also, force new style.
            prefix_pattern = p.sub(r'\\s*\\d+\\s*', prefix_pattern)
        # Substitute %d in prefix with post_id.  TypeError is raised when the
        # prefix has no (or an incompatible) conversion; ValueError when it
        # contains a stray '%' such as in '[50%]'.  In both cases the prefix
        # is used verbatim.
        with suppress(TypeError, ValueError):
            prefix = prefix % mlist.post_id
        for handler in (ascii_header,
                        all_same_charset,
                        mixed_charsets,
                        ):
            new_subject = handler(
                mlist, msgdata, subject, prefix, prefix_pattern, ws)
            if new_subject is not None:
                del msg['subject']
                msg['Subject'] = new_subject
                return