summaryrefslogtreecommitdiff
path: root/src/mailman/mta/bulk.py
blob: befeb71313f4e8f20877461bb404f394355d6c98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright (C) 2009-2015 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Bulk message delivery."""

__all__ = [
    'BulkDelivery',
    ]


from mailman.mta.base import BaseDelivery


# Bucket number for each well-known top-level domain.  Any domain that is not
# listed here is looked up with a default of zero, i.e. bucket 0 is the
# catch-all.  These were, at one time, the most common domains.
CHUNKMAP = {
    'com': 1,
    'net': 2,
    'org': 2,
    'edu': 3,
    'us': 3,
    'ca': 3,
    }



class BulkDelivery(BaseDelivery):
    """Deliver messages to the MSA in as few sessions as possible.

    The recipients of a message are split into chunks by `chunkify()`, and
    `deliver()` hands each chunk to `_deliver_to_recipients()` in turn.
    """

    def __init__(self, max_recipients=None):
        """See `BaseDelivery`.

        :param max_recipients: The maximum number of recipients per delivery
            chunk.  None, zero or less means to group all recipients into one
            big chunk.
        :type max_recipients: integer
        """
        super(BulkDelivery, self).__init__()
        # Normalize None to zero so that `chunkify()` only has to make a
        # single "<= 0" comparison to detect the no-limit case.
        self._max_recipients = (max_recipients
                                if max_recipients is not None
                                else 0)

    def chunkify(self, recipients):
        """Split a set of recipients into chunks.

        The `max_recipients` argument given to the constructor specifies the
        maximum number of recipients in each chunk.  When that limit is zero
        or less, exactly one chunk containing every recipient is produced,
        even if there are no recipients at all.

        This is a generator; wrap the call in `list()` if a concrete sequence
        of chunks is needed.

        :param recipients: The set of recipient email addresses
        :type recipients: sequence of email address strings
        :return: An iterator over chunks, where each chunk is a set
            containing no more than `max_recipients` number of addresses.
            The chunk can contain fewer, and no packing is guaranteed.
        :rtype: generator of sets of strings
        """
        if self._max_recipients <= 0:
            yield set(recipients)
            return
        # This algorithm was originally suggested by Chuq Von Rospach.  Start
        # by splitting the recipient addresses into top-level domain buckets,
        # using the "most common" domains.  Everything else ends up in the
        # zeroth bucket.
        by_bucket = {}
        for address in recipients:
            domain = address.partition('@')[2]
            # Domain names are case-insensitive, so fold the top-level domain
            # before the lookup; otherwise user@EXAMPLE.COM would be filed in
            # the catch-all bucket instead of with the other .com addresses.
            tld = domain.rpartition('.')[2].lower()
            bucket_number = CHUNKMAP.get(tld, 0)
            by_bucket.setdefault(bucket_number, set()).add(address)
        # Emit the biggest buckets first.  Chunks never span two buckets:
        # every bucket starts with a fresh, empty chunk.
        for bucket in sorted(by_bucket.values(), key=len, reverse=True):
            chunk = set()
            while bucket:
                chunk.add(bucket.pop())
                if len(chunk) == self._max_recipients:
                    yield chunk
                    chunk = set()
            # Flush the partially filled remainder of this bucket, if any.
            if chunk:
                yield chunk

    def deliver(self, mlist, msg, msgdata):
        """See `IMailTransportAgentDelivery`.

        The recipients are read from the 'recipients' key of `msgdata`
        (an empty set is used when the key is missing), chunked with
        `chunkify()`, and each chunk is passed to
        `_deliver_to_recipients()`.

        :return: The union of the mappings returned for each chunk by
            `_deliver_to_recipients()`.
        :rtype: dictionary
        """
        refused = {}
        for recipients in self.chunkify(msgdata.get('recipients', set())):
            chunk_refused = self._deliver_to_recipients(
                mlist, msg, msgdata, recipients)
            # Merge this chunk's result into the overall result.
            refused.update(chunk_refused)
        return refused