summaryrefslogtreecommitdiff
path: root/src/mailman/mta/bulk.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mailman/mta/bulk.py')
-rw-r--r--src/mailman/mta/bulk.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/mailman/mta/bulk.py b/src/mailman/mta/bulk.py
index 12f94cc23..5c6d8257f 100644
--- a/src/mailman/mta/bulk.py
+++ b/src/mailman/mta/bulk.py
@@ -25,16 +25,83 @@ __all__ = [
]
+from itertools import chain
+
from zope.interface import implements
from mailman.interfaces.mta import IMailTransportAgentDelivery
+# A mapping of top-level domains to bucket numbers. The zeroth bucket is
+# reserved for everything else. At one time, these were the most common
+# domains.
+CHUNKMAP = dict(
+ com=1,
+ net=2,
+ org=2,
+ edu=3,
+ us=3,
+ ca=3,
+ )
+
+
class BulkDelivery:
"""Deliver messages to the MTA in as few sessions as possible."""
implements(IMailTransportAgentDelivery)
+ def __init__(self, max_recipients):
+ """Create a bulk deliverer.
+
+ :param max_recipients: The maximum number of recipients per delivery
+ chunk. Zero or less means to group all recipients into one
+ chunk.
+ :type max_recipients: integer
+ """
+ self._max_recipients = max_recipients
+
def deliver(self, mlist, msg, msgdata):
"""See `IMailTransportAgentDelivery`."""
+
+ def chunkify(self, recipients):
+ """Split a set of recipients into chunks.
+
+ The `max_recipients` argument given to the constructor specifies the
+ maximum number of recipients in each chunk.
+
+ :param recipients: The set of recipient email addresses
+ :type recipients: sequence of email address strings
+ :return: A list of chunks, where each chunk is a set containing no
+ more than `max_recipients` number of addresses. The chunk can
+ contain fewer, and no packing is guaranteed.
+ :rtype: list of sets of strings
+ """
+ if self._max_recipients <= 0:
+ yield set(recipients)
+ return
+ # This algorithm was originally suggested by Chuq Von Rospach. Start
+ # by splitting the recipient addresses into top-level domain buckets,
+ # using the "most common" domains. Everything else ends up in the
+ # zeroth bucket.
+ by_bucket = {}
+ for address in recipients:
+ localpart, at, domain = address.partition('@')
+ domain_parts = domain.split('.')
+ bucket_number = CHUNKMAP.get(domain_parts[-1], 0)
+ by_bucket.setdefault(bucket_number, set()).add(address)
+ # Fill chunks by sorting the tld values by length.
+ chunk = set()
+ for tld_chunk in sorted(by_bucket.values(), key=len, reverse=True):
+ while tld_chunk:
+ chunk.add(tld_chunk.pop())
+ if len(chunk) == self._max_recipients:
+ yield chunk
+ chunk = set()
+ # Every tld bucket starts a new chunk, but only if non-empty
+ if len(chunk) > 0:
+ yield chunk
+ chunk = set()
+ # Be sure to include the last chunk, but only if it's non-empty.
+ if len(chunk) > 0:
+ yield chunk