diff options
| author | Barry Warsaw | 2007-07-22 19:52:34 -0400 |
|---|---|---|
| committer | Barry Warsaw | 2007-07-22 19:52:34 -0400 |
| commit | 2e4314fc178f34170b82aaa2f8ed4d0f5440f4f4 (patch) | |
| tree | fa45f70d2643a8f42d727feb9de694e7c78ac1a8 | |
| parent | 8158f01c930d856b0ff892aab53cfbbcc25c85ec (diff) | |
| download | mailman-2e4314fc178f34170b82aaa2f8ed4d0f5440f4f4.tar.gz mailman-2e4314fc178f34170b82aaa2f8ed4d0f5440f4f4.tar.zst mailman-2e4314fc178f34170b82aaa2f8ed4d0f5440f4f4.zip | |
| -rw-r--r-- | Mailman/configuration.py | 1 | ||||
| -rw-r--r-- | Mailman/database/__init__.py | 2 | ||||
| -rw-r--r-- | Mailman/database/messagestore.py | 140 | ||||
| -rw-r--r-- | Mailman/database/model/__init__.py | 2 | ||||
| -rw-r--r-- | Mailman/database/model/message.py | 30 | ||||
| -rw-r--r-- | Mailman/docs/archives.txt | 1 | ||||
| -rw-r--r-- | Mailman/docs/messagestore.txt | 169 | ||||
| -rw-r--r-- | Mailman/interfaces/messagestore.py | 101 |
8 files changed, 445 insertions, 1 deletions
diff --git a/Mailman/configuration.py b/Mailman/configuration.py index dbb057a5f..52537e9ac 100644 --- a/Mailman/configuration.py +++ b/Mailman/configuration.py @@ -133,6 +133,7 @@ class Configuration(object): self.BADQUEUE_DIR = join(qdir, 'bad') self.RETRYQUEUE_DIR = join(qdir, 'retry') self.MAILDIR_DIR = join(qdir, 'maildir') + self.MESSAGES_DIR = join(VAR_DIR, 'messages') # Other useful files self.PIDFILE = join(datadir, 'master-qrunner.pid') self.SITE_PW_FILE = join(datadir, 'adm.pw') diff --git a/Mailman/database/__init__.py b/Mailman/database/__init__.py index 6c6312d0a..11afe5f3e 100644 --- a/Mailman/database/__init__.py +++ b/Mailman/database/__init__.py @@ -23,6 +23,7 @@ from elixir import objectstore from Mailman.database.listmanager import ListManager from Mailman.database.usermanager import UserManager +from Mailman.database.messagestore import MessageStore __all__ = [ 'initialize', @@ -42,6 +43,7 @@ def initialize(): model.initialize() config.list_manager = ListManager() config.user_manager = UserManager() + config.message_store = MessageStore() flush() diff --git a/Mailman/database/messagestore.py b/Mailman/database/messagestore.py new file mode 100644 index 000000000..eb29fcfb4 --- /dev/null +++ b/Mailman/database/messagestore.py @@ -0,0 +1,140 @@ +# Copyright (C) 2007 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
class MessageStore:
    """Message store keeping pickled messages on disk, indexed by database.

    Every stored message gets one row in the `Message` table (hash, relative
    directory and Message-ID) and one pickle file located at

        config.MESSAGES_DIR/<relative directory>/<sequence number>

    where the sequence number is the primary key of the database row.
    """

    implements(IMessageStore)

    def add(self, message):
        """Add the message to the store and return its sequence number.

        The message is modified in place: it is given an X-List-ID-Hash
        header and an X-List-Sequence-Number header, replacing any previous
        values of those headers.

        :param message: The message object to store.
        :returns: The sequence number assigned by the database.
        :raises ValueError: if the message does not have exactly one
            Message-ID header and exactly one Date header.
        """
        # Ensure that the message has the requisite headers.
        message_ids = message.get_all('message-id', [])
        dates = message.get_all('date', [])
        if not (len(message_ids) == 1 and len(dates) == 1):
            raise ValueError(
                'Exactly one Message-ID and one Date header required')
        # Calculate and insert the X-List-ID-Hash.
        message_id = message_ids[0]
        date = dates[0]
        shaobj = hashlib.sha1(message_id)
        shaobj.update(date)
        hash32 = base64.b32encode(shaobj.digest())
        del message['X-List-ID-Hash']
        message['X-List-ID-Hash'] = hash32
        relpath = self._relative_path(hash32)
        # Store the message in the database.  This relies on the database
        # providing a unique serial number, but to get this information, we
        # have to use a straight insert instead of relying on Elixir to
        # create the object.
        result = Message.table.insert().execute(
            hash=hash32, path=relpath, message_id=message_id)
        # Add the additional header.
        seqno = result.last_inserted_ids()[0]
        del message['X-List-Sequence-Number']
        message['X-List-Sequence-Number'] = str(seqno)
        # Now calculate the full file system path and write the pickle.
        path = os.path.join(config.MESSAGES_DIR, relpath, str(seqno))
        self._write(path, message)
        return seqno

    def _relative_path(self, hash32):
        """Return the directory, relative to the store root, for a hash.

        The first MAX_SPLITS two-character groups of the hash each become
        one directory level and the remainder of the hash becomes the last
        level.  This produces the same layout as before for the 32
        character hashes computed by add().
        """
        cut = 2 * MAX_SPLITS
        parts = [hash32[i:i + 2] for i in range(0, cut, 2)]
        parts.append(hash32[cut:])
        return os.path.join(*parts)

    def _write(self, path, message):
        """Pickle the message to path, creating parent directories."""
        dirname = os.path.dirname(path)
        if not os.path.isdir(dirname):
            try:
                os.makedirs(dirname)
            except OSError:
                # Another process may have created the directory between
                # the check and the call; only a real failure propagates.
                if not os.path.isdir(dirname):
                    raise
        # The pickle is written with a binary protocol, so the file must be
        # opened in binary mode.
        with open(path, 'wb') as fp:
            # -1 says to use the highest protocol available.
            pickle.dump(message, fp, -1)

    def _path(self, msgrow):
        """Return the full file system path of the pickle for a row."""
        return os.path.join(config.MESSAGES_DIR, msgrow.path, str(msgrow.id))

    def _msgobj(self, msgrow):
        """Load and return the message object belonging to a database row."""
        # NOTE(review): unpickling executes whatever the file contains, so
        # the MESSAGES_DIR tree must only be writable by trusted processes.
        # The path is built from database columns, never from client input.
        with open(self._path(msgrow), 'rb') as fp:
            return pickle.load(fp)

    def get_messages_by_message_id(self, message_id):
        """Iterate over all stored messages with the given Message-ID."""
        for msgrow in Message.select_by(message_id=message_id):
            yield self._msgobj(msgrow)

    def get_messages_by_hash(self, hash):
        """Iterate over all stored messages with the given X-List-ID-Hash."""
        for msgrow in Message.select_by(hash=hash):
            yield self._msgobj(msgrow)

    def _getmsg(self, global_id):
        """Return the database row for '<hash>/<seqno>', or None.

        None is returned when the id is malformed, when no row has that
        sequence number, or when the row's hash differs from the given one.
        """
        try:
            id_hash, seqno = global_id.split('/', 1)
            seqno = int(seqno)
        except ValueError:
            return None
        msgrows = Message.select_by(id=seqno)
        if not msgrows:
            return None
        assert len(msgrows) == 1, 'Multiple id matches'
        if msgrows[0].hash != id_hash:
            # The client lied about which message they wanted.  They gave a
            # valid sequence number, but the hash did not match.
            return None
        return msgrows[0]

    def get_message(self, global_id):
        """Return the message addressed by global_id, or None."""
        msgrow = self._getmsg(global_id)
        return (self._msgobj(msgrow) if msgrow is not None else None)

    @property
    def messages(self):
        """Iterate over every message in the store."""
        for msgrow in Message.select():
            yield self._msgobj(msgrow)

    def delete_message(self, global_id):
        """Remove the addressed message from the store.

        Both the pickle file and the database row are removed.

        :raises KeyError: if global_id does not address a stored message.
        """
        msgrow = self._getmsg(global_id)
        if msgrow is None:
            raise KeyError(global_id)
        path = self._path(msgrow)
        try:
            os.remove(path)
        except OSError:
            # An already missing file is fine; anything else is an error.
            if os.path.exists(path):
                raise
        msgrow.delete()
class Message(Entity):
    """A message in the message store.

    One row is inserted per stored message by MessageStore.add().  The row's
    `id` attribute is what the store reads back as the message's sequence
    number and uses as the pickle's file name.
    """

    # The value MessageStore.add() also puts in the X-List-ID-Hash header.
    has_field('hash', Unicode)
    # Directory of the pickle file, relative to config.MESSAGES_DIR.
    has_field('path', Unicode)
    # The message's Message-ID header value; not guaranteed to be unique.
    has_field('message_id', Unicode)

    # NOTE(review): presumably makes Elixir derive the table name from the
    # class name alone rather than the full module path -- confirm.
    using_options(shortnames=True)
+ + >>> from email import message_from_string + >>> from Mailman.configuration import config + >>> from Mailman.database import flush + >>> store = config.message_store + +If you try to add a message to the store which is missing either the +Message-ID header or the Date header, you will get a ValueError. + + >>> msg = message_from_string("""\ + ... Subject: An important message + ... + ... This message is very important. + ... """) + >>> store.add(msg) + Traceback (most recent call last): + ... + ValueError: Exactly one Message-ID and one Date header required + +Adding a Message-ID header alone doesn't help. + + >>> msg['Message-ID'] = '<87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>' + >>> store.add(msg) + Traceback (most recent call last): + ... + ValueError: Exactly one Message-ID and one Date header required + +Neither does adding just a Date header. + + >>> del msg['message-id'] + >>> msg['Date'] = 'Wed, 04 Jul 2007 16:49:58 +0900' + >>> store.add(msg) + Traceback (most recent call last): + ... + ValueError: Exactly one Message-ID and one Date header required + +However, having them both is all you need. + + >>> msg['Message-ID'] = '<87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>' + >>> store.add(msg) + 1 + >>> flush() + >>> print msg.as_string() + Subject: An important message + Date: Wed, 04 Jul 2007 16:49:58 +0900 + Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> + X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI + X-List-Sequence-Number: 1 + <BLANKLINE> + This message is very important. + <BLANKLINE> + + +Finding messages +---------------- + +There are several ways to find a message given some or all of the information +created above. Because Message-IDs are not guaranteed unique, looking up +messages with that key returns a collection. The collection may be empty if +there are no matches. + + >>> list(store.get_messages_by_message_id('nothing')) + [] + +Given an existing Message-ID, all matching messages will be found. 
+ + >>> msgs = list(store.get_messages_by_message_id(msg['message-id'])) + >>> len(msgs) + 1 + >>> print msgs[0].as_string() + Subject: An important message + Date: Wed, 04 Jul 2007 16:49:58 +0900 + Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> + X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI + X-List-Sequence-Number: 1 + <BLANKLINE> + This message is very important. + <BLANKLINE> + +Similarly, we can find messages by the ID hash. + + >>> list(store.get_messages_by_hash('nothing')) + [] + >>> msgs = list(store.get_messages_by_hash(msg['x-list-id-hash'])) + >>> len(msgs) + 1 + >>> print msgs[0].as_string() + Subject: An important message + Date: Wed, 04 Jul 2007 16:49:58 +0900 + Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> + X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI + X-List-Sequence-Number: 1 + <BLANKLINE> + This message is very important. + <BLANKLINE> + +We can also get a single message by using its relative global ID. This +returns None if there is no match. + + >>> print store.get_message('nothing') + None + >>> print store.get_message('nothing/1') + None + >>> id_hash = msg['x-list-id-hash'] + >>> seqno = msg['x-list-sequence-number'] + >>> global_id = id_hash + '/' + seqno + >>> print store.get_message(global_id).as_string() + Subject: An important message + Date: Wed, 04 Jul 2007 16:49:58 +0900 + Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> + X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI + X-List-Sequence-Number: 1 + <BLANKLINE> + This message is very important. + <BLANKLINE> + + +Iterating over all messages +--------------------------- + +The message store provides a means to iterate over all the messages it +contains. 
+ + >>> msgs = list(store.messages) + >>> len(msgs) + 1 + >>> print msgs[0].as_string() + Subject: An important message + Date: Wed, 04 Jul 2007 16:49:58 +0900 + Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> + X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI + X-List-Sequence-Number: 1 + <BLANKLINE> + This message is very important. + <BLANKLINE> + + +Deleting messages from the store +-------------------------------- + +The global relative ID is the key into the message store. If you try to +delete a global ID that isn't in the store, you get an exception. + + >>> store.delete_message('nothing') + Traceback (most recent call last): + ... + KeyError: 'nothing' + +But if you delete an existing message, it really gets deleted. + + >>> store.delete_message(global_id) + >>> flush() + >>> list(store.messages) + [] + >>> print store.get_message(global_id) + None + >>> list(store.get_messages_by_message_id(msg['message-id'])) + [] diff --git a/Mailman/interfaces/messagestore.py b/Mailman/interfaces/messagestore.py new file mode 100644 index 000000000..541238fd1 --- /dev/null +++ b/Mailman/interfaces/messagestore.py @@ -0,0 +1,101 @@ +# Copyright (C) 2007 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. 
class IMessageStore(Interface):
    """The interface of the global message storage service.

    All messages that are stored in the system live in the message storage
    service.  This store is responsible for providing unique identifiers for
    every message stored in it.  A message stored in this service must have
    a Message-ID header and a Date header.  These are not guaranteed to be
    unique, so the service also provides a unique sequence number to every
    message.

    Storing a message returns the unique sequence number for the message.
    This sequence number will be stored on the message's
    X-List-Sequence-Number header.  Any previous such header value will be
    overwritten.  An X-List-ID-Hash header will also be added, containing the
    Base-32 encoded SHA1 hash of the message's Message-ID and Date headers.

    The combination of the X-List-ID-Hash header and the
    X-List-Sequence-Number header uniquely identifies this message to the
    storage service.  A globally unique URL that addresses this message may
    be crafted from these headers and the List-Archive header as follows.
    For a message with the following headers:

    Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
    Date: Wed, 04 Jul 2007 16:49:58 +0900
    List-Archive: http://archive.example.com/
    X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
    X-List-Sequence-Number: 801

    the globally unique URL would be:

    http://archive.example.com/RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI/801
    """

    def add(message):
        """Add the message to the store.

        :param message: An email.message.Message instance containing a
            Message-ID header and a Date header.  The message will be given
            an X-List-ID-Hash header and an X-List-Sequence-Number header,
            replacing any existing values of those headers.
        :returns: The message's sequence ID as an integer.
        :raises ValueError: if the message is missing one of the required
            headers.  The MessageStore implementation added alongside this
            interface also raises it when either header occurs more than
            once.
        """

    def get_messages_by_message_id(message_id):
        """Return the set of messages with the matching Message-ID.

        :param message_id: The Message-ID header contents to search for.
        :returns: An iterator over all the matching messages, which is
            empty when nothing matches.
        """

    def get_messages_by_hash(hash):
        """Return the set of messages with the matching X-List-ID-Hash.

        :param hash: The X-List-ID-Hash header contents to search for.
        :returns: An iterator over all the matching messages, which is
            empty when nothing matches.
        """

    def get_message(global_id):
        """Return the message with the matching hash and sequence number.

        :param global_id: The global relative ID which uniquely addresses
            this message, relative to the base address of the message store.
            This must be a string of the X-List-ID-Hash followed by a single
            slash character, followed by the X-List-Sequence-Number.
        :returns: The matching message, or None if there is no match.
        """

    def delete_message(global_id):
        """Remove the addressed message from the store.

        :param global_id: The global relative ID which uniquely addresses the
            message to delete, in the same form accepted by get_message().
        :raises KeyError: if there is no such message.
        """

    messages = Attribute(
        """An iterator over all messages in this message store.""")
