summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Mailman/configuration.py1
-rw-r--r--Mailman/database/__init__.py2
-rw-r--r--Mailman/database/messagestore.py140
-rw-r--r--Mailman/database/model/__init__.py2
-rw-r--r--Mailman/database/model/message.py30
-rw-r--r--Mailman/docs/archives.txt1
-rw-r--r--Mailman/docs/messagestore.txt169
-rw-r--r--Mailman/interfaces/messagestore.py101
8 files changed, 445 insertions, 1 deletions
diff --git a/Mailman/configuration.py b/Mailman/configuration.py
index dbb057a5f..52537e9ac 100644
--- a/Mailman/configuration.py
+++ b/Mailman/configuration.py
@@ -133,6 +133,7 @@ class Configuration(object):
self.BADQUEUE_DIR = join(qdir, 'bad')
self.RETRYQUEUE_DIR = join(qdir, 'retry')
self.MAILDIR_DIR = join(qdir, 'maildir')
+ self.MESSAGES_DIR = join(VAR_DIR, 'messages')
# Other useful files
self.PIDFILE = join(datadir, 'master-qrunner.pid')
self.SITE_PW_FILE = join(datadir, 'adm.pw')
diff --git a/Mailman/database/__init__.py b/Mailman/database/__init__.py
index 6c6312d0a..11afe5f3e 100644
--- a/Mailman/database/__init__.py
+++ b/Mailman/database/__init__.py
@@ -23,6 +23,7 @@ from elixir import objectstore
from Mailman.database.listmanager import ListManager
from Mailman.database.usermanager import UserManager
+from Mailman.database.messagestore import MessageStore
__all__ = [
'initialize',
@@ -42,6 +43,7 @@ def initialize():
model.initialize()
config.list_manager = ListManager()
config.user_manager = UserManager()
+ config.message_store = MessageStore()
flush()
diff --git a/Mailman/database/messagestore.py b/Mailman/database/messagestore.py
new file mode 100644
index 000000000..eb29fcfb4
--- /dev/null
+++ b/Mailman/database/messagestore.py
@@ -0,0 +1,140 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+from __future__ import with_statement
+
+__metaclass__ = type
+__all__ = [
+ 'MessageStore',
+ ]
+
+import os
+import errno
+import base64
+import hashlib
+import cPickle as pickle
+
+from zope.interface import implements
+
+from Mailman import Utils
+from Mailman.configuration import config
+from Mailman.database.model import Message
+from Mailman.interfaces import IMessageStore
+
+# It could be very bad if you have already stored files and you change this
+# value. We'd need a script to reshuffle and resplit.
+MAX_SPLITS = 2
+EMPTYSTRING = ''
+
+
+
+class MessageStore:
+ implements(IMessageStore)
+
+ def add(self, message):
+ # Ensure that the message has the requisite headers.
+ message_ids = message.get_all('message-id', [])
+ dates = message.get_all('date', [])
+ if not (len(message_ids) == 1 and len(dates) == 1):
+ raise ValueError(
+ 'Exactly one Message-ID and one Date header required')
+ # Calculate and insert the X-List-ID-Hash.
+ message_id = message_ids[0]
+ date = dates[0]
+ shaobj = hashlib.sha1(message_id)
+ shaobj.update(date)
+ hash32 = base64.b32encode(shaobj.digest())
+ del message['X-List-ID-Hash']
+ message['X-List-ID-Hash'] = hash32
+ # Calculate the path on disk where we're going to store this message
+ # object, in pickled format.
+ parts = []
+ split = list(hash32)
+ while split and len(parts) < MAX_SPLITS:
+ parts.append(split.pop(0) + split.pop(0))
+ parts.append(EMPTYSTRING.join(split))
+ relpath = os.path.join(*parts)
+ # Store the message in the database. This relies on the database
+ # providing a unique serial number, but to get this information, we
+ # have to use a straight insert instead of relying on Elixir to create
+ # the object.
+ result = Message.table.insert().execute(
+ hash=hash32, path=relpath, message_id=message_id)
+ # Add the additional header.
+ seqno = result.last_inserted_ids()[0]
+ del message['X-List-Sequence-Number']
+ message['X-List-Sequence-Number'] = str(seqno)
+ # Now calculate the full file system path.
+ path = os.path.join(config.MESSAGES_DIR, relpath, str(seqno))
+ # Write the file to the path, but catch the appropriate exception in
+ # case the parent directories don't yet exist. In that case, create
+ # them and try again.
+ while True:
+ try:
+ with open(path, 'w') as fp:
+ # -1 says to use the highest protocol available.
+ pickle.dump(message, fp, -1)
+ break
+ except IOError, e:
+ if e.errno <> errno.ENOENT:
+ raise
+ os.makedirs(os.path.dirname(path))
+ return seqno
+
+ def _msgobj(self, msgrow):
+ path = os.path.join(config.MESSAGES_DIR, msgrow.path, str(msgrow.id))
+ with open(path) as fp:
+ return pickle.load(fp)
+
+ def get_messages_by_message_id(self, message_id):
+ for msgrow in Message.select_by(message_id=message_id):
+ yield self._msgobj(msgrow)
+
+ def get_messages_by_hash(self, hash):
+ for msgrow in Message.select_by(hash=hash):
+ yield self._msgobj(msgrow)
+
+ def _getmsg(self, global_id):
+ try:
+ hash, seqno = global_id.split('/', 1)
+ seqno = int(seqno)
+ except ValueError:
+ return None
+ msgrows = Message.select_by(id=seqno)
+ if not msgrows:
+ return None
+ assert len(msgrows) == 1, 'Multiple id matches'
+ if msgrows[0].hash <> hash:
+ # The client lied about which message they wanted. They gave a
+ # valid sequence number, but the hash did not match.
+ return None
+ return msgrows[0]
+
+ def get_message(self, global_id):
+ msgrow = self._getmsg(global_id)
+ return (self._msgobj(msgrow) if msgrow is not None else None)
+
+ @property
+ def messages(self):
+ for msgrow in Message.select():
+ yield self._msgobj(msgrow)
+
+ def delete_message(self, global_id):
+ msgrow = self._getmsg(global_id)
+ if msgrow is None:
+ raise KeyError(global_id)
+ msgrow.delete()
diff --git a/Mailman/database/model/__init__.py b/Mailman/database/model/__init__.py
index 82e66eb0b..5b9d32ce0 100644
--- a/Mailman/database/model/__init__.py
+++ b/Mailman/database/model/__init__.py
@@ -19,6 +19,7 @@ __all__ = [
'Address',
'Language',
'MailingList',
+ 'Message',
'Preferences',
'User',
'Version',
@@ -43,6 +44,7 @@ from Mailman.database.model.address import Address
from Mailman.database.model.language import Language
from Mailman.database.model.mailinglist import MailingList
from Mailman.database.model.member import Member
+from Mailman.database.model.message import Message
from Mailman.database.model.preferences import Preferences
from Mailman.database.model.user import User
from Mailman.database.model.version import Version
diff --git a/Mailman/database/model/message.py b/Mailman/database/model/message.py
new file mode 100644
index 000000000..df8371c6a
--- /dev/null
+++ b/Mailman/database/model/message.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+from elixir import *
+from zope.interface import implements
+
+
+
+class Message(Entity):
+ """A message in the message store."""
+
+ has_field('hash', Unicode)
+ has_field('path', Unicode)
+ has_field('message_id', Unicode)
+
+ using_options(shortnames=True)
diff --git a/Mailman/docs/archives.txt b/Mailman/docs/archives.txt
index 682ee8777..1bed66e1a 100644
--- a/Mailman/docs/archives.txt
+++ b/Mailman/docs/archives.txt
@@ -25,7 +25,6 @@ A helper function.
... msg, msgdata = switchboard.dequeue(filebase)
... switchboard.finish(filebase)
-
The purpose of the ToArchive handler is to make a simple decision as to
whether the message should get archived and if so, to drop the message in the
archiving queue. Really the most important things are to determine when a
diff --git a/Mailman/docs/messagestore.txt b/Mailman/docs/messagestore.txt
new file mode 100644
index 000000000..ace95e914
--- /dev/null
+++ b/Mailman/docs/messagestore.txt
@@ -0,0 +1,169 @@
+The message store
+=================
+
+The message store is a collection of messages keyed off of unique global
+identifiers. A global id for a message is calculated relative to the message
+store's base URL and its components are stored as headers on the message. One
+piece of information is the X-List-ID-Hash, a base-32 encoding of the SHA1
+hash of the message's Message-ID and Date headers, which the message must
+have. The second piece of information is supplied by the message store; it
+is a sequence number that will uniquely identify the message even when the
+X-List-ID-Hash collides.
+
+ >>> from email import message_from_string
+ >>> from Mailman.configuration import config
+ >>> from Mailman.database import flush
+ >>> store = config.message_store
+
+If you try to add a message to the store which is missing either the
+Message-ID header or the Date header, you will get a ValueError.
+
+ >>> msg = message_from_string("""\
+ ... Subject: An important message
+ ...
+ ... This message is very important.
+ ... """)
+ >>> store.add(msg)
+ Traceback (most recent call last):
+ ...
+ ValueError: Exactly one Message-ID and one Date header required
+
+Adding a Message-ID header alone doesn't help.
+
+ >>> msg['Message-ID'] = '<87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>'
+ >>> store.add(msg)
+ Traceback (most recent call last):
+ ...
+ ValueError: Exactly one Message-ID and one Date header required
+
+Neither does adding just a Date header.
+
+ >>> del msg['message-id']
+ >>> msg['Date'] = 'Wed, 04 Jul 2007 16:49:58 +0900'
+ >>> store.add(msg)
+ Traceback (most recent call last):
+ ...
+ ValueError: Exactly one Message-ID and one Date header required
+
+However, having them both is all you need.
+
+ >>> msg['Message-ID'] = '<87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>'
+ >>> store.add(msg)
+ 1
+ >>> flush()
+ >>> print msg.as_string()
+ Subject: An important message
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 1
+ <BLANKLINE>
+ This message is very important.
+ <BLANKLINE>
+
+
+Finding messages
+----------------
+
+There are several ways to find a message given some or all of the information
+created above. Because Message-IDs are not guaranteed unique, looking up
+messages with that key returns a collection. The collection may be empty if
+there are no matches.
+
+ >>> list(store.get_messages_by_message_id('nothing'))
+ []
+
+Given an existing Message-ID, all matching messages will be found.
+
+ >>> msgs = list(store.get_messages_by_message_id(msg['message-id']))
+ >>> len(msgs)
+ 1
+ >>> print msgs[0].as_string()
+ Subject: An important message
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 1
+ <BLANKLINE>
+ This message is very important.
+ <BLANKLINE>
+
+Similarly, we can find messages by the ID hash.
+
+ >>> list(store.get_messages_by_hash('nothing'))
+ []
+ >>> msgs = list(store.get_messages_by_hash(msg['x-list-id-hash']))
+ >>> len(msgs)
+ 1
+ >>> print msgs[0].as_string()
+ Subject: An important message
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 1
+ <BLANKLINE>
+ This message is very important.
+ <BLANKLINE>
+
+We can also get a single message by using its relative global ID. This
+returns None if there is no match.
+
+ >>> print store.get_message('nothing')
+ None
+ >>> print store.get_message('nothing/1')
+ None
+ >>> id_hash = msg['x-list-id-hash']
+ >>> seqno = msg['x-list-sequence-number']
+ >>> global_id = id_hash + '/' + seqno
+ >>> print store.get_message(global_id).as_string()
+ Subject: An important message
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 1
+ <BLANKLINE>
+ This message is very important.
+ <BLANKLINE>
+
+
+Iterating over all messages
+---------------------------
+
+The message store provides a means to iterate over all the messages it
+contains.
+
+ >>> msgs = list(store.messages)
+ >>> len(msgs)
+ 1
+ >>> print msgs[0].as_string()
+ Subject: An important message
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 1
+ <BLANKLINE>
+ This message is very important.
+ <BLANKLINE>
+
+
+Deleting messages from the store
+--------------------------------
+
+The global relative ID is the key into the message store. If you try to
+delete a global ID that isn't in the store, you get an exception.
+
+ >>> store.delete_message('nothing')
+ Traceback (most recent call last):
+ ...
+ KeyError: 'nothing'
+
+But if you delete an existing message, it really gets deleted.
+
+ >>> store.delete_message(global_id)
+ >>> flush()
+ >>> list(store.messages)
+ []
+ >>> print store.get_message(global_id)
+ None
+ >>> list(store.get_messages_by_message_id(msg['message-id']))
+ []
diff --git a/Mailman/interfaces/messagestore.py b/Mailman/interfaces/messagestore.py
new file mode 100644
index 000000000..541238fd1
--- /dev/null
+++ b/Mailman/interfaces/messagestore.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""The message storage service."""
+
+from zope.interface import Interface, Attribute
+
+
+
+class IMessageStore(Interface):
+ """The interface of the global message storage service.
+
+ All messages that are stored in the system live in the message storage
+ service. This store is responsible for providing unique identifiers for
+ every message stored in it. A message stored in this service must have at
+ least a Message-ID header and a Date header. These are not guaranteed to
+ be unique, so the service also provides a unique sequence number to every
+ message.
+
+ Storing a message returns the unique sequence number for the message.
+ This sequence number will be stored on the message's
+ X-List-Sequence-Number header. Any previous such header value will be
+ overwritten. An X-List-ID-Hash header will also be added, containing the
+ Base-32 encoded SHA1 hash of the message's Message-ID and Date headers.
+
+ The combination of the X-List-ID-Hash header and the
+ X-List-Sequence-Number header uniquely identifies this message to the
+ storage service. A globally unique URL that addresses this message may be
+ crafted from these headers and the List-Archive header as follows. For a
+ message with the following headers:
+
+ Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>
+ Date: Wed, 04 Jul 2007 16:49:58 +0900
+ List-Archive: http://archive.example.com/
+ X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI
+ X-List-Sequence-Number: 801
+
+ the globally unique URL would be:
+
+ http://archive.example.com/RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI/801
+ """
+
+ def add(message):
+ """Add the message to the store.
+
+ :param message: An email.message.Message instance containing at least
+ a Message-ID header and a Date header. The message will be given
+ an X-List-ID-Hash header and an X-List-Sequence-Number header.
+ :returns: The message's sequence ID as an integer.
+ :raises ValueError: if the message is missing one of the required
+ headers.
+ """
+
+ def get_messages_by_message_id(message_id):
+ """Return the set of messages with the matching Message-ID.
+
+ :param message_id: The Message-ID header contents to search for.
+ :returns: An iterator over all the matching messages.
+ """
+
+ def get_messages_by_hash(hash):
+ """Return the set of messages with the matching X-List-ID-Hash.
+
+ :param hash: The X-List-ID-Hash header contents to search for.
+ :returns: An iterator over all the matching messages.
+ """
+
+ def get_message(global_id):
+ """Return the message with the matching hash and sequence number.
+
+ :param global_id: The global relative ID which uniquely addresses this
+ message, relative to the base address of the message store. This
+ must be a string of the X-List-ID-Hash followed by a single slash
+ character, followed by the X-List-Sequence-Number.
+ :returns: The matching message, or None if there is no match.
+ """
+
+ def delete_message(global_id):
+ """Remove the addressed message from the store.
+
+ :param global_id: The global relative ID which uniquely addresses the
+ message to delete.
+ :raises KeyError: if there is no such message.
+ """
+
+ messages = Attribute(
+ """An iterator over all messages in this message store.""")