summaryrefslogtreecommitdiff
path: root/Mailman/database/messagestore.py
diff options
context:
space:
mode:
Diffstat (limited to 'Mailman/database/messagestore.py')
-rw-r--r--Mailman/database/messagestore.py140
1 files changed, 140 insertions, 0 deletions
diff --git a/Mailman/database/messagestore.py b/Mailman/database/messagestore.py
new file mode 100644
index 000000000..eb29fcfb4
--- /dev/null
+++ b/Mailman/database/messagestore.py
@@ -0,0 +1,140 @@
+# Copyright (C) 2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+from __future__ import with_statement
+
+__metaclass__ = type
+__all__ = [
+ 'MessageStore',
+ ]
+
+import os
+import errno
+import base64
+import hashlib
+import cPickle as pickle
+
+from zope.interface import implements
+
+from Mailman import Utils
+from Mailman.configuration import config
+from Mailman.database.model import Message
+from Mailman.interfaces import IMessageStore
+
+# It could be very bad if you have already stored files and you change this
+# value. We'd need a script to reshuffle and resplit.
+MAX_SPLITS = 2
+EMPTYSTRING = ''
+
+
+
+class MessageStore:
+ implements(IMessageStore)
+
+ def add(self, message):
+ # Ensure that the message has the requisite headers.
+ message_ids = message.get_all('message-id', [])
+ dates = message.get_all('date', [])
+ if not (len(message_ids) == 1 and len(dates) == 1):
+ raise ValueError(
+ 'Exactly one Message-ID and one Date header required')
+ # Calculate and insert the X-List-ID-Hash.
+ message_id = message_ids[0]
+ date = dates[0]
+ shaobj = hashlib.sha1(message_id)
+ shaobj.update(date)
+ hash32 = base64.b32encode(shaobj.digest())
+ del message['X-List-ID-Hash']
+ message['X-List-ID-Hash'] = hash32
+ # Calculate the path on disk where we're going to store this message
+ # object, in pickled format.
+ parts = []
+ split = list(hash32)
+ while split and len(parts) < MAX_SPLITS:
+ parts.append(split.pop(0) + split.pop(0))
+ parts.append(EMPTYSTRING.join(split))
+ relpath = os.path.join(*parts)
+ # Store the message in the database. This relies on the database
+ # providing a unique serial number, but to get this information, we
+ # have to use a straight insert instead of relying on Elixir to create
+ # the object.
+ result = Message.table.insert().execute(
+ hash=hash32, path=relpath, message_id=message_id)
+ # Add the additional header.
+ seqno = result.last_inserted_ids()[0]
+ del message['X-List-Sequence-Number']
+ message['X-List-Sequence-Number'] = str(seqno)
+ # Now calculate the full file system path.
+ path = os.path.join(config.MESSAGES_DIR, relpath, str(seqno))
+ # Write the file to the path, but catch the appropriate exception in
+ # case the parent directories don't yet exist. In that case, create
+ # them and try again.
+ while True:
+ try:
+ with open(path, 'w') as fp:
+ # -1 says to use the highest protocol available.
+ pickle.dump(message, fp, -1)
+ break
+ except IOError, e:
+ if e.errno <> errno.ENOENT:
+ raise
+ os.makedirs(os.path.dirname(path))
+ return seqno
+
+ def _msgobj(self, msgrow):
+ path = os.path.join(config.MESSAGES_DIR, msgrow.path, str(msgrow.id))
+ with open(path) as fp:
+ return pickle.load(fp)
+
+ def get_messages_by_message_id(self, message_id):
+ for msgrow in Message.select_by(message_id=message_id):
+ yield self._msgobj(msgrow)
+
+ def get_messages_by_hash(self, hash):
+ for msgrow in Message.select_by(hash=hash):
+ yield self._msgobj(msgrow)
+
+ def _getmsg(self, global_id):
+ try:
+ hash, seqno = global_id.split('/', 1)
+ seqno = int(seqno)
+ except ValueError:
+ return None
+ msgrows = Message.select_by(id=seqno)
+ if not msgrows:
+ return None
+ assert len(msgrows) == 1, 'Multiple id matches'
+ if msgrows[0].hash <> hash:
+ # The client lied about which message they wanted. They gave a
+ # valid sequence number, but the hash did not match.
+ return None
+ return msgrows[0]
+
+ def get_message(self, global_id):
+ msgrow = self._getmsg(global_id)
+ return (self._msgobj(msgrow) if msgrow is not None else None)
+
+ @property
+ def messages(self):
+ for msgrow in Message.select():
+ yield self._msgobj(msgrow)
+
+ def delete_message(self, global_id):
+ msgrow = self._getmsg(global_id)
+ if msgrow is None:
+ raise KeyError(global_id)
+ msgrow.delete()