diff options
| author | Barry Warsaw | 2007-12-10 23:00:14 -0500 |
|---|---|---|
| committer | Barry Warsaw | 2007-12-10 23:00:14 -0500 |
| commit | 7923b90f0349f9e2dc891082e2e1c3bf23b4d79c (patch) | |
| tree | 35ce2b0d149f8f806d84e0b8e991213d073df193 | |
| parent | 5495accf05d77e1c4ff2855f5e42c2e56f51e45d (diff) | |
| download | mailman-7923b90f0349f9e2dc891082e2e1c3bf23b4d79c.tar.gz mailman-7923b90f0349f9e2dc891082e2e1c3bf23b4d79c.tar.zst mailman-7923b90f0349f9e2dc891082e2e1c3bf23b4d79c.zip | |
Add .get() to our Message subclass, which ensures that returned
values are unicodes if they come from the base class as a string.
Get rid of the 'global id'. Now use just Message-ID. Rename
X-List-ID-Hash to X-Message-ID-Hash. Do not take Date header into
account when calculating this hash.
Because of the above change, the assumption is that there will be no
Message-ID collisions. Therefore, get rid of
IMessageStore.get_message(), .get_messages_by_message_id() and
.get_messages_by_hash(). Instead, it's now .get_message_by_id() and
.get_message_by_hash() both of which return the message object or
None.
Message.hash -> Message.message_id_hash
When storing a message in the message store, the final path component
has the entire hash, not just the leftover parts after directory
prefix splitting.
MessageStore.delete_message() deletes the file too.
Doctests clean up message store messages through the message store
instead of directly off the filesystem.
Diffstat (limited to '')
| -rw-r--r-- | Mailman/Message.py | 6 | ||||
| -rw-r--r-- | Mailman/app/moderator.py | 24 | ||||
| -rw-r--r-- | Mailman/database/mailman.sql | 354 | ||||
| -rw-r--r-- | Mailman/database/message.py | 6 | ||||
| -rw-r--r-- | Mailman/database/messagestore.py | 91 | ||||
| -rw-r--r-- | Mailman/docs/hold.txt | 12 | ||||
| -rw-r--r-- | Mailman/docs/messagestore.txt | 101 | ||||
| -rw-r--r-- | Mailman/docs/requests.txt | 30 | ||||
| -rw-r--r-- | Mailman/docs/subject-munging.txt | 4 | ||||
| -rw-r--r-- | Mailman/interfaces/messages.py | 88 | ||||
| -rw-r--r-- | Mailman/tests/test_documentation.py | 7 |
11 files changed, 340 insertions, 383 deletions
diff --git a/Mailman/Message.py b/Mailman/Message.py index f0f95e76e..89bc42798 100644 --- a/Mailman/Message.py +++ b/Mailman/Message.py @@ -51,6 +51,12 @@ class Message(email.message.Message): return unicode(value, 'ascii') return value + def get(self, name, failobj=None): + value = email.message.Message.get(self, name, failobj) + if isinstance(value, str): + return unicode(value, 'ascii') + return value + def get_all(self, name, failobj=None): all_values = email.message.Message.get_all(self, name, failobj) return [(unicode(value, 'ascii') if isinstance(value, str) else value) diff --git a/Mailman/app/moderator.py b/Mailman/app/moderator.py index 0557f4332..691268153 100644 --- a/Mailman/app/moderator.py +++ b/Mailman/app/moderator.py @@ -62,22 +62,24 @@ def hold_message(mlist, msg, msgdata=None, reason=None): reason = '' # Add the message to the message store. It is required to have a # Message-ID header. - if 'message-id' not in msg: - msg['Message-ID'] = make_msgid() - seqno = config.db.message_store.add(msg) - global_id = '%s/%s' % (msg['X-List-ID-Hash'], seqno) + message_id = msg.get('message-id') + if message_id is None: + msg['Message-ID'] = message_id = unicode(make_msgid()) + assert isinstance(message_id, unicode), ( + 'Message-ID is not a unicode: %s' % message_id) + config.db.message_store.add(msg) # Prepare the message metadata with some extra information needed only by # the moderation interface. - msgdata['_mod_global_id'] = global_id + msgdata['_mod_message_id'] = message_id msgdata['_mod_fqdn_listname'] = mlist.fqdn_listname msgdata['_mod_sender'] = msg.get_sender() msgdata['_mod_subject'] = msg.get('subject', _('(no subject)')) msgdata['_mod_reason'] = reason msgdata['_mod_hold_date'] = datetime.now().isoformat() - # Now hold this request. We'll use the message's global ID as the key. + # Now hold this request. We'll use the message_id as the key. 
requestsdb = config.db.requests.get_list_requests(mlist) request_id = requestsdb.hold_request( - RequestType.held_message, global_id, msgdata) + RequestType.held_message, message_id, msgdata) return request_id @@ -88,7 +90,7 @@ def handle_message(mlist, id, action, key, msgdata = requestdb.get_request(id) # Handle the action. rejection = None - global_id = msgdata['_mod_global_id'] + message_id = msgdata['_mod_message_id'] sender = msgdata['_mod_sender'] subject = msgdata['_mod_subject'] if action is Action.defer: @@ -107,7 +109,7 @@ def handle_message(mlist, id, action, sender, comment or _('[No reason given]'), language) elif action is Action.accept: # Start by getting the message from the message store. - msg = config.db.message_store.get_message(global_id) + msg = config.db.message_store.get_message_by_id(message_id) # Delete moderation-specific entries from the message metadata. for key in msgdata.keys(): if key.startswith('_mod_'): @@ -136,7 +138,7 @@ def handle_message(mlist, id, action, # Forward the message. if forward: # Get a copy of the original message from the message store. - msg = config.db.message_store.get_message(global_id) + msg = config.db.message_store.get_message_by_id(message_id) # It's possible the forwarding address list is a comma separated list # of realname/address pairs. addresses = [addr[1] for addr in getaddresses(forward)] @@ -160,7 +162,7 @@ def handle_message(mlist, id, action, fmsg.send(mlist) # Delete the message from the message store if it is not being preserved. 
if not preserve: - config.db.message_store.delete_message(global_id) + config.db.message_store.delete_message(message_id) requestdb.delete_request(id) # Log the rejection if rejection: diff --git a/Mailman/database/mailman.sql b/Mailman/database/mailman.sql index a20b1b118..cff4daba0 100644 --- a/Mailman/database/mailman.sql +++ b/Mailman/database/mailman.sql @@ -1,201 +1,201 @@ CREATE TABLE _request ( - id INTEGER NOT NULL, - "key" TEXT, - request_type TEXT, - data_hash TEXT, - mailing_list_id INTEGER, - PRIMARY KEY (id), - CONSTRAINT _request_mailing_list_id_fk FOREIGN KEY(mailing_list_id) REFERENCES mailinglist (id) + id INTEGER NOT NULL, + "key" TEXT, + request_type TEXT, + data_hash TEXT, + mailing_list_id INTEGER, + PRIMARY KEY (id), + CONSTRAINT _request_mailing_list_id_fk FOREIGN KEY(mailing_list_id) REFERENCES mailinglist (id) ); CREATE TABLE address ( - id INTEGER NOT NULL, - address TEXT, - _original TEXT, - real_name TEXT, - verified_on TIMESTAMP, - registered_on TIMESTAMP, - user_id INTEGER, - preferences_id INTEGER, - PRIMARY KEY (id), - CONSTRAINT address_user_id_fk FOREIGN KEY(user_id) REFERENCES user (id), - CONSTRAINT address_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) + id INTEGER NOT NULL, + address TEXT, + _original TEXT, + real_name TEXT, + verified_on TIMESTAMP, + registered_on TIMESTAMP, + user_id INTEGER, + preferences_id INTEGER, + PRIMARY KEY (id), + CONSTRAINT address_user_id_fk FOREIGN KEY(user_id) REFERENCES user (id), + CONSTRAINT address_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) ); CREATE TABLE language ( - id INTEGER NOT NULL, - code TEXT, - PRIMARY KEY (id) + id INTEGER NOT NULL, + code TEXT, + PRIMARY KEY (id) ); CREATE TABLE mailinglist ( - id INTEGER NOT NULL, - list_name TEXT, - host_name TEXT, - created_at TIMESTAMP, - web_page_url TEXT, - admin_member_chunksize INTEGER, - hold_and_cmd_autoresponses BLOB, - next_request_id INTEGER, - next_digest_number INTEGER, - 
admin_responses BLOB, - postings_responses BLOB, - request_responses BLOB, - digest_last_sent_at NUMERIC(10, 2), - one_last_digest BLOB, - volume INTEGER, - last_post_time TIMESTAMP, - accept_these_nonmembers BLOB, - acceptable_aliases BLOB, - admin_immed_notify BOOLEAN, - admin_notify_mchanges BOOLEAN, - administrivia BOOLEAN, - advertised BOOLEAN, - anonymous_list BOOLEAN, - archive BOOLEAN, - archive_private BOOLEAN, - archive_volume_frequency INTEGER, - autorespond_admin BOOLEAN, - autorespond_postings BOOLEAN, - autorespond_requests INTEGER, - autoresponse_admin_text TEXT, - autoresponse_graceperiod TEXT, - autoresponse_postings_text TEXT, - autoresponse_request_text TEXT, - ban_list BLOB, - bounce_info_stale_after TEXT, - bounce_matching_headers TEXT, - bounce_notify_owner_on_disable BOOLEAN, - bounce_notify_owner_on_removal BOOLEAN, - bounce_processing BOOLEAN, - bounce_score_threshold INTEGER, - bounce_unrecognized_goes_to_list_owner BOOLEAN, - bounce_you_are_disabled_warnings INTEGER, - bounce_you_are_disabled_warnings_interval TEXT, - collapse_alternatives BOOLEAN, - convert_html_to_plaintext BOOLEAN, - default_member_moderation BOOLEAN, - description TEXT, - digest_footer TEXT, - digest_header TEXT, - digest_is_default BOOLEAN, - digest_send_periodic BOOLEAN, - digest_size_threshold INTEGER, - digest_volume_frequency INTEGER, - digestable BOOLEAN, - discard_these_nonmembers BLOB, - emergency BOOLEAN, - encode_ascii_prefixes BOOLEAN, - filter_action INTEGER, - filter_content BOOLEAN, - filter_filename_extensions BLOB, - filter_mime_types BLOB, - first_strip_reply_to BOOLEAN, - forward_auto_discards BOOLEAN, - gateway_to_mail BOOLEAN, - gateway_to_news BOOLEAN, - generic_nonmember_action INTEGER, - goodbye_msg TEXT, - header_filter_rules BLOB, - hold_these_nonmembers BLOB, - include_list_post_header BOOLEAN, - include_rfc2369_headers BOOLEAN, - info TEXT, - linked_newsgroup TEXT, - max_days_to_hold INTEGER, - max_message_size INTEGER, - max_num_recipients 
INTEGER, - member_moderation_action BOOLEAN, - member_moderation_notice TEXT, - mime_is_default_digest BOOLEAN, - moderator_password TEXT, - msg_footer TEXT, - msg_header TEXT, - new_member_options INTEGER, - news_moderation TEXT, - news_prefix_subject_too BOOLEAN, - nntp_host TEXT, - nondigestable BOOLEAN, - nonmember_rejection_notice TEXT, - obscure_addresses BOOLEAN, - pass_filename_extensions BLOB, - pass_mime_types BLOB, - personalize TEXT, - post_id INTEGER, - preferred_language TEXT, - private_roster BOOLEAN, - real_name TEXT, - reject_these_nonmembers BLOB, - reply_goes_to_list TEXT, - reply_to_address TEXT, - require_explicit_destination BOOLEAN, - respond_to_post_requests BOOLEAN, - scrub_nondigest BOOLEAN, - send_goodbye_msg BOOLEAN, - send_reminders BOOLEAN, - send_welcome_msg BOOLEAN, - subject_prefix TEXT, - subscribe_auto_approval BLOB, - subscribe_policy INTEGER, - topics BLOB, - topics_bodylines_limit INTEGER, - topics_enabled BOOLEAN, - unsubscribe_policy INTEGER, - welcome_msg TEXT, - PRIMARY KEY (id) + id INTEGER NOT NULL, + list_name TEXT, + host_name TEXT, + created_at TIMESTAMP, + web_page_url TEXT, + admin_member_chunksize INTEGER, + hold_and_cmd_autoresponses BLOB, + next_request_id INTEGER, + next_digest_number INTEGER, + admin_responses BLOB, + postings_responses BLOB, + request_responses BLOB, + digest_last_sent_at NUMERIC(10, 2), + one_last_digest BLOB, + volume INTEGER, + last_post_time TIMESTAMP, + accept_these_nonmembers BLOB, + acceptable_aliases BLOB, + admin_immed_notify BOOLEAN, + admin_notify_mchanges BOOLEAN, + administrivia BOOLEAN, + advertised BOOLEAN, + anonymous_list BOOLEAN, + archive BOOLEAN, + archive_private BOOLEAN, + archive_volume_frequency INTEGER, + autorespond_admin BOOLEAN, + autorespond_postings BOOLEAN, + autorespond_requests INTEGER, + autoresponse_admin_text TEXT, + autoresponse_graceperiod TEXT, + autoresponse_postings_text TEXT, + autoresponse_request_text TEXT, + ban_list BLOB, + bounce_info_stale_after 
TEXT, + bounce_matching_headers TEXT, + bounce_notify_owner_on_disable BOOLEAN, + bounce_notify_owner_on_removal BOOLEAN, + bounce_processing BOOLEAN, + bounce_score_threshold INTEGER, + bounce_unrecognized_goes_to_list_owner BOOLEAN, + bounce_you_are_disabled_warnings INTEGER, + bounce_you_are_disabled_warnings_interval TEXT, + collapse_alternatives BOOLEAN, + convert_html_to_plaintext BOOLEAN, + default_member_moderation BOOLEAN, + description TEXT, + digest_footer TEXT, + digest_header TEXT, + digest_is_default BOOLEAN, + digest_send_periodic BOOLEAN, + digest_size_threshold INTEGER, + digest_volume_frequency INTEGER, + digestable BOOLEAN, + discard_these_nonmembers BLOB, + emergency BOOLEAN, + encode_ascii_prefixes BOOLEAN, + filter_action INTEGER, + filter_content BOOLEAN, + filter_filename_extensions BLOB, + filter_mime_types BLOB, + first_strip_reply_to BOOLEAN, + forward_auto_discards BOOLEAN, + gateway_to_mail BOOLEAN, + gateway_to_news BOOLEAN, + generic_nonmember_action INTEGER, + goodbye_msg TEXT, + header_filter_rules BLOB, + hold_these_nonmembers BLOB, + include_list_post_header BOOLEAN, + include_rfc2369_headers BOOLEAN, + info TEXT, + linked_newsgroup TEXT, + max_days_to_hold INTEGER, + max_message_size INTEGER, + max_num_recipients INTEGER, + member_moderation_action BOOLEAN, + member_moderation_notice TEXT, + mime_is_default_digest BOOLEAN, + moderator_password TEXT, + msg_footer TEXT, + msg_header TEXT, + new_member_options INTEGER, + news_moderation TEXT, + news_prefix_subject_too BOOLEAN, + nntp_host TEXT, + nondigestable BOOLEAN, + nonmember_rejection_notice TEXT, + obscure_addresses BOOLEAN, + pass_filename_extensions BLOB, + pass_mime_types BLOB, + personalize TEXT, + post_id INTEGER, + preferred_language TEXT, + private_roster BOOLEAN, + real_name TEXT, + reject_these_nonmembers BLOB, + reply_goes_to_list TEXT, + reply_to_address TEXT, + require_explicit_destination BOOLEAN, + respond_to_post_requests BOOLEAN, + scrub_nondigest BOOLEAN, + 
send_goodbye_msg BOOLEAN, + send_reminders BOOLEAN, + send_welcome_msg BOOLEAN, + subject_prefix TEXT, + subscribe_auto_approval BLOB, + subscribe_policy INTEGER, + topics BLOB, + topics_bodylines_limit INTEGER, + topics_enabled BOOLEAN, + unsubscribe_policy INTEGER, + welcome_msg TEXT, + PRIMARY KEY (id) ); CREATE TABLE member ( - id INTEGER NOT NULL, - role TEXT, - mailing_list TEXT, - address_id INTEGER, - preferences_id INTEGER, - PRIMARY KEY (id), - CONSTRAINT member_address_id_fk FOREIGN KEY(address_id) REFERENCES address (id), - CONSTRAINT member_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) + id INTEGER NOT NULL, + role TEXT, + mailing_list TEXT, + address_id INTEGER, + preferences_id INTEGER, + PRIMARY KEY (id), + CONSTRAINT member_address_id_fk FOREIGN KEY(address_id) REFERENCES address (id), + CONSTRAINT member_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) ); CREATE TABLE message ( - id INTEGER NOT NULL, - hash TEXT, - path TEXT, - message_id TEXT, - PRIMARY KEY (id) + id INTEGER NOT NULL, + message_id_hash TEXT, + path TEXT, + message_id TEXT, + PRIMARY KEY (id) ); CREATE TABLE pended ( - id INTEGER NOT NULL, - token TEXT, - expiration_date TIMESTAMP, - PRIMARY KEY (id) + id INTEGER NOT NULL, + token TEXT, + expiration_date TIMESTAMP, + PRIMARY KEY (id) ); CREATE TABLE pendedkeyvalue ( - id INTEGER NOT NULL, - "key" TEXT, - value TEXT, - pended_id INTEGER, - PRIMARY KEY (id), - CONSTRAINT pendedkeyvalue_pended_id_fk FOREIGN KEY(pended_id) REFERENCES pended (id) + id INTEGER NOT NULL, + "key" TEXT, + value TEXT, + pended_id INTEGER, + PRIMARY KEY (id), + CONSTRAINT pendedkeyvalue_pended_id_fk FOREIGN KEY(pended_id) REFERENCES pended (id) ); CREATE TABLE preferences ( - id INTEGER NOT NULL, - acknowledge_posts BOOLEAN, - hide_address BOOLEAN, - preferred_language TEXT, - receive_list_copy BOOLEAN, - receive_own_postings BOOLEAN, - delivery_mode TEXT, - delivery_status TEXT, - PRIMARY KEY (id) + id 
INTEGER NOT NULL, + acknowledge_posts BOOLEAN, + hide_address BOOLEAN, + preferred_language TEXT, + receive_list_copy BOOLEAN, + receive_own_postings BOOLEAN, + delivery_mode TEXT, + delivery_status TEXT, + PRIMARY KEY (id) ); CREATE TABLE user ( - id INTEGER NOT NULL, - real_name TEXT, - password TEXT, - preferences_id INTEGER, - PRIMARY KEY (id), - CONSTRAINT user_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) + id INTEGER NOT NULL, + real_name TEXT, + password TEXT, + preferences_id INTEGER, + PRIMARY KEY (id), + CONSTRAINT user_preferences_id_fk FOREIGN KEY(preferences_id) REFERENCES preferences (id) ); CREATE TABLE version ( - id INTEGER NOT NULL, - component TEXT, - version INTEGER, - PRIMARY KEY (id) + id INTEGER NOT NULL, + component TEXT, + version INTEGER, + PRIMARY KEY (id) ); CREATE INDEX ix__request_mailing_list_id ON _request (mailing_list_id); CREATE INDEX ix_address_preferences_id ON address (preferences_id); diff --git a/Mailman/database/message.py b/Mailman/database/message.py index b8a7e3dfb..01e71b0c0 100644 --- a/Mailman/database/message.py +++ b/Mailman/database/message.py @@ -31,12 +31,12 @@ class Message(Model): id = Int(primary=True, default=AutoReload) message_id = Unicode() - hash = RawStr() + message_id_hash = RawStr() path = RawStr() # This is a Messge-ID field representation, not a database row id. 
- def __init__(self, message_id, hash, path): + def __init__(self, message_id, message_id_hash, path): self.message_id = message_id - self.hash = hash + self.message_id_hash = message_id_hash self.path = path config.db.store.add(self) diff --git a/Mailman/database/messagestore.py b/Mailman/database/messagestore.py index 69c5d58d5..57c0042e8 100644 --- a/Mailman/database/messagestore.py +++ b/Mailman/database/messagestore.py @@ -50,31 +50,35 @@ class MessageStore: message_ids = message.get_all('message-id', []) if len(message_ids) <> 1: raise ValueError('Exactly one Message-ID header required') - # Calculate and insert the X-List-ID-Hash. + # Calculate and insert the X-Message-ID-Hash. message_id = message_ids[0] + # Complain if the Message-ID already exists in the storage. + existing = config.db.store.find(Message, + Message.message_id == message_id).one() + if existing is not None: + raise ValueError('Message ID already exists in message store: %s', + message_id) shaobj = hashlib.sha1(message_id) hash32 = base64.b32encode(shaobj.digest()) - del message['X-List-ID-Hash'] - message['X-List-ID-Hash'] = hash32 + del message['X-Message-ID-Hash'] + message['X-Message-ID-Hash'] = hash32 # Calculate the path on disk where we're going to store this message # object, in pickled format. parts = [] split = list(hash32) while split and len(parts) < MAX_SPLITS: parts.append(split.pop(0) + split.pop(0)) - parts.append(EMPTYSTRING.join(split)) + parts.append(hash32) relpath = os.path.join(*parts) # Store the message in the database. This relies on the database # providing a unique serial number, but to get this information, we # have to use a straight insert instead of relying on Elixir to create # the object. - row = Message(hash=hash32, path=relpath, message_id=message_id) - # Add the additional header. 
- seqno = row.id - del message['X-List-Sequence-Number'] - message['X-List-Sequence-Number'] = str(seqno) + row = Message(message_id=message_id, + message_id_hash=hash32, + path=relpath) # Now calculate the full file system path. - path = os.path.join(config.MESSAGES_DIR, relpath, str(seqno)) + path = os.path.join(config.MESSAGES_DIR, relpath) # Write the file to the path, but catch the appropriate exception in # case the parent directories don't yet exist. In that case, create # them and try again. @@ -88,54 +92,41 @@ class MessageStore: if e.errno <> errno.ENOENT: raise os.makedirs(os.path.dirname(path)) - return seqno + return hash32 - def _msgobj(self, msgrow): - path = os.path.join(config.MESSAGES_DIR, msgrow.path, str(msgrow.id)) + def _get_message(self, row): + path = os.path.join(config.MESSAGES_DIR, row.path) with open(path) as fp: return pickle.load(fp) - def get_messages_by_message_id(self, message_id): - for msgrow in config.db.store.find(Message, message_id=message_id): - yield self._msgobj(msgrow) + def get_message_by_id(self, message_id): + row = config.db.store.find(Message, message_id=message_id).one() + if row is None: + return None + return self._get_message(row) - def get_messages_by_hash(self, hash): + def get_message_by_hash(self, message_id_hash): # It's possible the hash came from a message header, in which case it - # will be a Unicode. However when coming from source code, it will - # always be an 8-string. Coerce to the latter if necessary; it must - # be US-ASCII. - if isinstance(hash, unicode): - hash = hash.encode('ascii') - for msgrow in config.db.store.find(Message, hash=hash): - yield self._msgobj(msgrow) - - def _getmsg(self, global_id): - try: - hash, seqno = global_id.split('/', 1) - seqno = int(seqno) - except ValueError: - return None - messages = config.db.store.find(Message, id=seqno) - if messages.count() == 0: + # will be a Unicode. However when coming from source code, it may be + # an 8-string. 
Coerce to the latter if necessary; it must be + # US-ASCII. + if isinstance(message_id_hash, unicode): + message_id_hash = message_id_hash.encode('ascii') + row = config.db.store.find(Message, + message_id_hash=message_id_hash).one() + if row is None: return None - assert messages.count() == 1, 'Multiple id matches' - if messages[0].hash <> hash: - # The client lied about which message they wanted. They gave a - # valid sequence number, but the hash did not match. - return None - return messages[0] - - def get_message(self, global_id): - msgrow = self._getmsg(global_id) - return (self._msgobj(msgrow) if msgrow is not None else None) + return self._get_message(row) @property def messages(self): - for msgrow in config.db.store.find(Message): - yield self._msgobj(msgrow) + for row in config.db.store.find(Message): + yield self._get_message(row) - def delete_message(self, global_id): - msgrow = self._getmsg(global_id) - if msgrow is None: - raise KeyError(global_id) - config.db.store.remove(msgrow) + def delete_message(self, message_id): + row = config.db.store.find(Message, message_id=message_id).one() + if row is None: + raise LookupError(message_id) + path = os.path.join(config.MESSAGES_DIR, row.path) + os.remove(path) + config.db.store.remove(row) diff --git a/Mailman/docs/hold.txt b/Mailman/docs/hold.txt index 56a10206f..a93953435 100644 --- a/Mailman/docs/hold.txt +++ b/Mailman/docs/hold.txt @@ -130,8 +130,7 @@ that Mailman pulled it from the appropriate news group. From: aperson@example.org Subject: An implicit message Message-ID: ... - X-List-ID-Hash: ... - X-List-Sequence-Number: ... + X-Message-ID-Hash: ... <BLANKLINE> <BLANKLINE> >>> print msgdata @@ -263,8 +262,7 @@ one to the original author. <BLANKLINE> From: aperson@example.com Message-ID: ... - X-List-ID-Hash: ... - X-List-Sequence-Number: ... + X-Message-ID-Hash: ... <BLANKLINE> <BLANKLINE> --... @@ -350,12 +348,12 @@ The message itself is held in the message store. 
>>> rkey, rdata = config.db.requests.get_list_requests(mlist).get_request( ... data['id']) - >>> msg = config.db.message_store.get_message(rdata['_mod_global_id']) + >>> msg = config.db.message_store.get_message_by_id( + ... rdata['_mod_message_id']) >>> print msg.as_string() From: aperson@example.com Message-ID: ... - X-List-ID-Hash: ... - X-List-Sequence-Number: ... + X-Message-ID-Hash: ... <BLANKLINE> <BLANKLINE> diff --git a/Mailman/docs/messagestore.txt b/Mailman/docs/messagestore.txt index 9b44a7e59..012376a14 100644 --- a/Mailman/docs/messagestore.txt +++ b/Mailman/docs/messagestore.txt @@ -1,14 +1,11 @@ The message store ================= -The message store is a collection of messages keyed off of unique global -identifiers. A global id for a message is calculated relative to the message -store's base URL and its components are stored as headers on the message. One -piece of information is the X-List-ID-Hash, a base-32 encoding of the SHA1 -hash of the message's Message-ID header, which the message must have. The -second piece of information is supplied by the message store; it is a sequence -number that will uniquely identify the message even when the X-List-ID-Hash -collides. +The message store is a collection of messages keyed off of Message-ID and +X-Message-ID-Hash headers. Either of these values can be combined with the +message's List-Archive header to create a globally unique URI to the message +object in the internet facing interface of the message store. The +X-Message-ID-Hash is the Base32 SHA1 hash of the Message-ID. >>> from Mailman.configuration import config >>> store = config.db.message_store @@ -30,12 +27,11 @@ However, if the message has a Message-ID header, it can be stored. 
>>> msg['Message-ID'] = '<87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp>' >>> store.add(msg) - 1 + 'AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35' >>> print msg.as_string() Subject: An important message Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> - X-List-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 - X-List-Sequence-Number: 1 + X-Message-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 <BLANKLINE> This message is very important. <BLANKLINE> @@ -44,59 +40,33 @@ However, if the message has a Message-ID header, it can be stored. Finding messages ---------------- -There are several ways to find a message given some or all of the information -created above. Because Message-IDs are not guaranteed unique, looking up -messages with that key resturns a collection. The collection may be empty if -there are no matches. - - >>> list(store.get_messages_by_message_id(u'nothing')) - [] +There are several ways to find a message given either the Message-ID or +X-Message-ID-Hash headers. In either case, if no matching message is found, +None is returned. -Given an existing Message-ID, all matching messages will be found. - - >>> msgs = list(store.get_messages_by_message_id(msg['message-id'])) - >>> len(msgs) - 1 - >>> print msgs[0].as_string() - Subject: An important message - Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> - X-List-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 - X-List-Sequence-Number: 1 - <BLANKLINE> - This message is very important. - <BLANKLINE> + >>> print store.get_message_by_id(u'nothing') + None + >>> print store.get_message_by_hash(u'nothing') + None -Similarly, we can find messages by the ID hash. +Given an existing Message-ID, the message can be found. 
- >>> list(store.get_messages_by_hash('nothing')) - [] - >>> msgs = list(store.get_messages_by_hash(msg['x-list-id-hash'])) - >>> len(msgs) - 1 - >>> print msgs[0].as_string() + >>> message = store.get_message_by_id(msg['message-id']) + >>> print message.as_string() Subject: An important message Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> - X-List-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 - X-List-Sequence-Number: 1 + X-Message-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 <BLANKLINE> This message is very important. <BLANKLINE> -We can also get a single message by using it's relative global ID. This -returns None if there is no match. +Similarly, we can find messages by the X-Message-ID-Hash: - >>> print store.get_message('nothing') - None - >>> print store.get_message('nothing/1') - None - >>> id_hash = msg['x-list-id-hash'] - >>> seqno = msg['x-list-sequence-number'] - >>> global_id = id_hash + '/' + seqno - >>> print store.get_message(global_id).as_string() + >>> message = store.get_message_by_hash(msg['x-message-id-hash']) + >>> print message.as_string() Subject: An important message Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> - X-List-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 - X-List-Sequence-Number: 1 + X-Message-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 <BLANKLINE> This message is very important. <BLANKLINE> @@ -108,14 +78,13 @@ Iterating over all messages The message store provides a means to iterate over all the messages it contains. - >>> msgs = list(store.messages) - >>> len(msgs) + >>> messages = list(store.messages) + >>> len(messages) 1 - >>> print msgs[0].as_string() + >>> print messages[0].as_string() Subject: An important message Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> - X-List-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 - X-List-Sequence-Number: 1 + X-Message-ID-Hash: AGDWSNXXKCWEILKKNYTBOHRDQGOX3Y35 <BLANKLINE> This message is very important. <BLANKLINE> @@ -124,20 +93,22 @@ contains. 
Deleting messages from the store -------------------------------- -The global relative ID is the key into the message store. If you try to -delete a global ID that isn't in the store, you get an exception. +You delete a message from the storage service by providing the Message-ID for +the message you want to delete. If you try to delete a Message-ID that isn't +in the store, you get an exception. - >>> store.delete_message('nothing') + >>> store.delete_message(u'nothing') Traceback (most recent call last): ... - KeyError: 'nothing' + LookupError: nothing But if you delete an existing message, it really gets deleted. - >>> store.delete_message(global_id) + >>> message_id = message['message-id'] + >>> store.delete_message(message_id) >>> list(store.messages) [] - >>> print store.get_message(global_id) + >>> print store.get_message_by_id(message_id) + None + >>> print store.get_message_by_hash(message['x-message-id-hash']) None - >>> list(store.get_messages_by_message_id(msg['message-id'])) - [] diff --git a/Mailman/docs/requests.txt b/Mailman/docs/requests.txt index 7a395ce94..ea4dcc75d 100644 --- a/Mailman/docs/requests.txt +++ b/Mailman/docs/requests.txt @@ -231,6 +231,9 @@ this case, we won't include any additional metadata. We can also hold a message with some additional metadata. + # Delete the Message-ID from the previous hold so we don't try to store + # collisions in the message storage. + >>> del msg['message-id'] >>> msgdata = dict(sender='aperson@example.com', ... approved=True, ... received_time=123.45) @@ -308,8 +311,7 @@ indicates that the message has been approved. To: alist@example.com Subject: Something important Message-ID: ... - X-List-ID-Hash: ... - X-List-Sequence-Number: ... + X-Message-ID-Hash: ... X-Mailman-Approved-At: ... <BLANKLINE> Here's something important about our mailing list. @@ -338,26 +340,21 @@ is deleted. ... 
""") >>> id_4 = moderator.hold_message(mlist, msg, {}, 'Needs approval') >>> moderator.handle_message(mlist, id_4, Action.discard) - >>> msgs = config.db.message_store.get_messages_by_message_id(u'<12345>') - >>> list(msgs) - [] + >>> print config.db.message_store.get_message_by_id(u'<12345>') + None But if we ask to preserve the message when we discard it, it will be held in the message store after disposition. >>> id_4 = moderator.hold_message(mlist, msg, {}, 'Needs approval') >>> moderator.handle_message(mlist, id_4, Action.discard, preserve=True) - >>> msgs = config.db.message_store.get_messages_by_message_id(u'<12345>') - >>> msgs = list(msgs) - >>> len(msgs) - 1 - >>> print msgs[0].as_string() + >>> stored_msg = config.db.message_store.get_message_by_id(u'<12345>') + >>> print stored_msg.as_string() From: aperson@example.org To: alist@example.com Subject: Something important Message-ID: <12345> - X-List-ID-Hash: 4CF7EAU3SIXBPXBB5S6PEUMO62MWGQN6 - X-List-Sequence-Number: 1 + X-Message-ID-Hash: 4CF7EAU3SIXBPXBB5S6PEUMO62MWGQN6 <BLANKLINE> Here's something important about our mailing list. <BLANKLINE> @@ -366,6 +363,10 @@ Orthogonal to preservation, the message can also be forwarded to another address. This is helpful for getting the message into the inbox of one of the moderators. + # Set a new Message-ID from the previous hold so we don't try to store + # collisions in the message storage. + >>> del msg['message-id'] + >>> msg['Message-ID'] = u'<abcde>' >>> id_4 = moderator.hold_message(mlist, msg, {}, 'Needs approval') >>> moderator.handle_message(mlist, id_4, Action.discard, ... forward=[u'zperson@example.com']) @@ -383,9 +384,8 @@ moderators. From: aperson@example.org To: alist@example.com Subject: Something important - Message-ID: <12345> - X-List-ID-Hash: 4CF7EAU3SIXBPXBB5S6PEUMO62MWGQN6 - X-List-Sequence-Number: ... + Message-ID: <abcde> + X-Message-ID-Hash: EN2R5UQFMOUTCL44FLNNPLSXBIZW62ER <BLANKLINE> Here's something important about our mailing list. 
<BLANKLINE> diff --git a/Mailman/docs/subject-munging.txt b/Mailman/docs/subject-munging.txt index 388a02564..12bc098f7 100644 --- a/Mailman/docs/subject-munging.txt +++ b/Mailman/docs/subject-munging.txt @@ -36,7 +36,7 @@ the new Subject header because it gets converted from a string to an email.header.Header instance which has an unhelpful repr. >>> msgdata['origsubj'] - '' + u'' >>> print msg['subject'] [XTest] (no subject) @@ -52,7 +52,7 @@ the beginning of the header's value. >>> msgdata = {} >>> process(mlist, msg, msgdata) >>> msgdata['origsubj'] - 'Something important' + u'Something important' >>> print msg['subject'] [XTest] Something important diff --git a/Mailman/interfaces/messages.py b/Mailman/interfaces/messages.py index 9fac98d76..3be811a3a 100644 --- a/Mailman/interfaces/messages.py +++ b/Mailman/interfaces/messages.py @@ -25,76 +25,66 @@ class IMessageStore(Interface): """The interface of the global message storage service. All messages that are stored in the system live in the message storage - service. This store is responsible for providing unique identifiers for - every message stored in it. A message stored in this service must have at - least a Message-ID header and a Date header. These are not guaranteed to - be unique, so the service also provides a unique sequence number to every - message. + service. A message stored in this service must have a Message-ID header. + The store writes an X-Message-ID-Hash header which contains the Base32 + encoded SHA1 hash of the message's Message-ID header. Any existing + X-Message-ID-Hash header is overwritten. - Storing a message returns the unique sequence number for the message. - This sequence number will be stored on the message's - X-List-Sequence-Number header. Any previous such header value will be - overwritten. An X-List-ID-Hash header will also be added, containing the - Base-32 encoded SHA1 hash of the message's Message-ID and Date headers. 
+ Either the Message-ID or the X-Message-ID-Hash header can be used to + uniquely identify this message in the storage service. While it is + possible to see duplicate Message-IDs, this is never correct and the + service is allowed to drop any subsequent colliding messages, or overwrite + earlier messages with later ones. - The combination of the X-List-ID-Hash header and the - X-List-Sequence-Number header uniquely identify this message to the - storage service. A globally unique URL that addresses this message may be - crafted from these headers and the List-Archive header as follows. For a - message with the following headers: + The combination of the List-Archive header and either the Message-ID or + X-Message-ID-Hash header can be used to retrieve the message from the + internet facing interface for the message store. This can be considered a + globally unique URI to the message. + + For example, a message with the following headers: Message-ID: <87myycy5eh.fsf@uwakimon.sk.tsukuba.ac.jp> Date: Wed, 04 Jul 2007 16:49:58 +0900 List-Archive: http://archive.example.com/ - X-List-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI - X-List-Sequence-Number: 801 + X-Message-ID-Hash: RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI - the globally unique URL would be: + the globally unique URI would be: - http://archive.example.com/RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI/801 + http://archive.example.com/RXTJ357KFOTJP3NFJA6KMO65X7VQOHJI """ def add(message): """Add the message to the store. :param message: An email.message.Message instance containing at least - a Message-ID header and a Date header. The message will be given - an X-List-ID-Hash header and an X-List-Sequence-Number header. - :returns: The message's sequence ID as an integer. - :raises ValueError: if the message is missing one of the required - headers. + a unique Message-ID header. The message will be given an + X-Message-ID-Hash header, overriding any existing such header. + :returns: The calculated X-Message-ID-Hash header. 
+ :raises ValueError: if the message is missing a Message-ID header. + The storage service is also allowed to raise this exception if it + finds, but disallows, collisions. """ - def get_messages_by_message_id(message_id): - """Return the set of messages with the matching Message-ID. + def get_message_by_id(message_id): + """Return the message with a matching Message-ID. :param message_id: The Message-ID header contents to search for. - :returns: An iterator over all the matching messages. + :returns: The message, or None if no matching message was found. """ - def get_messages_by_hash(hash): - """Return the set of messages with the matching X-List-ID-Hash. + def get_message_by_hash(message_id_hash): + """Return the message with the matching X-Message-ID-Hash. - :param hash: The X-List-ID-Hash header contents to search for. - :returns: An iterator over all the matching messages. - """ - - def get_message(global_id): - """Return the message with the matching hash and sequence number. - - :param global_id: The global relative ID which uniquely addresses this - message, relative to the base address of the message store. This - must be a string of the X-List-ID-Hash followed by a single slash - character, followed by the X-List-Sequence-Number. - :returns: The matching message, or None if there is no match. + :param message_id_hash: The X-Message-ID-Hash header contents to + search for. + :returns: The message, or None if no matching message was found. """ - def delete_message(global_id): - """Remove the addressed message from the store. + def delete_message(message_id): + """Remove the given message from the store. - :param global_id: The global relative ID which uniquely addresses the - message to delete. - :raises KeyError: if there is no such message. + :param message_id: The Message-ID of the message to delete from the store. + :raises LookupError: if there is no such message. 
""" messages = Attribute( @@ -105,8 +95,8 @@ class IMessageStore(Interface): class IMessage(Interface): """The representation of an email message.""" - hash = Attribute("""The unique SHA1 hash of the message.""") + message_id = Attribute("""The message's Message-ID header.""") - path = Attribute("""The filesystem path to the message object.""") + message_id_hash = Attribute("""The unique SHA1 hash of the message.""") - message_id = Attribute("""The message's Message-ID header.""") + path = Attribute("""The filesystem path to the message object.""") diff --git a/Mailman/tests/test_documentation.py b/Mailman/tests/test_documentation.py index 575d8e6bd..390ba6a66 100644 --- a/Mailman/tests/test_documentation.py +++ b/Mailman/tests/test_documentation.py @@ -58,10 +58,9 @@ def cleaning_teardown(testobj): for dirpath, dirnames, filenames in os.walk(config.QUEUE_DIR): for filename in filenames: os.remove(os.path.join(dirpath, filename)) - # Clear out messages in the message store directory. - for dirpath, dirnames, filenames in os.walk(config.MESSAGES_DIR): - for filename in filenames: - os.remove(os.path.join(dirpath, filename)) + # Clear out messages in the message store. + for message in config.db.message_store.messages: + config.db.message_store.delete_message(message['message-id']) |
