diff options
| author | bwarsaw | 2001-05-14 18:16:30 +0000 |
|---|---|---|
| committer | bwarsaw | 2001-05-14 18:16:30 +0000 |
| commit | f04bb42e60fb9800b99c0b7bb36f198636d7b3ea (patch) | |
| tree | 463f7d149701a51063cdabc88e7c4eecb87089d1 | |
| parent | b0203e6fa189fa98523802031ca2e35dc98d73ed (diff) | |
| download | mailman-f04bb42e60fb9800b99c0b7bb36f198636d7b3ea.tar.gz mailman-f04bb42e60fb9800b99c0b7bb36f198636d7b3ea.tar.zst mailman-f04bb42e60fb9800b99c0b7bb36f198636d7b3ea.zip | |
enqueue(): The `received_time' metadata (which is set here once, but
only if it has no prior value) is encoded into the file name so that
we can guarantee FIFO order on the processed files. We can't encode
the received time in the file attributes because there isn't enough
precision (and I suspect that stat'ing all those files will be too
much of a disk I/O drain).
Instead, the filebase is composed of the string representation of the
received time in float seconds (the current time on first enqueue), the
symbol `+', and the SHA1 hexdigest of the uniquifying data. This makes it easy and quick to
decode received time for FIFO sorting, but retains the "random" digest
for bitrange slicing. Note that the received_time metadata value is
never changed once it's set so the first part of the filebase will
remain unchanged as it moves between queues (while the hexdigest will
almost definitely change on each queue move).
dequeue(): Be more robust about missing .msg or .db files when the
other exists (usually, it'll be the .msg file that's missing). Return
None for either the msg or data part of the 2-tuple return value,
where None means "missing".
files(): Utilize the new file naming convention to break apart the
file name and sort the files in FIFO order, while still retaining the
bitrange random hash feature.
MarshalSwitchboard: All Python versions up to and including Python 2.1
have a bug in the marshal representation of binary floating point
numbers. Specifically, it loses precision that Mailman requires. The
solution in this class is to have a hardcoded list of known float
attributes, convert them to strings via repr() before marshaling the
dictionary, and convert them back to floats -- via a safe eval() --
when reading the marshal back from file.
| -rw-r--r-- | Mailman/Queue/Switchboard.py | 93 |
1 files changed, 71 insertions, 22 deletions
diff --git a/Mailman/Queue/Switchboard.py b/Mailman/Queue/Switchboard.py index 6f8023fb4..ea97026a8 100644 --- a/Mailman/Queue/Switchboard.py +++ b/Mailman/Queue/Switchboard.py @@ -37,7 +37,7 @@ import os import time import sha import marshal -from errno import EEXIST +import errno from mimelib.Parser import Parser @@ -61,7 +61,7 @@ class _Switchboard: try: os.mkdir(self.__whichq, 0770) except OSError, e: - if e.errno <> EEXIST: raise + if e.errno <> errno.EEXIST: raise finally: os.umask(omask) # Fast track for no slices @@ -74,14 +74,22 @@ class _Switchboard: def enqueue(self, _msg, _metadata={}, **_kws): # Calculate the SHA hexdigest of the message to get a unique base - # filename. + # filename. We're also going to use the digest as a hash into the set + # of parallel qrunner processes. data = _metadata.copy() data.update(_kws) listname = data.get('listname', '--nolist--') - now = `time.time()` + # Get some data for the input to the sha hash + now = time.time() msgtext = str(_msg) - hashfood = msgtext + listname + now - filebase = sha.new(hashfood).hexdigest() + hashfood = msgtext + listname + `now` + # Encode the current time into the file name for FIFO sorting in + # files(). The file name consists of two parts separated by a `+': + # the received time for this message (i.e. when it first showed up on + # this system) and the sha hex digest. + #rcvtime = data.setdefault('received_time', now) + rcvtime = data.setdefault('received_time', now) + filebase = `rcvtime` + '+' + sha.new(hashfood).hexdigest() # Figure out which queue files the message is to be written to. msgfile = os.path.join(self.__whichq, filebase + '.msg') dbfile = os.path.join(self.__whichq, filebase + '.db') @@ -115,25 +123,45 @@ class _Switchboard: dbfile = os.path.join(self.__whichq, filebase + '.db') # Read the message text and parse it into a message object tree. When # done, unlink the msg file. 
- msgfp = open(msgfile) - p = Parser(_class=Message.Message) - msg = p.parse(msgfp) - msgfp.close() - os.unlink(msgfile) + msg = data = None + try: + msgfp = open(msgfile) + except IOError, e: + if e.errno <> errno.ENOENT: raise + else: + p = Parser(_class=Message.Message) + msg = p.parse(msgfp) + msgfp.close() + os.unlink(msgfile) # Now, read the metadata using the appropriate external metadata # format. When done, unlink the metadata file. - data = self._ext_read(dbfile) - os.unlink(dbfile) + try: + data = self._ext_read(dbfile) + except (IOError, OSError), e: + if e.errno <> errno.ENOENT: raise + else: + os.unlink(dbfile) return msg, data def files(self): - all = [os.path.splitext(f)[0] for f in os.listdir(self.__whichq) - if f.endswith('.db')] - # Fast track exit - if self.__lower is None: - return all - # BAW: test performance and end-cases of this algorithm - return [f for f in all if self.__lower <= long(f, 16) < self.__upper] + times = {} + lower = self.__lower + upper = self.__upper + for f in os.listdir(self.__whichq): + # We only care about the file's base name (i.e. no extension). + # Thus we'll ignore anything that doesn't end in .db. + if not f.endswith('.db'): + continue + filebase = os.path.splitext(f)[0] + when, digest = filebase.split('+') + # Throw out any files which don't match our bitrange. BAW: test + # performance and end-cases of this algorithm. 
+ if not lower or (lower <= long(digest, 16) < upper): + times[float(when)] = filebase + # FIFO sort + keys = times.keys() + keys.sort() + return [times[k] for k in keys] def _ext_write(self, tmpfile, data): raise UnimplementedError @@ -145,20 +173,41 @@ class _Switchboard: class MarshalSwitchboard(_Switchboard): """Python marshal format.""" + FLOAT_ATTRIBUTES = ['received_time'] + def _ext_write(self, filename, dict): omask = os.umask(007) # -rw-rw---- try: fp = open(filename, 'w') finally: os.umask(omask) + # Python's marshal, up to and including in Python 2.1, has a bug where + # the full precision of floats was not stored. We work around this + # bug by hardcoding a list of float values we know about, repr()-izing + # them ourselves, and doing the reverse conversion on _ext_read(). + for attr in self.FLOAT_ATTRIBUTES: + # We use try/except because we expect a hitrate of nearly 100% + try: + fval = dict[attr] + except KeyError: + pass + else: + dict[attr] = repr(fval) marshal.dump(dict, fp) fp.close() def _ext_read(self, filename): fp = open(filename) - data = marshal.load(fp) + dict = marshal.load(fp) + # Do the reverse conversion (repr -> float) + for attr in self.FLOAT_ATTRIBUTES: + try: + sval = dict[attr] + except KeyError: + pass + dict[attr] = eval(sval, {'__builtins__': {}}) fp.close() - return data + return dict |
