Diffstat (limited to 'Mailman/lockfile.py')
-rw-r--r--  Mailman/lockfile.py  583
1 files changed, 583 insertions, 0 deletions
diff --git a/Mailman/lockfile.py b/Mailman/lockfile.py
new file mode 100644
index 000000000..7db746952
--- /dev/null
+++ b/Mailman/lockfile.py
@@ -0,0 +1,583 @@
+# Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""Portable, NFS-safe file locking with timeouts.
+
+This code implements an NFS-safe file-based locking algorithm influenced by
+the GNU/Linux open(2) manpage, under the description of the O_EXCL option.
+From RH6.1:
+
+    [...] O_EXCL is broken on NFS file systems, programs which rely on it
+    for performing locking tasks will contain a race condition.  The
+    solution for performing atomic file locking using a lockfile is to
+    create a unique file on the same fs (e.g., incorporating hostname and
+    pid), use link(2) to make a link to the lockfile.  If link() returns
+    0, the lock is successful.  Otherwise, use stat(2) on the unique file
+    to check if its link count has increased to 2, in which case the lock
+    is also successful.
+
+The assumption made here is that there will be no `outside interference',
+e.g. no agent external to this code will have access to link() to the affected
+lock files.
+
+LockFile objects support lock-breaking so that you can't wedge a process
+forever.  This is especially helpful in a web environment, but may not be
+appropriate for all applications.
+
+Locks have a `lifetime', which is the maximum length of time the process
+expects to retain the lock.  It is important to pick a good number here
+because other processes will not break an existing lock until the expected
+lifetime has expired.  Too long and other processes will hang; too short and
+you'll end up trampling on existing process locks -- and possibly corrupting
+data.  In a distributed (NFS) environment, you also need to make sure that
+your clocks are properly synchronized.
+"""
+
+__metaclass__ = type
+__all__ = [
+    'LockError',
+    'AlreadyLockedError',
+    'NotLockedError',
+    'LockFile',
+    ]
+
+# This code has undergone several revisions, with contributions from Barry
+# Warsaw, Thomas Wouters, Harald Meland, and John Viega.  It should also work
+# well outside of Mailman so it could be used for other Python projects
+# requiring file locking.  See the __main__ section at the bottom of the file
+# for unit testing.
+
+import os
+import time
+import errno
+import random
+import socket
+import logging
+import datetime
+import traceback
+
+# The default lock lifetime.
+DEFAULT_LOCK_LIFETIME = datetime.timedelta(seconds=15)
+# Allow a bit of clock skew, in seconds.
+CLOCK_SLOP = 10
+# This is appropriate for Mailman, but you may want to change this if you're
+# using this code outside Mailman.
+log = logging.getLogger('mailman.locks')
+
+
+
+# Exceptions that can be raised by this module
+class LockError(Exception):
+    """Base class for all exceptions in this module."""
+
+class AlreadyLockedError(LockError):
+    """An attempt is made to lock an already locked object."""
+
+class NotLockedError(LockError):
+    """An attempt is made to unlock an object that isn't locked."""
+
+class TimeOutError(LockError):
+    """The timeout interval elapsed before the lock succeeded."""
+
+
+
+class LockFile:
+    """A portable way to lock resources by way of the file system.
+
+    This class supports the following methods:
+
+    __init__(lockfile[, lifetime]):
+        Create the resource lock using lockfile as the global lock file.  Each
+        process laying claim to this resource lock will create their own
+        temporary lock files based on the path specified by lockfile.
+        Optional lifetime is a timedelta specifying the number of seconds the
+        process expects to hold the lock.
+
+    set_lifetime(lifetime):
+        Set a new lock lifetime.  This takes effect the next time the file is
+        locked, but does not refresh a locked file.
+
+    get_lifetime():
+        Return the lock's lifetime.
+
+    refresh([newlifetime[, unconditionally]]):
+        Refreshes the lifetime of a locked file.  Use this if you realize that
+        you need to keep a resource locked longer than you thought.  With
+        optional newlifetime, set the lock's lifetime.  Raises NotLockedError
+        if the lock is not set, unless optional unconditionally flag is set to
+        true.
+
+    lock([timeout]):
+        Acquire the lock.  This blocks until the lock is acquired unless
+        optional timeout is greater than 0, in which case, a TimeOutError is
+        raised when timeout number of seconds (or possibly more) expires
+        without lock acquisition.  Raises AlreadyLockedError if the lock is
+        already set.
+
+    unlock([unconditionally]):
+        Relinquishes the lock.  Raises a NotLockedError if the lock is not
+        set, unless optional unconditionally is true.
+
+    locked():
+        Return true if the lock is set, otherwise false.  To avoid race
+        conditions, this refreshes the lock (on set locks).
+
+    """
+    # XXX We need to watch out for two lock objects in the same process
+    # pointing to the same lock file.  Without this, if you lock lf1 and do
+    # not lock lf2, lf2.locked() will still return true.  NOTE: this gimmick
+    # probably does /not/ work in a multithreaded world, but we don't have to
+    # worry about that, do we? <1 wink>.
+    COUNTER = 0
+
+    def __init__(self, lockfile, lifetime=DEFAULT_LOCK_LIFETIME):
+        """Create the resource lock using lockfile as the global lock file.
+
+        Each process laying claim to this resource lock will create their own
+        temporary lock files based on the path specified by lockfile.
+        Optional lifetime is a timedelta specifying the amount of time the
+        process expects to hold the lock; it defaults to
+        DEFAULT_LOCK_LIFETIME.
+        """
+        self._lockfile = lockfile
+        self._lifetime = lifetime
+        # This works because we know we're single threaded
+        self._counter = LockFile.COUNTER
+        LockFile.COUNTER += 1
+        self._tmpfname = '%s.%s.%d.%d' % (
+            lockfile, socket.gethostname(), os.getpid(), self._counter)
+        # For transferring ownership across a fork.
+        self._owned = True
+
+    def __repr__(self):
+        return '<LockFile %s: %s [%s: %s] pid=%s>' % (
+            id(self), self._lockfile,
+            self.locked() and 'locked' or 'unlocked',
+            self._lifetime, os.getpid())
+
+    def set_lifetime(self, lifetime):
+        """Set a new lock lifetime.
+
+        This takes effect the next time the file is locked, but does not
+        refresh a locked file.
+        """
+        self._lifetime = lifetime
+
+    def get_lifetime(self):
+        """Return the lock's lifetime."""
+        return self._lifetime
+
+    def refresh(self, newlifetime=None, unconditionally=False):
+        """Refreshes the lifetime of a locked file.
+
+        Use this if you realize that you need to keep a resource locked longer
+        than you thought.  With optional newlifetime, set the lock's lifetime.
+        Raises NotLockedError if the lock is not set, unless optional
+        unconditionally flag is set to true.
+        """
+        if newlifetime is not None:
+            self.set_lifetime(newlifetime)
+        # Do we have the lock?  As a side effect, this refreshes the lock!
+        if not self.locked() and not unconditionally:
+            raise NotLockedError('%s: %s' % (repr(self), self._read()))
+
+    def lock(self, timeout=0):
+        """Acquire the lock.
+
+        This blocks until the lock is acquired unless optional timeout is
+        greater than 0, in which case, a TimeOutError is raised when timeout
+        number of seconds (or possibly more) expires without lock acquisition.
+        Raises AlreadyLockedError if the lock is already set.
+        """
+        if timeout:
+            timeout_time = time.time() + timeout
+        # Make sure my temp lockfile exists, and that its contents are
+        # up-to-date (e.g. the temp file name, and the lock lifetime).
+        self._write()
+        # XXX This next call can fail with an EPERM.  I have no idea why, but
+        # I'm nervous about wrapping this in a try/except.  It seems to be a
+        # very rare occurrence, only happens from cron, and (only?) on Solaris
+        # 2.6.
+        self._touch()
+        log.debug('laying claim: %s', self._lockfile)
+        # for quieting the logging output
+        loopcount = -1
+        while True:
+            loopcount += 1
+            # Create the hard link and test for exactly 2 links to the file
+            try:
+                os.link(self._tmpfname, self._lockfile)
+                # If we got here, we know we got the lock, and never
+                # had it before, so we're done.  Just touch it again for the
+                # fun of it.
+                log.debug('got the lock: %s', self._lockfile)
+                self._touch()
+                break
+            except OSError, e:
+                # The link failed for some reason, possibly because someone
+                # else already has the lock (i.e. we got an EEXIST), or for
+                # some other bizarre reason.
+                if e.errno == errno.ENOENT:
+                    # XXX in some Linux environments, it is possible to get
+                    # an ENOENT, which is truly strange, because this means
+                    # that self._tmpfname doesn't exist at the time of the
+                    # os.link(), but self._write() is supposed to guarantee
+                    # that it exists!  I don't honestly know why this
+                    # happens, but for now we just say we didn't acquire the
+                    # lock, and try again next time.
+                    pass
+                elif e.errno <> errno.EEXIST:
+                    # Something very bizarre happened.  Clean up our state and
+                    # pass the error on up.
+                    log.exception('unexpected link')
+                    os.unlink(self._tmpfname)
+                    raise
+                elif self._linkcount() <> 2:
+                    # Somebody's messin' with us!  Log this, and try again
+                    # later.  XXX should we raise an exception?
+                    log.error('unexpected linkcount: %d', self._linkcount())
+                elif self._read() == self._tmpfname:
+                    # It was us that already had the link.
+                    log.debug('already locked: %s', self._lockfile)
+                    raise AlreadyLockedError
+                # otherwise, someone else has the lock
+                pass
+            # We did not acquire the lock, because someone else already has
+            # it.  Have we timed out in our quest for the lock?
+            if timeout and timeout_time < time.time():
+                os.unlink(self._tmpfname)
+                log.error('timed out')
+                raise TimeOutError
+            # Okay, we haven't timed out, but we didn't get the lock.  Let's
+            # find out if the lock lifetime has expired.
+            if time.time() > self._releasetime() + CLOCK_SLOP:
+                # Yes, so break the lock.
+                self._break()
+                log.error('lifetime has expired, breaking')
+            # Okay, someone else has the lock, our claim hasn't timed out yet,
+            # and the expected lock lifetime hasn't expired yet.  So let's
+            # wait a while for the owner of the lock to give it up.
+            elif not loopcount % 100:
+                log.debug('waiting for claim: %s', self._lockfile)
+            self._sleep()
+
+    def unlock(self, unconditionally=False):
+        """Unlock the lock.
+
+        If we don't already own the lock (either because of unbalanced unlock
+        calls, or because the lock was stolen out from under us), raise a
+        NotLockedError, unless optional `unconditionally' is true.
+        """
+        islocked = self.locked()
+        if not islocked and not unconditionally:
+            raise NotLockedError
+        # If we owned the lock, remove the global file, relinquishing it.
+        if islocked:
+            try:
+                os.unlink(self._lockfile)
+            except OSError, e:
+                if e.errno <> errno.ENOENT:
+                    raise
+        # Remove our tempfile
+        try:
+            os.unlink(self._tmpfname)
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+        log.debug('unlocked: %s', self._lockfile)
+
+    def locked(self):
+        """Return true if we own the lock, false if we do not.
+
+        Checking the status of the lock resets the lock's lifetime, which
+        helps avoid race conditions during the lock status test.
+        """
+        # Discourage breaking the lock for a while.
+        try:
+            self._touch()
+        except OSError, e:
+            if e.errno == errno.EPERM:
+                # We can't touch the file because we're not the owner.  I
+                # don't see how we can own the lock if we're not the owner.
+                return False
+            else:
+                raise
+        # XXX Can the link count ever be > 2?
+        if self._linkcount() <> 2:
+            return False
+        return self._read() == self._tmpfname
+
+    def finalize(self):
+        log.debug('finalize: %s', self._lockfile)
+        self.unlock(unconditionally=True)
+
+    def __del__(self):
+        log.debug('__del__: %s', self._lockfile)
+        if self._owned:
+            self.finalize()
+
+    # Python 2.5 context manager protocol support.
+    def __enter__(self):
+        self.lock()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.unlock()
+        # Don't suppress any exception that might have occurred.
+        return False
+
+    # Use these only if you're transferring ownership to a child process across
+    # a fork.  Use at your own risk, but it should be race-condition safe.
+    # _transfer_to() is called in the parent, passing in the pid of the child.
+    # _take_possession() is called in the child, and blocks until the parent
+    # has transferred possession to the child.  _disown() is used to set the
+    # _owned flag to false, and it is a disgusting wart necessary to make
+    # forced lock acquisition work in mailmanctl. :(
+    def _transfer_to(self, pid):
+        # First touch it so it won't get broken while we're fiddling about.
+        self._touch()
+        # Find out the current claim's temp filename
+        winner = self._read()
+        # Now twiddle ours to the given pid
+        self._tmpfname = '%s.%s.%d' % (
+            self._lockfile, socket.gethostname(), pid)
+        # Create a hard link from the global lock file to the temp file.  This
+        # actually does things in reverse order of normal operation because we
+        # know that lockfile exists, and tmpfname better not!
+        os.link(self._lockfile, self._tmpfname)
+        # Now update the lock file to contain a reference to the new owner
+        self._write()
+        # Toggle off our ownership of the file so we don't try to finalize it
+        # in our __del__()
+        self._owned = False
+        # Unlink the old winner, completing the transfer
+        os.unlink(winner)
+        # And do some sanity checks
+        assert self._linkcount() == 2
+        assert self.locked()
+        log.debug('transferred the lock: %s', self._lockfile)
+
+    def _take_possession(self):
+        self._tmpfname = tmpfname = '%s.%s.%d' % (
+            self._lockfile, socket.gethostname(), os.getpid())
+        # Wait until the linkcount is 2, indicating the parent has completed
+        # the transfer.
+        while self._linkcount() <> 2 or self._read() <> tmpfname:
+            time.sleep(0.25)
+        log.debug('took possession of the lock: %s', self._lockfile)
+
+    def _disown(self):
+        self._owned = False
+
+    #
+    # Private interface
+    #
+
+    def _write(self):
+        # The temp file's contents are just its own name.
+        fp = open(self._tmpfname, 'w')
+        try:
+            fp.write(self._tmpfname)
+        finally:
+            fp.close()
+
+    def _read(self):
+        try:
+            fp = open(self._lockfile)
+            try:
+                filename = fp.read()
+            finally:
+                fp.close()
+            return filename
+        except EnvironmentError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+            return None
+
+    def _touch(self, filename=None):
+        expiration_date = datetime.datetime.now() + self._lifetime
+        t = time.mktime(expiration_date.timetuple())
+        try:
+            # XXX We probably don't need to modify atime, but this is easier.
+            os.utime(filename or self._tmpfname, (t, t))
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+
+    def _releasetime(self):
+        try:
+            return os.stat(self._lockfile).st_mtime
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+            return -1
+
+    def _linkcount(self):
+        try:
+            return os.stat(self._lockfile).st_nlink
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+            return -1
+
+    def _break(self):
+        # First, touch the global lock file.  This reduces but does not
+        # eliminate the chance for a race condition during breaking.  Two
+        # processes could both pass the test for lock expiry in lock() before
+        # one of them gets to touch the global lockfile.  This shouldn't be
+        # too bad because all they'll do in this function is wax the lock
+        # files, not claim the lock, and we can be defensive for ENOENTs
+        # here.
+        #
+        # Touching the lock could fail if the process breaking the lock and
+        # the process that claimed the lock have different owners.  We could
+        # solve this by set-uid'ing the CGI and mail wrappers, but I don't
+        # think it's that big a problem.
+        try:
+            self._touch(self._lockfile)
+        except OSError, e:
+            if e.errno <> errno.EPERM:
+                raise
+        # Get the name of the old winner's temp file.
+        winner = self._read()
+        # Remove the global lockfile, which actually breaks the lock.
+        try:
+            os.unlink(self._lockfile)
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+        # Try to remove the old winner's temp file, since we're assuming the
+        # winner process has hung or died.  Don't worry too much if we can't
+        # unlink their temp file -- this doesn't wreck the locking algorithm,
+        # but will leave temp file turds lying around, a minor inconvenience.
+        try:
+            if winner:
+                os.unlink(winner)
+        except OSError, e:
+            if e.errno <> errno.ENOENT:
+                raise
+
+    def _sleep(self):
+        interval = random.random() * 2.0 + 0.01
+        time.sleep(interval)
+
+
+
+# Unit test framework
+def _dochild():
+    prefix = '[%d]' % os.getpid()
+    # Create a single test lock with a two minute lifetime.
+    lockfile = LockFile('/tmp/LockTest', lifetime=datetime.timedelta(seconds=120))
+    # Use a work interval of up to five seconds.  Under normal
+    # situations, Mailman's usage patterns (untested) shouldn't be much longer
+    # than this.
+    workinterval = 5 * random.random()
+    hitwait = 20 * random.random()
+    print prefix, 'workinterval:', workinterval
+    islocked = False
+    t0 = 0
+    t1 = 0
+    t2 = 0
+    try:
+        try:
+            t0 = time.time()
+            print prefix, 'acquiring...'
+            lockfile.lock()
+            print prefix, 'acquired...'
+            islocked = True
+        except TimeOutError:
+            print prefix, 'timed out'
+        else:
+            t1 = time.time()
+            print prefix, 'acquisition time:', t1-t0, 'seconds'
+            time.sleep(workinterval)
+    finally:
+        if islocked:
+            try:
+                lockfile.unlock()
+                t2 = time.time()
+                print prefix, 'lock hold time:', t2-t1, 'seconds'
+            except NotLockedError:
+                print prefix, 'lock was broken'
+    # wait for next web hit
+    print prefix, 'webhit sleep:', hitwait
+    time.sleep(hitwait)
+
+
+def _seed():
+    try:
+        fp = open('/dev/random')
+        d = fp.read(40)
+        fp.close()
+    except EnvironmentError, e:
+        if e.errno <> errno.ENOENT:
+            raise
+        import sha
+        d = sha.new(`os.getpid()`+`time.time()`).hexdigest()
+    random.seed(d)
+
+
+def _onetest():
+    loopcount = random.randint(1, 100)
+    for i in range(loopcount):
+        print 'Loop %d of %d' % (i+1, loopcount)
+        pid = os.fork()
+        if pid:
+            # parent, wait for child to exit
+            pid, status = os.waitpid(pid, 0)
+        else:
+            # child
+            _seed()
+            try:
+                _dochild()
+            except KeyboardInterrupt:
+                pass
+            os._exit(0)
+
+
+def _reap(kids):
+    if not kids:
+        return
+    pid, status = os.waitpid(-1, os.WNOHANG)
+    if pid <> 0:
+        del kids[pid]
+
+
+def _test(numtests):
+    kids = {}
+    for i in range(numtests):
+        pid = os.fork()
+        if pid:
+            # parent
+            kids[pid] = pid
+        else:
+            # child
+            _seed()
+            try:
+                _onetest()
+            except KeyboardInterrupt:
+                pass
+            os._exit(0)
+    # Wait for all the child processes to exit.
+    while kids:
+        _reap(kids)
+
+
+if __name__ == '__main__':
+    import sys
+    import random
+    _test(int(sys.argv[1]))
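The module docstring above gives the link(2)/stat(2) recipe in prose. As a rough illustration only (not Mailman's API; the function name and paths here are hypothetical), the acquisition step it describes boils down to something like this:

    import os
    import errno
    import socket

    def try_acquire(lockfile):
        # A per-process unique file on the same filesystem as the lock file.
        tmpname = '%s.%s.%d' % (lockfile, socket.gethostname(), os.getpid())
        fp = open(tmpname, 'w')
        fp.write(tmpname)
        fp.close()
        try:
            # link(2) is atomic, even over NFS where O_EXCL is unreliable.
            os.link(tmpname, lockfile)
            return True
        except OSError, e:
            if e.errno <> errno.EEXIST:
                raise
            # The link() call "failed", but if our unique file's link count
            # is now 2 the link actually happened and the lock is ours (the
            # stat(2) fallback from the manpage recipe).
            return os.stat(tmpname).st_nlink == 2

A real caller has to retry on failure and eventually clean up tmpname, which is what LockFile.lock() does above, layering the retry loop, lock lifetimes, lock breaking, and the _read() comparison that detects re-acquiring a lock we already hold on top of this primitive.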
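For callers, the class docstring amounts to the usage pattern below. This is a hedged sketch: the import path follows the file's location in the tree, while the lock file path and the timeout and lifetime values are illustrative assumptions.

    import datetime
    from Mailman.lockfile import LockFile, TimeOutError

    lock = LockFile('/var/lock/mailman/test.lck',
                    lifetime=datetime.timedelta(seconds=30))
    try:
        lock.lock(timeout=10)       # give up after roughly ten seconds
    except TimeOutError:
        print 'somebody else holds the lock'
    else:
        try:
            # ... do the protected work; if it runs long, extend the claim ...
            lock.refresh(datetime.timedelta(seconds=60))
        finally:
            lock.unlock()

    # With `from __future__ import with_statement` (Python 2.5) or 2.6+, the
    # context manager protocol pairs lock() and unlock() automatically:
    with LockFile('/var/lock/mailman/test.lck') as lock:
        pass                        # critical section

The built-in test harness at the bottom of the module can also be run directly, e.g. "python Mailman/lockfile.py 5", which forks five processes that repeatedly compete for the same test lock.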
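The comments before _transfer_to() and _take_possession() describe a parent-to-child ownership hand-off across fork(). Roughly, and only as a sketch (these are private methods, and the surrounding code and path here are hypothetical), the protocol is used like this:

    import os
    from Mailman.lockfile import LockFile

    lock = LockFile('/var/lock/mailman/test.lck')
    lock.lock()
    pid = os.fork()
    if pid:
        # Parent: re-point the lock at the child's pid and drop ownership so
        # that our own __del__() won't finalize (unlock) it.
        lock._transfer_to(pid)
        os.waitpid(pid, 0)
    else:
        # Child: block until the parent has completed the transfer, then
        # carry on as the lock's owner.
        lock._take_possession()
        # ... do work under the lock ...
        lock.unlock()
        os._exit(0)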
