summary refs log tree commit diff
path: root/Mailman/LockFile.py
diff options
context:
space:
mode:
author Barry Warsaw 2007-10-10 23:22:03 -0400
committer Barry Warsaw 2007-10-10 23:22:03 -0400
commit 1ad73a52bb9d82ef3af1e34ad9ef66ac2eda2909 (patch)
tree 7985eda24e60a39d58c54436e99f975610ee947d /Mailman/LockFile.py
parent 15f9e73fdb96a145632e5916cc0073472c014c99 (diff)
download mailman-1ad73a52bb9d82ef3af1e34ad9ef66ac2eda2909.tar.gz
mailman-1ad73a52bb9d82ef3af1e34ad9ef66ac2eda2909.tar.zst
mailman-1ad73a52bb9d82ef3af1e34ad9ef66ac2eda2909.zip
Diffstat (limited to 'Mailman/LockFile.py')
-rw-r--r-- Mailman/LockFile.py 575
1 files changed, 0 insertions, 575 deletions
diff --git a/Mailman/LockFile.py b/Mailman/LockFile.py
deleted file mode 100644
index 9323b4895..000000000
--- a/Mailman/LockFile.py
+++ /dev/null
@@ -1,575 +0,0 @@
-# Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-# USA.
-
-"""Portable, NFS-safe file locking with timeouts.
-
-This code implements an NFS-safe file-based locking algorithm influenced by
-the GNU/Linux open(2) manpage, under the description of the O_EXCL option.
-From RH6.1:
-
- [...] O_EXCL is broken on NFS file systems, programs which rely on it
- for performing locking tasks will contain a race condition. The
- solution for performing atomic file locking using a lockfile is to
- create a unique file on the same fs (e.g., incorporating hostname and
- pid), use link(2) to make a link to the lockfile. If link() returns
- 0, the lock is successful. Otherwise, use stat(2) on the unique file
- to check if its link count has increased to 2, in which case the lock
- is also successful.
-
-The assumption made here is that there will be no `outside interference',
-e.g. no agent external to this code will have access to link() to the affected
-lock files.
-
-LockFile objects support lock-breaking so that you can't wedge a process
-forever. This is especially helpful in a web environment, but may not be
-appropriate for all applications.
-
-Locks have a `lifetime', which is the maximum length of time the process
-expects to retain the lock. It is important to pick a good number here
-because other processes will not break an existing lock until the expected
-lifetime has expired. Too long and other processes will hang; too short and
-you'll end up trampling on existing process locks -- and possibly corrupting
-data. In a distributed (NFS) environment, you also need to make sure that
-your clocks are properly synchronized.
-"""
-
-# This code has undergone several revisions, with contributions from Barry
-# Warsaw, Thomas Wouters, Harald Meland, and John Viega. It should also work
-# well outside of Mailman so it could be used for other Python projects
-# requiring file locking. See the __main__ section at the bottom of the file
-# for unit testing.
-
-import os
-import time
-import errno
-import random
-import socket
-import logging
-import datetime
-import traceback
-
-# Default lock lifetime, expressed as a datetime.timedelta.
-DEFAULT_LOCK_LIFETIME = datetime.timedelta(seconds=15)
-# Allow a bit of clock skew, in seconds.
-CLOCK_SLOP = 10
-# This is appropriate for Mailman, but you may want to change this if you're
-# using this code outside Mailman.
-log = logging.getLogger('mailman.locks')
-
-
-
-# Exceptions that can be raised by this module
-class LockError(Exception):
- """Base class for all exceptions in this module."""
-
-class AlreadyLockedError(LockError):
- """An attempt is made to lock an already locked object."""
-
-class NotLockedError(LockError):
- """An attempt is made to unlock an object that isn't locked."""
-
-class TimeOutError(LockError):
- """The timeout interval elapsed before the lock succeeded."""
-
-
-
-class LockFile:
- """A portable way to lock resources by way of the file system.
-
- This class supports the following methods:
-
- __init__(lockfile[, lifetime]):
- Create the resource lock using lockfile as the global lock file. Each
- process laying claim to this resource lock will create their own
- temporary lock files based on the path specified by lockfile.
- Optional lifetime is a timedelta specifying the number of seconds the
- process expects to hold the lock.
-
- set_lifetime(lifetime):
- Set a new lock lifetime. This takes effect the next time the file is
- locked, but does not refresh a locked file.
-
- get_lifetime():
- Return the lock's lifetime.
-
- refresh([newlifetime[, unconditionally]]):
- Refreshes the lifetime of a locked file. Use this if you realize that
- you need to keep a resource locked longer than you thought. With
- optional newlifetime, set the lock's lifetime. Raises NotLockedError
- if the lock is not set, unless optional unconditionally flag is set to
- true.
-
- lock([timeout]):
- Acquire the lock. This blocks until the lock is acquired unless
- optional timeout is greater than 0, in which case, a TimeOutError is
- raised when timeout number of seconds (or possibly more) expires
- without lock acquisition. Raises AlreadyLockedError if the lock is
- already set.
-
- unlock([unconditionally]):
- Relinquishes the lock. Raises a NotLockedError if the lock is not
- set, unless optional unconditionally is true.
-
- locked():
- Return true if the lock is set, otherwise false. To avoid race
- conditions, this refreshes the lock (on set locks).
-
- """
- # XXX We need to watch out for two lock objects in the same process
- # pointing to the same lock file. Without this, if you lock lf1 and do
- # not lock lf2, lf2.locked() will still return true. NOTE: this gimmick
- # probably does /not/ work in a multithreaded world, but we don't have to
- # worry about that, do we? <1 wink>.
- COUNTER = 0
-
- def __init__(self, lockfile, lifetime=DEFAULT_LOCK_LIFETIME):
- """Create the resource lock using lockfile as the global lock file.
-
- Each process laying claim to this resource lock will create their own
- temporary lock files based on the path specified by lockfile.
- Optional lifetime is the number of seconds the process expects to hold
- the lock. Optional withlogging, when true, turns on lockfile logging
- (see the module docstring for details).
- """
- self._lockfile = lockfile
- self._lifetime = lifetime
- # This works because we know we're single threaded
- self._counter = LockFile.COUNTER
- LockFile.COUNTER += 1
- self._tmpfname = '%s.%s.%d.%d' % (
- lockfile, socket.gethostname(), os.getpid(), self._counter)
- # For transferring ownership across a fork.
- self._owned = True
-
- def __repr__(self):
- return '<LockFile %s: %s [%s: %s] pid=%s>' % (
- id(self), self._lockfile,
- self.locked() and 'locked' or 'unlocked',
- self._lifetime, os.getpid())
-
- def set_lifetime(self, lifetime):
- """Set a new lock lifetime.
-
- This takes effect the next time the file is locked, but does not
- refresh a locked file.
- """
- self._lifetime = lifetime
-
- def get_lifetime(self):
- """Return the lock's lifetime."""
- return self._lifetime
-
- def refresh(self, newlifetime=None, unconditionally=False):
- """Refreshes the lifetime of a locked file.
-
- Use this if you realize that you need to keep a resource locked longer
- than you thought. With optional newlifetime, set the lock's lifetime.
- Raises NotLockedError if the lock is not set, unless optional
- unconditionally flag is set to true.
- """
- if newlifetime is not None:
- self.set_lifetime(newlifetime)
- # Do we have the lock? As a side effect, this refreshes the lock!
- if not self.locked() and not unconditionally:
- raise NotLockedError('%s: %s' % (repr(self), self._read()))
-
- def lock(self, timeout=0):
- """Acquire the lock.
-
- This blocks until the lock is acquired unless optional timeout is
- greater than 0, in which case, a TimeOutError is raised when timeout
- number of seconds (or possibly more) expires without lock acquisition.
- Raises AlreadyLockedError if the lock is already set.
- """
- if timeout:
- timeout_time = time.time() + timeout
- # Make sure my temp lockfile exists, and that its contents are
- # up-to-date (e.g. the temp file name, and the lock lifetime).
- self._write()
- # XXX This next call can fail with an EPERM. I have no idea why, but
- # I'm nervous about wrapping this in a try/except. It seems to be a
- # very rare occurrence, only happens from cron, and (only?) on Solaris
- # 2.6.
- self._touch()
- log.debug('laying claim: %s', self._lockfile)
- # for quieting the logging output
- loopcount = -1
- while True:
- loopcount += 1
- # Create the hard link and test for exactly 2 links to the file
- try:
- os.link(self._tmpfname, self._lockfile)
- # If we got here, we know we got the lock, and never
- # had it before, so we're done. Just touch it again for the
- # fun of it.
- log.debug('got the lock: %s', self._lockfile)
- self._touch()
- break
- except OSError, e:
- # The link failed for some reason, possibly because someone
- # else already has the lock (i.e. we got an EEXIST), or for
- # some other bizarre reason.
- if e.errno == errno.ENOENT:
- # XXX in some Linux environments, it is possible to get
- # an ENOENT, which is truly strange, because this means
- # that self._tmpfname doesn't exist at the time of the
- # os.link(), but self._write() is supposed to guarantee
- # that this happens! I don't honestly know why this
- # happens, but for now we just say we didn't acquire the
- # lock, and try again next time.
- pass
- elif e.errno <> errno.EEXIST:
- # Something very bizarre happened. Clean up our state and
- # pass the error on up.
- log.exception('unexpected link')
- os.unlink(self._tmpfname)
- raise
- elif self._linkcount() <> 2:
- # Somebody's messin' with us! Log this, and try again
- # later. XXX should we raise an exception?
- log.error('unexpected linkcount: %d', self._linkcount())
- elif self._read() == self._tmpfname:
- # It was us that already had the link.
- log.debug('already locked: %s', self._lockfile)
- raise AlreadyLockedError
- # otherwise, someone else has the lock
- pass
- # We did not acquire the lock, because someone else already has
- # it. Have we timed out in our quest for the lock?
- if timeout and timeout_time < time.time():
- os.unlink(self._tmpfname)
- log.error('timed out')
- raise TimeOutError
- # Okay, we haven't timed out, but we didn't get the lock. Let's
- # find if the lock lifetime has expired.
- if time.time() > self._releasetime() + CLOCK_SLOP:
- # Yes, so break the lock.
- self._break()
- log.error('lifetime has expired, breaking')
- # Okay, someone else has the lock, our claim hasn't timed out yet,
- # and the expected lock lifetime hasn't expired yet. So let's
- # wait a while for the owner of the lock to give it up.
- elif not loopcount % 100:
- log.debug('waiting for claim: %s', self._lockfile)
- self._sleep()
-
- def unlock(self, unconditionally=False):
- """Unlock the lock.
-
- If we don't already own the lock (either because of unbalanced unlock
- calls, or because the lock was stolen out from under us), raise a
- NotLockedError, unless optional `unconditionally' is true.
- """
- islocked = self.locked()
- if not islocked and not unconditionally:
- raise NotLockedError
- # If we owned the lock, remove the global file, relinquishing it.
- if islocked:
- try:
- os.unlink(self._lockfile)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- # Remove our tempfile
- try:
- os.unlink(self._tmpfname)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- log.debug('unlocked: %s', self._lockfile)
-
- def locked(self):
- """Return true if we own the lock, false if we do not.
-
- Checking the status of the lock resets the lock's lifetime, which
- helps avoid race conditions during the lock status test.
- """
- # Discourage breaking the lock for a while.
- try:
- self._touch()
- except OSError, e:
- if e.errno == errno.EPERM:
- # We can't touch the file because we're not the owner. I
- # don't see how we can own the lock if we're not the owner.
- return False
- else:
- raise
- # XXX Can the link count ever be > 2?
- if self._linkcount() <> 2:
- return False
- return self._read() == self._tmpfname
-
- def finalize(self):
- log.debug('finalize: %s', self._lockfile)
- self.unlock(unconditionally=True)
-
- def __del__(self):
- log.debug('__del__: %s', self._lockfile)
- if self._owned:
- self.finalize()
-
- # Python 2.5 context manager protocol support.
- def __enter__(self):
- self.lock()
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- self.unlock()
- # Don't suppress any exception that might have occurred.
- return False
-
- # Use these only if you're transferring ownership to a child process across
- # a fork. Use at your own risk, but it should be race-condition safe.
- # _transfer_to() is called in the parent, passing in the pid of the child.
- # _take_possession() is called in the child, and blocks until the parent
- # has transferred possession to the child. _disown() is used to set the
- # _owned flag to false, and it is a disgusting wart necessary to make
- # forced lock acquisition work in mailmanctl. :(
- def _transfer_to(self, pid):
- # First touch it so it won't get broken while we're fiddling about.
- self._touch()
- # Find out current claim's temp filename
- winner = self._read()
- # Now twiddle ours to the given pid
- self._tmpfname = '%s.%s.%d' % (
- self._lockfile, socket.gethostname(), pid)
- # Create a hard link from the global lock file to the temp file. This
- # actually does things in reverse order of normal operation because we
- # know that lockfile exists, and tmpfname better not!
- os.link(self._lockfile, self._tmpfname)
- # Now update the lock file to contain a reference to the new owner
- self._write()
- # Toggle off our ownership of the file so we don't try to finalize it
- # in our __del__()
- self._owned = False
- # Unlink the old winner, completing the transfer
- os.unlink(winner)
- # And do some sanity checks
- assert self._linkcount() == 2
- assert self.locked()
- log.debug('transferred the lock: %s', self._lockfile)
-
- def _take_possession(self):
- self._tmpfname = tmpfname = '%s.%s.%d' % (
- self._lockfile, socket.gethostname(), os.getpid())
- # Wait until the linkcount is 2, indicating the parent has completed
- # the transfer.
- while self._linkcount() <> 2 or self._read() <> tmpfname:
- time.sleep(0.25)
- log.debug('took possession of the lock: %s', self._lockfile)
-
- def _disown(self):
- self._owned = False
-
- #
- # Private interface
- #
-
- def _write(self):
- # Make sure it's group writable
- fp = open(self._tmpfname, 'w')
- try:
- fp.write(self._tmpfname)
- finally:
- fp.close()
-
- def _read(self):
- try:
- fp = open(self._lockfile)
- try:
- filename = fp.read()
- finally:
- fp.close()
- return filename
- except EnvironmentError, e:
- if e.errno <> errno.ENOENT:
- raise
- return None
-
- def _touch(self, filename=None):
- expiration_date = datetime.datetime.now() + self._lifetime
- t = time.mktime(expiration_date.timetuple())
- try:
- # XXX We probably don't need to modify atime, but this is easier.
- os.utime(filename or self._tmpfname, (t, t))
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
-
- def _releasetime(self):
- try:
- return os.stat(self._lockfile).st_mtime
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- return -1
-
- def _linkcount(self):
- try:
- return os.stat(self._lockfile).st_nlink
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- return -1
-
- def _break(self):
- # First, touch the global lock file. This reduces but does not
- # eliminate the chance for a race condition during breaking. Two
- # processes could both pass the test for lock expiry in lock() before
- # one of them gets to touch the global lockfile. This shouldn't be
- # too bad because all they'll do in this function is wax the lock
- # files, not claim the lock, and we can be defensive for ENOENTs
- # here.
- #
- # Touching the lock could fail if the process breaking the lock and
- # the process that claimed the lock have different owners. We could
- # solve this by set-uid'ing the CGI and mail wrappers, but I don't
- # think it's that big a problem.
- try:
- self._touch(self._lockfile)
- except OSError, e:
- if e.errno <> errno.EPERM:
- raise
- # Get the name of the old winner's temp file.
- winner = self._read()
- # Remove the global lockfile, which actually breaks the lock.
- try:
- os.unlink(self._lockfile)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
- # Try to remove the old winner's temp file, since we're assuming the
- # winner process has hung or died. Don't worry too much if we can't
- # unlink their temp file -- this doesn't wreck the locking algorithm,
- # but will leave temp file turds lying around, a minor inconvenience.
- try:
- if winner:
- os.unlink(winner)
- except OSError, e:
- if e.errno <> errno.ENOENT:
- raise
-
- def _sleep(self):
- interval = random.random() * 2.0 + 0.01
- time.sleep(interval)
-
-
-
-# Unit test framework
-def _dochild():
- prefix = '[%d]' % os.getpid()
- # Create somewhere between 1 and 1000 locks
- lockfile = LockFile('/tmp/LockTest', lifetime=120)
- # Use a lock lifetime of between 1 and 15 seconds. Under normal
- # situations, Mailman's usage patterns (untested) shouldn't be much longer
- # than this.
- workinterval = 5 * random.random()
- hitwait = 20 * random.random()
- print prefix, 'workinterval:', workinterval
- islocked = False
- t0 = 0
- t1 = 0
- t2 = 0
- try:
- try:
- t0 = time.time()
- print prefix, 'acquiring...'
- lockfile.lock()
- print prefix, 'acquired...'
- islocked = True
- except TimeOutError:
- print prefix, 'timed out'
- else:
- t1 = time.time()
- print prefix, 'acquisition time:', t1-t0, 'seconds'
- time.sleep(workinterval)
- finally:
- if islocked:
- try:
- lockfile.unlock()
- t2 = time.time()
- print prefix, 'lock hold time:', t2-t1, 'seconds'
- except NotLockedError:
- print prefix, 'lock was broken'
- # wait for next web hit
- print prefix, 'webhit sleep:', hitwait
- time.sleep(hitwait)
-
-
-def _seed():
- try:
- fp = open('/dev/random')
- d = fp.read(40)
- fp.close()
- except EnvironmentError, e:
- if e.errno <> errno.ENOENT:
- raise
- import sha
- d = sha.new(`os.getpid()`+`time.time()`).hexdigest()
- random.seed(d)
-
-
-def _onetest():
- loopcount = random.randint(1, 100)
- for i in range(loopcount):
- print 'Loop %d of %d' % (i+1, loopcount)
- pid = os.fork()
- if pid:
- # parent, wait for child to exit
- pid, status = os.waitpid(pid, 0)
- else:
- # child
- _seed()
- try:
- _dochild()
- except KeyboardInterrupt:
- pass
- os._exit(0)
-
-
-def _reap(kids):
- if not kids:
- return
- pid, status = os.waitpid(-1, os.WNOHANG)
- if pid <> 0:
- del kids[pid]
-
-
-def _test(numtests):
- kids = {}
- for i in range(numtests):
- pid = os.fork()
- if pid:
- # parent
- kids[pid] = pid
- else:
- # child
- _seed()
- try:
- _onetest()
- except KeyboardInterrupt:
- pass
- os._exit(0)
- # slightly randomize each kid's seed
- while kids:
- _reap(kids)
-
-
-if __name__ == '__main__':
- import sys
- import random
- _test(int(sys.argv[1]))