diff options
Diffstat (limited to 'Mailman/lockfile.py')
| -rw-r--r-- | Mailman/lockfile.py | 583 |
1 files changed, 0 insertions, 583 deletions
diff --git a/Mailman/lockfile.py b/Mailman/lockfile.py deleted file mode 100644 index 7db746952..000000000 --- a/Mailman/lockfile.py +++ /dev/null @@ -1,583 +0,0 @@ -# Copyright (C) 1998-2007 by the Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. - -"""Portable, NFS-safe file locking with timeouts. - -This code implements an NFS-safe file-based locking algorithm influenced by -the GNU/Linux open(2) manpage, under the description of the O_EXCL option. -From RH6.1: - - [...] O_EXCL is broken on NFS file systems, programs which rely on it - for performing locking tasks will contain a race condition. The - solution for performing atomic file locking using a lockfile is to - create a unique file on the same fs (e.g., incorporating hostname and - pid), use link(2) to make a link to the lockfile. If link() returns - 0, the lock is successful. Otherwise, use stat(2) on the unique file - to check if its link count has increased to 2, in which case the lock - is also successful. - -The assumption made here is that there will be no `outside interference', -e.g. no agent external to this code will have access to link() to the affected -lock files. - -LockFile objects support lock-breaking so that you can't wedge a process -forever. This is especially helpful in a web environment, but may not be -appropriate for all applications. - -Locks have a `lifetime', which is the maximum length of time the process -expects to retain the lock. It is important to pick a good number here -because other processes will not break an existing lock until the expected -lifetime has expired. Too long and other processes will hang; too short and -you'll end up trampling on existing process locks -- and possibly corrupting -data. In a distributed (NFS) environment, you also need to make sure that -your clocks are properly synchronized. -""" - -__metaclass__ = type -__all__ = [ - 'LockError', - 'AlreadyLockedError', - 'NotLockedError', - 'LockFile', - ] - -# This code has undergone several revisions, with contributions from Barry -# Warsaw, Thomas Wouters, Harald Meland, and John Viega. It should also work -# well outside of Mailman so it could be used for other Python projects -# requiring file locking. See the __main__ section at the bottom of the file -# for unit testing. - -import os -import time -import errno -import random -import socket -import logging -import datetime -import traceback - -# Units are floating-point seconds. -DEFAULT_LOCK_LIFETIME = datetime.timedelta(seconds=15) -# Allowable a bit of clock skew, in seconds. -CLOCK_SLOP = 10 -# This is appropriate for Mailman, but you may want to change this if you're -# using this code outside Mailman. -log = logging.getLogger('mailman.locks') - - - -# Exceptions that can be raised by this module -class LockError(Exception): - """Base class for all exceptions in this module.""" - -class AlreadyLockedError(LockError): - """An attempt is made to lock an already locked object.""" - -class NotLockedError(LockError): - """An attempt is made to unlock an object that isn't locked.""" - -class TimeOutError(LockError): - """The timeout interval elapsed before the lock succeeded.""" - - - -class LockFile: - """A portable way to lock resources by way of the file system. - - This class supports the following methods: - - __init__(lockfile[, lifetime]): - Create the resource lock using lockfile as the global lock file. Each - process laying claim to this resource lock will create their own - temporary lock files based on the path specified by lockfile. - Optional lifetime is a timedelta specifying the number of seconds the - process expects to hold the lock. - - set_lifetime(lifetime): - Set a new lock lifetime. This takes affect the next time the file is - locked, but does not refresh a locked file. - - get_lifetime(): - Return the lock's lifetime. - - refresh([newlifetime[, unconditionally]]): - Refreshes the lifetime of a locked file. Use this if you realize that - you need to keep a resource locked longer than you thought. With - optional newlifetime, set the lock's lifetime. Raises NotLockedError - if the lock is not set, unless optional unconditionally flag is set to - true. - - lock([timeout]): - Acquire the lock. This blocks until the lock is acquired unless - optional timeout is greater than 0, in which case, a TimeOutError is - raised when timeout number of seconds (or possibly more) expires - without lock acquisition. Raises AlreadyLockedError if the lock is - already set. - - unlock([unconditionally]): - Relinquishes the lock. Raises a NotLockedError if the lock is not - set, unless optional unconditionally is true. - - locked(): - Return true if the lock is set, otherwise false. To avoid race - conditions, this refreshes the lock (on set locks). - - """ - # XXX We need to watch out for two lock objects in the same process - # pointing to the same lock file. Without this, if you lock lf1 and do - # not lock lf2, lf2.locked() will still return true. NOTE: this gimmick - # probably does /not/ work in a multithreaded world, but we don't have to - # worry about that, do we? <1 wink>. - COUNTER = 0 - - def __init__(self, lockfile, lifetime=DEFAULT_LOCK_LIFETIME): - """Create the resource lock using lockfile as the global lock file. - - Each process laying claim to this resource lock will create their own - temporary lock files based on the path specified by lockfile. - Optional lifetime is the number of seconds the process expects to hold - the lock. Optional withlogging, when true, turns on lockfile logging - (see the module docstring for details). - """ - self._lockfile = lockfile - self._lifetime = lifetime - # This works because we know we're single threaded - self._counter = LockFile.COUNTER - LockFile.COUNTER += 1 - self._tmpfname = '%s.%s.%d.%d' % ( - lockfile, socket.gethostname(), os.getpid(), self._counter) - # For transferring ownership across a fork. - self._owned = True - - def __repr__(self): - return '<LockFile %s: %s [%s: %s] pid=%s>' % ( - id(self), self._lockfile, - self.locked() and 'locked' or 'unlocked', - self._lifetime, os.getpid()) - - def set_lifetime(self, lifetime): - """Set a new lock lifetime. - - This takes affect the next time the file is locked, but does not - refresh a locked file. - """ - self._lifetime = lifetime - - def get_lifetime(self): - """Return the lock's lifetime.""" - return self._lifetime - - def refresh(self, newlifetime=None, unconditionally=False): - """Refreshes the lifetime of a locked file. - - Use this if you realize that you need to keep a resource locked longer - than you thought. With optional newlifetime, set the lock's lifetime. - Raises NotLockedError if the lock is not set, unless optional - unconditionally flag is set to true. - """ - if newlifetime is not None: - self.set_lifetime(newlifetime) - # Do we have the lock? As a side effect, this refreshes the lock! - if not self.locked() and not unconditionally: - raise NotLockedError('%s: %s' % (repr(self), self._read())) - - def lock(self, timeout=0): - """Acquire the lock. - - This blocks until the lock is acquired unless optional timeout is - greater than 0, in which case, a TimeOutError is raised when timeout - number of seconds (or possibly more) expires without lock acquisition. - Raises AlreadyLockedError if the lock is already set. - """ - if timeout: - timeout_time = time.time() + timeout - # Make sure my temp lockfile exists, and that its contents are - # up-to-date (e.g. the temp file name, and the lock lifetime). - self._write() - # XXX This next call can fail with an EPERM. I have no idea why, but - # I'm nervous about wrapping this in a try/except. It seems to be a - # very rare occurence, only happens from cron, and (only?) on Solaris - # 2.6. - self._touch() - log.debug('laying claim: %s', self._lockfile) - # for quieting the logging output - loopcount = -1 - while True: - loopcount += 1 - # Create the hard link and test for exactly 2 links to the file - try: - os.link(self._tmpfname, self._lockfile) - # If we got here, we know we know we got the lock, and never - # had it before, so we're done. Just touch it again for the - # fun of it. - log.debug('got the lock: %s', self._lockfile) - self._touch() - break - except OSError, e: - # The link failed for some reason, possibly because someone - # else already has the lock (i.e. we got an EEXIST), or for - # some other bizarre reason. - if e.errno == errno.ENOENT: - # XXX in some Linux environments, it is possible to get - # an ENOENT, which is truly strange, because this means - # that self._tmpfname doesn't exist at the time of the - # os.link(), but self._write() is supposed to guarantee - # that this happens! I don't honestly know why this - # happens, but for now we just say we didn't acquire the - # lock, and try again next time. - pass - elif e.errno <> errno.EEXIST: - # Something very bizarre happened. Clean up our state and - # pass the error on up. - log.exception('unexpected link') - os.unlink(self._tmpfname) - raise - elif self._linkcount() <> 2: - # Somebody's messin' with us! Log this, and try again - # later. XXX should we raise an exception? - log.error('unexpected linkcount: %d', self._linkcount()) - elif self._read() == self._tmpfname: - # It was us that already had the link. - log.debug('already locked: %s', self._lockfile) - raise AlreadyLockedError - # otherwise, someone else has the lock - pass - # We did not acquire the lock, because someone else already has - # it. Have we timed out in our quest for the lock? - if timeout and timeout_time < time.time(): - os.unlink(self._tmpfname) - log.error('timed out') - raise TimeOutError - # Okay, we haven't timed out, but we didn't get the lock. Let's - # find if the lock lifetime has expired. - if time.time() > self._releasetime() + CLOCK_SLOP: - # Yes, so break the lock. - self._break() - log.error('lifetime has expired, breaking') - # Okay, someone else has the lock, our claim hasn't timed out yet, - # and the expected lock lifetime hasn't expired yet. So let's - # wait a while for the owner of the lock to give it up. - elif not loopcount % 100: - log.debug('waiting for claim: %s', self._lockfile) - self._sleep() - - def unlock(self, unconditionally=False): - """Unlock the lock. - - If we don't already own the lock (either because of unbalanced unlock - calls, or because the lock was stolen out from under us), raise a - NotLockedError, unless optional `unconditionally' is true. - """ - islocked = self.locked() - if not islocked and not unconditionally: - raise NotLockedError - # If we owned the lock, remove the global file, relinquishing it. - if islocked: - try: - os.unlink(self._lockfile) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - # Remove our tempfile - try: - os.unlink(self._tmpfname) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - log.debug('unlocked: %s', self._lockfile) - - def locked(self): - """Return true if we own the lock, false if we do not. - - Checking the status of the lock resets the lock's lifetime, which - helps avoid race conditions during the lock status test. - """ - # Discourage breaking the lock for a while. - try: - self._touch() - except OSError, e: - if e.errno == errno.EPERM: - # We can't touch the file because we're not the owner. I - # don't see how we can own the lock if we're not the owner. - return False - else: - raise - # XXX Can the link count ever be > 2? - if self._linkcount() <> 2: - return False - return self._read() == self._tmpfname - - def finalize(self): - log.debug('finalize: %s', self._lockfile) - self.unlock(unconditionally=True) - - def __del__(self): - log.debug('__del__: %s', self._lockfile) - if self._owned: - self.finalize() - - # Python 2.5 context manager protocol support. - def __enter__(self): - self.lock() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.unlock() - # Don't suppress any exception that might have occurred. - return False - - # Use these only if you're transfering ownership to a child process across - # a fork. Use at your own risk, but it should be race-condition safe. - # _transfer_to() is called in the parent, passing in the pid of the child. - # _take_possession() is called in the child, and blocks until the parent - # has transferred possession to the child. _disown() is used to set the - # _owned flag to false, and it is a disgusting wart necessary to make - # forced lock acquisition work in mailmanctl. :( - def _transfer_to(self, pid): - # First touch it so it won't get broken while we're fiddling about. - self._touch() - # Find out current claim's temp filename - winner = self._read() - # Now twiddle ours to the given pid - self._tmpfname = '%s.%s.%d' % ( - self._lockfile, socket.gethostname(), pid) - # Create a hard link from the global lock file to the temp file. This - # actually does things in reverse order of normal operation because we - # know that lockfile exists, and tmpfname better not! - os.link(self._lockfile, self._tmpfname) - # Now update the lock file to contain a reference to the new owner - self._write() - # Toggle off our ownership of the file so we don't try to finalize it - # in our __del__() - self._owned = False - # Unlink the old winner, completing the transfer - os.unlink(winner) - # And do some sanity checks - assert self._linkcount() == 2 - assert self.locked() - log.debug('transferred the lock: %s', self._lockfile) - - def _take_possession(self): - self._tmpfname = tmpfname = '%s.%s.%d' % ( - self._lockfile, socket.gethostname(), os.getpid()) - # Wait until the linkcount is 2, indicating the parent has completed - # the transfer. - while self._linkcount() <> 2 or self._read() <> tmpfname: - time.sleep(0.25) - log.debug('took possession of the lock: %s', self._lockfile) - - def _disown(self): - self._owned = False - - # - # Private interface - # - - def _write(self): - # Make sure it's group writable - fp = open(self._tmpfname, 'w') - try: - fp.write(self._tmpfname) - finally: - fp.close() - - def _read(self): - try: - fp = open(self._lockfile) - try: - filename = fp.read() - finally: - fp.close() - return filename - except EnvironmentError, e: - if e.errno <> errno.ENOENT: - raise - return None - - def _touch(self, filename=None): - expiration_date = datetime.datetime.now() + self._lifetime - t = time.mktime(expiration_date.timetuple()) - try: - # XXX We probably don't need to modify atime, but this is easier. - os.utime(filename or self._tmpfname, (t, t)) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - - def _releasetime(self): - try: - return os.stat(self._lockfile).st_mtime - except OSError, e: - if e.errno <> errno.ENOENT: - raise - return -1 - - def _linkcount(self): - try: - return os.stat(self._lockfile).st_nlink - except OSError, e: - if e.errno <> errno.ENOENT: - raise - return -1 - - def _break(self): - # First, touch the global lock file. This reduces but does not - # eliminate the chance for a race condition during breaking. Two - # processes could both pass the test for lock expiry in lock() before - # one of them gets to touch the global lockfile. This shouldn't be - # too bad because all they'll do in this function is wax the lock - # files, not claim the lock, and we can be defensive for ENOENTs - # here. - # - # Touching the lock could fail if the process breaking the lock and - # the process that claimed the lock have different owners. We could - # solve this by set-uid'ing the CGI and mail wrappers, but I don't - # think it's that big a problem. - try: - self._touch(self._lockfile) - except OSError, e: - if e.errno <> errno.EPERM: - raise - # Get the name of the old winner's temp file. - winner = self._read() - # Remove the global lockfile, which actually breaks the lock. - try: - os.unlink(self._lockfile) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - # Try to remove the old winner's temp file, since we're assuming the - # winner process has hung or died. Don't worry too much if we can't - # unlink their temp file -- this doesn't wreck the locking algorithm, - # but will leave temp file turds laying around, a minor inconvenience. - try: - if winner: - os.unlink(winner) - except OSError, e: - if e.errno <> errno.ENOENT: - raise - - def _sleep(self): - interval = random.random() * 2.0 + 0.01 - time.sleep(interval) - - - -# Unit test framework -def _dochild(): - prefix = '[%d]' % os.getpid() - # Create somewhere between 1 and 1000 locks - lockfile = LockFile('/tmp/LockTest', lifetime=120) - # Use a lock lifetime of between 1 and 15 seconds. Under normal - # situations, Mailman's usage patterns (untested) shouldn't be much longer - # than this. - workinterval = 5 * random.random() - hitwait = 20 * random.random() - print prefix, 'workinterval:', workinterval - islocked = False - t0 = 0 - t1 = 0 - t2 = 0 - try: - try: - t0 = time.time() - print prefix, 'acquiring...' - lockfile.lock() - print prefix, 'acquired...' - islocked = True - except TimeOutError: - print prefix, 'timed out' - else: - t1 = time.time() - print prefix, 'acquisition time:', t1-t0, 'seconds' - time.sleep(workinterval) - finally: - if islocked: - try: - lockfile.unlock() - t2 = time.time() - print prefix, 'lock hold time:', t2-t1, 'seconds' - except NotLockedError: - print prefix, 'lock was broken' - # wait for next web hit - print prefix, 'webhit sleep:', hitwait - time.sleep(hitwait) - - -def _seed(): - try: - fp = open('/dev/random') - d = fp.read(40) - fp.close() - except EnvironmentError, e: - if e.errno <> errno.ENOENT: - raise - import sha - d = sha.new(`os.getpid()`+`time.time()`).hexdigest() - random.seed(d) - - -def _onetest(): - loopcount = random.randint(1, 100) - for i in range(loopcount): - print 'Loop %d of %d' % (i+1, loopcount) - pid = os.fork() - if pid: - # parent, wait for child to exit - pid, status = os.waitpid(pid, 0) - else: - # child - _seed() - try: - _dochild() - except KeyboardInterrupt: - pass - os._exit(0) - - -def _reap(kids): - if not kids: - return - pid, status = os.waitpid(-1, os.WNOHANG) - if pid <> 0: - del kids[pid] - - -def _test(numtests): - kids = {} - for i in range(numtests): - pid = os.fork() - if pid: - # parent - kids[pid] = pid - else: - # child - _seed() - try: - _onetest() - except KeyboardInterrupt: - pass - os._exit(0) - # slightly randomize each kid's seed - while kids: - _reap(kids) - - -if __name__ == '__main__': - import sys - import random - _test(int(sys.argv[1])) |
