From e2c69057f1452f9ac85de27e54e931272eeeeb10 Mon Sep 17 00:00:00 2001 From: bwarsaw Date: Wed, 22 Jul 1998 21:50:51 +0000 Subject: Several changes, primarily to make this more robust. So far, no more locking problems. JV please eyeball... 1. Got rid of is_locked attribute. Test for locking makes an explicit test that a) the tmpfname exists and b) the pid read from the file is our pid. This means that two instances of FileLock in the same process that share the same lockfile will always be locked and unlocked together (probably doesn't occur except in testing). 2. Added an __del__() which unlocks 3. Moved the creation of the basic lockfile to __kickstart() and added a force argument. When force is true (default is false), the lockfile is first removed then recreated. This is only used if the lockfile contains bogus data, such that the winner could not be determined. In that case, the only way to "break" the lock is to recreate a new basic lockfile. This could potentially leave tmpfname turds, but in reality this shouldn't happen very often. 4. Encapsulate reading and writing of the lockfile into __write() and __read(), so that (most importantly) the umask will be set correctly. This should fix most of the permission problems associated with the lockfile by assuring that they are group writable. A side effect: don't eval() the contents of the lockfile. 5. lock(): Use the above changes. Added an assert for what I think is an invariant: that the winner filename better not be equal to self.tmpfname. Moved the timeout check into an else clause of the pid comparison and eliminated the check of hung_timeout > 0 (this is the one I'm least sure about). --- Mailman/LockFile.py | 103 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 26 deletions(-) (limited to 'Mailman/LockFile.py') diff --git a/Mailman/LockFile.py b/Mailman/LockFile.py index e4d4ef3ee..d1fb9504a 100644 --- a/Mailman/LockFile.py +++ b/Mailman/LockFile.py @@ -28,6 +28,9 @@ sure no malicious people have access to link() to the lock file. # processes think he needs, he can say so. import socket, os, time +import string +#from stat import ST_NLINK +ST_NLINK = 3 # faster DEFAULT_HUNG_TIMEOUT = 15 DEFAULT_SLEEP_INTERVAL = .25 @@ -36,6 +39,7 @@ AlreadyCalledLockError = "AlreadyCalledLockError" NotLockedError = "NotLockedError" TimeOutError = "TimeOutError" + class FileLock: def __init__(self, lockfile, hung_timeout = DEFAULT_HUNG_TIMEOUT, sleep_interval = DEFAULT_SLEEP_INTERVAL): @@ -44,16 +48,58 @@ class FileLock: self.sleep_interval = sleep_interval self.tmpfname = "%s.%s.%d" % (lockfile, socket.gethostname(), os.getpid()) - self.is_locked = 0 + self.__kickstart() + + def __del__(self): + self.unlock() + + def __kickstart(self, force=0): + # forcing means to remove the original lockfile, and create a new one. + # this might be necessary if the file contains bogus locker + # information such that the owner of the lock can't be determined + if force: + try: + os.unlink(self.lockfile) + except IOError: + pass if not os.path.exists(self.lockfile): try: - file = open(self.lockfile, "w+") + # make sure it's group writable + oldmask = os.umask(002) + try: + file = open(self.lockfile, 'w+') + file.close() + finally: + os.umask(oldmask) except IOError: pass + def __write(self): + # make sure it's group writable + oldmask = os.umask(002) + try: + fp = open(self.tmpfname, 'w') + fp.write('%d %s\n' % (os.getpid(), self.tmpfname)) + fp.close() + finally: + os.umask(oldmask) + + def __read(self): + # can raise ValueError in two situations: + # + # either first element wasn't an integer (a valid pid), or we didn't + # get a 2-list from the string.split. Either way, the data in the + # file is bogus, but this is caught higher up + fp = open(self.tmpfname, 'r') + try: + pid, winner = string.split(string.strip(fp.read())) + finally: + fp.close() + return int(pid), winner + # Note that no one new can grab the lock once we've opened our tmpfile # until we close it, even if we don't have the lock. So checking the PID - # and stealing the lock are garunteed to be atomic. + # and stealing the lock are guaranteed to be atomic. def lock(self, timeout = 0): """Blocks until the lock can be obtained. @@ -67,42 +113,45 @@ class FileLock: if self.locked(): raise AlreadyCalledLockError while 1: + # create the hard link and test for exactly 2 links to the file os.link(self.lockfile, self.tmpfname) - if os.stat(self.tmpfname)[3] == 2: - file = open(self.tmpfname, 'w+') - file.write(`os.getpid(),self.tmpfname`) - file.close() - self.is_locked = 1 + if os.stat(self.tmpfname)[ST_NLINK] == 2: + # we have the lock (since there are no other links to the lock + # file), so we can piss on the hydrant + self.__write() break if timeout and timeout_time < time.time(): raise TimeOutError - file = open(self.tmpfname, 'r') + # someone else must have gotten the lock. let's find out who it + # is. if there is some bogosity in the lock file's data then we + # will steal the lock. try: - pid,winner = eval(file.read()) - except SyntaxError: # no info in file... *can* happen - file.close() + pid, winner = self.__read() + except ValueError: os.unlink(self.tmpfname) + self.__kickstart(force=1) continue - file.close() + assert winner <> self.tmpfname + # record the previous winner and the current time if pid <> last_pid: last_pid = pid stime = time.time() - if (stime + self.hung_timeout < time.time()) and \ - self.hung_timeout > 0: - # then - file = open(self.tmpfname, 'w+') - file.write(`os.getpid(),self.tmpfname`) + # here's where we potentially steal the lock. if the pid in the + # lockfile hasn't changed in hung_timeout seconds, then we assume + # that the locker crashed + elif stime + self.hung_timeout < time.time(): + self.__write() # steal try: os.unlink(winner) except os.error: + # winner lockfile could be missing pass - try: - os.unlink(self.tmpfname) - except os.error, (errno, msg): - # XXX: better debugging - msg = msg + ': ' + self.tmpfname - raise os.error, (errno, msg) + os.unlink(self.tmpfname) continue + # okay, someone else has the lock, we didn't steal it, and it + # hasn't timed out yet. So let's wait for the owner of the lock + # to give it up. Unlink our claim to the lock and sleep for a + # while, then try again os.unlink(self.tmpfname) time.sleep(self.sleep_interval) @@ -110,8 +159,10 @@ class FileLock: def unlock(self): if not self.locked(): raise NotLockedError - self.is_locked = 0 os.unlink(self.tmpfname) def locked(self): - return os.path.exists(self.tmpfname) and self.is_locked + if not os.path.exists(self.tmpfname): + return 0 + pid, winner = self.__read() + return pid == os.getpid() -- cgit v1.2.3-70-g09d2