-rw-r--r--  Mailman/Defaults.py.in |  19
-rw-r--r--  Mailman/Pending.py     | 133
2 files changed, 112 insertions, 40 deletions
diff --git a/Mailman/Defaults.py.in b/Mailman/Defaults.py.in
index 5a9114cfb..cfb60802e 100644
--- a/Mailman/Defaults.py.in
+++ b/Mailman/Defaults.py.in
@@ -1065,6 +1065,25 @@ LIST_LOCK_LIFETIME = hours(5)
 # the message will be re-queued for later delivery.
 LIST_LOCK_TIMEOUT = seconds(10)
 
+# Set this to true to turn on lock debugging messages for the pending
+# requests database, which will be written to logs/locks. If you think
+# you're having lock problems, or just want to tune the locks for your
+# system, turn on lock debugging.
+PENDINGDB_LOCK_DEBUGGING = 0
+
+# This variable specifies how long an attempt will be made to acquire a
+# pendingdb lock by the incoming qrunner process. If the lock acquisition
+# times out, the message will be re-queued for later delivery.
+PENDINGDB_LOCK_TIMEOUT = seconds(30)
+
+# The pendingdb is shared among all lists, and handles all list
+# (un)subscriptions, admin approvals and otherwise held messages, so it is
+# potentially locked a lot more often than single lists. Mailman deals with
+# this by re-trying any attempts to alter the pendingdb that failed because
+# of locking errors. This variable indicates how many attempts should be made
+# before abandoning all hope.
+PENDINGDB_LOCK_ATTEMPTS = 10
+
 
 
 #####
diff --git a/Mailman/Pending.py b/Mailman/Pending.py
index 624cea3e9..8b3bfa6d0 100644
--- a/Mailman/Pending.py
+++ b/Mailman/Pending.py
@@ -56,29 +56,49 @@ def new(*content):
     # It's a programming error if this assertion fails! We do it this way so
     # the assert test won't fail if the sequence is empty.
     assert content[:1] in _ALLKEYS
-    # Acquire the pending database lock, letting TimeOutError percolate up.
-    lock = LockFile.LockFile(LOCKFILE)
-    lock.lock(timeout=30)
+
+    # Get a lock handle now, but only lock inside the loop.
+    lock = LockFile.LockFile(LOCKFILE,
+                             withlogging=mm_cfg.PENDINGDB_LOCK_DEBUGGING)
+    # We try the main loop several times. If we get a lock error
+    # somewhere (for instance because someone broke the lock) we simply
+    # try again.
+    retries = mm_cfg.PENDINGDB_LOCK_ATTEMPTS
     try:
-        # Load the current database
-        db = _load()
-        # Calculate a unique cookie
-        while 1:
-            n = random.random()
-            now = time.time()
-            hashfood = str(now) + str(n) + str(content)
-            cookie = sha.new(hashfood).hexdigest()
-            if not db.has_key(cookie):
-                break
-        # Store the content, plus the time in the future when this entry will
-        # be evicted from the database, due to staleness.
-        db[cookie] = content
-        evictions = db.setdefault('evictions', {})
-        evictions[cookie] = now + mm_cfg.PENDING_REQUEST_LIFE
-        _save(db)
-        return cookie
+        while retries:
+            retries -= 1
+            if not lock.locked():
+                try:
+                    lock.lock(timeout=mm_cfg.PENDINGDB_LOCK_TIMEOUT)
+                except LockFile.TimeOutError:
+                    continue
+            # Load the current database
+            db = _load()
+            # Calculate a unique cookie
+            while 1:
+                n = random.random()
+                now = time.time()
+                hashfood = str(now) + str(n) + str(content)
+                cookie = sha.new(hashfood).hexdigest()
+                if not db.has_key(cookie):
+                    break
+            # Store the content, plus the time in the future when this
+            # entry will be evicted from the database, due to staleness.
+            db[cookie] = content
+            evictions = db.setdefault('evictions', {})
+            evictions[cookie] = now + mm_cfg.PENDING_REQUEST_LIFE
+            try:
+                _save(db, lock)
+            except LockFile.NotLockedError:
+                continue
+            return cookie
+        else:
+            # We failed to get the lock or keep it long enough to save
+            # the data!
+            raise LockFile.TimeOutError
     finally:
-        lock.unlock(unconditionally=1)
+        if lock.locked():
+            lock.unlock()
 
 
 
@@ -88,30 +108,55 @@ def confirm(cookie, expunge=1):
     If optional expunge is true (the default), the record is also removed
     from the database.
     """
-    # Acquire the pending database lock, letting TimeOutError percolate up.
-    # BAW: we perhaps shouldn't acquire the lock if expunge==0.
-    lock = LockFile.LockFile(LOCKFILE)
-    lock.lock(timeout=30)
-    try:
-        # Load the database
+
+    if not expunge:
         db = _load()
         missing = []
         content = db.get(cookie, missing)
         if content is missing:
             return None
-        # Remove the entry from the database
-        if expunge:
+        return content
+
+    # Get a lock handle now, but only lock inside the loop.
+    lock = LockFile.LockFile(LOCKFILE,
+                             withlogging=mm_cfg.PENDINGDB_LOCK_DEBUGGING)
+    # We try the main loop several times. If we get a lock error
+    # somewhere (for instance because someone broke the lock) we simply
+    # try again.
+    retries = mm_cfg.PENDINGDB_LOCK_ATTEMPTS
+    try:
+        while retries:
+            retries -= 1
+            if not lock.locked():
+                try:
+                    lock.lock(timeout=mm_cfg.PENDINGDB_LOCK_TIMEOUT)
+                except LockFile.TimeOutError:
+                    continue
+            # Load the database
+            db = _load()
+            missing = []
+            content = db.get(cookie, missing)
+            if content is missing:
+                return None
             del db[cookie]
             del db['evictions'][cookie]
-            _save(db)
-        return content
+            try:
+                _save(db, lock)
+            except LockFile.NotLockedError:
+                continue
+            return content
+        else:
+            # We failed to get the lock and keep it long enough to save
+            # the data!
+            raise LockFile.TimeOutError
     finally:
-        lock.unlock(unconditionally=1)
+        if lock.locked():
+            lock.unlock()
 
 
 
 def _load():
-    # The list's lock must be acquired.
+    # The list's lock must be acquired if you wish to alter data and save.
     #
     # First try to load the pickle file
     fp = None
@@ -134,8 +179,10 @@ def _load():
             fp.close()
 
 
-def _save(db):
-    # Lock must be acquired.
+def _save(db, lock):
+    # Lock must be acquired before loading the data that is now being saved.
+    if not lock.locked():
+        raise LockFile.NotLockedError
     evictions = db['evictions']
     now = time.time()
     for cookie, data in db.items():
@@ -154,13 +201,17 @@
     omask = os.umask(007)
     # Always save this as a pickle (safely), and after that succeeds, blow
     # away any old marshal file.
-    tmpfile = PCKFILE + '.tmp'
+    tmpfile = "%s.tmp.%d.%d"%(PCKFILE, os.getpid(), now)
     fp = None
     try:
         fp = open(tmpfile, 'w')
         cPickle.dump(db, fp)
         fp.close()
         fp = None
+        if not lock.locked():
+            # Our lock was broken?
+            os.remove(tmpfile)
+            raise LockFile.NotLockedError
         os.rename(tmpfile, PCKFILE)
         if os.path.exists(DBFILE):
             os.remove(DBFILE)
@@ -173,8 +224,9 @@
 
 def _update(olddb):
     # Update an old pending_subscriptions.db database to the new format
-    lock = LockFile.LockFile(LOCKFILE)
-    lock.lock(timeout=30)
+    lock = LockFile.LockFile(LOCKFILE,
+                             withlogging=mm_cfg.PENDINGDB_LOCK_DEBUGGING)
+    lock.lock(timeout=mm_cfg.PENDINGDB_LOCK_TIMEOUT)
     try:
         # We don't need this entry anymore
         if olddb.has_key('lastculltime'):
@@ -199,6 +251,7 @@ def _update(olddb):
             # request was made. The new format keeps it as the time the
            # request should be evicted.
             evictions[cookie] = data[-1] + mm_cfg.PENDING_REQUEST_LIFE
-        _save(db)
+        _save(db, lock)
     finally:
-        lock.unlock(unconditionally=1)
+        if lock.locked():
+            lock.unlock()
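The heart of the Pending.py change is a bounded retry loop: take the lock (re-acquiring it if another process broke it), reload the database, apply the change, and save only while the lock is still held; if the lock is lost at any point the iteration starts over, and after PENDINGDB_LOCK_ATTEMPTS failed passes a TimeOutError is raised. The following self-contained Python sketch shows that control flow outside Mailman; ToyLock, _load, _save and add_entry are hypothetical stand-ins, not Mailman's LockFile or Pending APIs.

    # Standalone sketch of the retry-until-saved pattern used in the patch.
    # All names here are simplified stand-ins, not Mailman code.

    class TimeOutError(Exception):
        """The lock could not be acquired within the timeout."""

    class NotLockedError(Exception):
        """The lock was lost before the data could be written back."""

    class ToyLock:
        """Minimal lock with the lock()/locked()/unlock() shape used above.

        A real lock file can be broken by another process (for example a
        stale-lock breaker), which is what makes the retry loop necessary.
        """
        def __init__(self):
            self._held = False

        def lock(self, timeout=None):
            self._held = True

        def locked(self):
            return self._held

        def unlock(self):
            self._held = False

    _DB = {'evictions': {}}       # stands in for the pickled pending database

    def _load():
        # Re-read the database after every (re)acquisition of the lock,
        # because another process may have changed it in the meantime.
        return dict(_DB)

    def _save(db, lock):
        # Refuse to write if the lock was broken while we worked.
        if not lock.locked():
            raise NotLockedError()
        _DB.clear()
        _DB.update(db)

    def add_entry(cookie, content, attempts=10, timeout=30):
        """Keep retrying until a load/modify/save cycle completes under the lock."""
        lock = ToyLock()
        try:
            while attempts:
                attempts -= 1
                if not lock.locked():
                    try:
                        lock.lock(timeout=timeout)
                    except TimeOutError:
                        continue          # someone else held the lock too long
                db = _load()
                db[cookie] = content
                try:
                    _save(db, lock)
                except NotLockedError:
                    continue              # lock broken mid-update: start over
                return cookie
            # Every attempt failed to acquire the lock and save.
            raise TimeOutError()
        finally:
            if lock.locked():
                lock.unlock()

    if __name__ == '__main__':
        print(add_entry('abc123', ('S', 'user@example.com')))

The sketch keeps the same invariants as the patch: the database is re-read after every (re)acquisition, the save step refuses to write without the lock, and the finally clause only unlocks a lock that is actually held.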

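The three new settings follow the usual Defaults.py.in convention: Defaults.py.in is the template from which Defaults.py is generated, and sites override values in Mailman/mm_cfg.py rather than editing Defaults.py itself. A hypothetical mm_cfg.py fragment, with illustrative values only:

    # mm_cfg.py -- site-specific overrides (illustrative values, not recommendations)
    from Defaults import *

    PENDINGDB_LOCK_DEBUGGING = 1           # write pendingdb lock traces to logs/locks
    PENDINGDB_LOCK_TIMEOUT = seconds(60)   # allow more time on a slow or networked filesystem
    PENDINGDB_LOCK_ATTEMPTS = 20           # retry more often before giving up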