summaryrefslogtreecommitdiff
path: root/Mailman/bin
diff options
context:
space:
mode:
Diffstat (limited to 'Mailman/bin')
-rw-r--r-- Mailman/bin/mailmanctl.py 399
-rw-r--r-- Mailman/bin/master.py 56
2 files changed, 91 insertions, 364 deletions
diff --git a/Mailman/bin/mailmanctl.py b/Mailman/bin/mailmanctl.py
index 2dc1905da..4dd7bd587 100644
--- a/Mailman/bin/mailmanctl.py
+++ b/Mailman/bin/mailmanctl.py
@@ -15,48 +15,36 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
+"""Mailman start/stop script."""
+
+from __future__ import with_statement
+
import os
import grp
import pwd
import sys
import errno
import signal
-import socket
import logging
-import optparse
-from datetime import timedelta
-from munepy import Enum
-from locknix import lockfile
+from optparse import OptionParser
-from Mailman import Defaults
from Mailman import Version
-from Mailman import loginit
from Mailman.configuration import config
from Mailman.i18n import _
from Mailman.initialize import initialize
COMMASPACE = ', '
-DOT = '.'
-# Calculate this here and now, because we're going to do a chdir later on, and
-# if the path is relative, the qrunner script won't be found.
-BIN_DIR = os.path.abspath(os.path.dirname(sys.argv[0]))
-# Since we wake up once per day and refresh the lock, the LOCK_LIFETIME
-# needn't be (much) longer than SNOOZE. We pad it 6 hours just to be safe.
-LOCK_LIFETIME = Defaults.days(1) + Defaults.hours(6)
-SNOOZE = Defaults.days(1)
-
-elog = None
-qlog = None
-opts = None
+log = None
+parser = None
def parseargs():
- parser = optparse.OptionParser(version=Version.MAILMAN_VERSION,
- usage=_("""\
+ parser = OptionParser(version=Version.MAILMAN_VERSION,
+ usage=_("""\
Primary start-up and shutdown script for Mailman's qrunner daemon.
This script starts, stops, and restarts the main Mailman queue runners, making
@@ -94,7 +82,7 @@ Commands:
Usage: %prog [options] [ start | stop | restart | reopen ]"""))
parser.add_option('-u', '--run-as-user',
- dest='checkprivs', default=True, action='store_false',
+ default=False, action='store_true',
help=_("""\
Normally, this script will refuse to run if the user id and group id are not
set to the `mailman' user and group (as defined when you configured Mailman).
@@ -109,15 +97,15 @@ for normal production environments.
Note though, that if you run with -u and are not in the mailman group, you may
-have permission problems, such as begin unable to delete a list's archives
+have permission problems, such as being unable to delete a list's archives
through the web. Tough luck!"""))
- parser.add_option('-s', '--stale-lock-cleanup',
- dest='force', default=False, action='store_true',
+ parser.add_option('-f', '--force',
+ default=False, action='store_true',
help=_("""\
-If mailmanctl finds an existing master lock, it will normally exit with an
-error message. With this option, mailmanctl will perform an extra level of
-checking. If a process matching the host/pid described in the lock file is
-running, mailmanctl will still exit, but if no matching process is found,
-mailmanctl will remove the apparently stale lock and make another attempt to
-claim the master lock."""))
+If the master watcher finds an existing master lock, it will normally exit
+with an error message. With this option, the master will perform an extra
+level of checking. If a process matching the host/pid described in the lock
+file is running, the master will still exit, requiring you to manually clean
+up the lock. But if no matching process is found, the master will remove the
+apparently stale lock and make another attempt to claim the master lock."""))
parser.add_option('-q', '--quiet',
default=False, action='store_true',
help=_("""\
@@ -125,36 +113,31 @@ Don't print status messages. Error messages are still printed to standard
error."""))
parser.add_option('-C', '--config',
help=_('Alternative configuration file to use'))
- opts, args = parser.parse_args()
- if not args:
- parser.print_help()
- print >> sys.stderr, _('No command given.')
- sys.exit(1)
- if len(args) > 1:
- parser.print_help()
- commands = COMMASPACE.join(args)
- print >> sys.stderr, _('Bad command: $commands')
- sys.exit(1)
- return parser, opts, args
+ options, arguments = parser.parse_args()
+ if not arguments:
+ parser.error(_('No command given.'))
+ if len(arguments) > 1:
+ commands = COMMASPACE.join(arguments)
+ parser.error(_('Bad command: $commands'))
+ parser.options = options
+ parser.arguments = arguments
+ return parser
def kill_watcher(sig):
try:
- fp = open(config.PIDFILE)
- pidstr = fp.read()
- fp.close()
- pid = int(pidstr.strip())
+ with open(config.PIDFILE) as f:
+ pid = int(f.read().strip())
except (IOError, ValueError), e:
# For i18n convenience
- pidfile = config.PIDFILE
- print >> sys.stderr, _('PID unreadable in: $pidfile')
+ print >> sys.stderr, _('PID unreadable in: $config.PIDFILE')
print >> sys.stderr, e
print >> sys.stderr, _('Is qrunner even running?')
return
try:
os.kill(pid, sig)
except OSError, e:
if e.errno <> errno.ESRCH:
raise
print >> sys.stderr, _('No child with pid: $pid')
@@ -164,130 +147,17 @@ def kill_watcher(sig):
-def get_lock_data():
- # Return the hostname, pid, and tempfile
- fp = open(config.LOCK_FILE)
- try:
- filename = os.path.split(fp.read().strip())[1]
- finally:
- fp.close()
- parts = filename.split('.')
- hostname = DOT.join(parts[1:-1])
- pid = int(parts[-1])
- return hostname, int(pid), filename
-
-
-def qrunner_state():
- # 1 if proc exists on host (but is it qrunner? ;)
- # 0 if host matches but no proc
- # hostname if hostname doesn't match
- hostname, pid, tempfile = get_lock_data()
- if hostname <> socket.gethostname():
- return hostname
- # Find out if the process exists by calling kill with a signal 0.
- try:
- os.kill(pid, 0)
- except OSError, e:
- if e.errno <> errno.ESRCH:
- raise
- return 0
- return 1
-
-
-def acquire_lock_1(force):
- # Be sure we can acquire the master qrunner lock. If not, it means some
- # other master qrunner daemon is already going.
- lock = lockfile.Lock(config.LOCK_FILE, LOCK_LIFETIME)
- try:
- lock.lock(timedelta(seconds=0.1))
- return lock
- except lockfile.TimeOutError:
- if not force:
- raise
- # Force removal of lock first
- lock.disown()
- hostname, pid, tempfile = get_lock_data()
- os.unlink(config.LOCK_FILE)
- os.unlink(os.path.join(config.LOCK_DIR, tempfile))
- return acquire_lock_1(force=False)
-
-
-def acquire_lock(force):
- try:
- lock = acquire_lock_1(force)
- return lock
- except lockfile.TimeOutError:
- status = qrunner_state()
- if status == 1:
- # host matches and proc exists
- print >> sys.stderr, _("""\
-The master qrunner lock could not be acquired because it appears as if another
-master qrunner is already running.
-""")
- elif status == 0:
- # host matches but no proc
- print >> sys.stderr, _("""\
-The master qrunner lock could not be acquired. It appears as though there is
-a stale master qrunner lock. Try re-running mailmanctl with the -s flag.
-""")
- else:
- # host doesn't even match
- print >> sys.stderr, _("""\
-The master qrunner lock could not be acquired, because it appears as if some
-process on some other host may have acquired it. We can't test for stale
-locks across host boundaries, so you'll have to do this manually. Or, if you
-know the lock is stale, re-run mailmanctl with the -s flag.
-
-Lock file: $config.LOCK_FILE
-Lock host: $status
-
-Exiting.""")
-
-
-
-def start_runner(qrname, slice, count):
- pid = os.fork()
- if pid:
- # parent
- return pid
- # child
- #
- # Craft the command line arguments for the exec() call.
- rswitch = '--runner=%s:%d:%d' % (qrname, slice, count)
- # Wherever mailmanctl lives, so too must live the qrunner script.
- exe = os.path.join(BIN_DIR, 'qrunner')
- # config.PYTHON, which is the absolute path to the Python interpreter,
- # must be given as argv[0] due to Python's library search algorithm.
- args = [sys.executable, sys.executable, exe, rswitch, '-s']
- if opts.config:
- args.extend(['-C', opts.config])
- os.execl(*args)
- # Should never get here
- raise RuntimeError('os.execl() failed')
-
-
-def start_all_runners():
- kids = {}
- for qrname, count in config.qrunners.items():
- for slice in range(count):
- # queue runner name, slice, numslices, restart count
- info = (qrname, slice, count, 0)
- pid = start_runner(qrname, slice, count)
- kids[pid] = info
- return kids
-
-
-
-def check_privs(parser):
+def check_privileges():
# If we're running as root (uid == 0), coerce the uid and gid to that
# which Mailman was configured for, and refuse to run if we didn't coerce
# the uid/gid.
- gid = grp.getgrnam(config.MAILMAN_GROUP)[2]
- uid = pwd.getpwnam(config.MAILMAN_USER)[2]
+ gid = grp.getgrnam(config.MAILMAN_GROUP).gr_gid
+ uid = pwd.getpwnam(config.MAILMAN_USER).pw_uid
myuid = os.getuid()
if myuid == 0:
- # Set the process's supplimental groups.
+ # Set the process's supplemental groups.
- groups = [x[2] for x in grp.getgrall() if config.MAILMAN_USER in x[3]]
+ groups = [group.gr_gid for group in grp.getgrall()
+ if config.MAILMAN_USER in group.gr_mem]
groups.append(gid)
os.setgroups(groups)
os.setgid(gid)
@@ -300,208 +170,63 @@ def check_privs(parser):
def main():
- global elog, qlog, opts
+ global log, parser
- parser, opts, args = parseargs()
- initialize(opts.config)
+ parser = parseargs()
+ initialize(parser.options.config)
- elog = logging.getLogger('mailman.error')
- qlog = logging.getLogger('mailman.qrunner')
+ log = logging.getLogger('mailman.qrunner')
- if opts.checkprivs:
- check_privs(parser)
+ if not parser.options.run_as_user:
+ check_privileges()
else:
- print _('Warning! You may encounter permission problems.')
+ if not parser.options.quiet:
+ print _('Warning! You may encounter permission problems.')
# Handle the commands
- command = args[0].lower()
+ command = parser.arguments[0].lower()
if command == 'stop':
- if not opts.quiet:
+ if not parser.options.quiet:
print _("Shutting down Mailman's master qrunner")
kill_watcher(signal.SIGTERM)
elif command == 'restart':
- if not opts.quiet:
+ if not parser.options.quiet:
print _("Restarting Mailman's master qrunner")
kill_watcher(signal.SIGUSR1)
elif command == 'reopen':
- if not opts.quiet:
+ if not parser.options.quiet:
print _('Re-opening all log files')
kill_watcher(signal.SIGHUP)
elif command == 'start':
- # Here's the scoop on the processes we're about to create. We'll need
- # one for each qrunner, and one for a master child process watcher /
- # lock refresher process.
- #
- # The child watcher process simply waits on the pids of the children
- # qrunners. Unless explicitly disabled by a mailmanctl switch (or the
- # children are killed with SIGTERM instead of SIGINT), the watcher
- # will automatically restart any child process that exits. This
- # allows us to be more robust, and also to implement restart by simply
- # SIGINT'ing the qrunner children, and letting the watcher restart
- # them.
+ # Start the master qrunner watcher process.
#
- # Under normal operation, we have a child per queue. This lets us get
- # the most out of the available resources, since a qrunner with no
- # files in its queue directory is pretty cheap, but having a separate
- # runner process per queue allows for a very responsive system. Some
- # people want a more traditional (i.e. MM2.0.x) cron-invoked qrunner.
- # No problem, but using mailmanctl isn't the answer. So while
- # mailmanctl hard codes some things, others, such as the number of
- # qrunners per queue, are configurable.
- #
- # First, acquire the master mailmanctl lock
- lock = acquire_lock(opts.force)
- if not lock:
- return
# Daemon process startup according to Stevens, Advanced Programming in
# the UNIX Environment, Chapter 13.
pid = os.fork()
if pid:
# parent
- if not opts.quiet:
+ if not parser.options.quiet:
print _("Starting Mailman's master qrunner.")
- # Give up the lock "ownership". This just means the foreground
- # process won't close/unlock the lock when it finalizes this lock
- # instance. We'll let the mater watcher subproc own the lock.
- lock.transfer_to(pid)
return
# child
- lock.take_possession()
- # Save our pid in a file for "mailmanctl stop" rendezvous.
- fp = open(config.PIDFILE, 'w')
- try:
- print >> fp, os.getpid()
- finally:
- fp.close()
+ #
# Create a new session and become the session leader, but since we
# won't be opening any terminal devices, don't do the ultra-paranoid
# suggestion of doing a second fork after the setsid() call.
os.setsid()
# Instead of cd'ing to root, cd to the Mailman runtime directory.
os.chdir(config.VAR_DIR)
- # I don't think we have any unneeded file descriptors.
- #
- # Now start all the qrunners. This returns a dictionary where the
- # keys are qrunner pids and the values are tuples of the following
- # form: (qrname, slice, count). This does its own fork and exec, and
- # sets up its own signal handlers.
- kids = start_all_runners()
- # Set up a SIGALRM handler to refresh the lock once per day. The lock
- # lifetime is 1day+6hours so this should be plenty.
- def sigalrm_handler(signum, frame):
- lock.refresh()
- signal.alarm(Defaults.days(1))
- signal.signal(signal.SIGALRM, sigalrm_handler)
- signal.alarm(int(Defaults.days(1)))
- # Set up a SIGHUP handler so that if we get one, we'll pass it along
- # to all the qrunner children. This will tell them to close and
- # reopen their log files
- def sighup_handler(signum, frame):
- loginit.reopen()
- for pid in kids.keys():
- os.kill(pid, signal.SIGHUP)
- # And just to tweak things...
- qlog.info('Master watcher caught SIGHUP. Re-opening log files.')
- signal.signal(signal.SIGHUP, sighup_handler)
- # We also need to install a SIGTERM handler because that's what init
- # will kill this process with when changing run levels. It's also the
- # signal 'mailmanctl stop' uses.
- def sigterm_handler(signum, frame):
- # Make sure we never try to restart our children, no matter why
- # the child exited.
- opts.restart = False
- qlog.info('I AM NEVER RESTARTING AGAIN: %d', pid)
- for pid in kids.keys():
- try:
- os.kill(pid, signal.SIGTERM)
- except OSError, e:
- if e.errno <> errno.ESRCH:
- raise
- qlog.info('Master watcher caught SIGTERM. Exiting.')
- signal.signal(signal.SIGTERM, sigterm_handler)
- # Finally, we need a SIGINT handler which will cause the sub-qrunners
- # to exit, but the master will restart SIGINT'd sub-processes unless
- # the -n flag was given.
- def sigint_handler(signum, frame):
- for pid in kids.keys():
- os.kill(pid, signal.SIGINT)
- qlog.info('Master watcher caught SIGINT. Restarting.')
- signal.signal(signal.SIGINT, sigint_handler)
- # Now we're ready to simply do our wait/restart loop. This is the
- # master qrunner watcher.
- try:
- while True:
- try:
- pid, status = os.wait()
- except OSError, e:
- # No children? We're done
- if e.errno == errno.ECHILD:
- break
- # If the system call got interrupted, just restart it.
- elif e.errno <> errno.EINTR:
- raise
- continue
- killsig = exitstatus = None
- if os.WIFSIGNALED(status):
- killsig = os.WTERMSIG(status)
- if os.WIFEXITED(status):
- exitstatus = os.WEXITSTATUS(status)
- # We'll restart the process unless we were given the
- # "no-restart" switch, or if the process was SIGTERM'd or
- # exitted with a SIGTERM exit status. This lets us better
- # handle runaway restarts (say, if the subproc had a syntax
- # error!)
- restarting = ''
- if opts.restart:
- if ((exitstatus is None and killsig <> signal.SIGTERM) or
- (killsig is None and exitstatus <> signal.SIGTERM)):
- # Then
- restarting = '[restarting]'
- qrname, slice, count, restarts = kids[pid]
- del kids[pid]
- qlog.info("""\
-Master qrunner detected subprocess exit
-(pid: %d, sig: %s, sts: %s, class: %s, slice: %d/%d) %s""",
- pid, killsig, exitstatus, qrname,
- slice+1, count, restarting)
- # See if we've reached the maximum number of allowable restarts
- if exitstatus <> signal.SIGINT:
- restarts += 1
- if restarts > config.MAX_RESTARTS:
- qlog.info("""\
-Qrunner %s reached maximum restart limit of %d, not restarting.""",
- qrname, config.MAX_RESTARTS)
- restarting = ''
- # Now perhaps restart the process unless it exited with a
- # SIGTERM or we aren't restarting.
- if restarting:
- newpid = start_runner(qrname, slice, count)
- kids[newpid] = (qrname, slice, count, restarts)
- finally:
- # Should we leave the main loop for any reason, we want to be sure
- # all of our children are exited cleanly. Send SIGTERMs to all
- # the child processes and wait for them all to exit.
- for pid in kids.keys():
- try:
- os.kill(pid, signal.SIGTERM)
- except OSError, e:
- if e.errno == errno.ESRCH:
- # The child has already exited
- qlog.info('ESRCH on pid: %d', pid)
- del kids[pid]
- # Wait for all the children to go away
- while True:
- try:
- pid, status = os.wait()
- except OSError, e:
- if e.errno == errno.ECHILD:
- break
- elif e.errno <> errno.EINTR:
- raise
- continue
- # Finally, give up the lock
- lock.unlock(unconditionally=True)
- os._exit(0)
+ # Exec the master watcher.
+ args = [sys.executable, sys.executable,
+ os.path.join(config.BIN_DIR, 'master')]
+ if parser.options.force:
+ args.append('--force')
+ if parser.options.config:
+ args.extend(['-C', parser.options.config])
+ log.debug('starting: %s', args)
+ os.execl(*args)
+ # We should never get here.
+ raise RuntimeError('os.execl() failed')
diff --git a/Mailman/bin/master.py b/Mailman/bin/master.py
index 6e7c5408d..e2a80934f 100644
--- a/Mailman/bin/master.py
+++ b/Mailman/bin/master.py
@@ -18,8 +18,6 @@
from __future__ import with_statement
import os
-import grp
-import pwd
import sys
import errno
import signal
@@ -39,16 +37,8 @@ from Mailman.i18n import _
from Mailman.initialize import initialize
-COMMASPACE = ', '
DOT = '.'
-# Calculate this here and now, because we're going to do a chdir later on, and
-# if the path is relative, the qrunner script won't be found.
-BIN_DIR = os.path.abspath(os.path.dirname(sys.argv[0]))
-
-# Since we wake up once per day and refresh the lock, the LOCK_LIFETIME
-# needn't be (much) longer than SNOOZE. We pad it 6 hours just to be safe.
LOCK_LIFETIME = Defaults.days(1) + Defaults.hours(6)
-SNOOZE = Defaults.days(1)
log = None
parser = None
@@ -84,6 +74,15 @@ Usage: %prog [options]"""))
help=_("""\
Don't restart the qrunners when they exit because of an error or a SIGUSR1.
Use this only for debugging."""))
+ parser.add_option('-f', '--force',
+ default=False, action='store_true',
+ help=_("""\
+If the master watcher finds an existing master lock, it will normally exit
+with an error message. With this option, the master will perform an extra
+level of checking. If a process matching the host/pid described in the lock
+file is running, the master will still exit, requiring you to manually clean
+up the lock. But if no matching process is found, the master will remove the
+apparently stale lock and make another attempt to claim the master lock."""))
parser.add_option('-C', '--config',
help=_('Alternative configuration file to use'))
options, arguments = parser.parse_args()
@@ -104,8 +103,8 @@ def get_lock_data():
with open(config.LOCK_FILE) as fp:
filename = os.path.split(fp.read().strip())[1]
parts = filename.split('.')
- hostname = DOT.join(parts[1:-1])
- pid = int(parts[-1])
+ hostname = DOT.join(parts[1:-2])
+ pid = int(parts[-2])
return hostname, int(pid), filename
@@ -164,28 +163,27 @@ def acquire_lock_1(force):
return acquire_lock_1(force=False)
-def acquire_lock(force):
+def acquire_lock():
"""Acquire the master queue runner lock.
- :param force: Flag that controls whether to force acquisition of the lock.
- :return: The master queue runner lock or None if the lock couldn't be
- acquired. In that case, an error messages is also printed to standard
- error.
+ :return: The master queue runner lock. If the lock can't be acquired,
+ the reason is reported through parser.error() instead of returning
+ None.
"""
try:
- lock = acquire_lock_1(force)
+ lock = acquire_lock_1(parser.options.force)
return lock
except lockfile.TimeOutError:
status = master_state()
if status == WatcherState.conflict:
# Hostname matches and process exists.
- print >> sys.stderr, _("""\
-The master qrunner lock could not be acquired because it appears as if another
-master qrunner is already running.
+ message = _("""\
+The master qrunner lock could not be acquired because it appears
+as though another master qrunner is already running.
""")
elif status == WatcherState.stale_lock:
# Hostname matches but the process does not exist.
- print >> sys.stderr, _("""\
+ message = _("""\
The master qrunner lock could not be acquired. It appears as though there is
-a stale master qrunner lock. Try re-running mailmanctl with the -s flag.
+a stale master qrunner lock. Try re-running mailmanctl with the -f flag.
""")
@@ -193,17 +191,17 @@ a stale master qrunner lock. Try re-running mailmanctl with the -s flag.
assert status == WatcherState.host_mismatch, (
'Invalid enum value: %s' % status)
# Hostname doesn't even match.
- print >> sys.stderr, _("""\
+ hostname, pid, tempfile = get_lock_data()
+ message = _("""\
The master qrunner lock could not be acquired, because it appears as if some
process on some other host may have acquired it. We can't test for stale
-locks across host boundaries, so you'll have to do this manually. Or, if you
-know the lock is stale, re-run mailmanctl with the -s flag.
+locks across host boundaries, so you'll have to clean this up manually.
Lock file: $config.LOCK_FILE
-Lock host: $status
+Lock host: $hostname
Exiting.""")
- return None
+ parser.error(message)
@@ -226,7 +224,7 @@ def start_runner(qrname, slice, count):
# Craft the command line arguments for the exec() call.
rswitch = '--runner=%s:%d:%d' % (qrname, slice, count)
# Wherever mailmanctl lives, so too must live the qrunner script.
- exe = os.path.join(BIN_DIR, 'qrunner')
+ exe = os.path.join(config.BIN_DIR, 'qrunner')
# config.PYTHON, which is the absolute path to the Python interpreter,
# must be given as argv[0] due to Python's library search algorithm.
args = [sys.executable, sys.executable, exe, rswitch, '-s']
@@ -375,14 +373,18 @@ def main():
log = logging.getLogger('mailman.qrunner')
# Acquire the master lock, exiting if we can't acquire it. We'll let the
- # caller handle any clean up or lock breaking.
- with lockfile.Lock(config.LOCK_FILE, LOCK_LIFETIME) as lock:
+ # caller handle any clean up or lock breaking. No with statement here
+ # because Lock's constructor doesn't support a timeout.
+ lock = acquire_lock()
+ try:
with open(config.PIDFILE, 'w') as fp:
print >> fp, os.getpid()
try:
control_loop(lock)
finally:
os.remove(config.PIDFILE)
+ finally:
+ lock.unlock()