summaryrefslogtreecommitdiff
path: root/Mailman/bin/mailmanctl.py
diff options
context:
space:
mode:
Diffstat (limited to 'Mailman/bin/mailmanctl.py')
-rw-r--r--Mailman/bin/mailmanctl.py521
1 files changed, 521 insertions, 0 deletions
diff --git a/Mailman/bin/mailmanctl.py b/Mailman/bin/mailmanctl.py
new file mode 100644
index 000000000..a91b5651b
--- /dev/null
+++ b/Mailman/bin/mailmanctl.py
@@ -0,0 +1,521 @@
+# Copyright (C) 2001-2006 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+import os
+import grp
+import pwd
+import sys
+import errno
+import signal
+import socket
+import logging
+import optparse
+
+from Mailman import Defaults
+from Mailman import Errors
+from Mailman import LockFile
+from Mailman import Utils
+from Mailman import Version
+from Mailman import loginit
+from Mailman.MailList import MailList
+from Mailman.configuration import config
+from Mailman.i18n import _
+
+__i18n_templates__ = True
+
+COMMASPACE = ', '
+DOT = '.'
+
+# Since we wake up once per day and refresh the lock, the LOCK_LIFETIME
+# needn't be (much) longer than SNOOZE. We pad it 6 hours just to be safe.
+LOCK_LIFETIME = Defaults.days(1) + Defaults.hours(6)
+SNOOZE = Defaults.days(1)
+MAX_RESTARTS = 10
+
+
+
+def parseargs():
+ parser = optparse.OptionParser(version=Version.MAILMAN_VERSION,
+ usage=_("""\
+Primary start-up and shutdown script for Mailman's qrunner daemon.
+
+This script starts, stops, and restarts the main Mailman queue runners, making
+sure that the various long-running qrunners are still alive and kicking. It
+does this by forking and exec'ing the qrunners and waiting on their pids.
+When it detects a subprocess has exited, it may restart it.
+
+The qrunners respond to SIGINT, SIGTERM, and SIGHUP. SIGINT and SIGTERM both
+cause the qrunners to exit cleanly, but the master will only restart qrunners
+that have exited due to a SIGINT. SIGHUP causes the master and the qrunners
+to close their log files, and reopen then upon the next printed message.
+
+The master also responds to SIGINT, SIGTERM, and SIGHUP, which it simply
+passes on to the qrunners (note that the master will close and reopen its own
+log files on receipt of a SIGHUP). The master also leaves its own process id
+in the file data/master-qrunner.pid but you normally don't need to use this
+pid directly. The `start', `stop', `restart', and `reopen' commands handle
+everything for you.
+
+Commands:
+
+ start - Start the master daemon and all qrunners. Prints a message and
+ exits if the master daemon is already running.
+
+ stop - Stops the master daemon and all qrunners. After stopping, no
+ more messages will be processed.
+
+ restart - Restarts the qrunners, but not the master process. Use this
+ whenever you upgrade or update Mailman so that the qrunners will
+ use the newly installed code.
+
+ reopen - This will close all log files, causing them to be re-opened the
+ next time a message is written to them
+
+Usage: %prog [options] [ start | stop | restart | reopen ]"""))
+ parser.add_option('-n', '--no-restart',
+ dest='restart', default=True, action='store_false',
+ help=_("""\
+Don't restart the qrunners when they exit because of an error or a SIGINT.
+They are never restarted if they exit in response to a SIGTERM. Use this only
+for debugging. Only useful if the `start' command is given."""))
+ parser.add_option('-u', '--run-as-user',
+ dest='checkprivs', default=True, action='store_false',
+ help=_("""\
+Normally, this script will refuse to run if the user id and group id are not
+set to the `mailman' user and group (as defined when you configured Mailman).
+If run as root, this script will change to this user and group before the
+check is made.
+
+This can be inconvenient for testing and debugging purposes, so the -u flag
+means that the step that sets and checks the uid/gid is skipped, and the
+program is run as the current user and group. This flag is not recommended
+for normal production environments.
+
+Note though, that if you run with -u and are not in the mailman group, you may
+have permission problems, such as begin unable to delete a list's archives
+through the web. Tough luck!"""))
+ parser.add_option('-s', '--stale-lock-cleanup',
+ dest='force', default=False, action='store_true',
+ help=_("""\
+If mailmanctl finds an existing master lock, it will normally exit with an
+error message. With this option, mailmanctl will perform an extra level of
+checking. If a process matching the host/pid described in the lock file is
+running, mailmanctl will still exit, but if no matching process is found,
+mailmanctl will remove the apparently stale lock and make another attempt to
+claim the master lock."""))
+ parser.add_option('-q', '--quiet',
+ default=False, action='store_true',
+ help=_("""\
+Don't print status messages. Error messages are still printed to standard
+error."""))
+ parser.add_option('-C', '--config',
+ help=_('Alternative configuration file to use'))
+ opts, args = parser.parse_args()
+ if not args:
+ parser.print_help()
+ print >> sys.stderr, _('No command given.')
+ sys.exit(1)
+ if len(args) > 1:
+ parse.print_help()
+ commands = COMMASPACE.join(args)
+ print >> sys.stderr, _('Bad command: $commands')
+ sys.exit(1)
+ return parser, opts, args
+
+
+
+def kill_watcher(sig):
+ try:
+ fp = open(config.PIDFILE)
+ pidstr = fp.read()
+ fp.close()
+ pid = int(pidstr.strip())
+ except (IOError, ValueError), e:
+ # For i18n convenience
+ pidfile = config.PIDFILE
+ print >> sys.stderr, _('PID unreadable in: $pidfile')
+ print >> sys.stderr, e
+ print >> sys.stderr, _('Is qrunner even running?')
+ return
+ try:
+ os.kill(pid, sig)
+ except OSError, e:
+ if e.errno <> errno.ESRCH: raise
+ print >> sys.stderr, _('No child with pid: $pid')
+ print >> sys.stderr, e
+ print >> sys.stderr, _('Stale pid file removed.')
+ os.unlink(config.PIDFILE)
+
+
+
+def get_lock_data():
+ # Return the hostname, pid, and tempfile
+ fp = open(config.LOCKFILE)
+ try:
+ filename = os.path.split(fp.read().strip())[1]
+ finally:
+ fp.close()
+ parts = filename.split('.')
+ hostname = DOT.join(parts[1:-1])
+ pid = int(parts[-1])
+ return hostname, int(pid), filename
+
+
+def qrunner_state():
+ # 1 if proc exists on host (but is it qrunner? ;)
+ # 0 if host matches but no proc
+ # hostname if hostname doesn't match
+ hostname, pid, tempfile = get_lock_data()
+ if hostname <> socket.gethostname():
+ return hostname
+ # Find out if the process exists by calling kill with a signal 0.
+ try:
+ os.kill(pid, 0)
+ except OSError, e:
+ if e.errno <> errno.ESRCH:
+ raise
+ return 0
+ return 1
+
+
+def acquire_lock_1(force):
+ # Be sure we can acquire the master qrunner lock. If not, it means some
+ # other master qrunner daemon is already going.
+ lock = LockFile.LockFile(config.LOCK_FILE, LOCK_LIFETIME)
+ try:
+ lock.lock(0.1)
+ return lock
+ except LockFile.TimeOutError:
+ if not force:
+ raise
+ # Force removal of lock first
+ lock._disown()
+ hostname, pid, tempfile = get_lock_data()
+ os.unlink(config.LOCKFILE)
+ os.unlink(os.path.join(config.LOCK_DIR, tempfile))
+ return acquire_lock_1(force=False)
+
+
+def acquire_lock(force):
+ try:
+ lock = acquire_lock_1(force)
+ return lock
+ except LockFile.TimeOutError:
+ status = qrunner_state()
+ if status == 1:
+ # host matches and proc exists
+ print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired because it appears as if another
+master qrunner is already running.
+""")
+ elif status == 0:
+ # host matches but no proc
+ print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired. It appears as though there is
+a stale master qrunner lock. Try re-running mailmanctl with the -s flag.
+""")
+ else:
+ # host doesn't even match
+ print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired, because it appears as if some
+process on some other host may have acquired it. We can't test for stale
+locks across host boundaries, so you'll have to do this manually. Or, if you
+know the lock is stale, re-run mailmanctl with the -s flag.
+
+Lock file: $config.LOCKFILE
+Lock host: $status
+
+Exiting.""")
+
+
+
+def start_runner(qrname, slice, count):
+ pid = os.fork()
+ if pid:
+ # parent
+ return pid
+ # child
+ #
+ # Craft the command line arguments for the exec() call.
+ rswitch = '--runner=%s:%d:%d' % (qrname, slice, count)
+ exe = os.path.join(config.BIN_DIR, 'qrunner')
+ # config.PYTHON, which is the absolute path to the Python interpreter,
+ # must be given as argv[0] due to Python's library search algorithm.
+ os.execl(config.PYTHON, config.PYTHON, exe, rswitch, '-s')
+ # Should never get here
+ raise RuntimeError, 'os.execl() failed'
+
+
+def start_all_runners():
+ kids = {}
+ for qrname, count in config.QRUNNERS:
+ for slice in range(count):
+ # queue runner name, slice, numslices, restart count
+ info = (qrname, slice, count, 0)
+ pid = start_runner(qrname, slice, count)
+ kids[pid] = info
+ return kids
+
+
+
+def check_for_site_list():
+ sitelistname = config.MAILMAN_SITE_LIST
+ try:
+ sitelist = MailList(sitelistname, lock=False)
+ except Errors.MMUnknownListError:
+ print >> sys.stderr, _('Site list is missing: $sitelistname')
+ elog.error('Site list is missing: %s', config.MAILMAN_SITE_LIST)
+ sys.exit(1)
+
+
+def check_privs():
+ # If we're running as root (uid == 0), coerce the uid and gid to that
+ # which Mailman was configured for, and refuse to run if we didn't coerce
+ # the uid/gid.
+ gid = grp.getgrnam(config.MAILMAN_GROUP)[2]
+ uid = pwd.getpwnam(config.MAILMAN_USER)[2]
+ myuid = os.getuid()
+ if myuid == 0:
+ # Set the process's supplimental groups.
+ groups = [x[2] for x in grp.getgrall() if config.MAILMAN_USER in x[3]]
+ groups.append(gid)
+ os.setgroups(groups)
+ os.setgid(gid)
+ os.setuid(uid)
+ elif myuid <> uid:
+ name = config.MAILMAN_USER
+ usage(1, _(
+ 'Run this program as root or as the $name user, or use -u.'))
+
+
+
+def main():
+ global elog, qlog
+
+ parser, opts, args = parseargs()
+ config.load(opts.config)
+
+ loginit.initialize()
+ elog = logging.getLogger('mailman.error')
+ qlog = logging.getLogger('mailman.qrunner')
+
+ if opts.checkprivs:
+ check_privs()
+ else:
+ print _('Warning! You may encounter permission problems.')
+
+ # Handle the commands
+ command = args[0].lower()
+ if command == 'stop':
+ # Sent the master qrunner process a SIGINT, which is equivalent to
+ # giving cron/qrunner a ctrl-c or KeyboardInterrupt. This will
+ # effectively shut everything down.
+ if not opts.quiet:
+ print _("Shutting down Mailman's master qrunner")
+ kill_watcher(signal.SIGTERM)
+ elif command == 'restart':
+ # Sent the master qrunner process a SIGHUP. This will cause the
+ # master qrunner to kill and restart all the worker qrunners, and to
+ # close and re-open its log files.
+ if not opts.quiet:
+ print _("Restarting Mailman's master qrunner")
+ kill_watcher(signal.SIGINT)
+ elif command == 'reopen':
+ if not opts.quiet:
+ print _('Re-opening all log files')
+ kill_watcher(signal.SIGHUP)
+ elif command == 'start':
+ # First, complain loudly if there's no site list.
+ check_for_site_list()
+ # Here's the scoop on the processes we're about to create. We'll need
+ # one for each qrunner, and one for a master child process watcher /
+ # lock refresher process.
+ #
+ # The child watcher process simply waits on the pids of the children
+ # qrunners. Unless explicitly disabled by a mailmanctl switch (or the
+ # children are killed with SIGTERM instead of SIGINT), the watcher
+ # will automatically restart any child process that exits. This
+ # allows us to be more robust, and also to implement restart by simply
+ # SIGINT'ing the qrunner children, and letting the watcher restart
+ # them.
+ #
+ # Under normal operation, we have a child per queue. This lets us get
+ # the most out of the available resources, since a qrunner with no
+ # files in its queue directory is pretty cheap, but having a separate
+ # runner process per queue allows for a very responsive system. Some
+ # people want a more traditional (i.e. MM2.0.x) cron-invoked qrunner.
+ # No problem, but using mailmanctl isn't the answer. So while
+ # mailmanctl hard codes some things, others, such as the number of
+ # qrunners per queue, are configurable.
+ #
+ # First, acquire the master mailmanctl lock
+ lock = acquire_lock(opts.force)
+ if not lock:
+ return
+ # Daemon process startup according to Stevens, Advanced Programming in
+ # the UNIX Environment, Chapter 13.
+ pid = os.fork()
+ if pid:
+ # parent
+ if not opts.quiet:
+ print _("Starting Mailman's master qrunner.")
+ # Give up the lock "ownership". This just means the foreground
+ # process won't close/unlock the lock when it finalizes this lock
+ # instance. We'll let the mater watcher subproc own the lock.
+ lock._transfer_to(pid)
+ return
+ # child
+ lock._take_possession()
+ # First, save our pid in a file for "mailmanctl stop" rendezvous. We
+ # want the perms on the .pid file to be rw-rw----
+ omask = os.umask(6)
+ try:
+ fp = open(config.PIDFILE, 'w')
+ print >> fp, os.getpid()
+ fp.close()
+ finally:
+ os.umask(omask)
+ # Create a new session and become the session leader, but since we
+ # won't be opening any terminal devices, don't do the ultra-paranoid
+ # suggestion of doing a second fork after the setsid() call.
+ os.setsid()
+ # Instead of cd'ing to root, cd to the Mailman installation home
+ os.chdir(config.PREFIX)
+ # Set our file mode creation umask
+ os.umask(007)
+ # I don't think we have any unneeded file descriptors.
+ #
+ # Now start all the qrunners. This returns a dictionary where the
+ # keys are qrunner pids and the values are tuples of the following
+ # form: (qrname, slice, count). This does its own fork and exec, and
+ # sets up its own signal handlers.
+ kids = start_all_runners()
+ # Set up a SIGALRM handler to refresh the lock once per day. The lock
+ # lifetime is 1day+6hours so this should be plenty.
+ def sigalrm_handler(signum, frame):
+ lock.refresh()
+ signal.alarm(Defaults.days(1))
+ signal.signal(signal.SIGALRM, sigalrm_handler)
+ signal.alarm(Defaults.days(1))
+ # Set up a SIGHUP handler so that if we get one, we'll pass it along
+ # to all the qrunner children. This will tell them to close and
+ # reopen their log files
+ def sighup_handler(signum, frame):
+ loginit.reopen()
+ for pid in kids.keys():
+ os.kill(pid, signal.SIGHUP)
+ # And just to tweak things...
+ qlog.info('Master watcher caught SIGHUP. Re-opening log files.')
+ signal.signal(signal.SIGHUP, sighup_handler)
+ # We also need to install a SIGTERM handler because that's what init
+ # will kill this process with when changing run levels.
+ def sigterm_handler(signum, frame):
+ for pid in kids.keys():
+ try:
+ os.kill(pid, signal.SIGTERM)
+ except OSError, e:
+ if e.errno <> errno.ESRCH: raise
+ qlog.info('Master watcher caught SIGTERM. Exiting.')
+ signal.signal(signal.SIGTERM, sigterm_handler)
+ # Finally, we need a SIGINT handler which will cause the sub-qrunners
+ # to exit, but the master will restart SIGINT'd sub-processes unless
+ # the -n flag was given.
+ def sigint_handler(signum, frame):
+ for pid in kids.keys():
+ os.kill(pid, signal.SIGINT)
+ qlog.info('Master watcher caught SIGINT. Restarting.')
+ signal.signal(signal.SIGINT, sigint_handler)
+ # Now we're ready to simply do our wait/restart loop. This is the
+ # master qrunner watcher.
+ try:
+ while True:
+ try:
+ pid, status = os.wait()
+ except OSError, e:
+ # No children? We're done
+ if e.errno == errno.ECHILD:
+ break
+ # If the system call got interrupted, just restart it.
+ elif e.errno <> errno.EINTR:
+ raise
+ continue
+ killsig = exitstatus = None
+ if os.WIFSIGNALED(status):
+ killsig = os.WTERMSIG(status)
+ if os.WIFEXITED(status):
+ exitstatus = os.WEXITSTATUS(status)
+ # We'll restart the process unless we were given the
+ # "no-restart" switch, or if the process was SIGTERM'd or
+ # exitted with a SIGTERM exit status. This lets us better
+ # handle runaway restarts (say, if the subproc had a syntax
+ # error!)
+ restarting = ''
+ if opts.restart:
+ if ((exitstatus == None and killsig <> signal.SIGTERM) or
+ (killsig == None and exitstatus <> signal.SIGTERM)):
+ # Then
+ restarting = '[restarting]'
+ qrname, slice, count, restarts = kids[pid]
+ del kids[pid]
+ qlog.info("""\
+Master qrunner detected subprocess exit
+(pid: %d, sig: %s, sts: %s, class: %s, slice: %d/%d) %s""",
+ pid, killsig, exitstatus, qrname,
+ slice+1, count, restarting)
+ # See if we've reached the maximum number of allowable restarts
+ if exitstatus <> signal.SIGINT:
+ restarts += 1
+ if restarts > MAX_RESTARTS:
+ qlog.info("""\
+Qrunner %s reached maximum restart limit of %d, not restarting.""",
+ qrname, MAX_RESTARTS)
+ restarting = ''
+ # Now perhaps restart the process unless it exited with a
+ # SIGTERM or we aren't restarting.
+ if restarting:
+ newpid = start_runner(qrname, slice, count)
+ kids[newpid] = (qrname, slice, count, restarts)
+ finally:
+ # Should we leave the main loop for any reason, we want to be sure
+ # all of our children are exited cleanly. Send SIGTERMs to all
+ # the child processes and wait for them all to exit.
+ for pid in kids.keys():
+ try:
+ os.kill(pid, signal.SIGTERM)
+ except OSError, e:
+ if e.errno == errno.ESRCH:
+ # The child has already exited
+ qlog.info('ESRCH on pid: %d', pid)
+ del kids[pid]
+ # Wait for all the children to go away
+ while True:
+ try:
+ pid, status = os.wait()
+ except OSError, e:
+ if e.errno == errno.ECHILD:
+ break
+ elif e.errno <> errno.EINTR:
+ raise
+ continue
+ # Finally, give up the lock
+ lock.unlock(unconditionally=True)
+ os._exit(0)
+
+
+
+if __name__ == '__main__':
+ main()