diff options
Diffstat (limited to 'src/mailman/bin/master.py')
| -rw-r--r-- | src/mailman/bin/master.py | 222 |
1 files changed, 129 insertions, 93 deletions
diff --git a/src/mailman/bin/master.py b/src/mailman/bin/master.py index c6a8bcd2d..0b273d332 100644 --- a/src/mailman/bin/master.py +++ b/src/mailman/bin/master.py @@ -19,7 +19,7 @@ import os import sys -import errno +import click import signal import socket import logging @@ -30,8 +30,10 @@ from flufl.lock import Lock, NotLockedError, TimeOutError from lazr.config import as_boolean from mailman.config import config from mailman.core.i18n import _ +from mailman.core.initialize import initialize from mailman.core.logging import reopen -from mailman.utilities.options import Options +from mailman.utilities.options import I18nCommand, validate_runner_spec +from mailman.version import MAILMAN_VERSION_FULL from public import public @@ -44,67 +46,25 @@ SUBPROC_START_WAIT = timedelta(seconds=20) PRESERVE_ENVS = ( 'COVERAGE_PROCESS_START', 'MAILMAN_EXTRA_TESTING_CFG', + 'LANG', + 'LANGUAGE', + 'LC_CTYPE', + 'LC_NUMERIC', + 'LC_TIME', + 'LC_COLLATE', + 'LC_MONETARY', + 'LC_MESSAGES', + 'LC_PAPER', + 'LC_NAME', + 'LC_ADDRESS', + 'LC_TELEPHONE', + 'LC_MEASUREMENT', + 'LC_IDENTIFICATION', + 'LC_ALL', ) -class MasterOptions(Options): - """Options for the master watcher.""" - - usage = _("""\ -%prog [options] - -Master subprocess watcher. - -Start and watch the configured runners and ensure that they stay alive and -kicking. Each runner is forked and exec'd in turn, with the master waiting on -their process ids. When it detects a child runner has exited, it may restart -it. - -The runners respond to SIGINT, SIGTERM, SIGUSR1 and SIGHUP. SIGINT, SIGTERM -and SIGUSR1 all cause a runner to exit cleanly. The master will restart -runners that have exited due to a SIGUSR1 or some kind of other exit condition -(say because of an uncaught exception). SIGHUP causes the master and the -runners to close their log files, and reopen then upon the next printed -message. - -The master also responds to SIGINT, SIGTERM, SIGUSR1 and SIGHUP, which it -simply passes on to the runners. Note that the master will close and reopen -its own log files on receipt of a SIGHUP. The master also leaves its own -process id in the file `data/master.pid` but you normally don't need to use -this pid directly.""") - - def add_options(self): - """See `Options`.""" - self.parser.add_option( - '-n', '--no-restart', - dest='restartable', default=True, action='store_false', - help=_("""\ -Don't restart the runners when they exit because of an error or a SIGUSR1. -Use this only for debugging.""")) - self.parser.add_option( - '-f', '--force', - default=False, action='store_true', - help=_("""\ -If the master watcher finds an existing master lock, it will normally exit -with an error message. With this option,the master will perform an extra -level of checking. If a process matching the host/pid described in the lock -file is running, the master will still exit, requiring you to manually clean -up the lock. But if no matching process is found, the master will remove the -apparently stale lock and make another attempt to claim the master lock.""")) - self.parser.add_option( - '-r', '--runner', - dest='runners', action='append', default=[], - help=_("""\ -Override the default set of runners that the master will invoke, which is -typically defined in the configuration file. Multiple -r options may be -given. The values for -r are passed straight through to bin/runner.""")) - - def sanity_check(self): - """See `Options`.""" - if len(self.arguments) > 0: - self.parser.error(_('Too many arguments')) - - +@public class WatcherState(Enum): """Enum for the state of the master process watcher.""" # No lock has been acquired by any process. @@ -117,6 +77,7 @@ class WatcherState(Enum): host_mismatch = 3 +@public def master_state(lock_file=None): """Get the state of the master watcher. @@ -139,12 +100,9 @@ def master_state(lock_file=None): try: os.kill(pid, 0) return WatcherState.conflict, lock - except OSError as error: - if error.errno == errno.ESRCH: - # No matching process id. - return WatcherState.stale_lock, lock - # Some other error occurred. - raise + except ProcessLookupError: + # No matching process id. + return WatcherState.stale_lock, lock def acquire_lock_1(force, lock_file=None): @@ -211,17 +169,17 @@ Lock host: $hostname Exiting.""") else: assert status is WatcherState.none, ( - 'Invalid enum value: ${0}'.format(status)) + 'Invalid enum value: {}'.format(status)) hostname, pid, tempfile = lock.details message = _("""\ For unknown reasons, the master lock could not be acquired. - Lock file: $config.LOCK_FILE Lock host: $hostname Exiting.""") - config.options.parser.error(message) + print(message, file=sys.stderr) + sys.exit(1) class PIDWatcher: @@ -438,15 +396,12 @@ class Loop: while True: try: pid, status = os.wait() - except OSError as error: + except ChildProcessError: # No children? We're done. - if error.errno == errno.ECHILD: - break + break + except InterruptedError: # pragma: nocover # If the system call got interrupted, just restart it. - elif error.errno == errno.EINTR: - continue - else: - raise + continue # Find out why the subprocess exited by getting the signal # received or exit status. if os.WIFSIGNALED(status): @@ -497,40 +452,121 @@ Runner {0} reached maximum restart limit of {1:d}, not restarting.""", for pid in self._kids: try: os.kill(pid, signal.SIGTERM) - except OSError as error: - if error.errno == errno.ESRCH: - # The child has already exited. - log.info('ESRCH on pid: %d', pid) + except ProcessLookupError: # pragma: nocover + # The child has already exited. + log.info('ESRCH on pid: %d', pid) + except OSError: # pragma: nocover + # XXX I'm not so sure about this. It preserves the semantics + # before conversion to PEP 3151 exceptions. But is it right? + pass # Wait for all the children to go away. while self._kids: try: pid, status = os.wait() self._kids.drop(pid) - except OSError as error: - if error.errno == errno.ECHILD: - break - elif error.errno == errno.EINTR: - continue - raise + except ChildProcessError: + break + except InterruptedError: # pragma: nocover + continue + +@click.command( + cls=I18nCommand, + context_settings=dict(help_option_names=['-h', '--help']), + help=_("""\ + Master subprocess watcher. + Start and watch the configured runners, ensuring that they stay alive and + kicking. Each runner is forked and exec'd in turn, with the master waiting + on their process ids. When it detects a child runner has exited, it may + restart it. + + The runners respond to SIGINT, SIGTERM, SIGUSR1 and SIGHUP. SIGINT, + SIGTERM and SIGUSR1 all cause a runner to exit cleanly. The master will + restart runners that have exited due to a SIGUSR1 or some kind of other + exit condition (say because of an uncaught exception). SIGHUP causes the + master and the runners to close their log files, and reopen then upon the + next printed message. + + The master also responds to SIGINT, SIGTERM, SIGUSR1 and SIGHUP, which it + simply passes on to the runners. Note that the master will close and + reopen its own log files on receipt of a SIGHUP. The master also leaves + its own process id in the file specified in the configuration file but you + normally don't need to use this PID directly.""")) +@click.option( + '-C', '--config', 'config_file', + envvar='MAILMAN_CONFIG_FILE', + type=click.Path(exists=True, dir_okay=False, resolve_path=True), + help=_("""\ + Configuration file to use. If not given, the environment variable + MAILMAN_CONFIG_FILE is consulted and used if set. If neither are given, a + default configuration file is loaded.""")) +@click.option( + '--no-restart', '-n', 'restartable', + is_flag=True, default=True, + help=_("""\ + Don't restart the runners when they exit because of an error or a SIGUSR1. + Use this only for debugging.""")) +@click.option( + '--force', '-f', + is_flag=True, default=False, + help=_("""\ + If the master watcher finds an existing master lock, it will normally exit + with an error message. With this option,the master will perform an extra + level of checking. If a process matching the host/pid described in the + lock file is running, the master will still exit, requiring you to manually + clean up the lock. But if no matching process is found, the master will + remove the apparently stale lock and make another attempt to claim the + master lock.""")) +@click.option( + '--runners', '-r', + metavar='runner[:slice:range]', + callback=validate_runner_spec, default=None, + multiple=True, + help=_("""\ + Override the default set of runners that the master will invoke, which is + typically defined in the configuration file. Multiple -r options may be + given. The values for -r are passed straight through to bin/runner.""")) +@click.option( + '-v', '--verbose', + is_flag=True, default=False, + help=_('Display more debugging information to the log file.')) +@click.version_option(MAILMAN_VERSION_FULL) @public -def main(): - """Main process.""" +def main(config_file, restartable, force, runners, verbose): + # XXX https://github.com/pallets/click/issues/303 + """Master subprocess watcher. + + Start and watch the configured runners and ensure that they stay + alive and kicking. Each runner is forked and exec'd in turn, with + the master waiting on their process ids. When it detects a child + runner has exited, it may restart it. - options = MasterOptions() - options.initialize() + The runners respond to SIGINT, SIGTERM, SIGUSR1 and SIGHUP. SIGINT, + SIGTERM and SIGUSR1 all cause a runner to exit cleanly. The master + will restart runners that have exited due to a SIGUSR1 or some kind + of other exit condition (say because of an uncaught exception). + SIGHUP causes the master and the runners to close their log files, + and reopen then upon the next printed message. + + The master also responds to SIGINT, SIGTERM, SIGUSR1 and SIGHUP, + which it simply passes on to the runners. Note that the master will + close and reopen its own log files on receipt of a SIGHUP. The + master also leaves its own process id in the file `data/master.pid` + but you normally don't need to use this pid directly. + """ + initialize(config_file, verbose) # Acquire the master lock, exiting if we can't. We'll let the caller # handle any clean up or lock breaking. No `with` statement here because # Lock's constructor doesn't support a timeout. - lock = acquire_lock(options.options.force) + lock = acquire_lock(force) try: with open(config.PID_FILE, 'w') as fp: print(os.getpid(), file=fp) - loop = Loop(lock, options.options.restartable, options.options.config) + loop = Loop(lock, restartable, config.filename) loop.install_signal_handlers() try: - loop.start_runners(options.options.runners) + loop.start_runners(runners) loop.loop() finally: loop.cleanup() |
