# Copyright (C) 2011-2017 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Test some additional corner cases for starting/stopping."""

import os
import sys
import time
import shutil
import signal
import socket
import unittest

from click.testing import CliRunner
from contextlib import ExitStack, suppress
from datetime import datetime, timedelta
from flufl.lock import SEP
from mailman.bin.master import WatcherState
from mailman.commands.cli_control import reopen, restart, start
from mailman.config import config
from mailman.testing.helpers import configuration
from mailman.testing.layers import ConfigLayer
from pkg_resources import resource_filename
from public import public
from tempfile import TemporaryDirectory
from unittest.mock import patch


# For ../docs/control.rst
@public
def make_config(resources):
    """Point the global configuration at the `no-runners.cfg` test file.

    :param resources: An `ExitStack`-like object; the patch is registered
        with it via `enter_context()` so that it is undone when the stack
        is closed.
    """
    cfg_path = resource_filename(
        'mailman.commands.tests.data', 'no-runners.cfg')
    # We have to patch the global config's filename attribute.  The problem
    # here is that click does not support setting the -C option on the
    # parent command (i.e. `master`).
    # https://github.com/pallets/click/issues/831
    resources.enter_context(patch.object(config, 'filename', cfg_path))


# For ../docs/control.rst
@public
def find_master():
    """Poll for up to 10 seconds for a live master process.

    :return: The pid read from `config.PID_FILE` once that process answers
        signal 0, or None if no such process shows up before the timeout.
    """
    # See if the master process is still running.
    until = timedelta(seconds=10) + datetime.now()
    while datetime.now() < until:
        time.sleep(0.1)
        # A missing pid file, unparseable contents, or a pid that does not
        # name a process all just mean "not yet"; try again.
        with suppress(FileNotFoundError, ValueError, ProcessLookupError):
            with open(config.PID_FILE) as fp:
                pid = int(fp.read().strip())
                os.kill(pid, 0)
                return pid
    return None


@public
def claim_lock():
    """Fake a stale master lock held by a process that no longer exists.

    :return: The path of the claim file that was hard-linked to
        `config.LOCK_FILE`.  The caller is responsible for removing both.
    :raises RuntimeError: when no unused process id can be found.
    """
    # Fake an acquisition of the master lock by another process, which
    # subsequently goes stale.  Start by finding a free process id.  Yes,
    # this could race, but given that we're starting with our own PID and
    # searching downward, it's less likely.
    fake_pid = os.getpid() - 1
    while fake_pid > 1:
        try:
            os.kill(fake_pid, 0)
        except ProcessLookupError:
            break
        except PermissionError:
            # The pid belongs to a live process that we're not allowed to
            # signal (e.g. it is owned by another user), so it is in use.
            # Keep searching.
            pass
        fake_pid -= 1
    else:
        raise RuntimeError('Cannot find free PID')
    # Lock acquisition logic taken from flufl.lock.
    claim_file = SEP.join((
        config.LOCK_FILE,
        socket.getfqdn(),
        str(fake_pid),
        '0'))
    with open(config.LOCK_FILE, 'w') as fp:
        fp.write(claim_file)
    os.link(config.LOCK_FILE, claim_file)
    # Back-date the claim file so that the lock looks expired.
    expiration_date = datetime.now() - timedelta(minutes=5)
    t = time.mktime(expiration_date.timetuple())
    os.utime(claim_file, (t, t))
    return claim_file


@public
def kill_with_extreme_prejudice(pid_or_pidfile=None):
    """SIGTERM a child process, escalating to SIGKILL if it won't exit.

    :param pid_or_pidfile: Either an integer process id, the path (as a
        str) of a file containing the process id, or None.  With None, no
        signal is sent; we only wait for any child to change state.  If
        the named pid file does not exist, or the pid does not name a
        running process, there is nothing to kill and the function just
        returns.
    """
    # 2016-12-03 barry: We have intermittent hangs during both local and CI
    # test suite runs where killing a runner or master process doesn't
    # terminate the process.  In those cases, wait()ing on the child can
    # suspend the test process indefinitely.  Locally, you have to C-c the
    # test process, but that still doesn't kill it; the process continues to
    # run in the background.  If you then search for the process's pid and
    # SIGTERM it, it will usually exit, which is why I don't understand why
    # the above SIGTERM doesn't kill it sometimes.  However, when run under
    # CI, the test suite will just hang until the CI runner times it out.  It
    # would be better to figure out the underlying cause, because we have
    # definitely seen other situations where a runner process won't exit, but
    # for testing purposes we're just trying to clean up some resources so
    # after a brief attempt at SIGTERMing it, let's SIGKILL it and warn.
    if isinstance(pid_or_pidfile, str):
        try:
            with open(pid_or_pidfile, 'r') as fp:
                pid = int(fp.read())
        except FileNotFoundError:
            # There's nothing to kill.
            return
    else:
        pid = pid_or_pidfile
    if pid is not None:
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # The process is already gone (e.g. the pid file was stale), so
            # there's nothing to kill.
            return
    until = timedelta(seconds=10) + datetime.now()
    while datetime.now() < until:
        try:
            if pid is None:
                os.wait3(os.WNOHANG)
            else:
                os.waitpid(pid, os.WNOHANG)
        except ChildProcessError:
            # This basically means we went one too many times around the
            # loop.  The previous iteration successfully reaped the child.
            # Because the return status of wait3() and waitpid() are different
            # in those cases, it's easier just to catch the exception for
            # either call and exit.
            return
        time.sleep(0.1)
    if pid is None:
        # There's really not much more we can do because we have no pid to
        # SIGKILL.  Just report the problem and continue.
        print('WARNING: NO CHANGE IN CHILD PROCESS STATES',
              file=sys.stderr)
        return
    print('WARNING: SIGTERM DID NOT EXIT PROCESS; SIGKILLing',
          file=sys.stderr)
    os.kill(pid, signal.SIGKILL)
    until = timedelta(seconds=10) + datetime.now()
    while datetime.now() < until:
        try:
            reaped_pid, status = os.waitpid(pid, os.WNOHANG)
        except ChildProcessError:
            # There is no such child left to wait for, so it has already
            # been reaped.
            return
        if reaped_pid != 0:
            # The child was reaped.  With WNOHANG, waitpid() returns (0, 0)
            # only while the child has not yet exited.
            return
        time.sleep(0.1)
    print('WARNING: SIGKILL DID NOT EXIT PROCESS!', file=sys.stderr)


class TestControl(unittest.TestCase):
    """Corner cases of `mailman start` with fork/exec mocked out."""

    layer = ConfigLayer
    maxDiff = None

    def setUp(self):
        self._command = CliRunner()
        self._tmpdir = TemporaryDirectory()
        self.addCleanup(self._tmpdir.cleanup)
        # Specify where to put the pid file; and make sure that the master
        # gets killed regardless of whether it gets started or not.
        self._pid_file = os.path.join(self._tmpdir.name, 'master-test.pid')
        self.addCleanup(kill_with_extreme_prejudice, self._pid_file)
        # Patch cli_control so that 1) it doesn't actually do a fork, since
        # that makes it impossible to avoid race conditions in the test; 2)
        # doesn't actually os.execl().
        with ExitStack() as resources:
            resources.enter_context(patch(
                'mailman.commands.cli_control.os.fork',
                # Pretend to be the child.
                return_value=0
                ))
            self._execl = resources.enter_context(patch(
                'mailman.commands.cli_control.os.execl'))
            resources.enter_context(patch(
                'mailman.commands.cli_control.os.setsid'))
            resources.enter_context(patch(
                'mailman.commands.cli_control.os.chdir'))
            resources.enter_context(patch(
                'mailman.commands.cli_control.os.environ',
                os.environ.copy()))
            # Arrange for the mocks to be reverted when the test is over.
            self.addCleanup(resources.pop_all().close)

    def test_master_is_elsewhere_and_missing(self):
        with ExitStack() as resources:
            bin_dir = resources.enter_context(TemporaryDirectory())
            old_master = os.path.join(config.BIN_DIR, 'master')
            new_master = os.path.join(bin_dir, 'master')
            shutil.move(old_master, new_master)
            resources.callback(shutil.move, new_master, old_master)
            results = self._command.invoke(start)
            # Argument #2 to the execl() call should be the path to the master
            # program, and the path should not exist.  These checks have to
            # run before the master script is moved back into place.
            self.assertEqual(
                len(self._execl.call_args_list), 1, results.output)
            posargs, kws = self._execl.call_args_list[0]
            master_path = posargs[2]
            self.assertEqual(os.path.basename(master_path), 'master')
            self.assertFalse(os.path.exists(master_path), master_path)

    def test_master_is_elsewhere_and_findable(self):
        with ExitStack() as resources:
            bin_dir = resources.enter_context(TemporaryDirectory())
            old_master = os.path.join(config.BIN_DIR, 'master')
            new_master = os.path.join(bin_dir, 'master')
            shutil.move(old_master, new_master)
            resources.callback(shutil.move, new_master, old_master)
            with configuration('paths.testing', bin_dir=bin_dir):
                results = self._command.invoke(start)
            # Argument #2 to the execl() call should be the path to the master
            # program, and the path should exist.  These checks have to run
            # while the master script still lives in the temporary directory.
            self.assertEqual(
                len(self._execl.call_args_list), 1, results.output)
            posargs, kws = self._execl.call_args_list[0]
            master_path = posargs[2]
            self.assertEqual(os.path.basename(master_path), 'master')
            self.assertTrue(os.path.exists(master_path), master_path)

    def test_stale_lock_no_force(self):
        claim_file = claim_lock()
        self.addCleanup(os.remove, claim_file)
        self.addCleanup(os.remove, config.LOCK_FILE)
        result = self._command.invoke(start)
        self.assertEqual(result.exit_code, 2)
        self.assertEqual(
            result.output,
            'Usage: start [OPTIONS]\n\n'
            'Error: A previous run of GNU Mailman did not exit cleanly '
            '(stale_lock). Try using --force\n')

    def test_stale_lock_force(self):
        claim_file = claim_lock()
        self.addCleanup(os.remove, claim_file)
        self.addCleanup(os.remove, config.LOCK_FILE)
        # Don't test the results of this command.  Because we're mocking
        # os.execl(), we'll end up raising the RuntimeError at the end of the
        # start() method, child branch.
        self._command.invoke(start, ('--force',))
        self.assertEqual(len(self._execl.call_args_list), 1)
        posargs, kws = self._execl.call_args_list[0]
        self.assertIn('--force', posargs)


class TestControlSimple(unittest.TestCase):
    """Tests of start/reopen/restart that need no process-level mocking."""

    layer = ConfigLayer
    maxDiff = None

    def setUp(self):
        self._command = CliRunner()

    def test_watcher_state_conflict(self):
        with patch('mailman.commands.cli_control.master_state',
                   return_value=(WatcherState.conflict, object())):
            results = self._command.invoke(start)
        self.assertEqual(results.exit_code, 2)
        self.assertEqual(
            results.output,
            'Usage: start [OPTIONS]\n\n'
            'Error: GNU Mailman is already running\n')

    def test_reopen(self):
        with patch('mailman.commands.cli_control.kill_watcher') as mock:
            result = self._command.invoke(reopen)
        mock.assert_called_once_with(signal.SIGHUP)
        self.assertEqual(result.output, 'Reopening the Mailman runners\n')

    def test_reopen_quiet(self):
        with patch('mailman.commands.cli_control.kill_watcher') as mock:
            result = self._command.invoke(reopen, ('--quiet',))
        mock.assert_called_once_with(signal.SIGHUP)
        self.assertEqual(result.output, '')

    def test_restart(self):
        with patch('mailman.commands.cli_control.kill_watcher') as mock:
            result = self._command.invoke(restart)
        mock.assert_called_once_with(signal.SIGUSR1)
        self.assertEqual(result.output, 'Restarting the Mailman runners\n')

    def test_restart_quiet(self):
        with patch('mailman.commands.cli_control.kill_watcher') as mock:
            result = self._command.invoke(restart, ('--quiet',))
        mock.assert_called_once_with(signal.SIGUSR1)
        self.assertEqual(result.output, '')