summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBarry Warsaw2016-12-03 11:08:18 -0500
committerBarry Warsaw2016-12-03 11:08:18 -0500
commita30bbdf0d483150dc4259b8d149679d38b6030f1 (patch)
treec41cf9df8b0789b78e56900df9d6e28b4e52f697
parent36a17d6ef058d61c9f866c66f3d9c74703fb72cc (diff)
downloadmailman-a30bbdf0d483150dc4259b8d149679d38b6030f1.tar.gz
mailman-a30bbdf0d483150dc4259b8d149679d38b6030f1.tar.zst
mailman-a30bbdf0d483150dc4259b8d149679d38b6030f1.zip
-rw-r--r--src/mailman/commands/tests/test_control.py58
1 files changed, 55 insertions, 3 deletions
diff --git a/src/mailman/commands/tests/test_control.py b/src/mailman/commands/tests/test_control.py
index 7377c4666..4425b3411 100644
--- a/src/mailman/commands/tests/test_control.py
+++ b/src/mailman/commands/tests/test_control.py
@@ -64,6 +64,59 @@ def find_master():
return None
+def kill_with_extreme_prejudice(pid=None):
+ # 2016-12-03 barry: We have intermittent hangs during both local and CI
+ # test suite runs where killing a runner or master process doesn't
+ # terminate the process. In those cases, wait()ing on the child can
+ # suspend the test process indefinitely. Locally, you have to C-c the
+ # test process, but that still doesn't kill it; the process continues to
+ # run in the background. If you then search for the process's pid and
+ # SIGTERM it, it will usually exit, which is why I don't understand why
+ # the above SIGTERM doesn't kill it sometimes. However, when run under
+ # CI, the test suite will just hang until the CI runner times it out. It
+ # would be better to figure out the underlying cause, because we have
+ # definitely seen other situations where a runner process won't exit, but
+ # for testing purposes we're just trying to clean up some resources so
+ # after a brief attempt at SIGTERMing it, let's SIGKILL it and warn.
+ if pid is not None:
+ os.kill(pid, signal.SIGTERM)
+ until = timedelta(seconds=10) + datetime.now()
+ while datetime.now() < until:
+ try:
+ if pid is None:
+ os.wait3(os.WNOHANG)
+ else:
+ os.waitpid(pid, os.WNOHANG)
+ except ChildProcessError:
+ # This basically means we went one too many times around the
+ # loop. The previous iteration successfully reaped the child.
+ # Because the return status of wait3() and waitpid() are different
+ # in those cases, it's easier just to catch the exception for
+ # either call and exit.
+ return
+ time.sleep(0.1)
+ else:
+ if pid is None:
+ # There's really not much more we can do because we have no pid to
+ # SIGKILL. Just report the problem and continue.
+ print('WARNING: NO CHANGE IN CHILD PROCESS STATES',
+ file=sys.stderr)
+ return
+ print('WARNING: SIGTERM DID NOT EXIT PROCESS; SIGKILLing',
+ file=sys.stderr)
+ if pid is not None:
+ os.kill(pid, signal.SIGKILL)
+ until = timedelta(seconds=10) + datetime.now()
+ while datetime.now() < until:
+ status = os.waitpid(pid, os.WNOHANG)
+ if status == (0, 0):
+ # The child was reaped.
+ return
+ time.sleep(0.1)
+ else:
+ print('WARNING: SIGKILL DID NOT EXIT PROCESS!', file=sys.stderr)
+
+
class FakeArgs:
force = None
run_as_user = None
@@ -171,7 +224,7 @@ class TestBinDir(unittest.TestCase):
args_config = Configuration()
args_config.load(self.args.config)
self.assertFalse(os.path.exists(args_config.PID_FILE))
- os.wait()
+ kill_with_extreme_prejudice()
def test_master_is_elsewhere_and_findable(self):
with ExitStack() as resources:
@@ -188,5 +241,4 @@ class TestBinDir(unittest.TestCase):
# killable. We might have to wait until the process has started.
master_pid = find_master()
self.assertIsNotNone(master_pid, 'master did not start')
- os.kill(master_pid, signal.SIGTERM)
- os.waitpid(master_pid, 0)
+ kill_with_extreme_prejudice(master_pid)