src/mailman/commands/tests/test_cli_control.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

# Copyright (C) 2011-2017 by the Free Software Foundation, Inc.
#
# This file is part of GNU Mailman.
#
# GNU Mailman is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.

"""Test some additional corner cases for starting/stopping."""

import os
import sys
import time
import shutil
import signal
import socket
import unittest

from click.testing import CliRunner
from contextlib import ExitStack, suppress
from datetime import datetime, timedelta
from flufl.lock import SEP
from mailman.bin.master import WatcherState
from mailman.commands.cli_control import reopen, restart, start
from mailman.config import config
from mailman.testing.helpers import configuration
from mailman.testing.layers import ConfigLayer
from pkg_resources import resource_filename
from public import public
from tempfile import TemporaryDirectory
from unittest.mock import patch


# For ../docs/control.rst
@public
def make_config(resources):
    """Point the global configuration at the `no-runners.cfg` test file.

    :param resources: An object with an `enter_context()` method (e.g. an
        `ExitStack`).  The patch of `config.filename` is entered into it, so
        the patch is undone whenever that object unwinds its contexts.
    """
    # click does not support setting the -C option on the parent command
    # (i.e. `master`), so the only way to select a configuration file is to
    # patch the filename attribute of the global config object.
    # https://github.com/pallets/click/issues/831
    no_runners_cfg = resource_filename(
        'mailman.commands.tests.data', 'no-runners.cfg')
    filename_patch = patch.object(config, 'filename', no_runners_cfg)
    resources.enter_context(filename_patch)


# For ../docs/control.rst
@public
def find_master():
    """Poll for a running master process and return its pid.

    For up to ten seconds, the pid file named by `config.PID_FILE` is read
    every tenth of a second and the process it names is probed with signal 0.

    :return: The pid of the master process as soon as the pid file names a
        process that can be signalled, or None if that never happens before
        the deadline passes.
    """
    deadline = datetime.now() + timedelta(seconds=10)
    while datetime.now() < deadline:
        time.sleep(0.1)
        try:
            with open(config.PID_FILE) as pid_file:
                master_pid = int(pid_file.read().strip())
                # Signal 0 delivers nothing; it only checks that the process
                # can be signalled.
                os.kill(master_pid, 0)
        except (FileNotFoundError, ValueError, ProcessLookupError):
            # No pid file yet, no valid pid in it yet, or the process it
            # names isn't there.  Keep polling.
            continue
        return master_pid
    return None


@public
def claim_lock():
    """Fake a stale master lock that looks like another process took it.

    A claim file is written for a process id that is not currently in use,
    hard linked to `config.LOCK_FILE`, and back-dated by five minutes.

    :return: The path of the claim file.  The caller is responsible for
        removing both it and `config.LOCK_FILE`.
    :raises RuntimeError: when no unused process id can be found.
    """
    # Fake an acquisition of the master lock by another process, which
    # subsequently goes stale.  Start by finding a free process id.  Yes,
    # this could race, but given that we're starting with our own PID and
    # searching downward, it's less likely.
    fake_pid = os.getpid() - 1
    while fake_pid > 1:
        try:
            os.kill(fake_pid, 0)
        except ProcessLookupError:
            # Nothing is running under this pid, so we can borrow it.
            break
        except PermissionError:
            # A process with this pid does exist, we're just not allowed to
            # signal it (it belongs to another user).  The pid is therefore
            # taken; keep searching rather than blowing up.
            pass
        fake_pid -= 1
    else:
        raise RuntimeError('Cannot find free PID')
    # Lock acquisition logic taken from flufl.lock.
    claim_file = SEP.join((
        config.LOCK_FILE,
        socket.getfqdn(),
        str(fake_pid),
        '0'))
    with open(config.LOCK_FILE, 'w') as fp:
        fp.write(claim_file)
    os.link(config.LOCK_FILE, claim_file)
    # Back-date the claim so that the lock is already expired.
    expiration_date = datetime.now() - timedelta(minutes=5)
    t = time.mktime(expiration_date.timetuple())
    os.utime(claim_file, (t, t))
    return claim_file


@public
def kill_with_extreme_prejudice(pid_or_pidfile=None):
    """SIGTERM a child process and reap it, escalating to SIGKILL if needed.

    :param pid_or_pidfile: Either a process id, the path (as a str) of a file
        containing a process id, or None.  With None no signal is sent; the
        function only waits on whatever child processes there are.
    :return: None.  Problems are reported as warnings on stderr.
    """
    # 2016-12-03 barry: We have intermittent hangs during both local and CI
    # test suite runs where killing a runner or master process doesn't
    # terminate the process.  In those cases, wait()ing on the child can
    # suspend the test process indefinitely.  Locally, you have to C-c the
    # test process, but that still doesn't kill it; the process continues to
    # run in the background.  If you then search for the process's pid and
    # SIGTERM it, it will usually exit, which is why I don't understand why
    # the above SIGTERM doesn't kill it sometimes.  However, when run under
    # CI, the test suite will just hang until the CI runner times it out.  It
    # would be better to figure out the underlying cause, because we have
    # definitely seen other situations where a runner process won't exit, but
    # for testing purposes we're just trying to clean up some resources so
    # after a brief attempt at SIGTERMing it, let's SIGKILL it and warn.
    if isinstance(pid_or_pidfile, str):
        try:
            with open(pid_or_pidfile, 'r') as fp:
                pid = int(fp.read())
        except FileNotFoundError:
            # There's nothing to kill.
            return
    else:
        pid = pid_or_pidfile
    if pid is not None:
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # No such process, e.g. the pid came from a stale pid file.
            # There's nothing to kill and nothing to reap.
            return
    until = timedelta(seconds=10) + datetime.now()
    while datetime.now() < until:
        try:
            if pid is None:
                # Keep reaping until there are no children left at all, which
                # is signalled by the ChildProcessError below.
                os.wait3(os.WNOHANG)
            else:
                reaped_pid, _ = os.waitpid(pid, os.WNOHANG)
                if reaped_pid != 0:
                    # The child exited and has been reaped.
                    return
        except ChildProcessError:
            # There is no (longer a) child to wait for: either an earlier
            # iteration reaped the last one, or the pid was never a child of
            # this process.
            return
        time.sleep(0.1)
    if pid is None:
        # There's really not much more we can do because we have no pid to
        # SIGKILL.  Just report the problem and continue.
        print('WARNING: NO CHANGE IN CHILD PROCESS STATES',
              file=sys.stderr)
        return
    print('WARNING: SIGTERM DID NOT EXIT PROCESS; SIGKILLing',
          file=sys.stderr)
    os.kill(pid, signal.SIGKILL)
    until = timedelta(seconds=10) + datetime.now()
    while datetime.now() < until:
        try:
            reaped_pid, _ = os.waitpid(pid, os.WNOHANG)
        except ChildProcessError:
            # Somebody else already reaped the child.
            return
        # With WNOHANG, waitpid() returns (0, 0) for as long as the child has
        # NOT exited yet; a non-zero pid means the child was reaped.
        if reaped_pid != 0:
            return
        time.sleep(0.1)
    print('WARNING: SIGKILL DID NOT EXIT PROCESS!', file=sys.stderr)


class TestControl(unittest.TestCase):
    """Corner cases of `start`, with forking and exec'ing mocked out."""

    layer = ConfigLayer
    maxDiff = None

    def setUp(self):
        self._command = CliRunner()
        self._tmpdir = TemporaryDirectory()
        self.addCleanup(self._tmpdir.cleanup)
        # The pid file lives in the temporary directory.  Whether or not the
        # master ever gets started, make sure it's dead once the test ends.
        self._pid_file = os.path.join(self._tmpdir.name, 'master-test.pid')
        self.addCleanup(kill_with_extreme_prejudice, self._pid_file)
        # Neuter the process control calls made by cli_control: a real fork
        # would make race conditions in the test unavoidable, and we never
        # want to really os.execl().
        with ExitStack() as stack:
            # Pretend to be the child.
            fork_patch = patch(
                'mailman.commands.cli_control.os.fork', return_value=0)
            stack.enter_context(fork_patch)
            execl_patch = patch('mailman.commands.cli_control.os.execl')
            self._execl = stack.enter_context(execl_patch)
            stack.enter_context(
                patch('mailman.commands.cli_control.os.setsid'))
            stack.enter_context(
                patch('mailman.commands.cli_control.os.chdir'))
            environ_copy = os.environ.copy()
            stack.enter_context(
                patch('mailman.commands.cli_control.os.environ',
                      environ_copy))
            # Everything went fine, so keep the mocks installed until the
            # test is over instead of reverting them right here.
            self.addCleanup(stack.pop_all().close)

    def _relocate_master(self, stack):
        # Move the master script out of BIN_DIR into a new temporary
        # directory and return that directory.  The script is moved back
        # when `stack` unwinds.
        tmp_bin = stack.enter_context(TemporaryDirectory())
        original = os.path.join(config.BIN_DIR, 'master')
        relocated = os.path.join(tmp_bin, 'master')
        shutil.move(original, relocated)
        stack.callback(shutil.move, relocated, original)
        return tmp_bin

    def _exec_master_path(self, results):
        # There must have been exactly one execl() call, and argument #2 of
        # that call should be the path to the master program.  Return it.
        calls = self._execl.call_args_list
        self.assertEqual(len(calls), 1, results.output)
        positional = calls[0][0]
        path = positional[2]
        self.assertEqual(os.path.basename(path), 'master')
        return path

    def test_master_is_elsewhere_and_missing(self):
        with ExitStack() as stack:
            self._relocate_master(stack)
            results = self._command.invoke(start)
            # Nothing told Mailman about the new location, so the path it
            # tries to exec should not exist.
            path = self._exec_master_path(results)
            self.assertFalse(os.path.exists(path), path)

    def test_master_is_elsewhere_and_findable(self):
        with ExitStack() as stack:
            tmp_bin = self._relocate_master(stack)
            with configuration('paths.testing', bin_dir=tmp_bin):
                results = self._command.invoke(start)
            # This time the configuration named the new location, so the
            # path it tries to exec should exist.
            path = self._exec_master_path(results)
            self.assertTrue(os.path.exists(path), path)

    def test_stale_lock_no_force(self):
        stale_claim = claim_lock()
        self.addCleanup(os.remove, stale_claim)
        self.addCleanup(os.remove, config.LOCK_FILE)
        outcome = self._command.invoke(start)
        self.assertEqual(outcome.exit_code, 2)
        expected = (
            'Usage: start [OPTIONS]\n\n'
            'Error: A previous run of GNU Mailman did not exit cleanly '
            '(stale_lock).  Try using --force\n')
        self.assertEqual(outcome.output, expected)

    def test_stale_lock_force(self):
        stale_claim = claim_lock()
        self.addCleanup(os.remove, stale_claim)
        self.addCleanup(os.remove, config.LOCK_FILE)
        # The outcome of the command itself is deliberately ignored.  With
        # os.execl() mocked, the child branch of start() runs on to the
        # RuntimeError at its end.
        self._command.invoke(start, ('--force',))
        calls = self._execl.call_args_list
        self.assertEqual(len(calls), 1)
        positional = calls[0][0]
        self.assertIn('--force', positional)


class TestControlSimple(unittest.TestCase):
    """Tests of the control commands that need no process mocking."""

    layer = ConfigLayer
    maxDiff = None

    def setUp(self):
        self._command = CliRunner()

    def _check_signalled(self, command, signum, output, args=None):
        # Run `command` with kill_watcher() mocked out, then check both the
        # signal that would have been sent to the watcher and what was
        # printed.
        with patch('mailman.commands.cli_control.kill_watcher') as watcher:
            outcome = self._command.invoke(command, args)
        watcher.assert_called_once_with(signum)
        self.assertEqual(outcome.output, output)

    def test_watcher_state_conflict(self):
        conflict = (WatcherState.conflict, object())
        with patch('mailman.commands.cli_control.master_state',
                   return_value=conflict):
            outcome = self._command.invoke(start)
        self.assertEqual(outcome.exit_code, 2)
        expected = (
            'Usage: start [OPTIONS]\n\n'
            'Error: GNU Mailman is already running\n')
        self.assertEqual(outcome.output, expected)

    def test_reopen(self):
        self._check_signalled(
            reopen, signal.SIGHUP, 'Reopening the Mailman runners\n')

    def test_reopen_quiet(self):
        self._check_signalled(
            reopen, signal.SIGHUP, '', args=('--quiet',))

    def test_restart(self):
        self._check_signalled(
            restart, signal.SIGUSR1, 'Restarting the Mailman runners\n')

    def test_restart_quiet(self):
        self._check_signalled(
            restart, signal.SIGUSR1, '', args=('--quiet',))