From 19eb12bd296737d9a67675b29d3981393b690c43 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Date: Thu, 25 Mar 2021 22:05:58 +0000 Subject: [PATCH] Revert "Implement "kill" method using os.kill()" This reverts commit 4b21111eb11c98b544252d501f33077c7417daa0. Reason for revert: This method is unstable and prone to timeouts Change-Id: I6064d60e4d63b085046aace7683d766a79dd22da --- neutron/agent/l3/ha_router.py | 4 +- neutron/agent/linux/external_process.py | 20 +++---- neutron/agent/linux/keepalived.py | 4 +- neutron/agent/linux/utils.py | 33 +++-------- neutron/privileged/__init__.py | 3 +- neutron/privileged/agent/linux/utils.py | 6 -- neutron/tests/common/net_helpers.py | 9 +-- .../agent/linux/test_async_process.py | 4 +- .../functional/agent/linux/test_utils.py | 59 ++----------------- .../tests/functional/agent/test_dhcp_agent.py | 3 +- neutron/tests/unit/agent/l3/test_ha_router.py | 3 +- .../unit/agent/linux/test_external_process.py | 49 ++++++--------- .../tests/unit/agent/linux/test_keepalived.py | 7 ++- neutron/tests/unit/agent/linux/test_utils.py | 36 +++++++++++ 14 files changed, 88 insertions(+), 152 deletions(-) diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 306e69e49c9..2775706f5ef 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -440,12 +440,12 @@ class HaRouter(router.RouterInfo): pm = self._get_state_change_monitor_process_manager() process_monitor.unregister( self.router_id, IP_MONITOR_PROCESS_SERVICE) - pm.disable(sig=signal.SIGTERM) + pm.disable(sig=str(int(signal.SIGTERM))) try: common_utils.wait_until_true(lambda: not pm.active, timeout=SIGTERM_TIMEOUT) except common_utils.WaitTimeout: - pm.disable(sig=signal.SIGKILL) + pm.disable(sig=str(int(signal.SIGKILL))) @staticmethod def _gateway_ports_equal(port1, port2): diff --git a/neutron/agent/linux/external_process.py b/neutron/agent/linux/external_process.py index d5a3ce45a12..e8437336ae8 100644 --- a/neutron/agent/linux/external_process.py +++ b/neutron/agent/linux/external_process.py @@ -15,7 +15,6 @@ import abc import collections import os.path -import signal import eventlet from oslo_concurrency import lockutils @@ -97,10 +96,9 @@ class ProcessManager(MonitoredProcess): if self.custom_reload_callback: self.disable(get_stop_command=self.custom_reload_callback) else: - self.disable(signal.SIGHUP) + self.disable('HUP') - def disable(self, sig=signal.SIGKILL, get_stop_command=None): - sig = int(sig) + def disable(self, sig='9', get_stop_command=None): pid = self.pid if self.active: @@ -111,9 +109,11 @@ class ProcessManager(MonitoredProcess): run_as_root=self.run_as_root, privsep_exec=True) else: - self._kill_process(sig, pid) + cmd = self.get_kill_cmd(sig, pid) + utils.execute(cmd, run_as_root=self.run_as_root, + privsep_exec=True) # In the case of shutting down, remove the pid file - if sig == signal.SIGKILL: + if sig == '9': utils.delete_if_exists(self.get_pid_file_name(), run_as_root=self.run_as_root) elif pid: @@ -125,14 +125,12 @@ class ProcessManager(MonitoredProcess): LOG.debug('No %(service)s process started for %(uuid)s', {'service': self.service, 'uuid': self.uuid}) - def _kill_process(self, sig, pid): + def get_kill_cmd(self, sig, pid): if self.kill_scripts_path: kill_file = "%s-kill" % self.service if os.path.isfile(os.path.join(self.kill_scripts_path, kill_file)): - utils.execute([kill_file, sig, pid], - run_as_root=self.run_as_root) - return - utils.kill_process(pid, sig, run_as_root=self.run_as_root) + return [kill_file, sig, pid] + return ['kill', '-%s' % (sig), pid] def get_pid_file_name(self): """Returns the file name for a given kind of config file.""" diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index cf6c6067611..910d0be96e2 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -479,7 +479,7 @@ class KeepalivedManager(object): service_name=KEEPALIVED_SERVICE_NAME) pm = self.get_process() - pm.disable(sig=signal.SIGTERM) + pm.disable(sig=str(int(signal.SIGTERM))) try: utils.wait_until_true(lambda: not pm.active, timeout=SIGTERM_TIMEOUT) @@ -487,7 +487,7 @@ class KeepalivedManager(object): LOG.warning('Keepalived process %s did not finish after SIGTERM ' 'signal in %s seconds, sending SIGKILL signal', pm.pid, SIGTERM_TIMEOUT) - pm.disable(sig=signal.SIGKILL) + pm.disable(sig=str(int(signal.SIGKILL))) def check_processes(self): keepalived_pm = self.get_process() diff --git a/neutron/agent/linux/utils.py b/neutron/agent/linux/utils.py index 1e24c1873a5..c7e39d73037 100644 --- a/neutron/agent/linux/utils.py +++ b/neutron/agent/linux/utils.py @@ -214,33 +214,14 @@ def find_fork_top_parent(pid): return pid -def kill_process(pid, signal, run_as_root=False, extra_ok_codes=None, - raise_exception=True): - """Kill the process with the given pid using the given signal. - - :param pid: (str, int) process PID - :param signal: (str, int) signal to send - :param run_as_root: (bool) execute the command as root user - :param extra_ok_codes: (list of int) list of "errno" codes - :param raise_exception: (bool) if True, if an exception occurs, it is - raised - :return: 0 if OK, "errno" code in case of muted exception (see - "extra_ok_codes") - """ +def kill_process(pid, signal, run_as_root=False): + """Kill the process with the given pid using the given signal.""" try: - LOG.debug("Start killing process %s, signal %s", pid, signal) - if run_as_root: - priv_utils.kill_process(pid, signal) - else: - os.kill(int(pid), int(signal)) - LOG.debug("Finish killing process %s, signal %s", pid, signal) - except OSError as exc: - with excutils.save_and_reraise_exception() as ctxt: - extra_ok_codes = extra_ok_codes or [] - if exc.errno in extra_ok_codes or not raise_exception: - ctxt.reraise = False - return exc.errno - return 0 + execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root, + privsep_exec=True) + except exceptions.ProcessExecutionError: + if process_is_running(pid): + raise def _get_conf_base(cfg_root, uuid, ensure_conf_dir): diff --git a/neutron/privileged/__init__.py b/neutron/privileged/__init__.py index 9c34936f4a5..f4fc471ede6 100644 --- a/neutron/privileged/__init__.py +++ b/neutron/privileged/__init__.py @@ -26,8 +26,7 @@ default = priv_context.PrivContext( caps.CAP_NET_ADMIN, caps.CAP_DAC_OVERRIDE, caps.CAP_DAC_READ_SEARCH, - caps.CAP_SYS_PTRACE, - caps.CAP_KILL], + caps.CAP_SYS_PTRACE], ) diff --git a/neutron/privileged/agent/linux/utils.py b/neutron/privileged/agent/linux/utils.py index 58398120eb2..a87edf907bf 100644 --- a/neutron/privileged/agent/linux/utils.py +++ b/neutron/privileged/agent/linux/utils.py @@ -82,9 +82,3 @@ def _create_process(cmd, addl_env=None): obj = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return obj, cmd - - -@privileged.default.entrypoint -def kill_process(pid, signal): - """Kill the process with the given pid using the given signal.""" - os.kill(int(pid), int(signal)) diff --git a/neutron/tests/common/net_helpers.py b/neutron/tests/common/net_helpers.py index 380efd836a6..bff08b5ae8f 100644 --- a/neutron/tests/common/net_helpers.py +++ b/neutron/tests/common/net_helpers.py @@ -16,7 +16,6 @@ import abc from concurrent import futures import contextlib -import errno import os import random import re @@ -308,10 +307,7 @@ class RootHelperProcess(subprocess.Popen): def kill(self, sig=signal.SIGKILL): pid = self.child_pid or str(self.pid) - if utils.kill_process(pid, sig, run_as_root=True, - extra_ok_codes=[errno.ESRCH]): - LOG.debug('Process %s did not exists already so it could not be ' - 'killed', pid) + utils.execute(['kill', '-%d' % sig, pid], run_as_root=True) def read_stdout(self, timeout=None): return self._read_stream(self.stdout, timeout) @@ -629,8 +625,7 @@ class NamespaceFixture(fixtures.Fixture): if self.ip_wrapper.netns.exists(self.name): for pid in ip_lib.list_namespace_pids(self.name): utils.kill_process(pid, signal.SIGKILL, - run_as_root=True, - extra_ok_codes=[errno.ESRCH]) + run_as_root=True) self.ip_wrapper.netns.delete(self.name) except helpers.TestTimerTimeout: LOG.warning('Namespace %s was not deleted due to a timeout.', diff --git a/neutron/tests/functional/agent/linux/test_async_process.py b/neutron/tests/functional/agent/linux/test_async_process.py index ab4ac9cbb3f..95317021677 100644 --- a/neutron/tests/functional/agent/linux/test_async_process.py +++ b/neutron/tests/functional/agent/linux/test_async_process.py @@ -12,8 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -import signal - import eventlet from neutron._i18n import _ @@ -73,7 +71,7 @@ class TestAsyncProcess(AsyncProcessTestFramework): # Ensure that the same output is read twice self._check_stdout(proc) pid = proc.pid - utils.kill_process(pid, signal.SIGKILL) + utils.execute(['kill', '-9', pid]) common_utils.wait_until_true( lambda: proc.is_active() and pid != proc.pid, timeout=5, diff --git a/neutron/tests/functional/agent/linux/test_utils.py b/neutron/tests/functional/agent/linux/test_utils.py index 5eedec6a44b..b65233efd58 100644 --- a/neutron/tests/functional/agent/linux/test_utils.py +++ b/neutron/tests/functional/agent/linux/test_utils.py @@ -12,7 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -import errno import functools import os import signal @@ -42,8 +41,10 @@ class TestGetRootHelperChildPid(functional_base.BaseSudoTestCase): sleep_pid = utils.execute( ['ps', '--ppid', parent_pid, '-o', 'pid=']).strip() self.addCleanup( - utils.kill_process, sleep_pid, signal.SIGKILL, run_as_root=True, - raise_exception=False) + utils.execute, + ['kill', '-9', sleep_pid], + check_exit_code=False, + run_as_root=True) def test_get_root_helper_child_pid_returns_first_child(self): """Test that the first child, not lowest child pid is returned. @@ -171,55 +172,3 @@ class TestFindChildPids(functional_base.BaseSudoTestCase): with open('/proc/sys/kernel/pid_max', 'r') as fd: pid_max = int(fd.readline().strip()) self.assertEqual([], utils.find_child_pids(pid_max)) - - -class TestKillProcess(functional_base.BaseSudoTestCase): - - # NOTE(ralonsoh): in 64bit systems, PID_MAX_LIMIT is 2^22 - NON_EXISTING_PID = 2**22 + 1 - - def _test_exception(self, pid, error_no, run_as_root=False): - try: - utils.kill_process(pid, signal.SIGUSR1, run_as_root=run_as_root) - except OSError as exc: - self.assertEqual(error_no, exc.errno) - - def test_root_no_such_process(self): - self._test_exception(self.NON_EXISTING_PID, errno.ESRCH, - run_as_root=True) - - def test_root_no_such_proces_hidden(self): - self.assertEqual( - errno.ESRCH, - utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1, - run_as_root=True, extra_ok_codes=[errno.ESRCH])) - - def test_root_exception_not_risen(self): - self.assertEqual( - errno.ESRCH, - utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1, - run_as_root=True, raise_exception=False)) - - def test_non_root_no_such_process(self): - self._test_exception(self.NON_EXISTING_PID, errno.ESRCH) - - def test_non_root_no_such_process_hidden(self): - self.assertEqual( - errno.ESRCH, - utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1, - extra_ok_codes=[errno.ESRCH])) - - def test_non_root_operation_not_permitted(self): - self._test_exception(1, errno.EPERM) - - def test_non_root_operation_not_permitted_hidden(self): - self.assertEqual( - errno.EPERM, - utils.kill_process(1, signal.SIGUSR1, - extra_ok_codes=[errno.EPERM])) - - def test_non_root_exception_not_risen(self): - self.assertEqual( - errno.ESRCH, - utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1, - raise_exception=False)) diff --git a/neutron/tests/functional/agent/test_dhcp_agent.py b/neutron/tests/functional/agent/test_dhcp_agent.py index e8e93b16270..0eef55d62a3 100644 --- a/neutron/tests/functional/agent/test_dhcp_agent.py +++ b/neutron/tests/functional/agent/test_dhcp_agent.py @@ -15,7 +15,6 @@ import copy import os.path -import signal from unittest import mock import eventlet @@ -326,7 +325,7 @@ class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework): pm, network = self._spawn_network_metadata_proxy() old_pid = pm.pid - utils.kill_process(old_pid, signal.SIGKILL, run_as_root=True) + utils.execute(['kill', '-9', old_pid], run_as_root=True) common_utils.wait_until_true( lambda: pm.active and pm.pid != old_pid, timeout=5, diff --git a/neutron/tests/unit/agent/l3/test_ha_router.py b/neutron/tests/unit/agent/l3/test_ha_router.py index ac2ffc83dff..2abd0df59e2 100644 --- a/neutron/tests/unit/agent/l3/test_ha_router.py +++ b/neutron/tests/unit/agent/l3/test_ha_router.py @@ -109,7 +109,8 @@ class TestBasicRouterOperations(base.BaseTestCase): mock_pm.active = False ri.destroy_state_change_monitor(mock_pm) - mock_pm.disable.assert_called_once_with(sig=signal.SIGTERM) + mock_pm.disable.assert_called_once_with( + sig=str(int(signal.SIGTERM))) def test_destroy_state_change_monitor_force(self): ri = self._create_router(mock.MagicMock()) diff --git a/neutron/tests/unit/agent/linux/test_external_process.py b/neutron/tests/unit/agent/linux/test_external_process.py index fc17595e983..d2aa240098a 100644 --- a/neutron/tests/unit/agent/linux/test_external_process.py +++ b/neutron/tests/unit/agent/linux/test_external_process.py @@ -13,7 +13,6 @@ # under the License. import os.path -import signal from unittest import mock from neutron_lib import fixture as lib_fixtures @@ -194,7 +193,7 @@ class TestProcessManager(base.BaseTestCase): with mock.patch.object(ep.ProcessManager, 'disable') as disable: manager = ep.ProcessManager(self.conf, 'uuid', namespace='ns') manager.reload_cfg() - disable.assert_called_once_with(signal.SIGHUP) + disable.assert_called_once_with('HUP') def test_reload_cfg_with_custom_reload_callback(self): reload_callback = mock.sentinel.callback @@ -228,8 +227,10 @@ class TestProcessManager(base.BaseTestCase): with mock.patch.object(ep, 'utils') as utils: manager.disable() - utils.kill_process.assert_has_calls([ - mock.call(4, int(signal.SIGKILL), run_as_root=False)]) + utils.assert_has_calls([ + mock.call.execute(['kill', '-9', 4], + run_as_root=False, + privsep_exec=True)]) def test_disable_namespace(self): with mock.patch.object(ep.ProcessManager, 'pid') as pid: @@ -241,23 +242,10 @@ class TestProcessManager(base.BaseTestCase): with mock.patch.object(ep, 'utils') as utils: manager.disable() - utils.kill_process.assert_has_calls([ - mock.call(4, int(signal.SIGKILL), run_as_root=True)]) - - def test_disable_with_and_without_namespace(self): - namespaces = ['ns', None] - for namespace in namespaces: - with mock.patch.object(ep.ProcessManager, 'pid') as pid: - pid.__get__ = mock.Mock(return_value=4) - with mock.patch.object(ep.ProcessManager, 'active') as active: - active.__get__ = mock.Mock(return_value=True) - manager = ep.ProcessManager(self.conf, 'uuid', - namespace=namespace) - with mock.patch.object(ep, 'utils') as utils: - manager.disable() - call = mock.call(4, int(signal.SIGKILL), - run_as_root=bool(namespace)) - utils.kill_process.assert_has_calls([call]) + utils.assert_has_calls([ + mock.call.execute(['kill', '-9', 4], + run_as_root=True, + privsep_exec=True)]) def test_disable_not_active(self): with mock.patch.object(ep.ProcessManager, 'pid') as pid: @@ -282,10 +270,13 @@ class TestProcessManager(base.BaseTestCase): def _test_disable_custom_kill_script(self, kill_script_exists, namespace, kill_scripts_path='test-path/'): cfg.CONF.set_override("kill_scripts_path", kill_scripts_path, "AGENT") - process_pid = 4 + if kill_script_exists: + expected_cmd = ['test-service-kill', '9', 4] + else: + expected_cmd = ['kill', '-9', 4] with mock.patch.object(ep.ProcessManager, 'pid') as pid: - pid.__get__ = mock.Mock(return_value=process_pid) + pid.__get__ = mock.Mock(return_value=4) with mock.patch.object(ep.ProcessManager, 'active') as active: active.__get__ = mock.Mock(return_value=True) manager = ep.ProcessManager( @@ -295,15 +286,9 @@ class TestProcessManager(base.BaseTestCase): mock.patch.object(os.path, 'isfile', return_value=kill_script_exists): manager.disable() - if kill_script_exists: - expected_cmd = ['test-service-kill', - signal.SIGKILL, process_pid] - utils.execute.assert_called_with( - expected_cmd, run_as_root=bool(namespace)) - else: - utils.kill_process.assert_called_once_with( - process_pid, signal.SIGKILL, - run_as_root=bool(namespace)) + utils.execute.assert_called_with( + expected_cmd, run_as_root=bool(namespace), + privsep_exec=True) def test_disable_custom_kill_script_no_namespace(self): self._test_disable_custom_kill_script( diff --git a/neutron/tests/unit/agent/linux/test_keepalived.py b/neutron/tests/unit/agent/linux/test_keepalived.py index 1200492db8f..28474f9dc89 100644 --- a/neutron/tests/unit/agent/linux/test_keepalived.py +++ b/neutron/tests/unit/agent/linux/test_keepalived.py @@ -634,7 +634,8 @@ class KeepalivedManagerTestCase(base.BaseTestCase): mock_get_process.return_value = process process.active = False self.keepalived_manager.disable() - process.disable.assert_called_once_with(sig=int(signal.SIGTERM)) + process.disable.assert_called_once_with( + sig=str(int(signal.SIGTERM))) def test_destroy_force(self): mock_get_process = self.mock_get_process.start() @@ -644,5 +645,5 @@ class KeepalivedManagerTestCase(base.BaseTestCase): process.active = True self.keepalived_manager.disable() process.disable.assert_has_calls([ - mock.call(sig=signal.SIGTERM), - mock.call(sig=signal.SIGKILL)]) + mock.call(sig=str(int(signal.SIGTERM))), + mock.call(sig=str(int(signal.SIGKILL)))]) diff --git a/neutron/tests/unit/agent/linux/test_utils.py b/neutron/tests/unit/agent/linux/test_utils.py index 2aba6004c08..4e01bb907cd 100644 --- a/neutron/tests/unit/agent/linux/test_utils.py +++ b/neutron/tests/unit/agent/linux/test_utils.py @@ -13,9 +13,12 @@ # under the License. import copy +import signal import socket from unittest import mock +import testtools + from neutron_lib import exceptions from neutron_lib import fixture as lib_fixtures from oslo_config import cfg @@ -224,6 +227,39 @@ class TestFindForkTopParent(base.BaseTestCase): pid_invoked_with_cmdline_retvals=[True, True, True]) +class TestKillProcess(base.BaseTestCase): + def _test_kill_process(self, pid, raise_exception=False, + kill_signal=signal.SIGKILL, pid_killed=True): + if raise_exception: + exc = exceptions.ProcessExecutionError('', returncode=0) + else: + exc = None + with mock.patch.object(utils, 'execute', + side_effect=exc) as mock_execute: + with mock.patch.object(utils, 'process_is_running', + return_value=not pid_killed): + utils.kill_process(pid, kill_signal, run_as_root=True) + + mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid], + run_as_root=True, privsep_exec=True) + + def test_kill_process_returns_none_for_valid_pid(self): + self._test_kill_process('1') + + def test_kill_process_returns_none_for_stale_pid(self): + self._test_kill_process('1', raise_exception=True) + + def test_kill_process_raises_exception_for_execute_exception(self): + with testtools.ExpectedException(exceptions.ProcessExecutionError): + # Simulate that the process is running after trying to kill due to + # any reason such as, for example, Permission denied + self._test_kill_process('1', raise_exception=True, + pid_killed=False) + + def test_kill_process_with_different_signal(self): + self._test_kill_process('1', kill_signal=signal.SIGTERM) + + class TestGetCmdlineFromPid(base.BaseTestCase): def setUp(self):