Kill processes when cleaning up namespaces

This patch will kill processes that are listening on any port/UNIX
socket within the namespace to be cleaned up. To kill them it will
issue a SIGTERM to them (or to their parents if they were forked) and,
if they don't die after a few seconds, a SIGKILL to them and all their
children.

This is intended for those cases when there's no specific cleanup and
serves as a fallback method.

Change-Id: I4195f633ef4a1788496d1293846f19eef89416aa
Partial-Bug: #1403455
(cherry picked from commit 1d38f30555)
This commit is contained in:
Daniel Alvarez 2016-11-24 16:32:50 +00:00
parent 2c81375185
commit 340e776492
14 changed files with 720 additions and 40 deletions

View File

@ -13,8 +13,8 @@ dnsmasq: CommandFilter, dnsmasq, root
# dhcp-agent uses kill as well, that's handled by the generic KillFilter
# it looks like these are the only signals needed, per
# neutron/agent/linux/dhcp.py
kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP
kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP
kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15
kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15
ovs-vsctl: CommandFilter, ovs-vsctl, root
ivs-ctl: CommandFilter, ivs-ctl, root

View File

@ -19,10 +19,10 @@ radvd: CommandFilter, radvd, root
# metadata proxy
metadata_proxy: CommandFilter, neutron-ns-metadata-proxy, root
# RHEL invocation of the metadata proxy will report /usr/bin/python
kill_metadata: KillFilter, root, python, -9
kill_metadata7: KillFilter, root, python2.7, -9
kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -9, -HUP
kill_radvd: KillFilter, root, /sbin/radvd, -9, -HUP
kill_metadata: KillFilter, root, python, -15, -9
kill_metadata7: KillFilter, root, python2.7, -15, -9
kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP
kill_radvd: KillFilter, root, /sbin/radvd, -15, -9, -HUP
# ip_lib
ip: IpFilter, ip, root

View File

@ -0,0 +1,12 @@
# neutron-rootwrap command filters for nodes on which neutron is
# expected to control network
#
# This file should be owned by (and only-writeable by) the root user
# format seems to be
# cmd-name: filter-name, raw-command, user, args
[Filters]
# netns-cleanup
netstat: CommandFilter, netstat, root

View File

@ -174,15 +174,11 @@ class AsyncProcess(object):
try:
# A process started by a root helper will be running as
# root and need to be killed via the same helper.
utils.execute(['kill', '-%d' % kill_signal, pid],
run_as_root=self.run_as_root)
except Exception as ex:
stale_pid = (isinstance(ex, RuntimeError) and
'No such process' in str(ex))
if not stale_pid:
LOG.exception(_LE('An error occurred while killing [%s].'),
self.cmd)
return False
utils.kill_process(pid, kill_signal, self.run_as_root)
except Exception:
LOG.exception(_LE('An error occurred while killing [%s].'),
self.cmd)
return False
if self._process:
self._process.wait()

View File

@ -165,9 +165,11 @@ def get_interface_mac(interface):
for char in info[MAC_START:MAC_END]])[:-1]
def find_child_pids(pid):
"""Retrieve a list of the pids of child processes of the given pid."""
def find_child_pids(pid, recursive=False):
"""Retrieve a list of the pids of child processes of the given pid.
It can also find all children through the hierarchy if recursive=True
"""
try:
raw_pids = execute(['ps', '--ppid', pid, '-o', 'pid='],
log_fail_as_error=False)
@ -179,7 +181,58 @@ def find_child_pids(pid):
if no_children_found:
ctxt.reraise = False
return []
return [x.strip() for x in raw_pids.split('\n') if x.strip()]
child_pids = [x.strip() for x in raw_pids.split('\n') if x.strip()]
if recursive:
for child in child_pids:
child_pids = child_pids + find_child_pids(child, True)
return child_pids
def find_parent_pid(pid):
"""Retrieve the pid of the parent process of the given pid.
If the pid doesn't exist in the system, this function will return
None
"""
try:
ppid = execute(['ps', '-o', 'ppid=', pid],
log_fail_as_error=False)
except ProcessExecutionError as e:
# Unexpected errors are the responsibility of the caller
with excutils.save_and_reraise_exception() as ctxt:
# Exception has already been logged by execute
no_such_pid = e.returncode == 1
if no_such_pid:
ctxt.reraise = False
return
return ppid.strip()
def find_fork_top_parent(pid):
"""Retrieve the pid of the top parent of the given pid through a fork.
This function will search the top parent with its same cmdline. If the
given pid has no parent, its own pid will be returned
"""
while True:
ppid = find_parent_pid(pid)
if (ppid and ppid != pid and
pid_invoked_with_cmdline(ppid, get_cmdline_from_pid(pid))):
pid = ppid
else:
return pid
def kill_process(pid, signal, run_as_root=False):
"""Kill the process with the given pid using the given signal."""
try:
execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root)
except ProcessExecutionError as ex:
# TODO(dalvarez): this check has i18n issues. Maybe we can use
# use gettext module setting a global locale or just pay attention
# to returncode instead of checking the ex message.
if 'No such process' not in str(ex):
raise
def _get_conf_base(cfg_root, uuid, ensure_conf_dir):

View File

@ -15,6 +15,7 @@
import itertools
import re
import signal
import time
from neutron_lib import constants
@ -22,7 +23,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import importutils
from neutron._i18n import _LE
from neutron._i18n import _LE, _LW
from neutron.agent.common import config as agent_config
from neutron.agent.common import ovs_lib
from neutron.agent.l3 import agent as l3_agent
@ -32,7 +33,9 @@ from neutron.agent.linux import dhcp
from neutron.agent.linux import external_process
from neutron.agent.linux import interface
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils
from neutron.common import config
from neutron.common import utils as common_utils
from neutron.conf.agent import cmd
from neutron.conf.agent import dhcp as dhcp_config
@ -44,6 +47,12 @@ NS_PREFIXES = {
'l3': [l3_agent.NS_PREFIX, dvr.SNAT_NS_PREFIX, dvr_fip_ns.FIP_NS_PREFIX],
'lbaas': [LB_NS_PREFIX],
}
SIGTERM_WAITTIME = 10
NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d{2,6})/.*')
class PidsInNamespaceException(Exception):
pass
class FakeDhcpPlugin(object):
@ -130,6 +139,87 @@ def unplug_device(conf, device):
device.set_log_fail_as_error(orig_log_fail_as_error)
def find_listen_pids_namespace(namespace):
"""Retrieve a list of pids of listening processes within the given netns.
It executes netstat -nlp and returns a set of unique pairs
"""
ip = ip_lib.IPWrapper(namespace=namespace)
pids = set()
cmd = ['netstat', '-nlp']
output = ip.netns.execute(cmd, run_as_root=True)
for line in output.splitlines():
m = NETSTAT_PIDS_REGEX.match(line)
if m:
pids.add(m.group('pid'))
return pids
def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
"""Poll listening processes within the given namespace.
If after timeout seconds, there are remaining processes in the namespace,
then a PidsInNamespaceException will be thrown.
"""
# Would be better to handle an eventlet.timeout.Timeout exception
# but currently there's a problem importing eventlet since it's
# doing a local import from cmd/eventlet which doesn't have a
# timeout module
common_utils.wait_until_true(
lambda: not find_listen_pids_namespace(namespace),
timeout=SIGTERM_WAITTIME,
exception=PidsInNamespaceException)
def _kill_listen_processes(namespace, force=False):
"""Identify all listening processes within the given namespace.
Then, for each one, find its top parent with same cmdline (in case this
process forked) and issue a SIGTERM to all of them. If force is True,
then a SIGKILL will be issued to all parents and all their children. Also,
this function returns the number of listening processes.
"""
pids = find_listen_pids_namespace(namespace)
pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids}
kill_signal = signal.SIGTERM
if force:
kill_signal = signal.SIGKILL
children = [utils.find_child_pids(pid, True) for pid in pids_to_kill]
pids_to_kill.update(itertools.chain.from_iterable(children))
for pid in pids_to_kill:
# Throw a warning since this particular cleanup may need a specific
# implementation in the right module. Ideally, netns_cleanup wouldn't
# kill any processes as the responsible module should've killed them
# before cleaning up the namespace
LOG.warning(_LW("Killing (%(signal)d) [%(pid)s] %(cmdline)s"),
{'signal': kill_signal,
'pid': pid,
'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80]
})
try:
utils.kill_process(pid, kill_signal, run_as_root=True)
except Exception as ex:
LOG.error(_LE('An error occurred while killing '
'[%(pid)s]: %(msg)s'), {'pid': pid, 'msg': ex})
return len(pids)
def kill_listen_processes(namespace):
"""Kill all processes listening within the given namespace.
First it tries to kill them using SIGTERM, waits until they die gracefully
and then kills remaining processes (if any) with SIGKILL
"""
if _kill_listen_processes(namespace, force=False):
try:
wait_until_no_listen_pids_namespace(namespace)
except PidsInNamespaceException:
_kill_listen_processes(namespace, force=True)
# Allow some time for remaining processes to die
wait_until_no_listen_pids_namespace(namespace)
def destroy_namespace(conf, namespace, force=False):
"""Destroy a given namespace.
@ -145,6 +235,14 @@ def destroy_namespace(conf, namespace, force=False):
# NOTE: The dhcp driver will remove the namespace if is it empty,
# so a second check is required here.
if ip.netns.exists(namespace):
try:
kill_listen_processes(namespace)
except PidsInNamespaceException:
# This is unlikely since, at this point, we have SIGKILLed
# all remaining processes but if there are still some, log
# the error and continue with the cleanup
LOG.error(_LE('Not all processes were killed in %s'),
namespace)
for device in ip.get_devices(exclude_loopback=True):
unplug_device(conf, device)

View File

@ -41,3 +41,9 @@ ovstrace_filter: RegExpFilter, ovs-appctl, root, ovs-appctl, ofproto/trace, .*,
# needed for TestGetRootHelperChildPid
bash_filter: RegExpFilter, /bin/bash, root, bash, -c, \(sleep 100\)
sleep_kill: KillFilter, root, sleep, -9
#needed by test_netns_cleanup
process_spawn: EnvFilter, env, root, PATH=, python
ip_exec: IpNetnsExecFilter, ip, root
ps: CommandFilter, ps, root
pid_kill: RegExpFilter, kill, root, kill, -\d+, .*

View File

@ -0,0 +1,180 @@
# Copyright 2016 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import signal
import socket
import sys
import time
from neutron_lib import constants as n_const
from oslo_config import cfg
from neutron._i18n import _
from neutron.agent.linux import daemon
from neutron.tests.common import net_helpers
UNIX_FAMILY = 'UNIX'
OPTS = [
cfg.IntOpt('num_children',
short='n',
default=0,
help=_('Number of children to spawn'),
required=False),
cfg.StrOpt('family',
short='f',
default=n_const.IPv4,
choices=[n_const.IPv4, n_const.IPv6, UNIX_FAMILY],
help=_('Listen socket family (%(v4)s, %(v6)s or %(unix)s)') %
{
'v4': n_const.IPv4,
'v6': n_const.IPv6,
'unix': UNIX_FAMILY
},
required=False),
cfg.StrOpt('proto',
short='p',
default=n_const.PROTO_NAME_TCP,
choices=[n_const.PROTO_NAME_TCP, n_const.PROTO_NAME_UDP],
help=_('Protocol (%(tcp)s or %(udp)s)') %
{
'tcp': n_const.PROTO_NAME_TCP,
'udp': n_const.PROTO_NAME_UDP
},
required=False),
cfg.BoolOpt('parent_listen',
short='pl',
default=True,
help=_('Parent process must listen too'),
required=False),
cfg.BoolOpt('ignore_sigterm',
short='i',
default=False,
help=_('Ignore SIGTERM'),
required=False)
]
class ProcessSpawn(daemon.Daemon):
"""
This class is part of the functional test of the netns_cleanup module.
It allows spawning processes that listen on random ports either on
tcp(6), udp(6) or unix sockets. Also it allows handling or ignoring
SIGTERM to check whether the cleanup works as expected by getting rid
of the spawned processes.
"""
MAX_BIND_RETRIES = 5
DCT_FAMILY = {
n_const.IPv4: socket.AF_INET,
n_const.IPv6: socket.AF_INET6,
UNIX_FAMILY: socket.AF_UNIX
}
DCT_PROTO = {
n_const.PROTO_NAME_TCP: socket.SOCK_STREAM,
n_const.PROTO_NAME_UDP: socket.SOCK_DGRAM,
}
def __init__(self, pidfile=None,
family=n_const.IPv4,
proto=n_const.PROTO_NAME_TCP,
ignore_sigterm=False, num_children=0,
parent_must_listen=True):
self.family = family
self.proto = proto
self.ignore_sigterm = ignore_sigterm
self.num_children = num_children
self.listen_socket = None
self.parent_must_listen = parent_must_listen
self.child_pids = []
super(ProcessSpawn, self).__init__(pidfile)
def start_listening(self):
socket_family = self.DCT_FAMILY[self.family]
socket_type = self.DCT_PROTO[self.proto]
self.listen_socket = socket.socket(socket_family, socket_type)
# Try to listen in a random port which is not currently in use
retries = 0
while retries < ProcessSpawn.MAX_BIND_RETRIES:
# NOTE(dalvarez): not finding a free port on a freshly created
# namespace is very unlikely but if problems show up, retries can
# be increased to avoid tests failing
try:
if self.family == UNIX_FAMILY:
self.listen_socket.bind('')
else:
port = net_helpers.get_free_namespace_port(self.proto)
self.listen_socket.bind(('', port))
except socket.error:
retries += 1
else:
if n_const.PROTO_NAME_TCP in self.proto:
self.listen_socket.listen(0)
break
def do_sleep(self):
while True:
time.sleep(10)
def run(self):
# Spawn as many children as requested
children = []
while len(children) != self.num_children:
child_pid = os.fork()
if child_pid == 0:
# Listen and do nothing else
self.start_listening()
self.do_sleep()
return
children.append(child_pid)
# Install a SIGTERM handler if requested
if not self.ignore_sigterm:
signal.signal(signal.SIGTERM, self.sigterm_handler)
self.child_pids = children
if self.parent_must_listen:
self.start_listening()
self.do_sleep()
def sigterm_handler(self, signum, frame):
if self.listen_socket:
self.listen_socket.close()
for child in self.child_pids:
try:
os.kill(child, signal.SIGTERM)
except OSError:
pass
sys.exit(0)
def main():
cfg.CONF.register_cli_opts(OPTS)
cfg.CONF(project='neutron', default_config_files=[])
proc_spawn = ProcessSpawn(num_children=cfg.CONF.num_children,
family=cfg.CONF.family,
proto=cfg.CONF.proto,
parent_must_listen=cfg.CONF.parent_listen,
ignore_sigterm=cfg.CONF.ignore_sigterm)
proc_spawn.start()
if __name__ == "__main__":
main()

View File

@ -13,19 +13,26 @@
# License for the specific language governing permissions and limitations
# under the License.
import os
import mock
from neutron_lib import constants as n_const
from neutron.agent.l3 import agent as l3_agent
from neutron.agent.linux import dhcp
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils
from neutron.cmd import netns_cleanup
from neutron.common import utils as common_utils
from neutron.conf.agent import cmd
from neutron.tests import base as basetest
from neutron.tests.common import net_helpers
from neutron.tests.functional import base
from neutron.tests.functional.cmd import process_spawn
GET_NAMESPACES = 'neutron.agent.linux.ip_lib.IPWrapper.get_namespaces'
TEST_INTERFACE_DRIVER = 'neutron.agent.linux.interface.OVSInterfaceDriver'
NUM_SUBPROCESSES = 6
class NetnsCleanupTest(base.BaseSudoTestCase):
@ -60,13 +67,82 @@ class NetnsCleanupTest(base.BaseSudoTestCase):
# tests, as otherwise cleanup will kill them all
self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]
# launch processes in each namespace to make sure they're
# killed during cleanup
procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
self.assertIsNot(procs_launched, 0)
common_utils.wait_until_true(
lambda: self._get_num_spawned_procs() == procs_launched,
timeout=15,
exception=Exception("Didn't spawn expected number of processes"))
netns_cleanup.cleanup_network_namespaces(self.conf)
self.get_namespaces_p.stop()
namespaces_now = ip_lib.IPWrapper.get_namespaces()
procs_after = self._get_num_spawned_procs()
self.assertEqual(procs_after, 0)
self.assertNotIn(l3_namespace, namespaces_now)
self.assertNotIn(dhcp_namespace, namespaces_now)
@staticmethod
def _launch_processes(namespaces):
"""
Launch processes in the specified namespaces.
This function will spawn processes inside the given namespaces:
- 6 processes listening on tcp ports (parent + 5 children)
- 1 process + 5 subprocesses listening on unix sockets
- 1 process + 5 subprocesses listening on udp6 sockets
First two sets of processes will process SIGTERM so when the parent
gets killed, it will kill all spawned children
The last set of processes will ignore SIGTERM. This will allow us
to test the cleanup functionality which will issue a SIGKILL
to all remaining processes after the SIGTERM attempt
"""
commands = [['python', process_spawn.__file__,
'-n', NUM_SUBPROCESSES,
'-f', n_const.IPv4,
'-p', n_const.PROTO_NAME_TCP,
'--noignore_sigterm',
'--parent_listen'],
['python', process_spawn.__file__,
'-n', NUM_SUBPROCESSES,
'-f', process_spawn.UNIX_FAMILY,
'-p', n_const.PROTO_NAME_TCP,
'--noignore_sigterm',
'--noparent_listen'],
['python', process_spawn.__file__,
'-n', NUM_SUBPROCESSES,
'-f', n_const.IPv4,
'-p', n_const.PROTO_NAME_UDP,
'--ignore_sigterm',
'--noparent_listen']]
proc_count = 0
for netns in namespaces:
ip = ip_lib.IPWrapper(namespace=netns)
for command in commands:
# The total amount of processes per command is
# the process itself plus the number of subprocesses spawned by
# it
proc_count += (1 + NUM_SUBPROCESSES)
# We need to pass the PATH env variable so that python
# interpreter runs under the same virtual environment.
# Otherwise, it won't find the necessary packages such as
# oslo_config
ip.netns.execute(command,
addl_env={'PATH':
os.environ.get('PATH')})
return proc_count
@staticmethod
def _get_num_spawned_procs():
cmd = ['ps', '-f', '-u', 'root']
out = utils.execute(cmd, run_as_root=True)
return sum([1 for line in out.splitlines() if 'process_spawn' in line])
class TestNETNSCLIConfig(basetest.BaseTestCase):

View File

@ -198,20 +198,18 @@ class TestAsyncProcess(base.BaseTestCase):
exc = RuntimeError(exception_message)
else:
exc = None
with mock.patch.object(utils, 'execute',
side_effect=exc) as mock_execute:
with mock.patch.object(utils, 'kill_process',
side_effect=exc) as mock_kill_process:
actual = self.proc._kill_process(pid, kill_signal)
self.assertEqual(expected, actual)
mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
run_as_root=self.proc.run_as_root)
mock_kill_process.assert_called_with(pid,
kill_signal,
self.proc.run_as_root)
def test__kill_process_returns_true_for_valid_pid(self):
self._test__kill_process('1', True)
def test__kill_process_returns_true_for_stale_pid(self):
self._test__kill_process('1', True, 'No such process')
def test__kill_process_returns_false_for_execute_exception(self):
self._test__kill_process('1', False, 'Invalid')

View File

@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import signal
import socket
import mock
@ -181,6 +182,100 @@ class AgentUtilsGetInterfaceMAC(base.BaseTestCase):
self.assertEqual(actual_val, expect_val)
class TestFindParentPid(base.BaseTestCase):
def setUp(self):
super(TestFindParentPid, self).setUp()
self.m_execute = mock.patch.object(utils, 'execute').start()
def test_returns_none_for_no_valid_pid(self):
self.m_execute.side_effect = utils.ProcessExecutionError('',
returncode=1)
self.assertIsNone(utils.find_parent_pid(-1))
def test_returns_parent_id_for_good_ouput(self):
self.m_execute.return_value = '123 \n'
self.assertEqual(utils.find_parent_pid(-1), '123')
def test_raises_exception_returncode_0(self):
with testtools.ExpectedException(utils.ProcessExecutionError):
self.m_execute.side_effect = \
utils.ProcessExecutionError('', returncode=0)
utils.find_parent_pid(-1)
def test_raises_unknown_exception(self):
with testtools.ExpectedException(RuntimeError):
self.m_execute.side_effect = RuntimeError()
utils.find_parent_pid(-1)
class TestFindForkTopParent(base.BaseTestCase):
def _test_find_fork_top_parent(self, expected=_marker,
find_parent_pid_retvals=None,
pid_invoked_with_cmdline_retvals=None):
def _find_parent_pid(x):
if find_parent_pid_retvals:
return find_parent_pid_retvals.pop(0)
pid_invoked_with_cmdline = {}
if pid_invoked_with_cmdline_retvals:
pid_invoked_with_cmdline['side_effect'] = (
pid_invoked_with_cmdline_retvals)
else:
pid_invoked_with_cmdline['return_value'] = False
with mock.patch.object(utils, 'find_parent_pid',
side_effect=_find_parent_pid), \
mock.patch.object(utils, 'pid_invoked_with_cmdline',
**pid_invoked_with_cmdline):
actual = utils.find_fork_top_parent(_marker)
self.assertEqual(expected, actual)
def test_returns_own_pid_no_parent(self):
self._test_find_fork_top_parent()
def test_returns_own_pid_nofork(self):
self._test_find_fork_top_parent(find_parent_pid_retvals=['2', '3'])
def test_returns_first_parent_pid_fork(self):
self._test_find_fork_top_parent(
expected='2',
find_parent_pid_retvals=['2', '3', '4'],
pid_invoked_with_cmdline_retvals=[True, False, False])
def test_returns_top_parent_pid_fork(self):
self._test_find_fork_top_parent(
expected='4',
find_parent_pid_retvals=['2', '3', '4'],
pid_invoked_with_cmdline_retvals=[True, True, True])
class TestKillProcess(base.BaseTestCase):
def _test_kill_process(self, pid, exception_message=None,
kill_signal=signal.SIGKILL):
if exception_message:
exc = utils.ProcessExecutionError(exception_message, returncode=0)
else:
exc = None
with mock.patch.object(utils, 'execute',
side_effect=exc) as mock_execute:
utils.kill_process(pid, kill_signal, run_as_root=True)
mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
run_as_root=True)
def test_kill_process_returns_none_for_valid_pid(self):
self._test_kill_process('1')
def test_kill_process_returns_none_for_stale_pid(self):
self._test_kill_process('1', 'No such process')
def test_kill_process_raises_exception_for_execute_exception(self):
with testtools.ExpectedException(utils.ProcessExecutionError):
self._test_kill_process('1', 'Invalid')
def test_kill_process_with_different_signal(self):
self._test_kill_process('1', kill_signal=signal.SIGTERM)
class TestFindChildPids(base.BaseTestCase):
def test_returns_empty_list_for_exit_code_1(self):
@ -197,6 +292,14 @@ class TestFindChildPids(base.BaseTestCase):
with mock.patch.object(utils, 'execute', return_value=' 123 \n 185\n'):
self.assertEqual(utils.find_child_pids(-1), ['123', '185'])
def test_returns_list_of_child_process_ids_recursively(self):
with mock.patch.object(utils, 'execute',
side_effect=[' 123 \n 185\n',
' 40 \n', '\n',
'41\n', '\n']):
actual = utils.find_child_pids(-1, True)
self.assertEqual(actual, ['123', '185', '40', '41'])
def test_raises_unknown_exception(self):
with testtools.ExpectedException(RuntimeError):
with mock.patch.object(utils, 'execute',

View File

@ -13,11 +13,47 @@
# License for the specific language governing permissions and limitations
# under the License.
import signal
import mock
import testtools
from neutron.cmd import netns_cleanup as util
from neutron.tests import base
NETSTAT_NETNS_OUTPUT = ("""
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State\
PID/Program name
tcp 0 0 0.0.0.0:9697 0.0.0.0:* LISTEN\
1347/python
raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
1279/keepalived
raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
1279/keepalived
raw6 0 0 :::58 :::* 7\
1349/radvd
Active UNIX domain sockets (only servers)
Proto RefCnt Flags Type State I-Node PID/Program name\
Path
unix 2 [ ACC ] STREAM LISTENING 82039530 1353/python\
/tmp/rootwrap-VKSm8a/rootwrap.sock
""")
NETSTAT_NO_NAMESPACE = ("""
Cannot open network namespace "qrouter-e6f206b2-4e8d-4597-a7e1-c3a20337e9c6":\
No such file or directory
""")
NETSTAT_NO_LISTEN_PROCS = ("""
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State\
PID/Program name
Active UNIX domain sockets (only servers)
Proto RefCnt Flags Type State I-Node PID/Program name\
Path
""")
class TestNetnsCleanup(base.BaseTestCase):
def setUp(self):
@ -163,6 +199,115 @@ class TestNetnsCleanup(base.BaseTestCase):
self.assertEqual([], ovs_br_cls.mock_calls)
self.assertTrue(debug.called)
def _test_find_listen_pids_namespace_helper(self, expected,
netstat_output=None):
with mock.patch('neutron.agent.linux.ip_lib.IPWrapper') as ip_wrap:
ip_wrap.return_value.netns.execute.return_value = netstat_output
observed = util.find_listen_pids_namespace(mock.ANY)
self.assertEqual(expected, observed)
def test_find_listen_pids_namespace_correct_output(self):
expected = set(['1347', '1279', '1349', '1353'])
self._test_find_listen_pids_namespace_helper(expected,
NETSTAT_NETNS_OUTPUT)
def test_find_listen_pids_namespace_no_procs(self):
expected = set()
self._test_find_listen_pids_namespace_helper(expected,
NETSTAT_NO_LISTEN_PROCS)
def test_find_listen_pids_namespace_no_namespace(self):
expected = set()
self._test_find_listen_pids_namespace_helper(expected,
NETSTAT_NO_NAMESPACE)
def _test__kill_listen_processes_helper(self, pids, parents, children,
kills_expected, force):
def _get_element(dct, x):
return dct.get(x, [])
def _find_childs(x, recursive):
return _get_element(children, x)
def _find_parent(x):
return _get_element(parents, x)
utils_mock = dict(
find_fork_top_parent=mock.DEFAULT,
find_child_pids=mock.DEFAULT,
get_cmdline_from_pid=mock.DEFAULT,
kill_process=mock.DEFAULT)
self.log_mock = mock.patch.object(util, 'LOG').start()
with mock.patch.multiple('neutron.agent.linux.utils', **utils_mock)\
as mocks:
mocks['find_fork_top_parent'].side_effect = _find_parent
mocks['find_child_pids'].side_effect = _find_childs
with mock.patch.object(util, 'find_listen_pids_namespace',
return_value=pids):
calls = []
for pid, sig in kills_expected:
calls.append(mock.call(pid, sig, run_as_root=True))
util._kill_listen_processes(mock.ANY, force=force)
mocks['kill_process'].assert_has_calls(calls, any_order=True)
def test__kill_listen_processes_only_parents_force_false(self):
pids = ['4', '5', '6']
parents = {'4': '1', '5': '5', '6': '2'}
children = {}
kills_expected = [('1', signal.SIGTERM),
('5', signal.SIGTERM),
('2', signal.SIGTERM)]
self._test__kill_listen_processes_helper(pids, parents, children,
kills_expected, False)
def test__kill_listen_processes_parents_and_childs(self):
pids = ['4', '5', '6']
parents = {'4': '1', '5': '2', '6': '3'}
children = {'1': ['4'], '2': ['5'], '3': ['6', '8', '7']}
kills_expected = [(str(x), signal.SIGKILL) for x in range(1, 9)]
self._test__kill_listen_processes_helper(pids, parents, children,
kills_expected, True)
def test_kill_listen_processes(self):
with mock.patch.object(util, '_kill_listen_processes',
return_value=1) as mock_kill_listen:
with mock.patch('neutron.common.utils.wait_until_true')\
as wait_until_true_mock:
wait_until_true_mock.side_effect = [
util.PidsInNamespaceException,
None]
namespace = mock.ANY
util.kill_listen_processes(namespace)
mock_kill_listen.assert_has_calls(
[mock.call(namespace, force=False),
mock.call(namespace, force=True)])
def test_kill_listen_processes_still_procs(self):
with mock.patch.object(util, '_kill_listen_processes',
return_value=1):
with mock.patch('neutron.common.utils.wait_until_true')\
as wait_until_true_mock:
wait_until_true_mock.side_effect = (
util.PidsInNamespaceException)
namespace = mock.ANY
with testtools.ExpectedException(
util.PidsInNamespaceException):
util.kill_listen_processes(namespace)
def test_kill_listen_processes_no_procs(self):
with mock.patch.object(util, '_kill_listen_processes',
return_value=0) as mock_kill_listen:
with mock.patch('neutron.common.utils.wait_until_true')\
as wait_until_true_mock:
namespace = mock.ANY
util.kill_listen_processes(namespace)
mock_kill_listen.assert_called_once_with(namespace,
force=False)
self.assertFalse(wait_until_true_mock.called)
def _test_destroy_namespace_helper(self, force, num_devices):
ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'
conf = mock.Mock()
@ -182,23 +327,27 @@ class TestNetnsCleanup(base.BaseTestCase):
ip_wrap.return_value.get_devices.return_value = devices
ip_wrap.return_value.netns.exists.return_value = True
with mock.patch.object(util, 'unplug_device') as unplug:
with mock.patch.object(util, 'kill_listen_processes'):
with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
util.destroy_namespace(conf, ns, force)
expected = [mock.call(namespace=ns)]
with mock.patch.object(util, 'unplug_device') as unplug:
if force:
expected.extend([
mock.call().netns.exists(ns),
mock.call().get_devices(exclude_loopback=True)])
self.assertTrue(kill_dhcp.called)
unplug.assert_has_calls(
[mock.call(conf, d) for d in
devices[1:]])
with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
util.destroy_namespace(conf, ns, force)
expected = [mock.call(namespace=ns)]
expected.append(mock.call().garbage_collect_namespace())
ip_wrap.assert_has_calls(expected)
if force:
expected.extend([
mock.call().netns.exists(ns),
mock.call().get_devices(
exclude_loopback=True)])
self.assertTrue(kill_dhcp.called)
unplug.assert_has_calls(
[mock.call(conf, d) for d in
devices[1:]])
expected.append(
mock.call().garbage_collect_namespace())
ip_wrap.assert_has_calls(expected)
def test_destroy_namespace_empty(self):
self._test_destroy_namespace_helper(False, 0)

View File

@ -0,0 +1,8 @@
---
features:
- A new mechanism has been added to netns_cleanup to
kill processes that are listening on any port/unix
socket within the namespace. This will try to kill
them gracefully via SIGTERM and, if they don't die,
then a SIGKILL will be sent to the remaining
processes to ensure a proper cleanup.

View File

@ -36,6 +36,7 @@ data_files =
etc/neutron/rootwrap.d/ipset-firewall.filters
etc/neutron/rootwrap.d/l3.filters
etc/neutron/rootwrap.d/linuxbridge-plugin.filters
etc/neutron/rootwrap.d/netns-cleanup.filters
etc/neutron/rootwrap.d/openvswitch-plugin.filters
scripts =
bin/neutron-rootwrap-xen-dom0