Kill processes when cleaning up namespaces
This patch will kill processes that are listening on any port/UNIX
socket within the namespace to be cleaned up. To kill them it will
issue a SIGTERM to them (or to their parents if they were forked) and,
if they don't die after a few seconds, a SIGKILL to them and all their
children.
This is intended for those cases when there's no specific cleanup and
serves as a fallback method.
Change-Id: I4195f633ef4a1788496d1293846f19eef89416aa
Partial-Bug: #1403455
(cherry picked from commit 1d38f30555
)
This commit is contained in:
parent
2c81375185
commit
340e776492
@ -13,8 +13,8 @@ dnsmasq: CommandFilter, dnsmasq, root
|
||||
# dhcp-agent uses kill as well, that's handled by the generic KillFilter
|
||||
# it looks like these are the only signals needed, per
|
||||
# neutron/agent/linux/dhcp.py
|
||||
kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP
|
||||
kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP
|
||||
kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15
|
||||
kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15
|
||||
|
||||
ovs-vsctl: CommandFilter, ovs-vsctl, root
|
||||
ivs-ctl: CommandFilter, ivs-ctl, root
|
||||
|
@ -19,10 +19,10 @@ radvd: CommandFilter, radvd, root
|
||||
# metadata proxy
|
||||
metadata_proxy: CommandFilter, neutron-ns-metadata-proxy, root
|
||||
# RHEL invocation of the metadata proxy will report /usr/bin/python
|
||||
kill_metadata: KillFilter, root, python, -9
|
||||
kill_metadata7: KillFilter, root, python2.7, -9
|
||||
kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -9, -HUP
|
||||
kill_radvd: KillFilter, root, /sbin/radvd, -9, -HUP
|
||||
kill_metadata: KillFilter, root, python, -15, -9
|
||||
kill_metadata7: KillFilter, root, python2.7, -15, -9
|
||||
kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP
|
||||
kill_radvd: KillFilter, root, /sbin/radvd, -15, -9, -HUP
|
||||
|
||||
# ip_lib
|
||||
ip: IpFilter, ip, root
|
||||
|
12
etc/neutron/rootwrap.d/netns-cleanup.filters
Normal file
12
etc/neutron/rootwrap.d/netns-cleanup.filters
Normal file
@ -0,0 +1,12 @@
|
||||
# neutron-rootwrap command filters for nodes on which neutron is
|
||||
# expected to control network
|
||||
#
|
||||
# This file should be owned by (and only-writeable by) the root user
|
||||
|
||||
# format seems to be
|
||||
# cmd-name: filter-name, raw-command, user, args
|
||||
|
||||
[Filters]
|
||||
|
||||
# netns-cleanup
|
||||
netstat: CommandFilter, netstat, root
|
@ -174,12 +174,8 @@ class AsyncProcess(object):
|
||||
try:
|
||||
# A process started by a root helper will be running as
|
||||
# root and need to be killed via the same helper.
|
||||
utils.execute(['kill', '-%d' % kill_signal, pid],
|
||||
run_as_root=self.run_as_root)
|
||||
except Exception as ex:
|
||||
stale_pid = (isinstance(ex, RuntimeError) and
|
||||
'No such process' in str(ex))
|
||||
if not stale_pid:
|
||||
utils.kill_process(pid, kill_signal, self.run_as_root)
|
||||
except Exception:
|
||||
LOG.exception(_LE('An error occurred while killing [%s].'),
|
||||
self.cmd)
|
||||
return False
|
||||
|
@ -165,9 +165,11 @@ def get_interface_mac(interface):
|
||||
for char in info[MAC_START:MAC_END]])[:-1]
|
||||
|
||||
|
||||
def find_child_pids(pid):
|
||||
"""Retrieve a list of the pids of child processes of the given pid."""
|
||||
def find_child_pids(pid, recursive=False):
|
||||
"""Retrieve a list of the pids of child processes of the given pid.
|
||||
|
||||
It can also find all children through the hierarchy if recursive=True
|
||||
"""
|
||||
try:
|
||||
raw_pids = execute(['ps', '--ppid', pid, '-o', 'pid='],
|
||||
log_fail_as_error=False)
|
||||
@ -179,7 +181,58 @@ def find_child_pids(pid):
|
||||
if no_children_found:
|
||||
ctxt.reraise = False
|
||||
return []
|
||||
return [x.strip() for x in raw_pids.split('\n') if x.strip()]
|
||||
child_pids = [x.strip() for x in raw_pids.split('\n') if x.strip()]
|
||||
if recursive:
|
||||
for child in child_pids:
|
||||
child_pids = child_pids + find_child_pids(child, True)
|
||||
return child_pids
|
||||
|
||||
|
||||
def find_parent_pid(pid):
|
||||
"""Retrieve the pid of the parent process of the given pid.
|
||||
|
||||
If the pid doesn't exist in the system, this function will return
|
||||
None
|
||||
"""
|
||||
try:
|
||||
ppid = execute(['ps', '-o', 'ppid=', pid],
|
||||
log_fail_as_error=False)
|
||||
except ProcessExecutionError as e:
|
||||
# Unexpected errors are the responsibility of the caller
|
||||
with excutils.save_and_reraise_exception() as ctxt:
|
||||
# Exception has already been logged by execute
|
||||
no_such_pid = e.returncode == 1
|
||||
if no_such_pid:
|
||||
ctxt.reraise = False
|
||||
return
|
||||
return ppid.strip()
|
||||
|
||||
|
||||
def find_fork_top_parent(pid):
|
||||
"""Retrieve the pid of the top parent of the given pid through a fork.
|
||||
|
||||
This function will search the top parent with its same cmdline. If the
|
||||
given pid has no parent, its own pid will be returned
|
||||
"""
|
||||
while True:
|
||||
ppid = find_parent_pid(pid)
|
||||
if (ppid and ppid != pid and
|
||||
pid_invoked_with_cmdline(ppid, get_cmdline_from_pid(pid))):
|
||||
pid = ppid
|
||||
else:
|
||||
return pid
|
||||
|
||||
|
||||
def kill_process(pid, signal, run_as_root=False):
|
||||
"""Kill the process with the given pid using the given signal."""
|
||||
try:
|
||||
execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root)
|
||||
except ProcessExecutionError as ex:
|
||||
# TODO(dalvarez): this check has i18n issues. Maybe we can use
|
||||
# use gettext module setting a global locale or just pay attention
|
||||
# to returncode instead of checking the ex message.
|
||||
if 'No such process' not in str(ex):
|
||||
raise
|
||||
|
||||
|
||||
def _get_conf_base(cfg_root, uuid, ensure_conf_dir):
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
import itertools
|
||||
import re
|
||||
import signal
|
||||
import time
|
||||
|
||||
from neutron_lib import constants
|
||||
@ -22,7 +23,7 @@ from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import importutils
|
||||
|
||||
from neutron._i18n import _LE
|
||||
from neutron._i18n import _LE, _LW
|
||||
from neutron.agent.common import config as agent_config
|
||||
from neutron.agent.common import ovs_lib
|
||||
from neutron.agent.l3 import agent as l3_agent
|
||||
@ -32,7 +33,9 @@ from neutron.agent.linux import dhcp
|
||||
from neutron.agent.linux import external_process
|
||||
from neutron.agent.linux import interface
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import utils
|
||||
from neutron.common import config
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.conf.agent import cmd
|
||||
from neutron.conf.agent import dhcp as dhcp_config
|
||||
|
||||
@ -44,6 +47,12 @@ NS_PREFIXES = {
|
||||
'l3': [l3_agent.NS_PREFIX, dvr.SNAT_NS_PREFIX, dvr_fip_ns.FIP_NS_PREFIX],
|
||||
'lbaas': [LB_NS_PREFIX],
|
||||
}
|
||||
SIGTERM_WAITTIME = 10
|
||||
NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d{2,6})/.*')
|
||||
|
||||
|
||||
class PidsInNamespaceException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FakeDhcpPlugin(object):
|
||||
@ -130,6 +139,87 @@ def unplug_device(conf, device):
|
||||
device.set_log_fail_as_error(orig_log_fail_as_error)
|
||||
|
||||
|
||||
def find_listen_pids_namespace(namespace):
|
||||
"""Retrieve a list of pids of listening processes within the given netns.
|
||||
|
||||
It executes netstat -nlp and returns a set of unique pairs
|
||||
"""
|
||||
ip = ip_lib.IPWrapper(namespace=namespace)
|
||||
pids = set()
|
||||
cmd = ['netstat', '-nlp']
|
||||
output = ip.netns.execute(cmd, run_as_root=True)
|
||||
for line in output.splitlines():
|
||||
m = NETSTAT_PIDS_REGEX.match(line)
|
||||
if m:
|
||||
pids.add(m.group('pid'))
|
||||
return pids
|
||||
|
||||
|
||||
def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
|
||||
"""Poll listening processes within the given namespace.
|
||||
|
||||
If after timeout seconds, there are remaining processes in the namespace,
|
||||
then a PidsInNamespaceException will be thrown.
|
||||
"""
|
||||
# Would be better to handle an eventlet.timeout.Timeout exception
|
||||
# but currently there's a problem importing eventlet since it's
|
||||
# doing a local import from cmd/eventlet which doesn't have a
|
||||
# timeout module
|
||||
common_utils.wait_until_true(
|
||||
lambda: not find_listen_pids_namespace(namespace),
|
||||
timeout=SIGTERM_WAITTIME,
|
||||
exception=PidsInNamespaceException)
|
||||
|
||||
|
||||
def _kill_listen_processes(namespace, force=False):
|
||||
"""Identify all listening processes within the given namespace.
|
||||
|
||||
Then, for each one, find its top parent with same cmdline (in case this
|
||||
process forked) and issue a SIGTERM to all of them. If force is True,
|
||||
then a SIGKILL will be issued to all parents and all their children. Also,
|
||||
this function returns the number of listening processes.
|
||||
"""
|
||||
pids = find_listen_pids_namespace(namespace)
|
||||
pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids}
|
||||
kill_signal = signal.SIGTERM
|
||||
if force:
|
||||
kill_signal = signal.SIGKILL
|
||||
children = [utils.find_child_pids(pid, True) for pid in pids_to_kill]
|
||||
pids_to_kill.update(itertools.chain.from_iterable(children))
|
||||
|
||||
for pid in pids_to_kill:
|
||||
# Throw a warning since this particular cleanup may need a specific
|
||||
# implementation in the right module. Ideally, netns_cleanup wouldn't
|
||||
# kill any processes as the responsible module should've killed them
|
||||
# before cleaning up the namespace
|
||||
LOG.warning(_LW("Killing (%(signal)d) [%(pid)s] %(cmdline)s"),
|
||||
{'signal': kill_signal,
|
||||
'pid': pid,
|
||||
'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80]
|
||||
})
|
||||
try:
|
||||
utils.kill_process(pid, kill_signal, run_as_root=True)
|
||||
except Exception as ex:
|
||||
LOG.error(_LE('An error occurred while killing '
|
||||
'[%(pid)s]: %(msg)s'), {'pid': pid, 'msg': ex})
|
||||
return len(pids)
|
||||
|
||||
|
||||
def kill_listen_processes(namespace):
|
||||
"""Kill all processes listening within the given namespace.
|
||||
|
||||
First it tries to kill them using SIGTERM, waits until they die gracefully
|
||||
and then kills remaining processes (if any) with SIGKILL
|
||||
"""
|
||||
if _kill_listen_processes(namespace, force=False):
|
||||
try:
|
||||
wait_until_no_listen_pids_namespace(namespace)
|
||||
except PidsInNamespaceException:
|
||||
_kill_listen_processes(namespace, force=True)
|
||||
# Allow some time for remaining processes to die
|
||||
wait_until_no_listen_pids_namespace(namespace)
|
||||
|
||||
|
||||
def destroy_namespace(conf, namespace, force=False):
|
||||
"""Destroy a given namespace.
|
||||
|
||||
@ -145,6 +235,14 @@ def destroy_namespace(conf, namespace, force=False):
|
||||
# NOTE: The dhcp driver will remove the namespace if is it empty,
|
||||
# so a second check is required here.
|
||||
if ip.netns.exists(namespace):
|
||||
try:
|
||||
kill_listen_processes(namespace)
|
||||
except PidsInNamespaceException:
|
||||
# This is unlikely since, at this point, we have SIGKILLed
|
||||
# all remaining processes but if there are still some, log
|
||||
# the error and continue with the cleanup
|
||||
LOG.error(_LE('Not all processes were killed in %s'),
|
||||
namespace)
|
||||
for device in ip.get_devices(exclude_loopback=True):
|
||||
unplug_device(conf, device)
|
||||
|
||||
|
@ -41,3 +41,9 @@ ovstrace_filter: RegExpFilter, ovs-appctl, root, ovs-appctl, ofproto/trace, .*,
|
||||
# needed for TestGetRootHelperChildPid
|
||||
bash_filter: RegExpFilter, /bin/bash, root, bash, -c, \(sleep 100\)
|
||||
sleep_kill: KillFilter, root, sleep, -9
|
||||
|
||||
#needed by test_netns_cleanup
|
||||
process_spawn: EnvFilter, env, root, PATH=, python
|
||||
ip_exec: IpNetnsExecFilter, ip, root
|
||||
ps: CommandFilter, ps, root
|
||||
pid_kill: RegExpFilter, kill, root, kill, -\d+, .*
|
||||
|
180
neutron/tests/functional/cmd/process_spawn.py
Normal file
180
neutron/tests/functional/cmd/process_spawn.py
Normal file
@ -0,0 +1,180 @@
|
||||
# Copyright 2016 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import os
|
||||
import signal
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
|
||||
from neutron_lib import constants as n_const
|
||||
from oslo_config import cfg
|
||||
|
||||
from neutron._i18n import _
|
||||
from neutron.agent.linux import daemon
|
||||
from neutron.tests.common import net_helpers
|
||||
|
||||
UNIX_FAMILY = 'UNIX'
|
||||
|
||||
OPTS = [
|
||||
cfg.IntOpt('num_children',
|
||||
short='n',
|
||||
default=0,
|
||||
help=_('Number of children to spawn'),
|
||||
required=False),
|
||||
cfg.StrOpt('family',
|
||||
short='f',
|
||||
default=n_const.IPv4,
|
||||
choices=[n_const.IPv4, n_const.IPv6, UNIX_FAMILY],
|
||||
help=_('Listen socket family (%(v4)s, %(v6)s or %(unix)s)') %
|
||||
{
|
||||
'v4': n_const.IPv4,
|
||||
'v6': n_const.IPv6,
|
||||
'unix': UNIX_FAMILY
|
||||
},
|
||||
required=False),
|
||||
cfg.StrOpt('proto',
|
||||
short='p',
|
||||
default=n_const.PROTO_NAME_TCP,
|
||||
choices=[n_const.PROTO_NAME_TCP, n_const.PROTO_NAME_UDP],
|
||||
help=_('Protocol (%(tcp)s or %(udp)s)') %
|
||||
{
|
||||
'tcp': n_const.PROTO_NAME_TCP,
|
||||
'udp': n_const.PROTO_NAME_UDP
|
||||
},
|
||||
required=False),
|
||||
cfg.BoolOpt('parent_listen',
|
||||
short='pl',
|
||||
default=True,
|
||||
help=_('Parent process must listen too'),
|
||||
required=False),
|
||||
cfg.BoolOpt('ignore_sigterm',
|
||||
short='i',
|
||||
default=False,
|
||||
help=_('Ignore SIGTERM'),
|
||||
required=False)
|
||||
]
|
||||
|
||||
|
||||
class ProcessSpawn(daemon.Daemon):
|
||||
"""
|
||||
This class is part of the functional test of the netns_cleanup module.
|
||||
|
||||
It allows spawning processes that listen on random ports either on
|
||||
tcp(6), udp(6) or unix sockets. Also it allows handling or ignoring
|
||||
SIGTERM to check whether the cleanup works as expected by getting rid
|
||||
of the spawned processes.
|
||||
"""
|
||||
|
||||
MAX_BIND_RETRIES = 5
|
||||
|
||||
DCT_FAMILY = {
|
||||
n_const.IPv4: socket.AF_INET,
|
||||
n_const.IPv6: socket.AF_INET6,
|
||||
UNIX_FAMILY: socket.AF_UNIX
|
||||
}
|
||||
DCT_PROTO = {
|
||||
n_const.PROTO_NAME_TCP: socket.SOCK_STREAM,
|
||||
n_const.PROTO_NAME_UDP: socket.SOCK_DGRAM,
|
||||
}
|
||||
|
||||
def __init__(self, pidfile=None,
|
||||
family=n_const.IPv4,
|
||||
proto=n_const.PROTO_NAME_TCP,
|
||||
ignore_sigterm=False, num_children=0,
|
||||
parent_must_listen=True):
|
||||
self.family = family
|
||||
self.proto = proto
|
||||
self.ignore_sigterm = ignore_sigterm
|
||||
self.num_children = num_children
|
||||
self.listen_socket = None
|
||||
self.parent_must_listen = parent_must_listen
|
||||
self.child_pids = []
|
||||
|
||||
super(ProcessSpawn, self).__init__(pidfile)
|
||||
|
||||
def start_listening(self):
|
||||
socket_family = self.DCT_FAMILY[self.family]
|
||||
socket_type = self.DCT_PROTO[self.proto]
|
||||
|
||||
self.listen_socket = socket.socket(socket_family, socket_type)
|
||||
|
||||
# Try to listen in a random port which is not currently in use
|
||||
retries = 0
|
||||
while retries < ProcessSpawn.MAX_BIND_RETRIES:
|
||||
# NOTE(dalvarez): not finding a free port on a freshly created
|
||||
# namespace is very unlikely but if problems show up, retries can
|
||||
# be increased to avoid tests failing
|
||||
try:
|
||||
if self.family == UNIX_FAMILY:
|
||||
self.listen_socket.bind('')
|
||||
else:
|
||||
port = net_helpers.get_free_namespace_port(self.proto)
|
||||
self.listen_socket.bind(('', port))
|
||||
except socket.error:
|
||||
retries += 1
|
||||
else:
|
||||
if n_const.PROTO_NAME_TCP in self.proto:
|
||||
self.listen_socket.listen(0)
|
||||
break
|
||||
|
||||
def do_sleep(self):
|
||||
while True:
|
||||
time.sleep(10)
|
||||
|
||||
def run(self):
|
||||
# Spawn as many children as requested
|
||||
children = []
|
||||
while len(children) != self.num_children:
|
||||
child_pid = os.fork()
|
||||
if child_pid == 0:
|
||||
# Listen and do nothing else
|
||||
self.start_listening()
|
||||
self.do_sleep()
|
||||
return
|
||||
children.append(child_pid)
|
||||
|
||||
# Install a SIGTERM handler if requested
|
||||
if not self.ignore_sigterm:
|
||||
signal.signal(signal.SIGTERM, self.sigterm_handler)
|
||||
|
||||
self.child_pids = children
|
||||
if self.parent_must_listen:
|
||||
self.start_listening()
|
||||
self.do_sleep()
|
||||
|
||||
def sigterm_handler(self, signum, frame):
|
||||
if self.listen_socket:
|
||||
self.listen_socket.close()
|
||||
for child in self.child_pids:
|
||||
try:
|
||||
os.kill(child, signal.SIGTERM)
|
||||
except OSError:
|
||||
pass
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
def main():
|
||||
cfg.CONF.register_cli_opts(OPTS)
|
||||
cfg.CONF(project='neutron', default_config_files=[])
|
||||
proc_spawn = ProcessSpawn(num_children=cfg.CONF.num_children,
|
||||
family=cfg.CONF.family,
|
||||
proto=cfg.CONF.proto,
|
||||
parent_must_listen=cfg.CONF.parent_listen,
|
||||
ignore_sigterm=cfg.CONF.ignore_sigterm)
|
||||
proc_spawn.start()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -13,19 +13,26 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import os
|
||||
|
||||
import mock
|
||||
from neutron_lib import constants as n_const
|
||||
|
||||
from neutron.agent.l3 import agent as l3_agent
|
||||
from neutron.agent.linux import dhcp
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import utils
|
||||
from neutron.cmd import netns_cleanup
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.conf.agent import cmd
|
||||
from neutron.tests import base as basetest
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.functional import base
|
||||
from neutron.tests.functional.cmd import process_spawn
|
||||
|
||||
GET_NAMESPACES = 'neutron.agent.linux.ip_lib.IPWrapper.get_namespaces'
|
||||
TEST_INTERFACE_DRIVER = 'neutron.agent.linux.interface.OVSInterfaceDriver'
|
||||
NUM_SUBPROCESSES = 6
|
||||
|
||||
|
||||
class NetnsCleanupTest(base.BaseSudoTestCase):
|
||||
@ -60,13 +67,82 @@ class NetnsCleanupTest(base.BaseSudoTestCase):
|
||||
# tests, as otherwise cleanup will kill them all
|
||||
self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]
|
||||
|
||||
# launch processes in each namespace to make sure they're
|
||||
# killed during cleanup
|
||||
procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
|
||||
self.assertIsNot(procs_launched, 0)
|
||||
common_utils.wait_until_true(
|
||||
lambda: self._get_num_spawned_procs() == procs_launched,
|
||||
timeout=15,
|
||||
exception=Exception("Didn't spawn expected number of processes"))
|
||||
|
||||
netns_cleanup.cleanup_network_namespaces(self.conf)
|
||||
|
||||
self.get_namespaces_p.stop()
|
||||
namespaces_now = ip_lib.IPWrapper.get_namespaces()
|
||||
procs_after = self._get_num_spawned_procs()
|
||||
self.assertEqual(procs_after, 0)
|
||||
self.assertNotIn(l3_namespace, namespaces_now)
|
||||
self.assertNotIn(dhcp_namespace, namespaces_now)
|
||||
|
||||
@staticmethod
|
||||
def _launch_processes(namespaces):
|
||||
"""
|
||||
Launch processes in the specified namespaces.
|
||||
|
||||
This function will spawn processes inside the given namespaces:
|
||||
- 6 processes listening on tcp ports (parent + 5 children)
|
||||
- 1 process + 5 subprocesses listening on unix sockets
|
||||
- 1 process + 5 subprocesses listening on udp6 sockets
|
||||
|
||||
First two sets of processes will process SIGTERM so when the parent
|
||||
gets killed, it will kill all spawned children
|
||||
The last set of processes will ignore SIGTERM. This will allow us
|
||||
to test the cleanup functionality which will issue a SIGKILL
|
||||
to all remaining processes after the SIGTERM attempt
|
||||
"""
|
||||
commands = [['python', process_spawn.__file__,
|
||||
'-n', NUM_SUBPROCESSES,
|
||||
'-f', n_const.IPv4,
|
||||
'-p', n_const.PROTO_NAME_TCP,
|
||||
'--noignore_sigterm',
|
||||
'--parent_listen'],
|
||||
['python', process_spawn.__file__,
|
||||
'-n', NUM_SUBPROCESSES,
|
||||
'-f', process_spawn.UNIX_FAMILY,
|
||||
'-p', n_const.PROTO_NAME_TCP,
|
||||
'--noignore_sigterm',
|
||||
'--noparent_listen'],
|
||||
['python', process_spawn.__file__,
|
||||
'-n', NUM_SUBPROCESSES,
|
||||
'-f', n_const.IPv4,
|
||||
'-p', n_const.PROTO_NAME_UDP,
|
||||
'--ignore_sigterm',
|
||||
'--noparent_listen']]
|
||||
|
||||
proc_count = 0
|
||||
for netns in namespaces:
|
||||
ip = ip_lib.IPWrapper(namespace=netns)
|
||||
for command in commands:
|
||||
# The total amount of processes per command is
|
||||
# the process itself plus the number of subprocesses spawned by
|
||||
# it
|
||||
proc_count += (1 + NUM_SUBPROCESSES)
|
||||
# We need to pass the PATH env variable so that python
|
||||
# interpreter runs under the same virtual environment.
|
||||
# Otherwise, it won't find the necessary packages such as
|
||||
# oslo_config
|
||||
ip.netns.execute(command,
|
||||
addl_env={'PATH':
|
||||
os.environ.get('PATH')})
|
||||
return proc_count
|
||||
|
||||
@staticmethod
|
||||
def _get_num_spawned_procs():
|
||||
cmd = ['ps', '-f', '-u', 'root']
|
||||
out = utils.execute(cmd, run_as_root=True)
|
||||
return sum([1 for line in out.splitlines() if 'process_spawn' in line])
|
||||
|
||||
|
||||
class TestNETNSCLIConfig(basetest.BaseTestCase):
|
||||
|
||||
|
@ -198,20 +198,18 @@ class TestAsyncProcess(base.BaseTestCase):
|
||||
exc = RuntimeError(exception_message)
|
||||
else:
|
||||
exc = None
|
||||
with mock.patch.object(utils, 'execute',
|
||||
side_effect=exc) as mock_execute:
|
||||
with mock.patch.object(utils, 'kill_process',
|
||||
side_effect=exc) as mock_kill_process:
|
||||
actual = self.proc._kill_process(pid, kill_signal)
|
||||
|
||||
self.assertEqual(expected, actual)
|
||||
mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
|
||||
run_as_root=self.proc.run_as_root)
|
||||
mock_kill_process.assert_called_with(pid,
|
||||
kill_signal,
|
||||
self.proc.run_as_root)
|
||||
|
||||
def test__kill_process_returns_true_for_valid_pid(self):
|
||||
self._test__kill_process('1', True)
|
||||
|
||||
def test__kill_process_returns_true_for_stale_pid(self):
|
||||
self._test__kill_process('1', True, 'No such process')
|
||||
|
||||
def test__kill_process_returns_false_for_execute_exception(self):
|
||||
self._test__kill_process('1', False, 'Invalid')
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import signal
|
||||
import socket
|
||||
|
||||
import mock
|
||||
@ -181,6 +182,100 @@ class AgentUtilsGetInterfaceMAC(base.BaseTestCase):
|
||||
self.assertEqual(actual_val, expect_val)
|
||||
|
||||
|
||||
class TestFindParentPid(base.BaseTestCase):
|
||||
def setUp(self):
|
||||
super(TestFindParentPid, self).setUp()
|
||||
self.m_execute = mock.patch.object(utils, 'execute').start()
|
||||
|
||||
def test_returns_none_for_no_valid_pid(self):
|
||||
self.m_execute.side_effect = utils.ProcessExecutionError('',
|
||||
returncode=1)
|
||||
self.assertIsNone(utils.find_parent_pid(-1))
|
||||
|
||||
def test_returns_parent_id_for_good_ouput(self):
|
||||
self.m_execute.return_value = '123 \n'
|
||||
self.assertEqual(utils.find_parent_pid(-1), '123')
|
||||
|
||||
def test_raises_exception_returncode_0(self):
|
||||
with testtools.ExpectedException(utils.ProcessExecutionError):
|
||||
self.m_execute.side_effect = \
|
||||
utils.ProcessExecutionError('', returncode=0)
|
||||
utils.find_parent_pid(-1)
|
||||
|
||||
def test_raises_unknown_exception(self):
|
||||
with testtools.ExpectedException(RuntimeError):
|
||||
self.m_execute.side_effect = RuntimeError()
|
||||
utils.find_parent_pid(-1)
|
||||
|
||||
|
||||
class TestFindForkTopParent(base.BaseTestCase):
|
||||
def _test_find_fork_top_parent(self, expected=_marker,
|
||||
find_parent_pid_retvals=None,
|
||||
pid_invoked_with_cmdline_retvals=None):
|
||||
def _find_parent_pid(x):
|
||||
if find_parent_pid_retvals:
|
||||
return find_parent_pid_retvals.pop(0)
|
||||
|
||||
pid_invoked_with_cmdline = {}
|
||||
if pid_invoked_with_cmdline_retvals:
|
||||
pid_invoked_with_cmdline['side_effect'] = (
|
||||
pid_invoked_with_cmdline_retvals)
|
||||
else:
|
||||
pid_invoked_with_cmdline['return_value'] = False
|
||||
with mock.patch.object(utils, 'find_parent_pid',
|
||||
side_effect=_find_parent_pid), \
|
||||
mock.patch.object(utils, 'pid_invoked_with_cmdline',
|
||||
**pid_invoked_with_cmdline):
|
||||
actual = utils.find_fork_top_parent(_marker)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_returns_own_pid_no_parent(self):
|
||||
self._test_find_fork_top_parent()
|
||||
|
||||
def test_returns_own_pid_nofork(self):
|
||||
self._test_find_fork_top_parent(find_parent_pid_retvals=['2', '3'])
|
||||
|
||||
def test_returns_first_parent_pid_fork(self):
|
||||
self._test_find_fork_top_parent(
|
||||
expected='2',
|
||||
find_parent_pid_retvals=['2', '3', '4'],
|
||||
pid_invoked_with_cmdline_retvals=[True, False, False])
|
||||
|
||||
def test_returns_top_parent_pid_fork(self):
|
||||
self._test_find_fork_top_parent(
|
||||
expected='4',
|
||||
find_parent_pid_retvals=['2', '3', '4'],
|
||||
pid_invoked_with_cmdline_retvals=[True, True, True])
|
||||
|
||||
|
||||
class TestKillProcess(base.BaseTestCase):
|
||||
def _test_kill_process(self, pid, exception_message=None,
|
||||
kill_signal=signal.SIGKILL):
|
||||
if exception_message:
|
||||
exc = utils.ProcessExecutionError(exception_message, returncode=0)
|
||||
else:
|
||||
exc = None
|
||||
with mock.patch.object(utils, 'execute',
|
||||
side_effect=exc) as mock_execute:
|
||||
utils.kill_process(pid, kill_signal, run_as_root=True)
|
||||
|
||||
mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
|
||||
run_as_root=True)
|
||||
|
||||
def test_kill_process_returns_none_for_valid_pid(self):
|
||||
self._test_kill_process('1')
|
||||
|
||||
def test_kill_process_returns_none_for_stale_pid(self):
|
||||
self._test_kill_process('1', 'No such process')
|
||||
|
||||
def test_kill_process_raises_exception_for_execute_exception(self):
|
||||
with testtools.ExpectedException(utils.ProcessExecutionError):
|
||||
self._test_kill_process('1', 'Invalid')
|
||||
|
||||
def test_kill_process_with_different_signal(self):
|
||||
self._test_kill_process('1', kill_signal=signal.SIGTERM)
|
||||
|
||||
|
||||
class TestFindChildPids(base.BaseTestCase):
|
||||
|
||||
def test_returns_empty_list_for_exit_code_1(self):
|
||||
@ -197,6 +292,14 @@ class TestFindChildPids(base.BaseTestCase):
|
||||
with mock.patch.object(utils, 'execute', return_value=' 123 \n 185\n'):
|
||||
self.assertEqual(utils.find_child_pids(-1), ['123', '185'])
|
||||
|
||||
def test_returns_list_of_child_process_ids_recursively(self):
|
||||
with mock.patch.object(utils, 'execute',
|
||||
side_effect=[' 123 \n 185\n',
|
||||
' 40 \n', '\n',
|
||||
'41\n', '\n']):
|
||||
actual = utils.find_child_pids(-1, True)
|
||||
self.assertEqual(actual, ['123', '185', '40', '41'])
|
||||
|
||||
def test_raises_unknown_exception(self):
|
||||
with testtools.ExpectedException(RuntimeError):
|
||||
with mock.patch.object(utils, 'execute',
|
||||
|
@ -13,11 +13,47 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import signal
|
||||
|
||||
import mock
|
||||
import testtools
|
||||
|
||||
from neutron.cmd import netns_cleanup as util
|
||||
from neutron.tests import base
|
||||
|
||||
NETSTAT_NETNS_OUTPUT = ("""
|
||||
Active Internet connections (only servers)
|
||||
Proto Recv-Q Send-Q Local Address Foreign Address State\
|
||||
PID/Program name
|
||||
tcp 0 0 0.0.0.0:9697 0.0.0.0:* LISTEN\
|
||||
1347/python
|
||||
raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
|
||||
1279/keepalived
|
||||
raw 0 0 0.0.0.0:112 0.0.0.0:* 7\
|
||||
1279/keepalived
|
||||
raw6 0 0 :::58 :::* 7\
|
||||
1349/radvd
|
||||
Active UNIX domain sockets (only servers)
|
||||
Proto RefCnt Flags Type State I-Node PID/Program name\
|
||||
Path
|
||||
unix 2 [ ACC ] STREAM LISTENING 82039530 1353/python\
|
||||
/tmp/rootwrap-VKSm8a/rootwrap.sock
|
||||
""")
|
||||
|
||||
NETSTAT_NO_NAMESPACE = ("""
|
||||
Cannot open network namespace "qrouter-e6f206b2-4e8d-4597-a7e1-c3a20337e9c6":\
|
||||
No such file or directory
|
||||
""")
|
||||
|
||||
NETSTAT_NO_LISTEN_PROCS = ("""
|
||||
Active Internet connections (only servers)
|
||||
Proto Recv-Q Send-Q Local Address Foreign Address State\
|
||||
PID/Program name
|
||||
Active UNIX domain sockets (only servers)
|
||||
Proto RefCnt Flags Type State I-Node PID/Program name\
|
||||
Path
|
||||
""")
|
||||
|
||||
|
||||
class TestNetnsCleanup(base.BaseTestCase):
|
||||
def setUp(self):
|
||||
@ -163,6 +199,115 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
self.assertEqual([], ovs_br_cls.mock_calls)
|
||||
self.assertTrue(debug.called)
|
||||
|
||||
def _test_find_listen_pids_namespace_helper(self, expected,
|
||||
netstat_output=None):
|
||||
with mock.patch('neutron.agent.linux.ip_lib.IPWrapper') as ip_wrap:
|
||||
ip_wrap.return_value.netns.execute.return_value = netstat_output
|
||||
observed = util.find_listen_pids_namespace(mock.ANY)
|
||||
self.assertEqual(expected, observed)
|
||||
|
||||
def test_find_listen_pids_namespace_correct_output(self):
|
||||
expected = set(['1347', '1279', '1349', '1353'])
|
||||
self._test_find_listen_pids_namespace_helper(expected,
|
||||
NETSTAT_NETNS_OUTPUT)
|
||||
|
||||
def test_find_listen_pids_namespace_no_procs(self):
|
||||
expected = set()
|
||||
self._test_find_listen_pids_namespace_helper(expected,
|
||||
NETSTAT_NO_LISTEN_PROCS)
|
||||
|
||||
def test_find_listen_pids_namespace_no_namespace(self):
|
||||
expected = set()
|
||||
self._test_find_listen_pids_namespace_helper(expected,
|
||||
NETSTAT_NO_NAMESPACE)
|
||||
|
||||
def _test__kill_listen_processes_helper(self, pids, parents, children,
|
||||
kills_expected, force):
|
||||
def _get_element(dct, x):
|
||||
return dct.get(x, [])
|
||||
|
||||
def _find_childs(x, recursive):
|
||||
return _get_element(children, x)
|
||||
|
||||
def _find_parent(x):
|
||||
return _get_element(parents, x)
|
||||
|
||||
utils_mock = dict(
|
||||
find_fork_top_parent=mock.DEFAULT,
|
||||
find_child_pids=mock.DEFAULT,
|
||||
get_cmdline_from_pid=mock.DEFAULT,
|
||||
kill_process=mock.DEFAULT)
|
||||
|
||||
self.log_mock = mock.patch.object(util, 'LOG').start()
|
||||
with mock.patch.multiple('neutron.agent.linux.utils', **utils_mock)\
|
||||
as mocks:
|
||||
mocks['find_fork_top_parent'].side_effect = _find_parent
|
||||
mocks['find_child_pids'].side_effect = _find_childs
|
||||
|
||||
with mock.patch.object(util, 'find_listen_pids_namespace',
|
||||
return_value=pids):
|
||||
calls = []
|
||||
for pid, sig in kills_expected:
|
||||
calls.append(mock.call(pid, sig, run_as_root=True))
|
||||
util._kill_listen_processes(mock.ANY, force=force)
|
||||
mocks['kill_process'].assert_has_calls(calls, any_order=True)
|
||||
|
||||
def test__kill_listen_processes_only_parents_force_false(self):
|
||||
pids = ['4', '5', '6']
|
||||
parents = {'4': '1', '5': '5', '6': '2'}
|
||||
children = {}
|
||||
kills_expected = [('1', signal.SIGTERM),
|
||||
('5', signal.SIGTERM),
|
||||
('2', signal.SIGTERM)]
|
||||
|
||||
self._test__kill_listen_processes_helper(pids, parents, children,
|
||||
kills_expected, False)
|
||||
|
||||
def test__kill_listen_processes_parents_and_childs(self):
|
||||
pids = ['4', '5', '6']
|
||||
parents = {'4': '1', '5': '2', '6': '3'}
|
||||
children = {'1': ['4'], '2': ['5'], '3': ['6', '8', '7']}
|
||||
kills_expected = [(str(x), signal.SIGKILL) for x in range(1, 9)]
|
||||
self._test__kill_listen_processes_helper(pids, parents, children,
|
||||
kills_expected, True)
|
||||
|
||||
def test_kill_listen_processes(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=1) as mock_kill_listen:
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
wait_until_true_mock.side_effect = [
|
||||
util.PidsInNamespaceException,
|
||||
None]
|
||||
namespace = mock.ANY
|
||||
util.kill_listen_processes(namespace)
|
||||
mock_kill_listen.assert_has_calls(
|
||||
[mock.call(namespace, force=False),
|
||||
mock.call(namespace, force=True)])
|
||||
|
||||
def test_kill_listen_processes_still_procs(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=1):
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
wait_until_true_mock.side_effect = (
|
||||
util.PidsInNamespaceException)
|
||||
namespace = mock.ANY
|
||||
with testtools.ExpectedException(
|
||||
util.PidsInNamespaceException):
|
||||
util.kill_listen_processes(namespace)
|
||||
|
||||
def test_kill_listen_processes_no_procs(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=0) as mock_kill_listen:
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
namespace = mock.ANY
|
||||
util.kill_listen_processes(namespace)
|
||||
mock_kill_listen.assert_called_once_with(namespace,
|
||||
force=False)
|
||||
self.assertFalse(wait_until_true_mock.called)
|
||||
|
||||
def _test_destroy_namespace_helper(self, force, num_devices):
|
||||
ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'
|
||||
conf = mock.Mock()
|
||||
@ -182,6 +327,8 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
ip_wrap.return_value.get_devices.return_value = devices
|
||||
ip_wrap.return_value.netns.exists.return_value = True
|
||||
|
||||
with mock.patch.object(util, 'kill_listen_processes'):
|
||||
|
||||
with mock.patch.object(util, 'unplug_device') as unplug:
|
||||
|
||||
with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
|
||||
@ -191,13 +338,15 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
if force:
|
||||
expected.extend([
|
||||
mock.call().netns.exists(ns),
|
||||
mock.call().get_devices(exclude_loopback=True)])
|
||||
mock.call().get_devices(
|
||||
exclude_loopback=True)])
|
||||
self.assertTrue(kill_dhcp.called)
|
||||
unplug.assert_has_calls(
|
||||
[mock.call(conf, d) for d in
|
||||
devices[1:]])
|
||||
|
||||
expected.append(mock.call().garbage_collect_namespace())
|
||||
expected.append(
|
||||
mock.call().garbage_collect_namespace())
|
||||
ip_wrap.assert_has_calls(expected)
|
||||
|
||||
def test_destroy_namespace_empty(self):
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
features:
|
||||
- A new mechanism has been added to netns_cleanup to
|
||||
kill processes that are listening on any port/unix
|
||||
socket within the namespace. This will try to kill
|
||||
them gracefully via SIGTERM and, if they don't die,
|
||||
then a SIGKILL will be sent to the remaining
|
||||
processes to ensure a proper cleanup.
|
@ -36,6 +36,7 @@ data_files =
|
||||
etc/neutron/rootwrap.d/ipset-firewall.filters
|
||||
etc/neutron/rootwrap.d/l3.filters
|
||||
etc/neutron/rootwrap.d/linuxbridge-plugin.filters
|
||||
etc/neutron/rootwrap.d/netns-cleanup.filters
|
||||
etc/neutron/rootwrap.d/openvswitch-plugin.filters
|
||||
scripts =
|
||||
bin/neutron-rootwrap-xen-dom0
|
||||
|
Loading…
Reference in New Issue
Block a user