Browse Source

Kill processes when cleaning up namespaces

This patch will kill processes that are listening on any port/UNIX
socket within the namespace to be cleaned up. To kill them it will
issue a SIGTERM to them (or to their parents if they were forked) and,
if they don't die after a few seconds, a SIGKILL to them and all their
children.

This is intended for those cases when there's no specific cleanup and
serves as a fallback method.

Change-Id: I4195f633ef4a1788496d1293846f19eef89416aa
Partial-Bug: #1403455
tags/10.0.0.0b3
Daniel Alvarez 2 years ago
parent
commit
1d38f30555

+ 2
- 2
etc/neutron/rootwrap.d/dhcp.filters View File

@@ -13,8 +13,8 @@ dnsmasq: CommandFilter, dnsmasq, root
13 13
 # dhcp-agent uses kill as well, that's handled by the generic KillFilter
14 14
 # it looks like these are the only signals needed, per
15 15
 # neutron/agent/linux/dhcp.py
16
-kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP
17
-kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP
16
+kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15
17
+kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15
18 18
 
19 19
 ovs-vsctl: CommandFilter, ovs-vsctl, root
20 20
 ivs-ctl: CommandFilter, ivs-ctl, root

+ 4
- 4
etc/neutron/rootwrap.d/l3.filters View File

@@ -19,10 +19,10 @@ radvd: CommandFilter, radvd, root
19 19
 # metadata proxy
20 20
 metadata_proxy: CommandFilter, neutron-ns-metadata-proxy, root
21 21
 # RHEL invocation of the metadata proxy will report /usr/bin/python
22
-kill_metadata: KillFilter, root, python, -9
23
-kill_metadata7: KillFilter, root, python2.7, -9
24
-kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -9, -HUP
25
-kill_radvd: KillFilter, root, /sbin/radvd, -9, -HUP
22
+kill_metadata: KillFilter, root, python, -15, -9
23
+kill_metadata7: KillFilter, root, python2.7, -15, -9
24
+kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP
25
+kill_radvd: KillFilter, root, /sbin/radvd, -15, -9, -HUP
26 26
 
27 27
 # ip_lib
28 28
 ip: IpFilter, ip, root

+ 12
- 0
etc/neutron/rootwrap.d/netns-cleanup.filters View File

@@ -0,0 +1,12 @@
1
+# neutron-rootwrap command filters for nodes on which neutron is
2
+# expected to control network
3
+#
4
+# This file should be owned by (and only-writeable by) the root user
5
+
6
+# format seems to be
7
+# cmd-name: filter-name, raw-command, user, args
8
+
9
+[Filters]
10
+
11
+# netns-cleanup
12
+netstat: CommandFilter, netstat, root

+ 5
- 9
neutron/agent/linux/async_process.py View File

@@ -175,15 +175,11 @@ class AsyncProcess(object):
175 175
         try:
176 176
             # A process started by a root helper will be running as
177 177
             # root and need to be killed via the same helper.
178
-            utils.execute(['kill', '-%d' % kill_signal, pid],
179
-                          run_as_root=self.run_as_root)
180
-        except Exception as ex:
181
-            stale_pid = (isinstance(ex, RuntimeError) and
182
-                         'No such process' in str(ex))
183
-            if not stale_pid:
184
-                LOG.exception(_LE('An error occurred while killing [%s].'),
185
-                              self.cmd)
186
-                return False
178
+            utils.kill_process(pid, kill_signal, self.run_as_root)
179
+        except Exception:
180
+            LOG.exception(_LE('An error occurred while killing [%s].'),
181
+                          self.cmd)
182
+            return False
187 183
 
188 184
         if self._process:
189 185
             self._process.wait()

+ 56
- 3
neutron/agent/linux/utils.py View File

@@ -171,9 +171,11 @@ def get_interface_mac(interface):
171 171
     return ':'.join(["%02x" % b for b in iterbytes(info[MAC_START:MAC_END])])
172 172
 
173 173
 
174
-def find_child_pids(pid):
175
-    """Retrieve a list of the pids of child processes of the given pid."""
174
+def find_child_pids(pid, recursive=False):
175
+    """Retrieve a list of the pids of child processes of the given pid.
176 176
 
177
+    It can also find all children through the hierarchy if recursive=True
178
+    """
177 179
     try:
178 180
         raw_pids = execute(['ps', '--ppid', pid, '-o', 'pid='],
179 181
                            log_fail_as_error=False)
@@ -185,7 +187,58 @@ def find_child_pids(pid):
185 187
             if no_children_found:
186 188
                 ctxt.reraise = False
187 189
                 return []
188
-    return [x.strip() for x in raw_pids.split('\n') if x.strip()]
190
+    child_pids = [x.strip() for x in raw_pids.split('\n') if x.strip()]
191
+    if recursive:
192
+        for child in child_pids:
193
+            child_pids = child_pids + find_child_pids(child, True)
194
+    return child_pids
195
+
196
+
197
+def find_parent_pid(pid):
198
+    """Retrieve the pid of the parent process of the given pid.
199
+
200
+    If the pid doesn't exist in the system, this function will return
201
+    None
202
+    """
203
+    try:
204
+        ppid = execute(['ps', '-o', 'ppid=', pid],
205
+                       log_fail_as_error=False)
206
+    except ProcessExecutionError as e:
207
+        # Unexpected errors are the responsibility of the caller
208
+        with excutils.save_and_reraise_exception() as ctxt:
209
+            # Exception has already been logged by execute
210
+            no_such_pid = e.returncode == 1
211
+            if no_such_pid:
212
+                ctxt.reraise = False
213
+                return
214
+    return ppid.strip()
215
+
216
+
217
+def find_fork_top_parent(pid):
218
+    """Retrieve the pid of the top parent of the given pid through a fork.
219
+
220
+    This function will search the top parent with its same cmdline. If the
221
+    given pid has no parent, its own pid will be returned
222
+    """
223
+    while True:
224
+        ppid = find_parent_pid(pid)
225
+        if (ppid and ppid != pid and
226
+                pid_invoked_with_cmdline(ppid, get_cmdline_from_pid(pid))):
227
+            pid = ppid
228
+        else:
229
+            return pid
230
+
231
+
232
+def kill_process(pid, signal, run_as_root=False):
233
+    """Kill the process with the given pid using the given signal."""
234
+    try:
235
+        execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root)
236
+    except ProcessExecutionError as ex:
237
+        # TODO(dalvarez): this check has i18n issues. Maybe we can use
238
+        # use gettext module setting a global locale or just pay attention
239
+        # to returncode instead of checking the ex message.
240
+        if 'No such process' not in str(ex):
241
+            raise
189 242
 
190 243
 
191 244
 def _get_conf_base(cfg_root, uuid, ensure_conf_dir):

+ 99
- 1
neutron/cmd/netns_cleanup.py View File

@@ -15,6 +15,7 @@
15 15
 
16 16
 import itertools
17 17
 import re
18
+import signal
18 19
 import time
19 20
 
20 21
 from neutron_lib import constants
@@ -22,7 +23,7 @@ from oslo_config import cfg
22 23
 from oslo_log import log as logging
23 24
 from oslo_utils import importutils
24 25
 
25
-from neutron._i18n import _LE
26
+from neutron._i18n import _LE, _LW
26 27
 from neutron.agent.common import config as agent_config
27 28
 from neutron.agent.common import ovs_lib
28 29
 from neutron.agent.l3 import dvr_fip_ns
@@ -32,7 +33,9 @@ from neutron.agent.linux import dhcp
32 33
 from neutron.agent.linux import external_process
33 34
 from neutron.agent.linux import interface
34 35
 from neutron.agent.linux import ip_lib
36
+from neutron.agent.linux import utils
35 37
 from neutron.common import config
38
+from neutron.common import utils as common_utils
36 39
 from neutron.conf.agent import cmd
37 40
 from neutron.conf.agent import dhcp as dhcp_config
38 41
 
@@ -45,6 +48,12 @@ NS_PREFIXES = {
45 48
            dvr_fip_ns.FIP_NS_PREFIX],
46 49
     'lbaas': [LB_NS_PREFIX],
47 50
 }
51
+SIGTERM_WAITTIME = 10
52
+NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d{2,6})/.*')
53
+
54
+
55
+class PidsInNamespaceException(Exception):
56
+    pass
48 57
 
49 58
 
50 59
 class FakeDhcpPlugin(object):
@@ -131,6 +140,87 @@ def unplug_device(conf, device):
131 140
         device.set_log_fail_as_error(orig_log_fail_as_error)
132 141
 
133 142
 
143
+def find_listen_pids_namespace(namespace):
144
+    """Retrieve a list of pids of listening processes within the given netns.
145
+
146
+    It executes netstat -nlp and returns a set of unique pairs
147
+    """
148
+    ip = ip_lib.IPWrapper(namespace=namespace)
149
+    pids = set()
150
+    cmd = ['netstat', '-nlp']
151
+    output = ip.netns.execute(cmd, run_as_root=True)
152
+    for line in output.splitlines():
153
+        m = NETSTAT_PIDS_REGEX.match(line)
154
+        if m:
155
+            pids.add(m.group('pid'))
156
+    return pids
157
+
158
+
159
+def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
160
+    """Poll listening processes within the given namespace.
161
+
162
+    If after timeout seconds, there are remaining processes in the namespace,
163
+    then a PidsInNamespaceException will be thrown.
164
+    """
165
+    # Would be better to handle an eventlet.timeout.Timeout exception
166
+    # but currently there's a problem importing eventlet since it's
167
+    # doing a local import from cmd/eventlet which doesn't have a
168
+    # timeout module
169
+    common_utils.wait_until_true(
170
+        lambda: not find_listen_pids_namespace(namespace),
171
+        timeout=SIGTERM_WAITTIME,
172
+        exception=PidsInNamespaceException)
173
+
174
+
175
+def _kill_listen_processes(namespace, force=False):
176
+    """Identify all listening processes within the given namespace.
177
+
178
+    Then, for each one, find its top parent with same cmdline (in case this
179
+    process forked) and issue a SIGTERM to all of them. If force is True,
180
+    then a SIGKILL will be issued to all parents and all their children. Also,
181
+    this function returns the number of listening processes.
182
+    """
183
+    pids = find_listen_pids_namespace(namespace)
184
+    pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids}
185
+    kill_signal = signal.SIGTERM
186
+    if force:
187
+        kill_signal = signal.SIGKILL
188
+        children = [utils.find_child_pids(pid, True) for pid in pids_to_kill]
189
+        pids_to_kill.update(itertools.chain.from_iterable(children))
190
+
191
+    for pid in pids_to_kill:
192
+        # Throw a warning since this particular cleanup may need a specific
193
+        # implementation in the right module. Ideally, netns_cleanup wouldn't
194
+        # kill any processes as the responsible module should've killed them
195
+        # before cleaning up the namespace
196
+        LOG.warning(_LW("Killing (%(signal)d) [%(pid)s] %(cmdline)s"),
197
+                    {'signal': kill_signal,
198
+                     'pid': pid,
199
+                     'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80]
200
+                     })
201
+        try:
202
+            utils.kill_process(pid, kill_signal, run_as_root=True)
203
+        except Exception as ex:
204
+            LOG.error(_LE('An error occurred while killing '
205
+                          '[%(pid)s]: %(msg)s'), {'pid': pid, 'msg': ex})
206
+    return len(pids)
207
+
208
+
209
+def kill_listen_processes(namespace):
210
+    """Kill all processes listening within the given namespace.
211
+
212
+    First it tries to kill them using SIGTERM, waits until they die gracefully
213
+    and then kills remaining processes (if any) with SIGKILL
214
+    """
215
+    if _kill_listen_processes(namespace, force=False):
216
+        try:
217
+            wait_until_no_listen_pids_namespace(namespace)
218
+        except PidsInNamespaceException:
219
+            _kill_listen_processes(namespace, force=True)
220
+            # Allow some time for remaining processes to die
221
+            wait_until_no_listen_pids_namespace(namespace)
222
+
223
+
134 224
 def destroy_namespace(conf, namespace, force=False):
135 225
     """Destroy a given namespace.
136 226
 
@@ -146,6 +236,14 @@ def destroy_namespace(conf, namespace, force=False):
146 236
             # NOTE: The dhcp driver will remove the namespace if is it empty,
147 237
             # so a second check is required here.
148 238
             if ip.netns.exists(namespace):
239
+                try:
240
+                    kill_listen_processes(namespace)
241
+                except PidsInNamespaceException:
242
+                    # This is unlikely since, at this point, we have SIGKILLed
243
+                    # all remaining processes but if there are still some, log
244
+                    # the error and continue with the cleanup
245
+                    LOG.error(_LE('Not all processes were killed in %s'),
246
+                              namespace)
149 247
                 for device in ip.get_devices(exclude_loopback=True):
150 248
                     unplug_device(conf, device)
151 249
 

+ 6
- 0
neutron/tests/contrib/functional-testing.filters View File

@@ -41,3 +41,9 @@ ovstrace_filter: RegExpFilter, ovs-appctl, root, ovs-appctl, ofproto/trace, .*,
41 41
 # needed for TestGetRootHelperChildPid
42 42
 bash_filter: RegExpFilter, /bin/bash, root, bash, -c, \(sleep 100\)
43 43
 sleep_kill: KillFilter, root, sleep, -9
44
+
45
+#needed by test_netns_cleanup
46
+process_spawn: EnvFilter, env, root, PATH=, python
47
+ip_exec: IpNetnsExecFilter, ip, root
48
+ps: CommandFilter, ps, root
49
+pid_kill: RegExpFilter, kill, root, kill, -\d+, .*

+ 180
- 0
neutron/tests/functional/cmd/process_spawn.py View File

@@ -0,0 +1,180 @@
1
+# Copyright 2016 Red Hat, Inc.
2
+#
3
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
4
+#    not use this file except in compliance with the License. You may obtain
5
+#    a copy of the License at
6
+#
7
+#         http://www.apache.org/licenses/LICENSE-2.0
8
+#
9
+#    Unless required by applicable law or agreed to in writing, software
10
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12
+#    License for the specific language governing permissions and limitations
13
+#    under the License.
14
+
15
+import os
16
+import signal
17
+import socket
18
+import sys
19
+import time
20
+
21
+from neutron_lib import constants as n_const
22
+from oslo_config import cfg
23
+
24
+from neutron._i18n import _
25
+from neutron.agent.linux import daemon
26
+from neutron.tests.common import net_helpers
27
+
28
+UNIX_FAMILY = 'UNIX'
29
+
30
+OPTS = [
31
+    cfg.IntOpt('num_children',
32
+               short='n',
33
+               default=0,
34
+               help=_('Number of children to spawn'),
35
+               required=False),
36
+    cfg.StrOpt('family',
37
+               short='f',
38
+               default=n_const.IPv4,
39
+               choices=[n_const.IPv4, n_const.IPv6, UNIX_FAMILY],
40
+               help=_('Listen socket family (%(v4)s, %(v6)s or %(unix)s)') %
41
+                     {
42
+                         'v4': n_const.IPv4,
43
+                         'v6': n_const.IPv6,
44
+                         'unix': UNIX_FAMILY
45
+                     },
46
+               required=False),
47
+    cfg.StrOpt('proto',
48
+               short='p',
49
+               default=n_const.PROTO_NAME_TCP,
50
+               choices=[n_const.PROTO_NAME_TCP, n_const.PROTO_NAME_UDP],
51
+               help=_('Protocol (%(tcp)s or %(udp)s)') %
52
+                     {
53
+                         'tcp': n_const.PROTO_NAME_TCP,
54
+                         'udp': n_const.PROTO_NAME_UDP
55
+                     },
56
+               required=False),
57
+    cfg.BoolOpt('parent_listen',
58
+               short='pl',
59
+               default=True,
60
+               help=_('Parent process must listen too'),
61
+               required=False),
62
+    cfg.BoolOpt('ignore_sigterm',
63
+                short='i',
64
+                default=False,
65
+                help=_('Ignore SIGTERM'),
66
+                required=False)
67
+]
68
+
69
+
70
+class ProcessSpawn(daemon.Daemon):
71
+    """
72
+    This class is part of the functional test of the netns_cleanup module.
73
+
74
+    It allows spawning processes that listen on random ports either on
75
+    tcp(6), udp(6) or unix sockets. Also it allows handling or ignoring
76
+    SIGTERM to check whether the cleanup works as expected by getting rid
77
+    of the spawned processes.
78
+    """
79
+
80
+    MAX_BIND_RETRIES = 5
81
+
82
+    DCT_FAMILY = {
83
+        n_const.IPv4: socket.AF_INET,
84
+        n_const.IPv6: socket.AF_INET6,
85
+        UNIX_FAMILY: socket.AF_UNIX
86
+    }
87
+    DCT_PROTO = {
88
+        n_const.PROTO_NAME_TCP: socket.SOCK_STREAM,
89
+        n_const.PROTO_NAME_UDP: socket.SOCK_DGRAM,
90
+    }
91
+
92
+    def __init__(self, pidfile=None,
93
+                 family=n_const.IPv4,
94
+                 proto=n_const.PROTO_NAME_TCP,
95
+                 ignore_sigterm=False, num_children=0,
96
+                 parent_must_listen=True):
97
+        self.family = family
98
+        self.proto = proto
99
+        self.ignore_sigterm = ignore_sigterm
100
+        self.num_children = num_children
101
+        self.listen_socket = None
102
+        self.parent_must_listen = parent_must_listen
103
+        self.child_pids = []
104
+
105
+        super(ProcessSpawn, self).__init__(pidfile)
106
+
107
+    def start_listening(self):
108
+        socket_family = self.DCT_FAMILY[self.family]
109
+        socket_type = self.DCT_PROTO[self.proto]
110
+
111
+        self.listen_socket = socket.socket(socket_family, socket_type)
112
+
113
+        # Try to listen in a random port which is not currently in use
114
+        retries = 0
115
+        while retries < ProcessSpawn.MAX_BIND_RETRIES:
116
+            # NOTE(dalvarez): not finding a free port on a freshly created
117
+            # namespace is very unlikely but if problems show up, retries can
118
+            # be increased to avoid tests failing
119
+            try:
120
+                if self.family == UNIX_FAMILY:
121
+                    self.listen_socket.bind('')
122
+                else:
123
+                    port = net_helpers.get_free_namespace_port(self.proto)
124
+                    self.listen_socket.bind(('', port))
125
+            except socket.error:
126
+                retries += 1
127
+            else:
128
+                if n_const.PROTO_NAME_TCP in self.proto:
129
+                    self.listen_socket.listen(0)
130
+                break
131
+
132
+    def do_sleep(self):
133
+        while True:
134
+            time.sleep(10)
135
+
136
+    def run(self):
137
+        # Spawn as many children as requested
138
+        children = []
139
+        while len(children) != self.num_children:
140
+            child_pid = os.fork()
141
+            if child_pid == 0:
142
+                # Listen and do nothing else
143
+                self.start_listening()
144
+                self.do_sleep()
145
+                return
146
+            children.append(child_pid)
147
+
148
+        # Install a SIGTERM handler if requested
149
+        if not self.ignore_sigterm:
150
+            signal.signal(signal.SIGTERM, self.sigterm_handler)
151
+
152
+        self.child_pids = children
153
+        if self.parent_must_listen:
154
+            self.start_listening()
155
+        self.do_sleep()
156
+
157
+    def sigterm_handler(self, signum, frame):
158
+        if self.listen_socket:
159
+            self.listen_socket.close()
160
+        for child in self.child_pids:
161
+            try:
162
+                os.kill(child, signal.SIGTERM)
163
+            except OSError:
164
+                pass
165
+        sys.exit(0)
166
+
167
+
168
+def main():
169
+    cfg.CONF.register_cli_opts(OPTS)
170
+    cfg.CONF(project='neutron', default_config_files=[])
171
+    proc_spawn = ProcessSpawn(num_children=cfg.CONF.num_children,
172
+                      family=cfg.CONF.family,
173
+                      proto=cfg.CONF.proto,
174
+                      parent_must_listen=cfg.CONF.parent_listen,
175
+                      ignore_sigterm=cfg.CONF.ignore_sigterm)
176
+    proc_spawn.start()
177
+
178
+
179
+if __name__ == "__main__":
180
+    main()

+ 76
- 0
neutron/tests/functional/cmd/test_netns_cleanup.py View File

@@ -13,19 +13,26 @@
13 13
 #    License for the specific language governing permissions and limitations
14 14
 #    under the License.
15 15
 
16
+import os
17
+
16 18
 import mock
19
+from neutron_lib import constants as n_const
17 20
 
18 21
 from neutron.agent.l3 import namespaces
19 22
 from neutron.agent.linux import dhcp
20 23
 from neutron.agent.linux import ip_lib
24
+from neutron.agent.linux import utils
21 25
 from neutron.cmd import netns_cleanup
26
+from neutron.common import utils as common_utils
22 27
 from neutron.conf.agent import cmd
23 28
 from neutron.tests import base as basetest
24 29
 from neutron.tests.common import net_helpers
25 30
 from neutron.tests.functional import base
31
+from neutron.tests.functional.cmd import process_spawn
26 32
 
27 33
 GET_NAMESPACES = 'neutron.agent.linux.ip_lib.IPWrapper.get_namespaces'
28 34
 TEST_INTERFACE_DRIVER = 'neutron.agent.linux.interface.OVSInterfaceDriver'
35
+NUM_SUBPROCESSES = 6
29 36
 
30 37
 
31 38
 class NetnsCleanupTest(base.BaseSudoTestCase):
@@ -60,13 +67,82 @@ class NetnsCleanupTest(base.BaseSudoTestCase):
60 67
         # tests, as otherwise cleanup will kill them all
61 68
         self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]
62 69
 
70
+        # launch processes in each namespace to make sure they're
71
+        # killed during cleanup
72
+        procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
73
+        self.assertIsNot(procs_launched, 0)
74
+        common_utils.wait_until_true(
75
+            lambda: self._get_num_spawned_procs() == procs_launched,
76
+            timeout=15,
77
+            exception=Exception("Didn't spawn expected number of processes"))
78
+
63 79
         netns_cleanup.cleanup_network_namespaces(self.conf)
64 80
 
65 81
         self.get_namespaces_p.stop()
66 82
         namespaces_now = ip_lib.IPWrapper.get_namespaces()
83
+        procs_after = self._get_num_spawned_procs()
84
+        self.assertEqual(procs_after, 0)
67 85
         self.assertNotIn(l3_namespace, namespaces_now)
68 86
         self.assertNotIn(dhcp_namespace, namespaces_now)
69 87
 
88
+    @staticmethod
89
+    def _launch_processes(namespaces):
90
+        """
91
+        Launch processes in the specified namespaces.
92
+
93
+        This function will spawn processes inside the given namespaces:
94
+                - 6 processes listening on tcp ports (parent + 5 children)
95
+                - 1 process + 5 subprocesses listening on unix sockets
96
+                - 1 process + 5 subprocesses listening on udp6 sockets
97
+
98
+        First two sets of processes will process SIGTERM so when the parent
99
+        gets killed, it will kill all spawned children
100
+        The last set of processes will ignore SIGTERM. This will allow us
101
+        to test the cleanup functionality which will issue a SIGKILL
102
+        to all remaining processes after the SIGTERM attempt
103
+        """
104
+        commands = [['python', process_spawn.__file__,
105
+                     '-n', NUM_SUBPROCESSES,
106
+                     '-f', n_const.IPv4,
107
+                     '-p', n_const.PROTO_NAME_TCP,
108
+                     '--noignore_sigterm',
109
+                     '--parent_listen'],
110
+                    ['python', process_spawn.__file__,
111
+                     '-n', NUM_SUBPROCESSES,
112
+                     '-f', process_spawn.UNIX_FAMILY,
113
+                     '-p', n_const.PROTO_NAME_TCP,
114
+                     '--noignore_sigterm',
115
+                     '--noparent_listen'],
116
+                    ['python', process_spawn.__file__,
117
+                     '-n', NUM_SUBPROCESSES,
118
+                     '-f', n_const.IPv4,
119
+                     '-p', n_const.PROTO_NAME_UDP,
120
+                     '--ignore_sigterm',
121
+                     '--noparent_listen']]
122
+
123
+        proc_count = 0
124
+        for netns in namespaces:
125
+            ip = ip_lib.IPWrapper(namespace=netns)
126
+            for command in commands:
127
+                # The total amount of processes per command is
128
+                # the process itself plus the number of subprocesses spawned by
129
+                # it
130
+                proc_count += (1 + NUM_SUBPROCESSES)
131
+                # We need to pass the PATH env variable so that python
132
+                # interpreter runs under the same virtual environment.
133
+                # Otherwise, it won't find the necessary packages such as
134
+                # oslo_config
135
+                ip.netns.execute(command,
136
+                                 addl_env={'PATH':
137
+                                           os.environ.get('PATH')})
138
+        return proc_count
139
+
140
+    @staticmethod
141
+    def _get_num_spawned_procs():
142
+        cmd = ['ps', '-f', '-u', 'root']
143
+        out = utils.execute(cmd, run_as_root=True)
144
+        return sum([1 for line in out.splitlines() if 'process_spawn' in line])
145
+
70 146
 
71 147
 class TestNETNSCLIConfig(basetest.BaseTestCase):
72 148
 

+ 5
- 7
neutron/tests/unit/agent/linux/test_async_process.py View File

@@ -198,20 +198,18 @@ class TestAsyncProcess(base.BaseTestCase):
198 198
             exc = RuntimeError(exception_message)
199 199
         else:
200 200
             exc = None
201
-        with mock.patch.object(utils, 'execute',
202
-                               side_effect=exc) as mock_execute:
201
+        with mock.patch.object(utils, 'kill_process',
202
+                               side_effect=exc) as mock_kill_process:
203 203
             actual = self.proc._kill_process(pid, kill_signal)
204 204
 
205 205
         self.assertEqual(expected, actual)
206
-        mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
207
-                                        run_as_root=self.proc.run_as_root)
206
+        mock_kill_process.assert_called_with(pid,
207
+                                             kill_signal,
208
+                                             self.proc.run_as_root)
208 209
 
209 210
     def test__kill_process_returns_true_for_valid_pid(self):
210 211
         self._test__kill_process('1', True)
211 212
 
212
-    def test__kill_process_returns_true_for_stale_pid(self):
213
-        self._test__kill_process('1', True, 'No such process')
214
-
215 213
     def test__kill_process_returns_false_for_execute_exception(self):
216 214
         self._test__kill_process('1', False, 'Invalid')
217 215
 

+ 103
- 0
neutron/tests/unit/agent/linux/test_utils.py View File

@@ -12,6 +12,7 @@
12 12
 #    License for the specific language governing permissions and limitations
13 13
 #    under the License.
14 14
 
15
+import signal
15 16
 import socket
16 17
 
17 18
 import mock
@@ -181,6 +182,100 @@ class AgentUtilsGetInterfaceMAC(base.BaseTestCase):
181 182
         self.assertEqual(actual_val, expect_val)
182 183
 
183 184
 
185
+class TestFindParentPid(base.BaseTestCase):
186
+    def setUp(self):
187
+        super(TestFindParentPid, self).setUp()
188
+        self.m_execute = mock.patch.object(utils, 'execute').start()
189
+
190
+    def test_returns_none_for_no_valid_pid(self):
191
+        self.m_execute.side_effect = utils.ProcessExecutionError('',
192
+                                                                 returncode=1)
193
+        self.assertIsNone(utils.find_parent_pid(-1))
194
+
195
+    def test_returns_parent_id_for_good_ouput(self):
196
+        self.m_execute.return_value = '123 \n'
197
+        self.assertEqual(utils.find_parent_pid(-1), '123')
198
+
199
+    def test_raises_exception_returncode_0(self):
200
+        with testtools.ExpectedException(utils.ProcessExecutionError):
201
+            self.m_execute.side_effect = \
202
+                utils.ProcessExecutionError('', returncode=0)
203
+            utils.find_parent_pid(-1)
204
+
205
+    def test_raises_unknown_exception(self):
206
+        with testtools.ExpectedException(RuntimeError):
207
+            self.m_execute.side_effect = RuntimeError()
208
+            utils.find_parent_pid(-1)
209
+
210
+
211
+class TestFindForkTopParent(base.BaseTestCase):
212
+    def _test_find_fork_top_parent(self, expected=_marker,
213
+                                   find_parent_pid_retvals=None,
214
+                                   pid_invoked_with_cmdline_retvals=None):
215
+        def _find_parent_pid(x):
216
+            if find_parent_pid_retvals:
217
+                return find_parent_pid_retvals.pop(0)
218
+
219
+        pid_invoked_with_cmdline = {}
220
+        if pid_invoked_with_cmdline_retvals:
221
+            pid_invoked_with_cmdline['side_effect'] = (
222
+                pid_invoked_with_cmdline_retvals)
223
+        else:
224
+            pid_invoked_with_cmdline['return_value'] = False
225
+        with mock.patch.object(utils, 'find_parent_pid',
226
+                               side_effect=_find_parent_pid), \
227
+                mock.patch.object(utils, 'pid_invoked_with_cmdline',
228
+                                  **pid_invoked_with_cmdline):
229
+                    actual = utils.find_fork_top_parent(_marker)
230
+        self.assertEqual(expected, actual)
231
+
232
+    def test_returns_own_pid_no_parent(self):
233
+        self._test_find_fork_top_parent()
234
+
235
+    def test_returns_own_pid_nofork(self):
236
+        self._test_find_fork_top_parent(find_parent_pid_retvals=['2', '3'])
237
+
238
+    def test_returns_first_parent_pid_fork(self):
239
+        self._test_find_fork_top_parent(
240
+            expected='2',
241
+            find_parent_pid_retvals=['2', '3', '4'],
242
+            pid_invoked_with_cmdline_retvals=[True, False, False])
243
+
244
+    def test_returns_top_parent_pid_fork(self):
245
+        self._test_find_fork_top_parent(
246
+            expected='4',
247
+            find_parent_pid_retvals=['2', '3', '4'],
248
+            pid_invoked_with_cmdline_retvals=[True, True, True])
249
+
250
+
251
+class TestKillProcess(base.BaseTestCase):
252
+    def _test_kill_process(self, pid, exception_message=None,
253
+                           kill_signal=signal.SIGKILL):
254
+        if exception_message:
255
+            exc = utils.ProcessExecutionError(exception_message, returncode=0)
256
+        else:
257
+            exc = None
258
+        with mock.patch.object(utils, 'execute',
259
+                               side_effect=exc) as mock_execute:
260
+            utils.kill_process(pid, kill_signal, run_as_root=True)
261
+
262
+        mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
263
+                                        run_as_root=True)
264
+
265
+    def test_kill_process_returns_none_for_valid_pid(self):
266
+        self._test_kill_process('1')
267
+
268
+    def test_kill_process_returns_none_for_stale_pid(self):
269
+        self._test_kill_process('1', 'No such process')
270
+
271
+    def test_kill_process_raises_exception_for_execute_exception(self):
272
+        with testtools.ExpectedException(utils.ProcessExecutionError):
273
+            self._test_kill_process('1', 'Invalid')
274
+
275
+    def test_kill_process_with_different_signal(self):
276
+        self._test_kill_process('1', kill_signal=signal.SIGTERM)
277
+
278
+
184 279
 class TestFindChildPids(base.BaseTestCase):
185 280
 
186 281
     def test_returns_empty_list_for_exit_code_1(self):
@@ -197,6 +292,14 @@ class TestFindChildPids(base.BaseTestCase):
197 292
         with mock.patch.object(utils, 'execute', return_value=' 123 \n 185\n'):
198 293
             self.assertEqual(utils.find_child_pids(-1), ['123', '185'])
199 294
 
295
+    def test_returns_list_of_child_process_ids_recursively(self):
296
+        with mock.patch.object(utils, 'execute',
297
+                               side_effect=[' 123 \n 185\n',
298
+                                            ' 40 \n', '\n',
299
+                                            '41\n', '\n']):
300
+            actual = utils.find_child_pids(-1, True)
301
+            self.assertEqual(actual, ['123', '185', '40', '41'])
302
+
200 303
     def test_raises_unknown_exception(self):
201 304
         with testtools.ExpectedException(RuntimeError):
202 305
             with mock.patch.object(utils, 'execute',

+ 163
- 14
neutron/tests/unit/cmd/test_netns_cleanup.py View File

@@ -13,11 +13,47 @@
13 13
 #    License for the specific language governing permissions and limitations
14 14
 #    under the License.
15 15
 
16
+import signal
17
+
16 18
 import mock
19
+import testtools
17 20
 
18 21
 from neutron.cmd import netns_cleanup as util
19 22
 from neutron.tests import base
20 23
 
24
+NETSTAT_NETNS_OUTPUT = ("""
25
+Active Internet connections (only servers)
26
+Proto Recv-Q Send-Q Local Address           Foreign Address         State\
27
+       PID/Program name
28
+tcp        0      0 0.0.0.0:9697            0.0.0.0:*               LISTEN\
29
+      1347/python
30
+raw        0      0 0.0.0.0:112             0.0.0.0:*               7\
31
+           1279/keepalived
32
+raw        0      0 0.0.0.0:112             0.0.0.0:*               7\
33
+           1279/keepalived
34
+raw6       0      0 :::58                   :::*                    7\
35
+           1349/radvd
36
+Active UNIX domain sockets (only servers)
37
+Proto RefCnt Flags       Type       State         I-Node   PID/Program name\
38
+     Path
39
+unix  2      [ ACC ]     STREAM     LISTENING     82039530 1353/python\
40
+          /tmp/rootwrap-VKSm8a/rootwrap.sock
41
+""")
42
+
43
+NETSTAT_NO_NAMESPACE = ("""
44
+Cannot open network namespace "qrouter-e6f206b2-4e8d-4597-a7e1-c3a20337e9c6":\
45
+ No such file or directory
46
+""")
47
+
48
+NETSTAT_NO_LISTEN_PROCS = ("""
49
+Active Internet connections (only servers)
50
+Proto Recv-Q Send-Q Local Address           Foreign Address         State\
51
+       PID/Program name
52
+Active UNIX domain sockets (only servers)
53
+Proto RefCnt Flags       Type       State         I-Node   PID/Program name\
54
+     Path
55
+""")
56
+
21 57
 
22 58
 class TestNetnsCleanup(base.BaseTestCase):
23 59
     def setUp(self):
@@ -163,6 +199,115 @@ class TestNetnsCleanup(base.BaseTestCase):
163 199
                     self.assertEqual([], ovs_br_cls.mock_calls)
164 200
                     self.assertTrue(debug.called)
165 201
 
202
+    def _test_find_listen_pids_namespace_helper(self, expected,
203
+                                                netstat_output=None):
204
+        with mock.patch('neutron.agent.linux.ip_lib.IPWrapper') as ip_wrap:
205
+            ip_wrap.return_value.netns.execute.return_value = netstat_output
206
+            observed = util.find_listen_pids_namespace(mock.ANY)
207
+            self.assertEqual(expected, observed)
208
+
209
+    def test_find_listen_pids_namespace_correct_output(self):
210
+        expected = set(['1347', '1279', '1349', '1353'])
211
+        self._test_find_listen_pids_namespace_helper(expected,
212
+                                                     NETSTAT_NETNS_OUTPUT)
213
+
214
+    def test_find_listen_pids_namespace_no_procs(self):
215
+        expected = set()
216
+        self._test_find_listen_pids_namespace_helper(expected,
217
+                                                     NETSTAT_NO_LISTEN_PROCS)
218
+
219
+    def test_find_listen_pids_namespace_no_namespace(self):
220
+        expected = set()
221
+        self._test_find_listen_pids_namespace_helper(expected,
222
+                                                     NETSTAT_NO_NAMESPACE)
223
+
224
+    def _test__kill_listen_processes_helper(self, pids, parents, children,
225
+                                            kills_expected, force):
226
+        def _get_element(dct, x):
227
+            return dct.get(x, [])
228
+
229
+        def _find_childs(x, recursive):
230
+            return _get_element(children, x)
231
+
232
+        def _find_parent(x):
233
+            return _get_element(parents, x)
234
+
235
+        utils_mock = dict(
236
+            find_fork_top_parent=mock.DEFAULT,
237
+            find_child_pids=mock.DEFAULT,
238
+            get_cmdline_from_pid=mock.DEFAULT,
239
+            kill_process=mock.DEFAULT)
240
+
241
+        self.log_mock = mock.patch.object(util, 'LOG').start()
242
+        with mock.patch.multiple('neutron.agent.linux.utils', **utils_mock)\
243
+                as mocks:
244
+            mocks['find_fork_top_parent'].side_effect = _find_parent
245
+            mocks['find_child_pids'].side_effect = _find_childs
246
+
247
+            with mock.patch.object(util, 'find_listen_pids_namespace',
248
+                                   return_value=pids):
249
+                calls = []
250
+                for pid, sig in kills_expected:
251
+                    calls.append(mock.call(pid, sig, run_as_root=True))
252
+                util._kill_listen_processes(mock.ANY, force=force)
253
+                mocks['kill_process'].assert_has_calls(calls, any_order=True)
254
+
255
+    def test__kill_listen_processes_only_parents_force_false(self):
256
+        pids = ['4', '5', '6']
257
+        parents = {'4': '1', '5': '5', '6': '2'}
258
+        children = {}
259
+        kills_expected = [('1', signal.SIGTERM),
260
+                          ('5', signal.SIGTERM),
261
+                          ('2', signal.SIGTERM)]
262
+
263
+        self._test__kill_listen_processes_helper(pids, parents, children,
264
+                                                 kills_expected, False)
265
+
266
+    def test__kill_listen_processes_parents_and_childs(self):
267
+        pids = ['4', '5', '6']
268
+        parents = {'4': '1', '5': '2', '6': '3'}
269
+        children = {'1': ['4'], '2': ['5'], '3': ['6', '8', '7']}
270
+        kills_expected = [(str(x), signal.SIGKILL) for x in range(1, 9)]
271
+        self._test__kill_listen_processes_helper(pids, parents, children,
272
+                                                 kills_expected, True)
273
+
274
+    def test_kill_listen_processes(self):
275
+        with mock.patch.object(util, '_kill_listen_processes',
276
+                               return_value=1) as mock_kill_listen:
277
+            with mock.patch('neutron.common.utils.wait_until_true')\
278
+                    as wait_until_true_mock:
279
+                wait_until_true_mock.side_effect = [
280
+                    util.PidsInNamespaceException,
281
+                    None]
282
+                namespace = mock.ANY
283
+                util.kill_listen_processes(namespace)
284
+                mock_kill_listen.assert_has_calls(
285
+                    [mock.call(namespace, force=False),
286
+                     mock.call(namespace, force=True)])
287
+
288
+    def test_kill_listen_processes_still_procs(self):
289
+        with mock.patch.object(util, '_kill_listen_processes',
290
+                               return_value=1):
291
+            with mock.patch('neutron.common.utils.wait_until_true')\
292
+                    as wait_until_true_mock:
293
+                wait_until_true_mock.side_effect = (
294
+                    util.PidsInNamespaceException)
295
+                namespace = mock.ANY
296
+                with testtools.ExpectedException(
297
+                        util.PidsInNamespaceException):
298
+                    util.kill_listen_processes(namespace)
299
+
300
+    def test_kill_listen_processes_no_procs(self):
301
+        with mock.patch.object(util, '_kill_listen_processes',
302
+                               return_value=0) as mock_kill_listen:
303
+            with mock.patch('neutron.common.utils.wait_until_true')\
304
+                    as wait_until_true_mock:
305
+                namespace = mock.ANY
306
+                util.kill_listen_processes(namespace)
307
+                mock_kill_listen.assert_called_once_with(namespace,
308
+                                                         force=False)
309
+                self.assertFalse(wait_until_true_mock.called)
310
+
166 311
     def _test_destroy_namespace_helper(self, force, num_devices):
167 312
         ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'
168 313
         conf = mock.Mock()
@@ -182,23 +327,27 @@ class TestNetnsCleanup(base.BaseTestCase):
182 327
             ip_wrap.return_value.get_devices.return_value = devices
183 328
             ip_wrap.return_value.netns.exists.return_value = True
184 329
 
185
-            with mock.patch.object(util, 'unplug_device') as unplug:
330
+            with mock.patch.object(util, 'kill_listen_processes'):
331
+
332
+                with mock.patch.object(util, 'unplug_device') as unplug:
186 333
 
187
-                with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
188
-                    util.destroy_namespace(conf, ns, force)
189
-                    expected = [mock.call(namespace=ns)]
334
+                    with mock.patch.object(util, 'kill_dhcp') as kill_dhcp:
335
+                        util.destroy_namespace(conf, ns, force)
336
+                        expected = [mock.call(namespace=ns)]
190 337
 
191
-                    if force:
192
-                        expected.extend([
193
-                            mock.call().netns.exists(ns),
194
-                            mock.call().get_devices(exclude_loopback=True)])
195
-                        self.assertTrue(kill_dhcp.called)
196
-                        unplug.assert_has_calls(
197
-                            [mock.call(conf, d) for d in
198
-                             devices[1:]])
338
+                        if force:
339
+                            expected.extend([
340
+                                mock.call().netns.exists(ns),
341
+                                mock.call().get_devices(
342
+                                    exclude_loopback=True)])
343
+                            self.assertTrue(kill_dhcp.called)
344
+                            unplug.assert_has_calls(
345
+                                [mock.call(conf, d) for d in
346
+                                 devices[1:]])
199 347
 
200
-                    expected.append(mock.call().garbage_collect_namespace())
201
-                    ip_wrap.assert_has_calls(expected)
348
+                        expected.append(
349
+                            mock.call().garbage_collect_namespace())
350
+                        ip_wrap.assert_has_calls(expected)
202 351
 
203 352
     def test_destroy_namespace_empty(self):
204 353
         self._test_destroy_namespace_helper(False, 0)

+ 8
- 0
releasenotes/notes/netns_cleanup_kill_procs-af88d8c47c07dd9c.yaml View File

@@ -0,0 +1,8 @@
1
+---
2
+features:
3
+  - A new mechanism has been added to netns_cleanup to
4
+    kill processes that are listening on any port/unix
5
+    socket within the namespace. This will try to kill
6
+    them gracefully via SIGTERM and, if they don't die,
7
+    then a SIGKILL will be sent to the remaining
8
+    processes to ensure a proper cleanup.

+ 1
- 0
setup.cfg View File

@@ -36,6 +36,7 @@ data_files =
36 36
         etc/neutron/rootwrap.d/ipset-firewall.filters
37 37
         etc/neutron/rootwrap.d/l3.filters
38 38
         etc/neutron/rootwrap.d/linuxbridge-plugin.filters
39
+        etc/neutron/rootwrap.d/netns-cleanup.filters
39 40
         etc/neutron/rootwrap.d/openvswitch-plugin.filters
40 41
 scripts =
41 42
     bin/neutron-rootwrap-xen-dom0

Loading…
Cancel
Save