From fd1403fd9a971cf3cbd863fa33ca68eb019fbdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awek=20Kap=C5=82o=C5=84ski?= Date: Thu, 14 Sep 2017 02:11:50 +0000 Subject: [PATCH] Fix for race condition during netns creation In some cases if ip_lib.IPWrapper.ensure_namespace() method is called more than once for same namespace in very short period of time it could raise error that "File already exists" for second call of this method. It happens often e.g. in fullstack tests. Reason of such problem is in Netlink protocol which is used by iproute2 to communicate with kernel. This protocol, according to http://man7.org/linux/man-pages/man7/netlink.7.html is not reliable so it is not guaranteed when the message will be delivered to kernel and when action will be really executed. Because of that if on quite loaded host ensure_namespace() method would be executed twice it can lead to error described above. This patch is changing way how ensure_namespace() method works to avoid raising ProcessExecutionError exception with this error message. Closes-Bug: #1717582 Change-Id: I1898426789c85ce1faa97665bfd47f1fa38ef727 --- neutron/agent/linux/ip_lib.py | 22 ++++++++++--- .../functional/agent/linux/test_ip_lib.py | 12 +++++++ .../tests/unit/agent/l3/test_dvr_fip_ns.py | 7 +++- neutron/tests/unit/agent/linux/test_ip_lib.py | 33 +++++++++++++++---- 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py index 8f501d9f887..b909f2ba0a9 100644 --- a/neutron/agent/linux/ip_lib.py +++ b/neutron/agent/linux/ip_lib.py @@ -29,6 +29,7 @@ import six from neutron._i18n import _ from neutron.agent.common import utils +from neutron.agent.linux import utils as linux_utils from neutron.common import exceptions as n_exc from neutron.common import utils as common_utils from neutron.privileged.agent.linux import ip_lib as privileged @@ -202,12 +203,23 @@ class IPWrapper(SubProcessBase): return IPDevice(name, namespace=self.namespace) def ensure_namespace(self, name): - if not self.netns.exists(name): + # we must save and restore this before returning + orig_log_fail_as_error = self.get_log_fail_as_error() + self.set_log_fail_as_error(False) + try: ip = self.netns.add(name) - lo = ip.device(LOOPBACK_DEVNAME) - lo.link.set_up() - else: - ip = IPWrapper(namespace=name) + except linux_utils.ProcessExecutionError: + if self.netns.exists(name): + # NOTE(slaweq): error which was catched here might be actually + # because of namespace already exists, in such case it's not + # a real issue + return IPWrapper(namespace=name) + raise + finally: + self.set_log_fail_as_error(orig_log_fail_as_error) + + lo = ip.device(LOOPBACK_DEVNAME) + lo.link.set_up() return ip def namespace_is_empty(self): diff --git a/neutron/tests/functional/agent/linux/test_ip_lib.py b/neutron/tests/functional/agent/linux/test_ip_lib.py index 79bc10c079a..d7b232f6223 100644 --- a/neutron/tests/functional/agent/linux/test_ip_lib.py +++ b/neutron/tests/functional/agent/linux/test_ip_lib.py @@ -185,6 +185,18 @@ class IpLibTestCase(IpLibTestFramework): device.link.delete() + def test_ensure_namespace(self): + attr = self.generate_device_details() + ip = ip_lib.IPWrapper() + + # Try to create same namespace twice, should be no error in both cases + ns = ip.ensure_namespace(attr.namespace) + ns2 = ip.ensure_namespace(attr.namespace) + + self.assertTrue(ip.netns.exists(attr.namespace)) + self.assertEqual(attr.namespace, ns.namespace) + self.assertEqual(attr.namespace, ns2.namespace) + def test_get_routing_table(self): attr = self.generate_device_details( ip_cidrs=["%s/24" % TEST_IP, "fd00::1/64"] diff --git a/neutron/tests/unit/agent/l3/test_dvr_fip_ns.py b/neutron/tests/unit/agent/l3/test_dvr_fip_ns.py index 57414183bf6..1d27da8dd36 100644 --- a/neutron/tests/unit/agent/l3/test_dvr_fip_ns.py +++ b/neutron/tests/unit/agent/l3/test_dvr_fip_ns.py @@ -24,6 +24,7 @@ from neutron.agent.l3 import link_local_allocator as lla from neutron.agent.l3 import router_info from neutron.agent.linux import ip_lib from neutron.agent.linux import iptables_manager +from neutron.agent.linux import utils as linux_utils from neutron.common import exceptions as n_exc from neutron.tests import base @@ -189,8 +190,12 @@ class TestDvrFipNs(base.BaseTestCase): @mock.patch.object(iptables_manager, 'IptablesManager') @mock.patch.object(utils, 'execute') + @mock.patch.object(ip_lib.IpNetnsCommand, 'add') @mock.patch.object(ip_lib.IpNetnsCommand, 'exists') - def _test_create(self, old_kernel, exists, execute, IPTables): + def _test_create(self, old_kernel, exists, add, execute, IPTables): + add.side_effect = linux_utils.ProcessExecutionError( + message="Cannot create namespace file ns: File exists", + returncode=1) exists.return_value = True # There are up to six sysctl calls - two to enable forwarding, # two for arp_ignore and arp_announce, and two for ip_nonlocal_bind diff --git a/neutron/tests/unit/agent/linux/test_ip_lib.py b/neutron/tests/unit/agent/linux/test_ip_lib.py index 99996545e97..7cb744b6b6e 100644 --- a/neutron/tests/unit/agent/linux/test_ip_lib.py +++ b/neutron/tests/unit/agent/linux/test_ip_lib.py @@ -26,6 +26,7 @@ import testtools from neutron.agent.common import utils # noqa from neutron.agent.linux import ip_lib +from neutron.agent.linux import utils as linux_utils from neutron.common import exceptions as n_exc from neutron import privileged from neutron.privileged.agent.linux import ip_lib as priv_lib @@ -403,21 +404,39 @@ class TestIpWrapper(base.BaseTestCase): ip = ip_lib.IPWrapper() with mock.patch.object(ip.netns, 'exists') as ns_exists: with mock.patch('neutron.agent.common.utils.execute'): - ns_exists.return_value = False ip.ensure_namespace('ns') self.execute.assert_has_calls( [mock.call([], 'netns', ('add', 'ns'), run_as_root=True, namespace=None, - log_fail_as_error=True)]) + log_fail_as_error=False)]) + ns_exists.assert_not_called() ip_dev.assert_has_calls([mock.call('lo', namespace='ns'), mock.call().link.set_up()]) def test_ensure_namespace_existing(self): - with mock.patch.object(ip_lib, 'IpNetnsCommand') as ip_ns_cmd: - ip_ns_cmd.exists.return_value = True - ns = ip_lib.IPWrapper().ensure_namespace('ns') - self.assertFalse(self.execute.called) - self.assertEqual(ns.namespace, 'ns') + with mock.patch.object(ip_lib, 'IPDevice') as ip_dev: + ip = ip_lib.IPWrapper() + with mock.patch.object(ip.netns, 'add') as ns_add_cmd, \ + mock.patch.object(ip.netns, 'exists') as ns_exists_cmd: + ns_add_cmd.side_effect = linux_utils.ProcessExecutionError( + message=("Cannot create namespace file /var/run/netns/ns: " + "File exists"), + returncode=1) + ns_exists_cmd.return_value = True + ns = ip.ensure_namespace('ns') + self.assertEqual(ns.namespace, 'ns') + ip_dev.assert_not_called() + + def test_ensure_namespace_error(self): + with mock.patch.object(ip_lib, 'IPDevice'): + ip = ip_lib.IPWrapper() + with mock.patch.object(ip.netns, 'add') as ns_add_cmd, \ + mock.patch.object(ip.netns, 'exists') as ns_exists_cmd: + ns_add_cmd.side_effect = linux_utils.ProcessExecutionError( + message="Test add namespace failed", returncode=1) + ns_exists_cmd.return_value = False + self.assertRaises(linux_utils.ProcessExecutionError, + ip.ensure_namespace, 'ns') def test_namespace_is_empty_no_devices(self): ip = ip_lib.IPWrapper(namespace='ns')