From 157e09e6af758b7669fbe5a8cdb0b1969f04661a Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Sun, 17 Mar 2019 11:37:40 +0100 Subject: [PATCH] Try to enable dnsmasq process several times Sometimes, during a restart of the dnsmasq process, it may happen that after the process is killed, the start attempt is made too fast, before the old process has really unbound from the IP address on which it was listening. That causes an issue with starting the dnsmasq process again. In patch [1] the disable() method was changed so that it can wait until the process is really not active (no pid for it), but that didn't solve the problem with starting a new dnsmasq process completely and sometimes it still happens, at least in functional tests. So now the enable() method is changed so that it will try to enable the dnsmasq process for 1 minute, until it is really spawned properly. [1] https://review.openstack.org/#/c/634390/ Change-Id: I18d73b787fa3ab8803e12d5e5eb2bb7109205aba Closes-Bug: #1811126 --- neutron/agent/linux/dhcp.py | 32 ++++++++++++++------- neutron/tests/unit/agent/linux/test_dhcp.py | 19 ++++++++---- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/neutron/agent/linux/dhcp.py b/neutron/agent/linux/dhcp.py index 394cbe78b29..4f2e3c74cf7 100644 --- a/neutron/agent/linux/dhcp.py +++ b/neutron/agent/linux/dhcp.py @@ -217,13 +217,26 @@ class DhcpLocalProcess(DhcpBase): def enable(self): """Enables DHCP for this network by spawning a local process.""" - if self.active: - self.restart() - elif self._enable_dhcp(): - fileutils.ensure_tree(self.network_conf_dir, mode=0o755) - interface_name = self.device_manager.setup(self.network) - self.interface_name = interface_name - self.spawn_process() + try: + common_utils.wait_until_true(self._enable) + except common_utils.WaitTimeout: + LOG.error("Failed to start DHCP process for network %s", + self.network.id) + + def _enable(self): + try: + if self.active: + self.restart() + elif self._enable_dhcp(): + fileutils.ensure_tree(self.network_conf_dir, 
mode=0o755) + interface_name = self.device_manager.setup(self.network) + self.interface_name = interface_name + self.spawn_process() + return True + except exceptions.ProcessExecutionError as error: + LOG.debug("Spawning DHCP process for network %s failed; " + "Error: %s", self.network.id, error) + return False def _get_process_manager(self, cmd_callback=None): return external_process.ProcessManager( @@ -237,10 +250,9 @@ class DhcpLocalProcess(DhcpBase): def disable(self, retain_port=False, block=False): """Disable DHCP for this network by killing the local process.""" self.process_monitor.unregister(self.network.id, DNSMASQ_SERVICE_NAME) - pm = self._get_process_manager() - pm.disable() + self._get_process_manager().disable() if block: - common_utils.wait_until_true(lambda: not pm.active) + common_utils.wait_until_true(lambda: not self.active) if not retain_port: self._destroy_namespace_and_port() self._remove_config_files() diff --git a/neutron/tests/unit/agent/linux/test_dhcp.py b/neutron/tests/unit/agent/linux/test_dhcp.py index 5c55866afdb..6b21526eb74 100644 --- a/neutron/tests/unit/agent/linux/test_dhcp.py +++ b/neutron/tests/unit/agent/linux/test_dhcp.py @@ -19,6 +19,7 @@ import mock import netaddr from neutron_lib.api.definitions import extra_dhcp_opt as edo_ext from neutron_lib import constants +from neutron_lib import exceptions from oslo_config import cfg import oslo_messaging from oslo_utils import fileutils @@ -1134,23 +1135,31 @@ class TestDhcpLocalProcess(TestBase): @mock.patch.object(fileutils, 'ensure_tree') def test_enable(self, ensure_dir): attrs_to_mock = dict( - (a, mock.DEFAULT) for a in ['active', 'interface_name'] + (a, mock.DEFAULT) for a in + ['active', 'interface_name', 'spawn_process'] ) with mock.patch.multiple(LocalChild, **attrs_to_mock) as mocks: mocks['active'].__get__ = mock.Mock(return_value=False) mocks['interface_name'].__set__ = mock.Mock() + mocks['spawn_process'].side_effect = [ + exceptions.ProcessExecutionError( + 
returncode=2, message="Test dnsmasq start failed"), + None] lp = LocalChild(self.conf, FakeDualNetwork()) + lp.enable() self.mock_mgr.assert_has_calls( [mock.call(self.conf, None), mock.call().setup(mock.ANY)]) - self.assertEqual(lp.called, ['spawn']) - self.assertTrue(mocks['interface_name'].__set__.called) - ensure_dir.assert_called_with( - '/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755) + self.assertEqual(2, mocks['interface_name'].__set__.call_count) + ensure_dir.assert_has_calls([ + mock.call( + '/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755), + mock.call( + '/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755)]) def _assert_disabled(self, lp): self.assertTrue(lp.process_monitor.unregister.called)