Try to enable dnsmasq process several times

Sometimes, during a restart of the dnsmasq process, it may happen that
after the process is killed, the start attempt is made too quickly,
before the old process has really unbound from the IP address on which
it was listening. That causes an issue with starting the dnsmasq
process again.

In patch [1] the disable() method was changed so that it can wait
until the process is really not active (no pid for it), but that didn't
completely solve the problem with starting a new dnsmasq process, and
it sometimes still happens, at least in functional tests.

So now the enable() method is changed so that it will keep trying to
enable the dnsmasq process for up to 1 minute, until it is really
spawned properly.

[1] https://review.openstack.org/#/c/634390/

Change-Id: I18d73b787fa3ab8803e12d5e5eb2bb7109205aba
Closes-Bug: #1811126
This commit is contained in:
Slawek Kaplonski 2019-03-17 11:37:40 +01:00
parent a8f6161b17
commit 157e09e6af
2 changed files with 36 additions and 15 deletions

View File

@ -217,6 +217,14 @@ class DhcpLocalProcess(DhcpBase):
def enable(self): def enable(self):
"""Enables DHCP for this network by spawning a local process.""" """Enables DHCP for this network by spawning a local process."""
try:
common_utils.wait_until_true(self._enable)
except common_utils.WaitTimeout:
LOG.error("Failed to start DHCP process for network %s",
self.network.id)
def _enable(self):
try:
if self.active: if self.active:
self.restart() self.restart()
elif self._enable_dhcp(): elif self._enable_dhcp():
@ -224,6 +232,11 @@ class DhcpLocalProcess(DhcpBase):
interface_name = self.device_manager.setup(self.network) interface_name = self.device_manager.setup(self.network)
self.interface_name = interface_name self.interface_name = interface_name
self.spawn_process() self.spawn_process()
return True
except exceptions.ProcessExecutionError as error:
LOG.debug("Spawning DHCP process for network %s failed; "
"Error: %s", self.network.id, error)
return False
def _get_process_manager(self, cmd_callback=None): def _get_process_manager(self, cmd_callback=None):
return external_process.ProcessManager( return external_process.ProcessManager(
@ -237,10 +250,9 @@ class DhcpLocalProcess(DhcpBase):
def disable(self, retain_port=False, block=False): def disable(self, retain_port=False, block=False):
"""Disable DHCP for this network by killing the local process.""" """Disable DHCP for this network by killing the local process."""
self.process_monitor.unregister(self.network.id, DNSMASQ_SERVICE_NAME) self.process_monitor.unregister(self.network.id, DNSMASQ_SERVICE_NAME)
pm = self._get_process_manager() self._get_process_manager().disable()
pm.disable()
if block: if block:
common_utils.wait_until_true(lambda: not pm.active) common_utils.wait_until_true(lambda: not self.active)
if not retain_port: if not retain_port:
self._destroy_namespace_and_port() self._destroy_namespace_and_port()
self._remove_config_files() self._remove_config_files()

View File

@ -19,6 +19,7 @@ import mock
import netaddr import netaddr
from neutron_lib.api.definitions import extra_dhcp_opt as edo_ext from neutron_lib.api.definitions import extra_dhcp_opt as edo_ext
from neutron_lib import constants from neutron_lib import constants
from neutron_lib import exceptions
from oslo_config import cfg from oslo_config import cfg
import oslo_messaging import oslo_messaging
from oslo_utils import fileutils from oslo_utils import fileutils
@ -1134,23 +1135,31 @@ class TestDhcpLocalProcess(TestBase):
@mock.patch.object(fileutils, 'ensure_tree') @mock.patch.object(fileutils, 'ensure_tree')
def test_enable(self, ensure_dir): def test_enable(self, ensure_dir):
attrs_to_mock = dict( attrs_to_mock = dict(
(a, mock.DEFAULT) for a in ['active', 'interface_name'] (a, mock.DEFAULT) for a in
['active', 'interface_name', 'spawn_process']
) )
with mock.patch.multiple(LocalChild, **attrs_to_mock) as mocks: with mock.patch.multiple(LocalChild, **attrs_to_mock) as mocks:
mocks['active'].__get__ = mock.Mock(return_value=False) mocks['active'].__get__ = mock.Mock(return_value=False)
mocks['interface_name'].__set__ = mock.Mock() mocks['interface_name'].__set__ = mock.Mock()
mocks['spawn_process'].side_effect = [
exceptions.ProcessExecutionError(
returncode=2, message="Test dnsmasq start failed"),
None]
lp = LocalChild(self.conf, lp = LocalChild(self.conf,
FakeDualNetwork()) FakeDualNetwork())
lp.enable() lp.enable()
self.mock_mgr.assert_has_calls( self.mock_mgr.assert_has_calls(
[mock.call(self.conf, None), [mock.call(self.conf, None),
mock.call().setup(mock.ANY)]) mock.call().setup(mock.ANY)])
self.assertEqual(lp.called, ['spawn']) self.assertEqual(2, mocks['interface_name'].__set__.call_count)
self.assertTrue(mocks['interface_name'].__set__.called) ensure_dir.assert_has_calls([
ensure_dir.assert_called_with( mock.call(
'/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755) '/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755),
mock.call(
'/dhcp/cccccccc-cccc-cccc-cccc-cccccccccccc', mode=0o755)])
def _assert_disabled(self, lp): def _assert_disabled(self, lp):
self.assertTrue(lp.process_monitor.unregister.called) self.assertTrue(lp.process_monitor.unregister.called)