Gracefully restart dnsmasq to not break tcp DNS
When a client talks to dnsmasq using DNS over TCP, dnsmasq forks child processes to handle the TCP connections. A forked process stays alive until all of its connections have been closed, so dangling connections keep the child processes running and, with them, keep port tcp/53 in the listening state. On a dnsmasq restart (e.g. on network update, subnet create, ...) the parent process is killed with SIGKILL and a new process is started. This new process cannot listen on tcp/53, because the port is still in use by the old child with the dangling connection. To prevent dangling dnsmasq connections on TCP we need to shut down the children properly. This is done by first sending SIGTERM and only sending SIGKILL if the process does not shut down properly within the timeout. With that we get proper cleanup of all children, and TCP will come up after a restart. Change-Id: Ie633148c512f5124e978648c50a4c6318c61baa8 Closes-bug: #1998621
This commit is contained in:
parent
16399a2ce5
commit
74224e79e0
@ -21,6 +21,7 @@ import itertools
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import signal
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import netaddr
|
import netaddr
|
||||||
@ -45,6 +46,7 @@ from neutron.ipam import utils as ipam_utils
|
|||||||
from neutron.privileged.agent.linux import dhcp as priv_dhcp
|
from neutron.privileged.agent.linux import dhcp as priv_dhcp
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
SIGTERM_TIMEOUT = 5
|
||||||
|
|
||||||
DNS_PORT = 53
|
DNS_PORT = 53
|
||||||
WIN2k3_STATIC_DNS = 249
|
WIN2k3_STATIC_DNS = 249
|
||||||
@ -349,9 +351,18 @@ class DhcpLocalProcess(DhcpBase, metaclass=abc.ABCMeta):
|
|||||||
def disable(self, retain_port=False, block=False):
|
def disable(self, retain_port=False, block=False):
|
||||||
"""Disable DHCP for this network by killing the local process."""
|
"""Disable DHCP for this network by killing the local process."""
|
||||||
self.process_monitor.unregister(self.network.id, DNSMASQ_SERVICE_NAME)
|
self.process_monitor.unregister(self.network.id, DNSMASQ_SERVICE_NAME)
|
||||||
self._get_process_manager().disable()
|
pm = self._get_process_manager()
|
||||||
|
pm.disable(sig=str(int(signal.SIGTERM)))
|
||||||
if block:
|
if block:
|
||||||
common_utils.wait_until_true(lambda: not self.active)
|
try:
|
||||||
|
common_utils.wait_until_true(lambda: not self.active,
|
||||||
|
timeout=SIGTERM_TIMEOUT)
|
||||||
|
except common_utils.WaitTimeout:
|
||||||
|
LOG.warning('dnsmasq process %s did not finish after SIGTERM '
|
||||||
|
'signal in %s seconds, sending SIGKILL signal',
|
||||||
|
pm.pid, SIGTERM_TIMEOUT)
|
||||||
|
pm.disable(sig=str(int(signal.SIGKILL)))
|
||||||
|
common_utils.wait_until_true(lambda: not self.active)
|
||||||
self._del_running_interface(self.interface_name)
|
self._del_running_interface(self.interface_name)
|
||||||
if not retain_port:
|
if not retain_port:
|
||||||
self._destroy_namespace_and_port()
|
self._destroy_namespace_and_port()
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
import os
|
import os
|
||||||
|
import signal
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import netaddr
|
import netaddr
|
||||||
@ -32,6 +33,7 @@ import testtools
|
|||||||
from neutron.agent.linux import dhcp
|
from neutron.agent.linux import dhcp
|
||||||
from neutron.agent.linux import ip_lib
|
from neutron.agent.linux import ip_lib
|
||||||
from neutron.cmd import runtime_checks as checks
|
from neutron.cmd import runtime_checks as checks
|
||||||
|
from neutron.common import utils as common_utils
|
||||||
from neutron.conf.agent import common as config
|
from neutron.conf.agent import common as config
|
||||||
from neutron.conf.agent import dhcp as dhcp_config
|
from neutron.conf.agent import dhcp as dhcp_config
|
||||||
from neutron.conf import common as base_config
|
from neutron.conf import common as base_config
|
||||||
@ -1272,6 +1274,36 @@ class TestDhcpLocalProcess(TestBase):
|
|||||||
parent.assert_has_calls(expected)
|
parent.assert_has_calls(expected)
|
||||||
delete_ns.assert_called_with('qdhcp-ns')
|
delete_ns.assert_called_with('qdhcp-ns')
|
||||||
|
|
||||||
|
@mock.patch.object(common_utils, 'wait_until_true')
|
||||||
|
def test_disable_blocking(self, mock_wait_until):
|
||||||
|
lp = LocalChild(self.conf, FakeDualNetwork())
|
||||||
|
mock_pm = mock.Mock()
|
||||||
|
with mock.patch('neutron.agent.linux.ip_lib.'
|
||||||
|
'delete_network_namespace'), \
|
||||||
|
mock.patch.object(dhcp.DhcpLocalProcess,
|
||||||
|
'_get_process_manager',
|
||||||
|
return_value=mock_pm):
|
||||||
|
lp.disable(block=True)
|
||||||
|
self.assertEqual(1, mock_wait_until.call_count)
|
||||||
|
mock_pm.disable.assert_called_once_with(sig=str(int(signal.SIGTERM)))
|
||||||
|
|
||||||
|
@mock.patch.object(common_utils, 'wait_until_true')
|
||||||
|
def test_disable_blocking_sigterm_sigkill(self, mock_wait_until):
|
||||||
|
mock_wait_until.side_effect = [common_utils.WaitTimeout, None]
|
||||||
|
|
||||||
|
lp = LocalChild(self.conf, FakeDualNetwork())
|
||||||
|
mock_pm = mock.Mock()
|
||||||
|
with mock.patch('neutron.agent.linux.ip_lib.'
|
||||||
|
'delete_network_namespace'), \
|
||||||
|
mock.patch.object(dhcp.DhcpLocalProcess,
|
||||||
|
'_get_process_manager',
|
||||||
|
return_value=mock_pm):
|
||||||
|
lp.disable(block=True)
|
||||||
|
self.assertEqual(2, mock_wait_until.call_count)
|
||||||
|
mock_pm.disable.assert_has_calls([
|
||||||
|
mock.call(sig=str(int(signal.SIGTERM))),
|
||||||
|
mock.call(sig=str(int(signal.SIGKILL)))])
|
||||||
|
|
||||||
def test_get_interface_name(self):
|
def test_get_interface_name(self):
|
||||||
net = FakeDualNetwork()
|
net = FakeDualNetwork()
|
||||||
path = '/dhcp/%s/interface' % net.id
|
path = '/dhcp/%s/interface' % net.id
|
||||||
|
Loading…
x
Reference in New Issue
Block a user