From c5a1214ca656ea872406a35d40763c69d93e37b9 Mon Sep 17 00:00:00 2001 From: aojeagarcia Date: Thu, 20 Dec 2018 19:49:56 +0100 Subject: [PATCH] protect DHCP agent cache out of sync If DHCP agent port cache is out of sync with neutron server, dnsmasq entries are wrong and VMs may not acquire an IP because of duplicate entries. When DHCP agent executes port_create_end method, port's IP should be checked before being used, if there are duplicate IP addresses in the same network in the cache we should resync. Co-Authored-By: doreilly@suse.com Closes-Bug: #1645835 Change-Id: Icc555050283420fddfb90bb67e02bc303e989e27 --- neutron/agent/dhcp/agent.py | 80 +++++++++++++-------- neutron/tests/unit/agent/dhcp/test_agent.py | 35 +++++++-- 2 files changed, 80 insertions(+), 35 deletions(-) diff --git a/neutron/agent/dhcp/agent.py b/neutron/agent/dhcp/agent.py index 27d417ae6f0..1fabc12fbc2 100644 --- a/neutron/agent/dhcp/agent.py +++ b/neutron/agent/dhcp/agent.py @@ -428,42 +428,62 @@ class DhcpAgent(manager.Manager): network = self.cache.get_network_by_id(updated_port.network_id) if not network: return - LOG.info("Trigger reload_allocations for port %s", - updated_port) - driver_action = 'reload_allocations' - if self._is_port_on_this_agent(updated_port): - orig = self.cache.get_port_by_id(updated_port['id']) - # assume IP change if not in cache - orig = orig or {'fixed_ips': []} - old_ips = {i['ip_address'] for i in orig['fixed_ips'] or []} - new_ips = {i['ip_address'] for i in updated_port['fixed_ips']} - old_subs = {i['subnet_id'] for i in orig['fixed_ips'] or []} - new_subs = {i['subnet_id'] for i in updated_port['fixed_ips']} - if new_subs != old_subs: - # subnets being serviced by port have changed, this could - # indicate a subnet_delete is in progress. schedule a - # resync rather than an immediate restart so we don't - # attempt to re-allocate IPs at the same time the server - # is deleting them. - self.schedule_resync("Agent port was modified", - updated_port.network_id) - return - elif old_ips != new_ips: - LOG.debug("Agent IPs on network %s changed from %s to %s", - network.id, old_ips, new_ips) - driver_action = 'restart' - self.cache.put_port(updated_port) - self.call_driver(driver_action, network) - self.dhcp_ready_ports.add(updated_port.id) - self.update_isolated_metadata_proxy(network) + self.reload_allocations(updated_port, network) + + def reload_allocations(self, port, network): + LOG.info("Trigger reload_allocations for port %s", port) + driver_action = 'reload_allocations' + if self._is_port_on_this_agent(port): + orig = self.cache.get_port_by_id(port['id']) + # assume IP change if not in cache + orig = orig or {'fixed_ips': []} + old_ips = {i['ip_address'] for i in orig['fixed_ips'] or []} + new_ips = {i['ip_address'] for i in port['fixed_ips']} + old_subs = {i['subnet_id'] for i in orig['fixed_ips'] or []} + new_subs = {i['subnet_id'] for i in port['fixed_ips']} + if new_subs != old_subs: + # subnets being serviced by port have changed, this could + # indicate a subnet_delete is in progress. schedule a + # resync rather than an immediate restart so we don't + # attempt to re-allocate IPs at the same time the server + # is deleting them. + self.schedule_resync("Agent port was modified", + port.network_id) + return + elif old_ips != new_ips: + LOG.debug("Agent IPs on network %s changed from %s to %s", + network.id, old_ips, new_ips) + driver_action = 'restart' + self.cache.put_port(port) + self.call_driver(driver_action, network) + self.dhcp_ready_ports.add(port.id) + self.update_isolated_metadata_proxy(network) def _is_port_on_this_agent(self, port): thishost = utils.get_dhcp_agent_device_id( port['network_id'], self.conf.host) return port['device_id'] == thishost - # Use the update handler for the port create event. - port_create_end = port_update_end + @_wait_if_syncing + def port_create_end(self, context, payload): + """Handle the port.create.end notification event.""" + created_port = dhcp.DictModel(payload['port']) + with _net_lock(created_port.network_id): + network = self.cache.get_network_by_id(created_port.network_id) + if not network: + return + new_ips = {i['ip_address'] for i in created_port['fixed_ips']} + for port_cached in network.ports: + # if there are other ports cached with the same ip address in + # the same network this indicate that the cache is out of sync + cached_ips = {i['ip_address'] + for i in port_cached['fixed_ips']} + if new_ips.intersection(cached_ips): + self.schedule_resync("Duplicate IP addresses found, " + "DHCP cache is out of sync", + created_port.network_id) + return + self.reload_allocations(created_port, network) @_wait_if_syncing def port_delete_end(self, context, payload): diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py index 6a68d8a39cd..e7e0b63f4a4 100644 --- a/neutron/tests/unit/agent/dhcp/test_agent.py +++ b/neutron/tests/unit/agent/dhcp/test_agent.py @@ -1071,19 +1071,35 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): fake_network) def test_port_update_end(self): - payload = dict(port=fake_port2) + self.reload_allocations_p = mock.patch.object(self.dhcp, + 'reload_allocations') + self.reload_allocations = self.reload_allocations_p.start() + payload = dict(port=copy.deepcopy(fake_port2)) self.cache.get_network_by_id.return_value = fake_network + self.dhcp.port_update_end(None, payload) + self.reload_allocations.assert_called_once_with(fake_port2, + fake_network) + + def test_reload_allocations(self): self.cache.get_port_by_id.return_value = fake_port2 with mock.patch.object( self.dhcp, 'update_isolated_metadata_proxy') as ump: - self.dhcp.port_update_end(None, payload) - self.cache.assert_has_calls( - [mock.call.get_network_by_id(fake_port2.network_id), - mock.call.put_port(mock.ANY)]) + self.dhcp.reload_allocations(fake_port2, fake_network) + self.cache.assert_has_calls([mock.call.put_port(mock.ANY)]) self.call_driver.assert_called_once_with('reload_allocations', fake_network) self.assertTrue(ump.called) + def test_port_create_end(self): + self.reload_allocations_p = mock.patch.object(self.dhcp, + 'reload_allocations') + self.reload_allocations = self.reload_allocations_p.start() + payload = dict(port=copy.deepcopy(fake_port2)) + self.cache.get_network_by_id.return_value = fake_network + self.dhcp.port_create_end(None, payload) + self.reload_allocations.assert_called_once_with(fake_port2, + fake_network) + def test_port_update_end_grabs_lock(self): payload = dict(port=fake_port2) self.cache.get_network_by_id.return_value = None @@ -1143,6 +1159,15 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): self.schedule_resync.assert_called_once_with(mock.ANY, fake_port1.network_id) + def test_port_create_duplicate_ip_on_dhcp_agents_same_network(self): + self.cache.get_network_by_id.return_value = fake_network + payload = dict(port=copy.deepcopy(fake_port2)) + duplicate_ip = fake_port1['fixed_ips'][0]['ip_address'] + payload['port']['fixed_ips'][0]['ip_address'] = duplicate_ip + self.dhcp.port_create_end(None, payload) + self.schedule_resync.assert_called_once_with(mock.ANY, + fake_port2.network_id) + def test_port_update_on_dhcp_agents_port_no_ip_change(self): self.cache.get_network_by_id.return_value = fake_network self.cache.get_port_by_id.return_value = fake_port1