Protect DHCP agent against an out-of-sync port cache

If the DHCP agent port cache is out of sync with the neutron server, the
dnsmasq entries become wrong and VMs may fail to acquire an IP because of
duplicate entries.

When the DHCP agent executes the port_create_end method, the port's IP
addresses should be checked before they are used: if the cache already
holds another port in the same network with any of those IP addresses,
the agent should schedule a resync (a minimal sketch of this check is
included after the commit metadata below).

Co-Authored-By: doreilly@suse.com
Closes-Bug: #1645835

Change-Id: Icc555050283420fddfb90bb67e02bc303e989e27
(cherry picked from commit ffbf65126f)
aojeagarcia 2018-12-20 19:49:56 +01:00 committed by Yang JianFeng
parent 8d99593adb
commit 88528d191f
2 changed files with 80 additions and 35 deletions
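For orientation, here is a minimal standalone sketch of the duplicate-IP check
described in the commit message, assuming ports are plain dicts with a
'fixed_ips' list of {'ip_address': ...} entries (matching the structures used
in the diff below). The helper name has_duplicate_ips and the example data are
illustrative only and are not part of the agent code; the actual change lives
in DhcpAgent.port_create_end in the first hunk.

    # Hypothetical helper illustrating the duplicate-IP detection; the real
    # implementation is in DhcpAgent.port_create_end (see the diff below).
    def has_duplicate_ips(created_port, cached_ports):
        """Return True if any IP of created_port is already held by a cached port."""
        new_ips = {ip['ip_address'] for ip in created_port['fixed_ips']}
        for cached in cached_ports:
            cached_ips = {ip['ip_address'] for ip in cached['fixed_ips']}
            if new_ips & cached_ips:
                # Overlapping addresses mean the agent cache no longer matches
                # the neutron server, so the caller should schedule a resync.
                return True
        return False

    if __name__ == '__main__':
        cache = [{'id': 'p1', 'fixed_ips': [{'ip_address': '10.0.0.5'}]}]
        new_port = {'id': 'p2', 'fixed_ips': [{'ip_address': '10.0.0.5'}]}
        print(has_duplicate_ips(new_port, cache))  # True -> schedule a resync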


@@ -430,42 +430,62 @@ class DhcpAgent(manager.Manager):
             network = self.cache.get_network_by_id(updated_port.network_id)
             if not network:
                 return
-            LOG.info("Trigger reload_allocations for port %s",
-                     updated_port)
-            driver_action = 'reload_allocations'
-            if self._is_port_on_this_agent(updated_port):
-                orig = self.cache.get_port_by_id(updated_port['id'])
-                # assume IP change if not in cache
-                orig = orig or {'fixed_ips': []}
-                old_ips = {i['ip_address'] for i in orig['fixed_ips'] or []}
-                new_ips = {i['ip_address'] for i in updated_port['fixed_ips']}
-                old_subs = {i['subnet_id'] for i in orig['fixed_ips'] or []}
-                new_subs = {i['subnet_id'] for i in updated_port['fixed_ips']}
-                if new_subs != old_subs:
-                    # subnets being serviced by port have changed, this could
-                    # indicate a subnet_delete is in progress. schedule a
-                    # resync rather than an immediate restart so we don't
-                    # attempt to re-allocate IPs at the same time the server
-                    # is deleting them.
-                    self.schedule_resync("Agent port was modified",
-                                         updated_port.network_id)
-                    return
-                elif old_ips != new_ips:
-                    LOG.debug("Agent IPs on network %s changed from %s to %s",
-                              network.id, old_ips, new_ips)
-                    driver_action = 'restart'
-                self.cache.put_port(updated_port)
-            self.call_driver(driver_action, network)
-            self.dhcp_ready_ports.add(updated_port.id)
-            self.update_isolated_metadata_proxy(network)
+            self.reload_allocations(updated_port, network)
+
+    def reload_allocations(self, port, network):
+        LOG.info("Trigger reload_allocations for port %s", port)
+        driver_action = 'reload_allocations'
+        if self._is_port_on_this_agent(port):
+            orig = self.cache.get_port_by_id(port['id'])
+            # assume IP change if not in cache
+            orig = orig or {'fixed_ips': []}
+            old_ips = {i['ip_address'] for i in orig['fixed_ips'] or []}
+            new_ips = {i['ip_address'] for i in port['fixed_ips']}
+            old_subs = {i['subnet_id'] for i in orig['fixed_ips'] or []}
+            new_subs = {i['subnet_id'] for i in port['fixed_ips']}
+            if new_subs != old_subs:
+                # subnets being serviced by port have changed, this could
+                # indicate a subnet_delete is in progress. schedule a
+                # resync rather than an immediate restart so we don't
+                # attempt to re-allocate IPs at the same time the server
+                # is deleting them.
+                self.schedule_resync("Agent port was modified",
+                                     port.network_id)
+                return
+            elif old_ips != new_ips:
+                LOG.debug("Agent IPs on network %s changed from %s to %s",
+                          network.id, old_ips, new_ips)
+                driver_action = 'restart'
+            self.cache.put_port(port)
+        self.call_driver(driver_action, network)
+        self.dhcp_ready_ports.add(port.id)
+        self.update_isolated_metadata_proxy(network)
 
     def _is_port_on_this_agent(self, port):
         thishost = utils.get_dhcp_agent_device_id(
             port['network_id'], self.conf.host)
         return port['device_id'] == thishost
 
-    # Use the update handler for the port create event.
-    port_create_end = port_update_end
+    @_wait_if_syncing
+    def port_create_end(self, context, payload):
+        """Handle the port.create.end notification event."""
+        created_port = dhcp.DictModel(payload['port'])
+        with _net_lock(created_port.network_id):
+            network = self.cache.get_network_by_id(created_port.network_id)
+            if not network:
+                return
+            new_ips = {i['ip_address'] for i in created_port['fixed_ips']}
+            for port_cached in network.ports:
+                # if there are other ports cached with the same ip address in
+                # the same network this indicate that the cache is out of sync
+                cached_ips = {i['ip_address']
+                              for i in port_cached['fixed_ips']}
+                if new_ips.intersection(cached_ips):
+                    self.schedule_resync("Duplicate IP addresses found, "
+                                         "DHCP cache is out of sync",
+                                         created_port.network_id)
+                    return
+            self.reload_allocations(created_port, network)
 
     @_wait_if_syncing
     def port_delete_end(self, context, payload):


@@ -1072,19 +1072,35 @@ class TestDhcpAgentEventHandler(base.BaseTestCase):
                                                  fake_network)
 
     def test_port_update_end(self):
-        payload = dict(port=fake_port2)
+        self.reload_allocations_p = mock.patch.object(self.dhcp,
+                                                      'reload_allocations')
+        self.reload_allocations = self.reload_allocations_p.start()
+        payload = dict(port=copy.deepcopy(fake_port2))
         self.cache.get_network_by_id.return_value = fake_network
+        self.dhcp.port_update_end(None, payload)
+        self.reload_allocations.assert_called_once_with(fake_port2,
+                                                        fake_network)
+
+    def test_reload_allocations(self):
         self.cache.get_port_by_id.return_value = fake_port2
         with mock.patch.object(
                 self.dhcp, 'update_isolated_metadata_proxy') as ump:
-            self.dhcp.port_update_end(None, payload)
-            self.cache.assert_has_calls(
-                [mock.call.get_network_by_id(fake_port2.network_id),
-                 mock.call.put_port(mock.ANY)])
+            self.dhcp.reload_allocations(fake_port2, fake_network)
+            self.cache.assert_has_calls([mock.call.put_port(mock.ANY)])
         self.call_driver.assert_called_once_with('reload_allocations',
                                                  fake_network)
         self.assertTrue(ump.called)
 
+    def test_port_create_end(self):
+        self.reload_allocations_p = mock.patch.object(self.dhcp,
+                                                      'reload_allocations')
+        self.reload_allocations = self.reload_allocations_p.start()
+        payload = dict(port=copy.deepcopy(fake_port2))
+        self.cache.get_network_by_id.return_value = fake_network
+        self.dhcp.port_create_end(None, payload)
+        self.reload_allocations.assert_called_once_with(fake_port2,
+                                                        fake_network)
+
     def test_port_update_end_grabs_lock(self):
         payload = dict(port=fake_port2)
         self.cache.get_network_by_id.return_value = None
@@ -1144,6 +1160,15 @@ class TestDhcpAgentEventHandler(base.BaseTestCase):
         self.schedule_resync.assert_called_once_with(mock.ANY,
                                                      fake_port1.network_id)
 
+    def test_port_create_duplicate_ip_on_dhcp_agents_same_network(self):
+        self.cache.get_network_by_id.return_value = fake_network
+        payload = dict(port=copy.deepcopy(fake_port2))
+        duplicate_ip = fake_port1['fixed_ips'][0]['ip_address']
+        payload['port']['fixed_ips'][0]['ip_address'] = duplicate_ip
+        self.dhcp.port_create_end(None, payload)
+        self.schedule_resync.assert_called_once_with(mock.ANY,
+                                                     fake_port2.network_id)
+
     def test_port_update_on_dhcp_agents_port_no_ip_change(self):
         self.cache.get_network_by_id.return_value = fake_network
         self.cache.get_port_by_id.return_value = fake_port1