Make HA deletion attempt on RouterNotFound race

The L3 HA RPC code that creates HA interfaces can race
with an HA router deletion on the server side. The L3 HA
code ends up creating a port on the HA network while the
server side is deleting the router and the HA network.

This stops the L3 HA network from being deleted because
it has a new port without a bound segment. Once the L3 RPC
code cleans up its port, the HA network is left behind with
no segments and no ports.

This adjusts the L3 RPC logic to attempt an HA network cleanup
whenever it encounters the concurrent router deletion case
to ensure that the HA network gets cleaned up.

To make this more robust in the future, we may need the L3
HA code to recognize when an HA network has no segments and
automatically create a new one.

Change-Id: Idd301f6df92e9bc37187e8ed8ec00004e67da928
Closes-Bug: #1696537
This commit is contained in:
Kevin Benton
2017-06-08 14:56:19 -07:00
committed by Brian Haley
parent 607c1810db
commit 3c1a25d968
2 changed files with 10 additions and 1 deletions

View File

@ -306,6 +306,12 @@ class L3Scheduler(object):
except l3.RouterNotFound:
LOG.debug('Router %s has already been removed '
'by concurrent operation', router_id)
# we try to clear the HA network here in case the port we created
# blocked the concurrent router delete operation from getting rid
# of the HA network
ha_net = plugin.get_ha_network(ctxt, tenant_id)
if ha_net:
plugin.safe_delete_ha_network(ctxt, ha_net, tenant_id)
def get_ha_routers_l3_agents_counts(self, plugin, context, filters=None):
"""Return a mapping (router, # agents) matching specified filters."""

View File

@ -1396,10 +1396,13 @@ class L3HATestCaseMixin(testlib_api.SqlTestCase,
with mock.patch.object(self.plugin.router_scheduler, 'bind_router'):
with mock.patch.object(
self.plugin, 'add_ha_port',
side_effect=l3.RouterNotFound(router_id='foo_router')):
side_effect=l3.RouterNotFound(router_id='foo_router')),\
mock.patch.object(
self.plugin, 'safe_delete_ha_network') as sd_ha_net:
self.plugin.router_scheduler.create_ha_port_and_bind(
self.plugin, self.adminContext,
router['id'], router['tenant_id'], agent)
self.assertTrue(sd_ha_net.called)
def test_create_ha_port_and_bind_bind_router_returns_None(self):
router = self._create_ha_router(tenant_id='foo_tenant')