l3scheduler: create ha_vr_id more robustly
In some cases, the creation of the ha_vr_id could have occurred twice. For example, if a router was created with a given external_gateway then a vr_id would be allocated twice: once from super().create_router() (which triggers the scheduler on its own), and once from the create_router() logic. This patch modifies some code paths to make this allocation more robust. For example the code that allocates a new vr_id will now also assign it to the router in the same transaction, to make sure atomicity. Closes-Bug: #1654032 Change-Id: I82c33aee5cfcc086f60fc74ed4d7bd7d443a3370
This commit is contained in:
parent
d8e3596259
commit
7c0e62ee2f
@ -148,7 +148,10 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
return allocated_vr_ids
|
||||
|
||||
@db_api.retry_if_session_inactive()
|
||||
def _allocate_vr_id(self, context, network_id, router_id):
|
||||
def _ensure_vr_id(self, context, router_db, ha_network):
|
||||
router_id = router_db.id
|
||||
network_id = ha_network.network_id
|
||||
|
||||
# TODO(kevinbenton): let decorator handle duplicate retry
|
||||
# like in review.openstack.org/#/c/367179/1/neutron/db/l3_hamode_db.py
|
||||
for count in range(MAX_ALLOCATION_TRIES):
|
||||
@ -156,6 +159,14 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
# NOTE(kevinbenton): we disallow subtransactions because the
|
||||
# retry logic will bust any parent transactions
|
||||
with context.session.begin():
|
||||
if router_db.extra_attributes.ha_vr_id:
|
||||
LOG.debug(
|
||||
"Router %(router_id)s has already been "
|
||||
"allocated a ha_vr_id %(ha_vr_id)d!",
|
||||
{'router_id': router_id,
|
||||
'ha_vr_id': router_db.extra_attributes.ha_vr_id})
|
||||
return
|
||||
|
||||
allocated_vr_ids = self._get_allocated_vr_id(context,
|
||||
network_id)
|
||||
available_vr_ids = VR_ID_RANGE - allocated_vr_ids
|
||||
@ -168,6 +179,11 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
allocation.vr_id = available_vr_ids.pop()
|
||||
|
||||
context.session.add(allocation)
|
||||
router_db.extra_attributes.ha_vr_id = allocation.vr_id
|
||||
LOG.debug(
|
||||
"Router %(router_id)s has been allocated a ha_vr_id "
|
||||
"%(ha_vr_id)d.",
|
||||
{'router_id': router_id, 'ha_vr_id': allocation.vr_id})
|
||||
|
||||
return allocation.vr_id
|
||||
|
||||
@ -188,10 +204,6 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
l3ha_model.L3HARouterVRIdAllocation).filter_by(
|
||||
network_id=ha_network.network_id, vr_id=vr_id).delete()
|
||||
|
||||
def _set_vr_id(self, context, router, ha_network):
|
||||
router.extra_attributes.ha_vr_id = self._allocate_vr_id(
|
||||
context, ha_network.network_id, router.id)
|
||||
|
||||
def _create_ha_subnet(self, context, network_id, tenant_id):
|
||||
args = {'network_id': network_id,
|
||||
'tenant_id': '',
|
||||
@ -376,9 +388,9 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
self)._get_device_owner(context, router)
|
||||
|
||||
@n_utils.transaction_guard
|
||||
def _set_vr_id_and_ensure_network(self, context, router_db):
|
||||
def _ensure_vr_id_and_network(self, context, router_db):
|
||||
"""Attach vr_id to router while tolerating network deletes."""
|
||||
creator = functools.partial(self._set_vr_id,
|
||||
creator = functools.partial(self._ensure_vr_id,
|
||||
context, router_db)
|
||||
dep_getter = functools.partial(self.get_ha_network,
|
||||
context, router_db.tenant_id)
|
||||
@ -413,7 +425,6 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
try:
|
||||
router_db = self._get_router(context, router_dict['id'])
|
||||
|
||||
self._set_vr_id_and_ensure_network(context, router_db)
|
||||
self.schedule_router(context, router_dict['id'])
|
||||
|
||||
router_dict['ha_vr_id'] = router_db.extra_attributes.ha_vr_id
|
||||
@ -501,9 +512,7 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
|
||||
# working (Only unbind from dvr_snat nodes).
|
||||
self._unbind_ha_router(context, router_id)
|
||||
|
||||
if requested_ha_state:
|
||||
self._set_vr_id_and_ensure_network(context, router_db)
|
||||
else:
|
||||
if not requested_ha_state:
|
||||
self._delete_ha_interfaces(context, router_db.id)
|
||||
# always attempt to cleanup the network as the router is
|
||||
# deleted. the core plugin will stop us if its in use
|
||||
|
@ -310,9 +310,10 @@ class L3Scheduler(object):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _add_port_from_net(self, plugin, ctxt, router_id, tenant_id, ha_net):
|
||||
"""small wrapper function to unpack network id from ha_network"""
|
||||
return plugin.add_ha_port(ctxt, router_id, ha_net.network.id,
|
||||
def _add_port_from_net_and_ensure_vr_id(self, plugin, ctxt, router_db,
|
||||
tenant_id, ha_net):
|
||||
plugin._ensure_vr_id(ctxt, router_db, ha_net)
|
||||
return plugin.add_ha_port(ctxt, router_db.id, ha_net.network.id,
|
||||
tenant_id)
|
||||
|
||||
def create_ha_port_and_bind(self, plugin, context, router_id,
|
||||
@ -320,11 +321,11 @@ class L3Scheduler(object):
|
||||
"""Creates and binds a new HA port for this agent."""
|
||||
ctxt = context.elevated()
|
||||
router_db = plugin._get_router(ctxt, router_id)
|
||||
creator = functools.partial(self._add_port_from_net,
|
||||
plugin, ctxt, router_id, tenant_id)
|
||||
creator = functools.partial(self._add_port_from_net_and_ensure_vr_id,
|
||||
plugin, ctxt, router_db, tenant_id)
|
||||
dep_getter = functools.partial(plugin.get_ha_network, ctxt, tenant_id)
|
||||
dep_creator = functools.partial(plugin._set_vr_id_and_ensure_network,
|
||||
ctxt, router_db)
|
||||
dep_creator = functools.partial(plugin._create_ha_network,
|
||||
ctxt, tenant_id)
|
||||
dep_deleter = functools.partial(plugin._delete_ha_network, ctxt)
|
||||
dep_id_attr = 'network_id'
|
||||
|
||||
|
@ -248,13 +248,13 @@ class L3HATestCase(L3HATestFramework):
|
||||
router['status'])
|
||||
|
||||
def test_router_created_allocating_state_during_interface_create(self):
|
||||
_orig = self.plugin._set_vr_id_and_ensure_network
|
||||
_orig = self.plugin._ensure_vr_id
|
||||
|
||||
def check_state(context, router_db):
|
||||
def check_state(context, router, ha_network):
|
||||
self.assertEqual(n_const.ROUTER_STATUS_ALLOCATING,
|
||||
router_db.status)
|
||||
return _orig(context, router_db)
|
||||
with mock.patch.object(self.plugin, '_set_vr_id_and_ensure_network',
|
||||
router.status)
|
||||
return _orig(context, router, ha_network)
|
||||
with mock.patch.object(self.plugin, '_ensure_vr_id',
|
||||
side_effect=check_state) as vr_id_mock:
|
||||
router = self._create_router()
|
||||
self.assertTrue(vr_id_mock.called)
|
||||
@ -265,6 +265,8 @@ class L3HATestCase(L3HATestFramework):
|
||||
self.assertTrue(router['ha'])
|
||||
|
||||
def test_ha_router_create_with_distributed(self):
|
||||
helpers.register_l3_agent(
|
||||
'host_3', constants.L3_AGENT_MODE_DVR_SNAT)
|
||||
router = self._create_router(ha=True, distributed=True)
|
||||
self.assertTrue(router['ha'])
|
||||
self.assertTrue(router['distributed'])
|
||||
@ -516,12 +518,9 @@ class L3HATestCase(L3HATestFramework):
|
||||
network = self.plugin.get_ha_network(self.admin_ctx,
|
||||
router['tenant_id'])
|
||||
|
||||
with mock.patch.object(self.plugin, '_get_allocated_vr_id',
|
||||
return_value=set()) as alloc:
|
||||
self.assertRaises(l3_ext_ha_mode.MaxVRIDAllocationTriesReached,
|
||||
self.plugin._allocate_vr_id, self.admin_ctx,
|
||||
network.network_id, router['id'])
|
||||
self.assertEqual(2, len(alloc.mock_calls))
|
||||
router_db = self.plugin._get_router(self.admin_ctx, router['id'])
|
||||
self.assertIsNone(self.plugin._ensure_vr_id(self.admin_ctx,
|
||||
router_db, network))
|
||||
|
||||
def test_vr_id_allocation_delete_router(self):
|
||||
router = self._create_router()
|
||||
@ -631,15 +630,15 @@ class L3HATestCase(L3HATestFramework):
|
||||
networks_after = self.core_plugin.get_networks(self.admin_ctx)
|
||||
self.assertEqual(networks_before, networks_after)
|
||||
|
||||
def test_set_vr_id_and_ensure_network_net_exists(self):
|
||||
def test_ensure_vr_id_and_network_net_exists(self):
|
||||
router = self._create_router()
|
||||
router_db = self.plugin._get_router(self.admin_ctx, router['id'])
|
||||
with mock.patch.object(self.plugin, '_create_ha_network') as create:
|
||||
self.plugin._set_vr_id_and_ensure_network(
|
||||
self.plugin._ensure_vr_id_and_network(
|
||||
self.admin_ctx, router_db)
|
||||
self.assertFalse(create.called)
|
||||
|
||||
def test_set_vr_id_and_ensure_network_concurrent_create(self):
|
||||
def test_ensure_vr_id_and_network_concurrent_create(self):
|
||||
# create a non-ha router so we can manually invoke the create ha
|
||||
# interfaces call down below
|
||||
router = self._create_router(ha=False)
|
||||
@ -654,56 +653,56 @@ class L3HATestCase(L3HATestFramework):
|
||||
raise db_exc.DBDuplicateEntry(columns=['tenant_id'])
|
||||
with mock.patch.object(self.plugin, '_create_ha_network',
|
||||
new=_create_ha_network):
|
||||
net = self.plugin._set_vr_id_and_ensure_network(
|
||||
net = self.plugin._ensure_vr_id_and_network(
|
||||
self.admin_ctx, router_db)
|
||||
# ensure that it used the concurrently created network
|
||||
self.assertEqual([net], created_nets)
|
||||
|
||||
def _test_ensure_with_patched_set_vr_id(self, _set_vr_id):
|
||||
def _test_ensure_with_patched_ensure_vr_id(self, _ensure_vr_id):
|
||||
# create a non-ha router so we can manually invoke the create ha
|
||||
# interfaces call down below
|
||||
router = self._create_router(ha=False)
|
||||
router_db = self.plugin._get_router(self.admin_ctx, router['id'])
|
||||
with mock.patch.object(self.plugin, '_set_vr_id',
|
||||
new=_set_vr_id):
|
||||
self.plugin._set_vr_id_and_ensure_network(
|
||||
with mock.patch.object(self.plugin, '_ensure_vr_id',
|
||||
new=_ensure_vr_id):
|
||||
self.plugin._ensure_vr_id_and_network(
|
||||
self.admin_ctx, router_db)
|
||||
self.assertTrue(_set_vr_id.called)
|
||||
self.assertTrue(_ensure_vr_id.called)
|
||||
|
||||
def test_set_vr_id_and_ensure_network_interface_failure(self):
|
||||
def test_ensure_vr_id_and_network_interface_failure(self):
|
||||
|
||||
def _set_vr_id(ctx, rdb, ha_net):
|
||||
def _ensure_vr_id(ctx, rdb, ha_net):
|
||||
raise ValueError('broken')
|
||||
with testtools.ExpectedException(ValueError):
|
||||
self._test_ensure_with_patched_set_vr_id(_set_vr_id)
|
||||
self._test_ensure_with_patched_ensure_vr_id(_ensure_vr_id)
|
||||
self.assertEqual([], self.core_plugin.get_networks(self.admin_ctx))
|
||||
|
||||
def test_set_vr_id_and_ensure_network_concurrent_delete(self):
|
||||
orig_create = self.plugin._set_vr_id
|
||||
def test_ensure_vr_id_and_network_concurrent_delete(self):
|
||||
orig_create = self.plugin._ensure_vr_id
|
||||
|
||||
def _set_vr_id(ctx, rdb, ha_net):
|
||||
def _ensure_vr_id(ctx, rdb, ha_net):
|
||||
# concurrent delete on the first attempt
|
||||
if not getattr(_set_vr_id, 'called', False):
|
||||
setattr(_set_vr_id, 'called', True)
|
||||
if not getattr(_ensure_vr_id, 'called', False):
|
||||
setattr(_ensure_vr_id, 'called', True)
|
||||
self.core_plugin.delete_network(self.admin_ctx,
|
||||
ha_net['network_id'])
|
||||
return orig_create(ctx, rdb, ha_net)
|
||||
self._test_ensure_with_patched_set_vr_id(_set_vr_id)
|
||||
self._test_ensure_with_patched_ensure_vr_id(_ensure_vr_id)
|
||||
|
||||
def test_set_vr_id_and_ensure_network_concurrent_swap(self):
|
||||
orig_create = self.plugin._set_vr_id
|
||||
def test_ensure_vr_id_and_network_concurrent_swap(self):
|
||||
orig_create = self.plugin._ensure_vr_id
|
||||
|
||||
def _set_vr_id(ctx, rdb, ha_net):
|
||||
def _ensure_vr_id(ctx, rdb, ha_net):
|
||||
# concurrent delete on the first attempt
|
||||
if not getattr(_set_vr_id, 'called', False):
|
||||
setattr(_set_vr_id, 'called', True)
|
||||
if not getattr(_ensure_vr_id, 'called', False):
|
||||
setattr(_ensure_vr_id, 'called', True)
|
||||
self.core_plugin.delete_network(self.admin_ctx,
|
||||
ha_net['network_id'])
|
||||
self.plugin._create_ha_network(self.admin_ctx,
|
||||
rdb.tenant_id)
|
||||
return orig_create(ctx, rdb, ha_net)
|
||||
|
||||
self._test_ensure_with_patched_set_vr_id(_set_vr_id)
|
||||
self._test_ensure_with_patched_ensure_vr_id(_ensure_vr_id)
|
||||
|
||||
def test_create_ha_network_tenant_binding_raises_duplicate(self):
|
||||
router = self._create_router()
|
||||
@ -718,7 +717,7 @@ class L3HATestCase(L3HATestFramework):
|
||||
def test_create_router_db_ha_attribute_failure_rolls_back_router(self):
|
||||
routers_before = self.plugin.get_routers(self.admin_ctx)
|
||||
|
||||
for method in ('_set_vr_id',
|
||||
for method in ('_ensure_vr_id',
|
||||
'_notify_router_updated'):
|
||||
with mock.patch.object(self.plugin, method,
|
||||
side_effect=ValueError):
|
||||
|
Loading…
Reference in New Issue
Block a user