From aa42906143f09a8cf40dfcd91bb211eac6689b80 Mon Sep 17 00:00:00 2001 From: Armando Migliaccio Date: Fri, 12 Aug 2016 12:51:14 -0700 Subject: [PATCH] Make auto-allocate plugin handle sneaky DB errors DB errors or retry exceptions that bubble up to the plugin may get masked by integrity violation errors due to partial provisioning of resources. When that happens, we should make sure any pending resource is cleaned up before reattempting the operation. Closes-bug: #1612615 Change-Id: I76e9f8e4b61fb3566d70af4236c19e4c5a523646 --- neutron/services/auto_allocate/db.py | 60 +++++++++++++------ .../unit/services/auto_allocate/test_db.py | 17 ++++++ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/neutron/services/auto_allocate/db.py b/neutron/services/auto_allocate/db.py index 75087fdf242..26664a5772b 100644 --- a/neutron/services/auto_allocate/db.py +++ b/neutron/services/auto_allocate/db.py @@ -17,6 +17,7 @@ from neutron_lib import exceptions as n_exc from oslo_db import exception as db_exc from oslo_log import log as logging +from oslo_utils import excutils from sqlalchemy import sql from neutron._i18n import _, _LE @@ -24,6 +25,7 @@ from neutron.api.v2 import attributes from neutron.callbacks import events from neutron.callbacks import registry from neutron.callbacks import resources +from neutron.db import api as db_api from neutron.db import common_db_mixin from neutron.db import db_base_plugin_v2 from neutron.db import external_net_db @@ -94,6 +96,19 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): # - update router gateway -> prevent operation # - ... + @property + def core_plugin(self): + if not getattr(self, '_core_plugin', None): + self._core_plugin = manager.NeutronManager.get_plugin() + return self._core_plugin + + @property + def l3_plugin(self): + if not getattr(self, '_l3_plugin', None): + self._l3_plugin = manager.NeutronManager.get_service_plugins().get( + constants.L3_ROUTER_NAT) + return self._l3_plugin + def get_auto_allocated_topology(self, context, tenant_id, fields=None): """Return tenant's network associated to auto-allocated topology. @@ -119,26 +134,35 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): context) # If we reach this point, then we got some work to do! - subnets = self._provision_tenant_private_network(context, tenant_id) - network_id = subnets[0]['network_id'] - router = self._provision_external_connectivity( - context, default_external_network, subnets, tenant_id) - network_id = self._save( - context, tenant_id, network_id, router['id'], subnets) + network_id = self._build_topology( + context, tenant_id, default_external_network) return self._response(network_id, tenant_id, fields=fields) - @property - def core_plugin(self): - if not getattr(self, '_core_plugin', None): - self._core_plugin = manager.NeutronManager.get_plugin() - return self._core_plugin - - @property - def l3_plugin(self): - if not getattr(self, '_l3_plugin', None): - self._l3_plugin = manager.NeutronManager.get_service_plugins().get( - constants.L3_ROUTER_NAT) - return self._l3_plugin + def _build_topology(self, context, tenant_id, default_external_network): + """Build the network topology and returns its network UUID.""" + network_id = None + router_id = None + subnets = None + try: + subnets = self._provision_tenant_private_network( + context, tenant_id) + network_id = subnets[0]['network_id'] + router = self._provision_external_connectivity( + context, default_external_network, subnets, tenant_id) + network_id = self._save( + context, tenant_id, network_id, router['id'], subnets) + return network_id + except Exception as e: + with excutils.save_and_reraise_exception(): + # FIXME(armax): defensively catch all errors and let + # the caller retry the operation, if it can be retried. + # This catch-all should no longer be necessary once + # bug #1612798 is solved; any other error should just + # surface up to the user and be dealt with as a bug. + if db_api.is_retriable(e): + self._cleanup( + context, network_id=network_id, + router_id=router_id, subnets=subnets) def _check_requirements(self, context, tenant_id): """Raise if requirements are not met.""" diff --git a/neutron/tests/unit/services/auto_allocate/test_db.py b/neutron/tests/unit/services/auto_allocate/test_db.py index 561c7071fc8..ecf94a4f9af 100644 --- a/neutron/tests/unit/services/auto_allocate/test_db.py +++ b/neutron/tests/unit/services/auto_allocate/test_db.py @@ -13,6 +13,7 @@ import mock from neutron_lib import exceptions as n_exc +from oslo_db import exception as db_exc from neutron import context from neutron.services.auto_allocate import db @@ -58,6 +59,22 @@ class AutoAllocateTestCase(testlib_api.SqlTestCaseLight): self.mixin.get_auto_allocated_topology, self.ctx, mock.ANY, fields=['foo']) + def _test__build_topology(self, exception): + with mock.patch.object(self.mixin, + '_provision_tenant_private_network', + side_effect=exception), \ + mock.patch.object(self.mixin, '_cleanup') as f: + self.assertRaises(exception, + self.mixin._build_topology, + self.ctx, mock.ANY, 'foo_net') + return f.call_count + + def test__build_topology_retriable_exception(self): + self.assertTrue(self._test__build_topology(db_exc.DBConnectionError)) + + def test__build_topology_non_retriable_exception(self): + self.assertFalse(self._test__build_topology(Exception)) + def test__check_requirements_fail_on_missing_ext_net(self): self.assertRaises(exceptions.AutoAllocationFailure, self.mixin._check_requirements, self.ctx, 'foo_tenant')