From e2fdeefd37314d3e6c5910fcc7070de34561ec44 Mon Sep 17 00:00:00 2001 From: Armando Migliaccio Date: Thu, 1 Sep 2016 19:33:32 -0700 Subject: [PATCH] Deal with unknown exceptions during auto allocation Uncaught exceptions in the core or L3 plugin layers can cause the auto_allocate plugin to fail unexpectedly. This patch ensures that any unexpected error is properly handled by cleaning up half provisioned deployment. Related-bug: #1612798 Closes-bug: #1619497 Change-Id: I3eb9efd33363045f7b2ccd97fe4a48891f48b161 --- neutron/services/auto_allocate/db.py | 42 ++++---- neutron/services/auto_allocate/exceptions.py | 10 ++ .../unit/services/auto_allocate/test_db.py | 98 +++++++++++++++++-- 3 files changed, 122 insertions(+), 28 deletions(-) diff --git a/neutron/services/auto_allocate/db.py b/neutron/services/auto_allocate/db.py index 930d6d2123a..a29095a6604 100644 --- a/neutron/services/auto_allocate/db.py +++ b/neutron/services/auto_allocate/db.py @@ -17,7 +17,6 @@ from neutron_lib import exceptions as n_exc from oslo_db import exception as db_exc from oslo_log import log as logging -from oslo_utils import excutils from sqlalchemy import sql from neutron._i18n import _, _LE @@ -153,9 +152,6 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): def _build_topology(self, context, tenant_id, default_external_network): """Build the network topology and returns its network UUID.""" - network_id = None - router_id = None - subnets = None try: subnets = self._provision_tenant_private_network( context, tenant_id) @@ -165,17 +161,16 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): network_id = self._save( context, tenant_id, network_id, router['id'], subnets) return network_id - except Exception as e: - with excutils.save_and_reraise_exception(): - # FIXME(armax): defensively catch all errors and let - # the caller retry the operation, if it can be retried. - # This catch-all should no longer be necessary once - # bug #1612798 is solved; any other error should just - # surface up to the user and be dealt with as a bug. - if db_api.is_retriable(e): - self._cleanup( - context, network_id=network_id, - router_id=router_id, subnets=subnets) + except exceptions.UnknownProvisioningError as e: + # Clean partially provisioned topologies, and reraise the + # error. If it can be retried, so be it. + LOG.error(_LE("Unknown error while provisioning topology for " + "tenant %(tenant_id)s. Reason: %(reason)s"), + {'tenant_id': tenant_id, 'reason': e}) + self._cleanup( + context, network_id=e.network_id, + router_id=e.router_id, subnets=e.subnets) + raise e.error def _check_requirements(self, context, tenant_id): """Raise if requirements are not met.""" @@ -291,6 +286,9 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): self._cleanup(context, network['id']) raise exceptions.AutoAllocationFailure( reason=_("Unable to provide tenant private network")) + except Exception as e: + network_id = network['id'] if network else None + raise exceptions.UnknownProvisioningError(e, network_id=network_id) def _provision_external_connectivity( self, context, default_external_network, subnets, tenant_id): @@ -316,15 +314,17 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): "%(tenant_id)s because of router errors. " "Reason: %(reason)s"), {'tenant_id': tenant_id, 'reason': e}) - if router: - router_id = router['id'] - else: - router_id = None + router_id = router['id'] if router else None self._cleanup(context, network_id=subnets[0]['network_id'], router_id=router_id, subnets=attached_subnets) raise exceptions.AutoAllocationFailure( reason=_("Unable to provide external connectivity")) + except Exception as e: + router_id = router['id'] if router else None + raise exceptions.UnknownProvisioningError( + e, network_id=subnets[0]['network_id'], + router_id=router_id, subnets=subnets) def _save(self, context, tenant_id, network_id, router_id, subnets): """Save auto-allocated topology, or revert in case of DB errors.""" @@ -354,6 +354,10 @@ class AutoAllocatedTopologyMixin(common_db_mixin.CommonDbMixin): context, network_id=network_id, router_id=router_id, subnets=subnets) network_id = self._get_auto_allocated_network(context, tenant_id) + except Exception as e: + raise exceptions.UnknownProvisioningError( + e, network_id=network_id, + router_id=router_id, subnets=subnets) return network_id # FIXME(kevinbenton): get rid of the retry once bug/1612798 is resolved diff --git a/neutron/services/auto_allocate/exceptions.py b/neutron/services/auto_allocate/exceptions.py index 86651bc46b7..8b708cfb7d0 100644 --- a/neutron/services/auto_allocate/exceptions.py +++ b/neutron/services/auto_allocate/exceptions.py @@ -25,3 +25,13 @@ class AutoAllocationFailure(n_exc.Conflict): class DefaultExternalNetworkExists(n_exc.Conflict): message = _("A default external network already exists: %(net_id)s.") + + +class UnknownProvisioningError(Exception): + """To track unknown errors and partial provisioning steps.""" + + def __init__(self, error, network_id=None, router_id=None, subnets=None): + self.error = error + self.network_id = network_id + self.router_id = router_id + self.subnets = subnets diff --git a/neutron/tests/unit/services/auto_allocate/test_db.py b/neutron/tests/unit/services/auto_allocate/test_db.py index c3bb73d7a78..93a08e45e07 100644 --- a/neutron/tests/unit/services/auto_allocate/test_db.py +++ b/neutron/tests/unit/services/auto_allocate/test_db.py @@ -12,12 +12,15 @@ # limitations under the License. import mock +import testtools + from neutron_lib import exceptions as n_exc from oslo_db import exception as db_exc from oslo_utils import uuidutils from neutron.common import exceptions as c_exc from neutron import context +from neutron.plugins.common import utils from neutron.services.auto_allocate import db from neutron.services.auto_allocate import exceptions from neutron.tests.unit import testlib_api @@ -93,21 +96,98 @@ class AutoAllocateTestCase(testlib_api.SqlTestCaseLight): self.ctx, 'foo_tenant') g.assert_called_once_with(self.ctx, network_id) - def _test__build_topology(self, exception): + def _test__build_topology(self, method, provisioning_exception): with mock.patch.object(self.mixin, - '_provision_tenant_private_network', - side_effect=exception), \ + method, + side_effect=provisioning_exception), \ mock.patch.object(self.mixin, '_cleanup') as f: - self.assertRaises(exception, + self.assertRaises(provisioning_exception.error, self.mixin._build_topology, self.ctx, mock.ANY, 'foo_net') - return f.call_count + f.assert_called_once_with( + self.ctx, + network_id=provisioning_exception.network_id, + router_id=provisioning_exception.router_id, + subnets=provisioning_exception.subnets + ) - def test__build_topology_retriable_exception(self): - self.assertTrue(self._test__build_topology(db_exc.DBConnectionError)) + def test__build_topology_provisioning_error_no_toplogy(self): + provisioning_exception = exceptions.UnknownProvisioningError( + db_exc.DBError) + self._test__build_topology( + '_provision_tenant_private_network', + provisioning_exception) - def test__build_topology_non_retriable_exception(self): - self.assertFalse(self._test__build_topology(Exception)) + def test__build_topology_provisioning_error_network_only(self): + provisioning_exception = exceptions.UnknownProvisioningError( + Exception, network_id='foo') + self._test__build_topology( + '_provision_tenant_private_network', + provisioning_exception) + + def test__build_topology_error_only_network_again(self): + provisioning_exception = exceptions.UnknownProvisioningError( + AttributeError, network_id='foo') + with mock.patch.object(self.mixin, + '_provision_tenant_private_network') as f: + f.return_value = [{'network_id': 'foo'}] + self._test__build_topology( + '_provision_external_connectivity', + provisioning_exception) + + def test__build_topology_error_network_with_router(self): + provisioning_exception = exceptions.UnknownProvisioningError( + KeyError, network_id='foo_n', router_id='foo_r') + with mock.patch.object(self.mixin, + '_provision_tenant_private_network') as f: + f.return_value = [{'network_id': 'foo_n'}] + self._test__build_topology( + '_provision_external_connectivity', + provisioning_exception) + + def test__build_topology_error_network_with_router_and_interfaces(self): + provisioning_exception = exceptions.UnknownProvisioningError( + db_exc.DBConnectionError, + network_id='foo_n', router_id='foo_r', subnets=[{'id': 'foo_s'}]) + with mock.patch.object(self.mixin, + '_provision_tenant_private_network') as f,\ + mock.patch.object(self.mixin, + '_provision_external_connectivity') as g: + f.return_value = [{'network_id': 'foo_n'}] + g.return_value = {'id': 'foo_r'} + self._test__build_topology( + '_save', + provisioning_exception) + + def test__save_with_provisioning_error(self): + with mock.patch.object(utils, "update_network", side_effect=Exception): + with testtools.ExpectedException( + exceptions.UnknownProvisioningError) as e: + self.mixin._save(self.ctx, 'foo_t', 'foo_n', 'foo_r', + [{'id': 'foo_s'}]) + self.assertEqual('foo_n', e.network_id) + self.assertEqual('foo_r', e.router_id) + self.assertEqual([{'id': 'foo_s'}], e.subnets) + + def test__provision_external_connectivity_with_provisioning_error(self): + self.mixin._l3_plugin.create_router.side_effect = Exception + with testtools.ExpectedException( + exceptions.UnknownProvisioningError) as e: + self.mixin._provision_external_connectivity( + self.ctx, 'foo_default', + [{'id': 'foo_s', 'network_id': 'foo_n'}], + 'foo_tenant') + self.assertEqual('foo_n', e.network_id) + self.assertIsNone(e.router_id) + self.assertIsNone(e.subnets) + + def test__provision_tenant_private_network_with_provisioning_error(self): + self.mixin._core_plugin.create_network.side_effect = Exception + with testtools.ExpectedException( + exceptions.UnknownProvisioningError) as e: + self.mixin._provision_tenant_private_network( + self.ctx, 'foo_tenant') + self.assertIsNone(e.network_id) def test__check_requirements_fail_on_missing_ext_net(self): self.assertRaises(exceptions.AutoAllocationFailure,