From 9981d06b4cd0e7af9cc819e0556b507ea34171c3 Mon Sep 17 00:00:00 2001 From: Eric Fried Date: Fri, 19 Jul 2019 16:16:23 -0500 Subject: [PATCH] Remove @safe_connect from _delete_provider This commit removes the @safe_connect decorator from SchedulerReportClient._delete_provider and makes its callers deal with resulting keystoneauth1.exceptions.ClientExceptionZ sanely: - ServiceController.delete (via delete_resource_provider) logs an error and continues (backward compatible behavior, best effort to delete each provider). - ComputeManager.update_available_resource (ditto) ditto (ditto). - SchedulerReportClient.update_from_provider_tree raises the exception through, per the contract described in the catch_all internal helper. Change-Id: I8403a841f21a624a546ae5f26bb9ba19318ece6a --- nova/api/openstack/compute/services.py | 15 +++++++++++++-- nova/compute/manager.py | 11 ++++++++--- nova/scheduler/client/report.py | 12 +++++++----- nova/tests/functional/test_report_client.py | 13 +++++++++++++ .../unit/api/openstack/compute/test_services.py | 17 +++++++++++++---- nova/tests/unit/compute/test_compute_mgr.py | 10 ++++++++-- nova/tests/unit/scheduler/client/test_report.py | 13 +++++++++++++ 7 files changed, 75 insertions(+), 16 deletions(-) diff --git a/nova/api/openstack/compute/services.py b/nova/api/openstack/compute/services.py index 235fd9f61d08..74f287790d31 100644 --- a/nova/api/openstack/compute/services.py +++ b/nova/api/openstack/compute/services.py @@ -12,8 +12,11 @@ # License for the specific language governing permissions and limitations # under the License. +from keystoneauth1 import exceptions as ks_exc +from oslo_log import log as logging from oslo_utils import strutils from oslo_utils import uuidutils +import six import webob.exc from nova.api.openstack import api_version_request @@ -33,6 +36,8 @@ from nova import utils UUID_FOR_ID_MIN_VERSION = '2.53' PARTIAL_CONSTRUCT_FOR_CELL_DOWN_MIN_VERSION = '2.69' +LOG = logging.getLogger(__name__) + class ServiceController(wsgi.Controller): @@ -272,8 +277,14 @@ class ServiceController(wsgi.Controller): compute_nodes = objects.ComputeNodeList.get_all_by_host( context, service.host) for compute_node in compute_nodes: - self.placementclient.delete_resource_provider( - context, compute_node, cascade=True) + try: + self.placementclient.delete_resource_provider( + context, compute_node, cascade=True) + except ks_exc.ClientException as e: + LOG.error( + "Failed to delete compute node resource provider " + "for compute node %s: %s", + compute_node.uuid, six.text_type(e)) # remove the host_mapping of this host. try: hm = objects.HostMapping.get_by_host(context, service.host) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 51110b1fda45..2997ae1e485d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8301,9 +8301,14 @@ class ComputeManager(manager.Manager): cn.destroy() self.rt.remove_node(cn.hypervisor_hostname) # Delete the corresponding resource provider in placement, - # along with any associated allocations and inventory. - self.reportclient.delete_resource_provider(context, cn, - cascade=True) + # along with any associated allocations. + try: + self.reportclient.delete_resource_provider(context, cn, + cascade=True) + except keystone_exception.ClientException as e: + LOG.error( + "Failed to delete compute node resource provider " + "for compute node %s: %s", cn.uuid, six.text_type(e)) for nodename in nodenames: self._update_available_resource_for_node(context, nodename, diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 541c17b74dc0..ddc5a60da81f 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -403,7 +403,7 @@ class SchedulerReportClient(object): :return: The name of the RP :raise: ResourceProviderRetrievalFailed if the RP is not in the cache and the communication with the placement is failed. - :raise: ResourceProviderNotFound if the RP does not exists. + :raise: ResourceProviderNotFound if the RP does not exist. """ try: @@ -681,7 +681,6 @@ class SchedulerReportClient(object): return uuid - @safe_connect def _delete_provider(self, rp_uuid, global_request_id=None): resp = self.delete('/resource_providers/%s' % rp_uuid, global_request_id=global_request_id) @@ -1307,6 +1306,8 @@ class SchedulerReportClient(object): reshape (see below). :raises: ReshapeFailed if a reshape was signaled (allocations not None) and it fails for any reason. + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API """ # NOTE(efried): We currently do not handle the "rename" case. This is # where new_tree contains a provider named Y whose UUID already exists @@ -2009,7 +2010,7 @@ class SchedulerReportClient(object): if allocations['allocations'] == {}: # the consumer did not exist in the first place LOG.debug('Cannot delete allocation for %s consumer in placement ' - 'as consumer does not exists', uuid) + 'as consumer does not exist', uuid) return False # removing all resources from the allocation will auto delete the @@ -2140,8 +2141,9 @@ class SchedulerReportClient(object): :param compute_node: The nova.objects.ComputeNode object that is the resource provider being deleted. :param cascade: Boolean value that, when True, will first delete any - associated Allocation and Inventory records for the - compute node + associated Allocation records for the compute node + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API """ nodename = compute_node.hypervisor_hostname host = compute_node.host diff --git a/nova/tests/functional/test_report_client.py b/nova/tests/functional/test_report_client.py index 79fdc786b019..85034a01b117 100644 --- a/nova/tests/functional/test_report_client.py +++ b/nova/tests/functional/test_report_client.py @@ -938,6 +938,19 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase): # Let's delete some stuff new_tree.remove(uuids.ssp) self.assertFalse(new_tree.exists('ssp')) + + # Verify that placement communication failure raises through + with mock.patch.object(self.client, '_delete_provider', + side_effect=kse.EndpointNotFound): + self.assertRaises( + kse.ClientException, + self.client.update_from_provider_tree, + self.context, new_tree) + # The provider didn't get deleted (this doesn't raise + # ResourceProviderNotFound) + self.client.get_provider_by_name(self.context, 'ssp') + + # Continue removing stuff new_tree.remove('child1') self.assertFalse(new_tree.exists('child1')) # Removing a node removes its descendants too diff --git a/nova/tests/unit/api/openstack/compute/test_services.py b/nova/tests/unit/api/openstack/compute/test_services.py index 67ee253f2882..3c730b2bd4cc 100644 --- a/nova/tests/unit/api/openstack/compute/test_services.py +++ b/nova/tests/unit/api/openstack/compute/test_services.py @@ -16,6 +16,7 @@ import copy import datetime +from keystoneauth1 import exceptions as ks_exc import mock from oslo_utils import fixture as utils_fixture from oslo_utils.fixture import uuidsentinel @@ -713,9 +714,11 @@ class ServicesTestV21(test.TestCase): """ @mock.patch('nova.objects.ComputeNodeList.get_all_by_host', return_value=objects.ComputeNodeList(objects=[ - objects.ComputeNode(host='host1', + objects.ComputeNode(uuid=uuidsentinel.uuid1, + host='host1', hypervisor_hostname='node1'), - objects.ComputeNode(host='host1', + objects.ComputeNode(uuid=uuidsentinel.uuid2, + host='host1', hypervisor_hostname='node2')])) @mock.patch.object(self.controller.host_api, 'service_get_by_id', return_value=objects.Service( @@ -724,8 +727,11 @@ class ServicesTestV21(test.TestCase): 'get_aggregates_by_host', return_value=objects.AggregateList()) @mock.patch.object(self.controller.placementclient, - 'delete_resource_provider') - def _test(delete_resource_provider, + 'delete_resource_provider', + # placement connect error doesn't stop the loop + side_effect=[ks_exc.EndpointNotFound, None]) + @mock.patch.object(services_v21, 'LOG') + def _test(mock_log, delete_resource_provider, get_aggregates_by_host, service_get_by_id, cn_get_all_by_host): self.controller.delete(self.req, 2) @@ -740,6 +746,9 @@ class ServicesTestV21(test.TestCase): ], any_order=True) get_hm.assert_called_once_with(ctxt, 'host1') service_delete.assert_called_once_with() + mock_log.error.assert_called_once_with( + "Failed to delete compute node resource provider for compute " + "node %s: %s", uuidsentinel.uuid1, mock.ANY) _test() # This test is just to verify that the servicegroup API gets used when diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 769e54f1dc20..e95ee2df795e 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -253,7 +253,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, bdms=mock_bdms)]) def _make_compute_node(self, hyp_hostname, cn_id): - cn = mock.Mock(spec_set=['hypervisor_hostname', 'id', + cn = mock.Mock(spec_set=['hypervisor_hostname', 'id', 'uuid', 'destroy']) cn.id = cn_id cn.hypervisor_hostname = hyp_hostname @@ -303,15 +303,18 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.context, mock.sentinel.node, startup=True) log_mock.exception.assert_called_once() + @mock.patch.object(manager, 'LOG') @mock.patch.object(manager.ComputeManager, '_update_available_resource_for_node') @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes') @mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db') def test_update_available_resource(self, get_db_nodes, get_avail_nodes, - update_mock): + update_mock, mock_log): mock_rt = self._mock_rt() rc_mock = self.useFixture(fixtures.fixtures.MockPatchObject( self.compute, 'reportclient')).mock + rc_mock.delete_resource_provider.side_effect = ( + keystone_exception.EndpointNotFound) db_nodes = [self._make_compute_node('node%s' % i, i) for i in range(1, 5)] avail_nodes = set(['node2', 'node3', 'node4', 'node5']) @@ -335,6 +338,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.context, db_node, cascade=True) mock_rt.remove_node.assert_called_once_with( 'node1') + mock_log.error.assert_called_once_with( + "Failed to delete compute node resource provider for " + "compute node %s: %s", db_node.uuid, mock.ANY) else: self.assertFalse(db_node.destroy.called) diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index efb1aa6d5e63..d14aad41a5ab 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -3295,6 +3295,19 @@ class TestAllocations(SchedulerReportClientTestCase): self.assertEqual(0, mock_log.info.call_count) self.assertEqual(1, mock_log.error.call_count) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.delete', + new=mock.Mock(side_effect=ks_exc.EndpointNotFound())) + def test_delete_resource_provider_placement_exception(self): + """Ensure that a ksa exception in delete_resource_provider raises + through. + """ + self.client._provider_tree.new_root(uuids.cn, uuids.cn, generation=1) + cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host", + hypervisor_hostname="fake_hostname", ) + self.assertRaises( + ks_exc.ClientException, + self.client.delete_resource_provider, self.context, cn) + @mock.patch("nova.scheduler.client.report.SchedulerReportClient.get") def test_get_allocations_for_resource_provider(self, mock_get): mock_get.return_value = fake_requests.FakeResponse(