Proper error handling by _ensure_resource_provider

Previously, if _ensure_resource_provider encountered any error from the placement REST API, it would (sometimes log a message and) return None. Furthermore, a name conflict while creating the provider was treated the same as a UUID conflict, which would actually result in None being returned. With this change set, the error paths that previously returned None now raise one of the new ResourceProviderRetrievalFailed or ResourceProviderCreationFailed exceptions; and the name conflict path is detected and treated as an error condition. Note: This change set only touches the SchedulerReportClient side of these error conditions - it makes no attempt to add error handling to its callers. Case in point, the API samples tests needed fixing because they were previously running into the name conflict error condition, but not noticing. As currently implemented, the new exceptions will percolate up to ComputeManager.update_available_resource_for_node like any others coming from SchedulerReportClient, where they will be logged and ignored. Change-Id: I0c4ca6a81f213277fe7219cb905a805712f81e36 Closes-Bug: #1735430
2017-11-30 11:39:11 -06:00
parent 0792d7ad5b
commit 112cd9cd1f
10 changed files with 80 additions and 45 deletions
--- a/doc/api_samples/os-hypervisors/v2.33/hypervisors-detail-resp.json
+++ b/doc/api_samples/os-hypervisors/v2.33/hypervisors-detail-resp.json
@@ -22,7 +22,7 @@
            "host_ip": "1.1.1.1",
            "free_disk_gb": 1028,
            "free_ram_mb": 7680,
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host1",
            "hypervisor_type": "fake",
            "hypervisor_version": 1000,
            "id": 2,
--- a/doc/api_samples/os-hypervisors/v2.33/hypervisors-list-resp.json
+++ b/doc/api_samples/os-hypervisors/v2.33/hypervisors-list-resp.json
@@ -1,7 +1,7 @@
 {
    "hypervisors": [
        {
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host1",
            "id": 2,
            "state": "up",
            "status": "enabled"
--- a/doc/api_samples/os-hypervisors/v2.53/hypervisors-detail-resp.json
+++ b/doc/api_samples/os-hypervisors/v2.53/hypervisors-detail-resp.json
@@ -22,7 +22,7 @@
            "host_ip": "1.1.1.1",
            "free_disk_gb": 1028,
            "free_ram_mb": 7680,
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host2",
            "hypervisor_type": "fake",
            "hypervisor_version": 1000,
            "id": "1bb62a04-c576-402c-8147-9e89757a09e3",
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2074,6 +2074,14 @@ class ResourceProviderInUse(NovaException):
    msg_fmt = _("Resource provider has allocations.")


+class ResourceProviderRetrievalFailed(NovaException):
+    msg_fmt = _("Failed to get resource provider with UUID %(uuid)s")
+
+
+class ResourceProviderCreationFailed(NovaException):
+    msg_fmt = _("Failed to create resource provider %(name)s")
+
+
 class InventoryWithResourceClassNotFound(NotFound):
    msg_fmt = _("No inventory of class %(resource_class)s found.")

--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -395,10 +395,10 @@ class SchedulerReportClient(object):
        """Queries the placement API for a resource provider record with the
        supplied UUID.

-        Returns a dict of resource provider information if found or None if no
-        such resource provider could be found.
-
        :param uuid: UUID identifier for the resource provider to look up
+        :return: A dict of resource provider information if found or None if no
+                 such resource provider could be found.
+        :raise: ResourceProviderRetrievalFailed on error.
        """
        resp = self.get("/resource_providers/%s" % uuid)
        if resp.status_code == 200:
@@ -418,16 +418,18 @@ class SchedulerReportClient(object):
                'placement_req_id': placement_req_id,
            }
            LOG.error(msg, args)
+            raise exception.ResourceProviderRetrievalFailed(uuid=uuid)

    @safe_connect
    def _create_resource_provider(self, uuid, name):
        """Calls the placement API to create a new resource provider record.

-        Returns a dict of resource provider information object representing
-        the newly-created resource provider.
-
        :param uuid: UUID of the new resource provider
        :param name: Name of the resource provider
+        :return: A dict of resource provider information object representing
+                 the newly-created resource provider.
+        :raise: ResourceProviderCreationFailed or
+                ResourceProviderRetrievalFailed on error.
        """
        url = "/resource_providers"
        payload = {
@@ -451,7 +453,10 @@ class SchedulerReportClient(object):
                    name=name,
                    generation=0,
            )
-        elif resp.status_code == 409:
+
+        # TODO(efried): Push error codes from placement, and use 'em.
+        name_conflict = 'Conflicting resource provider name:'
+        if resp.status_code == 409 and name_conflict not in resp.text:
            # Another thread concurrently created a resource provider with the
            # same UUID. Log a warning and then just return the resource
            # provider object from _get_resource_provider()
@@ -464,18 +469,19 @@ class SchedulerReportClient(object):
            }
            LOG.info(msg, args)
            return self._get_resource_provider(uuid)
-        else:
-            msg = ("[%(placement_req_id)s] Failed to create resource provider "
-                   "record in placement API for UUID %(uuid)s. Got "
-                   "%(status_code)d: %(err_text)s.")
-            args = {
-                'uuid': uuid,
-                'status_code': resp.status_code,
-                'err_text': resp.text,
-                'placement_req_id': placement_req_id,
-            }
-            LOG.error(msg, args)
-            return None
+
+        # A provider with the same *name* already exists, or some other error.
+        msg = ("[%(placement_req_id)s] Failed to create resource provider "
+               "record in placement API for UUID %(uuid)s. Got "
+               "%(status_code)d: %(err_text)s.")
+        args = {
+            'uuid': uuid,
+            'status_code': resp.status_code,
+            'err_text': resp.text,
+            'placement_req_id': placement_req_id,
+        }
+        LOG.error(msg, args)
+        raise exception.ResourceProviderCreationFailed(name=name)

    def _ensure_resource_provider(self, uuid, name=None):
        """Ensures that the placement API has a record of a resource provider
@@ -486,9 +492,9 @@ class SchedulerReportClient(object):
        If found or created, an object representing the provider (and potential
        child providers) is returned from this method. If the resource provider
        for the supplied uuid was not found and the resource provider record
-        could not be created in the placement API, we return None.
+        could not be created in the placement API, an exception is raised.

-        If this method returns a non-None value, callers are assured both that
+        If this method returns successfully, callers are assured both that
        the placement API contains a record of the provider and the local tree
        of resource provider information contains a record of the provider.

@@ -508,10 +514,7 @@ class SchedulerReportClient(object):
        # the placement API.
        rp = self._get_resource_provider(uuid)
        if rp is None:
-            name = name or uuid
-            rp = self._create_resource_provider(uuid, name)
-            if rp is None:
-                return None
+            rp = self._create_resource_provider(uuid, name or uuid)

        # If there had been no resource provider record, force refreshing
        # the aggregate map.
--- a/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.33/hypervisors-detail-resp.json.tpl
+++ b/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.33/hypervisors-detail-resp.json.tpl
@@ -22,7 +22,7 @@
            "host_ip": "%(ip)s",
            "free_disk_gb": 1028,
            "free_ram_mb": 7680,
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host1",
            "hypervisor_type": "fake",
            "hypervisor_version": 1000,
            "id": %(hypervisor_id)s,
--- a/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.33/hypervisors-list-resp.json.tpl
+++ b/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.33/hypervisors-list-resp.json.tpl
@@ -1,7 +1,7 @@
 {
    "hypervisors": [
        {
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host1",
            "id": 2,
            "state": "up",
            "status": "enabled"
--- a/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.53/hypervisors-detail-resp.json.tpl
+++ b/nova/tests/functional/api_sample_tests/api_samples/os-hypervisors/v2.53/hypervisors-detail-resp.json.tpl
@@ -22,7 +22,7 @@
            "host_ip": "%(ip)s",
            "free_disk_gb": 1028,
            "free_ram_mb": 7680,
-            "hypervisor_hostname": "fake-mini",
+            "hypervisor_hostname": "host2",
            "hypervisor_type": "fake",
            "hypervisor_version": 1000,
            "id": "%(hypervisor_id)s",
--- a/nova/tests/functional/api_sample_tests/test_hypervisors.py
+++ b/nova/tests/functional/api_sample_tests/test_hypervisors.py
@@ -18,6 +18,7 @@ import mock
 from nova.cells import utils as cells_utils
 from nova import objects
 from nova.tests.functional.api_sample_tests import api_sample_base
+from nova.virt import fake


 class HypervisorsSampleJsonTests(api_sample_base.ApiSampleTestBaseV21):
@@ -155,7 +156,10 @@ class HypervisorsSampleJson233Tests(api_sample_base.ApiSampleTestBaseV21):
        self.api.microversion = self.microversion
        # Start a new compute service to fake a record with hypervisor id=2
        # for pagination test.
-        self.start_service('compute', host='host1')
+        host = 'host1'
+        fake.set_nodes([host])
+        self.addCleanup(fake.restore_nodes)
+        self.start_service('compute', host=host)

    def test_hypervisors_list(self):
        response = self._do_get('os-hypervisors?limit=1&marker=1')
@@ -200,7 +204,10 @@ class HypervisorsSampleJson253Tests(HypervisorsSampleJson228Tests):

    def test_hypervisors_detail(self):
        # Start another compute service to get a 2nd compute for paging tests.
-        service_2 = self.start_service('compute', host='host2').service_ref
+        host = 'host2'
+        fake.set_nodes([host])
+        self.addCleanup(fake.restore_nodes)
+        service_2 = self.start_service('compute', host=host).service_ref
        compute_node_2 = service_2.compute_node
        marker = self.compute_node_1.uuid
        subs = {
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@@ -1112,16 +1112,19 @@ class TestProviderOperations(SchedulerReportClientTestCase):
                '_get_provider_aggregates')
    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                '_get_resource_provider')
-    def test_ensure_resource_provider_create_none(self, get_rp_mock,
+    def test_ensure_resource_provider_create_fail(self, get_rp_mock,
            get_agg_mock, create_rp_mock):
        # No resource provider exists in the client's cache, and
-        # _create_provider returns None, indicating there was an error with the
+        # _create_provider raises, indicating there was an error with the
        # create call. Ensure we don't populate the resource provider cache
        # with a None value.
        get_rp_mock.return_value = None
-        create_rp_mock.return_value = None
+        create_rp_mock.side_effect = exception.ResourceProviderCreationFailed(
+            name=uuids.compute_node)

-        self.client._ensure_resource_provider(uuids.compute_node)
+        self.assertRaises(
+            exception.ResourceProviderCreationFailed,
+            self.client._ensure_resource_provider, uuids.compute_node)

        get_rp_mock.assert_called_once_with(uuids.compute_node)
        create_rp_mock.assert_called_once_with(uuids.compute_node,
@@ -1262,7 +1265,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
            'openstack-request-id': uuids.request_id}

        uuid = uuids.compute_node
-        result = self.client._get_resource_provider(uuid)
+        self.assertRaises(
+            exception.ResourceProviderRetrievalFailed,
+            self.client._get_resource_provider, uuid)

        expected_url = '/resource_providers/' + uuid
        self.ks_adap_mock.get.assert_called_once_with(
@@ -1271,7 +1276,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        # that includes the placement request id and return None
        # from _get_resource_provider()
        self.assertTrue(logging_mock.called)
-        self.assertIsNone(result)
        self.assertEqual(uuids.request_id,
                         logging_mock.call_args[0][1]['placement_req_id'])

@@ -1313,10 +1317,10 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        # record.
        uuid = uuids.compute_node
        name = 'computehost'
-        resp_mock = mock.Mock(status_code=409)
-        self.ks_adap_mock.post.return_value = resp_mock
-        self.ks_adap_mock.post.return_value.headers = {
-            'openstack-request-id': uuids.request_id}
+        self.ks_adap_mock.post.return_value = mock.Mock(
+            status_code=409,
+            headers={'openstack-request-id': uuids.request_id},
+            text='not a name conflict')

        get_rp_mock.return_value = mock.sentinel.get_rp

@@ -1336,6 +1340,18 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        self.assertEqual(uuids.request_id,
                        logging_mock.call_args[0][1]['placement_req_id'])

+    def test_create_resource_provider_name_conflict(self):
+        # When the API call to create the resource provider fails 409 with a
+        # name conflict, we raise an exception.
+        self.ks_adap_mock.post.return_value = mock.Mock(
+            status_code=409,
+            text='<stuff>Conflicting resource provider name: foo already '
+                 'exists.</stuff>')
+
+        self.assertRaises(
+            exception.ResourceProviderCreationFailed,
+            self.client._create_resource_provider, uuids.compute_node, 'foo')
+
    @mock.patch.object(report.LOG, 'error')
    def test_create_resource_provider_error(self, logging_mock):
        # Ensure _create_resource_provider() sets the error flag when trying to
@@ -1348,7 +1364,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        self.ks_adap_mock.post.return_value.headers = {
            'x-openstack-request-id': uuids.request_id}

-        result = self.client._create_resource_provider(uuid, name)
+        self.assertRaises(
+            exception.ResourceProviderCreationFailed,
+            self.client._create_resource_provider, uuid, name)

        expected_payload = {
            'uuid': uuid,
@@ -1364,7 +1382,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        self.assertTrue(logging_mock.called)
        self.assertEqual(uuids.request_id,
                        logging_mock.call_args[0][1]['placement_req_id'])
-        self.assertFalse(result)


 class TestAggregates(SchedulerReportClientTestCase):