Proper error handling by _ensure_resource_provider

Previously, if _ensure_resource_provider encountered any error from the
placement REST API, it would (sometimes log a message and) return None.

Furthermore, a name conflict while creating the provider was treated the
same as a UUID conflict, which would actually result in None being
returned.

With this change set, the error paths that previously returned None now
raise one of the new ResourceProviderRetrievalFailed or
ResourceProviderCreationFailed exceptions; and the name conflict path is
detected and treated as an error condition.

Note: This change set only touches the SchedulerReportClient side of
these error conditions - it makes no attempt to add error handling to
its callers.  Case in point, the API samples tests needed fixing because
they were previously running into the name conflict error condition, but
not noticing.  As currently implemented, the new exceptions will
percolate up to ComputeManager.update_available_resource_for_node like
any others coming from SchedulerReportClient, where they will be logged
and ignored.

Change-Id: I0c4ca6a81f213277fe7219cb905a805712f81e36
Closes-Bug: #1735430
This commit is contained in:
Eric Fried 2017-11-30 11:39:11 -06:00
parent 0792d7ad5b
commit 112cd9cd1f
10 changed files with 80 additions and 45 deletions

View File

@ -22,7 +22,7 @@
"host_ip": "1.1.1.1",
"free_disk_gb": 1028,
"free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host1",
"hypervisor_type": "fake",
"hypervisor_version": 1000,
"id": 2,

View File

@ -1,7 +1,7 @@
{
"hypervisors": [
{
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host1",
"id": 2,
"state": "up",
"status": "enabled"

View File

@ -22,7 +22,7 @@
"host_ip": "1.1.1.1",
"free_disk_gb": 1028,
"free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host2",
"hypervisor_type": "fake",
"hypervisor_version": 1000,
"id": "1bb62a04-c576-402c-8147-9e89757a09e3",

View File

@ -2074,6 +2074,14 @@ class ResourceProviderInUse(NovaException):
msg_fmt = _("Resource provider has allocations.")
class ResourceProviderRetrievalFailed(NovaException):
msg_fmt = _("Failed to get resource provider with UUID %(uuid)s")
class ResourceProviderCreationFailed(NovaException):
msg_fmt = _("Failed to create resource provider %(name)s")
class InventoryWithResourceClassNotFound(NotFound):
msg_fmt = _("No inventory of class %(resource_class)s found.")

View File

@ -395,10 +395,10 @@ class SchedulerReportClient(object):
"""Queries the placement API for a resource provider record with the
supplied UUID.
Returns a dict of resource provider information if found or None if no
such resource provider could be found.
:param uuid: UUID identifier for the resource provider to look up
:return: A dict of resource provider information if found or None if no
such resource provider could be found.
:raise: ResourceProviderRetrievalFailed on error.
"""
resp = self.get("/resource_providers/%s" % uuid)
if resp.status_code == 200:
@ -418,16 +418,18 @@ class SchedulerReportClient(object):
'placement_req_id': placement_req_id,
}
LOG.error(msg, args)
raise exception.ResourceProviderRetrievalFailed(uuid=uuid)
@safe_connect
def _create_resource_provider(self, uuid, name):
"""Calls the placement API to create a new resource provider record.
Returns a dict of resource provider information object representing
the newly-created resource provider.
:param uuid: UUID of the new resource provider
:param name: Name of the resource provider
:return: A dict of resource provider information object representing
the newly-created resource provider.
:raise: ResourceProviderCreationFailed or
ResourceProviderRetrievalFailed on error.
"""
url = "/resource_providers"
payload = {
@ -451,7 +453,10 @@ class SchedulerReportClient(object):
name=name,
generation=0,
)
elif resp.status_code == 409:
# TODO(efried): Push error codes from placement, and use 'em.
name_conflict = 'Conflicting resource provider name:'
if resp.status_code == 409 and name_conflict not in resp.text:
# Another thread concurrently created a resource provider with the
# same UUID. Log a warning and then just return the resource
# provider object from _get_resource_provider()
@ -464,18 +469,19 @@ class SchedulerReportClient(object):
}
LOG.info(msg, args)
return self._get_resource_provider(uuid)
else:
msg = ("[%(placement_req_id)s] Failed to create resource provider "
"record in placement API for UUID %(uuid)s. Got "
"%(status_code)d: %(err_text)s.")
args = {
'uuid': uuid,
'status_code': resp.status_code,
'err_text': resp.text,
'placement_req_id': placement_req_id,
}
LOG.error(msg, args)
return None
# A provider with the same *name* already exists, or some other error.
msg = ("[%(placement_req_id)s] Failed to create resource provider "
"record in placement API for UUID %(uuid)s. Got "
"%(status_code)d: %(err_text)s.")
args = {
'uuid': uuid,
'status_code': resp.status_code,
'err_text': resp.text,
'placement_req_id': placement_req_id,
}
LOG.error(msg, args)
raise exception.ResourceProviderCreationFailed(name=name)
def _ensure_resource_provider(self, uuid, name=None):
"""Ensures that the placement API has a record of a resource provider
@ -486,9 +492,9 @@ class SchedulerReportClient(object):
If found or created, an object representing the provider (and potential
child providers) is returned from this method. If the resource provider
for the supplied uuid was not found and the resource provider record
could not be created in the placement API, we return None.
could not be created in the placement API, an exception is raised.
If this method returns a non-None value, callers are assured both that
If this method returns successfully, callers are assured both that
the placement API contains a record of the provider and the local tree
of resource provider information contains a record of the provider.
@ -508,10 +514,7 @@ class SchedulerReportClient(object):
# the placement API.
rp = self._get_resource_provider(uuid)
if rp is None:
name = name or uuid
rp = self._create_resource_provider(uuid, name)
if rp is None:
return None
rp = self._create_resource_provider(uuid, name or uuid)
# If there had been no resource provider record, force refreshing
# the aggregate map.

View File

@ -22,7 +22,7 @@
"host_ip": "%(ip)s",
"free_disk_gb": 1028,
"free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host1",
"hypervisor_type": "fake",
"hypervisor_version": 1000,
"id": %(hypervisor_id)s,

View File

@ -1,7 +1,7 @@
{
"hypervisors": [
{
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host1",
"id": 2,
"state": "up",
"status": "enabled"

View File

@ -22,7 +22,7 @@
"host_ip": "%(ip)s",
"free_disk_gb": 1028,
"free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini",
"hypervisor_hostname": "host2",
"hypervisor_type": "fake",
"hypervisor_version": 1000,
"id": "%(hypervisor_id)s",

View File

@ -18,6 +18,7 @@ import mock
from nova.cells import utils as cells_utils
from nova import objects
from nova.tests.functional.api_sample_tests import api_sample_base
from nova.virt import fake
class HypervisorsSampleJsonTests(api_sample_base.ApiSampleTestBaseV21):
@ -155,7 +156,10 @@ class HypervisorsSampleJson233Tests(api_sample_base.ApiSampleTestBaseV21):
self.api.microversion = self.microversion
# Start a new compute service to fake a record with hypervisor id=2
# for pagination test.
self.start_service('compute', host='host1')
host = 'host1'
fake.set_nodes([host])
self.addCleanup(fake.restore_nodes)
self.start_service('compute', host=host)
def test_hypervisors_list(self):
response = self._do_get('os-hypervisors?limit=1&marker=1')
@ -200,7 +204,10 @@ class HypervisorsSampleJson253Tests(HypervisorsSampleJson228Tests):
def test_hypervisors_detail(self):
# Start another compute service to get a 2nd compute for paging tests.
service_2 = self.start_service('compute', host='host2').service_ref
host = 'host2'
fake.set_nodes([host])
self.addCleanup(fake.restore_nodes)
service_2 = self.start_service('compute', host=host).service_ref
compute_node_2 = service_2.compute_node
marker = self.compute_node_1.uuid
subs = {

View File

@ -1112,16 +1112,19 @@ class TestProviderOperations(SchedulerReportClientTestCase):
'_get_provider_aggregates')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_resource_provider')
def test_ensure_resource_provider_create_none(self, get_rp_mock,
def test_ensure_resource_provider_create_fail(self, get_rp_mock,
get_agg_mock, create_rp_mock):
# No resource provider exists in the client's cache, and
# _create_provider returns None, indicating there was an error with the
# _create_provider raises, indicating there was an error with the
# create call. Ensure we don't populate the resource provider cache
# with a None value.
get_rp_mock.return_value = None
create_rp_mock.return_value = None
create_rp_mock.side_effect = exception.ResourceProviderCreationFailed(
name=uuids.compute_node)
self.client._ensure_resource_provider(uuids.compute_node)
self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._ensure_resource_provider, uuids.compute_node)
get_rp_mock.assert_called_once_with(uuids.compute_node)
create_rp_mock.assert_called_once_with(uuids.compute_node,
@ -1262,7 +1265,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
'openstack-request-id': uuids.request_id}
uuid = uuids.compute_node
result = self.client._get_resource_provider(uuid)
self.assertRaises(
exception.ResourceProviderRetrievalFailed,
self.client._get_resource_provider, uuid)
expected_url = '/resource_providers/' + uuid
self.ks_adap_mock.get.assert_called_once_with(
@ -1271,7 +1276,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
# that includes the placement request id and return None
# from _get_resource_provider()
self.assertTrue(logging_mock.called)
self.assertIsNone(result)
self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id'])
@ -1313,10 +1317,10 @@ class TestProviderOperations(SchedulerReportClientTestCase):
# record.
uuid = uuids.compute_node
name = 'computehost'
resp_mock = mock.Mock(status_code=409)
self.ks_adap_mock.post.return_value = resp_mock
self.ks_adap_mock.post.return_value.headers = {
'openstack-request-id': uuids.request_id}
self.ks_adap_mock.post.return_value = mock.Mock(
status_code=409,
headers={'openstack-request-id': uuids.request_id},
text='not a name conflict')
get_rp_mock.return_value = mock.sentinel.get_rp
@ -1336,6 +1340,18 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id'])
def test_create_resource_provider_name_conflict(self):
# When the API call to create the resource provider fails 409 with a
# name conflict, we raise an exception.
self.ks_adap_mock.post.return_value = mock.Mock(
status_code=409,
text='<stuff>Conflicting resource provider name: foo already '
'exists.</stuff>')
self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._create_resource_provider, uuids.compute_node, 'foo')
@mock.patch.object(report.LOG, 'error')
def test_create_resource_provider_error(self, logging_mock):
# Ensure _create_resource_provider() sets the error flag when trying to
@ -1348,7 +1364,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.ks_adap_mock.post.return_value.headers = {
'x-openstack-request-id': uuids.request_id}
result = self.client._create_resource_provider(uuid, name)
self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._create_resource_provider, uuid, name)
expected_payload = {
'uuid': uuid,
@ -1364,7 +1382,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.assertTrue(logging_mock.called)
self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id'])
self.assertFalse(result)
class TestAggregates(SchedulerReportClientTestCase):