SchedulerReportClient.set_traits_for_provider
Introduce SchedulerReportClient.set_traits_for_provider, which accepts a provider UUID and list of traits to push back to the placement API. On success, the local ProviderTree cache is updated appropriately. On failure, an exception is raised. If the failure was a conflict (409), the exception is a subclass of PlacementAPIConflict (see below) so that the caller may decide to refresh and retry. This change also introduces the _ensure_traits helper, which makes sure a given set of traits exists in the placement service. Per design [1], we want consumers of SchedulerReportClient to be able to detect placement API conflict errors (HTTP 409), as distinct from other errors. Such consumers (the resource tracker) may respond to conflicts by redriving the operation in progress, whereas other exceptions will be allowed to bubble up. PlacementAPIConflict will be the (base) class of exceptions raised in response to placement 409s for which we want to allow this kind of behavior. [1] http://eavesdrop.openstack.org/meetings/nova_scheduler/2018/nova_scheduler.2018-01-08-14.00.log.html#l-251 Change-Id: I9d9b4a5cb5f6c5d255c00e22923728892c37383f blueprint: nested-resource-providers blueprint: ironic-driver-traits
This commit is contained in:
parent
281b38e0d6
commit
60b2525817
|
@ -2115,6 +2115,27 @@ class ResourceProviderCreationFailed(NovaException):
|
|||
msg_fmt = _("Failed to create resource provider %(name)s")
|
||||
|
||||
|
||||
class ResourceProviderUpdateFailed(NovaException):
|
||||
msg_fmt = _("Failed to update resource provider via URL %(url)s: "
|
||||
"%(error)s")
|
||||
|
||||
|
||||
class PlacementAPIConflict(NovaException):
|
||||
"""Any 409 error from placement APIs should use (a subclass of) this
|
||||
exception.
|
||||
"""
|
||||
msg_fmt = _("A conflict was encountered attempting to invoke the "
|
||||
"placement API at URL %(url)s: %(error)s")
|
||||
|
||||
|
||||
class ResourceProviderUpdateConflict(PlacementAPIConflict):
|
||||
"""A 409 caused by generation mismatch from attempting to update an
|
||||
existing provider record or its associated data (aggregates, traits, etc.).
|
||||
"""
|
||||
msg_fmt = _("A conflict was encountered attempting to update resource "
|
||||
"provider %(uuid)s (generation %(generation)d): %(error)s")
|
||||
|
||||
|
||||
class InventoryWithResourceClassNotFound(NotFound):
|
||||
msg_fmt = _("No inventory of class %(resource_class)s found.")
|
||||
|
||||
|
@ -2231,6 +2252,14 @@ class TraitInUse(Invalid):
|
|||
msg_fmt = _("The trait %(name)s is in use by a resource provider.")
|
||||
|
||||
|
||||
class TraitRetrievalFailed(NovaException):
|
||||
msg_fmt = _("Failed to retrieve traits from the placement API: %(error)s")
|
||||
|
||||
|
||||
class TraitCreationFailed(NovaException):
|
||||
msg_fmt = _("Failed to create trait %(name)s: %(error)s")
|
||||
|
||||
|
||||
class CannotMigrateWithTargetHost(NovaException):
|
||||
msg_fmt = _("Cannot migrate with target host. Retry without a host "
|
||||
"specified.")
|
||||
|
|
|
@ -967,6 +967,117 @@ class SchedulerReportClient(object):
|
|||
else:
|
||||
self._delete_inventory(rp_uuid)
|
||||
|
||||
@safe_connect
|
||||
def _ensure_traits(self, traits):
|
||||
"""Make sure all specified traits exist in the placement service.
|
||||
|
||||
:param traits: Iterable of trait strings to ensure exist.
|
||||
:raises: TraitCreationFailed if traits contains a trait that did not
|
||||
exist in placement, and couldn't be created. When this
|
||||
exception is raised, it is possible that *some* of the
|
||||
requested traits were created.
|
||||
:raises: TraitRetrievalFailed if the initial query of existing traits
|
||||
was unsuccessful. In this scenario, it is guaranteed that
|
||||
no traits were created.
|
||||
"""
|
||||
if not traits:
|
||||
return
|
||||
|
||||
# Query for all the requested traits. Whichever ones we *don't* get
|
||||
# back, we need to create.
|
||||
# NOTE(efried): We don't attempt to filter based on our local idea of
|
||||
# standard traits, which may not be in sync with what the placement
|
||||
# service knows. If the caller tries to ensure a nonexistent
|
||||
# "standard" trait, they deserve the TraitCreationFailed exception
|
||||
# they'll get.
|
||||
resp = self.get('/traits?name=in:' + ','.join(traits), version='1.6')
|
||||
if resp.status_code == 200:
|
||||
traits_to_create = set(traits) - set(resp.json()['traits'])
|
||||
# Might be neat to have a batch create. But creating multiple
|
||||
# traits will generally happen once, at initial startup, if at all.
|
||||
for trait in traits_to_create:
|
||||
resp = self.put('/traits/' + trait, None, version='1.6')
|
||||
if not resp:
|
||||
raise exception.TraitCreationFailed(name=trait,
|
||||
error=resp.text)
|
||||
return
|
||||
|
||||
# The initial GET failed
|
||||
msg = ("[%(placement_req_id)s] Failed to retrieve the list of traits. "
|
||||
"Got %(status_code)d: %(err_text)s")
|
||||
args = {
|
||||
'placement_req_id': get_placement_request_id(resp),
|
||||
'status_code': resp.status_code,
|
||||
'err_text': resp.text,
|
||||
}
|
||||
LOG.error(msg, args)
|
||||
raise exception.TraitRetrievalFailed(error=resp.text)
|
||||
|
||||
@safe_connect
|
||||
def set_traits_for_provider(self, rp_uuid, traits):
|
||||
"""Replace a provider's traits with those specified.
|
||||
|
||||
The provider must exist - this method does not attempt to create it.
|
||||
|
||||
:param rp_uuid: The UUID of the provider whose traits are to be updated
|
||||
:param traits: Iterable of traits to set on the provider
|
||||
:raises: ResourceProviderUpdateConflict if the provider's generation
|
||||
doesn't match the generation in the cache. Callers may choose
|
||||
to retrieve the provider and its associations afresh and
|
||||
redrive this operation.
|
||||
:raises: ResourceProviderUpdateFailed on any other placement API
|
||||
failure.
|
||||
:raises: TraitCreationFailed if traits contains a trait that did not
|
||||
exist in placement, and couldn't be created.
|
||||
:raises: TraitRetrievalFailed if the initial query of existing traits
|
||||
was unsuccessful.
|
||||
"""
|
||||
# If not different from what we've got, short out
|
||||
if not self._provider_tree.have_traits_changed(rp_uuid, traits):
|
||||
return
|
||||
|
||||
self._ensure_traits(traits)
|
||||
|
||||
url = '/resource_providers/%s/traits' % rp_uuid
|
||||
# NOTE(efried): Don't use the DELETE API when traits is empty, because
|
||||
# that guy doesn't return content, and we need to update the cached
|
||||
# provider tree with the new generation.
|
||||
traits = traits or []
|
||||
generation = self._provider_tree.data(rp_uuid).generation
|
||||
payload = {
|
||||
'resource_provider_generation': generation,
|
||||
'traits': traits,
|
||||
}
|
||||
resp = self.put(url, payload, version='1.6')
|
||||
|
||||
if resp.status_code == 200:
|
||||
json = resp.json()
|
||||
self._provider_tree.update_traits(
|
||||
rp_uuid, json['traits'],
|
||||
generation=json['resource_provider_generation'])
|
||||
return
|
||||
|
||||
# Some error occurred; log it
|
||||
msg = ("[%(placement_req_id)s] Failed to update traits to "
|
||||
"[%(traits)s] for resource provider with UUID %(uuid)s. Got "
|
||||
"%(status_code)d: %(err_text)s")
|
||||
args = {
|
||||
'placement_req_id': get_placement_request_id(resp),
|
||||
'uuid': rp_uuid,
|
||||
'traits': ','.join(traits),
|
||||
'status_code': resp.status_code,
|
||||
'err_text': resp.text,
|
||||
}
|
||||
LOG.error(msg, args)
|
||||
|
||||
# If a conflict, raise special conflict exception
|
||||
if resp.status_code == 409:
|
||||
raise exception.ResourceProviderUpdateConflict(
|
||||
uuid=rp_uuid, generation=generation, error=resp.text)
|
||||
|
||||
# Otherwise, raise generic exception
|
||||
raise exception.ResourceProviderUpdateFailed(url=url, error=resp.text)
|
||||
|
||||
@safe_connect
|
||||
def _ensure_resource_class(self, name):
|
||||
"""Make sure a custom resource class exists.
|
||||
|
|
|
@ -15,6 +15,7 @@ import time
|
|||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
import mock
|
||||
import requests
|
||||
import six
|
||||
from six.moves.urllib import parse
|
||||
|
||||
|
@ -1826,6 +1827,8 @@ class TestAggregates(SchedulerReportClientTestCase):
|
|||
|
||||
|
||||
class TestTraits(SchedulerReportClientTestCase):
|
||||
trait_api_kwargs = {'raise_exc': False, 'microversion': '1.6'}
|
||||
|
||||
def test_get_provider_traits_found(self):
|
||||
uuid = uuids.compute_node
|
||||
resp_mock = mock.Mock(status_code=200)
|
||||
|
@ -1840,7 +1843,7 @@ class TestTraits(SchedulerReportClientTestCase):
|
|||
|
||||
expected_url = '/resource_providers/' + uuid + '/traits'
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
expected_url, raise_exc=False, microversion='1.6')
|
||||
expected_url, **self.trait_api_kwargs)
|
||||
self.assertEqual(set(traits), result)
|
||||
|
||||
@mock.patch.object(report.LOG, 'warning')
|
||||
|
@ -1877,12 +1880,141 @@ class TestTraits(SchedulerReportClientTestCase):
|
|||
|
||||
expected_url = '/resource_providers/' + uuid + '/traits'
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
expected_url, raise_exc=False, microversion='1.6')
|
||||
expected_url, **self.trait_api_kwargs)
|
||||
self.assertTrue(log_mock.called)
|
||||
self.assertEqual(uuids.request_id,
|
||||
log_mock.call_args[0][1]['placement_req_id'])
|
||||
self.assertIsNone(result)
|
||||
|
||||
def test_ensure_traits(self):
|
||||
"""Successful paths, various permutations of traits existing or needing
|
||||
to be created.
|
||||
"""
|
||||
standard_traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA']
|
||||
custom_traits = ['CUSTOM_GOLD', 'CUSTOM_SILVER']
|
||||
all_traits = standard_traits + custom_traits
|
||||
|
||||
get_mock = mock.Mock(status_code=200)
|
||||
self.ks_adap_mock.get.return_value = get_mock
|
||||
|
||||
# Request all traits; custom traits need to be created
|
||||
get_mock.json.return_value = {'traits': standard_traits}
|
||||
self.client._ensure_traits(all_traits)
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:' + ','.join(all_traits), **self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_has_calls(
|
||||
[mock.call('/traits/' + trait, **self.trait_api_kwargs)
|
||||
for trait in custom_traits], any_order=True)
|
||||
|
||||
self.ks_adap_mock.reset_mock()
|
||||
|
||||
# Request standard traits; no traits need to be created
|
||||
get_mock.json.return_value = {'traits': standard_traits}
|
||||
self.client._ensure_traits(standard_traits)
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:' + ','.join(standard_traits),
|
||||
**self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_not_called()
|
||||
|
||||
self.ks_adap_mock.reset_mock()
|
||||
|
||||
# Request no traits - short circuit
|
||||
self.client._ensure_traits(None)
|
||||
self.client._ensure_traits([])
|
||||
self.ks_adap_mock.get.assert_not_called()
|
||||
self.ks_adap_mock.put.assert_not_called()
|
||||
|
||||
def test_ensure_traits_fail_retrieval(self):
|
||||
self.ks_adap_mock.get.return_value = mock.Mock(status_code=400)
|
||||
|
||||
self.assertRaises(exception.TraitRetrievalFailed,
|
||||
self.client._ensure_traits, ['FOO'])
|
||||
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:FOO', **self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_not_called()
|
||||
|
||||
def test_ensure_traits_fail_creation(self):
|
||||
get_mock = mock.Mock(status_code=200)
|
||||
get_mock.json.return_value = {'traits': []}
|
||||
self.ks_adap_mock.get.return_value = get_mock
|
||||
put_mock = requests.Response()
|
||||
put_mock.status_code = 400
|
||||
self.ks_adap_mock.put.return_value = put_mock
|
||||
|
||||
self.assertRaises(exception.TraitCreationFailed,
|
||||
self.client._ensure_traits, ['FOO'])
|
||||
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:FOO', **self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_called_once_with(
|
||||
'/traits/FOO', **self.trait_api_kwargs)
|
||||
|
||||
def test_set_traits_for_provider(self):
|
||||
traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA']
|
||||
|
||||
# Make _ensure_traits succeed without PUTting
|
||||
get_mock = mock.Mock(status_code=200)
|
||||
get_mock.json.return_value = {'traits': traits}
|
||||
self.ks_adap_mock.get.return_value = get_mock
|
||||
|
||||
# Prime the provider tree cache
|
||||
self.client._provider_tree.new_root('rp', uuids.rp, 0)
|
||||
|
||||
# Mock the /rp/{u}/traits PUT to succeed
|
||||
put_mock = mock.Mock(status_code=200)
|
||||
put_mock.json.return_value = {'traits': traits,
|
||||
'resource_provider_generation': 1}
|
||||
self.ks_adap_mock.put.return_value = put_mock
|
||||
|
||||
# Invoke
|
||||
self.client.set_traits_for_provider(uuids.rp, traits)
|
||||
|
||||
# Verify API calls
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:' + ','.join(traits), **self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_called_once_with(
|
||||
'/resource_providers/%s/traits' % uuids.rp,
|
||||
json={'traits': traits, 'resource_provider_generation': 0},
|
||||
**self.trait_api_kwargs)
|
||||
|
||||
# And ensure the provider tree cache was updated appropriately
|
||||
self.assertFalse(
|
||||
self.client._provider_tree.have_traits_changed(uuids.rp, traits))
|
||||
# Validate the generation
|
||||
self.assertEqual(
|
||||
1, self.client._provider_tree.data(uuids.rp).generation)
|
||||
|
||||
def test_set_traits_for_provider_fail(self):
|
||||
traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA']
|
||||
get_mock = mock.Mock()
|
||||
self.ks_adap_mock.get.return_value = get_mock
|
||||
|
||||
# Prime the provider tree cache
|
||||
self.client._provider_tree.new_root('rp', uuids.rp, 0)
|
||||
|
||||
# _ensure_traits exception bubbles up
|
||||
get_mock.status_code = 400
|
||||
self.assertRaises(
|
||||
exception.TraitRetrievalFailed,
|
||||
self.client.set_traits_for_provider, uuids.rp, traits)
|
||||
self.ks_adap_mock.put.assert_not_called()
|
||||
|
||||
get_mock.status_code = 200
|
||||
get_mock.json.return_value = {'traits': traits}
|
||||
|
||||
# Conflict
|
||||
self.ks_adap_mock.put.return_value = mock.Mock(status_code=409)
|
||||
self.assertRaises(
|
||||
exception.ResourceProviderUpdateConflict,
|
||||
self.client.set_traits_for_provider, uuids.rp, traits)
|
||||
|
||||
# Other error
|
||||
self.ks_adap_mock.put.return_value = mock.Mock(status_code=503)
|
||||
self.assertRaises(
|
||||
exception.ResourceProviderUpdateFailed,
|
||||
self.client.set_traits_for_provider, uuids.rp, traits)
|
||||
|
||||
|
||||
class TestAssociations(SchedulerReportClientTestCase):
|
||||
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||
|
|
Loading…
Reference in New Issue