Handle generation conflicts caused by concurrent updates

As discussed on the Nova-Neutron cross project session of the
Ussuri/Shanghai PTG [1]:

* Nova and Neutron will need to cooperate to identify the resource
  providers representing the same physical NICs.
* This will happen by Nova adding both RPs (created by Nova and
  Neutron respectively) representing the same physical NIC to one
  resource provider aggregate.
* Resource providers have a generation attribute to detect concurrent
  updates to the same RP (and its traits, inventories and aggregates).
* Therefore Neutron will start seeing update failures because of
  concurrent updates by Nova and retry its operation if it failed
  because of a concurrent (but otherwise irrelevant) update.

NOTE: The logic added to update inventory, inventories and traits
should be added to update resource provider too, but that API request
does not take a generation parameter today - which is likely a bug on
the placement side.

Generation conflicts are signalled not just by the 409 Conflict HTTP
response code but by the placement.concurrent_update error code inside
the HTTP JSON body. This is only available in Placement microversions
1.23+, therefore we bump the Placement microversion used in neutron-lib
to 1.23.

While implementing this change I noticed
that update_resource_provider_inventory and
update_resource_provider_inventories translated HTTP 409 Conflicts to
different neutron exception classes - which I deemed to be a bug in
placement client code and corrected it in this change too.

[1] https://etherpad.openstack.org/p/ptg-ussuri-xproj-nova-neutron

Change-Id: I2696c9e407cd5661a49d7d8c6b0232375682f827
This commit is contained in:
Bence Romsics
2019-11-19 17:18:50 +01:00
parent 79cd582084
commit 755f6f462e
3 changed files with 284 additions and 35 deletions

View File

@@ -37,7 +37,9 @@ API_VERSION_REQUEST_HEADER = 'OpenStack-API-Version'
PLACEMENT_API_WITH_MEMBER_OF = 'placement 1.3'
PLACEMENT_API_WITH_NESTED_RESOURCES = 'placement 1.14'
PLACEMENT_API_RETURN_PROVIDER_BODY = 'placement 1.20'
PLACEMENT_API_LATEST_SUPPORTED = PLACEMENT_API_RETURN_PROVIDER_BODY
PLACEMENT_API_ERROR_CODE = 'placement 1.23'
PLACEMENT_API_LATEST_SUPPORTED = PLACEMENT_API_ERROR_CODE
GENERATION_CONFLICT_RETRIES = 10
def _check_placement_api_available(f):
@@ -197,6 +199,46 @@ class PlacementAPIClient(object):
return self._client.put(url, json=data,
endpoint_filter=self._ks_filter, **kwargs)
def _put_with_retry_for_generation_conflict(
self, url, body,
resource_provider_uuid,
resource_provider_generation=None):
if resource_provider_generation is None:
# If the client's user did not supply a generation to us we dare to
# retry without handing the control back to our caller.
max_tries = GENERATION_CONFLICT_RETRIES
else:
# If the client's user supplied a generation to us we don't dare to
# retry on her behalf since we don't know her intention.
max_tries = 1
body['resource_provider_generation'] = resource_provider_generation
for i in range(max_tries):
if resource_provider_generation is None:
# In the bodies of
# PUT /resource_providers/{uuid}/traits
# PUT /resource_providers/{uuid}/inventories
# PUT /resource_providers/{uuid}/inventories/{resource_class}
# resource_provider_generation happens to be at the same place.
body['resource_provider_generation'] = \
self.get_resource_provider(
resource_provider_uuid=resource_provider_uuid)[
'generation']
try:
return self._put(url, body).json()
except ks_exc.Conflict as e:
if e.response.json()[
'errors'][0]['code'] == 'placement.concurrent_update':
continue
else:
raise
raise n_exc.PlacementResourceProviderGenerationConflict(
resource_provider=resource_provider_uuid,
generation=body['resource_provider_generation'])
def _delete(self, url, **kwargs):
kwargs = self._extend_header_with_api_version(**kwargs)
return self._client.delete(url, endpoint_filter=self._ks_filter,
@@ -364,23 +406,19 @@ class PlacementAPIClient(object):
server side.
:returns: The updated set of inventory records.
"""
if resource_provider_generation is None:
resource_provider_generation = self.get_resource_provider(
resource_provider_uuid=resource_provider_uuid)['generation']
url = '/resource_providers/%s/inventories' % resource_provider_uuid
body = {
'resource_provider_generation': resource_provider_generation,
'inventories': inventories
}
try:
return self._put(url, body).json()
return self._put_with_retry_for_generation_conflict(
url, body, resource_provider_uuid,
resource_provider_generation)
except ks_exc.NotFound:
raise n_exc.PlacementResourceProviderNotFound(
resource_provider=resource_provider_uuid)
except ks_exc.Conflict:
raise n_exc.PlacementResourceProviderGenerationConflict(
resource_provider=resource_provider_uuid,
generation=resource_provider_generation)
@_check_placement_api_available
def delete_resource_provider_inventories(self, resource_provider_uuid):
@@ -470,26 +508,23 @@ class PlacementAPIClient(object):
provider. Optional.
:raises PlacementResourceNotFound: If the resource provider or the
resource class is not found.
:raises PlacementInventoryUpdateConflict: If the resource provider
generation does not match
with the server side.
:raises PlacementResourceProviderGenerationConflict: If the resource
provider
generation does
not match with the
server side.
:returns: The updated inventory of the resource class as a dict.
"""
if resource_provider_generation is None:
resource_provider_generation = self.get_resource_provider(
resource_provider_uuid=resource_provider_uuid)['generation']
url = '/resource_providers/%s/inventories/%s' % (
resource_provider_uuid, resource_class)
inventory['resource_provider_generation'] = \
resource_provider_generation
body = inventory
try:
return self._put(url, inventory).json()
return self._put_with_retry_for_generation_conflict(
url, body, resource_provider_uuid,
resource_provider_generation)
except ks_exc.NotFound as e:
raise n_exc.PlacementResourceNotFound(url=e.url)
except ks_exc.Conflict:
raise n_exc.PlacementInventoryUpdateConflict(
resource_provider=resource_provider_uuid,
resource_class=resource_class)
@_check_placement_api_available
def associate_aggregates(self, resource_provider_uuid, aggregates):
@@ -574,24 +609,32 @@ class PlacementAPIClient(object):
to set the traits
:param traits: a list of traits.
:param resource_provider_generation: The generation of the resource
provider. Optional.
provider. Optional. If not
supplied by the caller, handle
potential generation conflict
by retrying the call. If supplied
we assume the caller handles
generation conflict.
:raises PlacementResourceProviderNotFound: If the resource provider
is not found.
:raises PlacementTraitNotFound: If any of the specified traits are not
valid.
:raises PlacementResourceProviderGenerationConflict: For concurrent
conflicting
updates detected.
:returns: The new traits of the resource provider together with the
resource provider generation.
"""
if resource_provider_generation is None:
resource_provider_generation = self.get_resource_provider(
resource_provider_uuid=resource_provider_uuid)['generation']
url = '/resource_providers/%s/traits' % (resource_provider_uuid)
body = {
'resource_provider_generation': resource_provider_generation,
'traits': traits
}
try:
return self._put(url, body).json()
return self._put_with_retry_for_generation_conflict(
url, body, resource_provider_uuid,
resource_provider_generation)
except ks_exc.NotFound:
raise n_exc.PlacementResourceProviderNotFound(
resource_provider=resource_provider_uuid)

View File

@@ -320,14 +320,6 @@ class TestPlacementAPIClient(base.BaseTestCase):
'rc_name': RESOURCE_CLASS_NAME},
expected_body)
def test_update_resource_inventory_inventory_conflict_exception(self):
self.placement_fixture.mock_put.side_effect = ks_exc.Conflict()
self.assertRaises(
n_exc.PlacementInventoryUpdateConflict,
self.placement_api_client.update_resource_provider_inventory,
RESOURCE_PROVIDER_UUID, INVENTORY,
RESOURCE_CLASS_NAME, resource_provider_generation=1)
def test_update_resource_provider_inventory_not_found(self):
# Test the resource provider not found case
self.placement_fixture.mock_put.side_effect = ks_exc.NotFound(
@@ -501,3 +493,208 @@ class TestPlacementAPIClient(base.BaseTestCase):
self.placement_api_client.delete_resource_class,
RESOURCE_CLASS_NAME
)
def test_update_rp_traits_caller_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = ks_exc.Conflict(
response=mock_resp)
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_traits,
resource_provider_uuid='resource provider uuid',
traits=['trait a', 'trait b'],
resource_provider_generation=3,
)
self.placement_fixture.mock_put.assert_called_once()
def test_update_rp_traits_callee_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.placement_api_client.update_resource_provider_traits(
resource_provider_uuid='resource provider uuid',
traits=['trait a', 'trait b'],
resource_provider_generation=None,
)
self.assertEqual(2, self.placement_fixture.mock_put.call_count)
def test_update_rp_traits_reached_max_tries(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = 10 * [
ks_exc.Conflict(response=mock_resp),
]
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_traits,
resource_provider_uuid='resource provider uuid',
traits=['trait a', 'trait b'],
resource_provider_generation=None,
)
self.assertEqual(10, self.placement_fixture.mock_put.call_count)
def test_update_rp_traits_raise_other_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'some_other_code'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.assertRaises(
n_exc.PlacementClientError,
self.placement_api_client.update_resource_provider_traits,
resource_provider_uuid='resource provider uuid',
traits=[],
resource_provider_generation=None,
)
self.assertEqual(1, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventory_caller_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = ks_exc.Conflict(
response=mock_resp)
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_inventory,
resource_provider_uuid='resource provider uuid',
inventory={},
resource_class='a resource class',
resource_provider_generation=3,
)
self.placement_fixture.mock_put.assert_called_once()
def test_update_rp_inventory_callee_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.placement_api_client.update_resource_provider_inventory(
resource_provider_uuid='resource provider uuid',
inventory={},
resource_class='a resource class',
resource_provider_generation=None,
)
self.assertEqual(2, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventory_reached_max_tries(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = 10 * [
ks_exc.Conflict(response=mock_resp),
]
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_inventory,
resource_provider_uuid='resource provider uuid',
inventory={},
resource_class='a resource class',
resource_provider_generation=None,
)
self.assertEqual(10, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventory_raise_other_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'some_other_code'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.assertRaises(
n_exc.PlacementClientError,
self.placement_api_client.update_resource_provider_inventory,
resource_provider_uuid='resource provider uuid',
inventory={},
resource_class='a resource class',
resource_provider_generation=None,
)
self.assertEqual(1, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventories_caller_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = ks_exc.Conflict(
response=mock_resp)
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_inventories,
resource_provider_uuid='resource provider uuid',
inventories={},
resource_provider_generation=3,
)
self.placement_fixture.mock_put.assert_called_once()
def test_update_rp_inventories_callee_handles_generation_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.placement_api_client.update_resource_provider_inventories(
resource_provider_uuid='resource provider uuid',
inventories={},
resource_provider_generation=None,
)
self.assertEqual(2, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventories_reached_max_tries(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'placement.concurrent_update'}]}
self.placement_fixture.mock_put.side_effect = 10 * [
ks_exc.Conflict(response=mock_resp),
]
self.assertRaises(
n_exc.PlacementResourceProviderGenerationConflict,
self.placement_api_client.update_resource_provider_inventories,
resource_provider_uuid='resource provider uuid',
inventories={},
resource_provider_generation=None,
)
self.assertEqual(10, self.placement_fixture.mock_put.call_count)
def test_update_rp_inventories_raise_other_conflict(self):
mock_resp = mock.Mock()
mock_resp.text = ''
mock_resp.json = lambda: {
'errors': [{'code': 'some_other_code'}]}
self.placement_fixture.mock_put.side_effect = [
ks_exc.Conflict(response=mock_resp),
mock.Mock(),
]
self.assertRaises(
n_exc.PlacementClientError,
self.placement_api_client.update_resource_provider_inventories,
resource_provider_uuid='resource provider uuid',
inventories={},
resource_provider_generation=None,
)
self.assertEqual(1, self.placement_fixture.mock_put.call_count)

View File

@@ -0,0 +1,9 @@
---
other:
- |
Bump the microversion used by ``PlacementAPIClient``
from ``1.20`` to ``1.23`` in order to have access to the
``code`` attribute of JSON error responses (`Placement API changelog
<https://docs.openstack.org/placement/latest/placement-api-microversion-history.html#include-code-attribute-in-json-error-responses>`_).
Both ``1.20`` and ``1.23`` were released in the ``Rocky`` version
of Placement therefore we expect no upgrade impact.