From dfa2e6f221fbd4dd909da052805b0118da72f9a9 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 14 Aug 2018 14:10:17 +0200 Subject: [PATCH] Consumer gen support for put allocations The placement API version 1.28 introduced consumer generation as a way to make updating allocation safe even if it is done from multiple places. This patch changes the scheduler report client put_allocations function to raise AllocationUpdateFailed in case of generation conflict. The only direct user of this call is the nova-manage heal_allocations CLI which will simply fail to heal the allocation for this instance. Blueprint: use-nested-allocation-candidates Change-Id: Iba230201803ef3d33bccaaf83eb10453eea43f20 --- nova/cmd/manage.py | 7 ++- nova/exception.py | 2 +- nova/scheduler/client/report.py | 33 ++++++---- nova/tests/functional/test_report_client.py | 5 +- .../unit/scheduler/client/test_report.py | 63 ++++++++++++++----- nova/tests/unit/test_nova_manage.py | 38 ++++++++++- 6 files changed, 114 insertions(+), 34 deletions(-) diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 2ec90e37e0a2..c69960a4e2c1 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -1815,7 +1815,7 @@ class PlacementCommands(object): ctxt, instance.uuid) except ks_exc.ClientException as e: raise exception.AllocationUpdateFailed( - instance=instance.uuid, + consumer_uuid=instance.uuid, error=_("Allocation retrieval failed: %s") % e) except exception.ConsumerAllocationRetrievalFailed as e: output(_("Allocation retrieval failed: %s") % e) @@ -1858,7 +1858,7 @@ class PlacementCommands(object): return True else: raise exception.AllocationUpdateFailed( - instance=instance.uuid, error=resp.text) + consumer_uuid=instance.uuid, error=resp.text) # This instance doesn't have allocations so we need to find # its compute node resource provider. @@ -1871,7 +1871,8 @@ class PlacementCommands(object): instance, instance.flavor) if placement.put_allocations( ctxt, node_uuid, instance.uuid, resources, - instance.project_id, instance.user_id): + instance.project_id, instance.user_id, + consumer_generation=None): output(_('Successfully created allocations for ' 'instance %(instance)s against resource ' 'provider %(provider)s.') % diff --git a/nova/exception.py b/nova/exception.py index fb5f7785c4a5..a3b701d205f2 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2318,7 +2318,7 @@ class AllocationCreateFailed(NovaException): class AllocationUpdateFailed(NovaException): - msg_fmt = _('Failed to update allocations for instance %(instance)s. ' + msg_fmt = _('Failed to update allocations for consumer %(consumer_uuid)s. ' 'Error: %(error)s') diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index a6deb80fd839..8de590950659 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -1994,7 +1994,7 @@ class SchedulerReportClient(object): @safe_connect @retries def put_allocations(self, context, rp_uuid, consumer_uuid, alloc_data, - project_id, user_id): + project_id, user_id, consumer_generation): """Creates allocation records for the supplied instance UUID against the supplied resource provider. @@ -2009,29 +2009,38 @@ class SchedulerReportClient(object): consume. :param project_id: The project_id associated with the allocations. :param user_id: The user_id associated with the allocations. + :param consumer_generation: The current generation of the consumer :returns: True if the allocations were created, False otherwise. :raises: Retry if the operation should be retried due to a concurrent - update. + resource provider update. + :raises: AllocationUpdateFailed if placement returns a consumer + generation conflict """ + payload = { - 'allocations': [ - { - 'resource_provider': { - 'uuid': rp_uuid, - }, - 'resources': alloc_data, - }, - ], + 'allocations': { + rp_uuid: {'resources': alloc_data}, + }, 'project_id': project_id, 'user_id': user_id, + 'consumer_generation': consumer_generation } url = '/allocations/%s' % consumer_uuid - r = self.put(url, payload, version='1.8', + r = self.put(url, payload, version=CONSUMER_GENERATION_VERSION, global_request_id=context.global_id) if r.status_code != 204: + err = r.json()['errors'][0] # NOTE(jaypipes): Yes, it sucks doing string comparison like this # but we have no error codes, only error messages. - if 'concurrently updated' in r.text: + # TODO(gibi): Use more granular error codes when available + if err['code'] == 'placement.concurrent_update': + if 'consumer generation conflict' in err['detail']: + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, error=err['detail']) + # this is not a consumer generation conflict so it can only be + # a resource provider generation conflict. The caller does not + # provide resource provider generation so this is just a + # placement internal race. We can blindly retry locally. reason = ('another process changed the resource providers ' 'involved in our attempt to put allocations for ' 'consumer %s' % consumer_uuid) diff --git a/nova/tests/functional/test_report_client.py b/nova/tests/functional/test_report_client.py index b6b0089abe58..3eaa88d54846 100644 --- a/nova/tests/functional/test_report_client.py +++ b/nova/tests/functional/test_report_client.py @@ -225,7 +225,8 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase): self.instance.flavor) self.client.put_allocations( self.context, self.compute_uuid, self.instance_uuid, - alloc_dict, self.instance.project_id, self.instance.user_id) + alloc_dict, self.instance.project_id, self.instance.user_id, + None) # Check that allocations were made resp = self.client.get('/allocations/%s' % self.instance_uuid) @@ -724,7 +725,7 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase): self.client.put_allocations( self.context, uuids.cn, uuids.consumer, {fields.ResourceClass.SRIOV_NET_VF: 1}, - uuids.proj, uuids.user)) + uuids.proj, uuids.user, None)) # ...and trying to delete the provider's VF inventory bad_inv = { 'CUSTOM_BANDWIDTH': { diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index b37b4ad0b76b..db84a121d1a9 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -270,10 +270,11 @@ class TestPutAllocations(SchedulerReportClientTestCase): resp = self.client.put_allocations(self.context, rp_uuid, consumer_uuid, data, mock.sentinel.project_id, - mock.sentinel.user_id) + mock.sentinel.user_id, + mock.sentinel.consumer_generation) self.assertTrue(resp) mock_put.assert_called_once_with( - expected_url, mock.ANY, version='1.8', + expected_url, mock.ANY, version='1.28', global_request_id=self.context.global_id) @mock.patch.object(report.LOG, 'warning') @@ -288,20 +289,48 @@ class TestPutAllocations(SchedulerReportClientTestCase): resp = self.client.put_allocations(self.context, rp_uuid, consumer_uuid, data, mock.sentinel.project_id, - mock.sentinel.user_id) + mock.sentinel.user_id, + mock.sentinel.consumer_generation) self.assertFalse(resp) mock_put.assert_called_once_with( - expected_url, mock.ANY, version='1.8', + expected_url, mock.ANY, version='1.28', global_request_id=self.context.global_id) log_msg = mock_warn.call_args[0][0] self.assertIn("Unable to submit allocation for instance", log_msg) + @mock.patch.object(report.LOG, 'warning') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations_fail_due_to_consumer_generation_conflict( + self, mock_put, mock_warn): + mock_put.return_value = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': 'consumer generation conflict'}]})) + + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + self.assertRaises(exception.AllocationUpdateFailed, + self.client.put_allocations, + self.context, rp_uuid, + consumer_uuid, data, + mock.sentinel.project_id, + mock.sentinel.user_id, + mock.sentinel.consumer_generation) + + mock_put.assert_called_once_with( + expected_url, mock.ANY, version='1.28', + global_request_id=self.context.global_id) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put') def test_put_allocations_retries_conflict(self, mock_put): - - failed = mock.MagicMock() - failed.status_code = 409 - failed.text = "concurrently updated" + failed = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})) succeeded = mock.MagicMock() succeeded.status_code = 204 @@ -315,18 +344,21 @@ class TestPutAllocations(SchedulerReportClientTestCase): resp = self.client.put_allocations(self.context, rp_uuid, consumer_uuid, data, mock.sentinel.project_id, - mock.sentinel.user_id) + mock.sentinel.user_id, + mock.sentinel.consumer_generation) self.assertTrue(resp) mock_put.assert_has_calls([ - mock.call(expected_url, mock.ANY, version='1.8', + mock.call(expected_url, mock.ANY, version='1.28', global_request_id=self.context.global_id)] * 2) @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put') def test_put_allocations_retry_gives_up(self, mock_put): - failed = mock.MagicMock() - failed.status_code = 409 - failed.text = "concurrently updated" + failed = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})) mock_put.return_value = failed @@ -337,10 +369,11 @@ class TestPutAllocations(SchedulerReportClientTestCase): resp = self.client.put_allocations(self.context, rp_uuid, consumer_uuid, data, mock.sentinel.project_id, - mock.sentinel.user_id) + mock.sentinel.user_id, + mock.sentinel.consumer_generation) self.assertFalse(resp) mock_put.assert_has_calls([ - mock.call(expected_url, mock.ANY, version='1.8', + mock.call(expected_url, mock.ANY, version='1.28', global_request_id=self.context.global_id)] * 3) def test_claim_resources_success_with_old_version(self): diff --git a/nova/tests/unit/test_nova_manage.py b/nova/tests/unit/test_nova_manage.py index 85a6ee9f992a..3463e72c766f 100644 --- a/nova/tests/unit/test_nova_manage.py +++ b/nova/tests/unit/test_nova_manage.py @@ -2535,7 +2535,43 @@ class TestNovaManagePlacement(test.NoDBTestCase): mock_put_allocations.assert_called_once_with( test.MatchType(context.RequestContext), uuidsentinel.node, uuidsentinel.instance, mock.sentinel.resources, 'fake-project', - 'fake-user') + 'fake-user', consumer_generation=None) + + @mock.patch('nova.objects.CellMappingList.get_all', + return_value=objects.CellMappingList(objects=[ + objects.CellMapping(name='cell1', + uuid=uuidsentinel.cell1)])) + @mock.patch('nova.objects.InstanceList.get_by_filters', + return_value=objects.InstanceList(objects=[ + objects.Instance( + uuid=uuidsentinel.instance, host='fake', node='fake', + task_state=None, flavor=objects.Flavor(), + project_id='fake-project', user_id='fake-user')])) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocs_for_consumer', return_value={}) + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename', + return_value=objects.ComputeNode(uuid=uuidsentinel.node)) + @mock.patch('nova.scheduler.utils.resources_from_flavor', + return_value=mock.sentinel.resources) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'put_allocations', + side_effect=exception.AllocationUpdateFailed( + consumer_uuid=uuidsentinel.instance, + error="consumer generation conflict")) + def test_heal_allocations_put_allocations_fails_with_consumer_conflict( + self, mock_put_allocations, mock_res_from_flavor, + mock_get_compute_node, mock_get_allocs, mock_get_instances, + mock_get_all_cells): + self.assertEqual(3, self.cli.heal_allocations()) + self.assertIn('Failed to update allocations for consumer', + self.output.getvalue()) + instance = mock_get_instances.return_value[0] + mock_res_from_flavor.assert_called_once_with( + instance, instance.flavor) + mock_put_allocations.assert_called_once_with( + test.MatchType(context.RequestContext), uuidsentinel.node, + uuidsentinel.instance, mock.sentinel.resources, 'fake-project', + 'fake-user', consumer_generation=None) @mock.patch('nova.objects.CellMappingList.get_all', new=mock.Mock(return_value=objects.CellMappingList(objects=[