Consumer gen support for put allocations

The placement API version 1.28 introduced consumer generation as a way
to make updating allocations safe even when it is done from multiple
places.

This patch changes the scheduler report client put_allocations
function to raise AllocationUpdateFailed in case of a consumer
generation conflict. The only direct user of this call is the
nova-manage heal_allocations CLI, which will simply fail to heal the
allocations for that instance.

Blueprint: use-nested-allocation-candidates
Change-Id: Iba230201803ef3d33bccaaf83eb10453eea43f20
Balazs Gibizer 2018-08-14 14:10:17 +02:00
parent 6f1a1f5e8e
commit dfa2e6f221
6 changed files with 114 additions and 34 deletions
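
For readers new to the mechanism: under microversion 1.28 every consumer
carries a generation, and PUT /allocations/{consumer_uuid} must echo the
generation the caller last read (or null for a consumer placement has never
seen). Below is a minimal, self-contained sketch of that compare-and-swap
check; the in-memory store and every name in it are illustrative stand-ins,
not Nova or placement code.

# Minimal sketch of placement's consumer generation check (microversion
# 1.28). The in-memory "store" is a stand-in for the placement service.

class GenerationConflict(Exception):
    """The caller's view of the consumer is stale (or wrongly empty)."""


_consumers = {}  # consumer_uuid -> {'generation': int, 'allocations': dict}


def put_allocations(consumer_uuid, allocations, consumer_generation):
    """Replace a consumer's allocations, guarded by its generation.

    consumer_generation must be None for a consumer placement has never
    seen, or the generation the caller last read. Anything else means a
    concurrent writer got there first, so the update is rejected instead
    of silently overwriting the other write.
    """
    current = _consumers.get(consumer_uuid)
    expected = None if current is None else current['generation']
    if consumer_generation != expected:
        raise GenerationConflict(
            'consumer generation conflict: expected %s but got %s'
            % (expected, consumer_generation))
    _consumers[consumer_uuid] = {
        'generation': 1 if expected is None else expected + 1,
        'allocations': allocations,
    }


put_allocations('inst-1', {'rp-1': {'MEMORY_MB': 512}}, None)   # gen -> 1
put_allocations('inst-1', {'rp-1': {'MEMORY_MB': 1024}}, 1)     # gen -> 2
try:
    put_allocations('inst-1', {'rp-1': {'MEMORY_MB': 256}}, 1)  # stale read
except GenerationConflict as exc:
    print(exc)  # consumer generation conflict: expected 2 but got 1

The real service signals the conflict as HTTP 409 with the error code
placement.concurrent_update rather than raising, but the idea is the same.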

View File

@@ -1815,7 +1815,7 @@ class PlacementCommands(object):
                     ctxt, instance.uuid)
             except ks_exc.ClientException as e:
                 raise exception.AllocationUpdateFailed(
-                    instance=instance.uuid,
+                    consumer_uuid=instance.uuid,
                     error=_("Allocation retrieval failed: %s") % e)
             except exception.ConsumerAllocationRetrievalFailed as e:
                 output(_("Allocation retrieval failed: %s") % e)
@@ -1858,7 +1858,7 @@ class PlacementCommands(object):
                 return True
             else:
                 raise exception.AllocationUpdateFailed(
-                    instance=instance.uuid, error=resp.text)
+                    consumer_uuid=instance.uuid, error=resp.text)
 
         # This instance doesn't have allocations so we need to find
         # its compute node resource provider.
@@ -1871,7 +1871,8 @@ class PlacementCommands(object):
             instance, instance.flavor)
         if placement.put_allocations(
                 ctxt, node_uuid, instance.uuid, resources,
-                instance.project_id, instance.user_id):
+                instance.project_id, instance.user_id,
+                consumer_generation=None):
             output(_('Successfully created allocations for '
                      'instance %(instance)s against resource '
                      'provider %(provider)s.') %
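
heal_allocations only reaches this call for an instance that currently has
no allocations, so it passes consumer_generation=None, which under 1.28
asserts "this consumer is new"; if anything created allocations
concurrently, placement rejects the PUT and the new AllocationUpdateFailed
propagates instead of overwriting the other writer. A hedged sketch of that
caller-side pattern (the helper name, exception stand-in, and fixed resource
amounts are hypothetical, not Nova code):

class AllocationUpdateFailed(Exception):
    """Stand-in for nova.exception.AllocationUpdateFailed."""


def heal_instance_allocations(placement, ctxt, node_uuid, instance):
    """Sketch of the heal path: create allocations for a bare instance.

    consumer_generation=None tells placement the consumer is expected to
    be brand new; a concurrent writer turns this PUT into a conflict
    rather than a silent overwrite.
    """
    resources = {'VCPU': 2, 'MEMORY_MB': 2048}  # illustrative amounts
    try:
        return placement.put_allocations(
            ctxt, node_uuid, instance.uuid, resources,
            instance.project_id, instance.user_id,
            consumer_generation=None)
    except AllocationUpdateFailed:
        # Someone else touched this consumer between our check and the
        # PUT; report the instance as not healed instead of clobbering.
        return False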

View File

@@ -2318,7 +2318,7 @@ class AllocationCreateFailed(NovaException):
 
 class AllocationUpdateFailed(NovaException):
-    msg_fmt = _('Failed to update allocations for instance %(instance)s. '
+    msg_fmt = _('Failed to update allocations for consumer %(consumer_uuid)s. '
                 'Error: %(error)s')

View File

@@ -1994,7 +1994,7 @@ class SchedulerReportClient(object):
     @safe_connect
     @retries
     def put_allocations(self, context, rp_uuid, consumer_uuid, alloc_data,
-                        project_id, user_id):
+                        project_id, user_id, consumer_generation):
         """Creates allocation records for the supplied instance UUID against
         the supplied resource provider.
@@ -2009,29 +2009,38 @@ class SchedulerReportClient(object):
                            consume.
         :param project_id: The project_id associated with the allocations.
         :param user_id: The user_id associated with the allocations.
+        :param consumer_generation: The current generation of the consumer
         :returns: True if the allocations were created, False otherwise.
         :raises: Retry if the operation should be retried due to a concurrent
-                 update.
+                 resource provider update.
+        :raises: AllocationUpdateFailed if placement returns a consumer
+                 generation conflict
         """
         payload = {
-            'allocations': [
-                {
-                    'resource_provider': {
-                        'uuid': rp_uuid,
-                    },
-                    'resources': alloc_data,
-                },
-            ],
+            'allocations': {
+                rp_uuid: {'resources': alloc_data},
+            },
             'project_id': project_id,
             'user_id': user_id,
+            'consumer_generation': consumer_generation
         }
         url = '/allocations/%s' % consumer_uuid
-        r = self.put(url, payload, version='1.8',
+        r = self.put(url, payload, version=CONSUMER_GENERATION_VERSION,
                      global_request_id=context.global_id)
         if r.status_code != 204:
+            err = r.json()['errors'][0]
             # NOTE(jaypipes): Yes, it sucks doing string comparison like this
             # but we have no error codes, only error messages.
-            if 'concurrently updated' in r.text:
+            # TODO(gibi): Use more granular error codes when available
+            if err['code'] == 'placement.concurrent_update':
+                if 'consumer generation conflict' in err['detail']:
+                    raise exception.AllocationUpdateFailed(
+                        consumer_uuid=consumer_uuid, error=err['detail'])
+                # this is not a consumer generation conflict so it can only be
+                # a resource provider generation conflict. The caller does not
+                # provide resource provider generation so this is just a
+                # placement internal race. We can blindly retry locally.
                 reason = ('another process changed the resource providers '
                           'involved in our attempt to put allocations for '
                           'consumer %s' % consumer_uuid)
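
Restated outside the client for clarity, the new 409 handling splits
placement.concurrent_update into two cases: a consumer generation conflict
is surfaced to the caller, while a resource provider generation race is
retried locally (the @retries decorator drives the retry). The names below
(handle_put_response and the local exception classes) are stand-ins for
illustration, not the Nova API:

import json


class AllocationUpdateFailed(Exception):
    """Stand-in for nova.exception.AllocationUpdateFailed."""


class Retry(Exception):
    """Stand-in for the retry signal consumed by the @retries decorator."""


def handle_put_response(status_code, body, consumer_uuid):
    """Decide what to do with a PUT /allocations/{consumer} response.

    204 means success; a consumer generation conflict is fatal to the
    caller; any other concurrent_update can only be a resource provider
    race, which is safe to retry locally.
    """
    if status_code == 204:
        return True
    err = json.loads(body)['errors'][0]
    if err['code'] == 'placement.concurrent_update':
        if 'consumer generation conflict' in err['detail']:
            raise AllocationUpdateFailed(err['detail'])
        raise Retry('another process changed the resource providers '
                    'involved in our attempt to put allocations for '
                    'consumer %s' % consumer_uuid)
    return False


# Example: the conflict body used by the new unit test further below.
body = json.dumps({'errors': [{'code': 'placement.concurrent_update',
                               'detail': 'consumer generation conflict'}]})
try:
    handle_put_response(409, body, 'inst-1')
except AllocationUpdateFailed as exc:
    print('failed: %s' % exc)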

View File

@@ -225,7 +225,8 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
                                                     self.instance.flavor)
             self.client.put_allocations(
                 self.context, self.compute_uuid, self.instance_uuid,
-                alloc_dict, self.instance.project_id, self.instance.user_id)
+                alloc_dict, self.instance.project_id, self.instance.user_id,
+                None)
 
             # Check that allocations were made
             resp = self.client.get('/allocations/%s' % self.instance_uuid)
@@ -724,7 +725,7 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
             self.client.put_allocations(
                 self.context, uuids.cn, uuids.consumer,
                 {fields.ResourceClass.SRIOV_NET_VF: 1},
-                uuids.proj, uuids.user))
+                uuids.proj, uuids.user, None))
         # ...and trying to delete the provider's VF inventory
         bad_inv = {
             'CUSTOM_BANDWIDTH': {

View File

@@ -270,10 +270,11 @@ class TestPutAllocations(SchedulerReportClientTestCase):
         resp = self.client.put_allocations(self.context, rp_uuid,
                                            consumer_uuid, data,
                                            mock.sentinel.project_id,
-                                           mock.sentinel.user_id)
+                                           mock.sentinel.user_id,
+                                           mock.sentinel.consumer_generation)
         self.assertTrue(resp)
         mock_put.assert_called_once_with(
-            expected_url, mock.ANY, version='1.8',
+            expected_url, mock.ANY, version='1.28',
             global_request_id=self.context.global_id)
 
     @mock.patch.object(report.LOG, 'warning')
@@ -288,20 +289,48 @@ class TestPutAllocations(SchedulerReportClientTestCase):
         resp = self.client.put_allocations(self.context, rp_uuid,
                                            consumer_uuid, data,
                                            mock.sentinel.project_id,
-                                           mock.sentinel.user_id)
+                                           mock.sentinel.user_id,
+                                           mock.sentinel.consumer_generation)
         self.assertFalse(resp)
         mock_put.assert_called_once_with(
-            expected_url, mock.ANY, version='1.8',
+            expected_url, mock.ANY, version='1.28',
             global_request_id=self.context.global_id)
         log_msg = mock_warn.call_args[0][0]
         self.assertIn("Unable to submit allocation for instance", log_msg)
 
+    @mock.patch.object(report.LOG, 'warning')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put')
+    def test_put_allocations_fail_due_to_consumer_generation_conflict(
+            self, mock_put, mock_warn):
+        mock_put.return_value = fake_requests.FakeResponse(
+            status_code=409,
+            content=jsonutils.dumps(
+                {'errors': [{'code': 'placement.concurrent_update',
+                             'detail': 'consumer generation conflict'}]}))
+
+        rp_uuid = mock.sentinel.rp
+        consumer_uuid = mock.sentinel.consumer
+        data = {"MEMORY_MB": 1024}
+        expected_url = "/allocations/%s" % consumer_uuid
+        self.assertRaises(exception.AllocationUpdateFailed,
+                          self.client.put_allocations,
+                          self.context, rp_uuid,
+                          consumer_uuid, data,
+                          mock.sentinel.project_id,
+                          mock.sentinel.user_id,
+                          mock.sentinel.consumer_generation)
+
+        mock_put.assert_called_once_with(
+            expected_url, mock.ANY, version='1.28',
+            global_request_id=self.context.global_id)
+
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put')
     def test_put_allocations_retries_conflict(self, mock_put):
-        failed = mock.MagicMock()
-        failed.status_code = 409
-        failed.text = "concurrently updated"
+        failed = fake_requests.FakeResponse(
+            status_code=409,
+            content=jsonutils.dumps(
+                {'errors': [{'code': 'placement.concurrent_update',
+                             'detail': ''}]}))
 
         succeeded = mock.MagicMock()
         succeeded.status_code = 204
@@ -315,18 +344,21 @@ class TestPutAllocations(SchedulerReportClientTestCase):
         resp = self.client.put_allocations(self.context, rp_uuid,
                                            consumer_uuid, data,
                                            mock.sentinel.project_id,
-                                           mock.sentinel.user_id)
+                                           mock.sentinel.user_id,
+                                           mock.sentinel.consumer_generation)
         self.assertTrue(resp)
         mock_put.assert_has_calls([
-            mock.call(expected_url, mock.ANY, version='1.8',
+            mock.call(expected_url, mock.ANY, version='1.28',
                       global_request_id=self.context.global_id)] * 2)
 
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put')
     def test_put_allocations_retry_gives_up(self, mock_put):
-        failed = mock.MagicMock()
-        failed.status_code = 409
-        failed.text = "concurrently updated"
+        failed = fake_requests.FakeResponse(
+            status_code=409,
+            content=jsonutils.dumps(
+                {'errors': [{'code': 'placement.concurrent_update',
+                             'detail': ''}]}))
 
         mock_put.return_value = failed
@@ -337,10 +369,11 @@ class TestPutAllocations(SchedulerReportClientTestCase):
         resp = self.client.put_allocations(self.context, rp_uuid,
                                            consumer_uuid, data,
                                            mock.sentinel.project_id,
-                                           mock.sentinel.user_id)
+                                           mock.sentinel.user_id,
+                                           mock.sentinel.consumer_generation)
         self.assertFalse(resp)
         mock_put.assert_has_calls([
-            mock.call(expected_url, mock.ANY, version='1.8',
+            mock.call(expected_url, mock.ANY, version='1.28',
                       global_request_id=self.context.global_id)] * 3)
 
     def test_claim_resources_success_with_old_version(self):

View File

@@ -2535,7 +2535,43 @@ class TestNovaManagePlacement(test.NoDBTestCase):
         mock_put_allocations.assert_called_once_with(
             test.MatchType(context.RequestContext), uuidsentinel.node,
             uuidsentinel.instance, mock.sentinel.resources, 'fake-project',
-            'fake-user')
+            'fake-user', consumer_generation=None)
+
+    @mock.patch('nova.objects.CellMappingList.get_all',
+                return_value=objects.CellMappingList(objects=[
+                    objects.CellMapping(name='cell1',
+                                        uuid=uuidsentinel.cell1)]))
+    @mock.patch('nova.objects.InstanceList.get_by_filters',
+                return_value=objects.InstanceList(objects=[
+                    objects.Instance(
+                        uuid=uuidsentinel.instance, host='fake', node='fake',
+                        task_state=None, flavor=objects.Flavor(),
+                        project_id='fake-project', user_id='fake-user')]))
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'get_allocs_for_consumer', return_value={})
+    @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
+                return_value=objects.ComputeNode(uuid=uuidsentinel.node))
+    @mock.patch('nova.scheduler.utils.resources_from_flavor',
+                return_value=mock.sentinel.resources)
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'put_allocations',
+                side_effect=exception.AllocationUpdateFailed(
+                    consumer_uuid=uuidsentinel.instance,
+                    error="consumer generation conflict"))
+    def test_heal_allocations_put_allocations_fails_with_consumer_conflict(
+            self, mock_put_allocations, mock_res_from_flavor,
+            mock_get_compute_node, mock_get_allocs, mock_get_instances,
+            mock_get_all_cells):
+        self.assertEqual(3, self.cli.heal_allocations())
+        self.assertIn('Failed to update allocations for consumer',
+                      self.output.getvalue())
+        instance = mock_get_instances.return_value[0]
+        mock_res_from_flavor.assert_called_once_with(
+            instance, instance.flavor)
+
+        mock_put_allocations.assert_called_once_with(
+            test.MatchType(context.RequestContext), uuidsentinel.node,
+            uuidsentinel.instance, mock.sentinel.resources, 'fake-project',
+            'fake-user', consumer_generation=None)
+
     @mock.patch('nova.objects.CellMappingList.get_all',
                 new=mock.Mock(return_value=objects.CellMappingList(objects=[