Heal allocations with incomplete consumer information

Allocations created before microversion 1.8 didn't have project_id
/ user_id consumer information. In Rocky those will be migrated
to have consumer records, but using configurable sentinel values.

As part of heal_allocations, we can detect this and heal the
allocations using the instance.project_id/user_id information.

This is something we'd need if we ever use Placement allocation
information for counting quotas.

Note that we should be using Placement API version 1.28 with
consumer_generation when updating the allocations, but since
people might backport this change, the use of consumer
generations is left for a follow-up patch.
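
For reference, a minimal sketch of the update step follows. It is written
against a bare requests session rather than nova's SchedulerReportClient;
the endpoint URL, session handling and return convention are illustrative
assumptions, not part of this change:

    # Sketch only: auth, retries and error handling are omitted;
    # PLACEMENT_URL is a placeholder, and "session" is any
    # requests-compatible session that already carries a valid token.
    import requests

    PLACEMENT_URL = 'http://placement.example.com'
    HEADERS = {'OpenStack-API-Version': 'placement 1.12'}

    def heal_consumer(session, instance_uuid, project_id, user_id):
        """Re-PUT the instance allocations with the real project/user."""
        url = '%s/allocations/%s' % (PLACEMENT_URL, instance_uuid)
        body = session.get(url, headers=HEADERS).json()
        if not body.get('allocations'):
            return False  # nothing to update; heal_allocations creates instead
        if (body.get('project_id') == project_id and
                body.get('user_id') == user_id):
            return False  # consumer information already matches the instance
        body['project_id'] = project_id
        body['user_id'] = user_id
        # Microversion 1.28 would also require consumer_generation here;
        # that is deliberately left for the follow-up patch noted above.
        resp = session.put(url, json=body, headers=HEADERS)
        return resp.status_code == 204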

Related to blueprint add-consumer-generation

Change-Id: Idba40838b7b1d5389ab308f2ea40e28911aecffa
Matt Riedemann 2018-06-11 19:46:16 -04:00
parent 4f9a7da581
commit 6b6d81cf2b
7 changed files with 248 additions and 21 deletions


@@ -296,6 +296,11 @@ Placement
the compute node resource provider for that instance based on the flavor
associated with the instance.
There is also a special case handled for instances that *do* have
allocations created before Placement API microversion 1.8 where project_id
and user_id values were required. For those types of allocations, the
project_id and user_id are updated using the values from the instance.
Specify ``--max-count`` to control the maximum number of instances to
process. If not specified, all instances in each cell will be mapped in
batches of 50. If you have a large number of instances, consider
@@ -311,7 +316,7 @@ Placement
* 0: Command completed successfully and allocations were created.
* 1: --max-count was reached and there are more instances to process.
* 2: Unable to find a compute node record for a given instance.
* 3: Unable to create allocations for an instance against its
* 3: Unable to create (or update) allocations for an instance against its
compute node resource provider.
* 4: Command completed successfully but no allocations were created.
* 127: Invalid input.
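
As a usage illustration (not part of this change), the documented return
codes lend themselves to a simple batching loop. The wrapper below is a
hypothetical sketch; it assumes nova-manage is on PATH and that repeated
invocations with --max-count are acceptable in your deployment:

    # Hypothetical batch driver: re-runs heal_allocations while return
    # code 1 indicates --max-count was reached and more instances remain.
    import subprocess
    import sys

    def heal_in_batches(batch_size=50):
        while True:
            proc = subprocess.run(
                ['nova-manage', 'placement', 'heal_allocations',
                 '--max-count', str(batch_size)])
            if proc.returncode == 1:
                continue                 # batch processed, more instances left
            if proc.returncode in (0, 4):
                return 0                 # done (4: nothing needed healing)
            return proc.returncode       # 2, 3 or 127: stop and report

    if __name__ == '__main__':
        sys.exit(heal_in_batches())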


@@ -1778,6 +1778,8 @@ class PlacementCommands(object):
given instance cannot be found
:raises: AllocationCreateFailed if unable to create allocations for
a given instance against a given compute node resource provider
:raises: AllocationUpdateFailed if unable to update allocations for
a given instance with consumer project/user information
"""
# Keep a cache of instance.node to compute node resource provider UUID.
# This will save some queries for non-ironic instances to the
@@ -1817,15 +1819,45 @@ class PlacementCommands(object):
continue
allocations = placement.get_allocations_for_consumer(
ctxt, instance.uuid)
if allocations:
output(_('Instance %s already has allocations.') %
instance.uuid)
# TODO(mriedem): Check to see if the allocation project_id
ctxt, instance.uuid, include_project_user=True)
# get_allocations_for_consumer uses safe_connect which will
# return None if we can't communicate with Placement, and the
# response can have an empty {'allocations': {}} response if
# there are no allocations for the instance so handle both
if allocations and allocations.get('allocations'):
# Check to see if the allocation project_id
# and user_id matches the instance project and user and
# fix the allocation project/user if they don't match; see
# blueprint add-consumer-generation for details.
continue
# fix the allocation project/user if they don't match.
# Allocations created before Placement API version 1.8
# did not have a project_id/user_id, and migrated records
# could have sentinel values from config.
if (allocations.get('project_id') ==
instance.project_id and
allocations.get('user_id') == instance.user_id):
output(_('Instance %s already has allocations with '
'matching consumer project/user.') %
instance.uuid)
continue
# We have an instance with allocations but not the correct
# project_id/user_id, so we want to update the allocations
# and re-put them. We don't use put_allocations here
# because we don't want to mess up shared or nested
# provider allocations.
allocations['project_id'] = instance.project_id
allocations['user_id'] = instance.user_id
# We use 1.12 for PUT /allocations/{consumer_id} to mirror
# the body structure from get_allocations_for_consumer.
# TODO(mriedem): Pass a consumer generation using 1.28.
resp = placement.put('/allocations/%s' % instance.uuid,
allocations, version='1.12')
if resp:
num_processed += 1
output(_('Successfully updated allocations for '
'instance %s.') % instance.uuid)
continue
else:
raise exception.AllocationUpdateFailed(
instance=instance.uuid, error=resp.text)
# This instance doesn't have allocations so we need to find
# its compute node resource provider.
@@ -1866,12 +1898,14 @@ class PlacementCommands(object):
@action_description(
_("Iterates over non-cell0 cells looking for instances which do "
"not have allocations in the Placement service and which are not "
"undergoing a task state transition. For each instance found, "
"allocations are created against the compute node resource provider "
"for that instance based on the flavor associated with the "
"instance. This command requires that the [api_database]/connection "
"and [placement] configuration options are set."))
"not have allocations in the Placement service, or have incomplete "
"consumer project_id/user_id values in existing allocations, and "
"which are not undergoing a task state transition. For each "
"instance found, allocations are created (or updated) against the "
"compute node resource provider for that instance based on the "
"flavor associated with the instance. This command requires that "
"the [api_database]/connection and [placement] configuration "
"options are set."))
@args('--max-count', metavar='<max_count>', dest='max_count',
help='Maximum number of instances to process. If not specified, all '
'instances in each cell will be mapped in batches of 50. '
@@ -1888,8 +1922,8 @@ class PlacementCommands(object):
* 0: Command completed successfully and allocations were created.
* 1: --max-count was reached and there are more instances to process.
* 2: Unable to find a compute node record for a given instance.
* 3: Unable to create allocations for an instance against its
compute node resource provider.
* 3: Unable to create (or update) allocations for an instance against
its compute node resource provider.
* 4: Command completed successfully but no allocations were created.
* 127: Invalid input.
"""
@@ -1961,7 +1995,8 @@ class PlacementCommands(object):
except exception.ComputeHostNotFound as e:
print(e.format_message())
return 2
except exception.AllocationCreateFailed as e:
except (exception.AllocationCreateFailed,
exception.AllocationUpdateFailed) as e:
print(e.format_message())
return 3


@@ -2286,6 +2286,11 @@ class AllocationCreateFailed(NovaException):
'against resource provider %(provider)s.')
class AllocationUpdateFailed(NovaException):
msg_fmt = _('Failed to update allocations for instance %(instance)s. '
'Error: %(error)s')
class CertificateValidationFailed(NovaException):
msg_fmt = _("Image signature certificate validation failed for "
"certificate: %(cert_uuid)s. %(reason)s")


@@ -49,6 +49,7 @@ GRANULAR_AC_VERSION = '1.25'
POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20'
NESTED_PROVIDER_API_VERSION = '1.14'
POST_ALLOCATIONS_API_VERSION = '1.13'
ALLOCATION_PROJECT_USER = '1.12'
def warn_limit(self, msg):
@@ -1460,12 +1461,27 @@ class SchedulerReportClient(object):
raise exception.ResourceProviderSyncFailed()
@safe_connect
def get_allocations_for_consumer(self, context, consumer):
def get_allocations_for_consumer(self, context, consumer,
include_project_user=False):
"""Makes a GET /allocations/{consumer} call to Placement.
:param context: The nova.context.RequestContext auth context
:param consumer: UUID of the consumer resource
:param include_project_user: True if the response should be the
full allocations response including project_id and user_id (new
in microversion 1.12), False if only the "allocations" dict from
the response body should be returned.
:returns: dict, see ``include_project_user`` for details on format;
returns None if unable to connect to Placement (see safe_connect)
"""
url = '/allocations/%s' % consumer
resp = self.get(url, global_request_id=context.global_id)
resp = self.get(url, version=ALLOCATION_PROJECT_USER,
global_request_id=context.global_id)
if not resp:
return {}
else:
if include_project_user:
return resp.json()
return resp.json()['allocations']
def get_allocations_for_consumer_by_provider(self, context, rp_uuid,
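
To make the new include_project_user flag concrete, the shapes involved
look roughly like the sketch below, based on the docstring above; the
resource provider UUID, amounts and consumer IDs are made up, not real
placement output:

    # include_project_user=True returns the full 1.12 body; False returns
    # only the nested "allocations" dict. Values are illustrative only.
    full_body = {
        'allocations': {
            'b107fa06-2871-4f21-9aa5-f3f3e2ab0b9b': {   # compute node RP uuid
                'generation': 2,
                'resources': {'VCPU': 2, 'MEMORY_MB': 512, 'DISK_GB': 50},
            },
        },
        'project_id': '00000000-aaaa-bbbb-cccc-000000000000',  # e.g. a
        'user_id': '00000000-dddd-eeee-ffff-000000000000',     # sentinel value
    }
    allocations_only = full_body['allocations']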


@@ -14,11 +14,14 @@ import fixtures
from six.moves import StringIO
from nova.cmd import manage
from nova import config
from nova import context
from nova import objects
from nova import test
from nova.tests.functional import integrated_helpers
CONF = config.CONF
class NovaManageDBIronicTest(test.TestCase):
def setUp(self):
@@ -585,3 +588,57 @@ class TestNovaManagePlacementHealAllocations(
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn('Processed 1 instances.', self.output.getvalue())
def test_heal_allocations_update_sentinel_consumer(self):
"""Tests the scenario that allocations were created before microversion
1.8 when consumer (project_id and user_id) were not required so the
consumer information is using sentinel values from config.
Since the CachingScheduler used in this test class won't actually
create allocations during scheduling, we have to create the allocations
out-of-band and then run our heal routine to see they get updated with
the instance project and user information.
"""
server, rp_uuid = self._boot_and_assert_no_allocations(
self.flavor, 'cell1')
# Now we'll create allocations using microversion < 1.8 so that
# placement creates the consumer record with the config-based project
# and user values.
alloc_body = {
"allocations": [
{
"resource_provider": {
"uuid": rp_uuid
},
"resources": {
"MEMORY_MB": self.flavor['ram'],
"VCPU": self.flavor['vcpus'],
"DISK_GB": self.flavor['disk']
}
}
]
}
self.placement_api.put('/allocations/%s' % server['id'], alloc_body)
# Make sure we did that correctly. Use version 1.12 so we can assert
# the project_id and user_id are based on the sentinel values.
allocations = self.placement_api.get(
'/allocations/%s' % server['id'], version='1.12').body
self.assertEqual(CONF.placement.incomplete_consumer_project_id,
allocations['project_id'])
self.assertEqual(CONF.placement.incomplete_consumer_user_id,
allocations['user_id'])
allocations = allocations['allocations']
self.assertIn(rp_uuid, allocations)
self.assertFlavorMatchesAllocation(
self.flavor, allocations[rp_uuid]['resources'])
# Now run heal_allocations which should update the consumer info.
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
output = self.output.getvalue()
self.assertIn('Successfully updated allocations for instance', output)
self.assertIn('Processed 1 instances.', output)
# Now assert that the consumer was actually updated.
allocations = self.placement_api.get(
'/allocations/%s' % server['id'], version='1.12').body
self.assertEqual(server['tenant_id'], allocations['project_id'])
self.assertEqual(server['user_id'], allocations['user_id'])


@@ -3202,7 +3202,7 @@ class TestAllocations(SchedulerReportClientTestCase):
self.client.update_instance_allocation(self.context, cn, inst, 1)
self.assertFalse(mock_put.called)
mock_get.assert_called_once_with(
'/allocations/%s' % inst.uuid,
'/allocations/%s' % inst.uuid, version='1.12',
global_request_id=self.context.global_id)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'


@@ -35,6 +35,7 @@ from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.unit.db import fakes as db_fakes
from nova.tests.unit import fake_requests
from nova.tests.unit.objects import test_network
from nova.tests import uuidsentinel
@@ -2488,6 +2489,114 @@ class TestNovaManagePlacement(test.NoDBTestCase):
uuidsentinel.instance, mock.sentinel.resources, 'fake-project',
'fake-user')
@mock.patch('nova.objects.CellMappingList.get_all',
return_value=objects.CellMappingList(objects=[
objects.CellMapping(name='cell1',
uuid=uuidsentinel.cell1)]))
@mock.patch('nova.objects.InstanceList.get_by_filters',
# Called twice, first returns 1 instance, second returns []
side_effect=(
objects.InstanceList(objects=[
objects.Instance(
uuid=uuidsentinel.instance, host='fake',
node='fake', task_state=None,
project_id='fake-project', user_id='fake-user')]),
objects.InstanceList()))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocations_for_consumer')
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
new_callable=mock.NonCallableMock) # assert not called
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(204))
def test_heal_allocations_sentinel_consumer(
self, mock_put, mock_get_compute_node, mock_get_allocs,
mock_get_instances, mock_get_all_cells):
"""Tests the scenario that there are allocations created using
placement API microversion < 1.8 where project/user weren't provided.
The allocations will be re-put with the instance project_id/user_id
values. Note that GET /allocations/{consumer_id} since commit f44965010
will create the missing consumer record using the config option
sentinels for project and user, so we won't get null back for the
consumer project/user.
"""
mock_get_allocs.return_value = {
"allocations": {
"92637880-2d79-43c6-afab-d860886c6391": {
"generation": 2,
"resources": {
"DISK_GB": 50,
"MEMORY_MB": 512,
"VCPU": 2
}
}
},
"project_id": CONF.placement.incomplete_consumer_project_id,
"user_id": CONF.placement.incomplete_consumer_user_id
}
self.assertEqual(0, self.cli.heal_allocations(verbose=True))
self.assertIn('Processed 1 instances.', self.output.getvalue())
mock_get_allocs.assert_called_once_with(
test.MatchType(context.RequestContext), uuidsentinel.instance,
include_project_user=True)
expected_put_data = mock_get_allocs.return_value
expected_put_data['project_id'] = 'fake-project'
expected_put_data['user_id'] = 'fake-user'
mock_put.assert_called_once_with(
'/allocations/%s' % uuidsentinel.instance, expected_put_data,
version='1.12')
@mock.patch('nova.objects.CellMappingList.get_all',
return_value=objects.CellMappingList(objects=[
objects.CellMapping(name='cell1',
uuid=uuidsentinel.cell1)]))
@mock.patch('nova.objects.InstanceList.get_by_filters',
return_value=objects.InstanceList(objects=[
objects.Instance(
uuid=uuidsentinel.instance, host='fake', node='fake',
task_state=None, project_id='fake-project',
user_id='fake-user')]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocations_for_consumer')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(
409, content='Inventory and/or allocations changed while '
'attempting to allocate'))
def test_heal_allocations_sentinel_consumer_put_fails(
self, mock_put, mock_get_allocs, mock_get_instances,
mock_get_all_cells):
"""Tests the scenario that there are allocations created using
placement API microversion < 1.8 where project/user weren't provided
and there was no consumer. The allocations will be re-put with the
instance project_id/user_id values but that fails with a 409 so a
return code of 3 is expected from the command.
"""
mock_get_allocs.return_value = {
"allocations": {
"92637880-2d79-43c6-afab-d860886c6391": {
"generation": 2,
"resources": {
"DISK_GB": 50,
"MEMORY_MB": 512,
"VCPU": 2
}
}
},
"project_id": CONF.placement.incomplete_consumer_project_id,
"user_id": CONF.placement.incomplete_consumer_user_id
}
self.assertEqual(3, self.cli.heal_allocations(verbose=True))
self.assertIn(
'Inventory and/or allocations changed', self.output.getvalue())
mock_get_allocs.assert_called_once_with(
test.MatchType(context.RequestContext), uuidsentinel.instance,
include_project_user=True)
expected_put_data = mock_get_allocs.return_value
expected_put_data['project_id'] = 'fake-project'
expected_put_data['user_id'] = 'fake-user'
mock_put.assert_called_once_with(
'/allocations/%s' % uuidsentinel.instance, expected_put_data,
version='1.12')
class TestNovaManageMain(test.NoDBTestCase):
"""Tests the nova-manage:main() setup code."""