Merge "Support for --force flag for nova-manage placement heal_allocations command"

Zuul 2020-05-11 18:15:34 +00:00 committed by Gerrit Code Review
commit 5b76ae4e66
4 changed files with 152 additions and 9 deletions


@@ -546,7 +546,7 @@ Placement
.. _heal_allocations_cli:
``nova-manage placement heal_allocations [--max-count <max_count>] [--verbose] [--skip-port-allocations] [--dry-run] [--instance <instance_uuid>] [--cell <cell_uuid]``
``nova-manage placement heal_allocations [--max-count <max_count>] [--verbose] [--skip-port-allocations] [--dry-run] [--instance <instance_uuid>] [--cell <cell_uuid>] [--force]``
Iterates over non-cell0 cells looking for instances which do not have
allocations in the Placement service and which are not undergoing a task
state transition. For each instance found, allocations are created against
@@ -606,6 +606,9 @@ Placement
Specify ``--cell`` to process heal allocations within a specific cell.
This is mutually exclusive with the ``--instance`` option.
Specify ``--force`` to forcefully heal the allocations of a single
instance, overwriting any allocations it already has. This option must be
used together with ``--instance``.
This command requires that the
:oslo.config:option:`api_database.connection` and
:oslo.config:group:`placement` configuration options are set. Placement API

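The option documented above is exposed through the ``heal_allocations`` method of the ``PlacementCommands`` class changed in the code below. As a minimal sketch of driving that method from Python, the way the functional tests later in this change do, assuming a fully configured nova deployment (``api_database.connection`` and the ``placement`` options set, as noted above), that ``PlacementCommands`` is importable from ``nova.cmd.manage``, and a placeholder instance UUID:

    from nova.cmd.manage import PlacementCommands

    commands = PlacementCommands()
    # --force is only honoured together with --instance; --force without
    # --instance returns 127, and a run that finds nothing to heal returns 4.
    rc = commands.heal_allocations(
        verbose=True,
        instance_uuid='00000000-0000-0000-0000-000000000000',  # placeholder
        force=True,
    )
    print(rc)  # 0 when allocations were created or updated

On the command line this corresponds to ``nova-manage placement heal_allocations --instance <instance_uuid> --force``.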

@@ -1825,7 +1825,8 @@ class PlacementCommands(object):
def _heal_allocations_for_instance(self, ctxt, instance, node_cache,
output, placement, dry_run,
heal_port_allocations, neutron):
heal_port_allocations, neutron,
force):
"""Checks the given instance to see if it needs allocation healing
:param ctxt: cell-targeted nova.context.RequestContext
@@ -1841,6 +1842,8 @@ class PlacementCommands(object):
requested, False otherwise.
:param neutron: nova.network.neutron.ClientWrapper to
communicate with Neutron
:param force: True if force healing is requested for a particular
instance, False otherwise.
:return: True if allocations were created or updated for the instance,
None if nothing needed to be done
:raises: nova.exception.ComputeHostNotFound if a compute node for a
@@ -1905,6 +1908,16 @@ class PlacementCommands(object):
allocations = self._heal_missing_project_and_user_id(
allocations, instance)
if force:
output(_('Force flag passed for instance %s') % instance.uuid)
need_healing = _UPDATE
# Recompute the allocations as they would be created for an
# instance that has no allocations at all.
alloc = self._heal_missing_alloc(ctxt, instance, node_cache)
# Keep the consumer generation of the existing allocations so the
# update is not rejected by Placement's generation conflict check.
alloc["consumer_generation"] = allocations["consumer_generation"]
# Overwrite the existing allocations with the recomputed ones.
allocations = alloc
if heal_port_allocations:
to_heal = self._get_port_allocations_to_heal(
ctxt, instance, node_cache, placement, neutron, output)
@@ -1974,7 +1987,8 @@ class PlacementCommands(object):
def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output,
placement, dry_run, instance_uuid,
heal_port_allocations, neutron):
heal_port_allocations, neutron,
force):
"""Checks for instances to heal in a given cell.
:param ctxt: cell-targeted nova.context.RequestContext
@@ -1991,6 +2005,8 @@ class PlacementCommands(object):
requested, False otherwise.
:param neutron: nova.network.neutron.ClientWrapper to
communicate with Neutron
:param force: True if force healing is requested for a particular
instance, False otherwise.
:return: Number of instances that had allocations created.
:raises: nova.exception.ComputeHostNotFound if a compute node for a
given instance cannot be found
@@ -2044,7 +2060,7 @@ class PlacementCommands(object):
for instance in instances:
if self._heal_allocations_for_instance(
ctxt, instance, node_cache, output, placement,
dry_run, heal_port_allocations, neutron):
dry_run, heal_port_allocations, neutron, force):
num_processed += 1
# Make sure we don't go over the max count. Note that we
@@ -2101,9 +2117,11 @@ class PlacementCommands(object):
@args('--cell', metavar='<cell_uuid>', dest='cell_uuid',
help='Heal allocations within a specific cell. '
'The --cell and --instance options are mutually exclusive.')
@args('--force', action='store_true', dest='force', default=False,
help='Force heal allocations. Requires the --instance argument.')
def heal_allocations(self, max_count=None, verbose=False, dry_run=False,
instance_uuid=None, skip_port_allocations=False,
cell_uuid=None):
cell_uuid=None, force=False):
"""Heals instance allocations in the Placement service
Return codes:
@@ -2126,9 +2144,6 @@ class PlacementCommands(object):
# for example, this could cleanup ironic instances that have
# allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom
# resource class
# - add an option to overwrite allocations for instances which already
# have allocations (but the operator thinks might be wrong?); this
# would probably only be safe with a specific instance.
# - deal with nested resource providers?
heal_port_allocations = not skip_port_allocations
@@ -2144,6 +2159,11 @@
'are mutually exclusive.'))
return 127
if force and not instance_uuid:
print(_('The --instance flag is required '
'when using --force flag.'))
return 127
# TODO(mriedem): Rather than --max-count being both a total and batch
# count, should we have separate options to be specific, i.e. --total
# and --batch-size? Then --batch-size defaults to 50 and --total
@@ -2220,7 +2240,8 @@ class PlacementCommands(object):
try:
num_processed += self._heal_instances_in_cell(
cctxt, limit_per_cell, unlimited, output, placement,
dry_run, instance_uuid, heal_port_allocations, neutron)
dry_run, instance_uuid, heal_port_allocations, neutron,
force)
except exception.ComputeHostNotFound as e:
print(e.format_message())
return 2

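To make the force branch in ``_heal_allocations_for_instance`` easier to follow in isolation: it recomputes the allocations as if the instance had none, then carries over the consumer generation of the allocations currently stored in Placement so that the subsequent update is not rejected by Placement's consumer generation conflict check. A minimal sketch with plain dicts (illustrative names only, not the real nova objects or Placement responses):

    def force_replace_allocations(existing, fresh):
        """Overwrite existing allocations with freshly computed ones,
        keeping the consumer generation so Placement accepts the update."""
        replacement = dict(fresh)
        replacement['consumer_generation'] = existing['consumer_generation']
        return replacement

    # Illustrative data only.
    existing = {'allocations': {'rp-uuid': {'resources': {'MEMORY_MB': 1024}}},
                'consumer_generation': 5}
    fresh = {'allocations': {'rp-uuid': {'resources': {'MEMORY_MB': 2048,
                                                       'VCPU': 2}}}}
    assert force_replace_allocations(existing, fresh)['consumer_generation'] == 5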

@@ -783,6 +783,121 @@ class TestNovaManagePlacementHealAllocations(
self.assertIn('Found 1 candidate instances', output)
self.assertIn('Processed 0 instances.', output)
def test_heal_allocations_force_allocation(self):
"""Tests the case that a specific instance allocations are
forcefully changed.
1. create server without allocations
2. heal allocations without forcing them.
Assert the allocations match the flavor
3. update the allocations to change MEMORY_MB to not match the flavor
4. run heal allocations without --force.
Assert the allocations still have the bogus
MEMORY_MB value since they were not forcefully updated.
5. run heal allocations with --force.
Assert the allocations match the flavor again
6. run heal allocations again.
You should get rc=4 back since nothing changed.
"""
# 1. Create server that we will forcefully heal specifically.
server, rp_uuid = self._boot_and_assert_no_allocations(
self.flavor, 'cell1', volume_backed=True)
# 2. heal allocations without forcing them
result = self.cli.heal_allocations(
verbose=True, instance_uuid=server['id']
)
self.assertEqual(0, result, self.output.getvalue())
# assert the allocations match the flavor
allocs = self._get_allocations_by_server_uuid(
server['id'])[rp_uuid]['resources']
self.assertEqual(self.flavor['vcpus'], allocs['VCPU'])
self.assertEqual(self.flavor['ram'], allocs['MEMORY_MB'])
# 3. update the allocations to change MEMORY_MB
# to not match the flavor
alloc_body = {
"allocations": [
{
"resource_provider": {
"uuid": rp_uuid
},
"resources": {
"MEMORY_MB": 1024,
"VCPU": self.flavor['vcpus'],
"DISK_GB": self.flavor['disk']
}
}
]
}
self.placement_api.put('/allocations/%s' % server['id'], alloc_body)
# Check allocation to see if memory has changed
allocs = self._get_allocations_by_server_uuid(
server['id'])[rp_uuid]['resources']
self.assertEqual(self.flavor['vcpus'], allocs['VCPU'])
self.assertEqual(1024, allocs['MEMORY_MB'])
# 4. run heal allocations without --force
result = self.cli.heal_allocations(
verbose=True, instance_uuid=server['id']
)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn(
'Successfully updated allocations for',
self.output.getvalue())
# assert the allocations still have the bogus memory
allocs = self._get_allocations_by_server_uuid(
server['id'])[rp_uuid]['resources']
self.assertEqual(1024, allocs['MEMORY_MB'])
# call heal without force flag
# rc should be 4 since force flag was not used.
result = self.cli.heal_allocations(
verbose=True, instance_uuid=server['id']
)
self.assertEqual(4, result, self.output.getvalue())
# call heal with force flag and dry run
result = self.cli.heal_allocations(
dry_run=True, verbose=True,
instance_uuid=server['id'],
force=True
)
self.assertEqual(4, result, self.output.getvalue())
self.assertIn(
'[dry-run] Update allocations for instance',
self.output.getvalue())
# assert nothing has changed after dry run
allocs = self._get_allocations_by_server_uuid(
server['id'])[rp_uuid]['resources']
self.assertEqual(1024, allocs['MEMORY_MB'])
# 5. run heal allocations with --force
result = self.cli.heal_allocations(
verbose=True, instance_uuid=server['id'],
force=True
)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn('Force flag passed for instance',
self.output.getvalue())
self.assertIn('Successfully updated allocations',
self.output.getvalue())
# assert the allocations match the flavor again
allocs = self._get_allocations_by_server_uuid(
server['id'])[rp_uuid]['resources']
self.assertEqual(self.flavor['ram'], allocs['MEMORY_MB'])
# 6. run heal allocations again and you should get rc=4
# back since nothing changed
result = self.cli.heal_allocations(
verbose=True, instance_uuid=server['id']
)
self.assertEqual(4, result, self.output.getvalue())
class TestNovaManagePlacementHealPortAllocations(
test_servers.PortResourceRequestBasedSchedulingTestBase):


@@ -0,0 +1,4 @@
---
features:
  - Add ``--force`` option to the ``nova-manage placement heal_allocations``
    command to forcefully heal allocations for a specific instance.