Consider allocations involving child providers during allocation cleanup

Nova calls remove_provider_from_instance_allocation() in a couple of
cleanup cases. As the name of the function suggests, it does not handle
allocations against RP trees containing child providers, which leads to
leaked allocations on the child providers during cleanup. As the
functional tests in ServerMovingTestsWithNestedResourceRequests
previously showed, it is mostly evacuation that is directly affected by
this call path, because evacuation still uses the instance_uuid as the
consumer for both the source host and the destination host allocations.

This patch replaces remove_provider_from_instance_allocation() with
remove_provider_tree_from_instance_allocation(), which, as the name
suggests, removes the allocations against the whole RP tree. After this
change most of the evacuation functional tests pass without a resource
leak; the exception is forced evacuation, which will be handled in a
subsequent patch.

Also, this change would reduce scheduler/utils
remove_allocation_from_compute() to a mere proxy for
remove_provider_tree_from_instance_allocation(), so
remove_allocation_from_compute() is deleted.

Change-Id: I2af45a9540e7ccd60ace80d9fcadc79972da7df7
Blueprint: use-nested-allocation-candidates
Balazs Gibizer 2018-09-28 14:10:04 +02:00
parent e936b82a42
commit 66297f0c4a
10 changed files with 312 additions and 243 deletions
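
Below is a minimal, self-contained sketch of the cleanup logic that
remove_provider_tree_from_instance_allocation() implements; the helper
name strip_provider_tree and the plain-dict shapes are illustrative
assumptions for this sketch, not nova's actual internals.

def strip_provider_tree(current_allocs, tree_rp_uuids):
    """Return a copy of current_allocs without any allocation held
    against a provider in tree_rp_uuids, plus a flag telling whether
    anything was actually removed."""
    new_allocs = dict(current_allocs)
    changed = False
    for rp_uuid in tree_rp_uuids:
        changed = bool(new_allocs.pop(rp_uuid, None)) or changed
    return new_allocs, changed

# An evacuated instance holds allocations on the source root RP, a child
# RP of the source (e.g. one providing CUSTOM_MAGIC) and the destination
# root RP.
current = {
    'source-root': {'resources': {'VCPU': 1, 'MEMORY_MB': 1024}},
    'source-child': {'resources': {'CUSTOM_MAGIC': 1}},
    'dest-root': {'resources': {'VCPU': 1, 'MEMORY_MB': 1024}},
}

# The old per-provider cleanup only dropped 'source-root' and leaked
# 'source-child'; removing the whole source tree drops both.
cleaned, changed = strip_provider_tree(current, ['source-root', 'source-child'])
assert changed
assert cleaned == {'dest-root': {'resources': {'VCPU': 1, 'MEMORY_MB': 1024}}}

In the real method the provider UUIDs come from
GET /resource_providers?in_tree=<root_rp_uuid> and the trimmed
allocations are PUT back under the current consumer generation, as the
diff below shows.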


@ -90,7 +90,6 @@ from nova.pci import whitelist
from nova import rpc
from nova import safe_utils
from nova.scheduler import client as scheduler_client
from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova.virt import block_device as driver_block_device
from nova.virt import configdrive
@ -709,8 +708,9 @@ class ComputeManager(manager.Manager):
continue
cn_uuid = compute_nodes[migration.source_node]
if not scheduler_utils.remove_allocation_from_compute(
context, instance, cn_uuid, self.reportclient):
if not self.reportclient.\
remove_provider_tree_from_instance_allocation(
context, instance.uuid, cn_uuid):
LOG.error("Failed to clean allocation of evacuated instance "
"on the source node %s",
cn_uuid, instance=instance)


@ -43,7 +43,6 @@ from nova.pci import request as pci_request
from nova import rc_fields as fields
from nova import rpc
from nova.scheduler import client as scheduler_client
from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova.virt import hardware
@ -1362,8 +1361,8 @@ class ResourceTracker(object):
self, context, instance, node, move_type, node_type='source'):
# Clean up the instance allocation from this node in placement
cn_uuid = self.compute_nodes[node].uuid
if not scheduler_utils.remove_allocation_from_compute(
context, instance, cn_uuid, self.reportclient):
if not self.reportclient.remove_provider_tree_from_instance_allocation(
context, instance.uuid, cn_uuid):
LOG.error("Failed to clean allocation of %s "
"instance on the %s node %s",
move_type, node_type, cn_uuid, instance=instance)
@ -1380,8 +1379,8 @@ class ResourceTracker(object):
:param flavor: This is the new_flavor during a resize.
"""
cn = self.compute_nodes[node]
if not scheduler_utils.remove_allocation_from_compute(
context, instance, cn.uuid, self.reportclient, flavor):
if not self.reportclient.remove_provider_tree_from_instance_allocation(
context, instance.uuid, cn.uuid):
if instance.instance_type_id == flavor.id:
operation = 'migration'
else:


@ -423,19 +423,13 @@ class LiveMigrationTask(base.TaskBase):
instance=self.instance)
return
# Calculate the resource class amounts to subtract from the allocations
# on the node based on the instance flavor.
resources = scheduler_utils.resources_from_flavor(
self.instance, self.instance.flavor)
# Now remove the allocations for our instance against that node.
# Note that this does not remove allocations against any other node
# or shared resource provider, it's just undoing what the scheduler
# allocated for the given (destination) node.
self.scheduler_client.reportclient.\
remove_provider_from_instance_allocation(
self.context, self.instance.uuid, compute_node.uuid,
self.instance.user_id, self.instance.project_id, resources)
remove_provider_tree_from_instance_allocation(
self.context, self.instance.uuid, compute_node.uuid)
def _check_not_over_max_retries(self, attempted_hosts):
if CONF.migrate_max_retries == -1:


@ -1835,8 +1835,11 @@ class SchedulerReportClient(object):
CONSUMER_GENERATION_VERSION)):
payload['consumer_generation'] = consumer_generation
r = self.put(url, payload, version=allocation_request_version,
global_request_id=context.global_id)
r = self._put_allocations(
context,
consumer_uuid,
payload,
version=allocation_request_version)
if r.status_code != 204:
err = r.json()['errors'][0]
if err['code'] == 'placement.concurrent_update':
@ -1858,109 +1861,71 @@ class SchedulerReportClient(object):
'involved in our attempt to put allocations for '
'consumer %s' % consumer_uuid)
raise Retry('claim_resources', reason)
else:
LOG.warning(
'Unable to submit allocation for instance '
'%(uuid)s (%(code)i %(text)s)',
{'uuid': consumer_uuid,
'code': r.status_code,
'text': r.text})
return r.status_code == 204
@safe_connect
def remove_provider_from_instance_allocation(self, context, consumer_uuid,
rp_uuid, user_id, project_id,
resources):
"""Grabs an allocation for a particular consumer UUID, strips parts of
the allocation that refer to a supplied resource provider UUID, and
then PUTs the resulting allocation back to the placement API for the
consumer.
def remove_provider_tree_from_instance_allocation(self, context,
consumer_uuid,
root_rp_uuid):
"""Removes every allocation from the consumer that is on the
specified provider tree.
This is used to reconcile the "doubled-up" allocation that the
scheduler constructs when claiming resources against the destination
host during a move operation.
If the move was between hosts, the entire allocation for rp_uuid will
be dropped. If the move is a resize on the same host, then we will
subtract resources from the single allocation to ensure we do not
exceed the reserved or max_unit amounts for the resource on the host.
Note that this function does not try to remove allocations from sharing
providers.
:param context: The security context
:param consumer_uuid: The instance/consumer UUID
:param rp_uuid: The UUID of the provider whose resources we wish to
remove from the consumer's allocation
:param user_id: The instance's user
:param project_id: The instance's project
:param resources: The resources to be dropped from the allocation
:param consumer_uuid: The UUID of the consumer to manipulate
:param root_rp_uuid: The root of the provider tree
:raises: keystoneauth1.exceptions.base.ClientException on failure to
communicate with the placement API
:raises: ConsumerAllocationRetrievalFailed if this call cannot read
the current state of the allocations from placement
:raises: ResourceProviderRetrievalFailed if it cannot collect the RPs
in the tree specified by root_rp_uuid.
"""
url = '/allocations/%s' % consumer_uuid
# Grab the "doubled-up" allocation that we will manipulate
r = self.get(url, global_request_id=context.global_id,
version=CONSUMER_GENERATION_VERSION)
if r.status_code != 200:
LOG.warning("Failed to retrieve allocations for %s. Got HTTP %s",
consumer_uuid, r.status_code)
return False
current_allocs = r.json()['allocations']
if not current_allocs:
current_allocs = self.get_allocs_for_consumer(context, consumer_uuid)
if not current_allocs['allocations']:
LOG.error("Expected to find current allocations for %s, but "
"found none.", consumer_uuid)
# TODO(gibi): do not return False as none of the callers
# do anything with the return value except log
return False
else:
current_consumer_generation = r.json()['consumer_generation']
# If the host isn't in the current allocation for the instance, don't
# do anything
if rp_uuid not in current_allocs:
LOG.warning("Expected to find allocations referencing resource "
"provider %s for %s, but found none.",
rp_uuid, consumer_uuid)
rps = self._get_providers_in_tree(context, root_rp_uuid)
rp_uuids = [rp['uuid'] for rp in rps]
# go through the current allocations and remove every RP from it that
# belongs to the RP tree identified by the root_rp_uuid parameter
has_changes = False
for rp_uuid in rp_uuids:
changed = bool(
current_allocs['allocations'].pop(rp_uuid, None))
has_changes = has_changes or changed
# If nothing changed then don't do anything
if not has_changes:
LOG.warning(
"Expected to find allocations referencing resource "
"provider tree rooted at %s for %s, but found none.",
root_rp_uuid, consumer_uuid)
# TODO(gibi): do not return a value as none of the callers
# do anything with the return value except logging
return True
compute_providers = [uuid for uuid, alloc in current_allocs.items()
if 'VCPU' in alloc['resources']]
LOG.debug('Current allocations for instance: %s', current_allocs,
instance_uuid=consumer_uuid)
LOG.debug('Instance %s has resources on %i compute nodes',
consumer_uuid, len(compute_providers))
new_allocs = {
alloc_rp_uuid: {
'resources': alloc['resources'],
}
for alloc_rp_uuid, alloc in current_allocs.items()
if alloc_rp_uuid != rp_uuid
}
r = self._put_allocations(context, consumer_uuid, current_allocs)
# TODO(gibi): do not return a value as none of the callers
# do anything with the return value except logging
return r.status_code == 204
if len(compute_providers) == 1:
# NOTE(danms): We are in a resize to same host scenario. Since we
# are the only provider then we need to merge back in the doubled
# allocation with our part subtracted
peer_alloc = {
'resources': current_allocs[rp_uuid]['resources'],
}
LOG.debug('Original resources from same-host '
'allocation: %s', peer_alloc['resources'])
scheduler_utils.merge_resources(peer_alloc['resources'],
resources, -1)
LOG.debug('Subtracting old resources from same-host '
'allocation: %s', peer_alloc['resources'])
new_allocs[rp_uuid] = peer_alloc
payload = {'allocations': new_allocs}
payload['project_id'] = project_id
payload['user_id'] = user_id
payload['consumer_generation'] = current_consumer_generation
LOG.debug("Sending updated allocation %s for instance %s after "
"removing resources for %s.",
new_allocs, consumer_uuid, rp_uuid)
r = self.put(url, payload, version=CONSUMER_GENERATION_VERSION,
def _put_allocations(
self, context, consumer_uuid, payload,
version=CONSUMER_GENERATION_VERSION):
url = '/allocations/%s' % consumer_uuid
r = self.put(url, payload, version=version,
global_request_id=context.global_id)
if r.status_code != 204:
LOG.warning("Failed to save allocation for %s. Got HTTP %s: %s",
consumer_uuid, r.status_code, r.text)
return r.status_code == 204
return r
@safe_connect
@retries
@ -2078,9 +2043,7 @@ class SchedulerReportClient(object):
'user_id': user_id,
'consumer_generation': consumer_generation
}
url = '/allocations/%s' % consumer_uuid
r = self.put(url, payload, version=CONSUMER_GENERATION_VERSION,
global_request_id=context.global_id)
r = self._put_allocations(context, consumer_uuid, payload)
if r.status_code != 204:
err = r.json()['errors'][0]
# NOTE(jaypipes): Yes, it sucks doing string comparison like this
@ -2098,13 +2061,6 @@ class SchedulerReportClient(object):
'involved in our attempt to put allocations for '
'consumer %s' % consumer_uuid)
raise Retry('put_allocations', reason)
else:
LOG.warning(
'Unable to submit allocation for instance '
'%(uuid)s (%(code)i %(text)s)',
{'uuid': consumer_uuid,
'code': r.status_code,
'text': r.text})
return r.status_code == 204
@safe_connect


@ -1015,28 +1015,3 @@ def claim_resources(ctx, client, spec_obj, instance_uuid, alloc_req,
return client.claim_resources(ctx, instance_uuid, alloc_req, project_id,
user_id, allocation_request_version=allocation_request_version,
consumer_generation=None)
def remove_allocation_from_compute(context, instance, compute_node_uuid,
reportclient, flavor=None):
"""Removes the instance allocation from the compute host.
:param context: The request context
:param instance: the instance object owning the allocation
:param compute_node_uuid: the UUID of the compute node where the allocation
needs to be removed
:param reportclient: the SchedulerReportClient instances to be used to
communicate with Placement
:param flavor: If provided then it is used to calculate the amount of
resource that needs to be removed. If not provided then
instance.flavor will be used
:return: True if the removal was successful, False otherwise
"""
if not flavor:
flavor = instance.flavor
my_resources = resources_from_flavor(instance, flavor)
return reportclient.remove_provider_from_instance_allocation(
context, instance.uuid, compute_node_uuid, instance.user_id,
instance.project_id, my_resources)


@ -4903,30 +4903,6 @@ class ServerMovingTestsWithNestedResourceRequests(
super(ServerMovingTestsWithNestedResourceRequests,
self).test_live_migrate_force()
@unittest.expectedFailure
def test_evacuate(self):
# This test shows a bug. It seems that when the source allocation is
# dropped after the failed evacuate nova only drops the allocation from
# the root RP but not from the child RP.
super(ServerMovingTestsWithNestedResourceRequests,
self).test_evacuate()
@unittest.expectedFailure
def test_evacuate_rebuild_on_dest_fails(self):
# This test shows a bug. It seems that when the rebuild fails on the
# destination host only the root RP allocation is deleted on the dest
# host.
super(ServerMovingTestsWithNestedResourceRequests,
self).test_evacuate_rebuild_on_dest_fails()
@unittest.expectedFailure
def test_evacuate_claim_on_dest_fails(self):
# This test shows a bug. It seems that when the evacuation fails on the
# destination host only the root RP allocation is deleted on the dest
# host.
super(ServerMovingTestsWithNestedResourceRequests,
self).test_evacuate_claim_on_dest_fails()
@unittest.expectedFailure
def test_evacuate_forced_host(self):
# This test shows a bug. When the conductor tries to claim the same


@ -5664,7 +5664,10 @@ class ComputeTestCase(BaseTestCase,
self.context))
self._test_confirm_resize(power_on=True, numa_topology=numa_topology)
def test_confirm_resize_with_numa_topology_and_cpu_pinning(self):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
def test_confirm_resize_with_numa_topology_and_cpu_pinning(
self, mock_remove_allocs):
instance = self._create_fake_instance_obj()
instance.old_flavor = instance.flavor
instance.new_flavor = instance.flavor
@ -5797,7 +5800,7 @@ class ComputeTestCase(BaseTestCase,
'free_device'),
mock.patch.object(self.rt.pci_tracker.stats, 'to_device_pools_obj',
return_value=objects.PciDevicePoolList())
) as (mock_setup, mock_migrate, mock_pci_free_device,
) as (mock_setup, mock_migrate, mock_pci_free_device,
mock_to_device_pools_obj):
method(self.context, instance=instance,
migration=migration)
@ -6047,7 +6050,6 @@ class ComputeTestCase(BaseTestCase,
self.assertNotEqual(migration.dest_compute, migration.source_compute)
self.assertNotEqual(migration.dest_node, migration.source_node)
self.assertEqual(NODENAME2, migration.dest_compute)
self.assertEqual(NODENAME2, migration.dest_node)
def test_get_by_flavor_id(self):
flavor_type = flavors.get_flavor_by_flavor_id(1)
@ -6249,6 +6251,8 @@ class ComputeTestCase(BaseTestCase,
migrate_data=test.MatchType(
migrate_data_obj.XenapiLiveMigrateData))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch.object(compute_rpcapi.ComputeAPI, 'pre_live_migration')
@mock.patch.object(compute_rpcapi.ComputeAPI,
'post_live_migration_at_destination')
@ -6257,7 +6261,7 @@ class ComputeTestCase(BaseTestCase,
@mock.patch.object(compute_utils, 'EventReporter')
@mock.patch('nova.objects.Migration.save')
def test_live_migration_works_correctly(self, mock_save, mock_event,
mock_clear, mock_post, mock_pre):
mock_clear, mock_post, mock_pre, mock_remove_allocs):
# Confirm live_migration() works as expected correctly.
# creating instance testdata
c = context.get_admin_context()
@ -6304,6 +6308,8 @@ class ComputeTestCase(BaseTestCase,
'host'], 'dest_compute': dest})
mock_post.assert_called_once_with(c, instance, False, dest)
mock_clear.assert_called_once_with(mock.ANY)
mock_remove_allocs.assert_called_once_with(
c, instance.uuid, self.rt.compute_nodes[NODENAME].uuid)
@mock.patch.object(compute_rpcapi.ComputeAPI, 'pre_live_migration')
@mock.patch.object(compute_rpcapi.ComputeAPI,
@ -6350,13 +6356,15 @@ class ComputeTestCase(BaseTestCase,
# cleanup
instance.destroy()
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch.object(fake.FakeDriver, 'unfilter_instance')
@mock.patch.object(compute_rpcapi.ComputeAPI,
'post_live_migration_at_destination')
@mock.patch.object(compute_manager.InstanceEvents,
'clear_events_for_instance')
def test_post_live_migration_no_shared_storage_working_correctly(self,
mock_clear, mock_post, mock_unfilter):
mock_clear, mock_post, mock_unfilter, mock_remove_allocs):
"""Confirm post_live_migration() works correctly as expected
for non shared storage migration.
"""
@ -6410,9 +6418,14 @@ class ComputeTestCase(BaseTestCase,
mock_migrate.assert_called_once_with(c, instance, migration)
mock_post.assert_called_once_with(c, instance, False, dest)
mock_clear.assert_called_once_with(mock.ANY)
mock_remove_allocs.assert_called_once_with(
c, instance.uuid, self.rt.compute_nodes[NODENAME].uuid)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch('nova.compute.utils.notify_about_instance_action')
def test_post_live_migration_working_correctly(self, mock_notify):
def test_post_live_migration_working_correctly(self, mock_notify,
mock_remove_allocs):
# Confirm post_live_migration() works as expected correctly.
dest = 'desthost'
srchost = self.compute.host
@ -6487,6 +6500,8 @@ class ComputeTestCase(BaseTestCase,
self.assertIn(
'Migrating instance to desthost finished successfully.',
self.stdlog.logger.output)
mock_remove_allocs.assert_called_once_with(
c, instance.uuid, self.rt.compute_nodes[NODENAME].uuid)
def test_post_live_migration_exc_on_dest_works_correctly(self):
"""Confirm that post_live_migration() completes successfully
@ -6584,7 +6599,8 @@ class ComputeTestCase(BaseTestCase,
'get_by_instance_uuid'),
mock.patch.object(self.compute.driver, 'get_volume_connector'),
mock.patch.object(cinder.API, 'terminate_connection'),
mock.patch.object(self.compute, '_delete_allocation_after_move'),
mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'delete_allocation_for_migrated_instance'),
) as (
migrate_instance_start, post_live_migration_at_destination,
setup_networks_on_host, clear_events_for_instance,
@ -7457,6 +7473,8 @@ class ComputeTestCase(BaseTestCase,
instance = self._create_fake_instance_obj({'host': 'someotherhost'})
self.compute._instance_update(self.context, instance, vcpus=4)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch.object(compute_manager.ComputeManager,
'_get_instances_on_driver')
@mock.patch.object(compute_manager.ComputeManager,
@ -7468,7 +7486,7 @@ class ComputeTestCase(BaseTestCase,
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_on_shared_storage(self, mock_save,
mock_get_filter, mock_destroy, mock_is_inst, mock_get_blk,
mock_get_inst):
mock_get_inst, mock_remove_allocs):
fake_context = context.get_admin_context()
# instances in central db
@ -7513,7 +7531,12 @@ class ComputeTestCase(BaseTestCase,
mock_destroy.assert_called_once_with(fake_context, evacuated_instance,
'fake_network_info',
'fake_bdi', False)
mock_remove_allocs.assert_called_once_with(
fake_context, evacuated_instance.uuid,
self.rt.compute_nodes[NODENAME].uuid)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch.object(compute_manager.ComputeManager,
'_get_instances_on_driver')
@mock.patch.object(compute_manager.ComputeManager,
@ -7529,7 +7552,7 @@ class ComputeTestCase(BaseTestCase,
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_with_disks(self, mock_save,
mock_get_filter, mock_destroy, mock_check_clean, mock_check,
mock_check_local, mock_get_blk, mock_get_drv):
mock_check_local, mock_get_blk, mock_get_drv, mock_remove_allocs):
fake_context = context.get_admin_context()
# instances in central db
@ -7574,7 +7597,12 @@ class ComputeTestCase(BaseTestCase,
mock_destroy.assert_called_once_with(fake_context, evacuated_instance,
'fake_network_info', 'fake-bdi',
True)
mock_remove_allocs.assert_called_once_with(
fake_context, evacuated_instance.uuid,
self.rt.compute_nodes[NODENAME].uuid)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
@mock.patch.object(compute_manager.ComputeManager,
'_get_instances_on_driver')
@mock.patch.object(compute_manager.ComputeManager,
@ -7589,7 +7617,7 @@ class ComputeTestCase(BaseTestCase,
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_not_implemented(self, mock_save,
mock_get_filter, mock_destroy, mock_check_clean, mock_check,
mock_check_local, mock_get_blk, mock_get_inst):
mock_check_local, mock_get_blk, mock_get_inst, mock_remove_allocs):
fake_context = context.get_admin_context()
# instances in central db
@ -8007,8 +8035,10 @@ class ComputeTestCase(BaseTestCase,
return_value=fake_rt),
mock.patch.object(self.compute.network_api,
'setup_networks_on_host',
side_effect=fake_setup_networks_on_host)
) as (mock_drop, mock_get, mock_setup):
side_effect=fake_setup_networks_on_host),
mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
) as (mock_drop, mock_get, mock_setup, mock_remove_allocs):
migration = objects.Migration(context=self.context.elevated())
migration.instance_uuid = instance.uuid
migration.status = 'finished'
@ -12835,7 +12865,9 @@ class EvacuateHostTestCase(BaseTestCase):
instance = db.instance_get(self.context, self.inst.id)
self.assertEqual(instance['vm_state'], vm_states.STOPPED)
def test_rebuild_with_wrong_shared_storage(self):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
def test_rebuild_with_wrong_shared_storage(self, mock_remove_allocs):
"""Confirm evacuate scenario does not update host."""
with mock.patch.object(self.compute.driver, 'instance_on_disk',
side_effect=lambda x: True) as mock_inst:
@ -12846,6 +12878,8 @@ class EvacuateHostTestCase(BaseTestCase):
instance = db.instance_get(self.context, self.inst.id)
self.assertEqual(instance['host'], 'fake_host_2')
self.assertTrue(mock_inst.called)
mock_remove_allocs.assert_called_once_with(
mock.ANY, instance.uuid, self.rt.compute_nodes[NODENAME].uuid)
@mock.patch.object(cinder.API, 'detach')
@mock.patch.object(compute_manager.ComputeManager, '_prep_block_device')
@ -12943,12 +12977,19 @@ class EvacuateHostTestCase(BaseTestCase):
self.stub_out('nova.virt.fake.FakeDriver.instance_on_disk',
lambda *a, **kw: True)
self.assertRaises(exception.InstanceExists,
lambda: self._rebuild(on_shared_storage=True))
patch_remove_allocs = mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with patch_remove_allocs:
self.assertRaises(exception.InstanceExists,
lambda: self._rebuild(on_shared_storage=True))
def test_driver_does_not_support_recreate(self):
patch_remove_allocs = mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with mock.patch.dict(self.compute.driver.capabilities,
supports_evacuate=False):
supports_evacuate=False), patch_remove_allocs:
self.stub_out('nova.virt.fake.FakeDriver.instance_on_disk',
lambda *a, **kw: True)
self.assertRaises(exception.InstanceEvacuateNotSupported,
@ -13024,7 +13065,10 @@ class EvacuateHostTestCase(BaseTestCase):
patch_claim = mock.patch.object(
self.compute._resource_tracker, 'rebuild_claim',
side_effect=exception.ComputeResourcesUnavailable(reason="boom"))
with patch_spawn, patch_on_disk, patch_claim:
patch_remove_allocs = mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with patch_spawn, patch_on_disk, patch_claim, patch_remove_allocs:
self.assertRaises(exception.BuildAbortException,
self._rebuild, migration=migration,
send_node=True)
@ -13042,7 +13086,11 @@ class EvacuateHostTestCase(BaseTestCase):
patch_rebuild = mock.patch.object(
self.compute, '_do_rebuild_instance_with_claim',
side_effect=test.TestingException())
with patch_spawn, patch_on_disk, patch_claim, patch_rebuild:
patch_remove_allocs = mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with patch_spawn, patch_on_disk, patch_claim, patch_rebuild, \
patch_remove_allocs:
self.assertRaises(test.TestingException,
self._rebuild, migration=migration,
send_node=True)


@ -885,15 +885,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
instance_id=deleted_instance['uuid'])),
mock.patch.object(
self.compute.reportclient,
'remove_provider_from_instance_allocation')
'remove_provider_tree_from_instance_allocation')
) as (mock_get_net, mock_remove_allocation):
self.compute.init_host()
mock_remove_allocation.assert_called_once_with(
self.context, deleted_instance.uuid, uuids.our_node_uuid,
deleted_instance.user_id, deleted_instance.project_id,
mock.sentinel.my_resources)
self.context, deleted_instance.uuid, uuids.our_node_uuid)
mock_init_host.assert_called_once_with(host=our_host)
mock_host_get.assert_called_once_with(self.context, our_host,
@ -3834,7 +3832,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
mock.patch('nova.scheduler.utils.resources_from_flavor'),
mock.patch.object(self.compute.reportclient,
'remove_provider_from_instance_allocation')
'remove_provider_tree_from_instance_allocation')
) as (_get_instances_on_driver, get_instance_nw_info,
_get_instance_block_device_info, _is_instance_storage_shared,
destroy, migration_list, migration_save, get_node,
@ -3852,8 +3850,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
get_node.assert_called_once_with(
self.context, our_host, migration.source_node)
remove_allocation.assert_called_once_with(
self.context, instance_2.uuid, uuids.our_node_uuid,
uuids.user_id, uuids.project_id, mock.sentinel.resources)
self.context, instance_2.uuid, uuids.our_node_uuid)
def test_destroy_evacuated_instances_node_deleted(self):
our_host = self.compute.host
@ -3903,7 +3900,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
mock.patch('nova.scheduler.utils.resources_from_flavor'),
mock.patch.object(self.compute.reportclient,
'remove_provider_from_instance_allocation')
'remove_provider_tree_from_instance_allocation')
) as (_get_instances_on_driver, get_instance_nw_info,
_get_instance_block_device_info, _is_instance_storage_shared,
destroy, migration_list, migration_save, get_node,
@ -3929,8 +3926,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
# but only instance_2 is deallocated as the compute node for
# instance_1 is already deleted
remove_allocation.assert_called_once_with(
self.context, instance_2.uuid, uuids.our_node_uuid,
uuids.user_id, uuids.project_id, mock.sentinel.resources)
self.context, instance_2.uuid, uuids.our_node_uuid)
self.assertEqual(2, get_node.call_count)
@ -8021,7 +8017,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
@mock.patch.object(objects.ComputeNode,
'get_first_node_by_host_for_old_compat')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_from_instance_allocation')
'remove_provider_tree_from_instance_allocation')
def test_rollback_live_migration_cinder_v3_api(self, mock_remove_allocs,
mock_get_node):
compute = manager.ComputeManager()


@ -3143,11 +3143,7 @@ class TestUpdateUsageFromInstance(BaseTestCase):
self.assertEqual(-1024, cn.free_ram_mb)
self.assertEqual(-1, cn.free_disk_gb)
@mock.patch('nova.scheduler.utils.resources_from_flavor')
def test_delete_allocation_for_evacuated_instance(
self, mock_resource_from_flavor):
mock_resource = mock.Mock()
mock_resource_from_flavor.return_value = mock_resource
def test_delete_allocation_for_evacuated_instance(self):
instance = _INSTANCE_FIXTURES[0].obj_clone()
instance.uuid = uuids.inst0
ctxt = context.get_admin_context()
@ -3156,10 +3152,9 @@ class TestUpdateUsageFromInstance(BaseTestCase):
ctxt, instance, _NODENAME)
rc = self.rt.reportclient
mock_remove_allocation = rc.remove_provider_from_instance_allocation
mock_remove_allocation.assert_called_once_with(
ctxt, instance.uuid, self.rt.compute_nodes[_NODENAME].uuid,
instance.user_id, instance.project_id, mock_resource)
mock_remove_allocs = rc.remove_provider_tree_from_instance_allocation
mock_remove_allocs.assert_called_once_with(
ctxt, instance.uuid, self.rt.compute_nodes[_NODENAME].uuid)
class TestInstanceInResizeState(test.NoDBTestCase):


@ -296,7 +296,7 @@ class TestPutAllocations(SchedulerReportClientTestCase):
expected_url, mock.ANY, version='1.28',
global_request_id=self.context.global_id)
log_msg = mock_warn.call_args[0][0]
self.assertIn("Unable to submit allocation for instance", log_msg)
self.assertIn("Failed to save allocation for", log_msg)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put')
def test_put_allocations_fail_due_to_consumer_generation_conflict(
@ -1142,7 +1142,8 @@ class TestPutAllocations(SchedulerReportClientTestCase):
/allocations/{consumer_uuid} call.
"""
get_resp_mock = mock.Mock(status_code=200)
get_resp_mock.json.return_value = {
get_resp_mock.json.side_effect = [
{
'allocations': {
uuids.source: {
'resource_provider_generation': 42,
@ -1162,22 +1163,32 @@ class TestPutAllocations(SchedulerReportClientTestCase):
'consumer_generation': 1,
'project_id': uuids.project_id,
'user_id': uuids.user_id,
}
},
# the second get is for resource providers in the compute tree,
# return just the compute
{
"resource_providers": [
{
"uuid": uuids.source,
},
]
},
]
self.ks_adap_mock.get.return_value = get_resp_mock
resp_mock = mock.Mock(status_code=204)
self.ks_adap_mock.put.return_value = resp_mock
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_from_instance_allocation(
self.context, consumer_uuid, uuids.source, user_id, project_id,
mock.Mock())
res = self.client.remove_provider_tree_from_instance_allocation(
self.context, consumer_uuid, uuids.source)
expected_url = "/allocations/%s" % consumer_uuid
# New allocations should only include the destination...
expected_payload = {
'allocations': {
uuids.destination: {
'resource_provider_generation': 42,
'resources': {
'VCPU': 1,
'MEMORY_MB': 1024,
@ -1206,7 +1217,8 @@ class TestPutAllocations(SchedulerReportClientTestCase):
call.
"""
get_resp_mock = mock.Mock(status_code=200)
get_resp_mock.json.return_value = {
get_resp_mock.json.side_effect = [
{
'allocations': {
uuids.source: {
'resource_provider_generation': 42,
@ -1232,27 +1244,38 @@ class TestPutAllocations(SchedulerReportClientTestCase):
'consumer_generation': 1,
'project_id': uuids.project_id,
'user_id': uuids.user_id,
}
},
# the second get is for resource providers in the compute tree,
# return just the compute
{
"resource_providers": [
{
"uuid": uuids.source,
},
]
},
]
self.ks_adap_mock.get.return_value = get_resp_mock
resp_mock = mock.Mock(status_code=204)
self.ks_adap_mock.put.return_value = resp_mock
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_from_instance_allocation(
self.context, consumer_uuid, uuids.source, user_id, project_id,
mock.Mock())
res = self.client.remove_provider_tree_from_instance_allocation(
self.context, consumer_uuid, uuids.source)
expected_url = "/allocations/%s" % consumer_uuid
# New allocations should only include the destination...
expected_payload = {
'allocations': {
uuids.shared_storage: {
'resource_provider_generation': 42,
'resources': {
'DISK_GB': 100,
},
},
uuids.destination: {
'resource_provider_generation': 42,
'resources': {
'VCPU': 1,
'MEMORY_MB': 1024,
@ -1274,14 +1297,15 @@ class TestPutAllocations(SchedulerReportClientTestCase):
self.assertTrue(res)
def test_remove_provider_from_inst_alloc_no_source(self):
"""Tests that if remove_provider_from_instance_allocation() fails to
find any allocations for the source host, it just returns True and
"""Tests that if remove_provider_tree_from_instance_allocation() fails
to find any allocations for the source host, it just returns True and
does not attempt to rewrite the allocation for the consumer.
"""
get_resp_mock = mock.Mock(status_code=200)
# Act like the allocations already did not include the source host for
# some reason
get_resp_mock.json.return_value = {
get_resp_mock.json.side_effect = [
# Act like the allocations already did not include the source host
# for some reason
{
'allocations': {
uuids.shared_storage: {
'resource_provider_generation': 42,
@ -1300,14 +1324,21 @@ class TestPutAllocations(SchedulerReportClientTestCase):
'consumer_generation': 1,
'project_id': uuids.project_id,
'user_id': uuids.user_id,
}
},
# the second get is for resource providers in the compute tree,
# return just the compute
{
"resource_providers": [
{
"uuid": uuids.source,
},
]
},
]
self.ks_adap_mock.get.return_value = get_resp_mock
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_from_instance_allocation(
self.context, consumer_uuid, uuids.source, user_id, project_id,
mock.Mock())
res = self.client.remove_provider_tree_from_instance_allocation(
self.context, consumer_uuid, uuids.source)
self.ks_adap_mock.get.assert_called()
self.ks_adap_mock.put.assert_not_called()
@ -1315,27 +1346,21 @@ class TestPutAllocations(SchedulerReportClientTestCase):
self.assertTrue(res)
def test_remove_provider_from_inst_alloc_fail_get_allocs(self):
"""Tests that we gracefully exit with False from
remove_provider_from_instance_allocation() if the call to get the
existing allocations fails for some reason
"""
get_resp_mock = mock.Mock(status_code=500)
self.ks_adap_mock.get.return_value = get_resp_mock
self.ks_adap_mock.get.return_value = fake_requests.FakeResponse(
status_code=500)
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_from_instance_allocation(
self.context, consumer_uuid, uuids.source, user_id, project_id,
mock.Mock())
self.assertRaises(
exception.ConsumerAllocationRetrievalFailed,
self.client.remove_provider_tree_from_instance_allocation,
self.context, consumer_uuid, uuids.source)
self.ks_adap_mock.get.assert_called()
self.ks_adap_mock.put.assert_not_called()
self.assertFalse(res)
def test_remove_provider_from_inst_alloc_consumer_gen_conflict(self):
get_resp_mock = mock.Mock(status_code=200)
get_resp_mock.json.return_value = {
get_resp_mock.json.side_effect = [
{
'allocations': {
uuids.source: {
'resource_provider_generation': 42,
@ -1355,19 +1380,124 @@ class TestPutAllocations(SchedulerReportClientTestCase):
'consumer_generation': 1,
'project_id': uuids.project_id,
'user_id': uuids.user_id,
}
},
# the second get is for resource providers in the compute tree,
# return just the compute
{
"resource_providers": [
{
"uuid": uuids.source,
},
]
},
]
self.ks_adap_mock.get.return_value = get_resp_mock
resp_mock = mock.Mock(status_code=409)
self.ks_adap_mock.put.return_value = resp_mock
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_from_instance_allocation(
self.context, consumer_uuid, uuids.source, user_id, project_id,
mock.Mock())
res = self.client.remove_provider_tree_from_instance_allocation(
self.context, consumer_uuid, uuids.source)
self.assertFalse(res)
def test_remove_provider_tree_from_inst_alloc_nested(self):
self.ks_adap_mock.get.side_effect = [
fake_requests.FakeResponse(
status_code=200,
content=jsonutils.dumps(
{
'allocations': {
uuids.source_compute: {
'resource_provider_generation': 42,
'resources': {
'VCPU': 1,
'MEMORY_MB': 1024,
},
},
uuids.source_nested: {
'resource_provider_generation': 42,
'resources': {
'CUSTOM_MAGIC': 1
},
},
uuids.destination: {
'resource_provider_generation': 42,
'resources': {
'VCPU': 1,
'MEMORY_MB': 1024,
},
},
},
'consumer_generation': 1,
'project_id': uuids.project_id,
'user_id': uuids.user_id,
})),
# the second get is for resource providers in the compute tree,
# return both RPs in the source compute tree
fake_requests.FakeResponse(
status_code=200,
content=jsonutils.dumps(
{
"resource_providers": [
{
"uuid": uuids.source_compute,
},
{
"uuid": uuids.source_nested,
},
]
}))
]
self.ks_adap_mock.put.return_value = fake_requests.FakeResponse(
status_code=204)
consumer_uuid = uuids.consumer_uuid
project_id = uuids.project_id
user_id = uuids.user_id
res = self.client.remove_provider_tree_from_instance_allocation(
self.context, consumer_uuid, uuids.source_compute)
expected_url = "/allocations/%s" % consumer_uuid
# New allocations should only include the destination...
expected_payload = {
'allocations': {
uuids.destination: {
'resource_provider_generation': 42,
'resources': {
'VCPU': 1,
'MEMORY_MB': 1024,
},
},
},
'consumer_generation': 1,
'project_id': project_id,
'user_id': user_id
}
self.assertEqual(
[
mock.call(
'/allocations/%s' % consumer_uuid,
headers=mock.ANY,
microversion='1.28'
),
mock.call(
'/resource_providers?in_tree=%s' % uuids.source_compute,
headers=mock.ANY,
microversion='1.14'
)
],
self.ks_adap_mock.get.mock_calls)
# We have to pull the json body from the mock call_args to validate
# it separately otherwise hash seed issues get in the way.
actual_payload = self.ks_adap_mock.put.call_args[1]['json']
self.assertEqual(expected_payload, actual_payload)
self.ks_adap_mock.put.assert_called_once_with(
expected_url, microversion='1.28', json=mock.ANY,
headers={'X-Openstack-Request-Id': self.context.global_id})
self.assertTrue(res)
class TestMoveAllocations(SchedulerReportClientTestCase):