diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index f69685d94e4f..ee7de74a739a 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -642,6 +642,9 @@ class ComputeManager(manager.Manager):
         compared to the instances for the migration records and those local
         guests are destroyed, along with instance allocation records in
         Placement for this node.
+        Then allocations are removed from Placement for every instance that is
+        evacuated from this host regardless if the instance is reported by the
+        hypervisor or not.
         """
         filters = {
             'source_compute': self.host,
@@ -673,18 +676,14 @@ class ComputeManager(manager.Manager):
         # we know we'll use, like info_cache and flavor. We can also replace
         # this with a generic solution: https://review.openstack.org/575190/
         local_instances = self._get_instances_on_driver(read_deleted_context)
-        evacuated = [inst for inst in local_instances
-                     if inst.uuid in evacuations]
+        evacuated_local_instances = {inst.uuid: inst
+                                     for inst in local_instances
+                                     if inst.uuid in evacuations}
 
-        # NOTE(gibi): We are called from init_host and at this point the
-        # compute_nodes of the resource tracker has not been populated yet so
-        # we cannot rely on the resource tracker here.
-        compute_nodes = {}
-
-        for instance in evacuated:
-            migration = evacuations[instance.uuid]
-            LOG.info('Deleting instance as it has been evacuated from '
-                     'this host', instance=instance)
+        for instance in evacuated_local_instances.values():
+            LOG.info('Destroying instance as it has been evacuated from '
+                     'this host but still exists in the hypervisor',
+                     instance=instance)
             try:
                 network_info = self.network_api.get_instance_nw_info(
                     context, instance)
@@ -704,7 +703,28 @@ class ComputeManager(manager.Manager):
                                 network_info,
                                 bdi, destroy_disks)
 
-            # delete the allocation of the evacuated instance from this host
+        # NOTE(gibi): We are called from init_host and at this point the
+        # compute_nodes of the resource tracker has not been populated yet so
+        # we cannot rely on the resource tracker here.
+        compute_nodes = {}
+
+        for instance_uuid, migration in evacuations.items():
+            try:
+                if instance_uuid in evacuated_local_instances:
+                    # Avoid the db call if we already have the instance loaded
+                    # above
+                    instance = evacuated_local_instances[instance_uuid]
+                else:
+                    instance = objects.Instance.get_by_uuid(
+                        context, instance_uuid)
+            except exception.InstanceNotFound:
+                # The instance already deleted so we expect that every
+                # allocation of that instance has already been cleaned up
+                continue
+
+            LOG.info('Cleaning up allocations of the instance as it has been '
+                     'evacuated from this host',
+                     instance=instance)
             if migration.source_node not in compute_nodes:
                 try:
                     cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 85dc0cb61247..679a66a1703a 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -3917,7 +3917,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         # both instance_1 and instance_2 is destroyed in the driver
         destroy.assert_has_calls(
             [mock.call(self.context, instance_1, None, {}, True),
-             mock.call(self.context, instance_2, None, {}, True)])
+             mock.call(self.context, instance_2, None, {}, True)],
+            any_order=True)
 
         # but only instance_2 is deallocated as the compute node for
         # instance_1 is already deleted
@@ -3927,6 +3928,93 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
 
         self.assertEqual(2, get_node.call_count)
 
+    def test_destroy_evacuated_instances_not_on_hypervisor(self):
+        our_host = self.compute.host
+        flavor = objects.Flavor()
+        instance_1 = objects.Instance(self.context, flavor=flavor)
+        instance_1.uuid = uuids.instance_1
+        instance_1.task_state = None
+        instance_1.vm_state = vm_states.ACTIVE
+        instance_1.host = 'not-' + our_host
+        instance_1.user_id = uuids.user_id
+        instance_1.project_id = uuids.project_id
+        instance_1.deleted = False
+
+        migration_1 = objects.Migration(instance_uuid=instance_1.uuid)
+        migration_1.status = 'done'
+        migration_1.source_node = 'our-node'
+
+        our_node = objects.ComputeNode(
+            host=our_host, uuid=uuids.our_node_uuid)
+
+        with test.nested(
+            mock.patch.object(self.compute, '_get_instances_on_driver',
+                              return_value=[]),
+            mock.patch.object(self.compute.driver, 'destroy'),
+            mock.patch('nova.objects.MigrationList.get_by_filters'),
+            mock.patch('nova.objects.Migration.save'),
+            mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_from_instance_allocation'),
+            mock.patch('nova.objects.Instance.get_by_uuid')
+        ) as (_get_intances_on_driver, destroy, migration_list, migration_save,
+              get_node, get_resources, remove_allocation,
+              instance_get_by_uuid):
+            migration_list.return_value = [migration_1]
+            instance_get_by_uuid.return_value = instance_1
+            get_node.return_value = our_node
+            get_resources.return_value = mock.sentinel.resources
+
+            self.compute._destroy_evacuated_instances(self.context)
+
+            # nothing to be destroyed as the driver returned no instances on
+            # the hypervisor
+            self.assertFalse(destroy.called)
+
+            # but our only instance still cleaned up in placement
+            remove_allocation.assert_called_once_with(
+                self.context, instance_1.uuid, uuids.our_node_uuid,
+                instance_1.user_id, instance_1.project_id,
+                mock.sentinel.resources)
+            instance_get_by_uuid.assert_called_once_with(
+                self.context, instance_1.uuid)
+            get_node.assert_called_once_with(
+                self.context, our_host, 'our-node')
+
+    def test_destroy_evacuated_instances_not_on_hyp_and_instance_deleted(self):
+        migration_1 = objects.Migration(instance_uuid=uuids.instance_1)
+        migration_1.status = 'done'
+        migration_1.source_node = 'our-node'
+
+        with test.nested(
+            mock.patch.object(self.compute, '_get_instances_on_driver',
+                              return_value=[]),
+            mock.patch.object(self.compute.driver, 'destroy'),
+            mock.patch('nova.objects.MigrationList.get_by_filters'),
+            mock.patch('nova.objects.Migration.save'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_from_instance_allocation'),
+            mock.patch('nova.objects.Instance.get_by_uuid')
+        ) as (_get_instances_on_driver,
+              destroy, migration_list, migration_save, get_resources,
+              remove_allocation, instance_get_by_uuid):
+            migration_list.return_value = [migration_1]
+            instance_get_by_uuid.side_effect = exception.InstanceNotFound(
+                instance_id=uuids.instance_1)
+
+            self.compute._destroy_evacuated_instances(self.context)
+
+            # nothing to be destroyed as the driver returned no instances on
+            # the hypervisor
+            self.assertFalse(destroy.called)
+
+            instance_get_by_uuid.assert_called_once_with(
+                self.context, uuids.instance_1)
+            # nothing to be cleaned as the instance was deleted already
+            self.assertFalse(remove_allocation.called)
+
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_destroy_evacuated_instances')
     @mock.patch('nova.compute.manager.LOG')