diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index e063b01991c8..0fb8bc527cbf 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -329,8 +329,9 @@ class LiveMigrationTask(base.TaskBase): self._check_not_over_max_retries(attempted_hosts) request_spec.ignore_hosts = attempted_hosts try: - host = self.scheduler_client.select_destinations(self.context, - request_spec, [self.instance.uuid])[0]['host'] + hoststate = self.scheduler_client.select_destinations( + self.context, request_spec, [self.instance.uuid])[0] + host = hoststate['host'] except messaging.RemoteError as ex: # TODO(ShaoHe Feng) There maybe multi-scheduler, and the # scheduling algorithm is R-R, we can let other scheduler try. @@ -346,9 +347,47 @@ class LiveMigrationTask(base.TaskBase): LOG.debug("Skipping host: %(host)s because: %(e)s", {"host": host, "e": e}) attempted_hosts.append(host) + # The scheduler would have created allocations against the + # selected destination host in Placement, so we need to remove + # those before moving on. + self._remove_host_allocations(host, hoststate['nodename']) host = None return host + def _remove_host_allocations(self, host, node): + """Removes instance allocations against the given host from Placement + + :param host: The name of the host. + :param node: The name of the node. + """ + # Get the compute node object since we need the UUID. + # TODO(mriedem): If the result of select_destinations eventually + # returns the compute node uuid, we wouldn't need to look it + # up via host/node and we can save some time. + try: + compute_node = objects.ComputeNode.get_by_host_and_nodename( + self.context, host, node) + except exception.ComputeHostNotFound: + # This shouldn't happen, but we're being careful. + LOG.info('Unable to remove instance allocations from host %s ' + 'and node %s since it was not found.', host, node, + instance=self.instance) + return + + # Calculate the resource class amounts to subtract from the allocations + # on the node based on the instance flavor. + resources = scheduler_utils.resources_from_flavor( + self.instance, self.instance.flavor) + + # Now remove the allocations for our instance against that node. + # Note that this does not remove allocations against any other node + # or shared resource provider, it's just undoing what the scheduler + # allocated for the given (destination) node. + self.scheduler_client.reportclient.\ + remove_provider_from_instance_allocation( + self.instance.uuid, compute_node.uuid, self.instance.user_id, + self.instance.project_id, resources) + def _check_not_over_max_retries(self, attempted_hosts): if CONF.migrate_max_retries == -1: return diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 08b5235188f7..c7099c16dd79 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -1014,13 +1014,7 @@ class SchedulerReportClient(object): then PUTs the resulting allocation back to the placement API for the consumer. - We call this method on three occasions: - - 1. On the source host during a confirm_resize() operation. - 2. On the destination host during a revert_resize() operation. - 3. On the destination host when prep_resize fails. - - This is important to reconcile the "doubled-up" allocation that the + This is used to reconcile the "doubled-up" allocation that the scheduler constructs when claiming resources against the destination host during a move operation. diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 9787f6ee1697..6f259bfb9edd 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1953,23 +1953,17 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin): self.assertFlavorMatchesAllocation(self.flavor1, source_usages) dest_usages = self._get_provider_usages(dest_rp_uuid) - # FIXME(mriedem): This is bug 1712411 where the allocations, created - # by the scheduler, via conductor, are not cleaned up on the migration - # pre-check error. Uncomment the following code when this is fixed. - self.assertFlavorMatchesAllocation(self.flavor1, dest_usages) - # # Assert the allocations, created by the scheduler, are cleaned up - # # after the migration pre-check error happens. - # self.assertFlavorMatchesAllocation( - # {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) + # Assert the allocations, created by the scheduler, are cleaned up + # after the migration pre-check error happens. + self.assertFlavorMatchesAllocation( + {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages) allocations = self._get_allocations_by_server_uuid(server['id']) - # FIXME(mriedem): After bug 1712411 is fixed there should only be 1 - # allocation for the instance, on the source node. - self.assertEqual(2, len(allocations)) + # There should only be 1 allocation for the instance on the source node + self.assertEqual(1, len(allocations)) self.assertIn(source_rp_uuid, allocations) - self.assertIn(dest_rp_uuid, allocations) - dest_allocation = allocations[dest_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation) + self.assertFlavorMatchesAllocation( + self.flavor1, allocations[source_rp_uuid]['resources']) self._delete_and_check_allocations( server, source_rp_uuid, dest_rp_uuid) diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index 29a2ba407d19..5ae6303c97c6 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -373,7 +373,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): scheduler_utils.setup_instance_group(self.context, self.fake_spec) self.task.scheduler_client.select_destinations(self.context, self.fake_spec, [self.instance.uuid]).AndReturn( - [{'host': 'host1'}]) + [{'host': 'host1', 'nodename': 'node1'}]) self.task._check_compatible_with_source_hypervisor("host1")\ .AndRaise(error) @@ -384,7 +384,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.task._call_livem_checks_on_host("host2") self.mox.ReplayAll() - self.assertEqual("host2", self.task._find_destination()) + with mock.patch.object(self.task, + '_remove_host_allocations') as remove_allocs: + self.assertEqual("host2", self.task._find_destination()) + # Should have removed allocations for the first host. + remove_allocs.assert_called_once_with('host1', 'node1') def test_find_destination_retry_with_old_hypervisor(self): self._test_find_destination_retry_hypervisor_raises( @@ -409,7 +413,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): scheduler_utils.setup_instance_group(self.context, self.fake_spec) self.task.scheduler_client.select_destinations(self.context, self.fake_spec, [self.instance.uuid]).AndReturn( - [{'host': 'host1'}]) + [{'host': 'host1', 'nodename': 'node1'}]) self.task._check_compatible_with_source_hypervisor("host1") self.task._call_livem_checks_on_host("host1")\ .AndRaise(exception.Invalid) @@ -421,7 +425,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.task._call_livem_checks_on_host("host2") self.mox.ReplayAll() - self.assertEqual("host2", self.task._find_destination()) + with mock.patch.object(self.task, + '_remove_host_allocations') as remove_allocs: + self.assertEqual("host2", self.task._find_destination()) + # Should have removed allocations for the first host. + remove_allocs.assert_called_once_with('host1', 'node1') def test_find_destination_retry_with_failed_migration_pre_checks(self): self.flags(migrate_max_retries=1) @@ -438,7 +446,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): scheduler_utils.setup_instance_group(self.context, self.fake_spec) self.task.scheduler_client.select_destinations(self.context, self.fake_spec, [self.instance.uuid]).AndReturn( - [{'host': 'host1'}]) + [{'host': 'host1', 'nodename': 'node1'}]) self.task._check_compatible_with_source_hypervisor("host1") self.task._call_livem_checks_on_host("host1")\ .AndRaise(exception.MigrationPreCheckError("reason")) @@ -450,7 +458,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.task._call_livem_checks_on_host("host2") self.mox.ReplayAll() - self.assertEqual("host2", self.task._find_destination()) + with mock.patch.object(self.task, + '_remove_host_allocations') as remove_allocs: + self.assertEqual("host2", self.task._find_destination()) + # Should have removed allocations for the first host. + remove_allocs.assert_called_once_with('host1', 'node1') def test_find_destination_retry_exceeds_max(self): self.flags(migrate_max_retries=0) @@ -466,16 +478,23 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): scheduler_utils.setup_instance_group(self.context, self.fake_spec) self.task.scheduler_client.select_destinations(self.context, self.fake_spec, [self.instance.uuid]).AndReturn( - [{'host': 'host1'}]) + [{'host': 'host1', 'nodename': 'node1'}]) self.task._check_compatible_with_source_hypervisor("host1")\ .AndRaise(exception.DestinationHypervisorTooOld) self.mox.ReplayAll() - with mock.patch.object(self.task.migration, 'save') as save_mock: + with test.nested( + mock.patch.object(self.task.migration, 'save'), + mock.patch.object(self.task, '_remove_host_allocations') + ) as ( + save_mock, remove_allocs + ): self.assertRaises(exception.MaxRetriesExceeded, self.task._find_destination) self.assertEqual('failed', self.task.migration.status) save_mock.assert_called_once_with() + # Should have removed allocations for the first host. + remove_allocs.assert_called_once_with('host1', 'node1') def test_find_destination_when_runs_out_of_hosts(self): self.mox.StubOutWithMock(utils, 'get_image_from_system_metadata') @@ -636,3 +655,15 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.task._get_destination_cell_mapping) mock_get.assert_called_once_with( self.task.context, self.task.destination) + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename', + side_effect=exception.ComputeHostNotFound(host='host')) + def test_remove_host_allocations_compute_host_not_found(self, get_cn): + """Tests that failing to find a ComputeNode will not blow up + the _remove_host_allocations method. + """ + with mock.patch.object( + self.task.scheduler_client.reportclient, + 'remove_provider_from_instance_allocation') as remove_provider: + self.task._remove_host_allocations('host', 'node') + remove_provider.assert_not_called()