diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py
index 38b280d6c80a..d0b09f8e7963 100644
--- a/nova/conductor/tasks/migrate.py
+++ b/nova/conductor/tasks/migrate.py
@@ -185,6 +185,101 @@ class MigrationTask(base.TaskBase):
         self.request_spec.requested_destination = objects.Destination(
             cell=instance_mapping.cell_mapping)
 
+    def _support_resource_request(self, selection):
+        """Returns true if the host is new enough to support resource request
+        during migration.
+        """
+        svc = objects.Service.get_by_host_and_binary(
+            self.context, selection.service_host, 'nova-compute')
+        return svc.version >= 39
+
+    # TODO(gibi): Remove this compat code when nova doesn't need to support
+    # Train computes any more.
+    def _get_host_supporting_request(self, selection_list):
+        """Return the first compute selection from the selection_list where
+        the service is new enough to support resource request during migration
+        and the resources claimed successfully.
+
+        :param selection_list: a list of Selection objects returned by the
+            scheduler
+        :return: A two tuple. The first item is a Selection object
+            representing the host that supports the request. The second item
+            is a list of Selection objects representing the remaining alternate
+            hosts.
+        :raises MaxRetriesExceeded: if none of the hosts in the selection_list
+            is new enough to support the request or we cannot claim resource
+            on any of the hosts that are new enough.
+        """
+
+        if not self.request_spec.requested_resources:
+            return selection_list[0], selection_list[1:]
+
+        # Scheduler allocated resources on the first host. So check if the
+        # first host is new enough
+        if self._support_resource_request(selection_list[0]):
+            return selection_list[0], selection_list[1:]
+
+        # First host is old, so we need to use an alternate. Therefore we have
+        # to remove the allocation from the first host.
+        self.reportclient.delete_allocation_for_instance(
+            self.context, self.instance.uuid)
+        LOG.debug(
+            'Scheduler returned host %(host)s as a possible migration target '
+            'but that host is not new enough to support the migration with '
+            'resource request %(request)s. Trying alternate hosts.',
+            {'host': selection_list[0].service_host,
+             'request': self.request_spec.requested_resources},
+            instance=self.instance)
+
+        alternates = selection_list[1:]
+
+        for i, selection in enumerate(alternates):
+            if self._support_resource_request(selection):
+                # this host is new enough so we need to try to claim resources
+                # on it
+                if selection.allocation_request:
+                    alloc_req = jsonutils.loads(
+                        selection.allocation_request)
+                    resource_claimed = scheduler_utils.claim_resources(
+                        self.context, self.reportclient, self.request_spec,
+                        self.instance.uuid, alloc_req,
+                        selection.allocation_request_version)
+
+                    if not resource_claimed:
+                        LOG.debug(
+                            'Scheduler returned alternate host %(host)s as a '
+                            'possible migration target but resource claim '
+                            'failed on that host. Trying another alternate.',
+                            {'host': selection.service_host},
+                            instance=self.instance)
+                    else:
+                        return selection, alternates[i + 1:]
+
+                else:
+                    # Some deployments use different schedulers that do not
+                    # use Placement, so they will not have an
+                    # allocation_request to claim with. For those cases,
+                    # there is no concept of claiming, so just assume that
+                    # the resources are available.
+                    return selection, alternates[i + 1:]
+
+            else:
+                LOG.debug(
+                    'Scheduler returned alternate host %(host)s as a possible '
+                    'migration target but that host is not new enough to '
+                    'support the migration with resource request %(request)s. '
+                    'Trying another alternate.',
+                    {'host': selection.service_host,
+                     'request': self.request_spec.requested_resources},
+                    instance=self.instance)
+
+        # if we reach this point then none of the hosts was new enough for the
+        # request or we failed to claim resources on every alternate
+        reason = ("Exhausted all hosts available during compute service level "
+                  "check for instance %(instance_uuid)s." %
+                  {"instance_uuid": self.instance.uuid})
+        raise exception.MaxRetriesExceeded(reason=reason)
+
     def _execute(self):
         # TODO(sbauza): Remove once all the scheduler.utils methods accept a
         # RequestSpec object in the signature.
@@ -277,9 +372,9 @@ class MigrationTask(base.TaskBase):
             # Since there is only ever one instance to migrate per call, we
             # just need the first returned element.
             selection_list = selection_lists[0]
-            # The selected host is the first item in the list, with the
-            # alternates being the remainder of the list.
-            selection, self.host_list = selection_list[0], selection_list[1:]
+
+            selection, self.host_list = self._get_host_supporting_request(
+                selection_list)
 
         scheduler_utils.fill_provider_mapping(
             self.context, self.reportclient, self.request_spec, selection)
@@ -295,6 +390,17 @@ class MigrationTask(base.TaskBase):
         selection = None
         while self.host_list and not host_available:
             selection = self.host_list.pop(0)
+            if (self.request_spec.requested_resources and not
+                    self._support_resource_request(selection)):
+                LOG.debug(
+                    'Scheduler returned alternate host %(host)s as a possible '
+                    'migration target for re-schedule but that host is not '
+                    'new enough to support the migration with resource '
+                    'request %(request)s. Trying another alternate.',
+                    {'host': selection.service_host,
+                     'request': self.request_spec.requested_resources},
+                    instance=self.instance)
+                continue
             if selection.allocation_request:
                 alloc_req = jsonutils.loads(selection.allocation_request)
             else:
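
Note (illustrative, not part of the patch): the compat flow added to
migrate.py above can be read as a small standalone sketch. The callables
below (supports_request, claim, drop_allocation) are hypothetical stand-ins
for _support_resource_request, scheduler_utils.claim_resources and
reportclient.delete_allocation_for_instance, and the non-Placement branch
(no allocation_request) is folded into claim() returning True.

    # Minimal sketch, assuming all helpers are injected by the caller.
    def get_host_supporting_request(selections, requested_resources,
                                    supports_request, claim, drop_allocation):
        if not requested_resources:
            # No resource request: the scheduler's first choice always wins.
            return selections[0], selections[1:]
        if supports_request(selections[0]):
            # The scheduler already allocated on the first host; keep it.
            return selections[0], selections[1:]
        # The first host is too old, so the allocation the scheduler made
        # there is removed before any alternate is tried and claimed.
        drop_allocation(selections[0])
        alternates = selections[1:]
        for i, host in enumerate(alternates):
            if supports_request(host) and claim(host):
                return host, alternates[i + 1:]
        raise RuntimeError('Exhausted all hosts')

The ordering mirrors the patch: the scheduler only allocates on its first
choice, so that allocation is dropped before claiming against an alternate.
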
diff --git a/nova/tests/unit/conductor/tasks/test_migrate.py b/nova/tests/unit/conductor/tasks/test_migrate.py
index f2a77b00ddfc..307773c61dd3 100644
--- a/nova/tests/unit/conductor/tasks/test_migrate.py
+++ b/nova/tests/unit/conductor/tasks/test_migrate.py
@@ -292,6 +292,538 @@ class MigrationTaskTestCase(test.NoDBTestCase):
             self.instance.uuid, alloc_req, '1.19')
         mock_fill_provider_mapping.assert_not_called()
 
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_no_resource_request(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        # no resource request so we expect the first host is simply returned
+        self.request_spec.requested_resources = []
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        alternate = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, alternate]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(first, selected)
+        self.assertEqual([alternate], alternates)
+        mock_get_service.assert_not_called()
+        # The first host was good and the scheduler made allocation on that
+        # host. So we don't expect any resource claim manipulation
+        mock_delete_allocation.assert_not_called()
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_new(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        alternate = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, alternate]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 39
+        mock_get_service.return_value = first_service
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(first, selected)
+        self.assertEqual([alternate], alternates)
+        mock_get_service.assert_called_once_with(
+            task.context, 'host1', 'nova-compute')
+        # The first host was good and the scheduler made allocation on that
+        # host. So we don't expect any resource claim manipulation
+        mock_delete_allocation.assert_not_called()
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_no_alternates(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        mock_get_service.return_value = first_service
+
+        self.assertRaises(
+            exception.MaxRetriesExceeded, task._get_host_supporting_request,
+            selection_list)
+
+        mock_get_service.assert_called_once_with(
+            task.context, 'host1', 'nova-compute')
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_second_good(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        mock_get_service.side_effect = [first_service, second_service]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(second, selected)
+        self.assertEqual([third], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_called_once_with(
+            self.context, task.reportclient, task.request_spec,
+            self.instance.uuid, {"allocations": {uuids.host2: resources}},
+            '1.19')
+
+        mock_debug.assert_called_once_with(
+            'Scheduler returned host %(host)s as a possible migration target '
+            'but that host is not new enough to support the migration with '
+            'resource request %(request)s. Trying alternate hosts.',
+            {'host': 'host1',
+             'request': self.request_spec.requested_resources},
+            instance=self.instance)
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_second_claim_fails(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        fourth = objects.Selection(
+            service_host="host4",
+            nodename="node4",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host4: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third, fourth]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        third_service = objects.Service(service_host='host3')
+        third_service.version = 39
+        mock_get_service.side_effect = [
+            first_service, second_service, third_service]
+        # not called for the first host but called for the second and third
+        # make the second claim fail to force the selection of the third
+        mock_claim_resources.side_effect = [False, True]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(third, selected)
+        self.assertEqual([fourth], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+            mock.call(task.context, 'host3', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_has_calls([
+            mock.call(
+                self.context, task.reportclient, task.request_spec,
+                self.instance.uuid,
+                {"allocations": {uuids.host2: resources}}, '1.19'),
+            mock.call(
+                self.context, task.reportclient, task.request_spec,
+                self.instance.uuid,
+                {"allocations": {uuids.host3: resources}}, '1.19'),
+        ])
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned host %(host)s as a possible migration '
+                'target but that host is not new enough to support the '
+                'migration with resource request %(request)s. Trying '
+                'alternate hosts.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target but resource claim '
+                'failed on that host. Trying another alternate.',
+                {'host': 'host2'},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_both_first_and_second_too_old(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        fourth = objects.Selection(
+            service_host="host4",
+            nodename="node4",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host4: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third, fourth]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 38
+        third_service = objects.Service(service_host='host3')
+        third_service.version = 39
+        mock_get_service.side_effect = [
+            first_service, second_service, third_service]
+        # not called for the first and second hosts but called for the third
+        mock_claim_resources.side_effect = [True]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(third, selected)
+        self.assertEqual([fourth], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+            mock.call(task.context, 'host3', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_called_once_with(
+            self.context, task.reportclient, task.request_spec,
+            self.instance.uuid,
+            {"allocations": {uuids.host3: resources}}, '1.19')
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned host %(host)s as a possible migration '
+                'target but that host is not new enough to support the '
+                'migration with resource request %(request)s. Trying '
+                'alternate hosts.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target but that host is not new enough to support '
+                'the migration with resource request %(request)s. Trying '
+                'another alternate.',
+                {'host': 'host2',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.fill_provider_mapping')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_reschedule_old_compute_skipped(
+            self, mock_get_service, mock_claim_resources, mock_fill_mapping,
+            mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        mock_get_service.side_effect = [first_service, second_service]
+
+        # set up task for re-schedule
+        task.host_list = [first, second]
+
+        selected = task._reschedule()
+
+        self.assertEqual(second, selected)
+        self.assertEqual([], task.host_list)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_claim_resources.assert_called_once_with(
+            self.context.elevated(), task.reportclient, task.request_spec,
+            self.instance.uuid,
+            {"allocations": {uuids.host2: resources}}, '1.19')
+        mock_fill_mapping.assert_called_once_with(
+            task.context, task.reportclient, task.request_spec, second)
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.fill_provider_mapping')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_reschedule_old_computes_no_more_alternates(
+            self, mock_get_service, mock_claim_resources, mock_fill_mapping,
+            mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 38
+        mock_get_service.side_effect = [first_service, second_service]
+
+        # set up task for re-schedule
+        task.host_list = [first, second]
+
+        self.assertRaises(exception.MaxRetriesExceeded, task._reschedule)
+
+        self.assertEqual([], task.host_list)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_claim_resources.assert_not_called()
+        mock_fill_mapping.assert_not_called()
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host2',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
+
 
 class MigrationTaskAllocationUtils(test.NoDBTestCase):
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
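
A related note, illustrative rather than part of the patch: the gate in
_support_resource_request keys off compute service version 39. Assuming
Nova's existing nova.objects.service.get_minimum_version_all_cells helper, a
deployment-wide pre-check could be sketched as follows; the surrounding
function name is hypothetical.

    # Hypothetical pre-check (not in this patch): True when every
    # nova-compute in every cell is new enough for the >= 39 gate above.
    from nova import context as nova_context
    from nova.objects import service as service_obj

    def all_computes_support_migration_with_resources():
        ctxt = nova_context.get_admin_context()
        return service_obj.get_minimum_version_all_cells(
            ctxt, ['nova-compute']) >= 39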