Add min service level check for migrate with bandwidth

During resize and cold migrate, the dest compute service needs to update
the port bindings based on the re-calculated port to resource provider
mapping. This update happens in finish_resize.

To do that, the dest compute service needs to be at least at service
level 39.
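
Schematically, that update rewrites each bandwidth-aware port's binding
profile so that it points at the resource provider that now holds the
port's bandwidth allocation. A sketch of the shape of that rewrite
(hypothetical helper, assuming the 'allocation' key nova uses in
binding:profile for ports with a QoS resource request):

def updated_binding_profile(profile, provider_uuid):
    # Hypothetical illustration, not the nova implementation: record the
    # resource provider chosen on the dest host under the 'allocation'
    # key of the port's binding:profile.
    profile = dict(profile)
    profile['allocation'] = provider_uuid
    return profile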

The calculation is based on the RequestSpec. The RequestSpec is sent to
the dest compute in prep_resize, but the dest compute only sends it back
to the source compute in resize_instance if the compute RPC API version
is at least 5.2. Likewise, the source compute only sends the RequestSpec
to the dest compute in finish_resize if the RPC API version is at least
5.2. So migration with bandwidth only works if both computes talk at
least 5.2, which means that the min service level is at least 39.
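
Condensed, the gate this change adds to the conductor is a service-version
comparison on the candidate destination host. A restatement of the check in
the diff below (MIN_LEVEL is a stand-in name; the real code inlines 39):

from nova import objects

MIN_LEVEL = 39

def supports_resource_request(context, selection):
    # Look up the candidate host's nova-compute service record and require
    # it to be at least at the minimum service level.
    svc = objects.Service.get_by_host_and_binary(
        context, selection.service_host, 'nova-compute')
    return svc.version >= MIN_LEVEL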

Change-Id: Ia500b105b9ec70c0d8bd38faa084270b825476eb
blueprint: support-move-ops-with-qos-ports
Balazs Gibizer 2019-09-05 11:34:13 +02:00
parent a2984b647a
commit d6795e1393
2 changed files with 641 additions and 3 deletions

nova/conductor/tasks/migrate.py

@@ -185,6 +185,101 @@ class MigrationTask(base.TaskBase):
         self.request_spec.requested_destination = objects.Destination(
             cell=instance_mapping.cell_mapping)
 
+    def _support_resource_request(self, selection):
+        """Returns true if the host is new enough to support resource request
+        during migration.
+        """
+        svc = objects.Service.get_by_host_and_binary(
+            self.context, selection.service_host, 'nova-compute')
+        return svc.version >= 39
+
+    # TODO(gibi): Remove this compat code when nova doesn't need to support
+    # Train computes any more.
+    def _get_host_supporting_request(self, selection_list):
+        """Return the first compute selection from the selection_list where
+        the service is new enough to support resource request during
+        migration and the resources claimed successfully.
+
+        :param selection_list: a list of Selection objects returned by the
+            scheduler
+        :return: A two tuple. The first item is a Selection object
+            representing the host that supports the request. The second item
+            is a list of Selection objects representing the remaining
+            alternate hosts.
+        :raises MaxRetriesExceeded: if none of the hosts in the
+            selection_list is new enough to support the request or we cannot
+            claim resource on any of the hosts that are new enough.
+        """
+        if not self.request_spec.requested_resources:
+            return selection_list[0], selection_list[1:]
+
+        # Scheduler allocated resources on the first host. So check if the
+        # first host is new enough
+        if self._support_resource_request(selection_list[0]):
+            return selection_list[0], selection_list[1:]
+
+        # First host is old, so we need to use an alternate. Therefore we have
+        # to remove the allocation from the first host.
+        self.reportclient.delete_allocation_for_instance(
+            self.context, self.instance.uuid)
+        LOG.debug(
+            'Scheduler returned host %(host)s as a possible migration target '
+            'but that host is not new enough to support the migration with '
+            'resource request %(request)s. Trying alternate hosts.',
+            {'host': selection_list[0].service_host,
+             'request': self.request_spec.requested_resources},
+            instance=self.instance)
+
+        alternates = selection_list[1:]
+        for i, selection in enumerate(alternates):
+            if self._support_resource_request(selection):
+                # this host is new enough so we need to try to claim resources
+                # on it
+                if selection.allocation_request:
+                    alloc_req = jsonutils.loads(
+                        selection.allocation_request)
+                    resource_claimed = scheduler_utils.claim_resources(
+                        self.context, self.reportclient, self.request_spec,
+                        self.instance.uuid, alloc_req,
+                        selection.allocation_request_version)
+
+                    if not resource_claimed:
+                        LOG.debug(
+                            'Scheduler returned alternate host %(host)s as a '
+                            'possible migration target but resource claim '
+                            'failed on that host. Trying another alternate.',
+                            {'host': selection.service_host},
+                            instance=self.instance)
+                    else:
+                        return selection, alternates[i + 1:]
+                else:
+                    # Some deployments use different schedulers that do not
+                    # use Placement, so they will not have an
+                    # allocation_request to claim with. For those cases,
+                    # there is no concept of claiming, so just assume that
+                    # the resources are available.
+                    return selection, alternates[i + 1:]
+            else:
+                LOG.debug(
+                    'Scheduler returned alternate host %(host)s as a possible '
+                    'migration target but that host is not new enough to '
+                    'support the migration with resource request %(request)s. '
+                    'Trying another alternate.',
+                    {'host': selection.service_host,
+                     'request': self.request_spec.requested_resources},
+                    instance=self.instance)
+
+        # if we reach this point then none of the hosts was new enough for
+        # the request or we failed to claim resources on every alternate
+        reason = ("Exhausted all hosts available during compute service "
+                  "level check for instance %(instance_uuid)s." %
+                  {"instance_uuid": self.instance.uuid})
+        raise exception.MaxRetriesExceeded(reason=reason)
+
     def _execute(self):
         # TODO(sbauza): Remove once all the scheduler.utils methods accept a
         # RequestSpec object in the signature.
@@ -277,9 +372,9 @@ class MigrationTask(base.TaskBase):
         # Since there is only ever one instance to migrate per call, we
         # just need the first returned element.
         selection_list = selection_lists[0]
-        # The selected host is the first item in the list, with the
-        # alternates being the remainder of the list.
-        selection, self.host_list = selection_list[0], selection_list[1:]
+
+        selection, self.host_list = self._get_host_supporting_request(
+            selection_list)
 
         scheduler_utils.fill_provider_mapping(
             self.context, self.reportclient, self.request_spec, selection)
@@ -295,6 +390,17 @@ class MigrationTask(base.TaskBase):
         selection = None
         while self.host_list and not host_available:
             selection = self.host_list.pop(0)
+            if (self.request_spec.requested_resources and not
+                    self._support_resource_request(selection)):
+                LOG.debug(
+                    'Scheduler returned alternate host %(host)s as a possible '
+                    'migration target for re-schedule but that host is not '
+                    'new enough to support the migration with resource '
+                    'request %(request)s. Trying another alternate.',
+                    {'host': selection.service_host,
+                     'request': self.request_spec.requested_resources},
+                    instance=self.instance)
+                continue
             if selection.allocation_request:
                 alloc_req = jsonutils.loads(selection.allocation_request)
             else:
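
Conceptually, the fallback added above follows a simple contract: walk the
scheduler's ordered selections, skip any host that fails the service-level
gate or whose resource claim fails, and return the first usable host
together with the untried remainder as alternates. A toy distillation of
that loop (not the nova implementation; 'supports' and 'claim' stand in for
the service-level check and the Placement claim):

def first_supported(selections, supports, claim):
    """Return (chosen, remaining_alternates), or raise if none qualifies."""
    for i, sel in enumerate(selections):
        # Everything after the chosen host stays available as an alternate.
        if supports(sel) and claim(sel):
            return sel, selections[i + 1:]
    raise RuntimeError('no candidate host is new enough and claimable')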

nova/tests/unit/conductor/tasks/test_migrate.py

@@ -292,6 +292,538 @@ class MigrationTaskTestCase(test.NoDBTestCase):
             self.instance.uuid, alloc_req, '1.19')
         mock_fill_provider_mapping.assert_not_called()
 
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_no_resource_request(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        # no resource request so we expect the first host is simply returned
+        self.request_spec.requested_resources = []
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        alternate = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, alternate]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(first, selected)
+        self.assertEqual([alternate], alternates)
+        mock_get_service.assert_not_called()
+        # The first host was good and the scheduler made allocation on that
+        # host. So we don't expect any resource claim manipulation
+        mock_delete_allocation.assert_not_called()
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_new(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        alternate = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, alternate]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 39
+        mock_get_service.return_value = first_service
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(first, selected)
+        self.assertEqual([alternate], alternates)
+        mock_get_service.assert_called_once_with(
+            task.context, 'host1', 'nova-compute')
+        # The first host was good and the scheduler made allocation on that
+        # host. So we don't expect any resource claim manipulation
+        mock_delete_allocation.assert_not_called()
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_no_alternates(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        mock_get_service.return_value = first_service
+
+        self.assertRaises(
+            exception.MaxRetriesExceeded, task._get_host_supporting_request,
+            selection_list)
+
+        mock_get_service.assert_called_once_with(
+            task.context, 'host1', 'nova-compute')
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_not_called()
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_second_good(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        mock_get_service.side_effect = [first_service, second_service]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(second, selected)
+        self.assertEqual([third], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_called_once_with(
+            self.context, task.reportclient, task.request_spec,
+            self.instance.uuid, {"allocations": {uuids.host2: resources}},
+            '1.19')
+        mock_debug.assert_called_once_with(
+            'Scheduler returned host %(host)s as a possible migration target '
+            'but that host is not new enough to support the migration with '
+            'resource request %(request)s. Trying alternate hosts.',
+            {'host': 'host1',
+             'request': self.request_spec.requested_resources},
+            instance=self.instance)
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_first_host_is_old_second_claim_fails(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        fourth = objects.Selection(
+            service_host="host4",
+            nodename="node4",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host4: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third, fourth]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        third_service = objects.Service(service_host='host3')
+        third_service.version = 39
+        mock_get_service.side_effect = [
+            first_service, second_service, third_service]
+        # not called for the first host but called for the second and third
+        # make the second claim fail to force the selection of the third
+        mock_claim_resources.side_effect = [False, True]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(third, selected)
+        self.assertEqual([fourth], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+            mock.call(task.context, 'host3', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_has_calls([
+            mock.call(
+                self.context, task.reportclient, task.request_spec,
+                self.instance.uuid,
+                {"allocations": {uuids.host2: resources}}, '1.19'),
+            mock.call(
+                self.context, task.reportclient, task.request_spec,
+                self.instance.uuid,
+                {"allocations": {uuids.host3: resources}}, '1.19'),
+        ])
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned host %(host)s as a possible migration '
+                'target but that host is not new enough to support the '
+                'migration with resource request %(request)s. Trying '
+                'alternate hosts.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target but resource claim '
+                'failed on that host. Trying another alternate.',
+                {'host': 'host2'},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
+                'delete_allocation_for_instance')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_get_host_supporting_request_both_first_and_second_too_old(
+            self, mock_get_service, mock_delete_allocation,
+            mock_claim_resources, mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+        third = objects.Selection(
+            service_host="host3",
+            nodename="node3",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host3: resources}}),
+            allocation_request_version='1.19')
+        fourth = objects.Selection(
+            service_host="host4",
+            nodename="node4",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host4: resources}}),
+            allocation_request_version='1.19')
+        selection_list = [first, second, third, fourth]
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 38
+        third_service = objects.Service(service_host='host3')
+        third_service.version = 39
+        mock_get_service.side_effect = [
+            first_service, second_service, third_service]
+        # not called for the first and second hosts but called for the third
+        mock_claim_resources.side_effect = [True]
+
+        selected, alternates = task._get_host_supporting_request(
+            selection_list)
+
+        self.assertEqual(third, selected)
+        self.assertEqual([fourth], alternates)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+            mock.call(task.context, 'host3', 'nova-compute'),
+        ])
+        mock_delete_allocation.assert_called_once_with(
+            task.context, self.instance.uuid)
+        mock_claim_resources.assert_called_once_with(
+            self.context, task.reportclient, task.request_spec,
+            self.instance.uuid,
+            {"allocations": {uuids.host3: resources}}, '1.19')
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned host %(host)s as a possible migration '
+                'target but that host is not new enough to support the '
+                'migration with resource request %(request)s. Trying '
+                'alternate hosts.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target but that host is not new enough to support '
+                'the migration with resource request %(request)s. Trying '
+                'another alternate.',
+                {'host': 'host2',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.fill_provider_mapping')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_reschedule_old_compute_skipped(
+            self, mock_get_service, mock_claim_resources, mock_fill_mapping,
+            mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 39
+        mock_get_service.side_effect = [first_service, second_service]
+
+        # set up task for re-schedule
+        task.host_list = [first, second]
+
+        selected = task._reschedule()
+
+        self.assertEqual(second, selected)
+        self.assertEqual([], task.host_list)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_claim_resources.assert_called_once_with(
+            self.context.elevated(), task.reportclient, task.request_spec,
+            self.instance.uuid,
+            {"allocations": {uuids.host2: resources}}, '1.19')
+        mock_fill_mapping.assert_called_once_with(
+            task.context, task.reportclient, task.request_spec, second)
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
+
+    @mock.patch.object(migrate.LOG, 'debug')
+    @mock.patch('nova.scheduler.utils.fill_provider_mapping')
+    @mock.patch('nova.scheduler.utils.claim_resources')
+    @mock.patch('nova.objects.Service.get_by_host_and_binary')
+    def test_reschedule_old_computes_no_more_alternates(
+            self, mock_get_service, mock_claim_resources, mock_fill_mapping,
+            mock_debug):
+        self.request_spec.requested_resources = [
+            objects.RequestGroup()
+        ]
+        task = self._generate_task()
+        resources = {
+            "resources": {
+                "VCPU": 1,
+                "MEMORY_MB": 1024,
+                "DISK_GB": 100}}
+
+        first = objects.Selection(
+            service_host="host1",
+            nodename="node1",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host1: resources}}),
+            allocation_request_version='1.19')
+        second = objects.Selection(
+            service_host="host2",
+            nodename="node2",
+            cell_uuid=uuids.cell1,
+            allocation_request=jsonutils.dumps(
+                {"allocations": {uuids.host2: resources}}),
+            allocation_request_version='1.19')
+
+        first_service = objects.Service(service_host='host1')
+        first_service.version = 38
+        second_service = objects.Service(service_host='host2')
+        second_service.version = 38
+        mock_get_service.side_effect = [first_service, second_service]
+
+        # set up task for re-schedule
+        task.host_list = [first, second]
+
+        self.assertRaises(exception.MaxRetriesExceeded, task._reschedule)
+
+        self.assertEqual([], task.host_list)
+        mock_get_service.assert_has_calls([
+            mock.call(task.context, 'host1', 'nova-compute'),
+            mock.call(task.context, 'host2', 'nova-compute'),
+        ])
+        mock_claim_resources.assert_not_called()
+        mock_fill_mapping.assert_not_called()
+        mock_debug.assert_has_calls([
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host1',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+            mock.call(
+                'Scheduler returned alternate host %(host)s as a possible '
+                'migration target for re-schedule but that host is not '
+                'new enough to support the migration with resource '
+                'request %(request)s. Trying another alternate.',
+                {'host': 'host2',
+                 'request': self.request_spec.requested_resources},
+                instance=self.instance),
+        ])
 
 
 class MigrationTaskAllocationUtils(test.NoDBTestCase):
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')