Reject migration with QoS port from conductor if RPC pinned

The MigrationTask in the conductor already checks the service version as
old computes cannot support migration with QoS port. However it is still
possible that every compute is new but the compute RPC API is pinned to
< 5.2. In this case the migration still cannot be supported.

This patch adds an extra RPC version check to the conductor.

Change-Id: Ib4e0b9ab050a59ab5a290e6eecea01b87c3bd4c6
Closes-Bug: #1844993
This commit is contained in:
Balazs Gibizer 2019-09-23 12:48:20 +02:00
parent a3024b5cea
commit 4a10f8eaa7
4 changed files with 46 additions and 58 deletions

View File

@ -806,6 +806,13 @@ class ComputeAPI(object):
block_migration=block_migration,
disk=disk, migrate_data=migrate_data)
def supports_resize_with_qos_port(self, ctxt):
    """Check whether the compute RPC API can send version 5.2.

    Version 5.2 is the minimum needed to migrate or resize a server
    that has ports with a resource request, so callers use this to
    reject such moves while the RPC API is pinned lower during a
    rolling upgrade.
    """
    return self.router.client(ctxt).can_send_version('5.2')
def prep_resize(self, ctxt, instance, image, instance_type, host,
migration, request_spec, filter_properties, node,
clean_shutdown, host_list):

View File

@ -187,11 +187,14 @@ class MigrationTask(base.TaskBase):
def _support_resource_request(self, selection):
    """Returns true if the host is new enough to support resource request
    during migration and that the RPC API version is not pinned during
    rolling upgrade.

    :param selection: a scheduler Selection naming the candidate
        destination host (``selection.service_host`` is read).
    :returns: True only if both the destination nova-compute service is
        new enough (service version >= 39) AND the compute RPC API can
        send version 5.2; False otherwise.
    """
    # NOTE: the diff view had lost its -/+ markers here, leaving both
    # the old bare version check and the new combined check in place;
    # this is the reconstructed post-patch code. Service version 39 is
    # the first that handles migration with qos ports, and the RPC
    # check guards against a deployment where every compute is new but
    # the compute RPC API is still pinned below 5.2.
    svc = objects.Service.get_by_host_and_binary(
        self.context, selection.service_host, 'nova-compute')
    return (svc.version >= 39 and
            self.compute_rpcapi.supports_resize_with_qos_port(
                self.context))
# TODO(gibi): Remove this compat code when nova doesn't need to support
# Train computes any more.
@ -226,7 +229,8 @@ class MigrationTask(base.TaskBase):
LOG.debug(
'Scheduler returned host %(host)s as a possible migration target '
'but that host is not new enough to support the migration with '
'resource request %(request)s. Trying alternate hosts.',
'resource request %(request)s or the compute RPC is pinned to '
'less than 5.2. Trying alternate hosts.',
{'host': selection_list[0].service_host,
'request': self.request_spec.requested_resources},
instance=self.instance)
@ -267,7 +271,8 @@ class MigrationTask(base.TaskBase):
LOG.debug(
'Scheduler returned alternate host %(host)s as a possible '
'migration target but that host is not new enough to '
'support the migration with resource request %(request)s. '
'support the migration with resource request %(request)s '
'or the compute RPC is pinned to less than 5.2. '
'Trying another alternate.',
{'host': selection.service_host,
'request': self.request_spec.requested_resources},

View File

@ -6873,77 +6873,51 @@ class ServerMoveWithPortResourceRequestTest(
server = self._create_server_with_ports(
non_qos_normal_port, qos_normal_port)
self.api.post_server_action(server['id'], {'migrate': None})
# There are ports with resource request but the finish_resize is
called without request_spec (as that is only added in 5.2 and we are
# pinned to 5.1) and therefore the request group - resource provider
# mapping cannot be calculated for these ports so the server goes to
# ERROR on the target host. This is bug 1844993.
server = self._wait_for_server_parameter(
self.api, server,
# Note that the server remains on the dest host after the
# failure as we are failing in finish_resize
{'OS-EXT-SRV-ATTR:host': 'host2',
'OS-EXT-STS:task_state': None,
'status': 'ERROR'})
# This migration is expected to fail as the old RPC does not provide
# enough information to do a proper port binding on the target host.
# The MigrationTask in the conductor checks that the RPC is new enough
# for this request for each possible destination provided by the
# scheduler and skips the old hosts.
ex = self.assertRaises(
client.OpenStackApiException, self.api.post_server_action,
server['id'], {'migrate': None})
self.assertEqual(400, ex.response.status_code)
self.assertIn('No valid host was found.', six.text_type(ex))
# The migration is put into error
self._wait_for_migration_status(server, ['error'])
self.assertIn(
'Provider mappings are not available to the compute service but '
'are required for ports with a resource request. If compute RPC '
'API versions are pinned for a rolling upgrade, you will need to '
'retry this operation once the RPC version is unpinned and the '
'nova-compute services are all upgraded',
self.stdlog.logger.output)
self.assertIn(
'Provider mappings are not available to the compute service but '
'are required for ports with a resource request.',
server['fault']['message'])
self._wait_for_action_fail_completion(
server, instance_actions.MIGRATE, 'compute_finish_resize',
self.admin_api)
# Note that we don't get error notification about the failed resize
fake_notifier.wait_for_versioned_notifications(
'instance.resize.end')
# Just a generic error notification
fake_notifier.wait_for_versioned_notifications(
'compute.exception')
# The migration is rejected so the instance remains on the source host
server = self.api.get_server(server['id'])
self.assertEqual('ACTIVE', server['status'])
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
# The migration failed, so the migration allocation on the source host
# is deleted
# The migration allocation is deleted
migration_allocations = self.placement_api.get(
'/allocations/%s' % migration_uuid).body['allocations']
self.assertEqual({}, migration_allocations)
# Note(gibi): inlined _check_allocation() as the current state of the
# port binding is so complicated that it cannot be handled with
# _check_allocation
# The instance is still allocated from the source host
updated_non_qos_port = self.neutron.show_port(
non_qos_normal_port['id'])['port']
updated_qos_port = self.neutron.show_port(
qos_normal_port['id'])['port']
allocations = self.placement_api.get(
'/allocations/%s' % server['id']).body['allocations']
# We expect one set of allocations for the compute resources on the
# compute rp and one set for the networking resources on the ovs
# bridge rp due to the qos_port resource request
# Note that the resource allocation is on the dest host...
self.assertEqual(2, len(allocations))
self.assertComputeAllocationMatchesFlavor(
allocations, self.compute2_rp_uuid, self.flavor_with_group_policy)
allocations, self.compute1_rp_uuid, self.flavor_with_group_policy)
ovs_allocations = allocations[
self.ovs_bridge_rp_per_host[self.compute2_rp_uuid]]['resources']
self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]]['resources']
self.assertPortMatchesAllocation(qos_normal_port, ovs_allocations)
# ... but the binding update failed so the allocation key in the
# binding:profile still points to the networking RP on the source host
# (host1). This is bug 1844993.
qos_binding_profile = updated_qos_port['binding:profile']
self.assertEqual(self.ovs_bridge_rp_per_host[self.compute1_rp_uuid],
qos_binding_profile['allocation'])

View File

@ -491,7 +491,8 @@ class MigrationTaskTestCase(test.NoDBTestCase):
mock_debug.assert_called_once_with(
'Scheduler returned host %(host)s as a possible migration target '
'but that host is not new enough to support the migration with '
'resource request %(request)s. Trying alternate hosts.',
'resource request %(request)s or the compute RPC is pinned to '
'less than 5.2. Trying alternate hosts.',
{'host': 'host1',
'request': self.request_spec.requested_resources},
instance=self.instance)
@ -582,8 +583,8 @@ class MigrationTaskTestCase(test.NoDBTestCase):
mock.call(
'Scheduler returned host %(host)s as a possible migration '
'target but that host is not new enough to support the '
'migration with resource request %(request)s. Trying '
'alternate hosts.',
'migration with resource request %(request)s or the compute '
'RPC is pinned to less than 5.2. Trying alternate hosts.',
{'host': 'host1',
'request': self.request_spec.requested_resources},
instance=self.instance),
@ -674,16 +675,17 @@ class MigrationTaskTestCase(test.NoDBTestCase):
mock.call(
'Scheduler returned host %(host)s as a possible migration '
'target but that host is not new enough to support the '
'migration with resource request %(request)s. Trying '
'alternate hosts.',
'migration with resource request %(request)s or the compute '
'RPC is pinned to less than 5.2. Trying alternate hosts.',
{'host': 'host1',
'request': self.request_spec.requested_resources},
instance=self.instance),
mock.call(
'Scheduler returned alternate host %(host)s as a possible '
'migration target but that host is not new enough to support '
'the migration with resource request %(request)s. Trying '
'another alternate.',
'the migration with resource request %(request)s or the '
'compute RPC is pinned to less than 5.2. Trying another '
'alternate.',
{'host': 'host2',
'request': self.request_spec.requested_resources},
instance=self.instance),