Check if destination can support the src mdev types

Now that the source knows that both the computes support the right
libvirt version, it passes to the destination the list of mdevs it has
for the instance. By this change, we'll verify if the types of those
mdevs are actually supported by the destination.
On the next change, we'll pass the destination mdevs back to the
source.

Partially-Implements: blueprint libvirt-mdev-live-migrate
Change-Id: Icb52fa5eb0adc0aa6106a90d87149456b39e79c2
This commit is contained in:
Sylvain Bauza 2023-12-20 15:56:59 +01:00
parent baa78326dd
commit 489aab934c
6 changed files with 127 additions and 1 deletions

View File

@ -8567,6 +8567,10 @@ class ComputeManager(manager.Manager):
# Update migrate VIFs with the newly claimed PCI devices
self._update_migrate_vifs_profile_with_pci(
migrate_data.vifs, port_id_to_pci)
# This should always be the last check as we persist some internal
# dictionary value in the libvirt driver.
migrate_data = self.driver.check_source_migrate_data_at_dest(
ctxt, instance, migrate_data, migration, limits, allocs)
finally:
self.driver.cleanup_live_migration_destination_check(ctxt,
dest_check_data)

View File

@ -540,6 +540,34 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):
'' % server['id'],
log_out)
def test_live_migration_fails_due_to_non_supported_mdev_types(self):
self.flags(
enabled_mdev_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE],
group='devices')
self.src = self.restart_compute_service(self.src.host)
self.flags(
enabled_mdev_types=[fakelibvirt.NVIDIA_12_VGPU_TYPE],
group='devices')
self.dest = self.restart_compute_service(self.dest.host)
# Force a periodic run in order to make sure all service resources
# are changed before we call create_service()
self._run_periodics()
self.server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, networks='auto', host=self.src.host)
# now live migrate that server
ex = self.assertRaises(
client.OpenStackApiException,
self._live_migrate,
self.server, 'completed')
self.assertEqual(500, ex.response.status_code)
self.assertIn('NoValidHost', str(ex))
log_out = self.stdlog.logger.output
# The log is fully JSON-serialized, so just check the phrase.
self.assertIn('Unable to migrate %s: ' % self.server['id'], log_out)
self.assertIn('Source mdev types ', log_out)
self.assertIn('are not supported by this compute : ', log_out)
def test_live_migrate_server(self):
self.server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',

View File

@ -3576,10 +3576,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
'_update_migrate_vifs_profile_with_pci'),
mock.patch.object(self.compute, '_dest_can_numa_live_migrate'),
mock.patch('nova.compute.manager.LOG'),
mock.patch.object(self.compute.driver,
'check_source_migrate_data_at_dest'),
) as (mock_lm_claim, mock_get, mock_check_dest, mock_check_src,
mock_check_clean, mock_fault_create, mock_event,
mock_create_mig_vif, mock_nw_info, mock_claim_pci,
mock_update_mig_vif, mock_dest_can_numa, mock_log):
mock_update_mig_vif, mock_dest_can_numa, mock_log,
mock_post_check):
mock_get.side_effect = (src_info, dest_info)
mock_check_dest.return_value = dest_check_data
mock_dest_can_numa.return_value = dest_check_data
@ -3587,6 +3590,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
dst_numa_info=objects.LibvirtLiveMigrateNUMAInfo())
mock_lm_claim.return_value = post_claim_md
# In the case of a NUMA related claim, migrate_data is updated
post_claim_mig_data = post_claim_md if src_numa_lm else mig_data
mock_post_check.return_value = post_claim_mig_data
if do_raise:
mock_check_src.side_effect = test.TestingException
mock_fault_create.return_value = \
@ -3631,6 +3638,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_event.assert_called_once_with(
self.context, 'compute_check_can_live_migrate_destination',
CONF.host, instance.uuid, graceful_exit=False)
mock_post_check.assert_called_once_with(
self.context, instance, post_claim_mig_data, migration, limits,
None)
return result
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(

View File

@ -11699,6 +11699,43 @@ class LibvirtConnTestCase(test.NoDBTestCase,
for vif in result.vifs:
self.assertTrue(vif.supports_os_vif_delegation)
def test_check_source_migrate_data_at_dest_no_src_mdevs(self):
"""Noop test in case no mdevs from the source."""
instance_ref = objects.Instance(**self.test_instance)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
dest_check_data = objects.LibvirtLiveMigrateData(
filename="file",
block_migration=True,
disk_over_commit=False,
disk_available_mb=1024)
allocations = {uuids.rp1: {'resources': {orc.VGPU: 1}}}
result = drvr.check_source_migrate_data_at_dest(
self.context, instance_ref, dest_check_data, None, None,
allocations)
self.assertEqual(dest_check_data, result)
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_supported_vgpu_types')
def test_check_source_migrate_data_at_dest_types_not_supported(self,
mock_get):
"""Raises an exception if the destination doesn't support the source
mdev types.
"""
mock_get.return_value = ['type1']
instance_ref = objects.Instance(**self.test_instance)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
dest_check_data = objects.LibvirtLiveMigrateData(
filename="file",
block_migration=True,
disk_over_commit=False,
disk_available_mb=1024,
source_mdev_types={uuids.mdev1: 'wrongtype'})
allocations = {uuids.rp1: {'resources': {orc.VGPU: 1}}}
self.assertRaises(exception.MigrationPreCheckError,
drvr.check_source_migrate_data_at_dest,
self.context, instance_ref, dest_check_data,
None, None, allocations)
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
@mock.patch.object(fakelibvirt.Connection, 'getVersion')
def _test_host_can_support_mdev_lm(self, mock_getversion,

View File

@ -1322,6 +1322,22 @@ class ComputeDriver(object):
"""
raise NotImplementedError()
def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data,
migration, limits, allocs):
"""Runs the last checks on the destination after the source returned
the migrate_data.
:param ctxt: security context
:param instance: nova.db.main.models.Instance
:param migrate_data: result of check_can_live_migrate_source
:param migration: The Migration object for this live migration
:param limits: The SchedulerLimits object for this live migration
:param allocs: Allocations for this instance
:returns: a LibvirtLiveMigrateData object
:raises: MigrationPreCheckError
"""
return migrate_data
def post_claim_migrate_data(self, context, instance, migrate_data, claim):
"""Returns migrate_data augmented with any information obtained from
the claim. Intended to run on the destination of a live-migration

View File

@ -9771,6 +9771,37 @@ class LibvirtDriver(driver.ComputeDriver):
return data
def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data,
migration, limits, allocs):
"""Runs the last checks on the destination after the source returned
the migrate_data.
:param ctxt: security context
:param instance: nova.db.main.models.Instance
:param migrate_data: result of check_can_live_migrate_source
:param migration: The Migration object for this live migration
:param limits: The SchedulerLimits object for this live migration
:param allocs: Allocations for this instance
:returns: a LibvirtLiveMigrateData object
:raises: MigrationPreCheckError
"""
if ('source_mdev_types' in migrate_data and
migrate_data.source_mdev_types):
# The instance that needs to be live-migrated has some mdevs
src_mdev_types = migrate_data.source_mdev_types
# As a reminder, src_mdev_types is a dict of mdev UUID and its type
# Are all the types supported by this compute ?
if not all(map(lambda m_type: m_type in self.supported_vgpu_types,
src_mdev_types.values())):
reason = (_('Unable to migrate %(instance_uuid)s: '
'Source mdev types %(src_types)s are not '
'supported by this compute : %(dest_types)s ' %
{'instance_uuid': instance.uuid,
'src_types': list(src_mdev_types.values()),
'dest_types': self.supported_vgpu_types}))
raise exception.MigrationPreCheckError(reason)
return migrate_data
def post_claim_migrate_data(self, context, instance, migrate_data, claim):
migrate_data.dst_numa_info = self._get_live_migrate_numa_info(
claim.claimed_numa_topology, claim.flavor, claim.image_meta)