Merge "Reserve mdevs to return to the source"
commit 9a9ab2128b
@@ -9161,7 +9161,7 @@ class ComputeManager(manager.Manager):
         rollback process
 
         There may be other resources which need cleanup; currently this is
-        limited to vPMEM devices with the libvirt driver.
+        limited to vPMEM and mdev devices with the libvirt driver.
 
         :param migrate_data: implementation specific data
         :param migr_ctxt: specific resources stored in migration_context
@@ -9182,12 +9182,14 @@ class ComputeManager(manager.Manager):
                                        objects.LibvirtVPMEMDevice)):
                         has_vpmem = True
                         break
+            has_mdevs = 'target_mdevs' in migrate_data
             # No instance booting at source host, but instance dir
             # must be deleted for preparing next block migration
             # must be deleted for preparing next live migration w/o shared
             # storage
             # vpmem must be cleaned
-            do_cleanup = not migrate_data.is_shared_instance_path or has_vpmem
+            do_cleanup = (not migrate_data.is_shared_instance_path or
+                          has_vpmem or has_mdevs)
             destroy_disks = not migrate_data.is_shared_block_storage
         elif isinstance(migrate_data, migrate_data_obj.HyperVLiveMigrateData):
             # NOTE(claudiub): We need to cleanup any zombie Planned VM.
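
For readers following the change: the cleanup decision in the hunk above can be read in isolation as the small sketch below. It is not Nova code; a SimpleNamespace stands in for the LibvirtLiveMigrateData object and only the fields shown are assumed.

from types import SimpleNamespace


def cleanup_flags(migrate_data, has_vpmem=False):
    # Mirrors the hunk above: mdevs reserved on the destination force a
    # cleanup pass on rollback, just like vPMEM devices do.
    has_mdevs = getattr(migrate_data, 'target_mdevs', None) is not None
    do_cleanup = (not migrate_data.is_shared_instance_path or
                  has_vpmem or has_mdevs)
    destroy_disks = not migrate_data.is_shared_block_storage
    return do_cleanup, destroy_disks


# Shared instance path, but a target mdev was reserved: cleanup still runs.
md = SimpleNamespace(is_shared_instance_path=True,
                     is_shared_block_storage=True,
                     target_mdevs={'src-mdev-uuid': 'dst-mdev-uuid'})
assert cleanup_flags(md) == (True, False)
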
@@ -582,6 +582,50 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):
         self.assertRaises(KeyError, self.assert_mdev_usage, self.dest, 0)
 
 
+class VGPULiveMigrationTestsLMFailed(VGPULiveMigrationTests):
+    """Tests that expect the live migration to fail, and exist to test the
+    rollback code. Stubs out fakelibvirt's migrateToURI3() with a stub that
+    "fails" the migration.
+    """
+
+    def _migrate_stub(self, domain, destination, params, flags):
+        """Designed to stub fakelibvirt's migrateToURI3 and "fail" the
+        live migration by monkeypatching jobStats() to return an error.
+        """
+
+        # During the migration, we reserved a mdev in the dest
+        self.assert_mdev_usage(self.src, 1)
+        self.assert_mdev_usage(self.dest, 1)
+
+        # The resource update periodic task should not change the consumed
+        # mdevs, as the migration is still happening. As usual, running
+        # periodics is not necessary to make the test pass, but it's good to
+        # make sure it does the right thing.
+        self._run_periodics()
+        self.assert_mdev_usage(self.src, 1)
+        self.assert_mdev_usage(self.dest, 1)
+
+        source = self.computes['src']
+        conn = source.driver._host.get_connection()
+        dom = conn.lookupByUUIDString(self.server['id'])
+        dom.fail_job()
+        self.migrate_stub_ran = True
+
+    def test_live_migrate_server(self):
+        self.server = self._create_server(
+            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
+            flavor_id=self.flavor, networks='auto', host=self.src.host)
+        inst = objects.Instance.get_by_uuid(self.context, self.server['id'])
+        mdevs = self.src.driver._get_all_assigned_mediated_devices(inst)
+        self.assertEqual(1, len(mdevs))
+
+        self._live_migrate(self.server, 'failed')
+
+        # We released the reserved mdev after the migration failed.
+        self.assert_mdev_usage(self.src, 1)
+        self.assert_mdev_usage(self.dest, 0)
+
+
 class DifferentMdevClassesTests(VGPUTestBase):
 
     def setUp(self):
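
The rollback test above relies on the hypervisor reporting a failed migration job. Stripped of the fakelibvirt fixture, the pattern looks roughly like this sketch; FakeDomain and its fail_job() helper are illustrative stand-ins, not the fixture's real API (real libvirt reports a numeric job type rather than a string).

class FakeDomain:
    """Minimal stand-in for a libvirt domain used by a failing-migration stub."""

    def __init__(self):
        self._failed = False

    def fail_job(self):
        # Flip the flag that the stubbed jobStats() will report.
        self._failed = True

    def jobStats(self, flags=0):
        # Simplified: a string is enough for the sketch.
        return {'type': 'failed' if self._failed else 'completed'}


def migrate_stub(domain, destination, params, flags):
    # Instead of migrating, mark the job as failed so the driver's monitoring
    # loop takes its rollback path.
    domain.fail_job()


dom = FakeDomain()
migrate_stub(dom, 'dest-host', params={}, flags=0)
assert dom.jobStats()['type'] == 'failed'
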
@@ -11736,6 +11736,73 @@ class LibvirtConnTestCase(test.NoDBTestCase,
                           self.context, instance_ref, dest_check_data,
                           None, None, allocations)
 
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_get_mdev_types_from_uuids')
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_allocate_mdevs')
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_get_supported_vgpu_types')
+    def test_check_source_migrate_data_at_dest_types_missing_mdevs(self,
+                                                                   mock_get,
+                                                                   mock_alloc,
+                                                                   mock_gmtfu):
+        """Raises an exception if the destination doesn't allocate all the
+        necessary mediated devices.
+        """
+        mock_get.return_value = ['type1']
+        mock_alloc.return_value = [uuids.dst_mdev1]
+        mock_gmtfu.return_value = {uuids.dst_mdev1: 'type1'}
+        instance_ref = objects.Instance(**self.test_instance)
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
+        dest_check_data = objects.LibvirtLiveMigrateData(
+            filename="file",
+            block_migration=True,
+            disk_over_commit=False,
+            disk_available_mb=1024,
+            src_supports_mdev_live_migration=True,
+            source_mdev_types={uuids.src_mdev1: 'type1',
+                               uuids.src_mdev2: 'type1'})
+        allocations = {uuids.rp1: {'resources': {orc.VGPU: 2}}}
+        self.assertRaises(exception.MigrationPreCheckError,
+                          drvr.check_source_migrate_data_at_dest,
+                          self.context, instance_ref, dest_check_data,
+                          None, None, allocations)
+
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_get_mdev_types_from_uuids')
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_allocate_mdevs')
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       '_get_supported_vgpu_types')
+    def test_check_source_migrate_data_at_dest_types_claim_mdevs(self,
+                                                                 mock_get,
+                                                                 mock_alloc,
+                                                                 mock_gmtfu):
+        mock_get.return_value = ['type1']
+        mock_alloc.return_value = [uuids.dst_mdev1]
+        mock_gmtfu.return_value = {uuids.dst_mdev1: 'type1'}
+        instance_ref = objects.Instance(**self.test_instance)
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
+        dest_check_data = objects.LibvirtLiveMigrateData(
+            filename="file",
+            block_migration=True,
+            disk_over_commit=False,
+            disk_available_mb=1024,
+            src_supports_mdev_live_migration=True,
+            source_mdev_types={uuids.src_mdev1: 'type1'})
+        allocations = {uuids.rp1: {'resources': {orc.VGPU: 1}}}
+        drvr.check_source_migrate_data_at_dest(
+            self.context, instance_ref, dest_check_data, None, None,
+            allocations)
+        mock_alloc.assert_called_once_with(allocations)
+        mock_gmtfu.assert_called_once_with([uuids.dst_mdev1])
+        self.assertEqual({uuids.src_mdev1: uuids.dst_mdev1},
+                         dest_check_data.target_mdevs)
+        self.assertEqual({instance_ref.uuid: [uuids.dst_mdev1]},
+                         drvr.instance_claimed_mdevs)
+
     @mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
     @mock.patch.object(fakelibvirt.Connection, 'getVersion')
     def _test_host_can_support_mdev_lm(self, mock_getversion,
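
A note on the decorator stacking used by the two tests above: stacked mock.patch.object decorators are applied bottom-up, so the decorator closest to the function provides the first mock argument (here mock_get maps to '_get_supported_vgpu_types'). A minimal, self-contained illustration, unrelated to the Nova classes:

from unittest import mock


class Service:
    def get(self):
        return 'real get'

    def alloc(self):
        return 'real alloc'


@mock.patch.object(Service, 'get')    # outermost -> last mock argument
@mock.patch.object(Service, 'alloc')  # innermost -> first mock argument
def check_order(mock_alloc, mock_get):
    svc = Service()
    svc.alloc()
    svc.get()
    # The decorator closest to the function supplied the first parameter.
    mock_alloc.assert_called_once_with()
    mock_get.assert_called_once_with()


check_order()
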
@@ -13426,12 +13493,13 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         mock_get_instance_path.return_value = fake_instance_path
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
 
+        instance = objects.Instance(id=1, uuid=uuids.instance)
         migrate_data = objects.LibvirtLiveMigrateData(
             is_shared_instance_path=False,
             instance_relative_path=False)
         self.assertRaises(exception.Invalid,
                           drvr.rollback_live_migration_at_destination,
-                          "context", "instance", [], None, True, migrate_data)
+                          "context", instance, [], None, True, migrate_data)
         mock_exist.assert_called_once_with(fake_instance_path)
         mock_shutil.assert_called_once_with(fake_instance_path)
 
@@ -13453,17 +13521,24 @@ class LibvirtConnTestCase(test.NoDBTestCase,
 
         mock_destroy.side_effect = fake_destroy
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
 
+        instance = objects.Instance(id=1, uuid=uuids.instance)
         migrate_data = objects.LibvirtLiveMigrateData(
             is_shared_instance_path=True,
             instance_relative_path=False)
-        drvr.rollback_live_migration_at_destination("context", "instance", [],
+        drvr.instance_claimed_mdevs = {instance.uuid: [uuids.mdev1],
+                                       uuids.other_inst: [uuids.mdev2]}
+        drvr.rollback_live_migration_at_destination("context", instance, [],
                                                     None, True, migrate_data)
-        mock_destroy.assert_called_once_with("context", "instance", [],
+        mock_destroy.assert_called_once_with("context", instance, [],
                                              None, True)
         self.assertFalse(mock_get_instance_path.called)
         self.assertFalse(mock_exist.called)
         self.assertFalse(mock_shutil.called)
+        # Assert we delete existing claimed mdevs for the instance
+        self.assertEqual({uuids.other_inst: [uuids.mdev2]},
+                         drvr.instance_claimed_mdevs)
 
     @mock.patch.object(fakelibvirt.Domain, "XMLDesc")
     def test_live_migration_copy_disk_paths_tunnelled(self, mock_xml):
@@ -20257,15 +20332,20 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         mock_get_guest.return_value = guest
 
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
         net_info = network_model.NetworkInfo()
         mock_get_interfaces.return_type = []
         mock_image_meta.return_value = mock.sentinel.image_meta
+        drvr.instance_claimed_mdevs = {instance.uuid: [uuids.mdev1],
+                                       uuids.other_inst: [uuids.mdev2]}
         drvr.post_live_migration_at_destination(mock.ANY, instance, net_info)
         # Assert that we don't try to write anything to the destination node
         # since the source live migrated with the VIR_MIGRATE_PERSIST_DEST flag
         mock_write_instance_config.assert_not_called()
         mock_attach.assert_not_called()
-
+        # Assert we delete existing claimed mdevs for the instance
+        self.assertEqual({uuids.other_inst: [uuids.mdev2]},
+                         drvr.instance_claimed_mdevs)
         vif = network_model.VIF(id=uuids.port_id,
                                 vnic_type=network_model.VNIC_TYPE_NORMAL)
         vif_direct = network_model.VIF(id=uuids.port_id,
@@ -27058,6 +27138,40 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
         self.assertEqual({},
                          drvr._get_all_assigned_mediated_devices(fake_inst))
 
+    @mock.patch.object(host.Host, 'list_guests')
+    def test_get_all_assigned_mediated_devices_with_claimed_mdevs(self,
+                                                                  list_guests):
+        dom_with_vgpu = """
+              <domain type="kvm">
+                <devices>
+                  <hostdev mode='subsystem' type='mdev' model='vfio-pci'>
+                    <source>
+                      <address uuid='%s'/>
+                    </source>
+                  </hostdev>
+                </devices>
+              </domain>
+              """ % uuids.mdev
+        guest1 = libvirt_guest.Guest(FakeVirtDomain())
+        guest2 = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_with_vgpu))
+        list_guests.return_value = [guest1, guest2]
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
+        # Just add some claimed mdevs by the live migration
+        drvr.instance_claimed_mdevs = {uuids.inst1: [uuids.mdev1, uuids.mdev2],
+                                       uuids.inst2: [uuids.mdev3]}
+        self.assertEqual({uuids.mdev: guest2.uuid,
+                          uuids.mdev1: uuids.inst1,
+                          uuids.mdev2: uuids.inst1,
+                          uuids.mdev3: uuids.inst2},
+                         drvr._get_all_assigned_mediated_devices())
+
+        # Just double-check we only return claimed mdevs from inst2 if we ask
+        # for it
+        fake_inst2 = objects.Instance(uuid=uuids.inst2)
+        self.assertEqual({uuids.mdev3: uuids.inst2},
+                         drvr._get_all_assigned_mediated_devices(fake_inst2))
+
     def test_allocate_mdevs_with_no_vgpu_allocations(self):
         allocations = {
             'rp1': {
@@ -30012,6 +30126,17 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase):
         </domain>'''
         self.assertXmlEqual(expected, guest.to_xml())
 
+    def test_cleanup_lingering_instance_resources_deletes_mdevs(self):
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        self.assertEqual({}, drvr.instance_claimed_mdevs)
+        # Adding some claimed mdevs
+        instance = objects.Instance(id=1, uuid=uuids.instance, resources=None)
+        drvr.instance_claimed_mdevs = {instance.uuid: [uuids.mdev1],
+                                       uuids.other_inst: [uuids.mdev2]}
+        drvr.cleanup_lingering_instance_resources(instance)
+        self.assertEqual({uuids.other_inst: [uuids.mdev2]},
+                         drvr.instance_claimed_mdevs)
+
 
 @ddt.ddt
 class LibvirtDeviceRemoveEventTestCase(test.NoDBTestCase):
@@ -540,6 +540,11 @@ class LibvirtDriver(driver.ComputeDriver):
         self.mdev_classes = set([])
         self.supported_vgpu_types = self._get_supported_vgpu_types()
 
+        # This dict is for knowing which mdevs are already claimed by some
+        # instance. This is keyed by instance UUID and the value is a list
+        # of mediated device UUIDs.
+        self.instance_claimed_mdevs = {}
+
         # Handles ongoing device manipultion in libvirt where we wait for the
         # events about success or failure.
         self._device_event_handler = AsyncDeviceEventsHandler()
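
instance_claimed_mdevs is a plain dict keyed by instance UUID. A standalone sketch of the claim/release lifecycle the rest of the change builds on (class and method names here are illustrative, not the driver's API):

class MdevClaims:
    """Track mdevs reserved for incoming live migrations, per instance."""

    def __init__(self):
        # instance UUID -> list of mediated device UUIDs
        self.claims = {}

    def claim(self, instance_uuid, mdev_uuids):
        # Destination pre-check: remember which mdevs were picked.
        self.claims[instance_uuid] = list(mdev_uuids)

    def release(self, instance_uuid):
        # Migration success, rollback or lingering-resource cleanup;
        # pop() with a default keeps the release idempotent.
        return self.claims.pop(instance_uuid, None)


claims = MdevClaims()
claims.claim('inst-1', ['mdev-a'])
claims.claim('inst-2', ['mdev-b', 'mdev-c'])
assert claims.release('inst-1') == ['mdev-a']
assert claims.release('inst-1') is None     # already released: no error
assert claims.claims == {'inst-2': ['mdev-b', 'mdev-c']}
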
@@ -1732,6 +1737,13 @@ class LibvirtDriver(driver.ComputeDriver):
         vpmems = self._get_vpmems(instance)
         if vpmems:
             self._cleanup_vpmems(vpmems)
+        # we may have some claimed mdev residue, we need to delete it
+        mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
+        if mdevs:
+            # The live migration was aborted, we need to remove the reserved
+            # values.
+            LOG.debug("Unclaiming mdevs %s from instance %s",
+                      mdevs, instance.uuid)
 
     def _cleanup_vpmems(self, vpmems):
         for vpmem in vpmems:
@@ -8423,6 +8435,12 @@ class LibvirtDriver(driver.ComputeDriver):
         found in the hypervisor.
         """
         allocated_mdevs = {}
+        # Add the reserved mediated devices for live-migration
+        for instance_uuid, mdev_uuids in self.instance_claimed_mdevs.items():
+            if instance and instance.uuid != instance_uuid:
+                continue
+            for mdev in mdev_uuids:
+                allocated_mdevs[mdev] = instance_uuid
         if instance:
             # NOTE(sbauza): In some cases (like a migration issue), the
             # instance can exist in the Nova database but libvirt doesn't know
@@ -8435,7 +8453,8 @@ class LibvirtDriver(driver.ComputeDriver):
             except exception.InstanceNotFound:
                 # Bail out early if libvirt doesn't know about it since we
                 # can't know the existing mediated devices
-                return {}
+                # Some mdevs could be claimed for that instance
+                return allocated_mdevs
             guests = [guest]
         else:
             guests = self._host.list_guests(only_running=False)
@@ -9798,6 +9817,33 @@ class LibvirtDriver(driver.ComputeDriver):
                         'src_types': list(src_mdev_types.values()),
                         'dest_types': self.supported_vgpu_types}))
             raise exception.MigrationPreCheckError(reason)
+        dst_mdevs = self._allocate_mdevs(allocs)
+        dst_mdev_types = self._get_mdev_types_from_uuids(dst_mdevs)
+        target_mdevs: ty.Dict[str, str] = {}
+        for src_mdev, src_type in src_mdev_types.items():
+            for dst_mdev, dst_type in dst_mdev_types.items():
+                # we want to associate by 1:1 between dst and src mdevs
+                if (src_type == dst_type and
+                        src_type not in target_mdevs and
+                        dst_mdev not in target_mdevs.values()):
+                    target_mdevs[src_mdev] = dst_mdev
+                    continue
+        if len(target_mdevs) != len(src_mdev_types):
+            reason = (_('Unable to migrate %(instance_uuid)s: '
+                        'Source mdevs %(src_mdevs)s are not '
+                        'fully mapped for this compute : %(targets)s ' %
+                        {'instance_uuid': instance.uuid,
+                         'src_mdevs': list(src_mdev_types.keys()),
+                         'targets': target_mdevs}))
+            raise exception.MigrationPreCheckError(reason)
+        LOG.debug('Source mediated devices are now associated with those '
+                  'existing mediated devices '
+                  '(source uuid : dest uuid): %s', str(target_mdevs))
+        migrate_data.target_mdevs = target_mdevs
+        self.instance_claimed_mdevs[instance.uuid] = dst_mdevs
+        LOG.info("Current mediated devices reserved by this host "
+                 "(instance UUID: list of reserved mdev UUIDs) : %s ",
+                 self.instance_claimed_mdevs)
         return migrate_data
 
     def post_claim_migrate_data(self, context, instance, migrate_data, claim):
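
The association loop added above pairs each source mdev with an unused destination mdev of the same type and refuses the migration when the mapping is incomplete. The same idea as a standalone function, for reference only (it raises ValueError where the driver raises MigrationPreCheckError):

def map_mdevs(src_mdev_types, dst_mdev_types):
    """Build a 1:1 source -> destination mdev mapping by mdev type.

    Both arguments are dicts of mdev UUID -> mdev type.
    """
    target_mdevs = {}
    for src_mdev, src_type in src_mdev_types.items():
        for dst_mdev, dst_type in dst_mdev_types.items():
            # Pick the first destination mdev of the right type that is not
            # already used by another source mdev.
            if (src_type == dst_type and
                    dst_mdev not in target_mdevs.values()):
                target_mdevs[src_mdev] = dst_mdev
                break
    if len(target_mdevs) != len(src_mdev_types):
        raise ValueError('source mdevs not fully mapped: %s' % target_mdevs)
    return target_mdevs


# Two type1 mdevs on the source, two type1 mdevs allocated on the destination.
assert map_mdevs({'src1': 'type1', 'src2': 'type1'},
                 {'dst1': 'type1', 'dst2': 'type1'}) == {'src1': 'dst1',
                                                         'src2': 'dst2'}

# Only one destination mdev for two source mdevs: the pre-check must fail.
try:
    map_mdevs({'src1': 'type1', 'src2': 'type1'}, {'dst1': 'type1'})
except ValueError:
    pass
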
@@ -10916,6 +10962,12 @@ class LibvirtDriver(driver.ComputeDriver):
                 instance, migrate_data)
             if os.path.exists(instance_dir):
                 shutil.rmtree(instance_dir)
+        mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
+        if mdevs:
+            # The live migration is aborted, we need to remove the reserved
+            # values.
+            LOG.debug("Unclaiming mdevs %s from instance %s",
+                      mdevs, instance.uuid)
 
     def _pre_live_migration_plug_vifs(self, instance, network_info,
                                       migrate_data):
@@ -11298,6 +11350,12 @@ class LibvirtDriver(driver.ComputeDriver):
         """
         self._reattach_instance_vifs(context, instance, network_info)
         self._qemu_monitor_announce_self(instance)
+        mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
+        if mdevs:
+            # The live migration is done, the related mdevs are now associated
+            # to the domain XML so we can remove the reserved values.
+            LOG.debug("Unclaiming mdevs %s from instance %s",
+                      mdevs, instance.uuid)
 
     def _get_instance_disk_info_from_config(self, guest_config,
                                             block_device_info):