libvirt: Fix the rescue race for vGPU instances

When rescuing an instance that has a vGPU, the vGPU was not attached to
the rescued instance. This left a race condition during the rescue in
which the vGPU could be assigned to another instance.
Instead, we should make sure the vGPU is also attached to the rescued
instance.

Change-Id: I7150e15694bb149ae67da37b5e43b6ea7507fe82
Closes-bug: #1762688
This commit is contained in:
Sylvain Bauza 2018-06-22 14:53:57 +02:00 committed by Matt Riedemann
parent a0bdebac04
commit 1c59397e09
3 changed files with 21 additions and 2 deletions

View File

@ -170,7 +170,8 @@ Caveats
.. note::
All the caveats are related to the Queens release
This information is correct as of the 17.0.0 Queens release. Where
improvements have been made or issues fixed, they are noted per item.
For libvirt:
@ -197,6 +198,8 @@ For libvirt:
instance immediately after rescue. However, rebuilding the rescued instance
only helps if there are other free vGPUs on the host.
.. note:: This has been resolved in the Rocky release [#]_.
For XenServer:
* Suspend and live migration with vGPUs attached depends on support from the
@ -217,6 +220,7 @@ For XenServer:
resize. If you want to migrate an instance, make sure to rebuild it after the
migration.
.. [#] https://bugs.launchpad.net/nova/+bug/1762688
.. Links
.. _Intel GVT-g: https://01.org/igvt-g

View File

@ -18996,6 +18996,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.drvr.detach_interface(self.context, instance, network_info[0])
self.mox.VerifyAll()
@mock.patch('nova.virt.libvirt.LibvirtDriver.'
'_get_all_assigned_mediated_devices')
@mock.patch('nova.virt.libvirt.utils.write_to_file')
# NOTE(mdbooth): The following 4 mocks are required to execute
# get_guest_xml().
@ -19007,11 +19009,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
mock_instance_metadata, mock_supports_direct_io,
mock_build_device_metadata, mock_set_host_enabled,
mock_write_to_file,
mock_get_mdev,
exists=None):
self.flags(instances_path=self.useFixture(fixtures.TempDir()).path)
mock_build_device_metadata.return_value = None
mock_supports_direct_io.return_value = True
mock_get_mdev.return_value = {uuids.mdev1: uuids.inst1}
backend = self.useFixture(
fake_imagebackend.ImageBackendFixture(exists=exists))
@ -19075,6 +19080,11 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.assertEqual(expected_kernel_ramdisk_paths,
kernel_ramdisk_paths)
# The generated domain XML should also contain any existing mdevs
self.assertEqual(
[uuids.mdev1],
doc.xpath("devices/*[@type='mdev']/source/address/@uuid"))
@mock.patch('nova.virt.configdrive.ConfigDriveBuilder._make_iso9660')
def test_rescue_config_drive(self, mock_mkisofs):
instance = self._create_instance({'config_drive': str(True)})

View File

@ -2959,11 +2959,16 @@ class LibvirtDriver(driver.ComputeDriver):
gen_confdrive = functools.partial(self._create_configdrive,
context, instance, injection_info,
rescue=True)
# NOTE(sbauza): Since rescue recreates the guest XML, we need to
# remember the existing mdevs so that they can be reused.
mdevs = self._get_all_assigned_mediated_devices(instance)
mdevs = list(mdevs.keys())
self._create_image(context, instance, disk_info['mapping'],
injection_info=injection_info, suffix='.rescue',
disk_images=rescue_images)
xml = self._get_guest_xml(context, instance, network_info, disk_info,
image_meta, rescue=rescue_images)
image_meta, rescue=rescue_images,
mdevs=mdevs)
self._destroy(instance)
self._create_domain(xml, post_xml_callback=gen_confdrive)