libvirt: Fix the rescue race for vGPU instances

When rescuing an instance that has a vGPU, the guest XML recreated for
the rescue did not include the vGPU. This left a race condition during
the rescue in which the vGPU could be assigned to another instance.
Instead, remember the mediated devices already assigned to the instance
and pass them to the rescue guest XML, so that the rescued instance
keeps its vGPU.

Change-Id: I7150e15694bb149ae67da37b5e43b6ea7507fe82
Closes-bug: #1762688
(cherry picked from commit 1c59397e09)
This commit is contained in:
Sylvain Bauza 2018-06-22 14:53:57 +02:00
parent 0c80d1c5e9
commit 3163c9391e
3 changed files with 21 additions and 2 deletions

View File

@ -172,7 +172,8 @@ Caveats
.. note:: .. note::
All the caveats are related to the Queens release This information is correct as of the 17.0.0 Queens release. Where
improvements have been made or issues fixed, they are noted per item.
For libvirt: For libvirt:
@ -199,6 +200,8 @@ For libvirt:
instance immediately after rescue. However, rebuilding the rescued instance instance immediately after rescue. However, rebuilding the rescued instance
only helps if there are other free vGPUs on the host. only helps if there are other free vGPUs on the host.
.. note:: This has been resolved in the Rocky release [#]_.
For XenServer: For XenServer:
* Suspend and live migration with vGPUs attached depends on support from the * Suspend and live migration with vGPUs attached depends on support from the
@ -219,6 +222,7 @@ For XenServer:
resize. If you want to migrate an instance, make sure to rebuild it after the resize. If you want to migrate an instance, make sure to rebuild it after the
migration. migration.
.. [#] https://bugs.launchpad.net/nova/+bug/1762688
.. Links .. Links
.. _Intel GVT-g: https://01.org/igvt-g .. _Intel GVT-g: https://01.org/igvt-g

View File

@ -18811,6 +18811,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.drvr.detach_interface(self.context, instance, network_info[0]) self.drvr.detach_interface(self.context, instance, network_info[0])
self.mox.VerifyAll() self.mox.VerifyAll()
@mock.patch('nova.virt.libvirt.LibvirtDriver.'
'_get_all_assigned_mediated_devices')
@mock.patch('nova.virt.libvirt.utils.write_to_file') @mock.patch('nova.virt.libvirt.utils.write_to_file')
# NOTE(mdbooth): The following 4 mocks are required to execute # NOTE(mdbooth): The following 4 mocks are required to execute
# get_guest_xml(). # get_guest_xml().
@ -18822,11 +18824,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
mock_instance_metadata, mock_supports_direct_io, mock_instance_metadata, mock_supports_direct_io,
mock_build_device_metadata, mock_set_host_enabled, mock_build_device_metadata, mock_set_host_enabled,
mock_write_to_file, mock_write_to_file,
mock_get_mdev,
exists=None): exists=None):
self.flags(instances_path=self.useFixture(fixtures.TempDir()).path) self.flags(instances_path=self.useFixture(fixtures.TempDir()).path)
mock_build_device_metadata.return_value = None mock_build_device_metadata.return_value = None
mock_supports_direct_io.return_value = True mock_supports_direct_io.return_value = True
mock_get_mdev.return_value = {uuids.mdev1: uuids.inst1}
backend = self.useFixture( backend = self.useFixture(
fake_imagebackend.ImageBackendFixture(exists=exists)) fake_imagebackend.ImageBackendFixture(exists=exists))
@ -18890,6 +18895,11 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.assertEqual(expected_kernel_ramdisk_paths, self.assertEqual(expected_kernel_ramdisk_paths,
kernel_ramdisk_paths) kernel_ramdisk_paths)
# The generated domain XML should also contain any existing mdev
self.assertEqual(
[uuids.mdev1],
doc.xpath("devices/*[@type='mdev']/source/address/@uuid"))
@mock.patch('nova.virt.configdrive.ConfigDriveBuilder._make_iso9660') @mock.patch('nova.virt.configdrive.ConfigDriveBuilder._make_iso9660')
def test_rescue_config_drive(self, mock_mkisofs): def test_rescue_config_drive(self, mock_mkisofs):
instance = self._create_instance({'config_drive': str(True)}) instance = self._create_instance({'config_drive': str(True)})

View File

@ -3013,11 +3013,16 @@ class LibvirtDriver(driver.ComputeDriver):
gen_confdrive = functools.partial(self._create_configdrive, gen_confdrive = functools.partial(self._create_configdrive,
context, instance, injection_info, context, instance, injection_info,
rescue=True) rescue=True)
# NOTE(sbauza): Since rescue recreates the guest XML, we need to
# remember the existing mdevs for reusing them.
mdevs = self._get_all_assigned_mediated_devices(instance)
mdevs = list(mdevs.keys())
self._create_image(context, instance, disk_info['mapping'], self._create_image(context, instance, disk_info['mapping'],
injection_info=injection_info, suffix='.rescue', injection_info=injection_info, suffix='.rescue',
disk_images=rescue_images) disk_images=rescue_images)
xml = self._get_guest_xml(context, instance, network_info, disk_info, xml = self._get_guest_xml(context, instance, network_info, disk_info,
image_meta, rescue=rescue_images) image_meta, rescue=rescue_images,
mdevs=mdevs)
self._destroy(instance) self._destroy(instance)
self._create_domain(xml, post_xml_callback=gen_confdrive) self._create_domain(xml, post_xml_callback=gen_confdrive)