Merge "libvirt: pass the mdevs when rebooting the guest"

Zuul 2018-01-27 15:29:32 +00:00, committed by Gerrit Code Review
commit ec8b9a59c0
3 changed files with 74 additions and 9 deletions


@@ -12371,7 +12371,9 @@ class LibvirtConnTestCase(test.NoDBTestCase,
     @mock.patch('nova.virt.libvirt.LibvirtDriver.'
                 '_get_instance_disk_info_from_config')
     @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy')
-    def test_hard_reboot(self, mock_destroy, mock_get_disk_info,
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.'
+                '_get_all_assigned_mediated_devices')
+    def test_hard_reboot(self, mock_get_mdev, mock_destroy, mock_get_disk_info,
                          mock_get_guest_xml, mock_create_domain_and_network,
                          mock_get_info):
         self.context.auth_token = True  # any non-None value will suffice
@@ -12389,6 +12391,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
                     "<target dev='vdb' bus='virtio'/></disk>"
                     "</devices></domain>")

+        mock_get_mdev.return_value = {uuids.mdev1: uuids.inst1}
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

         return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN),
@@ -12421,10 +12424,14 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         for name in ('disk', 'disk.local'):
             self.assertTrue(disks[name].cache.called)

+        mock_get_mdev.assert_called_once_with(instance)
         mock_destroy.assert_called_once_with(self.context, instance,
                                              network_info, destroy_disks=False,
                                              block_device_info=block_device_info)
-        mock_get_guest_xml.assert_called_once_with(self.context, instance,
-            network_info, mock.ANY, mock.ANY,
-            block_device_info=block_device_info)
+        mock_get_guest_xml.assert_called_once_with(self.context, instance,
+            network_info, mock.ANY, mock.ANY,
+            block_device_info=block_device_info, mdevs=[uuids.mdev1])
         mock_create_domain_and_network.assert_called_once_with(self.context,
             dummyxml, instance, network_info,
             block_device_info=block_device_info)
@@ -12442,9 +12449,11 @@ class LibvirtConnTestCase(test.NoDBTestCase,
     @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_config')
     @mock.patch('nova.virt.libvirt.blockinfo.get_disk_info')
     @mock.patch('nova.virt.libvirt.LibvirtDriver._destroy')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.'
+                '_get_all_assigned_mediated_devices')
     def test_hard_reboot_does_not_call_glance_show(self,
-            mock_destroy, mock_get_disk_info, mock_get_guest_config,
-            mock_get_instance_path, mock_write_to_file,
+            mock_get_mdev, mock_destroy, mock_get_disk_info,
+            mock_get_guest_config, mock_get_instance_path, mock_write_to_file,
             mock_get_instance_disk_info, mock_create_images_and_backing,
             mock_create_domand_and_network, mock_prepare_pci_devices_for_use,
             mock_get_instance_pci_devs, mock_looping_call, mock_ensure_tree):
@@ -12462,6 +12471,8 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         instance = objects.Instance(**self.test_instance)
+        mock_get_mdev.return_value = {}
+
         network_info = mock.MagicMock()
         block_device_info = mock.MagicMock()
         mock_get_disk_info.return_value = {}
@@ -18634,6 +18645,28 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
         self.assertEqual({uuids.mdev: guest2.uuid},
                          drvr._get_all_assigned_mediated_devices())

+    @mock.patch.object(host.Host, 'get_guest')
+    def test_get_all_assigned_mediated_devices_for_an_instance(self,
+                                                               get_guest):
+        dom_with_vgpu = """
+              <domain type="kvm">
+                <devices>
+                 <hostdev mode='subsystem' type='mdev' model='vfio-pci'>
+                  <source>
+                   <address uuid='%s'/>
+                  </source>
+                 </hostdev>
+                </devices>
+              </domain>
+              """ % uuids.mdev
+        guest = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_with_vgpu))
+        get_guest.return_value = guest
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        fake_inst = objects.Instance()
+        self.assertEqual({uuids.mdev: guest.uuid},
+                         drvr._get_all_assigned_mediated_devices(fake_inst))
+        get_guest.assert_called_once_with(fake_inst)
+
     def test_allocate_mdevs_with_no_vgpu_allocations(self):
         allocations = {
             'rp1': {

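Aside: what the new test above exercises boils down to pulling mdev UUIDs out of the guest's <hostdev type='mdev'> XML. Here is a minimal standalone sketch of that idea using plain lxml rather than Nova's LibvirtConfigGuestHostdevMDEV config class; the UUID is an arbitrary example value, not one from the patch.

    from lxml import etree

    DOM_WITH_VGPU = """<domain type="kvm">
      <devices>
        <hostdev mode='subsystem' type='mdev' model='vfio-pci'>
          <source>
            <address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>
          </source>
        </hostdev>
      </devices>
    </domain>"""

    # Find every mdev hostdev and collect the UUID of its source address.
    root = etree.fromstring(DOM_WITH_VGPU.encode())
    mdev_uuids = [address.get('uuid') for address in
                  root.findall("./devices/hostdev[@type='mdev']/source/address")]
    print(mdev_uuids)  # ['4b20d080-1b54-4048-85b3-a6a62d165c01']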

@@ -2589,6 +2589,10 @@ class LibvirtDriver(driver.ComputeDriver):
         re-creates the domain to ensure the reboot happens, as the guest
         OS cannot ignore this action.
         """
+        # NOTE(sbauza): Since we undefine the guest XML when destroying, we
+        # need to remember the existing mdevs for reusing them.
+        mdevs = self._get_all_assigned_mediated_devices(instance)
+        mdevs = list(mdevs.keys())
         # NOTE(mdbooth): In addition to performing a hard reboot of the domain,
         # the hard reboot operation is relied upon by operators to be an
         # automated attempt to fix as many things as possible about a
@@ -2617,7 +2621,8 @@ class LibvirtDriver(driver.ComputeDriver):
         # are in place.
         xml = self._get_guest_xml(context, instance, network_info, disk_info,
                                   instance.image_meta,
-                                  block_device_info=block_device_info)
+                                  block_device_info=block_device_info,
+                                  mdevs=mdevs)

         # NOTE(mdbooth): context.auth_token will not be set when we call
         # _hard_reboot from resume_state_on_host_boot()
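Read together, the two _hard_reboot hunks above amount to the abridged flow below. This is a sketch, not the full method: the destroy() and _get_guest_xml() calls match the assertions in the tests above, the disk_info line is approximated from the blockinfo.get_disk_info mock, and everything between the shown steps is elided.

    def _hard_reboot(self, context, instance, network_info,
                     block_device_info=None):
        # Remember the assigned mdevs *before* destroying the domain, since
        # destroy() undefines the guest XML and the assignment would be lost.
        mdevs = list(self._get_all_assigned_mediated_devices(instance).keys())

        self.destroy(context, instance, network_info, destroy_disks=False,
                     block_device_info=block_device_info)

        # Approximation of the call mocked in the tests above.
        disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type, instance,
                                            instance.image_meta,
                                            block_device_info)

        # Re-generate the guest XML with the remembered mdevs so the
        # rebooted domain keeps its vGPUs.
        xml = self._get_guest_xml(context, instance, network_info, disk_info,
                                  instance.image_meta,
                                  block_device_info=block_device_info,
                                  mdevs=mdevs)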
@@ -5767,15 +5772,22 @@ class LibvirtDriver(driver.ComputeDriver):
                     mediated_devices.append(device)
         return mediated_devices

-    def _get_all_assigned_mediated_devices(self):
+    def _get_all_assigned_mediated_devices(self, instance=None):
         """Lookup all instances from the host and return all the mediated
         devices that are assigned to a guest.

+        :param instance: Only return mediated devices for that instance.
+
         :returns: A dictionary of keys being mediated device UUIDs and their
                   respective values the instance UUID of the guest using it.
         """
         allocated_mdevs = {}
-        for guest in self._host.list_guests(only_running=False):
+        if instance:
+            guest = self._host.get_guest(instance)
+            guests = [guest]
+        else:
+            guests = self._host.list_guests(only_running=False)
+        for guest in guests:
             cfg = guest.get_config()
             for device in cfg.devices:
                 if isinstance(device, vconfig.LibvirtConfigGuestHostdevMDEV):
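Pieced together, the patched helper reads as below. The hunk truncates the body after the isinstance() check, so the final dictionary assignment is inferred from the {uuids.mdev: guest.uuid} shape the tests assert; treat that line as a reconstruction, not a verbatim quote.

    def _get_all_assigned_mediated_devices(self, instance=None):
        """Lookup all instances from the host and return all the mediated
        devices that are assigned to a guest.

        :param instance: Only return mediated devices for that instance.

        :returns: A dictionary of keys being mediated device UUIDs and their
                  respective values the instance UUID of the guest using it.
        """
        allocated_mdevs = {}
        if instance:
            # Scope the lookup to the single guest backing this instance.
            guest = self._host.get_guest(instance)
            guests = [guest]
        else:
            guests = self._host.list_guests(only_running=False)
        for guest in guests:
            cfg = guest.get_config()
            for device in cfg.devices:
                if isinstance(device, vconfig.LibvirtConfigGuestHostdevMDEV):
                    # Reconstructed: map mdev UUID -> owning guest UUID.
                    allocated_mdevs[device.uuid] = guest.uuid
        return allocated_mdevs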


@@ -30,9 +30,28 @@ features:
     different types but there is no possibility yet to specify in the flavor
     which specific type we want to use for that instance.
   * Suspending a guest having vGPUs doesn't work yet given a libvirt concern
-    (it can't hot-unplug mediated devices from a guest). For the moment,
-    please don't restart instances (or suspend/resume them) or the VGPU
-    related device will be removed from the guest.
+    (it can't hot-unplug mediated devices from a guest). Workarounds using
+    other instance actions (like snapshotting the instance or shelving it)
+    are recommended until libvirt supports that.
+  * Resizing an instance to a new flavor that has vGPU resources doesn't
+    allocate those vGPUs to the instance (the instance is created without
+    vGPU resources). The proposed workaround is to rebuild the instance
+    after it has been resized, so that it then has vGPUs allocated.
+  * Migrating an instance to another host has the same problem as resize.
+    If you want to migrate an instance, make sure to rebuild it afterwards.
+  * Rescuing an instance that has vGPUs means the rescue image won't use
+    the existing vGPUs. When unrescued, the instance will again use the
+    vGPUs that were originally allocated to it. That said, since Nova looks
+    at all allocated vGPUs when trying to find unallocated ones, there is a
+    race condition if an instance is rescued at the moment a new instance
+    asking for vGPUs is created: both instances could end up using the same
+    vGPUs. If you want to rescue an instance, make sure to disable the host
+    until this is fixed in Nova.
   * Mediated devices that are created by the libvirt driver are not persisted
     upon reboot. Consequently, a guest startup would fail since the virtual
@@ -45,7 +64,8 @@ features:
     the nvidia driver that prevents one guest from having more than one
     virtual GPU from the same physical card. One guest can have two or more
     virtual GPUs but then it requires each vGPU to be hosted by a separate
-    physical card.
+    physical card. Until that limitation is removed, please avoid creating
+    flavors asking for more than one vGPU.

 We are working actively to remove or work around those caveats, but please
 understand that for the moment this feature is experimental given all the