Merge "libvirt: Provide VGPU inventory for a single GPU type"

This commit is contained in:
Zuul 2018-01-09 20:15:20 +00:00 committed by Gerrit Code Review
commit 99f5d9e611
6 changed files with 487 additions and 2 deletions

View File

@ -2793,6 +2793,24 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest):
['rx', 'tx', 'sg', 'tso', 'gso', 'gro', 'rxvlan',
'txvlan'])
# Verify that a nodedev XML document for a mediated device is parsed into
# a LibvirtConfigNodeDeviceMdevInformation object exposing the mdev type
# id and the IOMMU group number.
def test_config_mdev_device(self):
xmlin = """
<device>
<name>mdev_4b20d080_1b54_4048_85b3_a6a62d165c01</name>
<parent>pci_0000_06_00_0</parent>
<capability type='mdev'>
<type id='nvidia-11'/>
<iommuGroup number='12'/>
</capability>
</device>"""
obj = config.LibvirtConfigNodeDevice()
obj.parse_str(xmlin)
# The mdev capability element must be parsed into its own config object.
self.assertIsInstance(obj.mdev_information,
config.LibvirtConfigNodeDeviceMdevInformation)
self.assertEqual("nvidia-11", obj.mdev_information.type)
self.assertEqual(12, obj.mdev_information.iommu_group)
class LibvirtConfigNodeDevicePciCapTest(LibvirtConfigBaseTest):
@ -2871,6 +2889,45 @@ class LibvirtConfigNodeDevicePciCapTest(LibvirtConfigBaseTest):
self.assertEqual(obj.fun_capability[1].device_addrs,
[(0, 10, 1, 1), ])
# Verify that a PCI device XML carrying a <capability type='mdev_types'>
# child is parsed so the supported mdev types are listed on
# obj.mdev_capability as dicts (type/name/deviceAPI/availableInstances).
def test_config_device_pci_mdev_capable(self):
xmlin = """
<capability type="pci">
<domain>0</domain>
<bus>10</bus>
<slot>1</slot>
<function>5</function>
<product id="0x0FFE">GRID M60-0B</product>
<vendor id="0x10DE">Nvidia</vendor>
<capability type='mdev_types'>
<type id='nvidia-11'>
<name>GRID M60-0B</name>
<deviceAPI>vfio-pci</deviceAPI>
<availableInstances>16</availableInstances>
</type>
</capability>
</capability>"""
obj = config.LibvirtConfigNodeDevicePciCap()
obj.parse_str(xmlin)
# Standard PCI address fields still parse as before.
self.assertEqual(0, obj.domain)
self.assertEqual(10, obj.bus)
self.assertEqual(1, obj.slot)
self.assertEqual(5, obj.function)
self.assertEqual("GRID M60-0B", obj.product)
self.assertEqual(0x0FFE, obj.product_id)
self.assertEqual("Nvidia", obj.vendor)
self.assertEqual(0x10DE, obj.vendor_id)
# No <numa> element in this XML, so numa_node stays None.
self.assertIsNone(obj.numa_node)
self.assertIsInstance(
obj.mdev_capability[0],
config.LibvirtConfigNodeDeviceMdevCapableSubFunctionCap)
self.assertEqual([{
'availableInstances': 16,
'deviceAPI': 'vfio-pci',
'name': 'GRID M60-0B',
'type': 'nvidia-11'}], obj.mdev_capability[0].mdev_types)
class LibvirtConfigNodeDevicePciSubFunctionCap(LibvirtConfigBaseTest):

View File

@ -289,7 +289,46 @@ _fake_NodeDevXml = \
<feature name='txvlan'/>
<capability type='80203'/>
</capability>
</device>"""
</device>""",
"pci_0000_06_00_0": """
<device>
<name>pci_0000_06_00_0</name>
<path>/sys/devices/pci0000:00/0000:00:06.0</path>
<parent></parent>
<driver>
<name>nvidia</name>
</driver>
<capability type="pci">
<domain>0</domain>
<bus>10</bus>
<slot>1</slot>
<function>5</function>
<product id="0x0FFE">GRID M60-0B</product>
<vendor id="0x10DE">Nvidia</vendor>
<numa node="8"/>
<capability type='mdev_types'>
<type id='nvidia-11'>
<name>GRID M60-0B</name>
<deviceAPI>vfio-pci</deviceAPI>
<availableInstances>16</availableInstances>
</type>
</capability>
</capability>
</device>""",
"mdev_4b20d080_1b54_4048_85b3_a6a62d165c01": """
<device>
<name>mdev_4b20d080_1b54_4048_85b3_a6a62d165c01</name>
<path>/sys/devices/pci0000:00/0000:00:02.0/4b20d080-1b54-4048-85b3-a6a62d165c01</path>
<parent>pci_0000_00_02_0</parent>
<driver>
<name>vfio_mdev</name>
</driver>
<capability type='mdev'>
<type id='nvidia-11'/>
<iommuGroup number='12'/>
</capability>
</device>
""",
}
_fake_cpu_info = {
@ -15843,6 +15882,9 @@ class HostStateTestCase(test.NoDBTestCase):
# Fake-driver stub: report no vCPUs in use for HostState tests.
def _get_vcpu_used(self):
return 0
# Fake-driver stub: report no vGPUs available for HostState tests.
def _get_vgpu_total(self):
return 0
# Fake-driver stub: return the canned CPU info fixture.
def _get_cpu_info(self):
return HostStateTestCase.cpu_info
@ -15865,6 +15907,12 @@ class HostStateTestCase(test.NoDBTestCase):
# Fake-driver stub: return the canned PCI device list as JSON.
def _get_pci_passthrough_devices(self):
return jsonutils.dumps(HostStateTestCase.pci_devices)
# Fake-driver stub: the fake host exposes no mdev-capable devices.
def _get_mdev_capable_devices(self, types=None):
return []
# Fake-driver stub: the fake host exposes no mediated devices.
def _get_mediated_devices(self, types=None):
return []
# Fake-driver stub: return the canned NUMA topology fixture.
def _get_host_numa_topology(self):
return HostStateTestCase.numa_topology
@ -15913,13 +15961,16 @@ class TestGetInventory(test.NoDBTestCase):
self.useFixture(fakelibvirt.FakeLibvirtFixture())
self.driver = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vgpu_total')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
return_value={'total': 200})
@mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
return_value=1024)
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total',
return_value=24)
def test_get_inventory(self, mock_vcpu, mock_mem, mock_disk):
def _test_get_inventory(self, mock_vcpu, mock_mem, mock_disk, mock_vgpus,
total_vgpus=0):
mock_vgpus.return_value = total_vgpus
expected_inv = {
fields.ResourceClass.VCPU: {
'total': 24,
@ -15940,9 +15991,24 @@ class TestGetInventory(test.NoDBTestCase):
'step_size': 1,
},
}
if total_vgpus > 0:
expected_inv.update({
fields.ResourceClass.VGPU: {
'total': total_vgpus,
'min_unit': 1,
'max_unit': total_vgpus,
'step_size': 1,
}
})
inv = self.driver.get_inventory(mock.sentinel.nodename)
self.assertEqual(expected_inv, inv)
# Default path: no vGPUs, so no VGPU resource class in the inventory.
def test_get_inventory(self):
self._test_get_inventory()
# With 8 vGPUs reported, the inventory must gain a VGPU entry.
def test_get_inventory_with_vgpus(self):
self._test_get_inventory(total_vgpus=8)
class LibvirtDriverTestCase(test.NoDBTestCase):
"""Test for nova.virt.libvirt.libvirt_driver.LibvirtDriver."""
@ -18182,6 +18248,124 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
'nova.tests.unit.virt.libvirt.test_driver.FakeInvalidVolumeDriver'
)
# _get_vgpu_total must sum the remaining capacity of mdev-capable devices
# with the count of already-created mdevs, but only for the types enabled
# via the [devices]/enabled_vgpu_types option.
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
'._get_mediated_devices')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
'._get_mdev_capable_devices')
def test_get_vgpu_total(self, get_mdev_devs, get_mdevs):
get_mdev_devs.return_value = [
{'dev_id': 'pci_0000_84_00_0',
'types': {'nvidia-11': {'availableInstances': 14,
'name': 'GRID M60-0B',
'deviceAPI': 'vfio-pci'},
}}]
get_mdevs.return_value = [
{'dev_id': 'pci_0000_84_00_0',
'type': 'nvidia-11',
'iommuGroup': 1
},
{'dev_id': 'pci_0000_84_00_0',
'type': 'nvidia-11',
'iommuGroup': 1
},
]
# By default, no specific types are supported
self.assertEqual(0, self.drvr._get_vgpu_total())
# Now, ask for only one
self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
# We have 14 available for nvidia-11. We also have 2 mdevs of the type.
# So, as a total, we have 14+2, hence 16.
self.assertEqual(16, self.drvr._get_vgpu_total())
# _get_mdev_capable_devices must translate the libvirt nodedev XML of a
# mdev-capable PCI device into the expected dict structure.
@mock.patch.object(host.Host, 'device_lookup_by_name')
@mock.patch.object(host.Host, 'list_mdev_capable_devices')
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
return_value=versionutils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
def test_get_mdev_capable_devices(self, _get_libvirt_version,
list_mdev_capable_devs,
device_lookup_by_name):
list_mdev_capable_devs.return_value = ['pci_0000_06_00_0']
# Serve the canned nodedev XML fixture for any looked-up name.
def fake_nodeDeviceLookupByName(name):
return FakeNodeDevice(_fake_NodeDevXml[name])
device_lookup_by_name.side_effect = fake_nodeDeviceLookupByName
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
expected = [{"dev_id": "pci_0000_06_00_0",
"types": {'nvidia-11': {'availableInstances': 16,
'name': 'GRID M60-0B',
'deviceAPI': 'vfio-pci'},
}
}]
self.assertEqual(expected, drvr._get_mdev_capable_devices())
# Devices supporting none of the requested types must be filtered out.
@mock.patch.object(host.Host, 'device_lookup_by_name')
@mock.patch.object(host.Host, 'list_mdev_capable_devices')
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
return_value=versionutils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
def test_get_mdev_capable_devices_filtering(self, _get_libvirt_version,
list_mdev_capable_devs,
device_lookup_by_name):
list_mdev_capable_devs.return_value = ['pci_0000_06_00_0']
# Serve the canned nodedev XML fixture for any looked-up name.
def fake_nodeDeviceLookupByName(name):
return FakeNodeDevice(_fake_NodeDevXml[name])
device_lookup_by_name.side_effect = fake_nodeDeviceLookupByName
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
# Since we filter by a type not supported by the physical device,
# we don't get results.
self.assertEqual([],
drvr._get_mdev_capable_devices(types=['nvidia-12']))
# _get_mediated_devices must translate the nodedev XML of an existing
# mediated device into a dict with dev_id, type and iommu_group.
@mock.patch.object(host.Host, 'device_lookup_by_name')
@mock.patch.object(host.Host, 'list_mediated_devices')
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
return_value=versionutils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
def test_get_mediated_devices(self, _get_libvirt_version,
list_mediated_devices,
device_lookup_by_name):
list_mediated_devices.return_value = [
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01']
# Serve the canned nodedev XML fixture for any looked-up name.
def fake_nodeDeviceLookupByName(name):
return FakeNodeDevice(_fake_NodeDevXml[name])
device_lookup_by_name.side_effect = fake_nodeDeviceLookupByName
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
expected = [{"dev_id": "mdev_4b20d080_1b54_4048_85b3_a6a62d165c01",
"type": "nvidia-11",
"iommu_group": 12
}]
self.assertEqual(expected, drvr._get_mediated_devices())
# Mediated devices of a non-requested type must be filtered out.
@mock.patch.object(host.Host, 'device_lookup_by_name')
@mock.patch.object(host.Host, 'list_mediated_devices')
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion',
return_value=versionutils.convert_version_to_int(
libvirt_driver.MIN_LIBVIRT_MDEV_SUPPORT))
def test_get_mediated_devices_filtering(self, _get_libvirt_version,
list_mediated_devices,
device_lookup_by_name):
list_mediated_devices.return_value = [
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01']
# Serve the canned nodedev XML fixture for any looked-up name.
def fake_nodeDeviceLookupByName(name):
return FakeNodeDevice(_fake_NodeDevXml[name])
device_lookup_by_name.side_effect = fake_nodeDeviceLookupByName
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
# Since we filter by a type not supported by the physical device,
# we don't get results.
self.assertEqual([], drvr._get_mediated_devices(types=['nvidia-12']))
class LibvirtVolumeUsageTestCase(test.NoDBTestCase):
"""Test for LibvirtDriver.get_all_volume_usage."""

View File

@ -785,6 +785,37 @@ Active: 8381604 kB
self.host.list_pci_devices(8)
mock_listDevices.assert_called_once_with('pci', 8)
# The wrapper must delegate to _list_devices with the 'mdev_types' cap.
def test_list_mdev_capable_devices(self):
with mock.patch.object(self.host, "_list_devices") as mock_listDevices:
self.host.list_mdev_capable_devices(8)
mock_listDevices.assert_called_once_with('mdev_types', flags=8)
# The wrapper must delegate to _list_devices with the 'mdev' cap.
def test_list_mediated_devices(self):
with mock.patch.object(self.host, "_list_devices") as mock_listDevices:
self.host.list_mediated_devices(8)
mock_listDevices.assert_called_once_with('mdev', flags=8)
# _list_devices forwards cap and flags straight to virConnect.listDevices.
@mock.patch.object(fakelibvirt.virConnect, "listDevices")
def test_list_devices(self, mock_listDevices):
self.host._list_devices('mdev', 8)
mock_listDevices.assert_called_once_with('mdev', 8)
# When the connection driver lacks listDevices support (VIR_ERR_NO_SUPPORT)
# _list_devices must swallow the error and return an empty list.
@mock.patch.object(fakelibvirt.virConnect, "listDevices")
def test_list_devices_unsupported(self, mock_listDevices):
not_supported_exc = fakelibvirt.make_libvirtError(
fakelibvirt.libvirtError,
'this function is not supported by the connection driver:'
' listDevices',
error_code=fakelibvirt.VIR_ERR_NO_SUPPORT)
mock_listDevices.side_effect = not_supported_exc
self.assertEqual([], self.host._list_devices('mdev', 8))
# Any libvirtError other than VIR_ERR_NO_SUPPORT must be re-raised.
@mock.patch.object(fakelibvirt.virConnect, "listDevices")
def test_list_devices_other_exc(self, mock_listDevices):
mock_listDevices.side_effect = fakelibvirt.libvirtError('test')
self.assertRaises(fakelibvirt.libvirtError,
self.host._list_devices, 'mdev', 8)
@mock.patch.object(fakelibvirt.virConnect, "compareCPU")
def test_compare_cpu(self, mock_compareCPU):
self.host.compare_cpu("cpuxml")

View File

@ -2377,6 +2377,7 @@ class LibvirtConfigNodeDevice(LibvirtConfigObject):
self.parent = None
self.driver = None
self.pci_capability = None
self.mdev_information = None
def parse_dom(self, xmldoc):
super(LibvirtConfigNodeDevice, self).parse_dom(xmldoc)
@ -2390,6 +2391,10 @@ class LibvirtConfigNodeDevice(LibvirtConfigObject):
pcicap = LibvirtConfigNodeDevicePciCap()
pcicap.parse_dom(c)
self.pci_capability = pcicap
elif c.tag == "capability" and c.get("type") in ['mdev']:
mdev_info = LibvirtConfigNodeDeviceMdevInformation()
mdev_info.parse_dom(c)
self.mdev_information = mdev_info
class LibvirtConfigNodeDevicePciCap(LibvirtConfigObject):
@ -2408,6 +2413,7 @@ class LibvirtConfigNodeDevicePciCap(LibvirtConfigObject):
self.vendor_id = None
self.numa_node = None
self.fun_capability = []
self.mdev_capability = []
self.interface = None
self.address = None
self.link_state = None
@ -2446,6 +2452,10 @@ class LibvirtConfigNodeDevicePciCap(LibvirtConfigObject):
funcap = LibvirtConfigNodeDevicePciSubFunctionCap()
funcap.parse_dom(c)
self.fun_capability.append(funcap)
elif c.tag == "capability" and c.get('type') in ('mdev_types',):
mdevcap = LibvirtConfigNodeDeviceMdevCapableSubFunctionCap()
mdevcap.parse_dom(c)
self.mdev_capability.append(mdevcap)
class LibvirtConfigNodeDevicePciSubFunctionCap(LibvirtConfigObject):
@ -2466,6 +2476,45 @@ class LibvirtConfigNodeDevicePciSubFunctionCap(LibvirtConfigObject):
int(c.get('function'), 16)))
class LibvirtConfigNodeDeviceMdevCapableSubFunctionCap(LibvirtConfigObject):
    """Parses a <capability type='mdev_types'> element of a PCI device,
    collecting the mediated-device types the device can create.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigNodeDeviceMdevCapableSubFunctionCap, self).__init__(
            root_name="capability", **kwargs)
        # mdev_types is a list of dictionaries where each item looks like:
        # {'type': 'nvidia-11', 'name': 'GRID M60-0B', 'deviceAPI': 'vfio-pci',
        # 'availableInstances': 16}
        self.mdev_types = list()

    def parse_dom(self, xmldoc):
        """Populate mdev_types from the <type> children of the capability.

        :param xmldoc: an etree Element for the <capability> node.
        """
        super(LibvirtConfigNodeDeviceMdevCapableSubFunctionCap,
              self).parse_dom(xmldoc)
        # NOTE: iterate the Element directly rather than calling the
        # deprecated getchildren() method, which was removed in Python 3.9.
        for c in xmldoc:
            if c.tag == "type":
                mdev_type = {'type': c.get('id')}
                for e in c:
                    # availableInstances is numeric; every other child
                    # element (name, deviceAPI, ...) is kept as text.
                    mdev_type[e.tag] = (int(e.text)
                                        if e.tag == 'availableInstances'
                                        else e.text)
                self.mdev_types.append(mdev_type)
class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject):
    """Parses a <capability type='mdev'> element of a mediated device,
    exposing its mdev type id and IOMMU group number.
    """

    def __init__(self, **kwargs):
        super(LibvirtConfigNodeDeviceMdevInformation, self).__init__(
            root_name="capability", **kwargs)
        # mdev type id, e.g. 'nvidia-11', from <type id='...'/>
        self.type = None
        # IOMMU group number (int), from <iommuGroup number='...'/>
        self.iommu_group = None

    def parse_dom(self, xmldoc):
        """Extract type and iommu_group from the capability element.

        :param xmldoc: an etree Element for the <capability> node.
        """
        super(LibvirtConfigNodeDeviceMdevInformation,
              self).parse_dom(xmldoc)
        # NOTE: iterate the Element directly rather than calling the
        # deprecated getchildren() method, which was removed in Python 3.9.
        for c in xmldoc:
            if c.tag == "type":
                self.type = c.get('id')
            elif c.tag == "iommuGroup":
                self.iommu_group = int(c.get('number'))
class LibvirtConfigGuestRng(LibvirtConfigGuestDevice):
def __init__(self, **kwargs):

View File

@ -295,6 +295,9 @@ PERF_EVENTS_CPU_FLAG_MAPPING = {'cmt': 'cmt',
'mbmt': 'mbm_total',
}
# Mediated devices support
MIN_LIBVIRT_MDEV_SUPPORT = (3, 4, 0)
class LibvirtDriver(driver.ComputeDriver):
capabilities = {
@ -5370,6 +5373,37 @@ class LibvirtDriver(driver.ComputeDriver):
greenthread.sleep(0)
return total
def _get_supported_vgpu_types(self):
    """Return the enabled vGPU types, truncated to a single entry.

    libvirt only supports one GPU type per compute node for now, so
    only the first item of CONF.devices.enabled_vgpu_types is honored.
    """
    enabled = CONF.devices.enabled_vgpu_types
    if not enabled:
        return []
    # TODO(sbauza): Move this check up to compute_manager.init_host
    if len(enabled) > 1:
        LOG.warning('libvirt only supports one GPU type per compute node,'
                    ' only first type will be used.')
    return enabled[:1]
def _get_vgpu_total(self):
"""Returns the number of total available vGPUs for any GPU type that is
enabled with the enabled_vgpu_types CONF option.
"""
requested_types = self._get_supported_vgpu_types()
# Bail out early if operator doesn't care about providing vGPUs
if not requested_types:
return 0
# Filter how many available mdevs we can create for all the supported
# types.
mdev_capable_devices = self._get_mdev_capable_devices(requested_types)
vgpus = 0
for dev in mdev_capable_devices:
for _type in dev['types']:
vgpus += dev['types'][_type]['availableInstances']
# Count the already created (but possibly not assigned to a guest)
# mdevs for all the supported types
mediated_devices = self._get_mediated_devices(requested_types)
vgpus += len(mediated_devices)
return vgpus
def _get_instance_capabilities(self):
"""Get hypervisor instance capabilities
@ -5555,6 +5589,81 @@ class LibvirtDriver(driver.ComputeDriver):
return jsonutils.dumps(pci_info)
def _get_mdev_capabilities_for_dev(self, devname, types=None):
    """Returns a dict of MDEV capable device with the ID as first key
    and then a list of supported types, each of them being a dict.

    :param types: Only return those specific types.
    """
    virtdev = self._host.device_lookup_by_name(devname)
    cfgdev = vconfig.LibvirtConfigNodeDevice()
    cfgdev.parse_str(virtdev.XMLDesc(0))
    supported_types = {}
    for mdev_cap in cfgdev.pci_capability.mdev_capability:
        for cap in mdev_cap.mdev_types:
            # Skip types the caller didn't ask for (no filter means all).
            if types and cap['type'] not in types:
                continue
            supported_types[cap['type']] = {
                'availableInstances': cap['availableInstances'],
                'name': cap['name'],
                'deviceAPI': cap['deviceAPI']}
    return {"dev_id": cfgdev.name, "types": supported_types}
def _get_mdev_capable_devices(self, types=None):
    """Get host devices supporting mdev types.

    Obtain devices information from libvirt and returns a list of
    dictionaries.

    :param types: Filter only devices supporting those types.
    """
    if not self._host.has_min_version(MIN_LIBVIRT_MDEV_SUPPORT):
        return []
    names = self._host.list_mdev_capable_devices() or []
    capabilities = (self._get_mdev_capabilities_for_dev(name, types)
                    for name in names)
    # Drop devices that support none of the requested types.
    return [dev for dev in capabilities if dev["types"]]
def _get_mediated_device_information(self, devname):
    """Returns a dict of a mediated device."""
    virtdev = self._host.device_lookup_by_name(devname)
    cfgdev = vconfig.LibvirtConfigNodeDevice()
    cfgdev.parse_str(virtdev.XMLDesc(0))
    mdev_info = cfgdev.mdev_information
    return {
        "dev_id": cfgdev.name,
        "type": mdev_info.type,
        "iommu_group": mdev_info.iommu_group,
    }
def _get_mediated_devices(self, types=None):
    """Get host mediated devices.

    Obtain devices information from libvirt and returns a list of
    dictionaries.

    :param types: Filter only devices supporting those types.
    """
    if not self._host.has_min_version(MIN_LIBVIRT_MDEV_SUPPORT):
        return []
    names = self._host.list_mediated_devices() or []
    devices = [self._get_mediated_device_information(name)
               for name in names]
    if types:
        # Keep only the mdevs whose type was requested.
        devices = [dev for dev in devices if dev["type"] in types]
    return devices
def _has_numa_support(self):
# This means that the host can support LibvirtConfigGuestNUMATune
# and the nodeset field in LibvirtConfigGuestMemoryBackingPage
@ -5716,6 +5825,19 @@ class LibvirtDriver(driver.ComputeDriver):
disk_gb = int(self._get_local_gb_info()['total'])
memory_mb = int(self._host.get_memory_mb_total())
vcpus = self._get_vcpu_total()
# NOTE(sbauza): For the moment, the libvirt driver only supports
# providing the total number of virtual GPUs for a single GPU type. If
# you have multiple physical GPUs, each of them providing multiple GPU
# types, libvirt will return the total sum of virtual GPUs
# corresponding to the single type passed in enabled_vgpu_types
# configuration option. Eg. if you have 2 pGPUs supporting 'nvidia-35',
# each of them having 16 available instances, the total here will be
# 32.
# If one of the 2 pGPUs doesn't support 'nvidia-35', it won't be used.
# TODO(sbauza): Use ProviderTree and traits to make a better world.
vgpus = self._get_vgpu_total()
# NOTE(jaypipes): We leave some fields like allocation_ratio and
# reserved out of the returned dicts here because, for now at least,
# the RT injects those values into the inventory dict based on the
@ -5740,6 +5862,16 @@ class LibvirtDriver(driver.ComputeDriver):
'step_size': 1,
},
}
if vgpus > 0:
# Only provide VGPU resource classes if the driver supports it.
result[fields.ResourceClass.VGPU] = {
'total': vgpus,
'min_unit': 1,
'max_unit': vgpus,
'step_size': 1,
}
return result
def get_available_resource(self, nodename):

View File

@ -839,8 +839,40 @@ class Host(object):
:returns: a list of virNodeDevice instance
"""
# TODO(sbauza): Replace that call by a generic _list_devices("pci")
return self.get_connection().listDevices("pci", flags)
def list_mdev_capable_devices(self, flags=0):
    """Lookup host devices that can create mediated devices.

    :returns: a list of virNodeDevice instance
    """
    capability = "mdev_types"
    return self._list_devices(capability, flags=flags)
def list_mediated_devices(self, flags=0):
    """Lookup mediated devices already existing on the host.

    :returns: a list of virNodeDevice instance
    """
    capability = "mdev"
    return self._list_devices(capability, flags=flags)
def _list_devices(self, cap, flags=0):
    """Lookup node devices exposing a given capability.

    :returns: a list of virNodeDevice instance
    """
    try:
        return self.get_connection().listDevices(cap, flags)
    except libvirt.libvirtError as ex:
        # Old drivers may not implement listDevices; treat that as
        # "no devices" rather than failing the caller.
        if ex.get_error_code() != libvirt.VIR_ERR_NO_SUPPORT:
            raise
        LOG.warning("URI %(uri)s does not support "
                    "listDevices: %(error)s",
                    {'uri': self._uri, 'error': ex})
        return []
def compare_cpu(self, xmlDesc, flags=0):
    """Compare a CPU description XML against the host CPU."""
    conn = self.get_connection()
    return conn.compareCPU(xmlDesc, flags)