Expose the mdev class
It's the last functional change for the series; we expose this new option now that it is used. Doc changes will be addressed in a follow-up patch. Change-Id: Ibccbb93352f93dba7e15e1f77be9ee0fc466fee0 Implements: blueprint generic-mdevs
This commit is contained in:
committed by
Sean Mooney
parent
2b847085c8
commit
9be996c696
@@ -70,14 +70,25 @@ def register_dynamic_opts(conf):
|
|||||||
the initial configuration has been loaded.
|
the initial configuration has been loaded.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
for mdev_type in conf.devices.enabled_mdev_types:
|
||||||
# Register the '[mdev_$(MDEV_TYPE)]/device_addresses' opts, implicitly
|
# Register the '[mdev_$(MDEV_TYPE)]/device_addresses' opts, implicitly
|
||||||
# registering the '[mdev_$(MDEV_TYPE)]' groups in the process
|
# registering the '[mdev_$(MDEV_TYPE)]' groups in the process
|
||||||
for mdev_type in conf.devices.enabled_mdev_types:
|
|
||||||
opt = cfg.ListOpt('device_addresses', default=[],
|
opt = cfg.ListOpt('device_addresses', default=[],
|
||||||
item_type=cfg.types.String(),
|
item_type=cfg.types.String(),
|
||||||
deprecated_group='vgpu_%s' % mdev_type)
|
deprecated_group='vgpu_%s' % mdev_type)
|
||||||
conf.register_opt(opt, group='mdev_%s' % mdev_type)
|
conf.register_opt(opt, group='mdev_%s' % mdev_type)
|
||||||
|
|
||||||
|
# Register the '[mdev_$(MDEV_TYPE)]/mdev_class' opts
|
||||||
|
class_opt = cfg.StrOpt(
|
||||||
|
'mdev_class',
|
||||||
|
default='VGPU',
|
||||||
|
regex=r'^(VGPU|CUSTOM_[A-Z0-9_]+)$',
|
||||||
|
max_length=255,
|
||||||
|
help='Class of mediated device to manage used to differentiate '
|
||||||
|
'between device types. The name has to be prefixed by '
|
||||||
|
'CUSTOM_ if it is not VGPU.')
|
||||||
|
conf.register_opt(class_opt, group='mdev_%s' % mdev_type)
|
||||||
|
|
||||||
|
|
||||||
def list_opts():
|
def list_opts():
|
||||||
return {devices_group: mdev_opts}
|
return {devices_group: mdev_opts}
|
||||||
|
|||||||
38
nova/tests/fixtures/libvirt.py
vendored
38
nova/tests/fixtures/libvirt.py
vendored
@@ -232,9 +232,10 @@ MDEV_CAPABLE_CAP_TYPE = 'mdev_types'
|
|||||||
|
|
||||||
NVIDIA_11_VGPU_TYPE = 'nvidia-11'
|
NVIDIA_11_VGPU_TYPE = 'nvidia-11'
|
||||||
NVIDIA_12_VGPU_TYPE = 'nvidia-12'
|
NVIDIA_12_VGPU_TYPE = 'nvidia-12'
|
||||||
PGPU1_PCI_ADDR = 'pci_0000_81_00_0'
|
MLX5_CORE_TYPE = 'mlx5_core'
|
||||||
PGPU2_PCI_ADDR = 'pci_0000_81_01_0'
|
MDEVCAP_DEV1_PCI_ADDR = 'pci_0000_81_00_0'
|
||||||
PGPU3_PCI_ADDR = 'pci_0000_81_02_0'
|
MDEVCAP_DEV2_PCI_ADDR = 'pci_0000_81_01_0'
|
||||||
|
MDEVCAP_DEV3_PCI_ADDR = 'pci_0000_81_02_0'
|
||||||
|
|
||||||
os_uname = collections.namedtuple(
|
os_uname = collections.namedtuple(
|
||||||
'uname_result', ['sysname', 'nodename', 'release', 'version', 'machine'],
|
'uname_result', ['sysname', 'nodename', 'release', 'version', 'machine'],
|
||||||
@@ -296,9 +297,9 @@ class FakePCIDevice(object):
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, dev_type, bus, slot, function, iommu_group, numa_node, *,
|
self, dev_type, bus, slot, function, iommu_group, numa_node, *,
|
||||||
vf_ratio=None, multiple_gpu_types=False, parent=None,
|
vf_ratio=None, multiple_gpu_types=False, generic_types=False,
|
||||||
vend_id=None, vend_name=None, prod_id=None, prod_name=None,
|
parent=None, vend_id=None, vend_name=None, prod_id=None,
|
||||||
driver_name=None,
|
prod_name=None, driver_name=None,
|
||||||
):
|
):
|
||||||
"""Populate pci devices
|
"""Populate pci devices
|
||||||
|
|
||||||
@@ -312,6 +313,7 @@ class FakePCIDevice(object):
|
|||||||
:param vf_ratio: (int) Ratio of Virtual Functions on Physical. Only
|
:param vf_ratio: (int) Ratio of Virtual Functions on Physical. Only
|
||||||
applicable if ``dev_type`` is one of: ``PF``, ``VF``.
|
applicable if ``dev_type`` is one of: ``PF``, ``VF``.
|
||||||
:param multiple_gpu_types: (bool) Supports different vGPU types.
|
:param multiple_gpu_types: (bool) Supports different vGPU types.
|
||||||
|
:param generic_types: (bool) Support both mlx5 and nvidia-12 types.
|
||||||
:param parent: (int, int, int) A tuple of bus, slot and function
|
:param parent: (int, int, int) A tuple of bus, slot and function
|
||||||
corresponding to the parent.
|
corresponding to the parent.
|
||||||
:param vend_id: (str) The vendor ID.
|
:param vend_id: (str) The vendor ID.
|
||||||
@@ -329,6 +331,7 @@ class FakePCIDevice(object):
|
|||||||
self.numa_node = numa_node
|
self.numa_node = numa_node
|
||||||
self.vf_ratio = vf_ratio
|
self.vf_ratio = vf_ratio
|
||||||
self.multiple_gpu_types = multiple_gpu_types
|
self.multiple_gpu_types = multiple_gpu_types
|
||||||
|
self.generic_types = generic_types
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
|
||||||
self.vend_id = vend_id
|
self.vend_id = vend_id
|
||||||
@@ -414,6 +417,15 @@ class FakePCIDevice(object):
|
|||||||
'type_id': NVIDIA_12_VGPU_TYPE,
|
'type_id': NVIDIA_12_VGPU_TYPE,
|
||||||
'instances': 8,
|
'instances': 8,
|
||||||
})
|
})
|
||||||
|
if self.generic_types:
|
||||||
|
types = [self.mdevtypes_templ % {
|
||||||
|
'type_id': MLX5_CORE_TYPE,
|
||||||
|
'instances': 16,
|
||||||
|
}]
|
||||||
|
types.append(self.mdevtypes_templ % {
|
||||||
|
'type_id': NVIDIA_12_VGPU_TYPE,
|
||||||
|
'instances': 8,
|
||||||
|
})
|
||||||
if not skip_capability:
|
if not skip_capability:
|
||||||
capability = self.cap_templ % {
|
capability = self.cap_templ % {
|
||||||
'cap_type': MDEV_CAPABLE_CAP_TYPE,
|
'cap_type': MDEV_CAPABLE_CAP_TYPE,
|
||||||
@@ -457,7 +469,8 @@ class HostPCIDevicesInfo(object):
|
|||||||
TOTAL_NUMA_NODES = 2
|
TOTAL_NUMA_NODES = 2
|
||||||
|
|
||||||
def __init__(self, num_pci=0, num_pfs=2, num_vfs=8, num_mdevcap=0,
|
def __init__(self, num_pci=0, num_pfs=2, num_vfs=8, num_mdevcap=0,
|
||||||
numa_node=None, multiple_gpu_types=False):
|
numa_node=None, multiple_gpu_types=False,
|
||||||
|
generic_types=False):
|
||||||
"""Create a new HostPCIDevicesInfo object.
|
"""Create a new HostPCIDevicesInfo object.
|
||||||
|
|
||||||
:param num_pci: (int) The number of (non-SR-IOV) and (non-MDEV capable)
|
:param num_pci: (int) The number of (non-SR-IOV) and (non-MDEV capable)
|
||||||
@@ -470,6 +483,7 @@ class HostPCIDevicesInfo(object):
|
|||||||
devices will be assigned to the specified node else they will be
|
devices will be assigned to the specified node else they will be
|
||||||
split between ``$TOTAL_NUMA_NODES`` nodes.
|
split between ``$TOTAL_NUMA_NODES`` nodes.
|
||||||
:param multiple_gpu_types: (bool) Supports different vGPU types
|
:param multiple_gpu_types: (bool) Supports different vGPU types
|
||||||
|
:param generic_types: (bool) Supports both nvidia-12 and mlx5 types
|
||||||
"""
|
"""
|
||||||
self.devices = {}
|
self.devices = {}
|
||||||
|
|
||||||
@@ -509,7 +523,8 @@ class HostPCIDevicesInfo(object):
|
|||||||
function=function,
|
function=function,
|
||||||
iommu_group=iommu_group,
|
iommu_group=iommu_group,
|
||||||
numa_node=self._calc_numa_node(dev, numa_node),
|
numa_node=self._calc_numa_node(dev, numa_node),
|
||||||
multiple_gpu_types=multiple_gpu_types)
|
multiple_gpu_types=multiple_gpu_types,
|
||||||
|
generic_types=generic_types)
|
||||||
|
|
||||||
slot += 1
|
slot += 1
|
||||||
iommu_group += 1
|
iommu_group += 1
|
||||||
@@ -555,9 +570,9 @@ class HostPCIDevicesInfo(object):
|
|||||||
|
|
||||||
def add_device(
|
def add_device(
|
||||||
self, dev_type, bus, slot, function, iommu_group, numa_node,
|
self, dev_type, bus, slot, function, iommu_group, numa_node,
|
||||||
vf_ratio=None, multiple_gpu_types=False, parent=None,
|
vf_ratio=None, multiple_gpu_types=False, generic_types=False,
|
||||||
vend_id=None, vend_name=None, prod_id=None, prod_name=None,
|
parent=None, vend_id=None, vend_name=None, prod_id=None,
|
||||||
driver_name=None,
|
prod_name=None, driver_name=None,
|
||||||
):
|
):
|
||||||
pci_dev_name = _get_libvirt_nodedev_name(bus, slot, function)
|
pci_dev_name = _get_libvirt_nodedev_name(bus, slot, function)
|
||||||
|
|
||||||
@@ -572,6 +587,7 @@ class HostPCIDevicesInfo(object):
|
|||||||
numa_node=numa_node,
|
numa_node=numa_node,
|
||||||
vf_ratio=vf_ratio,
|
vf_ratio=vf_ratio,
|
||||||
multiple_gpu_types=multiple_gpu_types,
|
multiple_gpu_types=multiple_gpu_types,
|
||||||
|
generic_types=generic_types,
|
||||||
parent=parent,
|
parent=parent,
|
||||||
vend_id=vend_id,
|
vend_id=vend_id,
|
||||||
vend_name=vend_name,
|
vend_name=vend_name,
|
||||||
|
|||||||
@@ -64,17 +64,17 @@ class VGPUReshapeTests(base.ServersTestBase):
|
|||||||
fakelibvirt.FakeMdevDevice(
|
fakelibvirt.FakeMdevDevice(
|
||||||
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c01',
|
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c01',
|
||||||
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
||||||
parent=fakelibvirt.PGPU1_PCI_ADDR),
|
parent=fakelibvirt.MDEVCAP_DEV1_PCI_ADDR),
|
||||||
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c02':
|
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c02':
|
||||||
fakelibvirt.FakeMdevDevice(
|
fakelibvirt.FakeMdevDevice(
|
||||||
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c02',
|
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c02',
|
||||||
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
||||||
parent=fakelibvirt.PGPU2_PCI_ADDR),
|
parent=fakelibvirt.MDEVCAP_DEV2_PCI_ADDR),
|
||||||
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c03':
|
'mdev_4b20d080_1b54_4048_85b3_a6a62d165c03':
|
||||||
fakelibvirt.FakeMdevDevice(
|
fakelibvirt.FakeMdevDevice(
|
||||||
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c03',
|
dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c03',
|
||||||
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
type_id=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
||||||
parent=fakelibvirt.PGPU3_PCI_ADDR),
|
parent=fakelibvirt.MDEVCAP_DEV3_PCI_ADDR),
|
||||||
}
|
}
|
||||||
|
|
||||||
# start a compute with vgpu support disabled so the driver will
|
# start a compute with vgpu support disabled so the driver will
|
||||||
@@ -181,9 +181,9 @@ class VGPUReshapeTests(base.ServersTestBase):
|
|||||||
# which ones are used.
|
# which ones are used.
|
||||||
usages = {}
|
usages = {}
|
||||||
pgpu_uuid_to_name = {}
|
pgpu_uuid_to_name = {}
|
||||||
for pci_device in [fakelibvirt.PGPU1_PCI_ADDR,
|
for pci_device in [fakelibvirt.MDEVCAP_DEV1_PCI_ADDR,
|
||||||
fakelibvirt.PGPU2_PCI_ADDR,
|
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR,
|
||||||
fakelibvirt.PGPU3_PCI_ADDR]:
|
fakelibvirt.MDEVCAP_DEV3_PCI_ADDR]:
|
||||||
gpu_rp_uuid = self.placement.get(
|
gpu_rp_uuid = self.placement.get(
|
||||||
'/resource_providers?name=compute1_%s' % pci_device).body[
|
'/resource_providers?name=compute1_%s' % pci_device).body[
|
||||||
'resource_providers'][0]['uuid']
|
'resource_providers'][0]['uuid']
|
||||||
|
|||||||
@@ -58,6 +58,17 @@ class VGPUTestBase(base.ServersTestBase):
|
|||||||
'nova.privsep.libvirt.create_mdev',
|
'nova.privsep.libvirt.create_mdev',
|
||||||
side_effect=self._create_mdev))
|
side_effect=self._create_mdev))
|
||||||
|
|
||||||
|
# for the sake of resizing, we need to patch the two methods below
|
||||||
|
self.useFixture(fixtures.MockPatch(
|
||||||
|
'nova.virt.libvirt.LibvirtDriver._get_instance_disk_info',
|
||||||
|
return_value=[]))
|
||||||
|
self.useFixture(fixtures.MockPatch('os.rename'))
|
||||||
|
|
||||||
|
# Allow non-admins to see instance action events.
|
||||||
|
self.policy.set_rules({
|
||||||
|
'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner'
|
||||||
|
}, overwrite=False)
|
||||||
|
|
||||||
# NOTE(sbauza): Since the fake create_mdev doesn't know which compute
|
# NOTE(sbauza): Since the fake create_mdev doesn't know which compute
|
||||||
# was called, we need to look at a value that can be provided just
|
# was called, we need to look at a value that can be provided just
|
||||||
# before the driver calls create_mdev. That's why we fake the below
|
# before the driver calls create_mdev. That's why we fake the below
|
||||||
@@ -122,6 +133,57 @@ class VGPUTestBase(base.ServersTestBase):
|
|||||||
self.assertEqual([], compute.driver._get_mediated_devices())
|
self.assertEqual([], compute.driver._get_mediated_devices())
|
||||||
return compute
|
return compute
|
||||||
|
|
||||||
|
def _confirm_resize(self, server, host='host1'):
|
||||||
|
# NOTE(sbauza): Unfortunately, _cleanup_resize() in libvirt checks the
|
||||||
|
# host option to know the source hostname but given we have a global
|
||||||
|
# CONF, the value will be the hostname of the last compute service that
|
||||||
|
# was created, so we need to change it here.
|
||||||
|
# TODO(sbauza): Remove the below once we stop using CONF.host in
|
||||||
|
# libvirt and rather look at the compute host value.
|
||||||
|
orig_host = CONF.host
|
||||||
|
self.flags(host=host)
|
||||||
|
super(VGPUTestBase, self)._confirm_resize(server)
|
||||||
|
self.flags(host=orig_host)
|
||||||
|
self._wait_for_state_change(server, 'ACTIVE')
|
||||||
|
|
||||||
|
def assert_mdev_usage(self, compute, expected_amount, instance=None,
|
||||||
|
expected_rc=orc.VGPU, expected_rp_name=None):
|
||||||
|
"""Verify the allocations for either a whole compute or just a
|
||||||
|
specific instance.
|
||||||
|
|
||||||
|
:param compute: the internal compute object
|
||||||
|
:param expected_amount: the expected amount of allocations
|
||||||
|
:param instance: if not None, a specific Instance to lookup instead
|
||||||
|
of the whole compute allocations.
|
||||||
|
:param expected_rc: the expected resource class
|
||||||
|
:param expected_rp_name: the expected resource provider name if an
|
||||||
|
instance is provided.
|
||||||
|
"""
|
||||||
|
total_usages = collections.defaultdict(int)
|
||||||
|
# We only want to get mdevs that are assigned to either all the
|
||||||
|
# instances or just one.
|
||||||
|
mdevs = compute.driver._get_all_assigned_mediated_devices(instance)
|
||||||
|
for mdev in mdevs:
|
||||||
|
mdev_name = libvirt_utils.mdev_uuid2name(mdev)
|
||||||
|
mdev_info = compute.driver._get_mediated_device_information(
|
||||||
|
mdev_name)
|
||||||
|
parent_name = mdev_info['parent']
|
||||||
|
parent_rp_name = compute.host + '_' + parent_name
|
||||||
|
parent_rp_uuid = self._get_provider_uuid_by_name(parent_rp_name)
|
||||||
|
parent_usage = self._get_provider_usages(parent_rp_uuid)
|
||||||
|
if (expected_rc in parent_usage and
|
||||||
|
parent_rp_name not in total_usages
|
||||||
|
):
|
||||||
|
# We only set the total amount if we didn't have it already
|
||||||
|
total_usages[parent_rp_name] = parent_usage[expected_rc]
|
||||||
|
if expected_rp_name and instance is not None:
|
||||||
|
# If this is for an instance, all the mdevs should be in the
|
||||||
|
# same RP.
|
||||||
|
self.assertEqual(expected_rp_name, parent_rp_name)
|
||||||
|
self.assertEqual(expected_amount, len(mdevs))
|
||||||
|
self.assertEqual(expected_amount,
|
||||||
|
sum(total_usages[k] for k in total_usages))
|
||||||
|
|
||||||
|
|
||||||
class VGPUTests(VGPUTestBase):
|
class VGPUTests(VGPUTestBase):
|
||||||
|
|
||||||
@@ -135,36 +197,10 @@ class VGPUTests(VGPUTestBase):
|
|||||||
enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
|
||||||
group='devices')
|
group='devices')
|
||||||
|
|
||||||
# for the sake of resizing, we need to patch the two methods below
|
|
||||||
self.useFixture(fixtures.MockPatch(
|
|
||||||
'nova.virt.libvirt.LibvirtDriver._get_instance_disk_info',
|
|
||||||
return_value=[]))
|
|
||||||
self.useFixture(fixtures.MockPatch('os.rename'))
|
|
||||||
|
|
||||||
# Allow non-admins to see instance action events.
|
|
||||||
self.policy.set_rules({
|
|
||||||
'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner'
|
|
||||||
}, overwrite=False)
|
|
||||||
|
|
||||||
self.compute1 = self.start_compute('host1')
|
self.compute1 = self.start_compute('host1')
|
||||||
|
|
||||||
def assert_vgpu_usage_for_compute(self, compute, expected):
|
def assert_vgpu_usage_for_compute(self, compute, expected):
|
||||||
total_usages = collections.defaultdict(int)
|
self.assert_mdev_usage(compute, expected_amount=expected)
|
||||||
# We only want to get mdevs that are assigned to instances
|
|
||||||
mdevs = compute.driver._get_all_assigned_mediated_devices()
|
|
||||||
for mdev in mdevs:
|
|
||||||
mdev_name = libvirt_utils.mdev_uuid2name(mdev)
|
|
||||||
mdev_info = compute.driver._get_mediated_device_information(
|
|
||||||
mdev_name)
|
|
||||||
parent_name = mdev_info['parent']
|
|
||||||
parent_rp_name = compute.host + '_' + parent_name
|
|
||||||
parent_rp_uuid = self._get_provider_uuid_by_name(parent_rp_name)
|
|
||||||
parent_usage = self._get_provider_usages(parent_rp_uuid)
|
|
||||||
if orc.VGPU in parent_usage and parent_rp_name not in total_usages:
|
|
||||||
# We only set the total amount if we didn't had it already
|
|
||||||
total_usages[parent_rp_name] = parent_usage[orc.VGPU]
|
|
||||||
self.assertEqual(expected, len(mdevs))
|
|
||||||
self.assertEqual(expected, sum(total_usages[k] for k in total_usages))
|
|
||||||
|
|
||||||
def test_create_servers_with_vgpu(self):
|
def test_create_servers_with_vgpu(self):
|
||||||
self._create_server(
|
self._create_server(
|
||||||
@@ -173,19 +209,6 @@ class VGPUTests(VGPUTestBase):
|
|||||||
networks='auto', expected_state='ACTIVE')
|
networks='auto', expected_state='ACTIVE')
|
||||||
self.assert_vgpu_usage_for_compute(self.compute1, expected=1)
|
self.assert_vgpu_usage_for_compute(self.compute1, expected=1)
|
||||||
|
|
||||||
def _confirm_resize(self, server, host='host1'):
|
|
||||||
# NOTE(sbauza): Unfortunately, _cleanup_resize() in libvirt checks the
|
|
||||||
# host option to know the source hostname but given we have a global
|
|
||||||
# CONF, the value will be the hostname of the last compute service that
|
|
||||||
# was created, so we need to change it here.
|
|
||||||
# TODO(sbauza): Remove the below once we stop using CONF.host in
|
|
||||||
# libvirt and rather looking at the compute host value.
|
|
||||||
orig_host = CONF.host
|
|
||||||
self.flags(host=host)
|
|
||||||
super(VGPUTests, self)._confirm_resize(server)
|
|
||||||
self.flags(host=orig_host)
|
|
||||||
self._wait_for_state_change(server, 'ACTIVE')
|
|
||||||
|
|
||||||
def test_resize_servers_with_vgpu(self):
|
def test_resize_servers_with_vgpu(self):
|
||||||
# Add another compute for the sake of resizing
|
# Add another compute for the sake of resizing
|
||||||
self.compute2 = self.start_compute('host2')
|
self.compute2 = self.start_compute('host2')
|
||||||
@@ -302,10 +325,14 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
# host1 will have 2 physical GPUs :
|
# host1 will have 2 physical GPUs :
|
||||||
# - 0000:81:00.0 will only support nvidia-11
|
# - 0000:81:00.0 will only support nvidia-11
|
||||||
# - 0000:81:01.0 will only support nvidia-12
|
# - 0000:81:01.0 will only support nvidia-12
|
||||||
pgpu1_pci_addr = self.libvirt2pci_address(fakelibvirt.PGPU1_PCI_ADDR)
|
MDEVCAP_DEV1_PCI_ADDR = self.libvirt2pci_address(
|
||||||
pgpu2_pci_addr = self.libvirt2pci_address(fakelibvirt.PGPU2_PCI_ADDR)
|
fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
|
||||||
self.flags(device_addresses=[pgpu1_pci_addr], group='mdev_nvidia-11')
|
MDEVCAP_DEV2_PCI_ADDR = self.libvirt2pci_address(
|
||||||
self.flags(device_addresses=[pgpu2_pci_addr], group='mdev_nvidia-12')
|
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
|
||||||
|
self.flags(device_addresses=[MDEVCAP_DEV1_PCI_ADDR],
|
||||||
|
group='mdev_nvidia-11')
|
||||||
|
self.flags(device_addresses=[MDEVCAP_DEV2_PCI_ADDR],
|
||||||
|
group='mdev_nvidia-12')
|
||||||
|
|
||||||
# Prepare traits for later on
|
# Prepare traits for later on
|
||||||
self._create_trait('CUSTOM_NVIDIA_11')
|
self._create_trait('CUSTOM_NVIDIA_11')
|
||||||
@@ -323,7 +350,7 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
# nvidia-12 *BUT* doesn't actually have this type as a PCI capability,
|
# nvidia-12 *BUT* doesn't actually have this type as a PCI capability,
|
||||||
# we are sure that only 0000:81:00.0 is used.
|
# we are sure that only 0000:81:00.0 is used.
|
||||||
parent_name = mdevs[0]['parent']
|
parent_name = mdevs[0]['parent']
|
||||||
self.assertEqual(fakelibvirt.PGPU1_PCI_ADDR, parent_name)
|
self.assertEqual(fakelibvirt.MDEVCAP_DEV1_PCI_ADDR, parent_name)
|
||||||
|
|
||||||
# We are also sure that there is no RP for 0000:81:01.0 since there
|
# We are also sure that there is no RP for 0000:81:01.0 since there
|
||||||
# is no inventory for nvidia-12
|
# is no inventory for nvidia-12
|
||||||
@@ -333,7 +360,7 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
self.assertEqual(2, len(rp_uuids))
|
self.assertEqual(2, len(rp_uuids))
|
||||||
# ... but we double-check by asking the RP by its expected name
|
# ... but we double-check by asking the RP by its expected name
|
||||||
expected_pgpu2_rp_name = (self.compute1.host + '_' +
|
expected_pgpu2_rp_name = (self.compute1.host + '_' +
|
||||||
fakelibvirt.PGPU2_PCI_ADDR)
|
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
|
||||||
pgpu2_rp = self.placement.get(
|
pgpu2_rp = self.placement.get(
|
||||||
'/resource_providers?name=' + expected_pgpu2_rp_name).body[
|
'/resource_providers?name=' + expected_pgpu2_rp_name).body[
|
||||||
'resource_providers']
|
'resource_providers']
|
||||||
@@ -350,9 +377,9 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
# Make a restart to update the Resource Providers
|
# Make a restart to update the Resource Providers
|
||||||
self.compute1 = self.restart_compute_service(self.compute1)
|
self.compute1 = self.restart_compute_service(self.compute1)
|
||||||
pgpu1_rp_uuid = self._get_provider_uuid_by_name(
|
pgpu1_rp_uuid = self._get_provider_uuid_by_name(
|
||||||
self.compute1.host + '_' + fakelibvirt.PGPU1_PCI_ADDR)
|
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
|
||||||
pgpu2_rp_uuid = self._get_provider_uuid_by_name(
|
pgpu2_rp_uuid = self._get_provider_uuid_by_name(
|
||||||
self.compute1.host + '_' + fakelibvirt.PGPU2_PCI_ADDR)
|
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
|
||||||
|
|
||||||
pgpu1_inventory = self._get_provider_inventory(pgpu1_rp_uuid)
|
pgpu1_inventory = self._get_provider_inventory(pgpu1_rp_uuid)
|
||||||
self.assertEqual(16, pgpu1_inventory[orc.VGPU]['total'])
|
self.assertEqual(16, pgpu1_inventory[orc.VGPU]['total'])
|
||||||
@@ -363,8 +390,8 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
self._set_provider_traits(pgpu1_rp_uuid, ['CUSTOM_NVIDIA_11'])
|
self._set_provider_traits(pgpu1_rp_uuid, ['CUSTOM_NVIDIA_11'])
|
||||||
self._set_provider_traits(pgpu2_rp_uuid, ['CUSTOM_NVIDIA_12'])
|
self._set_provider_traits(pgpu2_rp_uuid, ['CUSTOM_NVIDIA_12'])
|
||||||
|
|
||||||
expected = {'CUSTOM_NVIDIA_11': fakelibvirt.PGPU1_PCI_ADDR,
|
expected = {'CUSTOM_NVIDIA_11': fakelibvirt.MDEVCAP_DEV1_PCI_ADDR,
|
||||||
'CUSTOM_NVIDIA_12': fakelibvirt.PGPU2_PCI_ADDR}
|
'CUSTOM_NVIDIA_12': fakelibvirt.MDEVCAP_DEV2_PCI_ADDR}
|
||||||
|
|
||||||
for trait in expected.keys():
|
for trait in expected.keys():
|
||||||
# Add a trait to the flavor
|
# Add a trait to the flavor
|
||||||
@@ -395,3 +422,131 @@ class VGPUMultipleTypesTests(VGPUTestBase):
|
|||||||
# We can be deterministic : since we asked for a specific type,
|
# We can be deterministic : since we asked for a specific type,
|
||||||
# we know which pGPU we landed.
|
# we know which pGPU we landed.
|
||||||
self.assertEqual(expected[trait], mdev_info['parent'])
|
self.assertEqual(expected[trait], mdev_info['parent'])
|
||||||
|
|
||||||
|
|
||||||
|
class DifferentMdevClassesTests(VGPUTestBase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(DifferentMdevClassesTests, self).setUp()
|
||||||
|
self.extra_spec = {"resources:CUSTOM_NOTVGPU": "1"}
|
||||||
|
self.flavor = self._create_flavor(extra_spec=self.extra_spec)
|
||||||
|
|
||||||
|
self.flags(
|
||||||
|
enabled_mdev_types=[fakelibvirt.MLX5_CORE_TYPE,
|
||||||
|
fakelibvirt.NVIDIA_12_VGPU_TYPE],
|
||||||
|
group='devices')
|
||||||
|
# we need to call the below again to ensure the updated
|
||||||
|
# 'device_addresses' value is read and the new groups created
|
||||||
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
|
# host1 will have 2 physical devices :
|
||||||
|
# - 0000:81:00.0 will only support mlx5_core
|
||||||
|
# - 0000:81:01.0 will only support nvidia-12
|
||||||
|
MDEVCAP_DEV1_PCI_ADDR = self.libvirt2pci_address(
|
||||||
|
fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
|
||||||
|
MDEVCAP_DEV2_PCI_ADDR = self.libvirt2pci_address(
|
||||||
|
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
|
||||||
|
self.flags(device_addresses=[MDEVCAP_DEV1_PCI_ADDR],
|
||||||
|
group='mdev_mlx5_core')
|
||||||
|
self.flags(device_addresses=[MDEVCAP_DEV2_PCI_ADDR],
|
||||||
|
group='mdev_nvidia-12')
|
||||||
|
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core')
|
||||||
|
|
||||||
|
self.compute1 = self.start_compute('host1')
|
||||||
|
# Regenerate the PCI addresses so they can support both mlx5 and
|
||||||
|
# nvidia-12 types
|
||||||
|
connection = self.computes[
|
||||||
|
self.compute1.host].driver._host.get_connection()
|
||||||
|
connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
|
||||||
|
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
|
||||||
|
generic_types=True)
|
||||||
|
# Make a restart to update the Resource Providers
|
||||||
|
self.compute1 = self.restart_compute_service(self.compute1)
|
||||||
|
|
||||||
|
def test_create_servers_with_different_mdev_classes(self):
|
||||||
|
physdev1_rp_uuid = self._get_provider_uuid_by_name(
|
||||||
|
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
|
||||||
|
physdev2_rp_uuid = self._get_provider_uuid_by_name(
|
||||||
|
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
|
||||||
|
|
||||||
|
# Remember, we asked to create 1st device inventory to use a
|
||||||
|
# CUSTOM_NOTVGPU RC.
|
||||||
|
physdev1_inventory = self._get_provider_inventory(physdev1_rp_uuid)
|
||||||
|
self.assertEqual(16, physdev1_inventory['CUSTOM_NOTVGPU']['total'])
|
||||||
|
# But, we didn't ask for the second device inventory...
|
||||||
|
physdev2_inventory = self._get_provider_inventory(physdev2_rp_uuid)
|
||||||
|
self.assertEqual(8, physdev2_inventory[orc.VGPU]['total'])
|
||||||
|
|
||||||
|
expected = {'CUSTOM_NOTVGPU': fakelibvirt.MDEVCAP_DEV1_PCI_ADDR,
|
||||||
|
orc.VGPU: fakelibvirt.MDEVCAP_DEV2_PCI_ADDR}
|
||||||
|
|
||||||
|
for mdev_rc in expected.keys():
|
||||||
|
# Use a specific mdev resource class for the flavor
|
||||||
|
extra_spec = {"resources:%s" % mdev_rc: "1"}
|
||||||
|
flavor = self._create_flavor(extra_spec=extra_spec)
|
||||||
|
|
||||||
|
# Use the new flavor for booting
|
||||||
|
server = self._create_server(
|
||||||
|
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||||
|
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
||||||
|
|
||||||
|
# Get the instance we just created
|
||||||
|
inst = objects.Instance.get_by_uuid(self.context, server['id'])
|
||||||
|
expected_rp_name = self.compute1.host + '_' + expected[mdev_rc]
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=1,
|
||||||
|
expected_rc=mdev_rc, instance=inst,
|
||||||
|
expected_rp_name=expected_rp_name)
|
||||||
|
|
||||||
|
def test_resize_servers_with_mlx5(self):
|
||||||
|
# Add another compute for the sake of resizing
|
||||||
|
self.compute2 = self.start_compute('host2')
|
||||||
|
# Regenerate the PCI addresses so they can support both mlx5 and
|
||||||
|
# nvidia-12 types
|
||||||
|
connection = self.computes[
|
||||||
|
self.compute2.host].driver._host.get_connection()
|
||||||
|
connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
|
||||||
|
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
|
||||||
|
generic_types=True)
|
||||||
|
# Make a restart to update the Resource Providers
|
||||||
|
self.compute2 = self.restart_compute_service(self.compute2)
|
||||||
|
|
||||||
|
# Use the new flavor for booting
|
||||||
|
server = self._create_server(
|
||||||
|
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||||
|
flavor_id=self.flavor, networks='auto', host=self.compute1.host)
|
||||||
|
|
||||||
|
# Make sure we only have 1 mdev for compute1
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
self.assert_mdev_usage(self.compute2, expected_amount=0,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
|
||||||
|
new_flavor = self._create_flavor(memory_mb=4096,
|
||||||
|
extra_spec=self.extra_spec)
|
||||||
|
# First, resize and then revert.
|
||||||
|
self._resize_server(server, new_flavor)
|
||||||
|
# After resizing, we then have two mdevs, one for each compute
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
self.assert_mdev_usage(self.compute2, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
|
||||||
|
self._revert_resize(server)
|
||||||
|
# We're back to the original resources usage
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
self.assert_mdev_usage(self.compute2, expected_amount=0,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
|
||||||
|
# Now resize and then confirm it.
|
||||||
|
self._resize_server(server, new_flavor)
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
self.assert_mdev_usage(self.compute2, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
|
||||||
|
self._confirm_resize(server)
|
||||||
|
# In the last case, the source guest disappeared so we only have 1 mdev
|
||||||
|
self.assert_mdev_usage(self.compute1, expected_amount=0,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
self.assert_mdev_usage(self.compute2, expected_amount=1,
|
||||||
|
expected_rc='CUSTOM_NOTVGPU')
|
||||||
|
|||||||
@@ -32,3 +32,5 @@ class DevicesConfTestCase(test.NoDBTestCase):
|
|||||||
self.assertIn('mdev_nvidia-12', CONF)
|
self.assertIn('mdev_nvidia-12', CONF)
|
||||||
self.assertEqual([], getattr(CONF, 'mdev_nvidia-11').device_addresses)
|
self.assertEqual([], getattr(CONF, 'mdev_nvidia-11').device_addresses)
|
||||||
self.assertEqual([], getattr(CONF, 'mdev_nvidia-12').device_addresses)
|
self.assertEqual([], getattr(CONF, 'mdev_nvidia-12').device_addresses)
|
||||||
|
self.assertEqual('VGPU', getattr(CONF, 'mdev_nvidia-11').mdev_class)
|
||||||
|
self.assertEqual('VGPU', getattr(CONF, 'mdev_nvidia-12').mdev_class)
|
||||||
|
|||||||
@@ -25285,14 +25285,15 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
# And now do it correctly !
|
# And now do it correctly !
|
||||||
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
||||||
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
||||||
|
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
|
||||||
self.assertEqual(['nvidia-11', 'nvidia-12'],
|
self.assertEqual(['nvidia-11', 'nvidia-12'],
|
||||||
drvr._get_supported_vgpu_types())
|
drvr._get_supported_vgpu_types())
|
||||||
self.assertEqual({'0000:84:00.0': 'nvidia-11',
|
self.assertEqual({'0000:84:00.0': 'nvidia-11',
|
||||||
'0000:85:00.0': 'nvidia-12'}, drvr.pgpu_type_mapping)
|
'0000:85:00.0': 'nvidia-12'}, drvr.pgpu_type_mapping)
|
||||||
self.assertEqual({'0000:84:00.0': 'VGPU',
|
self.assertEqual({'0000:84:00.0': 'VGPU',
|
||||||
'0000:85:00.0': 'VGPU'},
|
'0000:85:00.0': 'CUSTOM_NOTVGPU'},
|
||||||
drvr.mdev_class_mapping)
|
drvr.mdev_class_mapping)
|
||||||
self.assertEqual({orc.VGPU}, drvr.mdev_classes)
|
self.assertEqual({orc.VGPU, 'CUSTOM_NOTVGPU'}, drvr.mdev_classes)
|
||||||
mock_warning.assert_not_called()
|
mock_warning.assert_not_called()
|
||||||
|
|
||||||
def test_get_supported_vgpu_types_with_duplicate_types(self):
|
def test_get_supported_vgpu_types_with_duplicate_types(self):
|
||||||
@@ -25387,10 +25388,15 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
nova.conf.devices.register_dynamic_opts(CONF)
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
||||||
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
||||||
|
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
orc.VGPU,
|
orc.VGPU,
|
||||||
drvr._get_resource_class_for_device('pci_0000_84_00_0'))
|
drvr._get_resource_class_for_device('pci_0000_84_00_0'))
|
||||||
|
self.assertEqual(
|
||||||
|
'CUSTOM_NOTVGPU',
|
||||||
|
drvr._get_resource_class_for_device('pci_0000_85_00_0')
|
||||||
|
)
|
||||||
|
|
||||||
def test_get_resource_class_for_device_with_incorrect_pci_addr(self):
|
def test_get_resource_class_for_device_with_incorrect_pci_addr(self):
|
||||||
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
@@ -25426,8 +25432,10 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
nova.conf.devices.register_dynamic_opts(CONF)
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
||||||
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
||||||
|
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertEqual({orc.VGPU},
|
self.assertEqual(
|
||||||
|
{orc.VGPU, 'CUSTOM_NOTVGPU'},
|
||||||
drvr._get_supported_mdev_resource_classes())
|
drvr._get_supported_mdev_resource_classes())
|
||||||
|
|
||||||
@mock.patch.object(host.Host, 'device_lookup_by_name')
|
@mock.patch.object(host.Host, 'device_lookup_by_name')
|
||||||
|
|||||||
@@ -7423,9 +7423,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
# VGPU resource class.
|
# VGPU resource class.
|
||||||
self.mdev_classes = {orc.VGPU}
|
self.mdev_classes = {orc.VGPU}
|
||||||
return [first_type]
|
return [first_type]
|
||||||
# TODO(sbauza): Directly use the mdev_class option once we add the
|
mdev_class = group.mdev_class
|
||||||
# new configuration option.
|
|
||||||
mdev_class = getattr(group, 'mdev_class', orc.VGPU)
|
|
||||||
for device_address in group.device_addresses:
|
for device_address in group.device_addresses:
|
||||||
if device_address in self.pgpu_type_mapping:
|
if device_address in self.pgpu_type_mapping:
|
||||||
raise exception.InvalidLibvirtMdevConfig(
|
raise exception.InvalidLibvirtMdevConfig(
|
||||||
|
|||||||
10
releasenotes/notes/generic_mdevs_2-d1b1c71e8035527f.yaml
Normal file
10
releasenotes/notes/generic_mdevs_2-d1b1c71e8035527f.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
A new configuration option is now available for supporting PCI devices that
|
||||||
|
use the `VFIO-mdev`_ kernel framework and are stateless. Instead of using
|
||||||
|
the ``VGPU`` resource class for both the inventory and the related
|
||||||
|
allocations, the operator can instead request a custom resource class
|
||||||
|
for a specific mdev type by setting the dynamic ``mdev_class`` option in the corresponding ``[mdev_$(MDEV_TYPE)]`` group.
|
||||||
|
|
||||||
|
.. _`VFIO-mdev` : https://www.kernel.org/doc/html/latest/driver-api/vfio-mediated-device.html
|
||||||
Reference in New Issue
Block a user