Merge "vgpu: Allow device_addresses to not be set"
This commit is contained in:
commit
3e358bc37c
@ -29,15 +29,20 @@ guest instance.
|
|||||||
|
|
||||||
If more than one single mdev type is provided, then for each *mdev type* an
|
If more than one single mdev type is provided, then for each *mdev type* an
|
||||||
additional section, ``[mdev_$(MDEV_TYPE)]``, must be added to the configuration
|
additional section, ``[mdev_$(MDEV_TYPE)]``, must be added to the configuration
|
||||||
file. Each section then **must** be configured with a single configuration
|
file. Each section then can be configured with a single configuration option,
|
||||||
option, ``device_addresses``, which should be a list of PCI addresses
|
``device_addresses``, which should be a list of PCI addresses corresponding to
|
||||||
corresponding to the physical GPU(s) or mdev-capable hardware to assign to this
|
the physical GPU(s) or mdev-capable hardware to assign to this type. If
|
||||||
type.
|
`device_addresses` is not provided, then the related GPU type will be the
|
||||||
|
default for all the found GPUs that aren't used by other types.
|
||||||
|
|
||||||
|
|
||||||
If one or more sections are missing (meaning that a specific type is not wanted
|
If one or more sections are missing (meaning that a specific type is not wanted
|
||||||
to use for at least one physical device) or if no device addresses are provided
|
to use for at least one physical device), then Nova will only use the first
|
||||||
, then Nova will only use the first type that was provided by
|
type that was provided by ``[devices]/enabled_mdev_types``.
|
||||||
``[devices]/enabled_mdev_types``.
|
|
||||||
|
If two or more sections are not set with ``device_addresses`` values, then only
|
||||||
|
the first one will be used for defaulting all the non-defined GPUs to use this
|
||||||
|
type.
|
||||||
|
|
||||||
If the same PCI address is provided for two different types, nova-compute will
|
If the same PCI address is provided for two different types, nova-compute will
|
||||||
return an InvalidLibvirtMdevConfig exception at restart.
|
return an InvalidLibvirtMdevConfig exception at restart.
|
||||||
@ -54,6 +59,17 @@ will be accepted. A valid configuration could then be::
|
|||||||
[vgpu_nvidia-36]
|
[vgpu_nvidia-36]
|
||||||
device_addresses = 0000:86:00.0
|
device_addresses = 0000:86:00.0
|
||||||
|
|
||||||
|
Another valid configuration could be::
|
||||||
|
|
||||||
|
[devices]
|
||||||
|
enabled_mdev_types = nvidia-35, nvidia-36
|
||||||
|
|
||||||
|
[mdev_nvidia-35]
|
||||||
|
|
||||||
|
[mdev_nvidia-36]
|
||||||
|
device_addresses = 0000:86:00.0
|
||||||
|
|
||||||
|
|
||||||
""")
|
""")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -26870,7 +26870,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
'nvidia-12'])
|
'nvidia-12'])
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
||||||
def test_get_supported_vgpu_types(self, mock_warning):
|
def test_get_supported_vgpu_types_fails(self, mock_warning):
|
||||||
# Verify that by default we don't support vGPU types
|
# Verify that by default we don't support vGPU types
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertEqual([], drvr._get_supported_vgpu_types())
|
self.assertEqual([], drvr._get_supported_vgpu_types())
|
||||||
@ -26889,34 +26889,34 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
# Since the operator wanted to only support one type, it's fine to not
|
# Since the operator wanted to only support one type, it's fine to not
|
||||||
# provide config groups
|
# provide config groups
|
||||||
mock_warning.assert_not_called()
|
mock_warning.assert_not_called()
|
||||||
# For further checking
|
|
||||||
mock_warning.reset_mock()
|
|
||||||
|
|
||||||
# Now two types without forgetting to provide the pGPU addresses
|
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
||||||
|
def test_get_supported_vgpu_types_two_types_unset(self, mock_warning):
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
# Now two types without providing the pGPU addresses
|
||||||
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
group='devices')
|
group='devices')
|
||||||
# we need to call the below again to ensure the updated
|
|
||||||
# 'device_addresses' value is read and the new groups created
|
|
||||||
nova.conf.devices.register_dynamic_opts(CONF)
|
|
||||||
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
|
||||||
self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types())
|
self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types())
|
||||||
self.assertEqual({}, drvr.pgpu_type_mapping)
|
self.assertEqual({}, drvr.pgpu_type_mapping)
|
||||||
self.assertEqual({}, drvr.mdev_class_mapping)
|
self.assertEqual({}, drvr.mdev_class_mapping)
|
||||||
self.assertEqual({}, drvr.mdev_type_max_mapping)
|
self.assertEqual({}, drvr.mdev_type_max_mapping)
|
||||||
# Here we only support one vGPU type
|
# Here we only support one vGPU type
|
||||||
self.assertEqual({orc.VGPU}, drvr.mdev_classes)
|
self.assertEqual({orc.VGPU}, drvr.mdev_classes)
|
||||||
msg = ("The mdev type '%(type)s' was listed in '[devices] "
|
msg = ("Mdev type default already set to "
|
||||||
"enabled_mdev_types' but no corresponding "
|
" %(default_type)s so %(this_type)s will not "
|
||||||
"'[mdev_%(type)s]' group or "
|
"be used." % {
|
||||||
"'[mdev_%(type)s] device_addresses' "
|
'default_type': 'nvidia-11',
|
||||||
"option was defined. Only the first type '%(ftype)s' "
|
'this_type': 'nvidia-12'})
|
||||||
"will be used." % {'type': 'nvidia-12',
|
|
||||||
'ftype': 'nvidia-11'})
|
|
||||||
mock_warning.assert_called_once_with(msg)
|
mock_warning.assert_called_once_with(msg)
|
||||||
# For further checking
|
|
||||||
mock_warning.reset_mock()
|
|
||||||
|
|
||||||
# And now do it correctly !
|
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
||||||
|
def test_get_supported_vgpu_types(self, mock_warning):
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
|
group='devices')
|
||||||
|
# we need to call the below again to ensure the updated
|
||||||
|
# 'device_addresses' value is read and the new groups created
|
||||||
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
||||||
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
|
||||||
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
|
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
|
||||||
@ -26957,20 +26957,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
libvirt_driver.LibvirtDriver,
|
libvirt_driver.LibvirtDriver,
|
||||||
fake.FakeVirtAPI(), False)
|
fake.FakeVirtAPI(), False)
|
||||||
|
|
||||||
@mock.patch.object(nova.conf.devices, 'register_dynamic_opts')
|
|
||||||
def test_get_supported_vgpu_types_registering_dynamic_opts(self, rdo):
|
|
||||||
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
|
||||||
group='devices')
|
|
||||||
|
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
|
||||||
drvr._get_supported_vgpu_types()
|
|
||||||
|
|
||||||
# Okay below is confusing, but remember, ._get_supported_vgpu_types()
|
|
||||||
# is first called by the LibvirtDriver object creation, so when
|
|
||||||
# calling the above drvr._get_supported_vgpu_types() method, it will
|
|
||||||
# be the second time that register_dynamic_opts() will be called.
|
|
||||||
rdo.assert_has_calls([mock.call(CONF), mock.call(CONF)])
|
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
||||||
def test_get_supported_vgpu_types_with_a_single_type(self, mock_warning):
|
def test_get_supported_vgpu_types_with_a_single_type(self, mock_warning):
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
@ -26990,6 +26976,49 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
self.assertEqual({'CUSTOM_NOTVGPU'}, drvr.mdev_classes)
|
self.assertEqual({'CUSTOM_NOTVGPU'}, drvr.mdev_classes)
|
||||||
mock_warning.assert_not_called()
|
mock_warning.assert_not_called()
|
||||||
|
|
||||||
|
def test_get_supported_vgpu_types_with_default_type(self):
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
|
group='devices')
|
||||||
|
# we need to call the below again to ensure the updated
|
||||||
|
# 'device_addresses' value is read and the new groups created
|
||||||
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
|
# Enable nvidia-11 as a the default type for all GPUs but 0000:84:00.0
|
||||||
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-12')
|
||||||
|
|
||||||
|
self.assertEqual(['nvidia-11', 'nvidia-12'],
|
||||||
|
drvr._get_supported_vgpu_types())
|
||||||
|
self.assertEqual({'0000:84:00.0': 'nvidia-12'}, drvr.pgpu_type_mapping)
|
||||||
|
self.assertEqual('nvidia-11', drvr.pgpu_type_default)
|
||||||
|
|
||||||
|
@mock.patch.object(libvirt_driver.LOG, 'warning')
|
||||||
|
def test_get_supported_vgpu_types_with_duplicate_default_type(
|
||||||
|
self, mock_warning):
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12', 'nvidia-13'],
|
||||||
|
group='devices')
|
||||||
|
# we need to call the below again to ensure the updated
|
||||||
|
# 'device_addresses' value is read and the new groups created
|
||||||
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
|
|
||||||
|
# Add a specific GPU for a third type
|
||||||
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-13')
|
||||||
|
# As both nvidia-11 and nvidia-12 aren't set with device_addresses,
|
||||||
|
# only one of them should be the default.
|
||||||
|
|
||||||
|
# nvidia-12 won't be supported since none of the GPUs will use it.
|
||||||
|
self.assertEqual(['nvidia-11', 'nvidia-13'],
|
||||||
|
drvr._get_supported_vgpu_types())
|
||||||
|
self.assertEqual({'0000:84:00.0': 'nvidia-13'}, drvr.pgpu_type_mapping)
|
||||||
|
# There can be only one :-)
|
||||||
|
self.assertEqual('nvidia-11', drvr.pgpu_type_default)
|
||||||
|
msg = ("Mdev type default already set to "
|
||||||
|
" %(default_type)s so %(this_type)s will not "
|
||||||
|
"be used." % {
|
||||||
|
'default_type': 'nvidia-11',
|
||||||
|
'this_type': 'nvidia-12'})
|
||||||
|
mock_warning.assert_called_once_with(msg)
|
||||||
|
|
||||||
def test_get_vgpu_type_per_pgpu(self):
|
def test_get_vgpu_type_per_pgpu(self):
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
device = 'pci_0000_84_00_0'
|
device = 'pci_0000_84_00_0'
|
||||||
@ -27046,6 +27075,20 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
# 0000:86:00.0 wasn't configured
|
# 0000:86:00.0 wasn't configured
|
||||||
self.assertIsNone(drvr._get_vgpu_type_per_pgpu('pci_0000_86_00_0'))
|
self.assertIsNone(drvr._get_vgpu_type_per_pgpu('pci_0000_86_00_0'))
|
||||||
|
|
||||||
|
def test_get_vgpu_type_per_pgpu_with_default_type(self):
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
|
group='devices')
|
||||||
|
# we need to call the below again to ensure the updated
|
||||||
|
# 'device_addresses' value is read and the new groups created
|
||||||
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
|
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
self.assertEqual('nvidia-11',
|
||||||
|
drvr._get_vgpu_type_per_pgpu('pci_0000_84_00_0'))
|
||||||
|
# Any GPU but 0000:84:00.0 defaults now to nvidia-12
|
||||||
|
self.assertEqual('nvidia-12',
|
||||||
|
drvr._get_vgpu_type_per_pgpu('pci_0000_85_00_0'))
|
||||||
|
|
||||||
def test_get_resource_class_for_device(self):
|
def test_get_resource_class_for_device(self):
|
||||||
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
|
||||||
group='devices')
|
group='devices')
|
||||||
|
@ -540,6 +540,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
self.mdev_classes = set([])
|
self.mdev_classes = set([])
|
||||||
# this is for knowing how many mdevs can be created by a type
|
# this is for knowing how many mdevs can be created by a type
|
||||||
self.mdev_type_max_mapping = collections.defaultdict(str)
|
self.mdev_type_max_mapping = collections.defaultdict(str)
|
||||||
|
# if we have a wildcard, we default to use this mdev type
|
||||||
|
self.pgpu_type_default = None
|
||||||
self.supported_vgpu_types = self._get_supported_vgpu_types()
|
self.supported_vgpu_types = self._get_supported_vgpu_types()
|
||||||
|
|
||||||
# This dict is for knowing which mdevs are already claimed by some
|
# This dict is for knowing which mdevs are already claimed by some
|
||||||
@ -8199,38 +8201,36 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
# be calling this method before init_host()
|
# be calling this method before init_host()
|
||||||
nova.conf.devices.register_dynamic_opts(CONF)
|
nova.conf.devices.register_dynamic_opts(CONF)
|
||||||
|
|
||||||
|
enabled_mdev_types = []
|
||||||
for vgpu_type in CONF.devices.enabled_mdev_types:
|
for vgpu_type in CONF.devices.enabled_mdev_types:
|
||||||
|
enabled_mdev_types.append(vgpu_type)
|
||||||
|
# NOTE(sbauza) group is now always set because we register the
|
||||||
|
# dynamic options above
|
||||||
group = getattr(CONF, 'mdev_%s' % vgpu_type, None)
|
group = getattr(CONF, 'mdev_%s' % vgpu_type, None)
|
||||||
if group is None or not group.device_addresses:
|
if group is None:
|
||||||
first_type = CONF.devices.enabled_mdev_types[0]
|
# Should never happen but if so, just fails early.
|
||||||
if len(CONF.devices.enabled_mdev_types) > 1:
|
raise exception.InvalidLibvirtMdevConfig(
|
||||||
# Only provide the warning if the operator provided more
|
reason="can't find '[devices]/mdev_%s group' "
|
||||||
# than one type as it's not needed to provide groups
|
"in the configuration" % group
|
||||||
# if you only use one vGPU type.
|
)
|
||||||
msg = ("The mdev type '%(type)s' was listed in '[devices] "
|
|
||||||
"enabled_mdev_types' but no corresponding "
|
|
||||||
"'[mdev_%(type)s]' group or "
|
|
||||||
"'[mdev_%(type)s] device_addresses' "
|
|
||||||
"option was defined. Only the first type "
|
|
||||||
"'%(ftype)s' will be used." % {'type': vgpu_type,
|
|
||||||
'ftype': first_type})
|
|
||||||
LOG.warning(msg)
|
|
||||||
# We need to reset the mapping tables that we started to
|
|
||||||
# provide keys and values from previously processed vGPUs but
|
|
||||||
# since there is a problem for this vGPU type, we only want to
|
|
||||||
# support only the first type.
|
|
||||||
self.pgpu_type_mapping.clear()
|
|
||||||
self.mdev_class_mapping.clear()
|
|
||||||
first_group = getattr(CONF, 'mdev_%s' % first_type, None)
|
|
||||||
if first_group is None:
|
|
||||||
self.mdev_classes = {orc.VGPU}
|
|
||||||
else:
|
|
||||||
self.mdev_classes = {first_group.mdev_class}
|
|
||||||
return [first_type]
|
|
||||||
mdev_class = group.mdev_class
|
mdev_class = group.mdev_class
|
||||||
# By default, max_instances is None
|
# By default, max_instances is None
|
||||||
if group.max_instances:
|
if group.max_instances:
|
||||||
self.mdev_type_max_mapping[vgpu_type] = group.max_instances
|
self.mdev_type_max_mapping[vgpu_type] = group.max_instances
|
||||||
|
if not group.device_addresses:
|
||||||
|
if not self.pgpu_type_default:
|
||||||
|
self.pgpu_type_default = vgpu_type
|
||||||
|
self.mdev_classes.add(mdev_class)
|
||||||
|
else:
|
||||||
|
msg = ("Mdev type default already set to "
|
||||||
|
" %(default_type)s so %(this_type)s will not "
|
||||||
|
"be used." % {
|
||||||
|
'default_type': self.pgpu_type_default,
|
||||||
|
'this_type': vgpu_type})
|
||||||
|
LOG.warning(msg)
|
||||||
|
# we remove the type from the supported list.
|
||||||
|
enabled_mdev_types.remove(vgpu_type)
|
||||||
|
continue
|
||||||
for device_address in group.device_addresses:
|
for device_address in group.device_addresses:
|
||||||
if device_address in self.pgpu_type_mapping:
|
if device_address in self.pgpu_type_mapping:
|
||||||
raise exception.InvalidLibvirtMdevConfig(
|
raise exception.InvalidLibvirtMdevConfig(
|
||||||
@ -8247,7 +8247,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
self.pgpu_type_mapping[device_address] = vgpu_type
|
self.pgpu_type_mapping[device_address] = vgpu_type
|
||||||
self.mdev_class_mapping[device_address] = mdev_class
|
self.mdev_class_mapping[device_address] = mdev_class
|
||||||
self.mdev_classes.add(mdev_class)
|
self.mdev_classes.add(mdev_class)
|
||||||
return CONF.devices.enabled_mdev_types
|
return enabled_mdev_types
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_pci_id_from_libvirt_name(
|
def _get_pci_id_from_libvirt_name(
|
||||||
@ -8281,16 +8281,14 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
if not self.supported_vgpu_types:
|
if not self.supported_vgpu_types:
|
||||||
return
|
return
|
||||||
|
|
||||||
if len(self.supported_vgpu_types) == 1:
|
|
||||||
first_type = self.supported_vgpu_types[0]
|
|
||||||
group = getattr(CONF, 'mdev_%s' % first_type, None)
|
|
||||||
if group is None or not group.device_addresses:
|
|
||||||
return first_type
|
|
||||||
|
|
||||||
device_address = self._get_pci_id_from_libvirt_name(device_address)
|
device_address = self._get_pci_id_from_libvirt_name(device_address)
|
||||||
if not device_address:
|
if not device_address:
|
||||||
return
|
return
|
||||||
return self.pgpu_type_mapping.get(device_address)
|
mdev_type = self.pgpu_type_mapping.get(device_address)
|
||||||
|
# if we can't find the mdev type by the config, do we have a default
|
||||||
|
# type because of a config group not using device_addresses ?
|
||||||
|
# NOTE(sbauza): By default pgpu_type_default is None if unset
|
||||||
|
return mdev_type or self.pgpu_type_default
|
||||||
|
|
||||||
def _get_resource_class_for_device(self, device_address):
|
def _get_resource_class_for_device(self, device_address):
|
||||||
"""Returns the resource class for the inventory of this device.
|
"""Returns the resource class for the inventory of this device.
|
||||||
|
Loading…
Reference in New Issue
Block a user