sysinv: Intel ACC100 (Mt Bryce) enablement

This commit adds SR-IOV device plugin support for forward error
correction (FEC) devices that are enabled on an Intel ACC100 (Mt.
Bryce). The Intel ACC100 is mounted on the Lisbon ACC100 card.

The FEC device is intended for use by a DPDK application. It is
presented to the system under the resource name "intel_acc100_fec".

Example usage to modify the device:

system host-device-modify <host> <device_name> \
  -e true \
  --driver igb_uio \
  --vf-driver <driver> \
  -N <num_vfs>

An example assignment to a pod:

 resources:
      requests:
        memory: 4Gi
        intel.com/intel_acc100_fec: '16'
        cpu: 6
      limits:
        hugepages-1Gi: 2Gi
        memory: 4Gi
        intel.com/intel_acc100_fec: '16'
        cpu: 6

Story: 2008440
Task: 41403

Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/775253
Depends-On: https://review.opendev.org/c/starlingx/integ/+/775252

Signed-off-by: Babak Sarashki <Babak.SarAshki@windriver.com>
Change-Id: I831fd16a0410ee988365c067789f760139274ec8
This commit is contained in:
Babak Sarashki 2021-01-05 16:08:43 -05:00 committed by Babak Sarashki
parent 500d4e250c
commit 6ab90d747a
8 changed files with 192 additions and 96 deletions

View File

@ -8,8 +8,25 @@ from cgtsclient.common import utils
from cgtsclient import exc
from cgtsclient.v1 import ihost as ihost_utils
# PCI Device Class ID in hexadecimal string
PCI_DEVICE_CLASS_FPGA = '120000'
# Account for those accelerator cards with a progIF set.
# PCI Device Class ID in hexadecimal string.
class pci_device_class_acclr(object):
    """Stand-in for the accelerator PCI class ID that matches several values.

    An instance compares equal to any of the class-ID strings held in
    ``pci_class_ids``, so existing ``pclass_id == PCI_DEVICE_CLASS_FPGA``
    checks keep working for accelerator cards that report a non-zero
    programming interface (ProgIF) in their class ID.

    The class derives from ``object`` so that it is a new-style class on
    Python 2 as well as Python 3.
    """

    def __init__(self):
        # Accepted class IDs as hexadecimal strings: the default '120000'
        # plus the variant whose ProgIF is set ('120001').
        self.pci_class_ids = ['120000', '120001']

    def __eq__(self, other):
        """Return True when ``other`` is one of the accepted class IDs."""
        return other in self.pci_class_ids

    def __ne__(self, other):
        """Return the exact negation of ``__eq__``."""
        # Derived from __eq__ so the two operators can never disagree.
        return not self.__eq__(other)

    def __str__(self):
        """Return the accepted class IDs joined by single spaces."""
        return ' '.join(self.pci_class_ids)
PCI_DEVICE_CLASS_FPGA = pci_device_class_acclr()
def _print_device_show(device):

View File

@ -3355,12 +3355,12 @@ class HostController(rest.RestController):
self._check_sriovdp_interface_datanets(interface)
def _semantic_check_fpga_fec_device(self, host, dev, force_unlock=False):
def _semantic_check_acclr_fec_device(self, host, dev, force_unlock=False):
"""
Perform semantic checks on an FPGA FEC device.
Perform semantic checks on an FEC device.
"""
if (force_unlock or
dev.pdevice_id != device.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF):
dev.pdevice_id not in device.SRIOV_ENABLED_FEC_DEVICE_IDS):
return
sriov_numvfs = dev.sriov_numvfs
@ -3371,7 +3371,7 @@ class HostController(rest.RestController):
LOG.info("check sriov_numvfs=%s sriov_vfs_pci_address=%s" %
(sriov_numvfs, dev.sriov_vfs_pci_address))
else:
msg = (_("Expecting number of FPGA device sriov_numvfs=%s. "
msg = (_("Expecting number of FEC device sriov_numvfs=%s. "
"Please wait a few minutes for inventory update and "
"retry host-unlock." %
sriov_numvfs))
@ -3381,15 +3381,15 @@ class HostController(rest.RestController):
host['uuid'])
raise wsme.exc.ClientSideError(msg)
def _semantic_check_fpga_device(self, host, dev, force_unlock=False):
def _semantic_check_acclr_device(self, host, dev, force_unlock=False):
"""
Perform semantic checks on an FPGA device.
Perform semantic checks on an accelerator device.
"""
if dev.pclass_id != device.PCI_DEVICE_CLASS_FPGA:
return
if dev.pdevice_id == device.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF:
self._semantic_check_fpga_fec_device(host, dev, force_unlock)
if dev.pdevice_id in device.SRIOV_ENABLED_FEC_DEVICE_IDS:
self._semantic_check_acclr_fec_device(host, dev, force_unlock)
def _semantic_check_devices(self, host, force_unlock=False):
"""
@ -3399,7 +3399,7 @@ class HostController(rest.RestController):
pecan.request.dbapi.pci_device_get_by_host(host['uuid']))
for dev in devices:
if dev.pclass_id == device.PCI_DEVICE_CLASS_FPGA:
self._semantic_check_fpga_device(host, dev, force_unlock)
self._semantic_check_acclr_device(host, dev, force_unlock)
def _semantic_check_unlock_kube_upgrade(self, ihost, force_unlock=False):
"""

View File

@ -354,23 +354,23 @@ def _check_field(field):
def _check_device_sriov(device, host):
sriov_update = False
if (device['pdevice_id'] == dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF and
if (device['pdevice_id'] in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS and
host.invprovision != constants.PROVISIONED):
raise wsme.exc.ClientSideError(_("Cannot configure device %s "
"until host %s is unlocked for the first time." %
(device['uuid'], host.hostname)))
if (device['pdevice_id'] not in dconstants.SRIOV_ENABLED_DEVICE_IDS and
if (device['pdevice_id'] not in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS and
'sriov_numvfs' in device.keys() and device['sriov_numvfs']):
raise wsme.exc.ClientSideError(_("The number of SR-IOV VFs is specified "
"but the device is not supported for SR-IOV"))
if (device['pdevice_id'] not in dconstants.SRIOV_ENABLED_DEVICE_IDS and
if (device['pdevice_id'] not in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS and
'sriov_vf_driver' in device.keys() and device['sriov_vf_driver']):
raise wsme.exc.ClientSideError(_("The SR-IOV VF driver is specified "
"but the device is not supported for SR-IOV"))
if device['pdevice_id'] not in dconstants.SRIOV_ENABLED_DEVICE_IDS:
if device['pdevice_id'] not in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS:
return sriov_update
if 'sriov_numvfs' not in device.keys():
@ -400,15 +400,18 @@ def _check_device_sriov(device, host):
raise wsme.exc.ClientSideError(_("The SR-IOV VF driver must be specified"))
else:
if (device['sriov_vf_driver'] is not None and
device['pdevice_id'] == dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF and
device['sriov_vf_driver'] not in dconstants.FPGA_INTEL_5GNR_FEC_VF_VALID_DRIVERS):
device['pdevice_id'] in
dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS and
device['sriov_vf_driver'] not in
dconstants.FPGA_INTEL_5GNR_FEC_VF_VALID_DRIVERS):
msg = (_("Value for SR-IOV VF driver must be one of "
"{}").format(', '.join(dconstants.FPGA_INTEL_5GNR_FEC_VF_VALID_DRIVERS)))
raise wsme.exc.ClientSideError(msg)
if ('driver' in device.keys() and device['driver'] and
device['pdevice_id'] == dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF and
device['driver'] not in dconstants.FPGA_INTEL_5GNR_FEC_PF_VALID_DRIVERS):
device['pdevice_id'] in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS and
device['driver'] not in
dconstants.FPGA_INTEL_5GNR_FEC_PF_VALID_DRIVERS):
msg = (_("Value for SR-IOV PF driver must be one of "
"{}").format(', '.join(dconstants.FPGA_INTEL_5GNR_FEC_PF_VALID_DRIVERS)))
raise wsme.exc.ClientSideError(msg)

View File

@ -6,8 +6,25 @@
from sysinv.common import constants
# PCI Device Class ID in hexidecimal string
PCI_DEVICE_CLASS_FPGA = '120000'
# Account for those accelerator cards with a progIF set.
# PCI Device Class ID in hexadecimal string.
class pci_device_class_acclr(object):
    """Comparison helper covering every accelerator PCI class ID.

    Comparing an instance with ``==`` / ``!=`` against a class-ID string
    tests membership in ``pci_class_ids`` rather than identity with a
    single value.
    """

    def __init__(self):
        # Hexadecimal class-ID strings accepted as "accelerator".
        self.pci_class_ids = ['120000', '120001']

    def __str__(self):
        # All accepted class IDs, separated by one space each.
        separator = ' '
        return separator.join(self.pci_class_ids)

    def __eq__(self, other):
        # Equal means "other is one of the accepted class IDs".
        matched = other in self.pci_class_ids
        return matched

    def __ne__(self, other):
        # Not-equal means "other is none of the accepted class IDs".
        if other in self.pci_class_ids:
            return False
        return True
PCI_DEVICE_CLASS_FPGA = pci_device_class_acclr()
# Device Vendors
PCI_DEVICE_VENDOR_INTEL = "8086"
@ -16,8 +33,12 @@ PCI_DEVICE_VENDOR_INTEL = "8086"
PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF = "0d8f"
PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_VF = "0d90"
# SR-IOV enabled devices
SRIOV_ENABLED_DEVICE_IDS = [PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF]
PCI_DEVICE_ID_ACC100_INTEL_5GNR_FEC_PF = "0d5c"
PCI_DEVICE_ID_ACC100_INTEL_5GNR_FEC_VF = "0d5d"
# SR-IOV enabled FEC devices
SRIOV_ENABLED_FEC_DEVICE_IDS = [PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF,
PCI_DEVICE_ID_ACC100_INTEL_5GNR_FEC_PF]
FPGA_INTEL_5GNR_FEC_DRIVER_IGB_UIO = "igb_uio"
FPGA_INTEL_5GNR_FEC_DRIVER_NONE = "none"
@ -28,6 +49,27 @@ FPGA_INTEL_5GNR_FEC_VF_VALID_DRIVERS = [FPGA_INTEL_5GNR_FEC_DRIVER_IGB_UIO,
FPGA_INTEL_5GNR_FEC_PF_VALID_DRIVERS = [FPGA_INTEL_5GNR_FEC_DRIVER_IGB_UIO,
FPGA_INTEL_5GNR_FEC_DRIVER_NONE]
# This dictionary is used when generating resourceName and device_config.
# Where:
# The key for both devices in the resultant device_config (puppet/device.py)
# is 'platform::devices::fpga::fec::params::device_config'.
# fec_name(Required): used in puppet/kubernetes.py to generate resourceName.
# dvconf(Optional): used in puppet/devices.py to generate device_config and
# represents optional puppet resources the device may require for
# configuration. The current implementation supports the invocation of only
# one ACCLR FEC class.
ACCLR_FEC_RESOURCES = {
PCI_DEVICE_ID_ACC100_INTEL_5GNR_FEC_PF:
{
'fec_name': "intel_acc100_fec",
'dvconf': {'platform::devices::acc100::fec::enabled': True}
},
PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF:
{
'fec_name': "intel_fpga_fec",
}
}
# Device Image
DEVICE_IMAGE_TMP_PATH = '/tmp/device_images'
DEVICE_IMAGE_PATH = '/opt/platform/device_images'

View File

@ -2672,8 +2672,9 @@ class ConductorManager(service.PeriodicService):
pci_dev.get('sriov_vf_pdevice_id', None),
'driver': pci_dev['driver']}
LOG.info("attr: %s" % attr)
if (host['administrative'] == constants.ADMIN_LOCKED and
pci_dev['pdevice_id'] == dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF):
if (host['administrative'] == constants.ADMIN_LOCKED
and pci_dev['pdevice_id'] in
dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS):
# For an SR-IOV enabled FEC device, the actual VF driver
# is only updated on an unlocked host. The set
# of VF PCI addresses may not be known when the

View File

@ -58,67 +58,83 @@ class DevicePuppet(base.BasePuppet):
'platform::devices::qat::service_enabled': True,
}
def _get_host_fpga_fec_device_config(self, fpga_fec_devices):
def _get_host_acclr_fec_device_config(self, pci_device_list):
"""
Builds a config dictionary for FPGA FEC devices to be used by the
Builds a config dictionary for FEC devices to be used by the
platform devices (worker) puppet resource.
"""
device_config = {}
vf_config = {}
for device in fpga_fec_devices:
if not device.get('driver', None) or not device.get('sriov_numvfs', None):
continue
device_config = {}
acclr_config = {}
puppet_dflt = 'platform::devices::fpga::fec::params::device_config'
name = 'pci-%s' % device.pciaddr
for dv in dconstants.ACCLR_FEC_RESOURCES:
for device in pci_device_list[dv]:
if (not device.get('driver', None) or
not device.get('sriov_numvfs', None)):
continue
# Format the vf addresses as quoted strings in order to prevent
# puppet from treating the address as a time/date value
vf_addrs = device.get('sriov_vfs_pci_address', [])
if vf_addrs:
vf_addrs = [quoted_str(addr.strip())
for addr in vf_addrs.split(",") if addr]
if len(vf_addrs) == device.get('sriov_numvfs', 0):
vf_driver = device.get('sriov_vf_driver', None)
if vf_driver:
if constants.SRIOV_DRIVER_TYPE_VFIO in vf_driver:
vf_driver = constants.SRIOV_DRIVER_VFIO_PCI
for addr in vf_addrs:
vf_config.update({
addr: {
'addr': addr,
'driver': vf_driver
}
})
# Pass extra parameters to puppet
if 'dvconf' in dconstants.ACCLR_FEC_RESOURCES[dv]:
acclr_config.update(
dconstants.ACCLR_FEC_RESOURCES[dv]['dvconf'])
pf_config = {
device.pciaddr: {
'num_vfs': device['sriov_numvfs'],
'addr': quoted_str(device['pciaddr'].strip()),
'driver': device['driver'],
'device_id': device['pdevice_id']
name = 'pci-%s' % device.pciaddr
# Format the vf addresses as quoted strings in order to prevent
# puppet from treating the address as a time/date value
vf_addrs = device.get('sriov_vfs_pci_address', [])
if vf_addrs:
vf_addrs = [quoted_str(addr.strip())
for addr in vf_addrs.split(",") if addr]
if len(vf_addrs) == device.get('sriov_numvfs', 0):
vf_driver = device.get('sriov_vf_driver', None)
if vf_driver:
if constants.SRIOV_DRIVER_TYPE_VFIO in vf_driver:
vf_driver = constants.SRIOV_DRIVER_VFIO_PCI
for addr in vf_addrs:
vf_config.update({
addr: {
'addr': addr,
'driver': vf_driver
}
})
pf_config = {
device.pciaddr: {
'num_vfs': device['sriov_numvfs'],
'addr': quoted_str(device['pciaddr'].strip()),
'driver': device['driver'],
'device_id': device['pdevice_id']
}
}
}
device_config = {
name: {
'pf_config': pf_config,
'vf_config': vf_config
device_config = {
name: {
'pf_config': pf_config,
'vf_config': vf_config
}
}
}
return {
'platform::devices::fpga::fec::params::device_config': device_config
}
def _get_host_fpga_device_config(self, pci_device_list):
acclr_config.update({puppet_dflt: device_config})
return acclr_config
def _get_host_acclr_device_config(self, pci_device_list):
"""
Builds a config dictionary for FPGA devices to be used by the platform
Builds a config dictionary for FEC devices to be used by the platform
devices (worker) puppet resource.
"""
fpga_config = {}
fpga_fec_devices = pci_device_list[dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF]
if fpga_fec_devices:
fec_config = self._get_host_fpga_fec_device_config(fpga_fec_devices)
fpga_config.update(fec_config)
return fpga_config
acclr_config = {}
for acclr_devid in dconstants.SRIOV_ENABLED_FEC_DEVICE_IDS:
if acclr_devid not in pci_device_list:
continue
acclr_device = pci_device_list[acclr_devid]
if acclr_device:
acclr_config.update(self._get_host_acclr_fec_device_config(
acclr_device))
return acclr_config
def get_host_config(self, host):
if constants.WORKER not in host.subfunctions:
@ -136,8 +152,8 @@ class DevicePuppet(base.BasePuppet):
if qat_devices:
device_config.update(qat_devices)
fpga_devices = self._get_host_fpga_device_config(devices)
if fpga_devices:
device_config.update(fpga_devices)
acclr_devices = self._get_host_acclr_fec_device_config(devices)
if acclr_devices:
device_config.update(acclr_devices)
return device_config

View File

@ -447,7 +447,7 @@ class KubernetesPuppet(base.BasePuppet):
driver = port['driver']
return driver
def _get_pcidp_fpga_driver(self, device):
def _get_pcidp_fec_driver(self, device):
sriov_vf_driver = device.get('sriov_vf_driver', None)
if (sriov_vf_driver and
constants.SRIOV_DRIVER_TYPE_VFIO in sriov_vf_driver):
@ -547,13 +547,17 @@ class KubernetesPuppet(base.BasePuppet):
return list(resources.values())
def _get_pcidp_fpga_resources(self, host):
def _get_pcidp_fec_resources(self, host):
resources = {}
fec_name = "intel_fpga_fec"
for d in self.dbapi.pci_device_get_by_host(host.id):
if (d['pclass_id'] == dconstants.PCI_DEVICE_CLASS_FPGA
and d['pdevice_id'] == dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF):
for ddevid in dconstants.ACCLR_FEC_RESOURCES:
fec_name = dconstants.ACCLR_FEC_RESOURCES[ddevid]['fec_name']
for d in self.dbapi.pci_device_get_by_host(host.id):
if d['pdevice_id'] != ddevid:
continue
resource = resources.get(fec_name, None)
if not resource:
resource = {
@ -568,17 +572,20 @@ class KubernetesPuppet(base.BasePuppet):
vendor = d.get('pvendor_id', None)
if not vendor:
LOG.error("Failed to get vendor id for pci device %s", d['pciaddr'])
LOG.error("Failed to get vendor id for pci device %s",
d['pciaddr'])
continue
device = d.get('sriov_vf_pdevice_id', None)
if not device:
LOG.error("Failed to get device id for pci device %s", d['pciaddr'])
LOG.error("Failed to get device id for pci device %s",
d['pciaddr'])
continue
driver = self._get_pcidp_fpga_driver(d)
driver = self._get_pcidp_fec_driver(d)
if not driver:
LOG.error("Failed to get driver for pci device %s", d['pciaddr'])
LOG.error("Failed to get driver for pci device %s",
d['pciaddr'])
continue
vendor_list = resource['selectors']['vendors']
@ -604,5 +611,5 @@ class KubernetesPuppet(base.BasePuppet):
constants.INTERFACE_CLASS_PCI_SRIOV)
pcipt_resources = self._get_pcidp_network_resources_by_ifclass(
constants.INTERFACE_CLASS_PCI_PASSTHROUGH)
fpga_resources = self._get_pcidp_fpga_resources(host)
return json.dumps({'resourceList': sriov_resources + pcipt_resources + fpga_resources})
fec_resources = self._get_pcidp_fec_resources(host)
return json.dumps({'resourceList': sriov_resources + pcipt_resources + fec_resources})

View File

@ -160,16 +160,26 @@ class TestPostDevice(TestDevice, dbbase.ControllerHostTestCase):
class TestPatchDevice(TestDevice):
def setUp(self):
def setUp(self,
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA.__str__().split(' '),
pdev_id='0d8f'
):
super(TestPatchDevice, self).setUp()
# PCI_DEVICE_CLASS_FPGA is now a class that overloads the equality operators.
# This was needed to account for PCI devices with ProgIF other than 0x0.
# First element in pclass_id is the default 0x120000 Classid + ProgIF.
self.pclass_id = pclass_id[0]
self.pdevice = 'Device [' + pdev_id + ']'
# Create a pci_device
self.pci_device = dbutils.create_test_pci_device(
host_id=self.worker.id,
pciaddr='0000:b7:00.0',
name='pci_0000_b7_00_0',
pclass='Processing accelerators',
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA,
pclass_id=self.pclass_id,
pvendor='Intel Corporation',
pvendor_id='8086',
pdevice='Device [0d8f]',
@ -184,10 +194,10 @@ class TestPatchDevice(TestDevice):
self.assertEqual('0000:b7:00.0', response['pciaddr'])
self.assertEqual('pci_0000_b7_00_0', response['name'])
self.assertEqual('Processing accelerators', response['pclass'])
self.assertEqual(dconstants.PCI_DEVICE_CLASS_FPGA, response['pclass_id'])
self.assertEqual(self.pclass_id, response['pclass_id'])
self.assertEqual('Intel Corporation', response['pvendor'])
self.assertEqual('8086', response['pvendor_id'])
self.assertEqual('Device [0d8f]', response['pdevice'])
self.assertEqual(self.pdevice, response['pdevice'])
self.assertEqual(dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF, response['pdevice_id'])
self.assertEqual(None, response['driver'])
self.assertEqual(False, response['enabled'])
@ -306,7 +316,7 @@ class TestPatchDevice(TestDevice):
def test_device_modify_sriov_numvfs_unsupported_hw_device(self):
self.pci_device = dbutils.create_test_pci_device(
host_id=self.worker.id,
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA,
pclass_id=self.pclass_id,
pdevice_id=dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF,
sriov_totalvfs=None)
response = self.patch_dict_json(
@ -322,7 +332,7 @@ class TestPatchDevice(TestDevice):
def test_device_modify_sriov_vf_driver_igb_uio(self):
self.pci_device = dbutils.create_test_pci_device(
host_id=self.worker.id,
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA,
pclass_id=self.pclass_id,
pdevice_id=dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF,
sriov_totalvfs=8,
sriov_numvfs=2)
@ -337,7 +347,7 @@ class TestPatchDevice(TestDevice):
def test_device_modify_sriov_vf_driver_vfio(self):
self.pci_device = dbutils.create_test_pci_device(
host_id=self.worker.id,
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA,
pclass_id=self.pclass_id,
pdevice_id=dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF,
sriov_totalvfs=8,
sriov_numvfs=2)
@ -402,7 +412,7 @@ class TestPatchDevice(TestDevice):
invprovision="provisioning")
self.pci_device = dbutils.create_test_pci_device(
host_id=host.id,
pclass_id=dconstants.PCI_DEVICE_CLASS_FPGA,
pclass_id=self.pclass_id,
pdevice_id=dconstants.PCI_DEVICE_ID_FPGA_INTEL_5GNR_FEC_PF,
sriov_totalvfs=8,
sriov_numvfs=2)