add support for intel gpu device plugin
Pods of the Intel GPU device plugin will only be created on nodes labeled "intelgpu: enabled", i.e. nodes that have Intel GPUs using the i915 driver. In this commit, the sysinv agent checks the host's GPU device driver. Once a supported device is detected, the sysinv agent sends a request to the sysinv conductor, and the conductor sets the Kubernetes label "intelgpu: enabled" on that node if the file "/etc/platform/enabled_kube_plugins" exists and contains "intelgpu: enabled". Story: 2005937 Task: 35813 Change-Id: I10fb599c693d2d6e01fc14d42924dcd8cf1240a9 Signed-off-by: SidneyAn <ran1.an@intel.com>
This commit is contained in:
parent
ce5e0fce4f
commit
023be74256
|
@ -139,6 +139,7 @@ class AgentManager(service.PeriodicService):
|
|||
PV = 'pv'
|
||||
LVG = 'lvg'
|
||||
HOST_FILESYSTEMS = 'host_filesystems'
|
||||
K8S_DEVICE_PLUGIN = 'k8s_device_plugin'
|
||||
|
||||
# Note that this set must be extended when there are
|
||||
# additional inventory required for the initial
|
||||
|
@ -152,7 +153,8 @@ class AgentManager(service.PeriodicService):
|
|||
DISK,
|
||||
PV,
|
||||
LVG,
|
||||
HOST_FILESYSTEMS}
|
||||
HOST_FILESYSTEMS,
|
||||
K8S_DEVICE_PLUGIN}
|
||||
|
||||
def __init__(self, host, topic):
|
||||
serializer = objects_base.SysinvObjectSerializer()
|
||||
|
@ -838,6 +840,8 @@ class AgentManager(service.PeriodicService):
|
|||
self._report_port_inventory(icontext, rpcapi,
|
||||
port_list, pci_device_list)
|
||||
|
||||
self._report_supported_device_plugin(icontext, rpcapi, pci_device_list)
|
||||
|
||||
# Find list of numa_nodes and cpus for this ihost
|
||||
inumas, icpus = self._inode_operator.inodes_get_inumas_icpus()
|
||||
|
||||
|
@ -956,6 +960,39 @@ class AgentManager(service.PeriodicService):
|
|||
self._report_to_conductor_iplatform_avail()
|
||||
self._iconfig_read_config_reported = config_uuid
|
||||
|
||||
@retrying.retry(wait_fixed=15 * 1000, stop_max_delay=300 * 1000,
                retry_on_exception=_retry_on_missing_host_uuid)
def _report_supported_device_plugin(self, context, rpcapi=None, pci_device_list=None):
    """Report the k8s device plugin labels this host supports.

    The labels are derived from the host's PCI devices and sent to the
    conductor. The K8S_DEVICE_PLUGIN inventory item is marked as reported
    when there is nothing to report or when the conductor call succeeds;
    it is left unreported when the call fails so it can be tried again.

    The retrying decorator waits 15 seconds between attempts and gives up
    after 300 seconds; which exceptions trigger a retry is decided by
    _retry_on_missing_host_uuid (presumably LocalHostUUIDNotFound --
    confirm against that helper).

    :param context: request context passed through to the conductor
    :param rpcapi: conductor RPC API; created here when None
    :param pci_device_list: PCI device inventory; collected here when None
    :raises LocalHostUUIDNotFound: if the host uuid is not yet known
    """
    if not self._ihost_uuid:
        raise exception.LocalHostUUIDNotFound()

    if rpcapi is None:
        rpcapi = conductor_rpcapi.ConductorAPI(
            topic=conductor_rpcapi.MANAGER_TOPIC)

    if pci_device_list is None:
        # Only the PCI device list is needed from the ports inventory.
        _ports, pci_device_list, _macs = self._get_ports_inventory()

    supported_labels = self._ipci_operator.get_support_dp_labels(pci_device_list)

    if supported_labels:
        try:
            rpcapi.device_plugin_labels_update_by_ihost(context,
                                                        self._ihost_uuid,
                                                        supported_labels)
        except RemoteError as e:
            LOG.error("device_plugin_labels_update_by_ihost RemoteError exc_type=%s" %
                      e.exc_type)
            return
        except exception.SysinvException:
            LOG.exception("Sysinv Agent uncaught exception updating device plugin labels.")
            return
    else:
        LOG.info("device_plugin_labels is empty.")

    # Reached only when there was nothing to send or the send succeeded.
    self._inventory_reported.add(self.K8S_DEVICE_PLUGIN)
|
||||
|
||||
def subfunctions_get(self):
|
||||
""" returns subfunctions on this host.
|
||||
"""
|
||||
|
|
|
@ -168,6 +168,33 @@ class PCIDevice(object):
|
|||
return "<PCIDevice '%s'>" % str(self)
|
||||
|
||||
|
||||
class DevicePlugin(object):
    '''Class to record specific information of each k8s device plugins'''

    def get_label(self, pci_device_list):
        """Return the k8s label for this plugin, or None if unsupported.

        The base implementation supports no device and always returns
        None; subclasses override it to inspect the PCI devices.

        :param pci_device_list: PCI device inventory of the host
        :returns: None
        """
        # 'self' was missing from the original signature, which made the
        # method uncallable on an instance (TypeError on argument count).
        return None
|
||||
|
||||
|
||||
class IntelGPUdp(DevicePlugin):
    """Device plugin entry that detects GPUs bound to the i915 driver."""

    def __init__(self):
        return

    def get_label(self, pci_device_list):
        """Return the 'intelgpu' label if a VGA device uses the i915 driver.

        :param pci_device_list: iterable of PCI device records indexed by
                                'pclass' and 'driver'
        :returns: dict with 'label_key' and 'label_value' for the first
                  matching device, or None when no device matches
        """
        for pci_dev in pci_device_list:
            # Only VGA-class devices are candidates.
            if "VGA" not in pci_dev['pclass']:
                continue
            # A device without a bound driver cannot be an i915 GPU.
            if pci_dev['driver'] is None:
                continue
            if "i915" not in pci_dev['driver']:
                continue
            return {'label_key': 'intelgpu', 'label_value': 'enabled'}
        return None
|
||||
|
||||
|
||||
# Device plugin entries consulted by PCIOperator.get_support_dp_labels();
# each entry's get_label() is called with the host's PCI device list.
DEVICE_PLUGIN_LIST = [IntelGPUdp()]
|
||||
|
||||
|
||||
class PCIOperator(object):
|
||||
'''Class to encapsulate PCI operations for System Inventory'''
|
||||
|
||||
|
@ -667,3 +694,13 @@ class PCIOperator(object):
|
|||
pci_attrs_array.append(attrs)
|
||||
|
||||
return pci_attrs_array
|
||||
|
||||
def get_support_dp_labels(self, pci_device_list):
    """Collect the labels of every device plugin this host supports.

    Each entry of DEVICE_PLUGIN_LIST is asked for its label; entries
    that return None are left out.

    :param pci_device_list: PCI device inventory of the host
    :returns: list of label dicts, possibly empty
    """
    candidate_labels = (plugin.get_label(pci_device_list)
                        for plugin in DEVICE_PLUGIN_LIST)
    return [found for found in candidate_labels if found is not None]
|
||||
|
|
|
@ -34,6 +34,7 @@ import filecmp
|
|||
import fnmatch
|
||||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
|
@ -4199,6 +4200,56 @@ class ConductorManager(service.PeriodicService):
|
|||
tsc.install_uuid)
|
||||
greenthread.sleep(constants.FIX_INSTALL_UUID_INTERVAL_SECS)
|
||||
|
||||
def _get_kube_plugin_labels(self):
|
||||
|
||||
# this file will be generated after initial config process if the
|
||||
# kubernetes device plugin list is not empty.
|
||||
if not os.path.isfile('/etc/platform/enabled_kube_plugins'):
|
||||
return None
|
||||
|
||||
try:
|
||||
file_object = open('/etc/platform/enabled_kube_plugins')
|
||||
plugins = json.loads(file_object.read())
|
||||
labels = list(plugins.values())
|
||||
return labels
|
||||
except Exception as e:
|
||||
LOG.error("failed to get kube_plugin list from file. \
|
||||
exception: %s" % str(e))
|
||||
return None
|
||||
|
||||
def device_plugin_labels_update_by_ihost(self, context,
                                         host_uuid, device_plugin_labels):
    """Assign device plugin labels to an ihost with the supplied data.

    Only labels whose "key=value" form appears in the enabled plugin
    labels returned by _get_kube_plugin_labels() are created; the rest
    are skipped. Nothing is done when no enabled plugin list is
    available or when the host cannot be found.

    :param context: an admin context
    :param host_uuid: host uuid unique id
    :param device_plugin_labels: kubernetes labels request to assign
    """
    enabled_kube_labels = self._get_kube_plugin_labels()
    if enabled_kube_labels is None:
        LOG.info("Vendor k8s device plugin list is empty. "
                 "Set parameters in ansible override file if required.")
        return

    # str.strip() returns a new string; the original discarded it, so
    # the uuid was never actually stripped.
    host_uuid = host_uuid.strip()
    try:
        ihost = self.dbapi.ihost_get(host_uuid)
    except exception.ServerNotFound:
        LOG.exception("Invalid host_uuid %s" % host_uuid)
        return

    for label in device_plugin_labels:
        kube_label = label['label_key'] + "=" + label['label_value']
        if kube_label not in enabled_kube_labels:
            continue

        # Build a new dict rather than mutating the caller's label.
        label_values = dict(label, host_id=ihost.id)
        try:
            self.dbapi.label_create(host_uuid, label_values)
        except exception.HostLabelAlreadyExists:
            # The label is already assigned to this host; nothing to do.
            pass
|
||||
|
||||
@periodic_task.periodic_task(spacing=CONF.conductor.audit_interval)
|
||||
def _agent_update_request(self, context):
|
||||
"""
|
||||
|
|
|
@ -426,6 +426,22 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
|
|||
return self.cast(context, self.make_msg('update_partition_config',
|
||||
partition=partition))
|
||||
|
||||
def device_plugin_labels_update_by_ihost(self, context,
                                         host_uuid, device_plugin_labels):
    """Ask the conductor to assign device plugin labels to an ihost.

    Builds a 'device_plugin_labels_update_by_ihost' message and sends it
    with self.call().

    :param context: an admin context
    :param host_uuid: host uuid unique id
    :param device_plugin_labels: kubernetes labels request to assign
    :returns: whatever self.call() returns for the request
    """
    request = self.make_msg('device_plugin_labels_update_by_ihost',
                            host_uuid=host_uuid,
                            device_plugin_labels=device_plugin_labels)
    return self.call(context, request)
|
||||
|
||||
def iplatform_update_by_ihost(self, context,
|
||||
ihost_uuid, imsg_dict):
|
||||
"""Create or update memory for an ihost with the supplied data.
|
||||
|
|
Loading…
Reference in New Issue