Merge "Provide a proper way to choose VF in CNI"

This commit is contained in:
Zuul
2019-08-28 11:21:09 +00:00
committed by Gerrit Code Review
9 changed files with 240 additions and 59 deletions

View File

@@ -131,6 +131,33 @@ We have to add to the sriov section following mapping:
device_plugin_resource_prefix = samsung.com device_plugin_resource_prefix = samsung.com
physnet_resource_mappings = physnet1:numa0 physnet_resource_mappings = physnet1:numa0
5. Enable Kubelet Pod Resources feature
To use SR-IOV functionality properly it is necessary to enable the Kubelet Pod
Resources feature. Pod Resources is a service provided by Kubelet via a gRPC
server that allows requesting the list of resources allocated for each pod and
container on the node. These resources are devices allocated by k8s device
plugins. The service was implemented mainly for monitoring purposes, but it is
also suitable for the SR-IOV binding driver, allowing it to know which VF was
allocated for a particular container.
To enable the Pod Resources service, add
``--feature-gates KubeletPodResources=true`` to ``/etc/sysconfig/kubelet``.
This file could look like::
KUBELET_EXTRA_ARGS="--feature-gates KubeletPodResources=true"
Note that it is important to set the right value for the parameter
``kubelet_root_dir`` in ``kuryr.conf``. By default it is ``/var/lib/kubelet``.
In case of using containerized CNI it is necessary to mount
``'kubelet_root_dir'/pod-resources`` directory into CNI container.
To use this feature, add ``enable_pod_resource_service`` to kuryr.conf:
.. code-block:: ini
[sriov]
enable_pod_resource_service = True
6. Use privileged user 6. Use privileged user

View File

@@ -18,6 +18,7 @@ CLI interface for kuryr status commands.
from __future__ import print_function from __future__ import print_function
import copy
import sys import sys
import textwrap import textwrap
import traceback import traceback
@@ -108,6 +109,8 @@ class UpgradeCommands(object):
if obj.obj_name() != objects.vif.PodState.obj_name(): if obj.obj_name() != objects.vif.PodState.obj_name():
old_count += 1 old_count += 1
elif not self._has_valid_sriov_annot(obj):
old_count += 1
if malformed_count == 0 and old_count == 0: if malformed_count == 0 and old_count == 0:
return UpgradeCheckResult(0, 'All annotations are updated.') return UpgradeCheckResult(0, 'All annotations are updated.')
@@ -193,16 +196,43 @@ class UpgradeCommands(object):
t.add_row(cell) t.add_row(cell)
print(t) print(t)
def _has_valid_sriov_annot(self, state):
for obj in state.vifs.values():
if obj.obj_name() != objects.vif.VIFSriov.obj_name():
continue
if hasattr(obj, 'pod_name') and hasattr(obj, 'pod_link'):
continue
return False
return True
def _convert_sriov(self, state):
new_state = copy.deepcopy(state)
for iface, obj in new_state.additional_vifs.items():
if obj.obj_name() != objects.vif.VIFSriov.obj_name():
continue
if hasattr(obj, 'pod_name') and hasattr(obj, 'pod_link'):
continue
new_obj = objects.vif.VIFSriov()
new_obj.__dict__ = obj.__dict__.copy()
new_state.additional_vifs[iface] = new_obj
return new_state
def update_annotations(self): def update_annotations(self):
def test_fn(obj): def test_fn(obj):
return obj.obj_name() != objects.vif.PodState.obj_name() return (obj.obj_name() != objects.vif.PodState.obj_name() or
not self._has_valid_sriov_annot(obj))
def update_fn(obj): def update_fn(obj):
return vif.PodState(default_vif=obj) if obj.obj_name() != objects.vif.PodState.obj_name():
return vif.PodState(default_vif=obj)
return self._convert_sriov(obj)
self._convert_annotations(test_fn, update_fn) self._convert_annotations(test_fn, update_fn)
def downgrade_annotations(self): def downgrade_annotations(self):
# NOTE(danil): There is no need to downgrade sriov vifs
# when annotations has old format. After downgrade annotations
# will have only one default vif and it could not be sriov vif
def test_fn(obj): def test_fn(obj):
return obj.obj_name() == objects.vif.PodState.obj_name() return obj.obj_name() == objects.vif.PodState.obj_name()

View File

@@ -20,6 +20,7 @@ from oslo_concurrency import lockutils
from oslo_concurrency import processutils from oslo_concurrency import processutils
from oslo_config import cfg from oslo_config import cfg
from oslo_log import log as logging from oslo_log import log as logging
from oslo_serialization import jsonutils
from kuryr_kubernetes import clients from kuryr_kubernetes import clients
from kuryr_kubernetes.cni.binding import base as b_base from kuryr_kubernetes.cni.binding import base as b_base
@@ -48,13 +49,8 @@ class VIFSriovDriver(object):
@release_lock_object @release_lock_object
def connect(self, vif, ifname, netns, container_id): def connect(self, vif, ifname, netns, container_id):
physnet = vif.physnet pci = self._choose_pci(vif, ifname, netns)
pf_names = self._get_host_pf_names(physnet) vf_name, vf_index, pf, pci_info = self._get_vf_info(pci)
vf_name, vf_index, pf, pci_info = self._get_available_vf_info(pf_names)
if not vf_name:
raise exceptions.CNIError(
"No free interfaces for physnet {} available".format(physnet))
LOG.debug("Connect {} as {} (port_id={}) in container_id={}".format( LOG.debug("Connect {} as {} (port_id={}) in container_id={}".format(
vf_name, ifname, vif.id, container_id)) vf_name, ifname, vif.id, container_id))
@@ -74,6 +70,9 @@ class VIFSriovDriver(object):
iface.mtu = vif.network.mtu iface.mtu = vif.network.mtu
iface.up() iface.up()
pod_link = vif.pod_link
self._annotate_device(pod_link, pci)
self._save_pci_info(vif.id, pci_info) self._save_pci_info(vif.id, pci_info)
def disconnect(self, vif, ifname, netns, container_id): def disconnect(self, vif, ifname, netns, container_id):
@@ -82,48 +81,101 @@ class VIFSriovDriver(object):
# it to all-zero state # it to all-zero state
self._remove_pci_info(vif.id) self._remove_pci_info(vif.id)
def _get_host_pf_names(self, physnet): def _choose_pci(self, vif, ifname, netns):
"""Return a list of PFs, that belong to a physnet""" pr_client = clients.get_pod_resources_client()
pod_resources_list = pr_client.list()
resources = pod_resources_list.pod_resources
pod_name = vif.pod_name
pod_link = vif.pod_link
physnet = vif.physnet
resource_name = self._get_resource_by_physnet(physnet)
resource = self._make_resource(resource_name)
LOG.debug("Vif %s will correspond to pci device belonging to "
"resource %s", vif, resource)
pod_devices = self._get_pod_devices(pod_link)
pod_resource = None
container_devices = None
for res in resources:
if res.name == pod_name:
pod_resource = res
break
if not pod_resource:
raise exceptions.CNIError(
"No resources are discovered for pod {}".format(pod_name))
LOG.debug("Looking for PCI device used by kubelet service and not "
"used by pod %s yet ...", pod_name)
for container in pod_resource.containers:
try:
container_devices = container.devices
except Exception:
LOG.warning("No devices in container %s",
container.name)
continue
if physnet not in self._device_pf_mapping: for dev in container_devices:
raise cfg.Error( if dev.resource_name != resource:
"No mapping for physnet {} in {}".format(
physnet, self._device_pf_mapping))
return self._device_pf_mapping[physnet]
def _get_available_vf_info(self, pf_names):
"""Scan /sys for unacquired VF among PFs in pf_names"""
for pf in pf_names:
pf_sys_path = '/sys/class/net/{}/device'.format(pf)
nvfs = self._get_total_vfs(pf)
for vf_index in range(nvfs):
vf_sys_path = os.path.join(pf_sys_path,
'virtfn{}'.format(vf_index),
'net')
# TODO(kzaitsev): use /var/run/kuryr/smth
lock_path = os.path.join("/tmp",
"{}.{}".format(pf, vf_index))
self._acquire(lock_path)
LOG.debug("Aquired %s lock", lock_path)
try:
vf_names = os.listdir(vf_sys_path)
except OSError:
LOG.debug("Could not open %s. "
"Skipping vf %s for pf %s", vf_sys_path,
vf_index, pf)
self._release()
continue continue
if not vf_names:
LOG.debug("No interfaces in %s. " for pci in dev.device_ids:
"Skipping vf %s for pf %s", vf_sys_path, if pci in pod_devices:
vf_index, pf) continue
self._release() LOG.debug("Appropriate PCI device %s is found", pci)
continue return pci
vf_name = vf_names[0]
pci_info = self._get_pci_info(pf, vf_index) def _get_resource_by_physnet(self, physnet):
LOG.debug("Aquiring vf %s of pf %s", vf_index, pf) mapping = config.CONF.sriov.physnet_resource_mappings
return vf_name, vf_index, pf, pci_info try:
resource_name = mapping[physnet]
except KeyError:
LOG.exception("No resource name for physnet %s", physnet)
raise
return resource_name
def _make_resource(self, res_name):
res_prefix = config.CONF.sriov.device_plugin_resource_prefix
return res_prefix + '/' + res_name
def _get_pod_devices(self, pod_link):
k8s = clients.get_kubernetes_client()
pod = k8s.get(pod_link)
annotations = pod['metadata']['annotations']
try:
json_devices = annotations[constants.K8S_ANNOTATION_PCI_DEVICES]
devices = jsonutils.loads(json_devices)
except KeyError:
devices = []
except Exception as ex:
LOG.exception("Exception while getting annotations: %s", ex)
return devices
def _annotate_device(self, pod_link, pci):
k8s = clients.get_kubernetes_client()
pod_devices = self._get_pod_devices(pod_link)
pod_devices.append(pci)
pod_devices = jsonutils.dumps(pod_devices)
LOG.debug("Trying to annotate pod %s with pci %s", pod_link, pci)
k8s.annotate(pod_link,
{constants.K8S_ANNOTATION_PCI_DEVICES: pod_devices})
def _get_vf_info(self, pci):
vf_sys_path = '/sys/bus/pci/devices/{}/net/'.format(pci)
vf_names = os.listdir(vf_sys_path)
vf_name = vf_names[0]
pfysfn_path = '/sys/bus/pci/devices/{}/physfn/net/'.format(pci)
pf_names = os.listdir(pfysfn_path)
pf_name = pf_names[0]
nvfs = self._get_total_vfs(pf_name)
pf_sys_path = '/sys/class/net/{}/device'.format(pf_name)
for vf_index in range(nvfs):
virtfn_path = os.path.join(pf_sys_path,
'virtfn{}'.format(vf_index))
vf_pci = os.path.basename(os.readlink(virtfn_path))
if vf_pci == pci:
pci_info = self._get_pci_info(pf_name, vf_index)
return vf_name, vf_index, pf_name, pci_info
return None, None, None, None return None, None, None, None
def _get_pci_info(self, pf, vf_index): def _get_pci_info(self, pf, vf_index):

View File

@@ -280,6 +280,8 @@ class CNIDaemonServiceManager(cotyledon.ServiceManager):
os_vif.initialize() os_vif.initialize()
clients.setup_kubernetes_client() clients.setup_kubernetes_client()
if CONF.sriov.enable_pod_resource_service:
clients.setup_pod_resources_client()
self.manager = multiprocessing.Manager() self.manager = multiprocessing.Manager()
registry = self.manager.dict() # For Watcher->Server communication. registry = self.manager.dict() # For Watcher->Server communication.

View File

@@ -258,6 +258,9 @@ sriov_opts = [
cfg.StrOpt('kubelet_root_dir', cfg.StrOpt('kubelet_root_dir',
help=_("The root directory of the Kubelet daemon"), help=_("The root directory of the Kubelet daemon"),
default='/var/lib/kubelet'), default='/var/lib/kubelet'),
cfg.BoolOpt('enable_pod_resource_service',
help=_("Enable PodResources service"),
default=False),
cfg.DictOpt('default_physnet_subnets', cfg.DictOpt('default_physnet_subnets',
help=_("A mapping of default subnets for certain physnets " help=_("A mapping of default subnets for certain physnets "
"in a form of physnet-name:<SUBNET-ID>"), "in a form of physnet-name:<SUBNET-ID>"),

View File

@@ -52,6 +52,7 @@ K8S_ANNOTATION_NPWG_CRD_SUBNET_ID = 'subnetId'
K8S_ANNOTATION_NPWG_CRD_DRIVER_TYPE = 'driverType' K8S_ANNOTATION_NPWG_CRD_DRIVER_TYPE = 'driverType'
K8S_ANNOTATION_NODE_PCI_DEVICE_INFO = 'openstack.org/kuryr-pci-info' K8S_ANNOTATION_NODE_PCI_DEVICE_INFO = 'openstack.org/kuryr-pci-info'
K8S_ANNOTATION_PCI_DEVICES = K8S_ANNOTATION_PREFIX + '-pci-devices'
K8S_OS_VIF_NOOP_PLUGIN = "noop" K8S_OS_VIF_NOOP_PLUGIN = "noop"

View File

@@ -58,6 +58,8 @@ class SriovVIFDriver(neutron_vif.NeutronPodVIFDriver):
c_utils.tag_neutron_resources('ports', [port['id']]) c_utils.tag_neutron_resources('ports', [port['id']])
vif = ovu.neutron_to_osvif_vif(vif_plugin, port, subnets) vif = ovu.neutron_to_osvif_vif(vif_plugin, port, subnets)
vif.physnet = physnet vif.physnet = physnet
vif.pod_name = pod_name
vif.pod_link = pod['metadata']['selfLink']
LOG.debug("{} vifs are available for the pod {}".format( LOG.debug("{} vifs are available for the pod {}".format(
amount, pod_name)) amount, pod_name))

View File

@@ -73,10 +73,13 @@ class VIFMacvlanNested(obj_osvif.VIFBase):
@obj_base.VersionedObjectRegistry.register @obj_base.VersionedObjectRegistry.register
class VIFSriov(obj_osvif.VIFDirect): class VIFSriov(obj_osvif.VIFDirect):
# This is OVO based SRIOV vif. # This is OVO based SRIOV vif.
# Version 1.0: Initial version
VERSION = '1.0' # Version 1.1: Added pod_name field and pod_link field.
VERSION = '1.1'
fields = { fields = {
# physnet of the VIF # physnet of the VIF
'physnet': obj_fields.StringField(), 'physnet': obj_fields.StringField(),
'pod_name': obj_fields.StringField(),
'pod_link': obj_fields.StringField(),
} }

View File

@@ -19,6 +19,7 @@ from os_vif import objects as osv_objects
from oslo_config import cfg from oslo_config import cfg
from kuryr_kubernetes.cni.binding import base from kuryr_kubernetes.cni.binding import base
from kuryr_kubernetes.cni.binding import sriov
from kuryr_kubernetes import objects from kuryr_kubernetes import objects
from kuryr_kubernetes.tests import base as test_base from kuryr_kubernetes.tests import base as test_base
from kuryr_kubernetes.tests import fake from kuryr_kubernetes.tests import fake
@@ -213,22 +214,50 @@ class TestSriovDriver(TestDriverMixin, test_base.TestCase):
def setUp(self): def setUp(self):
super(TestSriovDriver, self).setUp() super(TestSriovDriver, self).setUp()
self.vif = fake._fake_vif(objects.vif.VIFSriov) self.vif = fake._fake_vif(objects.vif.VIFSriov)
self.vif.physnet = 'test_physnet' self.vif.physnet = 'physnet2'
self.pci_info = mock.Mock() self.pci_info = mock.Mock()
self.vif.pod_link = 'pod_link'
self.vif.pod_name = 'pod_1'
self.pci = mock.Mock()
self.device_ids = ['pci_dev_1']
self.device = mock.Mock()
self.device.device_ids = self.device_ids
self.device.resource_name = 'intel.com/sriov'
self.cont_devs = [self.device]
self.container = mock.Mock()
self.container.devices = self.cont_devs
self.pod_containers = [self.container]
self.pod_resource = mock.Mock()
self.pod_resource.containers = self.pod_containers
self.pod_resource.name = 'pod_1'
self.resources = [self.pod_resource]
CONF.set_override('physnet_resource_mappings', 'physnet2:sriov',
group='sriov')
self.addCleanup(CONF.clear_override, 'physnet_resource_mappings',
group='sriov')
CONF.set_override('device_plugin_resource_prefix', 'intel.com',
group='sriov')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.' @mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_get_host_pf_names') '_annotate_device')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.' @mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_get_available_vf_info') '_choose_pci')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_get_vf_info')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.' @mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_set_vf_mac') '_set_vf_mac')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.' @mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_save_pci_info') '_save_pci_info')
def test_connect(self, m_save_pci_info, m_set_vf_mac, m_avail_vf_info, def test_connect(self, m_save_pci_info, m_set_vf_mac, m_vf_info,
m_host_pf_names): m_choose_pci, m_annot_dev):
m_avail_vf_info.return_value = [self.ifname, 1, m_vf_info.return_value = [self.ifname, 1, 'h_interface',
'h_interface', self.pci_info] self.pci_info]
m_host_pf_names.return_value = 'h_interface' m_choose_pci.return_value = self.pci
self._test_connect() self._test_connect()
self.assertEqual(self.ifname, self.m_c_iface.ifname) self.assertEqual(self.ifname, self.m_c_iface.ifname)
@@ -237,9 +266,41 @@ class TestSriovDriver(TestDriverMixin, test_base.TestCase):
m_set_vf_mac.assert_called_once_with('h_interface', 1, m_set_vf_mac.assert_called_once_with('h_interface', 1,
str(self.vif.address)) str(self.vif.address))
m_save_pci_info.assert_called_once_with(self.vif.id, self.pci_info) m_save_pci_info.assert_called_once_with(self.vif.id, self.pci_info)
m_annot_dev.assert_called_once_with(self.vif.pod_link, self.pci)
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.' @mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_remove_pci_info') '_remove_pci_info')
def test_disconnect(self, m_remove_pci): def test_disconnect(self, m_remove_pci):
m_remove_pci.return_value = None m_remove_pci.return_value = None
self._test_disconnect() self._test_disconnect()
@mock.patch('kuryr_kubernetes.clients.get_pod_resources_client')
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
'_get_resource_by_physnet')
def test_choose_pci(self, m_get_res_ph, m_get_prc):
cls = sriov.VIFSriovDriver
m_driver = mock.Mock(spec=cls)
m_driver._make_resource.return_value = 'intel.com/sriov'
m_driver._get_pod_devices.return_value = ['pci_dev_2']
pod_resources_list = mock.Mock()
pod_resources_list.pod_resources = self.resources
pod_resources_client = mock.Mock()
pod_resources_client.list.return_value = pod_resources_list
m_get_prc.return_value = pod_resources_client
self.assertEqual('pci_dev_1', cls._choose_pci(m_driver, self.vif,
self.ifname, self.netns))
def test_get_resource_by_physnet(self):
cls = sriov.VIFSriovDriver
m_driver = mock.Mock(spec=cls)
self.assertEqual(
'sriov', cls._get_resource_by_physnet(m_driver, self.vif.physnet))
def test_make_resource(self):
cls = sriov.VIFSriovDriver
m_driver = mock.Mock(spec=cls)
self.assertEqual('intel.com/sriov', cls._make_resource(m_driver,
'sriov'))