Merge "Provide a proper way to choose VF in CNI"
This commit is contained in:
@@ -131,6 +131,33 @@ We have to add to the sriov section following mapping:
|
||||
device_plugin_resource_prefix = samsung.com
|
||||
physnet_resource_mappings = physnet1:numa0
|
||||
|
||||
5. Enable Kubelet Pod Resources feature
|
||||
|
||||
To use SR-IOV functionality properly it is necessary to enable Kubelet Pod
|
||||
Resources feature. Pod Resources is a service provided by Kubelet via gRPC
|
||||
server that allows requesting the list of resources allocated for each pod and
|
||||
container on the node. These resources are devices allocated by k8s device
|
||||
plugins. The service was implemented mainly for monitoring purposes, but it is also
|
||||
suitable for SR-IOV binding driver allowing it to know which VF was allocated
|
||||
for particular container.
|
||||
|
||||
To enable the Pod Resources service it is necessary to add
|
||||
``--feature-gates KubeletPodResources=true`` into ``/etc/sysconfig/kubelet``.
|
||||
This file could look like::
|
||||
|
||||
KUBELET_EXTRA_ARGS="--feature-gates KubeletPodResources=true"
|
||||
|
||||
Note that it is important to set right value for parameter ``kubelet_root_dir``
|
||||
in ``kuryr.conf``. By default it is ``/var/lib/kubelet``.
|
||||
In case of using containerized CNI it is necessary to mount
|
||||
``'kubelet_root_dir'/pod-resources`` directory into CNI container.
|
||||
|
||||
To use this feature add ``enable_pod_resource_service`` to kuryr.conf.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
[sriov]
|
||||
enable_pod_resource_service = True
|
||||
|
||||
6. Use privileged user
|
||||
|
||||
|
@@ -18,6 +18,7 @@ CLI interface for kuryr status commands.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import copy
|
||||
import sys
|
||||
import textwrap
|
||||
import traceback
|
||||
@@ -108,6 +109,8 @@ class UpgradeCommands(object):
|
||||
|
||||
if obj.obj_name() != objects.vif.PodState.obj_name():
|
||||
old_count += 1
|
||||
elif not self._has_valid_sriov_annot(obj):
|
||||
old_count += 1
|
||||
|
||||
if malformed_count == 0 and old_count == 0:
|
||||
return UpgradeCheckResult(0, 'All annotations are updated.')
|
||||
@@ -193,16 +196,43 @@ class UpgradeCommands(object):
|
||||
t.add_row(cell)
|
||||
print(t)
|
||||
|
||||
def _has_valid_sriov_annot(self, state):
    """Check that every SR-IOV vif in *state* carries pod metadata.

    Returns False as soon as a VIFSriov object is found that lacks
    either the ``pod_name`` or the ``pod_link`` attribute; returns
    True when all SR-IOV vifs (or none at all) are up to date.
    """
    sriov_name = objects.vif.VIFSriov.obj_name()
    return all(
        hasattr(vif_obj, 'pod_name') and hasattr(vif_obj, 'pod_link')
        for vif_obj in state.vifs.values()
        if vif_obj.obj_name() == sriov_name)
|
||||
|
||||
def _convert_sriov(self, state):
    """Return a deep copy of *state* with legacy SR-IOV vifs rebuilt.

    VIFSriov objects that already expose ``pod_name``/``pod_link``
    are left alone; older ones are re-created as fresh VIFSriov
    instances (copying their attribute dict) so they pick up the
    current object version.
    """
    converted = copy.deepcopy(state)
    sriov_name = objects.vif.VIFSriov.obj_name()
    for iface_name, vif_obj in converted.additional_vifs.items():
        if vif_obj.obj_name() != sriov_name:
            continue
        if hasattr(vif_obj, 'pod_name') and hasattr(vif_obj, 'pod_link'):
            continue
        upgraded = objects.vif.VIFSriov()
        upgraded.__dict__ = vif_obj.__dict__.copy()
        converted.additional_vifs[iface_name] = upgraded
    return converted
|
||||
|
||||
def update_annotations(self):
    """Upgrade pod annotations to the PodState format with SR-IOV metadata.

    Fix: the residual early ``return obj.obj_name() != ...`` inside
    ``test_fn`` made the combined condition (PodState check OR invalid
    SR-IOV annotation) unreachable, so annotations whose SR-IOV vifs
    lacked pod_name/pod_link were never converted. Only the combined
    condition is kept.
    """
    def test_fn(obj):
        # An annotation needs updating when it is not yet a PodState,
        # or when its SR-IOV vifs lack pod_name/pod_link metadata.
        return (obj.obj_name() != objects.vif.PodState.obj_name() or
                not self._has_valid_sriov_annot(obj))

    def update_fn(obj):
        if obj.obj_name() != objects.vif.PodState.obj_name():
            # Legacy single-vif annotation: wrap it into a PodState.
            return vif.PodState(default_vif=obj)
        # Already a PodState: only its SR-IOV vifs need conversion.
        return self._convert_sriov(obj)

    self._convert_annotations(test_fn, update_fn)
|
||||
|
||||
def downgrade_annotations(self):
|
||||
# NOTE(danil): There is no need to downgrade sriov vifs
|
||||
# when annotations has old format. After downgrade annotations
|
||||
# will have only one default vif and it could not be sriov vif
|
||||
def test_fn(obj):
|
||||
return obj.obj_name() == objects.vif.PodState.obj_name()
|
||||
|
||||
|
@@ -20,6 +20,7 @@ from oslo_concurrency import lockutils
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
from kuryr_kubernetes import clients
|
||||
from kuryr_kubernetes.cni.binding import base as b_base
|
||||
@@ -48,13 +49,8 @@ class VIFSriovDriver(object):
|
||||
|
||||
@release_lock_object
|
||||
def connect(self, vif, ifname, netns, container_id):
|
||||
physnet = vif.physnet
|
||||
pf_names = self._get_host_pf_names(physnet)
|
||||
vf_name, vf_index, pf, pci_info = self._get_available_vf_info(pf_names)
|
||||
|
||||
if not vf_name:
|
||||
raise exceptions.CNIError(
|
||||
"No free interfaces for physnet {} available".format(physnet))
|
||||
pci = self._choose_pci(vif, ifname, netns)
|
||||
vf_name, vf_index, pf, pci_info = self._get_vf_info(pci)
|
||||
|
||||
LOG.debug("Connect {} as {} (port_id={}) in container_id={}".format(
|
||||
vf_name, ifname, vif.id, container_id))
|
||||
@@ -74,6 +70,9 @@ class VIFSriovDriver(object):
|
||||
iface.mtu = vif.network.mtu
|
||||
iface.up()
|
||||
|
||||
pod_link = vif.pod_link
|
||||
self._annotate_device(pod_link, pci)
|
||||
|
||||
self._save_pci_info(vif.id, pci_info)
|
||||
|
||||
def disconnect(self, vif, ifname, netns, container_id):
|
||||
@@ -82,48 +81,101 @@ class VIFSriovDriver(object):
|
||||
# it to all-zero state
|
||||
self._remove_pci_info(vif.id)
|
||||
|
||||
def _get_host_pf_names(self, physnet):
|
||||
"""Return a list of PFs, that belong to a physnet"""
|
||||
|
||||
if physnet not in self._device_pf_mapping:
|
||||
raise cfg.Error(
|
||||
"No mapping for physnet {} in {}".format(
|
||||
physnet, self._device_pf_mapping))
|
||||
return self._device_pf_mapping[physnet]
|
||||
|
||||
def _get_available_vf_info(self, pf_names):
|
||||
"""Scan /sys for unacquired VF among PFs in pf_names"""
|
||||
|
||||
for pf in pf_names:
|
||||
pf_sys_path = '/sys/class/net/{}/device'.format(pf)
|
||||
nvfs = self._get_total_vfs(pf)
|
||||
for vf_index in range(nvfs):
|
||||
vf_sys_path = os.path.join(pf_sys_path,
|
||||
'virtfn{}'.format(vf_index),
|
||||
'net')
|
||||
# TODO(kzaitsev): use /var/run/kuryr/smth
|
||||
lock_path = os.path.join("/tmp",
|
||||
"{}.{}".format(pf, vf_index))
|
||||
self._acquire(lock_path)
|
||||
LOG.debug("Aquired %s lock", lock_path)
|
||||
def _choose_pci(self, vif, ifname, netns):
    """Pick a PCI address allocated by kubelet and not yet used by the pod.

    Queries the kubelet Pod Resources service for the devices assigned
    to this pod's containers and returns the first PCI id matching the
    vif's physnet resource that is not already recorded in the pod's
    annotations. Returns None when no such device exists; raises
    CNIError when the pod is unknown to the Pod Resources service.
    """
    resources_client = clients.get_pod_resources_client()
    all_resources = resources_client.list().pod_resources
    pod_name = vif.pod_name
    pod_link = vif.pod_link
    resource = self._make_resource(
        self._get_resource_by_physnet(vif.physnet))
    LOG.debug("Vif %s will correspond to pci device belonging to "
              "resource %s", vif, resource)
    used_devices = self._get_pod_devices(pod_link)

    pod_resource = next(
        (res for res in all_resources if res.name == pod_name), None)
    if not pod_resource:
        raise exceptions.CNIError(
            "No resources are discovered for pod {}".format(pod_name))
    LOG.debug("Looking for PCI device used by kubelet service and not "
              "used by pod %s yet ...", pod_name)

    for container in pod_resource.containers:
        try:
            container_devices = container.devices
        except Exception:
            LOG.warning("No devices in container %s",
                        container.name)
            continue
        for device in container_devices:
            if device.resource_name != resource:
                continue
            for pci in device.device_ids:
                if pci not in used_devices:
                    LOG.debug("Appropriate PCI device %s is found", pci)
                    return pci
|
||||
|
||||
def _get_resource_by_physnet(self, physnet):
    """Map *physnet* to its device-plugin resource name via config.

    Looks the name up in ``[sriov] physnet_resource_mappings``;
    re-raises KeyError (after logging) when no mapping exists.
    """
    mapping = config.CONF.sriov.physnet_resource_mappings
    try:
        return mapping[physnet]
    except KeyError:
        LOG.exception("No resource name for physnet %s", physnet)
        raise
|
||||
|
||||
def _make_resource(self, res_name):
    """Build the fully qualified resource id ``<prefix>/<res_name>``."""
    prefix = config.CONF.sriov.device_plugin_resource_prefix
    return '{}/{}'.format(prefix, res_name)
|
||||
|
||||
def _get_pod_devices(self, pod_link):
    """Read the list of PCI devices recorded in the pod's annotations.

    Returns an empty list when the annotation is absent.

    Fix: ``devices`` was only bound inside the ``try`` block, so the
    generic ``except Exception`` path (e.g. malformed JSON) fell
    through to ``return devices`` and raised UnboundLocalError.
    ``devices`` is now initialized up front, keeping the best-effort
    behavior: log the failure and return what we have.
    """
    k8s = clients.get_kubernetes_client()
    pod = k8s.get(pod_link)
    annotations = pod['metadata']['annotations']
    devices = []
    try:
        json_devices = annotations[constants.K8S_ANNOTATION_PCI_DEVICES]
        devices = jsonutils.loads(json_devices)
    except KeyError:
        # No devices annotated on this pod yet.
        pass
    except Exception as ex:
        LOG.exception("Exception while getting annotations: %s", ex)
    return devices
|
||||
|
||||
def _annotate_device(self, pod_link, pci):
    """Append *pci* to the pod's PCI-devices annotation via the k8s API."""
    k8s = clients.get_kubernetes_client()
    devices = self._get_pod_devices(pod_link)
    devices.append(pci)
    serialized = jsonutils.dumps(devices)

    LOG.debug("Trying to annotate pod %s with pci %s", pod_link, pci)
    k8s.annotate(pod_link,
                 {constants.K8S_ANNOTATION_PCI_DEVICES: serialized})
|
||||
|
||||
def _get_vf_info(self, pci):
|
||||
vf_sys_path = '/sys/bus/pci/devices/{}/net/'.format(pci)
|
||||
vf_names = os.listdir(vf_sys_path)
|
||||
except OSError:
|
||||
LOG.debug("Could not open %s. "
|
||||
"Skipping vf %s for pf %s", vf_sys_path,
|
||||
vf_index, pf)
|
||||
self._release()
|
||||
continue
|
||||
if not vf_names:
|
||||
LOG.debug("No interfaces in %s. "
|
||||
"Skipping vf %s for pf %s", vf_sys_path,
|
||||
vf_index, pf)
|
||||
self._release()
|
||||
continue
|
||||
vf_name = vf_names[0]
|
||||
pci_info = self._get_pci_info(pf, vf_index)
|
||||
LOG.debug("Aquiring vf %s of pf %s", vf_index, pf)
|
||||
return vf_name, vf_index, pf, pci_info
|
||||
|
||||
pfysfn_path = '/sys/bus/pci/devices/{}/physfn/net/'.format(pci)
|
||||
pf_names = os.listdir(pfysfn_path)
|
||||
pf_name = pf_names[0]
|
||||
|
||||
nvfs = self._get_total_vfs(pf_name)
|
||||
pf_sys_path = '/sys/class/net/{}/device'.format(pf_name)
|
||||
for vf_index in range(nvfs):
|
||||
virtfn_path = os.path.join(pf_sys_path,
|
||||
'virtfn{}'.format(vf_index))
|
||||
vf_pci = os.path.basename(os.readlink(virtfn_path))
|
||||
if vf_pci == pci:
|
||||
pci_info = self._get_pci_info(pf_name, vf_index)
|
||||
return vf_name, vf_index, pf_name, pci_info
|
||||
return None, None, None, None
|
||||
|
||||
def _get_pci_info(self, pf, vf_index):
|
||||
|
@@ -280,6 +280,8 @@ class CNIDaemonServiceManager(cotyledon.ServiceManager):
|
||||
|
||||
os_vif.initialize()
|
||||
clients.setup_kubernetes_client()
|
||||
if CONF.sriov.enable_pod_resource_service:
|
||||
clients.setup_pod_resources_client()
|
||||
|
||||
self.manager = multiprocessing.Manager()
|
||||
registry = self.manager.dict() # For Watcher->Server communication.
|
||||
|
@@ -258,6 +258,9 @@ sriov_opts = [
|
||||
cfg.StrOpt('kubelet_root_dir',
|
||||
help=_("The root directory of the Kubelet daemon"),
|
||||
default='/var/lib/kubelet'),
|
||||
cfg.BoolOpt('enable_pod_resource_service',
|
||||
help=_("Enable PodResources service"),
|
||||
default=False),
|
||||
cfg.DictOpt('default_physnet_subnets',
|
||||
help=_("A mapping of default subnets for certain physnets "
|
||||
"in a form of physnet-name:<SUBNET-ID>"),
|
||||
|
@@ -52,6 +52,7 @@ K8S_ANNOTATION_NPWG_CRD_SUBNET_ID = 'subnetId'
|
||||
K8S_ANNOTATION_NPWG_CRD_DRIVER_TYPE = 'driverType'
|
||||
|
||||
K8S_ANNOTATION_NODE_PCI_DEVICE_INFO = 'openstack.org/kuryr-pci-info'
|
||||
K8S_ANNOTATION_PCI_DEVICES = K8S_ANNOTATION_PREFIX + '-pci-devices'
|
||||
|
||||
K8S_OS_VIF_NOOP_PLUGIN = "noop"
|
||||
|
||||
|
@@ -58,6 +58,8 @@ class SriovVIFDriver(neutron_vif.NeutronPodVIFDriver):
|
||||
c_utils.tag_neutron_resources('ports', [port['id']])
|
||||
vif = ovu.neutron_to_osvif_vif(vif_plugin, port, subnets)
|
||||
vif.physnet = physnet
|
||||
vif.pod_name = pod_name
|
||||
vif.pod_link = pod['metadata']['selfLink']
|
||||
|
||||
LOG.debug("{} vifs are available for the pod {}".format(
|
||||
amount, pod_name))
|
||||
|
@@ -73,10 +73,13 @@ class VIFMacvlanNested(obj_osvif.VIFBase):
|
||||
@obj_base.VersionedObjectRegistry.register
|
||||
class VIFSriov(obj_osvif.VIFDirect):
|
||||
# This is OVO based SRIOV vif.
|
||||
|
||||
VERSION = '1.0'
|
||||
# Version 1.0: Initial version
|
||||
# Version 1.1: Added pod_name field and pod_link field.
|
||||
VERSION = '1.1'
|
||||
|
||||
fields = {
|
||||
# physnet of the VIF
|
||||
'physnet': obj_fields.StringField(),
|
||||
'pod_name': obj_fields.StringField(),
|
||||
'pod_link': obj_fields.StringField(),
|
||||
}
|
||||
|
@@ -19,6 +19,7 @@ from os_vif import objects as osv_objects
|
||||
from oslo_config import cfg
|
||||
|
||||
from kuryr_kubernetes.cni.binding import base
|
||||
from kuryr_kubernetes.cni.binding import sriov
|
||||
from kuryr_kubernetes import objects
|
||||
from kuryr_kubernetes.tests import base as test_base
|
||||
from kuryr_kubernetes.tests import fake
|
||||
@@ -213,22 +214,50 @@ class TestSriovDriver(TestDriverMixin, test_base.TestCase):
|
||||
def setUp(self):
|
||||
super(TestSriovDriver, self).setUp()
|
||||
self.vif = fake._fake_vif(objects.vif.VIFSriov)
|
||||
self.vif.physnet = 'test_physnet'
|
||||
self.vif.physnet = 'physnet2'
|
||||
self.pci_info = mock.Mock()
|
||||
self.vif.pod_link = 'pod_link'
|
||||
self.vif.pod_name = 'pod_1'
|
||||
self.pci = mock.Mock()
|
||||
|
||||
self.device_ids = ['pci_dev_1']
|
||||
self.device = mock.Mock()
|
||||
self.device.device_ids = self.device_ids
|
||||
self.device.resource_name = 'intel.com/sriov'
|
||||
|
||||
self.cont_devs = [self.device]
|
||||
self.container = mock.Mock()
|
||||
self.container.devices = self.cont_devs
|
||||
|
||||
self.pod_containers = [self.container]
|
||||
self.pod_resource = mock.Mock()
|
||||
self.pod_resource.containers = self.pod_containers
|
||||
self.pod_resource.name = 'pod_1'
|
||||
|
||||
self.resources = [self.pod_resource]
|
||||
|
||||
CONF.set_override('physnet_resource_mappings', 'physnet2:sriov',
|
||||
group='sriov')
|
||||
self.addCleanup(CONF.clear_override, 'physnet_resource_mappings',
|
||||
group='sriov')
|
||||
CONF.set_override('device_plugin_resource_prefix', 'intel.com',
|
||||
group='sriov')
|
||||
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_get_host_pf_names')
|
||||
'_annotate_device')
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_get_available_vf_info')
|
||||
'_choose_pci')
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_get_vf_info')
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_set_vf_mac')
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_save_pci_info')
|
||||
def test_connect(self, m_save_pci_info, m_set_vf_mac, m_avail_vf_info,
|
||||
m_host_pf_names):
|
||||
m_avail_vf_info.return_value = [self.ifname, 1,
|
||||
'h_interface', self.pci_info]
|
||||
m_host_pf_names.return_value = 'h_interface'
|
||||
def test_connect(self, m_save_pci_info, m_set_vf_mac, m_vf_info,
|
||||
m_choose_pci, m_annot_dev):
|
||||
m_vf_info.return_value = [self.ifname, 1, 'h_interface',
|
||||
self.pci_info]
|
||||
m_choose_pci.return_value = self.pci
|
||||
self._test_connect()
|
||||
|
||||
self.assertEqual(self.ifname, self.m_c_iface.ifname)
|
||||
@@ -237,9 +266,41 @@ class TestSriovDriver(TestDriverMixin, test_base.TestCase):
|
||||
m_set_vf_mac.assert_called_once_with('h_interface', 1,
|
||||
str(self.vif.address))
|
||||
m_save_pci_info.assert_called_once_with(self.vif.id, self.pci_info)
|
||||
m_annot_dev.assert_called_once_with(self.vif.pod_link, self.pci)
|
||||
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_remove_pci_info')
|
||||
def test_disconnect(self, m_remove_pci):
|
||||
m_remove_pci.return_value = None
|
||||
self._test_disconnect()
|
||||
|
||||
@mock.patch('kuryr_kubernetes.clients.get_pod_resources_client')
|
||||
@mock.patch('kuryr_kubernetes.cni.binding.sriov.VIFSriovDriver.'
|
||||
'_get_resource_by_physnet')
|
||||
def test_choose_pci(self, m_get_res_ph, m_get_prc):
|
||||
cls = sriov.VIFSriovDriver
|
||||
m_driver = mock.Mock(spec=cls)
|
||||
|
||||
m_driver._make_resource.return_value = 'intel.com/sriov'
|
||||
m_driver._get_pod_devices.return_value = ['pci_dev_2']
|
||||
|
||||
pod_resources_list = mock.Mock()
|
||||
pod_resources_list.pod_resources = self.resources
|
||||
pod_resources_client = mock.Mock()
|
||||
pod_resources_client.list.return_value = pod_resources_list
|
||||
m_get_prc.return_value = pod_resources_client
|
||||
|
||||
self.assertEqual('pci_dev_1', cls._choose_pci(m_driver, self.vif,
|
||||
self.ifname, self.netns))
|
||||
|
||||
def test_get_resource_by_physnet(self):
    """physnet2 resolves to 'sriov' via the overridden config mapping."""
    driver_cls = sriov.VIFSriovDriver
    mocked_driver = mock.Mock(spec=driver_cls)
    result = driver_cls._get_resource_by_physnet(
        mocked_driver, self.vif.physnet)
    self.assertEqual('sriov', result)
|
||||
|
||||
def test_make_resource(self):
    """Resource name gets the configured device-plugin prefix prepended."""
    driver_cls = sriov.VIFSriovDriver
    mocked_driver = mock.Mock(spec=driver_cls)
    result = driver_cls._make_resource(mocked_driver, 'sriov')
    self.assertEqual('intel.com/sriov', result)
|
||||
|
Reference in New Issue
Block a user