# All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import os from kuryr.lib._i18n import _ from oslo_concurrency import lockutils from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import jsonutils import pyroute2 from kuryr_kubernetes import clients from kuryr_kubernetes.cni.binding import base as b_base from kuryr_kubernetes import config from kuryr_kubernetes import constants from kuryr_kubernetes import exceptions from kuryr_kubernetes.handlers import health from kuryr_kubernetes import utils LOG = logging.getLogger(__name__) CONF = cfg.CONF class VIFSriovDriver(health.HealthHandler, b_base.BaseBindingDriver): def __init__(self): super().__init__() self._lock = None def release_lock_object(func): def wrapped(self, *args, **kwargs): try: return func(self, *args, **kwargs) finally: if self._lock and self._lock.acquired: self._lock.release() return wrapped @release_lock_object def connect(self, vif, ifname, netns, container_id): pci_info = self._process_vif(vif, ifname, netns) if config.CONF.sriov.enable_node_annotations: self._save_pci_info(vif.id, pci_info) def disconnect(self, vif, ifname, netns, container_id): # NOTE(k.zaitsev): when netns is deleted the interface is # returned automatically to host netns. We may reset # it to all-zero state self._return_device_driver(vif) if config.CONF.sriov.enable_node_annotations: self._remove_pci_info(vif.id) def _process_vif(self, vif, ifname, netns): pr_client = clients.get_pod_resources_client() pod_resources_list = pr_client.list() resources = pod_resources_list.pod_resources resource_name = self._get_resource_by_physnet(vif.physnet) driver = self._get_driver_by_res(resource_name) resource = self._make_resource(resource_name) LOG.debug("Vif %s will correspond to pci device belonging to " "resource %s", vif, resource) pod_devices = self._get_pod_devices(vif.pod_link) pod_resource = None container_devices = None for res in resources: if res.name == vif.pod_name: pod_resource = res break if not pod_resource: raise exceptions.CNIError( "No resources are discovered for pod {}".format(vif.pod_name)) LOG.debug("Looking for PCI device used by kubelet service and not " "used by pod %s yet ...", vif.pod_name) for container in pod_resource.containers: try: container_devices = container.devices except Exception: LOG.warning("No devices in container %s", container.name) continue for dev in container_devices: if dev.resource_name != resource: continue for pci in dev.device_ids: if pci in pod_devices: continue LOG.debug("Appropriate PCI device %s is found", pci) pci_info = self._compute_pci(pci, driver, vif.pod_link, vif, ifname, netns) return pci_info def _get_resource_by_physnet(self, physnet): mapping = config.CONF.sriov.physnet_resource_mappings try: resource_name = mapping[physnet] except KeyError: LOG.exception("No resource name for physnet %s", physnet) raise return resource_name def _make_resource(self, res_name): res_prefix = config.CONF.sriov.device_plugin_resource_prefix return res_prefix + '/' + res_name def _get_driver_by_res(self, resource_name): mapping = config.CONF.sriov.resource_driver_mappings try: driver = mapping[resource_name] except KeyError: LOG.exception("No driver for resource_name %s", resource_name) raise return driver def _compute_pci(self, pci, driver, pod_link, vif, ifname, netns): vf_name, vf_index, pf, pci_info = self._get_vf_info(pci, driver) pci_info['physical_network'] = vif.physnet if driver in constants.USERSPACE_DRIVERS: LOG.info("PCI device %s will be rebinded to userspace network " "driver %s", pci, driver) if vf_index and pf: self._set_vf_mac(pf, vf_index, vif.address) if vif.network.should_provide_vlan: vlan_id = vif.network.vlan self._set_vf_vlan(pf, vf_index, vlan_id) old_driver = self._bind_device(pci, driver) else: LOG.info("PCI device %s will be moved to container's net ns %s", pci, netns) self._move_to_netns(ifname, netns, vif, vf_name, vf_index, pf) old_driver = driver self._annotate_device(pod_link, pci, old_driver, driver, vif.id) return pci_info def _move_to_netns(self, ifname, netns, vif, vf_name, vf_index, pf): if vf_index and pf: if vif.network.should_provide_vlan: vlan_id = vif.network.vlan self._set_vf_vlan(pf, vf_index, vlan_id) self._set_vf_mac(pf, vf_index, vif.address) with b_base.get_ipdb() as h_ipdb, b_base.get_ipdb(netns) as c_ipdb: with h_ipdb.interfaces[vf_name] as host_iface: host_iface.net_ns_fd = utils.convert_netns(netns) with c_ipdb.interfaces[vf_name] as iface: iface.ifname = ifname iface.mtu = vif.network.mtu iface.up() def _get_vf_info(self, pci, driver): vf_sys_path = '/sys/bus/pci/devices/{}/net/'.format(pci) if not os.path.exists(vf_sys_path): if driver not in constants.USERSPACE_DRIVERS: raise OSError(_("No vf name for device {}").format(pci)) vf_name = None else: vf_names = os.listdir(vf_sys_path) vf_name = vf_names[0] pfysfn_path = '/sys/bus/pci/devices/{}/physfn/net/'.format(pci) # If physical function is not specified in VF's directory then # this VF belongs to current VM node if not os.path.exists(pfysfn_path): LOG.info("Current device %s is a virtual function which is " "passed into VM. Getting it's pci info", vf_name) pci_info = self._get_vf_pci_info(pci) return vf_name, None, None, pci_info pf_names = os.listdir(pfysfn_path) pf_name = pf_names[0] nvfs = self._get_total_vfs(pf_name) pf_sys_path = '/sys/class/net/{}/device'.format(pf_name) for vf_index in range(nvfs): virtfn_path = os.path.join(pf_sys_path, 'virtfn{}'.format(vf_index)) vf_pci = os.path.basename(os.readlink(virtfn_path)) if vf_pci == pci: pci_info = self._get_pci_info(pf_name, vf_index) return vf_name, vf_index, pf_name, pci_info return None, None, None, None def _get_vf_pci_info(self, pci): vendor_path = '/sys/bus/pci/devices/{}/vendor'.format(pci) with open(vendor_path) as vendor_file: # vendor_full contains a hex value (e.g. 0x8086) vendor_full = vendor_file.read() vendor = vendor_full.split('x')[1].strip() device_path = '/sys/bus/pci/devices/{}/device'.format(pci) LOG.info("Full path to device which is being processed", device_path) with open(device_path) as device_file: # device_full contains a hex value (e.g. 0x1520) device_full = device_file.read() device = device_full.split('x')[1].strip() pci_vendor_info = '{}:{}'.format(vendor, device) return {'pci_slot': pci, 'pci_vendor_info': pci_vendor_info} def _bind_device(self, pci, driver, old_driver=None): if not old_driver: old_driver_path = '/sys/bus/pci/devices/{}/driver'.format(pci) old_driver_link = os.readlink(old_driver_path) old_driver = os.path.basename(old_driver_link) if old_driver not in constants.MELLANOX_DRIVERS: unbind_path = '/sys/bus/pci/drivers/{}/unbind'.format(old_driver) bind_path = '/sys/bus/pci/drivers/{}/bind'.format(driver) override = "/sys/bus/pci/devices/{}/driver_override".format(pci) with open(unbind_path, 'w') as unbind_fd: unbind_fd.write(pci) with open(override, 'w') as override_fd: override_fd.write("\00") with open(override, 'w') as override_fd: override_fd.write(driver) with open(bind_path, 'w') as bind_fd: bind_fd.write(pci) LOG.info("Device %s was binded on driver %s. Old driver is %s", pci, driver, old_driver) return old_driver def _annotate_device(self, pod_link, pci, old_driver, new_driver, port_id): k8s = clients.get_kubernetes_client() pod_devices = self._get_pod_devices(pod_link) pod_devices[pci] = { constants.K8S_ANNOTATION_OLD_DRIVER: old_driver, constants.K8S_ANNOTATION_CURRENT_DRIVER: new_driver, constants.K8S_ANNOTATION_NEUTRON_PORT: port_id } pod_devices = jsonutils.dumps(pod_devices) LOG.debug("Trying to annotate pod %s with pci %s, old driver %s " "and new driver %s", pod_link, pci, old_driver, new_driver) k8s.annotate(pod_link, {constants.K8S_ANNOTATION_PCI_DEVICES: pod_devices}) def _get_pod_devices(self, pod_link): k8s = clients.get_kubernetes_client() pod = k8s.get(pod_link) annotations = pod['metadata']['annotations'] try: json_devices = annotations[constants.K8S_ANNOTATION_PCI_DEVICES] devices = jsonutils.loads(json_devices) except KeyError: devices = {} except Exception as ex: LOG.exception("Exception while getting annotations: %s", ex) LOG.debug("Pod %s has devices %s", pod_link, devices) return devices def _return_device_driver(self, vif): if not hasattr(vif, 'pod_link'): return pod_devices = self._get_pod_devices(vif.pod_link) for pci, info in pod_devices.items(): if info[constants.K8S_ANNOTATION_NEUTRON_PORT] == vif.id: if (info[constants.K8S_ANNOTATION_OLD_DRIVER] != info[constants.K8S_ANNOTATION_CURRENT_DRIVER]): LOG.debug("Driver of device %s should be changed back", pci) self._bind_device( pci, info[constants.K8S_ANNOTATION_OLD_DRIVER], info[constants.K8S_ANNOTATION_CURRENT_DRIVER] ) def _get_pci_info(self, pf, vf_index): vendor_path = '/sys/class/net/{}/device/virtfn{}/vendor'.format( pf, vf_index) with open(vendor_path) as vendor_file: vendor_full = vendor_file.read() vendor = vendor_full.split('x')[1].strip() device_path = '/sys/class/net/{}/device/virtfn{}/device'.format( pf, vf_index) with open(device_path) as device_file: device_full = device_file.read() device = device_full.split('x')[1].strip() pci_vendor_info = '{}:{}'.format(vendor, device) vf_path = '/sys/class/net/{}/device/virtfn{}'.format( pf, vf_index) pci_slot_path = os.readlink(vf_path) pci_slot = pci_slot_path.split('/')[1] return {'pci_slot': pci_slot, 'pci_vendor_info': pci_vendor_info} def _save_pci_info(self, neutron_port, port_pci_info): k8s = clients.get_kubernetes_client() annot_name = self._make_annotation_name(neutron_port) nodename = utils.get_node_name() LOG.info("Trying to annotate node %s with pci info %s", nodename, port_pci_info) k8s.patch_node_annotations(nodename, annot_name, port_pci_info) def _remove_pci_info(self, neutron_port): k8s = clients.get_kubernetes_client() annot_name = self._make_annotation_name(neutron_port) nodename = utils.get_node_name() LOG.info("Trying to delete pci info for port %s on node %s", neutron_port, nodename) k8s.remove_node_annotations(nodename, annot_name) def _make_annotation_name(self, neutron_port): annot_name = constants.K8S_ANNOTATION_NODE_PCI_DEVICE_INFO annot_name = annot_name.replace('/', '~1') annot_name = annot_name + '-' + neutron_port return annot_name def _acquire(self, path): if self._lock and self._lock.acquired: raise RuntimeError(_("Attempting to lock {} when {} " "is already locked.").format(path, self._lock)) self._lock = lockutils.InterProcessLock(path=path) return self._lock.acquire() def _release(self): if not self._lock: raise RuntimeError(_("Attempting release an empty lock")) return self._lock.release() def _get_total_vfs(self, pf): """Read /sys information for configured number of VFs of a PF""" pf_sys_path = '/sys/class/net/{}/device'.format(pf) total_fname = os.path.join(pf_sys_path, 'sriov_numvfs') try: with open(total_fname) as total_f: data = total_f.read() except IOError: LOG.warning("Could not open %s. No VFs for %s", total_fname, pf) return 0 nvfs = 0 try: nvfs = int(data.strip()) except ValueError: LOG.warning("Could not parse %s from %s. No VFs for %s", data, total_fname, pf) return 0 LOG.debug("PF %s has %s VFs", pf, nvfs) return nvfs def _set_vf_mac(self, pf, vf_index, mac): LOG.debug("Setting VF MAC: pf = %s, vf_index = %s, mac = %s", pf, vf_index, mac) ip = pyroute2.IPRoute() pf_index = ip.link_lookup(ifname=pf)[0] try: ip.link("set", index=pf_index, vf={"vf": vf_index, "mac": mac}) except pyroute2.NetlinkError: LOG.exception("Unable to set mac for VF %s on pf %s", vf_index, pf) raise def _set_vf_vlan(self, pf, vf_index, vlan_id): LOG.debug("Setting VF VLAN: pf = %s, vf_index = %s, vlan_id = %s", pf, vf_index, vlan_id) ip = pyroute2.IPRoute() pf_index = ip.link_lookup(ifname=pf)[0] try: ip.link("set", index=pf_index, vf={"vf": vf_index, "vlan": vlan_id}) except pyroute2.NetlinkError: LOG.exception("Unable to set vlan for VF %s on pf %s", vf_index, pf) raise