Merge "Adjust agent to handle Broadcom NIC corner cases"

This commit is contained in:
Zuul 2022-04-25 21:33:19 +00:00 committed by Gerrit Code Review
commit a97a8b6421
4 changed files with 115 additions and 17 deletions

View File

@ -15,6 +15,7 @@ from eventlet.green import subprocess
import glob
import os
import shlex
import time
from oslo_log import log as logging
from sysinv._i18n import _
@ -63,7 +64,7 @@ prevision = 4
psvendor = 5
psdevice = 6
VALID_PORT_SPEED = ['10', '100', '1000', '10000', '40000', '100000']
VALID_PORT_SPEED = ['10', '100', '1000', '10000', '25000', '40000', '100000']
# Network device flags (from include/uapi/linux/if.h)
IFF_UP = 1 << 0
@ -511,6 +512,24 @@ class PCIOperator(object):
flags = None
return flags
def _get_netdev_operstate(self, dirpcinet, pci):
foperstate = dirpcinet + pci + '/operstate'
try:
with open(foperstate, 'r') as f:
operstate = f.readline().rstrip()
except Exception:
operstate = None
return operstate
def _get_netdev_speed(self, dirpcinet, pci):
fspeed = dirpcinet + pci + '/speed'
try:
with open(fspeed, 'r') as f:
speed = f.readline().rstrip()
except Exception:
speed = None
return speed
def _get_netdev_flags(self, dirpcinet, pci):
fflags = dirpcinet + pci + '/flags'
return self._read_flags(fflags)
@ -527,6 +546,38 @@ class PCIOperator(object):
names.append(name)
return names
def _pci_wait_for_operational_state(self, iface, pci_addr, driver):
    """
    Block until an interface reports an operstate of "up", for at most
    1.5 seconds, and only for drivers that need it.

    Some network adapters may take up to 1 second to become operational,
    even though "flags" indicate that the interface is up.

    Drivers not listed in constants.DRIVERS_NOT_IMMEDIATELY_OPERATIONAL
    return immediately. For the others the operstate is polled 16 times
    with a 0.1 second pause between polls (15 pauses in total). If the
    interface never reports "up", a warning is logged and the function
    returns normally.

    :param iface: name of the network interface to poll
    :param pci_addr: PCI address used to locate the interface's net
                     directory
    :param driver: driver name of the device
    """
    if driver not in constants.DRIVERS_NOT_IMMEDIATELY_OPERATIONAL:
        return

    num_tries = 16
    sleep_dur = 0.1
    dirpcinet = self.get_pci_net_directory(pci_addr)

    polls_left = num_tries
    while polls_left > 0:
        if self._get_netdev_operstate(dirpcinet, iface) == "up":
            return
        polls_left -= 1
        # Only pause when another poll is going to follow.
        if polls_left:
            time.sleep(sleep_dur)

    LOG.warning("%s did not become operational after %d attempts" %
                (iface, num_tries))
def pci_get_net_attrs(self, pciaddr):
''' For this pciaddr, build a list of network attributes per port '''
pci_attrs_array = []
@ -653,26 +704,30 @@ class PCIOperator(object):
if not(flags & IFF_UP):
LOG.warning("Enabling device %s to query link speed" % n)
cmd = 'ip link set dev %s up' % n
subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
_p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
_p.wait()
self._pci_wait_for_operational_state(n, a, driver)
# Read the speed
fspeed = dirpcinet + n + '/' + "speed"
try:
with open(fspeed, 'r') as f:
speed = f.readline().rstrip()
if speed not in VALID_PORT_SPEED:
LOG.error("Invalid port speed = %s for %s " %
(speed, n))
speed = None
except Exception:
speed = self._get_netdev_speed(dirpcinet, n)
if speed is None:
LOG.warning("ATTR speed unknown for: %s (flags: %s)" % (n, hex(flags)))
elif speed == '-1':
LOG.warning("Port speed detected as -1 for: %s (link operstate: %s)" %
(n, self._get_netdev_operstate(dirpcinet, n)))
speed = None
elif speed not in VALID_PORT_SPEED:
LOG.error("Invalid port speed = %s for %s " % (speed, n))
speed = None
# If the administrative state was down, take it back down
if not(flags & IFF_UP):
LOG.warning("Disabling device %s after querying link speed" % n)
cmd = 'ip link set dev %s down' % n
subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
_p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
_p.wait()
flink_mode = dirpcinet + n + '/' + "link_mode"
try:

View File

@ -1921,11 +1921,22 @@ CEPH_MON_1 = 'ceph-mon-1-ip'
CEPH_MON_2 = 'ceph-mon-2-ip'
CEPH_FLOATING_MON = 'ceph-floating-mon-ip'
# Broadcom interface definitions (driver name string).
DRIVER_BNXT_EN = 'bnxt_en'
# Mellanox interface definitions (driver name string and the list of
# drivers treated as Mellanox).
DRIVER_MLX_CX4 = 'mlx5_core'
MELLANOX_DRIVERS = [DRIVER_MLX_CX4]
# Drivers that require the device to be administratively up before
# SR-IOV virtual functions are set up on it.
DRIVERS_UP_BEFORE_SRIOV = [DRIVER_BNXT_EN]
# Drivers whose devices need additional time after being brought up
# before they become operational.
DRIVERS_NOT_IMMEDIATELY_OPERATIONAL = [DRIVER_BNXT_EN]
# Traffic control
TRAFFIC_CONTROL_SCRIPT = '/usr/local/bin/tc_setup.sh'

View File

@ -1036,13 +1036,15 @@ def get_ethernet_network_config(context, iface, config):
get_interface_port_name(context, iface))
command = "echo 0 > %s; echo %s > %s" % (sriovfs_path, iface['sriov_numvfs'],
sriovfs_path)
fill_interface_config_option_operation(options, IFACE_PRE_UP_OP, command)
iface_op = get_device_sriov_setup_op(context, iface)
fill_interface_config_option_operation(options, iface_op, command)
elif interface_class == constants.INTERFACE_CLASS_PCI_PASSTHROUGH:
sriovfs_path = ("/sys/class/net/%s/device/sriov_numvfs" %
get_interface_port_name(context, iface))
command = "if [ -f %s ]; then echo 0 > %s; fi" % (
sriovfs_path, sriovfs_path)
fill_interface_config_option_operation(options, IFACE_PRE_UP_OP, command)
iface_op = get_device_sriov_setup_op(context, iface)
fill_interface_config_option_operation(options, iface_op, command)
config['options'].update(options)
return config
@ -1071,6 +1073,33 @@ def get_route_config(route, ifname):
return config
def get_device_sriov_setup_op(context, iface):
    """
    Select the interface operation under which SR-IOV/virtual function
    setup commands are registered.

    Looks up the interface's port and checks its driver against
    constants.DRIVERS_UP_BEFORE_SRIOV. Drivers in that list need the
    interface to be up first, so the post-up operation code is returned;
    every other driver gets the pre-up operation code.
    """
    driver = get_interface_port(context, iface)['driver']
    needs_up_first = driver in constants.DRIVERS_UP_BEFORE_SRIOV
    return IFACE_POST_UP_OP if needs_up_first else IFACE_PRE_UP_OP
def get_sriov_interface_up_requirement(context, iface):
    """
    Report whether the interface's driver needs the interface to be
    administratively up before VFs can be set up.

    Returns True when the driver of the interface's port is listed in
    constants.DRIVERS_UP_BEFORE_SRIOV, False otherwise.
    """
    driver = get_interface_port(context, iface)['driver']
    return driver in constants.DRIVERS_UP_BEFORE_SRIOV
def get_sriov_interface_port(context, iface):
"""
Determine the underlying port of the SR-IOV interface.
@ -1177,6 +1206,7 @@ def get_sriov_config(context, iface):
'num_vfs': num_vfs,
'device_id': interface.get_sriov_interface_device_id(context, iface),
'port_name': port['name'],
'up_requirement': get_sriov_interface_up_requirement(context, iface),
'vf_config': vf_config
}
return config

View File

@ -1130,7 +1130,8 @@ class InterfaceTestCase(InterfaceTestCaseMixin, dbbase.BaseHostTestCase):
def _get_sriov_config(self, ifname='default',
vf_driver=constants.SRIOV_DRIVER_TYPE_VFIO,
num_vfs=2, pf_addr=None, device_id='1572',
port_name="eth0", vf_config=None):
port_name="eth0", up_requirement=False,
vf_config=None):
if vf_config is None:
vf_config = {}
config = {'ifname': ifname,
@ -1138,6 +1139,7 @@ class InterfaceTestCase(InterfaceTestCaseMixin, dbbase.BaseHostTestCase):
'device_id': device_id,
'num_vfs': num_vfs,
'port_name': port_name,
'up_requirement': up_requirement,
'vf_config': vf_config}
return config