Merge "Add nvidia firstboot yaml to tripleo-heat-template" into stable/victoria
This commit is contained in:
commit
866bb9ea55
743
firstboot/nvidia_firstboot.yaml
Normal file
743
firstboot/nvidia_firstboot.yaml
Normal file
@ -0,0 +1,743 @@
|
|||||||
|
heat_template_version: rocky
|
||||||
|
|
||||||
|
description: >
|
||||||
|
This is a temporary workaround for upgrading Nvidia Mellanox NICs
|
||||||
|
FW and configuring them using mstflint tool
|
||||||
|
|
||||||
|
parameters:
|
||||||
|
BIN_DIR_URL:
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
description: 'URL of a directory containing Mellanox NIC Firmware'
|
||||||
|
FORCE_UPDATE:
|
||||||
|
type: boolean
|
||||||
|
default: False
|
||||||
|
description: "Force update the fw even if it's an older version"
|
||||||
|
DEV_WHITE_LIST:
|
||||||
|
type: comma_delimited_list
|
||||||
|
default: []
|
||||||
|
description: list of MLNX devices PCIs to be processed.
|
||||||
|
If the value is empty, all MLNX devices will be processed.
|
||||||
|
Example, ['0000:04:00.0', '0000:81:00.0']
|
||||||
|
Make sure to choose only PCI addresses ending with .0 (function 0)
|
||||||
|
NUM_OF_VFS:
|
||||||
|
type: number
|
||||||
|
default: 32
|
||||||
|
description: 'Max number of vfs'
|
||||||
|
SRIOV_EN:
|
||||||
|
type: boolean
|
||||||
|
default: True
|
||||||
|
description: 'Enable/Disable Sriov'
|
||||||
|
LINK_TYPE:
|
||||||
|
type: string
|
||||||
|
default: 'eth'
|
||||||
|
description: 'Link type ["eth", "ib"]'
|
||||||
|
ESWITCH_IPV4_TTL_MODIFY_ENABLE:
|
||||||
|
type: boolean
|
||||||
|
default: False
|
||||||
|
description: 'Enable TTL modification by E-Switch'
|
||||||
|
PRIO_TAG_REQUIRED_EN:
|
||||||
|
type: boolean
|
||||||
|
default: False
|
||||||
|
description: 'Priority tag required'
|
||||||
|
ESWITCH_HAIRPIN_TOT_BUFFER_SIZE:
|
||||||
|
type: json
|
||||||
|
default: {"*": "17"}
|
||||||
|
description: >
|
||||||
|
If a single key of "*" is provided, then its value will set to all indexes.
|
||||||
|
If you need to set configuration for a set of specific indexes, you can pass the
|
||||||
|
value as below for index 2 to be 17 and index 3 to be 16
|
||||||
|
Example, {"2": "17", "3": "16"}
|
||||||
|
Make sure to choose only the PCI ends with 0
|
||||||
|
ESWITCH_HAIRPIN_DESCRIPTORS:
|
||||||
|
type: json
|
||||||
|
default: {"*": "11"}
|
||||||
|
description: >
|
||||||
|
If a single key of "*" is provided, then its value will set to all indexes.
|
||||||
|
If you need to set configuration for a set of specific indexes, you can pass the
|
||||||
|
value as below for index 2 to be 17 and index 3 to be 16
|
||||||
|
Example, {"2": "17", "3": "16"}
|
||||||
|
Make sure to choose only the PCI ends with 0
|
||||||
|
BF_NIC_MODE:
|
||||||
|
type: boolean
|
||||||
|
default: False
|
||||||
|
description: >
|
||||||
|
A special parameter for BlueField SmartNICs, if the value is False, that means the
|
||||||
|
NIC is in smart nic mode and it's going to set the following config:
|
||||||
|
"INTERNAL_CPU_PAGE_SUPPLIER": "ECPF",
|
||||||
|
"INTERNAL_CPU_ESWITCH_MANAGER": "ECPF",
|
||||||
|
"INTERNAL_CPU_IB_VPORT0": "ECPF",
|
||||||
|
"INTERNAL_CPU_OFFLOAD_ENGINE": "ENABLED",
|
||||||
|
If the value is True, that means the NIC is in nic mode and it's going
|
||||||
|
to set the following config:
|
||||||
|
"INTERNAL_CPU_PAGE_SUPPLIER", "EXT_HOST_PF"
|
||||||
|
"INTERNAL_CPU_ESWITCH_MANAGER", "EXT_HOST_PF"
|
||||||
|
"INTERNAL_CPU_IB_VPORT0", "EXT_HOST_PF"
|
||||||
|
"INTERNAL_CPU_OFFLOAD_ENGINE", "DISABLED"
|
||||||
|
RESET_SYNC:
|
||||||
|
type: number
|
||||||
|
default: 0
|
||||||
|
description: >
|
||||||
|
Run mstfwreset with the specified reset-sync [0,1],
|
||||||
|
The default, and the only currently supported option, is 0
|
||||||
|
|
||||||
|
|
||||||
|
resources:
|
||||||
|
userdata:
|
||||||
|
type: OS::Heat::MultipartMime
|
||||||
|
properties:
|
||||||
|
parts:
|
||||||
|
- config: {get_resource: nvidia_nic_fw_update}
|
||||||
|
|
||||||
|
nvidia_nic_fw_update:
|
||||||
|
type: OS::Heat::SoftwareConfig
|
||||||
|
properties:
|
||||||
|
config:
|
||||||
|
str_replace:
|
||||||
|
template: |
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
|
||||||
|
import six
|
||||||
|
from six.moves import html_parser
|
||||||
|
from six.moves.urllib import error as urlError
|
||||||
|
from six.moves.urllib import request as urlRequest
|
||||||
|
|
||||||
|
from oslo_concurrency import processutils
|
||||||
|
|
||||||
|
# Patterns applied with re.match() (i.e. anchored at the start of a line)
# to the output of "mstflint ... query".

# Firmware version line, e.g. "FW Version:   16.26.1040"
FW_VERSION_REGEX = r'FW Version:\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)'
# Running firmware version line, e.g. "FW Version(Running):   16.26.1040"
RUNNING_FW_VERSION_REGEX = r'FW Version\(Running\):\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)'
# PSID line, e.g. "PSID:   MT_0000000012"
PSID_REGEX = r'PSID:\s*\t*(?P<psid>\w+)'
# Value printed by mstconfig for an array parameter, e.g. "Array[0..7]"
ARRAY_VALUE_REGEX = r'Array\[(?P<first_index>\d+)\.\.(?P<last_index>\d+)\]'
# Indexed array parameter name, e.g. "SOME_PARAM[3]"; used to strip the index
ARRAY_PARAM_REGEX = r'(?P<param_name>\w+)\[\d+\]'
|
||||||
|
|
||||||
|
# The $NAME tokens below are placeholders replaced by the enclosing Heat
# str_replace "params" mapping before this script is executed, so this
# section is not valid Python until the template has been rendered.
_DEV_WHITE_LIST = $DEV_WHITE_LIST
_FORCE_UPDATE = $FORCE_UPDATE
_BIN_DIR_URL = "$BIN_DIR_URL"
# Kept as a string; it is compared case-insensitively against "false" when
# the BlueField specific configuration is selected.
_BF_NIC_MODE = "$BF_NIC_MODE"
# TODO(adrianc): add configurable parameter for logging
# filemode='w' truncates the log file on every run.
logging.basicConfig(
    filename='/var/log/nvidia_nic_fw_update.log',
    filemode='w',
    level=logging.DEBUG)
LOG = logging.getLogger("nvidia_nic_fw_update")
|
||||||
|
|
||||||
|
# mstconfig parameters to apply on every processed device, as
# {"PARAM_NAME": "value"}. The $NAME tokens are Heat str_replace
# placeholders. The two hairpin entries are substituted unquoted because
# their template parameters are of type json (a mapping of array index, or
# "*", to value).
_MLX_CONFIG = {
    "SRIOV_EN": "$SRIOV_EN",
    "NUM_OF_VFS": "$NUM_OF_VFS",
    "LINK_TYPE_P1": "$LINK_TYPE",
    "LINK_TYPE_P2": "$LINK_TYPE",
    "ESWITCH_IPV4_TTL_MODIFY_ENABLE": "$ESWITCH_IPV4_TTL_MODIFY_ENABLE",
    "PRIO_TAG_REQUIRED_EN": "$PRIO_TAG_REQUIRED_EN",
    "ESWITCH_HAIRPIN_TOT_BUFFER_SIZE": $ESWITCH_HAIRPIN_TOT_BUFFER_SIZE,
    "ESWITCH_HAIRPIN_DESCRIPTORS": $ESWITCH_HAIRPIN_DESCRIPTORS
}
if _BF_NIC_MODE.lower() == "false":
    # It means we are in smart nic mode for BlueField device
    _MLX_CONFIG["INTERNAL_CPU_PAGE_SUPPLIER"] = "ECPF"
    _MLX_CONFIG["INTERNAL_CPU_ESWITCH_MANAGER"] = "ECPF"
    _MLX_CONFIG["INTERNAL_CPU_IB_VPORT0"] = "ECPF"
    _MLX_CONFIG["INTERNAL_CPU_OFFLOAD_ENGINE"] = "ENABLED"
    _MLX_CONFIG["INTERNAL_CPU_RSHIM"] = "True"
else:
    # It means we are in nic mode for BlueField device
    _MLX_CONFIG["INTERNAL_CPU_PAGE_SUPPLIER"] = "EXT_HOST_PF"
    _MLX_CONFIG["INTERNAL_CPU_ESWITCH_MANAGER"] = "EXT_HOST_PF"
    _MLX_CONFIG["INTERNAL_CPU_IB_VPORT0"] = "EXT_HOST_PF"
    _MLX_CONFIG["INTERNAL_CPU_OFFLOAD_ENGINE"] = "DISABLED"

# Value passed to "mstfwreset --sync" when a reset with --skip_fsm_sync is
# performed.
_RESET_SYNC = $RESET_SYNC
|
||||||
|
|
||||||
|
def run_command(*cmd, **kwargs):
    """ Execute a command and return its standard output.

    :param cmd: the command followed by its arguments, passed through to
                processutils.execute
    :param kwargs: keyword arguments passed through to processutils.execute
    :return: standard output of the command
    :raises processutils.ProcessExecutionError: logged and re-raised when
            the command fails
    """
    try:
        out, err = processutils.execute(*cmd, **kwargs)
    except processutils.ProcessExecutionError as e:
        # Arguments are not guaranteed to be strings (e.g. a numeric option
        # value), so stringify each one before joining; otherwise the join
        # itself would raise and mask the real failure.
        LOG.error("Failed to execute %s, %s",
                  ' '.join(str(arg) for arg in cmd), str(e))
        # Bare raise keeps the original traceback intact.
        raise
    if err:
        LOG.warning("Got stderr output: %s", err)
    LOG.debug(out)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mstflint_query_output(out):
    """ Extract known attributes from an mstflint query output.

    Every line is matched (from its start) against the firmware version,
    running firmware version and PSID patterns. Only attributes that were
    found are present in the result; a later matching line overrides an
    earlier one.

    :param out: mstflint query output
    :return: dictionary with any of the keys 'fw_ver', 'running_fw_ver'
             and 'psid'
    """
    # (result key, pattern, named group holding the value)
    extractors = (
        ("fw_ver", FW_VERSION_REGEX, 'fw_ver'),
        ("running_fw_ver", RUNNING_FW_VERSION_REGEX, 'fw_ver'),
        ("psid", PSID_REGEX, 'psid'),
    )
    parsed = {}
    for text in out.split('\n'):
        for key, pattern, group_name in extractors:
            found = re.match(pattern, text)
            if found is not None:
                parsed[key] = found.group(group_name)
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxDevices(object):
    """ Discover and retrieve Mellanox PCI devices.

    Can be used as an iterator once discover has been called.
    """

    def __init__(self, dev_white_list):
        """
        :param dev_white_list: list of PCI addresses to restrict discovery
                               to; an empty list means no restriction
        """
        self._devs = []
        self._dev_white_list = dev_white_list

    def discover(self):
        """ Discover Mellanox devices in the system. (first PF of every device)

        Devices are listed with "lspci -D -d 15b3:" and only addresses
        ending with '.0' are kept. A previous non-empty discovery result is
        reused without running lspci again.

        :return: list of discovered PCI addresses
        """
        if self._devs:
            return self._devs

        white_list = self._dev_white_list or []
        devs = []
        cmd = ['lspci', '-D', '-d', '15b3:']
        out = run_command(*cmd)
        for line in out.split('\n'):
            fields = line.split()
            if not fields:
                # empty or whitespace-only line
                continue
            dev = fields[0]
            if dev.endswith('.0') and (not white_list or dev in white_list):
                devs.append(dev)
        self._devs = devs
        LOG.info("Found Mellanox devices: %s", devs)
        other_devs = set(white_list) - set(devs)
        if other_devs:
            LOG.warning("Not all devices in PCI white list where discovered,"
                        " %s these may not be nvidia devices or have their "
                        "PCI function set to non zero.", other_devs)
        # Return the same thing as the cached path above so callers get a
        # consistent result regardless of whether lspci was actually run.
        return self._devs

    def __len__(self):
        return len(self._devs)

    def __iter__(self):
        return iter(self._devs)
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxConfigParamMetaData(object):
    """ Describes one mlxconfig parameter; currently only its name."""

    def __init__(self, name):
        """
        :param name: mlxconfig parameter name
        """
        self.name = name
|
||||||
|
|
||||||
|
class MlnxConfigArrayParamMetaData(MlnxConfigParamMetaData):
    """ Describes one mlxconfig array parameter: its name and the first
    and last valid index, both stored as integers.
    """

    def __init__(self, name, first_idx, last_idx):
        """
        :param name: mlxconfig parameter name (without index)
        :param first_idx: first valid array index (int or numeric string)
        :param last_idx: last valid array index (int or numeric string)
        """
        MlnxConfigParamMetaData.__init__(self, name)
        lower_bound = int(first_idx)
        self.first_index = lower_bound
        upper_bound = int(last_idx)
        self.last_index = upper_bound
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxDeviceConfig(object):
    """ Get/Set Mellanox Device configurations

    Wraps the mstconfig tool for a single PCI device.
    """

    def __init__(self, pci_dev):
        """
        :param pci_dev: PCI address of the device (String)
        """
        self.pci_dev = pci_dev
        # Raw output of "mstconfig -d <dev> i", fetched on first use
        self._tool_confs = None
        # NOTE(adrianc) ATM contains only array type parameter metadata
        self.mlnx_config_array_param_metadata = {}

    def _mstconfig_parse_data(self, data):
        """ Parse mstconfig query output into a dict.

        Everything up to and including the first line that contains
        'Configurations:' is ignored. Each following non-empty line
        contributes {first token: second token}; lines with fewer than two
        tokens are skipped. When no 'Configurations:' line exists the
        result is empty.

        :param data: mstconfig query output
        :return: dict {"PARAM_NAME": "Param value", ....}
        """
        lines = [line for line in data.split('\n') if line]
        first_conf_line = len(lines)
        for idx, line in enumerate(lines):
            if 'Configurations:' in line:
                first_conf_line = idx + 1
                break
        parsed = {}
        for line in lines[first_conf_line:]:
            tokens = line.split()
            if len(tokens) < 2:
                # nothing to map (e.g. a separator or a lone word)
                continue
            parsed[tokens[0]] = tokens[1]
        return parsed

    def get_device_conf_dict(self, param_name=None):
        """ Get device Configurations

        :param param_name: if provided retrieve only given configuration
        :return: dict {"PARAM_NAME": "Param value", ....}
        """
        LOG.info("Getting configurations for device: %s", self.pci_dev)
        cmd = ["mstconfig", "-d", self.pci_dev, "q"]
        if param_name:
            cmd.append(param_name)
        out = run_command(*cmd)
        return self._mstconfig_parse_data(out)

    def param_supp_by_config_tool(self, param_name):
        """ Check if configuration tool supports the provided configuration
        parameter.

        The check is a substring search in the cached output of
        "mstconfig -d <dev> i".

        :param param_name: configuration name
        :return: bool
        """
        if self._tool_confs is None:
            self._tool_confs = run_command(
                "mstconfig", "-d", self.pci_dev, "i")
        # trim any array index if present
        indexed_param = re.match(ARRAY_PARAM_REGEX, param_name)
        if indexed_param:
            param_name = indexed_param.group('param_name')
        return param_name in self._tool_confs

    def _build_config_param_metadata_map(self, conf_dict):
        """ Rebuild the array parameter metadata from a queried conf dict.

        :param conf_dict: dict as returned by get_device_conf_dict()
        :return: None
        """
        self.mlnx_config_array_param_metadata = {}
        for param_name, val in conf_dict.items():
            array_val = re.match(ARRAY_VALUE_REGEX, val)
            if array_val:
                # Array parameter, extract first/last index
                self.mlnx_config_array_param_metadata[param_name] = \
                    MlnxConfigArrayParamMetaData(
                        param_name,
                        array_val.group('first_index'),
                        array_val.group('last_index'))

    def _inflate_array_param_vals_from_query(self, conf_dict):
        """ Inflate provided conf dict with all values of array parameter

        Every known array parameter is replaced by the entries obtained
        from querying its full index range on the device.
        """
        inflated_conf = {}
        for param_name, val in conf_dict.items():
            if param_name in self.mlnx_config_array_param_metadata:
                param_meta = self.mlnx_config_array_param_metadata[param_name]
                arr_param_val = self.get_device_conf_dict(
                    param_name="%s[%s..%s]" % (param_name,
                                               param_meta.first_index,
                                               param_meta.last_index))
                # Add new keys to dict
                inflated_conf.update(arr_param_val)
            else:
                inflated_conf[param_name] = val
        return inflated_conf

    def _inflate_single_array_input_val(self, param_name, val):
        """ Expand the user input of one array parameter.

        :param param_name: name of a known array parameter
        :param val: dict of either {"*": value} (applies to every index) or
                    {"<index>": value, ...}; out of range indexes are
                    skipped with a warning
        :return: dict {"PARAM[idx]": "value", ...} with string values
        :raises RuntimeError: if "*" is combined with other keys
        """
        conf_dict = {}
        param_meta = self.mlnx_config_array_param_metadata[param_name]
        first = param_meta.first_index
        last = param_meta.last_index

        if '*' in val:
            if len(val) != 1:
                raise RuntimeError(
                    "Invalid input for provided array type parameter. %s:%s"
                    % (param_name, val))
            # Stringify like the indexed branch below does, so that
            # non-string values (e.g. {"*": 17}) can be compared later.
            wildcard_val = str(val['*'])
            for idx in range(first, last + 1):
                conf_dict["%s[%s]" % (param_name, idx)] = wildcard_val
        else:
            for idx, idx_val in val.items():
                if int(idx) not in range(first, last + 1):
                    LOG.warning(
                        "Provided array param index(%s) is out of range "
                        "[%s..%s] skipping...", idx, first, last)
                    continue
                conf_dict["%s[%s]" % (param_name, idx)] = str(idx_val)
        return conf_dict

    def _inflate_array_param_vals_from_input(self, conf_dict):
        """ Inflate provided conf dict with all values of array parameter

        Non-array parameters are copied unchanged.
        """
        inflated_conf = {}
        for param_name, val in conf_dict.items():
            if param_name in self.mlnx_config_array_param_metadata:
                inflated_conf.update(
                    self._inflate_single_array_input_val(param_name, val))
            else:
                inflated_conf[param_name] = val

        return inflated_conf

    @staticmethod
    def _value_already_set(desired, current):
        """ Tell whether a queried value equals the desired one.

        Comparison is case-insensitive. A queried value of the form
        "LABEL(RAW)" (e.g. "True(1)") matches when the desired value equals
        either LABEL or RAW. A plain substring test is deliberately not
        used: it would consider "1" to be already set when the device
        holds "16".

        :param desired: value requested by the user
        :param current: value as reported by the mstconfig query
        :return: bool
        """
        desired = str(desired).strip().lower()
        current = str(current).strip().lower()
        if desired == current:
            return True
        labelled = re.match(r'^(?P<label>[^()]*)\((?P<raw>[^()]*)\)$', current)
        if labelled:
            return desired in (labelled.group('label').strip(),
                               labelled.group('raw').strip())
        return False

    def set_config(self, conf_dict):
        """ Set device configurations

        Parameters unsupported by the tool are logged and skipped.
        Parameters missing from the device query, or already holding the
        requested value, are left untouched. All remaining parameters are
        applied with a single mstconfig invocation.

        :param conf_dict: a dictionary of:
                          {"PARAM_NAME": "Param value to set", ...}
        :return: None
        """
        current_mlx_config = self.get_device_conf_dict()
        self._build_config_param_metadata_map(current_mlx_config)
        current_mlx_config = self._inflate_array_param_vals_from_query(
            current_mlx_config)
        # inflate user input for array parameters
        conf_dict = self._inflate_array_param_vals_from_input(conf_dict)

        params_to_set = []
        for key, value in conf_dict.items():
            if not self.param_supp_by_config_tool(key):
                LOG.error(
                    "Configuraiton: %s is not supported by mstconfig,"
                    " please update to the latest mstflint package.", key)
                continue

            current_value = current_mlx_config.get(key)
            if current_value and not self._value_already_set(
                    value, current_value):
                # Aggregate all configurations required to be modified
                params_to_set.append("%s=%s" % (key, value))

        if params_to_set:
            LOG.info("Setting configurations for device: %s", self.pci_dev)
            run_command("mstconfig", "-d", self.pci_dev, "-y",
                        "set", *params_to_set)
            LOG.info("Set device configurations: Setting %s done successfully",
                     " ".join(params_to_set))
        else:
            LOG.info("Set device configurations: No operation required")
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxFirmwareBinary(object):
    """ A firmware binary file stored on the local file system."""

    def __init__(self, local_bin_path):
        """
        :param local_bin_path: path of the firmware binary file
        """
        self.bin_path = local_bin_path
        self.image_info = {}

    def get_info(self):
        """ Get firmware information from binary

        The binary is queried with mstflint only when the cached info does
        not already belong to the current file path.

        Caller should wrap this call under try catch to skip non compliant
        firmware binaries.

        :return: dict of firmware image attributes
        """
        cached_path = self.image_info.get('file_path', '')
        if cached_path != self.bin_path:
            fresh_info = {'file_path': self.bin_path}
            # Published before the query runs, exactly as the attributes
            # are later merged into this very dict.
            self.image_info = fresh_info
            query_out = run_command('mstflint', '-i', self.bin_path, 'query')
            fresh_info.update(parse_mstflint_query_output(query_out))
        # Note(adrianc): deep copy ?
        return self.image_info
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxFirmwareBinariesFetcher(object):
    """ A class for fetching firmware binaries from a directory
    provided by a URL link

    Note: URL MUST point to a directory and end with '/'
    e.g http://www.mysite.com/mlnx_bins/

    The download directory is created once, when the class is defined,
    and removed when an instance is garbage collected.
    """
    dest_dir = tempfile.mkdtemp(suffix="tripleo_mlnx_firmware")

    class FileHTMLParser(html_parser.HTMLParser):
        """ A crude HTML Parser to extract files from an HTTP response.

        Collects, in document order, every href attribute value that ends
        with the given suffix.
        """

        def __init__(self, suffix):
            # HTMLParser is Old style class dont use super() method
            html_parser.HTMLParser.__init__(self)
            self.matches = []
            self.suffix = suffix

        def handle_starttag(self, tag, attrs):
            for name, val in attrs:
                # A valueless attribute (e.g. "<a href>") is reported with
                # a value of None, which has no endswith().
                if name == 'href' and val and val.endswith(self.suffix):
                    self.matches.append(val)

    def __init__(self, url):
        """
        :param url: URL of the directory holding the firmware binaries
        """
        self.url = url

    def __del__(self):
        self._cleanup_dest_dir()

    def _cleanup_dest_dir(self):
        """ Remove the download directory if it exists."""
        if os.path.exists(MlnxFirmwareBinariesFetcher.dest_dir):
            shutil.rmtree(MlnxFirmwareBinariesFetcher.dest_dir)

    def _get_file_from_url(self, file_name):
        """ Download a single file into the download directory.

        :param file_name: file reference, relative to the directory URL
        :return: path of the downloaded file
        :raises urlError.URLError: logged and re-raised when the download
                request fails
        """
        # The directory URL ends with '/', avoid producing a double slash.
        full_path = self.url.rstrip('/') + "/" + file_name
        LOG.info("Downloading file: %s to %s", full_path,
                 MlnxFirmwareBinariesFetcher.dest_dir)
        try:
            url_data = urlRequest.urlopen(full_path)
        except urlError.URLError as e:
            LOG.error("Failed to download data: %s", str(e))
            raise
        # file_name originates from a remote HTML page; keep only its last
        # path component so the file can never land outside dest_dir.
        dest_file_path = os.path.join(MlnxFirmwareBinariesFetcher.dest_dir,
                                      os.path.basename(file_name))
        try:
            with open(dest_file_path, 'wb') as f:
                shutil.copyfileobj(url_data, f)
        finally:
            url_data.close()
        return dest_file_path

    def _get_file_create_bin_obj(self, file_name, fw_bins):
        """ This wrapper method will download a firmware binary,
        create MlnxFirmwareBinary object and append to the provided
        fw_bins list.

        Any failure is logged and the file is skipped.

        :return: None
        """
        try:
            dest_file_path = self._get_file_from_url(file_name)
            fw_bin = MlnxFirmwareBinary(dest_file_path)
            # Note(adrianc): Pre query image, to skip incompatible files
            # in case of Error
            fw_bin.get_info()
            fw_bins.append(fw_bin)
        except Exception as e:
            LOG.warning("Failed to download and query %s, skipping file. "
                        "%s", file_name, str(e))

    def get_firmware_binaries(self):
        """ Get Firmware binaries

        Fetches the directory index, downloads every referenced '.bin'
        file (one thread per file) and pre-queries it.

        :return: list of MlnxFirmwareBinary objects for the files that were
                 downloaded and queried successfully
        :raises RuntimeError: if the URL does not end with '/'
        :raises urlError.URLError: if the directory index can't be fetched
        """
        if not self.url.endswith('/'):
            LOG.error("Bad URL provided (%s), expected URL to be a directory",
                      self.url)
            raise RuntimeError('Failed to get firmware binaries, '
                               'expected directory URL path '
                               '(e.g "http://<your_ip>/mlnx_bins/"). '
                               'Given URL path: %s' % self.url)
        try:
            response = urlRequest.urlopen(self.url)
        except urlError.URLError as err:
            LOG.error(err)
            raise
        try:
            index_data = response.read()
        finally:
            response.close()
        if not isinstance(index_data, str):
            # Python 3 returns bytes; str() on them would yield the
            # "b'...'" representation rather than the page content.
            index_data = index_data.decode('utf-8', 'replace')

        parser = MlnxFirmwareBinariesFetcher.FileHTMLParser(suffix=".bin")
        parser.feed(index_data)
        parser.close()
        if not parser.matches:
            LOG.warning("No bin Files found in the provided URL: %s", self.url)

        fw_bins = []
        threads = []
        for file_name in parser.matches:
            # TODO(adrianc) fetch files async with co-routines,
            # may need to limit thread count
            t = threading.Thread(target=self._get_file_create_bin_obj,
                                 args=(file_name, fw_bins))
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
        return fw_bins
|
||||||
|
|
||||||
|
|
||||||
|
class MlnxDevFirmwareOps(object):
    """ Perform various Firmware related operations on device
    """

    def __init__(self, dev):
        """
        :param dev: PCI address of the device (String)
        """
        self.dev = dev
        self.dev_info = {}

    @staticmethod
    def _version_key(version):
        """ Convert a dotted version string to a tuple of ints.

        Versions must be compared numerically: as plain strings
        '16.9.1000' would sort after '16.26.1040'.

        :param version: version string, e.g. '16.26.1040'
        :return: tuple of ints, e.g. (16, 26, 1040)
        """
        return tuple(int(part) for part in version.split('.'))

    def query_device(self, force=False):
        """ Get firmware information from device

        :param force: force device query, even if query was executed in
                      previous calls.
        :return: dict of firmware image attributes
        """
        if not force and self.dev_info.get('device', '') == self.dev:
            return self.dev_info

        cmd = ['mstflint', '-d', self.dev, '-qq', 'query']
        out = run_command(*cmd)
        # Keep the 'device' key next to the parsed attributes; it is what
        # the cache check above relies on. The dict is published only after
        # a successful query so a failure never leaves partial info cached.
        dev_info = {'device': self.dev}
        dev_info.update(parse_mstflint_query_output(out))
        self.dev_info = dev_info
        # Note(adrianc): deep copy ?
        return self.dev_info

    def need_update(self, image_info):
        """ Check if device requires firmware update

        :param image_info: image_info dict as returned from
                           MlnxFirmwareBinary.get_info()
        :return: bool, True if update is needed
        """
        if not self.dev_info:
            self.query_device()
        LOG.info("Device firmware version: %s, Image firmware version: %s",
                 self.dev_info['fw_ver'], image_info['fw_ver'])
        return (self._version_key(self.dev_info['fw_ver']) <
                self._version_key(image_info['fw_ver']))

    def need_reset_before_config(self):
        """ Check if device requires firmware reset before applying any
        configurations on the device.

        A reset is considered only when the queried firmware version is
        newer than the running one and some mandatory parameter is missing
        from the device query although the tool supports it.

        :return: (bool, bool) True if reset is needed,
                              True if skip_fms_sync is needed
        """
        self.query_device(force=True)
        if 'running_fw_ver' not in self.dev_info:
            return False, False
        running_ver = self._version_key(self.dev_info['running_fw_ver'])
        next_boot_image_newer = \
            running_ver < self._version_key(self.dev_info['fw_ver'])
        if next_boot_image_newer:
            mandatory_params = ["ESWITCH_IPV4_TTL_MODIFY_ENABLE",
                                "PRIO_TAG_REQUIRED_EN",
                                "INTERNAL_CPU_PAGE_SUPPLIER",
                                "INTERNAL_CPU_ESWITCH_MANAGER",
                                "INTERNAL_CPU_IB_VPORT0",
                                "INTERNAL_CPU_OFFLOAD_ENGINE"]
            device_config = MlnxDeviceConfig(self.dev)
            conf_dict = device_config.get_device_conf_dict()
            for param in mandatory_params:
                if param not in conf_dict and \
                        device_config.param_supp_by_config_tool(param):
                    if "INTERNAL_CPU_MODEL" in conf_dict:
                        if running_ver < (24, 32, 0):
                            # In case the device is BlueField and the FW is less than Nov release
                            # return True, True to do reset with skip_fms_sync
                            return True, True
                        elif (24, 32, 0) <= running_ver < (24, 33, 0):
                            # In case the device is BlueField and the FW is from Nov release
                            # reset will fail, so don't do it
                            return False, False
                    return True, False
        return False, False

    def burn_firmware(self, image_path):
        """ Burn firmware on device

        :param image_path: firmware binary file path
        :return: None
        """
        LOG.info("Updating firmware image (%s) for device: %s",
                 image_path, self.dev)
        cmd = ["mstflint", "-d", self.dev, "-i", image_path,
               "-y", "burn"]
        run_command(*cmd)
        LOG.info("Device %s: Successfully updated.", self.dev)

    def reset_device(self, skip_fms_sync=False):
        """ Reset firmware

        :param skip_fms_sync: when True, run mstfwreset with
                              --skip_fsm_sync and the configured --sync
                              value
        :return: None
        """
        LOG.info("Device %s: Performing firmware reset.", self.dev)
        if skip_fms_sync:
            # Command arguments are kept as strings; _RESET_SYNC is numeric.
            cmd = ["mstfwreset", "-d", self.dev, "--skip_fsm_sync", "-y",
                   "--sync", str(_RESET_SYNC), "reset"]
        else:
            cmd = ["mstfwreset", "-d", self.dev, "-y", "reset"]
        run_command(*cmd)
        LOG.info("Device %s: Firmware successfully reset.", self.dev)
|
||||||
|
|
||||||
|
|
||||||
|
def check_prereq():
    """ Verify that every required tool can be executed.

    The tools are probed in order (mstflint, mstconfig, mstfwreset,
    lspci); the first failure is logged and re-raised.

    :return: None
    """
    version_probes = (
        # check for mstflint
        ('mstflint', '-v'),
        # check for mstconfig
        ('mstconfig', '-v'),
        # check for mstfwreset
        ('mstfwreset', '-v'),
        # check for lspci
        ('lspci', '--version'),
    )
    try:
        for probe in version_probes:
            run_command(*probe)
    except Exception as e:
        LOG.error("Failed Prerequisite check. %s", str(e))
        raise e
|
||||||
|
|
||||||
|
|
||||||
|
def process_device(pci_dev, psid_map):
    """ Run the whole update pipeline on one Mellanox device.

    Steps, in order:
        - Burn a firmware image when one matches the device PSID and an
          update is forced or needed
        - Reset the device when that is required before configuring it
        - Apply the device configurations

    Any failure is logged and swallowed, so the caller can carry on with
    the remaining devices.

    :param pci_dev: nvidia nic PCI device address (String)
    :param psid_map: dict mapping between PSID and an image_info dict
    :return: None
    """
    try:
        LOG.info("Processing Device: %s", pci_dev)
        fw_ops = MlnxDevFirmwareOps(pci_dev)
        configurator = MlnxDeviceConfig(pci_dev)
        psid = fw_ops.query_device()['psid']
        # see if there is a matching bin
        if psid not in psid_map:
            LOG.info("No firmware binary found for device %s with "
                     "PSID: %s, skipping...", pci_dev, psid)
        elif _FORCE_UPDATE or fw_ops.need_update(psid_map[psid]):
            fw_ops.burn_firmware(psid_map[psid]['file_path'])
        else:
            LOG.info("Firmware update is not required for Device.")
        # check if reset is required.
        # Note: device Reset is required if a newer firmware version was burnt
        # and current firmware does not support some mandatory configurations.
        # Note: skip_fms_sync is required if device is BlueField SmartNIC and
        # the current FW is less than Nov release
        reset_needed, skip_sync = fw_ops.need_reset_before_config()
        if reset_needed:
            fw_ops.reset_device(skip_fms_sync=skip_sync)
        # set device configurations
        configurator.set_config(_MLX_CONFIG)
        LOG.info("Device %s processed successfully.", pci_dev)
    except Exception as e:
        LOG.error("Failed to process device %s. %s", pci_dev, str(e))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """ Script entry point.

    Checks the prerequisites, discovers the devices, downloads the
    firmware binaries (when a URL is configured) and processes every
    discovered device.

    :return: None
    """
    check_prereq()
    # discover devices
    mlnx_devices = MlnxDevices(_DEV_WHITE_LIST)
    mlnx_devices.discover()
    # get binaries and prep psid map
    psid_map = {}
    if _BIN_DIR_URL:
        # Keep a reference to the fetcher until all devices are processed:
        # the downloaded files are removed when it is garbage collected.
        binary_getter = MlnxFirmwareBinariesFetcher(_BIN_DIR_URL)
        fw_binaries = binary_getter.get_firmware_binaries()

        for fw_bin in fw_binaries:
            image_info = fw_bin.get_info()
            psid = image_info.get('psid')
            if not psid:
                # A binary whose query output carries no PSID can't be
                # matched to a device; skip it rather than abort the run.
                LOG.warning("No PSID found for firmware binary %s, "
                            "skipping file.", image_info.get('file_path'))
                continue
            psid_map[psid] = image_info
    # process devices
    for pci_dev in mlnx_devices:
        process_device(pci_dev, psid_map)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the update flow only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
|
params:
|
||||||
|
$BIN_DIR_URL: {get_param: BIN_DIR_URL}
|
||||||
|
$FORCE_UPDATE: {get_param: FORCE_UPDATE}
|
||||||
|
$DEV_WHITE_LIST: {get_param: DEV_WHITE_LIST}
|
||||||
|
$NUM_OF_VFS: {get_param: NUM_OF_VFS}
|
||||||
|
$SRIOV_EN: {get_param: SRIOV_EN}
|
||||||
|
$LINK_TYPE: {get_param: LINK_TYPE}
|
||||||
|
$ESWITCH_IPV4_TTL_MODIFY_ENABLE: {get_param: ESWITCH_IPV4_TTL_MODIFY_ENABLE}
|
||||||
|
$PRIO_TAG_REQUIRED_EN: {get_param: PRIO_TAG_REQUIRED_EN}
|
||||||
|
$ESWITCH_HAIRPIN_TOT_BUFFER_SIZE: {get_param: ESWITCH_HAIRPIN_TOT_BUFFER_SIZE}
|
||||||
|
$ESWITCH_HAIRPIN_DESCRIPTORS: {get_param: ESWITCH_HAIRPIN_DESCRIPTORS}
|
||||||
|
$BF_NIC_MODE: {get_param: BF_NIC_MODE}
|
||||||
|
$RESET_SYNC: {get_param: RESET_SYNC}
|
||||||
|
|
||||||
|
|
||||||
|
outputs:
|
||||||
|
# This means get_resource from the parent template will get the userdata, see:
|
||||||
|
# http://docs.openstack.org/developer/heat/template_guide/composition.html#making-your-template-resource-more-transparent
|
||||||
|
# Note this is new-for-kilo, an alternative is returning a value then using
|
||||||
|
# get_attr in the parent template instead.
|
||||||
|
OS::stack_id:
|
||||||
|
value: {get_resource: userdata}
|
Loading…
Reference in New Issue
Block a user