Browse Source
This's a temporary workaround for upgrading Nvidia mellanox NICs FW and configuring them using mstflint tool Change-Id: I40166051c10876ee9b865135f219cf0dff6d4c18changes/85/833785/3
1 changed files with 743 additions and 0 deletions
@ -0,0 +1,743 @@
|
||||
heat_template_version: wallaby |
||||
|
||||
description: > |
||||
This's a temporary workaround for upgrading Nvidia mellanox NICs |
||||
FW and configuring them using mstflint tool |
||||
|
||||
parameters: |
||||
BIN_DIR_URL: |
||||
type: string |
||||
default: '' |
||||
description: 'URL of a directory containing Mellanox NIC Firmware' |
||||
FORCE_UPDATE: |
||||
type: boolean |
||||
default: False |
||||
description: "Force update the fw even if it's an older version" |
||||
DEV_WHITE_LIST: |
||||
type: comma_delimited_list |
||||
default: [] |
||||
description: list of MLNX devices PCIs to be processed. |
||||
If the value is empty, all MLNX devices will be processed. |
||||
Example, ['0000:04:00.0', '0000:81:00.0'] |
||||
Make sure to choose only the PCI ends with 0 |
||||
NUM_OF_VFS: |
||||
type: number |
||||
default: 32 |
||||
description: 'Max number of vfs' |
||||
SRIOV_EN: |
||||
type: boolean |
||||
default: True |
||||
description: 'Enable/Disable Sriov' |
||||
LINK_TYPE: |
||||
type: string |
||||
default: 'eth' |
||||
description: 'Link type ["eth", "ib"]' |
||||
ESWITCH_IPV4_TTL_MODIFY_ENABLE: |
||||
type: boolean |
||||
default: False |
||||
description: 'Enable TTL modification by E-Switch' |
||||
PRIO_TAG_REQUIRED_EN: |
||||
type: boolean |
||||
default: False |
||||
description: 'Priority tag required' |
||||
ESWITCH_HAIRPIN_TOT_BUFFER_SIZE: |
||||
type: json |
||||
default: {"*": "17"} |
||||
description: > |
||||
If a single key of "*" is provided, then its value will set to all indexes. |
||||
If you need to set configuration for a set of specific indexes, you can pass the |
||||
value as below for index 2 to be 17 and index 3 to be 16 |
||||
Example, {"2": "17", "3": "16"} |
||||
Make sure to choose only the PCI ends with 0 |
||||
ESWITCH_HAIRPIN_DESCRIPTORS: |
||||
type: json |
||||
default: {"*": "11"} |
||||
description: > |
||||
If a single key of "*" is provided, then its value will set to all indexes. |
||||
If you need to set configuration for a set of specific indexes, you can pass the |
||||
value as below for index 2 to be 17 and index 3 to be 16 |
||||
Example, {"2": "17", "3": "16"} |
||||
Make sure to choose only the PCI ends with 0 |
||||
BF_NIC_MODE: |
||||
type: boolean |
||||
default: False |
||||
description: > |
||||
A special parameter for BlueField SmartNICs, if the value is False, that means the |
||||
NIC in smart nic mode and it's going to set the following config: |
||||
"INTERNAL_CPU_PAGE_SUPPLIER": "ECPF", |
||||
"INTERNAL_CPU_ESWITCH_MANAGER": "ECPF", |
||||
"INTERNAL_CPU_IB_VPORT0": "ECPF", |
||||
"INTERNAL_CPU_OFFLOAD_ENGINE": "ENABLED", |
||||
If the value is True, that means the NIC in nic mode and it's going |
||||
to set the following config: |
||||
"INTERNAL_CPU_PAGE_SUPPLIER", "EXT_HOST_PF" |
||||
"INTERNAL_CPU_ESWITCH_MANAGER", "EXT_HOST_PF" |
||||
"INTERNAL_CPU_IB_VPORT0", "EXT_HOST_PF" |
||||
"INTERNAL_CPU_OFFLOAD_ENGINE", "DISABLED" |
||||
RESET_SYNC: |
||||
type: number |
||||
default: 0 |
||||
description: > |
||||
Run mstfwreset with the specified reset-sync [0,1], |
||||
The default and Current supported option now is 0 |
||||
|
||||
|
||||
resources: |
||||
userdata: |
||||
type: OS::Heat::MultipartMime |
||||
properties: |
||||
parts: |
||||
- config: {get_resource: nvidia_nic_fw_update} |
||||
|
||||
nvidia_nic_fw_update: |
||||
type: OS::Heat::SoftwareConfig |
||||
properties: |
||||
config: |
||||
str_replace: |
||||
template: | |
||||
#!/usr/bin/env python |
||||
|
||||
import logging |
||||
import os |
||||
import re |
||||
import shutil |
||||
import tempfile |
||||
import threading |
||||
|
||||
import six |
||||
from six.moves import html_parser |
||||
from six.moves.urllib import error as urlError |
||||
from six.moves.urllib import request as urlRequest |
||||
|
||||
from oslo_concurrency import processutils |
||||
|
||||
FW_VERSION_REGEX = r'FW Version:\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)' |
||||
RUNNING_FW_VERSION_REGEX = r'FW Version\(Running\):\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)' |
||||
PSID_REGEX = r'PSID:\s*\t*(?P<psid>\w+)' |
||||
ARRAY_VALUE_REGEX = r'Array\[(?P<first_index>\d+)\.\.(?P<last_index>\d+)\]' |
||||
ARRAY_PARAM_REGEX = r'(?P<param_name>\w+)\[\d+\]' |
||||
|
||||
_DEV_WHITE_LIST = $DEV_WHITE_LIST |
||||
_FORCE_UPDATE = $FORCE_UPDATE |
||||
_BIN_DIR_URL = "$BIN_DIR_URL" |
||||
_BF_NIC_MODE = "$BF_NIC_MODE" |
||||
# TODO(adrianc): add configurable parameter for logging |
||||
logging.basicConfig( |
||||
filename='/var/log/nvidia_nic_fw_update.log', |
||||
filemode='w', |
||||
level=logging.DEBUG) |
||||
LOG = logging.getLogger("nvidia_nic_fw_update") |
||||
|
||||
_MLX_CONFIG = { |
||||
"SRIOV_EN": "$SRIOV_EN", |
||||
"NUM_OF_VFS": "$NUM_OF_VFS", |
||||
"LINK_TYPE_P1": "$LINK_TYPE", |
||||
"LINK_TYPE_P2": "$LINK_TYPE", |
||||
"ESWITCH_IPV4_TTL_MODIFY_ENABLE": "$ESWITCH_IPV4_TTL_MODIFY_ENABLE", |
||||
"PRIO_TAG_REQUIRED_EN": "$PRIO_TAG_REQUIRED_EN", |
||||
"ESWITCH_HAIRPIN_TOT_BUFFER_SIZE": $ESWITCH_HAIRPIN_TOT_BUFFER_SIZE, |
||||
"ESWITCH_HAIRPIN_DESCRIPTORS": $ESWITCH_HAIRPIN_DESCRIPTORS |
||||
} |
||||
if _BF_NIC_MODE.lower() == "false": |
||||
# It means we are in smart nic mode for BlueField device |
||||
_MLX_CONFIG["INTERNAL_CPU_PAGE_SUPPLIER"] = "ECPF" |
||||
_MLX_CONFIG["INTERNAL_CPU_ESWITCH_MANAGER"] = "ECPF" |
||||
_MLX_CONFIG["INTERNAL_CPU_IB_VPORT0"] = "ECPF" |
||||
_MLX_CONFIG["INTERNAL_CPU_OFFLOAD_ENGINE"] = "ENABLED" |
||||
_MLX_CONFIG["INTERNAL_CPU_RSHIM"] = "True" |
||||
else: |
||||
# It measn we are in nic mode for BlueField device |
||||
_MLX_CONFIG["INTERNAL_CPU_PAGE_SUPPLIER"] = "EXT_HOST_PF" |
||||
_MLX_CONFIG["INTERNAL_CPU_ESWITCH_MANAGER"] = "EXT_HOST_PF" |
||||
_MLX_CONFIG["INTERNAL_CPU_IB_VPORT0"] = "EXT_HOST_PF" |
||||
_MLX_CONFIG["INTERNAL_CPU_OFFLOAD_ENGINE"] = "DISABLED" |
||||
|
||||
_RESET_SYNC = $RESET_SYNC |
||||
|
||||
def run_command(*cmd, **kwargs): |
||||
try: |
||||
out, err = processutils.execute(*cmd, **kwargs) |
||||
except processutils.ProcessExecutionError as e: |
||||
LOG.error("Failed to execute %s, %s", ' '.join(cmd), str(e)) |
||||
raise e |
||||
if err: |
||||
LOG.warning("Got stderr output: %s" % err) |
||||
LOG.debug(out) |
||||
return out |
||||
|
||||
|
||||
def parse_mstflint_query_output(out): |
||||
""" Parse Mstflint query output |
||||
|
||||
For now just extract 'FW Version' and 'PSID' |
||||
|
||||
:param out: mstflint query output |
||||
:return: dictionary of query attributes |
||||
""" |
||||
query_info = {} |
||||
for line in out.split('\n'): |
||||
fw_ver = re.match(FW_VERSION_REGEX, line) |
||||
psid = re.match(PSID_REGEX, line) |
||||
running_fw_ver = re.match(RUNNING_FW_VERSION_REGEX, line) |
||||
if fw_ver: |
||||
query_info["fw_ver"] = fw_ver.group('fw_ver') |
||||
if running_fw_ver: |
||||
query_info["running_fw_ver"] = running_fw_ver.group('fw_ver') |
||||
if psid: |
||||
query_info["psid"] = psid.group('psid') |
||||
return query_info |
||||
|
||||
|
||||
class MlnxDevices(object): |
||||
""" Discover and retrieve Mellanox PCI devices. |
||||
|
||||
Can be used as an iterator once discover has been called. |
||||
""" |
||||
|
||||
def __init__(self, dev_white_list): |
||||
self._devs = [] |
||||
self._dev_white_list = dev_white_list |
||||
|
||||
def discover(self): |
||||
""" Discover Mellanox devices in the system. (first PF of every device) |
||||
|
||||
:return: None |
||||
""" |
||||
if self._devs: |
||||
return self._devs |
||||
|
||||
devs = [] |
||||
cmd = ['lspci', '-D', '-d', '15b3:'] |
||||
out = run_command(*cmd) |
||||
for line in out.split('\n'): |
||||
if not line: |
||||
continue |
||||
dev = line.split()[0] |
||||
if dev.endswith('.0') and (not self._dev_white_list or |
||||
dev in self._dev_white_list): |
||||
devs.append(dev) |
||||
self._devs = devs |
||||
LOG.info("Found Mellanox devices: %s", devs) |
||||
other_devs = set(self._dev_white_list) - set(devs) |
||||
if other_devs: |
||||
LOG.warning("Not all devices in PCI white list where discovered," |
||||
" %s these may not be nvidia devices or have their " |
||||
"PCI function set to non zero." % other_devs) |
||||
|
||||
def __len__(self): |
||||
return len(self._devs) |
||||
|
||||
def __iter__(self): |
||||
return self._devs.__iter__() |
||||
|
||||
|
||||
class MlnxConfigParamMetaData(object): |
||||
""" Metadata about a single mlxconfig parameter""" |
||||
|
||||
def __init__(self, name): |
||||
self.name = name |
||||
|
||||
class MlnxConfigArrayParamMetaData(MlnxConfigParamMetaData): |
||||
""" Metadata about a single mlxconfig array parameter""" |
||||
|
||||
def __init__(self, name, first_idx, last_idx): |
||||
super(MlnxConfigArrayParamMetaData, self).__init__(name) |
||||
self.first_index = int(first_idx) |
||||
self.last_index = int(last_idx) |
||||
|
||||
|
||||
class MlnxDeviceConfig(object): |
||||
""" Get/Set Mellanox Device configurations |
||||
""" |
||||
|
||||
def __init__(self, pci_dev): |
||||
self.pci_dev = pci_dev |
||||
self._tool_confs = None |
||||
# NOTE(adrianc) ATM contains only array type parameter metadata |
||||
self.mlnx_config_array_param_metadata = {} |
||||
|
||||
def _mstconfig_parse_data(self, data): |
||||
# Parsing the mstconfig out to json |
||||
data = list(filter(None, data.split('\n'))) |
||||
r = {} |
||||
c = 0 |
||||
for line in data: |
||||
c += 1 |
||||
if 'Configurations:' in line: |
||||
break |
||||
for i in range(c, len(data)): |
||||
d = list(filter(None, data[i].strip().split())) |
||||
r[d[0]] = d[1] |
||||
return r |
||||
|
||||
def get_device_conf_dict(self, param_name=None): |
||||
""" Get device Configurations |
||||
|
||||
:param param_name: if provided retireve only given configuration |
||||
:return: dict {"PARAM_NAME": "Param value", ....} |
||||
""" |
||||
LOG.info("Getting configurations for device: %s" % self.pci_dev) |
||||
cmd = ["mstconfig", "-d", self.pci_dev, "q"] |
||||
if param_name: |
||||
cmd.append(param_name) |
||||
out = run_command(*cmd) |
||||
return self._mstconfig_parse_data(out) |
||||
|
||||
def param_supp_by_config_tool(self, param_name): |
||||
""" Check if configuration tool supports the provided configuration |
||||
parameter. |
||||
|
||||
:param param_name: configuration name |
||||
:return: bool |
||||
""" |
||||
if self._tool_confs is None: |
||||
self._tool_confs = run_command( |
||||
"mstconfig", "-d", self.pci_dev, "i") |
||||
# trim any array index if present |
||||
indexed_param = re.match(ARRAY_PARAM_REGEX, param_name) |
||||
if indexed_param: |
||||
param_name = indexed_param.group('param_name') |
||||
return param_name in self._tool_confs |
||||
|
||||
def _build_config_param_metadata_map(self, conf_dict): |
||||
self.mlnx_config_array_param_metadata = {} |
||||
for param_name, val in six.iteritems(conf_dict): |
||||
array_val = re.match(ARRAY_VALUE_REGEX, val) |
||||
if array_val: |
||||
# Array parameter, extract first/last index |
||||
first_index = array_val.group('first_index') |
||||
last_index = array_val.group('last_index') |
||||
self.mlnx_config_array_param_metadata[param_name] = \ |
||||
MlnxConfigArrayParamMetaData( |
||||
param_name, first_index, last_index) |
||||
|
||||
def _inflate_array_param_vals_from_query(self, conf_dict): |
||||
""" Inflate provided conf dict with all values of array parameter""" |
||||
inflated_conf = {} |
||||
for param_name, val in six.iteritems(conf_dict): |
||||
if param_name in self.mlnx_config_array_param_metadata: |
||||
param_meta = self.mlnx_config_array_param_metadata[param_name] |
||||
first = param_meta.first_index |
||||
last = param_meta.last_index |
||||
arr_param_val = self.get_device_conf_dict( |
||||
param_name="%s[%s..%s]" % (param_name, first, last)) |
||||
# Add new keys to dict |
||||
for k, v in six.iteritems(arr_param_val): |
||||
inflated_conf[k] = v |
||||
else: |
||||
inflated_conf[param_name] = val |
||||
return inflated_conf |
||||
|
||||
def _inflate_single_array_input_val(self, param_name, val): |
||||
conf_dict = {} |
||||
param_meta = self.mlnx_config_array_param_metadata[param_name] |
||||
first = param_meta.first_index |
||||
last = param_meta.last_index |
||||
|
||||
if '*' in val: |
||||
if len(val) != 1: |
||||
raise RuntimeError( |
||||
"Invalid input for provided array type parameter. %s:%s" |
||||
% (param_name, val)) |
||||
|
||||
for idx in range(first, last + 1): |
||||
conf_dict["%s[%s]" % (param_name, idx)] = val['*'] |
||||
else: |
||||
for idx, idx_val in six.iteritems(val): |
||||
if int(idx) not in range(first, last + 1): |
||||
LOG.warning( |
||||
"Provided array param index(%s) is out of range " |
||||
"[%s..%s] skipping...", idx, first, last) |
||||
continue |
||||
conf_dict["%s[%s]" % (param_name, idx)] = str(idx_val) |
||||
return conf_dict |
||||
|
||||
def _inflate_array_param_vals_from_input(self, conf_dict): |
||||
""" Inflate provided conf dict with all values of array parameter""" |
||||
inflated_conf = {} |
||||
for param_name, val in six.iteritems(conf_dict): |
||||
if param_name in self.mlnx_config_array_param_metadata: |
||||
exp_inp = self._inflate_single_array_input_val(param_name, val) |
||||
# Add to conf_dict |
||||
for k, v in six.iteritems(exp_inp): |
||||
inflated_conf[k] = v |
||||
else: |
||||
inflated_conf[param_name] = val |
||||
|
||||
return inflated_conf |
||||
|
||||
def set_config(self, conf_dict): |
||||
""" Set device configurations |
||||
|
||||
:param conf_dict: a dictionary of: |
||||
{"PARAM_NAME": "Param value to set", ...} |
||||
:return: None |
||||
""" |
||||
current_mlx_config = self.get_device_conf_dict() |
||||
self._build_config_param_metadata_map(current_mlx_config) |
||||
current_mlx_config = self._inflate_array_param_vals_from_query( |
||||
current_mlx_config) |
||||
# inflate user input for array parameters |
||||
conf_dict = self._inflate_array_param_vals_from_input(conf_dict) |
||||
|
||||
params_to_set = [] |
||||
for key, value in conf_dict.items(): |
||||
if not self.param_supp_by_config_tool(key): |
||||
LOG.error( |
||||
"Configuraiton: %s is not supported by mstconfig," |
||||
" please update to the latest mstflint package." % key) |
||||
continue |
||||
|
||||
if current_mlx_config.get(key) and value.lower( |
||||
) not in current_mlx_config.get(key).lower(): |
||||
# Aggregate all configurations required to be modified |
||||
params_to_set.append("%s=%s" % (key, value)) |
||||
|
||||
if params_to_set: |
||||
LOG.info("Setting configurations for device: %s" % self.pci_dev) |
||||
run_command("mstconfig", "-d", self.pci_dev, "-y", |
||||
"set", *params_to_set) |
||||
LOG.info("Set device configurations: Setting %s done successfully", |
||||
" ".join(params_to_set)) |
||||
else: |
||||
LOG.info("Set device configurations: No operation required") |
||||
|
||||
|
||||
class MlnxFirmwareBinary(object): |
||||
|
||||
def __init__(self, local_bin_path): |
||||
self.bin_path = local_bin_path |
||||
self.image_info = {} |
||||
|
||||
def get_info(self): |
||||
""" Get firmware information from binary |
||||
|
||||
Caller should wrap this call under try catch to skip non compliant |
||||
firmware binaries. |
||||
|
||||
:return: dict of firmware image attributes |
||||
""" |
||||
if self.image_info.get('file_path', '') == self.bin_path: |
||||
return self.image_info |
||||
self.image_info = {'file_path': self.bin_path} |
||||
cmd = ['mstflint', '-i', self.bin_path, 'query'] |
||||
out = run_command(*cmd) |
||||
self.image_info.update(parse_mstflint_query_output(out)) |
||||
# Note(adrianc): deep copy ? |
||||
return self.image_info |
||||
|
||||
|
||||
class MlnxFirmwareBinariesFetcher(object): |
||||
""" A class for fetching firmware binaries form a directory |
||||
provided by a URL link |
||||
|
||||
Note: URL MUST point to a directory and end with '/' |
||||
e.g http://www.mysite.com/mlnx_bins/ |
||||
""" |
||||
dest_dir = tempfile.mkdtemp(suffix="tripleo_mlnx_firmware") |
||||
|
||||
class FileHTMLParser(html_parser.HTMLParser): |
||||
""" A crude HTML Parser to extract files from an HTTP response. |
||||
""" |
||||
|
||||
def __init__(self, suffix): |
||||
# HTMLParser is Old style class dont use super() method |
||||
html_parser.HTMLParser.__init__(self) |
||||
self.matches = [] |
||||
self.suffix = suffix |
||||
|
||||
def handle_starttag(self, tag, attrs): |
||||
for name, val in attrs: |
||||
if name == 'href' and val.endswith(self.suffix): |
||||
self.matches.append(val) |
||||
|
||||
def __init__(self, url): |
||||
self.url = url |
||||
|
||||
def __del__(self): |
||||
self._cleanup_dest_dir() |
||||
|
||||
def _cleanup_dest_dir(self): |
||||
if os.path.exists(MlnxFirmwareBinariesFetcher.dest_dir): |
||||
shutil.rmtree(MlnxFirmwareBinariesFetcher.dest_dir) |
||||
|
||||
def _get_file_from_url(self, file_name): |
||||
try: |
||||
full_path = self.url + "/" + file_name |
||||
LOG.info("Downloading file: %s to %s", full_path, |
||||
MlnxFirmwareBinariesFetcher.dest_dir) |
||||
url_data = urlRequest.urlopen(full_path) |
||||
except urlError.HTTPError as e: |
||||
LOG.error("Failed to download data: %s", str(e)) |
||||
raise e |
||||
dest_file_path = os.path.join(MlnxFirmwareBinariesFetcher.dest_dir, |
||||
file_name) |
||||
with open(dest_file_path, 'wb') as f: |
||||
f.write(url_data.read()) |
||||
return dest_file_path |
||||
|
||||
def _get_file_create_bin_obj(self, file_name, fw_bins): |
||||
""" This wrapper method will download a firmware binary, |
||||
create MlnxFirmwareBinary object and append to the provided |
||||
fw_bins list. |
||||
|
||||
:return: None |
||||
""" |
||||
try: |
||||
dest_file_path = self._get_file_from_url(file_name) |
||||
fw_bin = MlnxFirmwareBinary(dest_file_path) |
||||
# Note(adrianc): Pre query image, to skip incompatible files |
||||
# in case of Error |
||||
fw_bin.get_info() |
||||
fw_bins.append(fw_bin) |
||||
except Exception as e: |
||||
LOG.warning("Failed to download and query %s, skipping file. " |
||||
"%s", file_name, str(e)) |
||||
|
||||
def get_firmware_binaries(self): |
||||
""" Get Firmware binaries |
||||
|
||||
:return: list containing the files downloaded |
||||
""" |
||||
# get list of files |
||||
# download into dest_dir |
||||
# for each file, create MlnxFirmwareBinary |
||||
# return list of the MlnxFirmwareBinary |
||||
if not self.url.endswith('/'): |
||||
LOG.error("Bad URL provided (%s), expected URL to be a directory", |
||||
self.url) |
||||
raise RuntimeError('Failed to get firmware binaries, ' |
||||
'expected directory URL path ' |
||||
'(e.g "http://<your_ip>/mlnx_bins/"). ' |
||||
'Given URL path: %s', self.url) |
||||
try: |
||||
index_data = str(urlRequest.urlopen(_BIN_DIR_URL).read()) |
||||
except urlError.HTTPError as err: |
||||
LOG.error(err) |
||||
raise err |
||||
parser = MlnxFirmwareBinariesFetcher.FileHTMLParser(suffix=".bin") |
||||
parser.feed(index_data) |
||||
parser.close() |
||||
if not parser.matches: |
||||
LOG.warning("No bin Files found in the provided URL: %s", self.url) |
||||
|
||||
fw_bins = [] |
||||
threads = [] |
||||
for file_name in parser.matches: |
||||
# TODO(adrianc) fetch files async with co-routines, |
||||
# may need to limit thread count |
||||
t = threading.Thread(target=self._get_file_create_bin_obj, |
||||
args=(file_name, fw_bins)) |
||||
t.start() |
||||
threads.append(t) |
||||
for t in threads: |
||||
t.join() |
||||
return fw_bins |
||||
|
||||
|
||||
class MlnxDevFirmwareOps(object): |
||||
""" Perform various Firmware related operations on device |
||||
""" |
||||
|
||||
def __init__(self, dev): |
||||
self.dev = dev |
||||
self.dev_info = {} |
||||
|
||||
def query_device(self, force=False): |
||||
""" Get firmware information from device |
||||
|
||||
:param force: force device query, even if query was executed in |
||||
previous calls. |
||||
:return: dict of firmware image attributes |
||||
""" |
||||
if not force and self.dev_info.get('device', '') == self.dev: |
||||
return self.dev_info |
||||
|
||||
self.dev_info = {'device': self.dev} |
||||
cmd = ['mstflint', '-d', self.dev, '-qq', 'query'] |
||||
out = run_command(*cmd) |
||||
self.dev_info = parse_mstflint_query_output(out) |
||||
# Note(adrianc): deep copy ? |
||||
return self.dev_info |
||||
|
||||
def need_update(self, image_info): |
||||
""" Check if device requires firmware update |
||||
|
||||
:param image_info: image_info dict as returned from |
||||
MlnxFirmwareBinary.get_info() |
||||
:return: bool, True if update is needed |
||||
""" |
||||
if not self.dev_info: |
||||
self.query_device() |
||||
LOG.info("Device firmware version: %s, Image firmware version: %s" % |
||||
(self.dev_info['fw_ver'], image_info['fw_ver'])) |
||||
return self.dev_info['fw_ver'] < image_info['fw_ver'] |
||||
|
||||
def need_reset_before_config(self): |
||||
""" Check if device requires firmware reset before applying any |
||||
configurations on the device. |
||||
|
||||
:return: (bool, bool) True if reset is needed, |
||||
True if skip_fms_sync is needed |
||||
""" |
||||
self.query_device(force=True) |
||||
next_boot_image_newer = 'running_fw_ver' in self.dev_info and \ |
||||
self.dev_info['running_fw_ver'] < self.dev_info['fw_ver'] |
||||
if next_boot_image_newer: |
||||
mandatory_params = ["ESWITCH_IPV4_TTL_MODIFY_ENABLE", |
||||
"PRIO_TAG_REQUIRED_EN", |
||||
"INTERNAL_CPU_PAGE_SUPPLIE", |
||||
"INTERNAL_CPU_ESWITCH_MANAGE", |
||||
"INTERNAL_CPU_IB_VPORT0", |
||||
"INTERNAL_CPU_OFFLOAD_ENGINE"] |
||||
device_config = MlnxDeviceConfig(self.dev) |
||||
conf_dict = device_config.get_device_conf_dict() |
||||
for param in mandatory_params: |
||||
if param not in conf_dict and \ |
||||
device_config.param_supp_by_config_tool(param): |
||||
if "INTERNAL_CPU_MODEL" in conf_dict: |
||||
if self.dev_info['running_fw_ver'] < "24.32.0000": |
||||
# In case the device is BlueField and the FW is less than Nov release |
||||
# return True, True to do reset with skip_fms_sync |
||||
return True, True |
||||
elif (self.dev_info['running_fw_ver'] >= "24.32.0000" and |
||||
self.dev_info['running_fw_ver'] < "24.33.0000"): |
||||
# In case the device is BlueField and the FW is from Nov release |
||||
# reset will fail, so don't do it |
||||
return False, False |
||||
return True, False |
||||
return False, False |
||||
|
||||
def burn_firmware(self, image_path): |
||||
""" Burn firmware on device |
||||
|
||||
:param image_path: firmware binary file path |
||||
:return: None |
||||
""" |
||||
LOG.info("Updating firmware image (%s) for device: %s", |
||||
image_path, self.dev) |
||||
cmd = ["mstflint", "-d", self.dev, "-i", image_path, |
||||
"-y", "burn"] |
||||
run_command(*cmd) |
||||
LOG.info("Device %s: Successfully updated.", self.dev) |
||||
|
||||
def reset_device(self, skip_fms_sync=False): |
||||
""" Reset firmware |
||||
|
||||
:return: None |
||||
""" |
||||
LOG.info("Device %s: Performing firmware reset.", self.dev) |
||||
if skip_fms_sync: |
||||
cmd = ["mstfwreset", "-d", self.dev, "--skip_fsm_sync", "-y", |
||||
"--sync", _RESET_SYNC, "reset"] |
||||
else: |
||||
cmd = ["mstfwreset", "-d", self.dev, "-y", "reset"] |
||||
run_command(*cmd) |
||||
LOG.info("Device %s: Firmware successfully reset.", self.dev) |
||||
|
||||
|
||||
def check_prereq(): |
||||
""" Check that all needed tools are available in the system. |
||||
|
||||
:return: None |
||||
""" |
||||
try: |
||||
# check for mstflint |
||||
run_command('mstflint', '-v') |
||||
# check for mstconfig |
||||
run_command('mstconfig', '-v') |
||||
# check for mstfwreset |
||||
run_command('mstfwreset', '-v') |
||||
# check for lspci |
||||
run_command('lspci', '--version') |
||||
except Exception as e: |
||||
LOG.error("Failed Prerequisite check. %s", str(e)) |
||||
raise e |
||||
|
||||
|
||||
def process_device(pci_dev, psid_map): |
||||
""" Process a single Mellanox device. |
||||
|
||||
Processing pipeline: |
||||
- Perform firmware update if required |
||||
- Reset device to load firmware if required |
||||
- Perform device configurations if required |
||||
|
||||
:param pci_dev: nvidia nic PCI device address (String) |
||||
:param psid_map: dict mapping between PSID and an image_info dict |
||||
:return: None |
||||
""" |
||||
try: |
||||
LOG.info("Processing Device: %s", pci_dev) |
||||
dev_ops = MlnxDevFirmwareOps(pci_dev) |
||||
device_config = MlnxDeviceConfig(pci_dev) |
||||
dev_query = dev_ops.query_device() |
||||
# see if there is a matching bin |
||||
dev_psid = dev_query['psid'] |
||||
if dev_psid in psid_map: |
||||
if _FORCE_UPDATE or dev_ops.need_update(psid_map[dev_psid]): |
||||
dev_ops.burn_firmware(psid_map[dev_psid]['file_path']) |
||||
else: |
||||
LOG.info("Firmware update is not required for Device.") |
||||
else: |
||||
LOG.info("No firmware binary found for device %s with " |
||||
"PSID: %s, skipping...", pci_dev, dev_psid) |
||||
# check if reset is required. |
||||
# Note: device Reset is required if a newer firmware version was burnt |
||||
# and current firmware does not support some mandatory configurations. |
||||
# Note: skip_fms_sync is required if device is BlueField SmartNIC and |
||||
# the current FW is less than Nov release |
||||
need_rest, need_skip_fms_sync = dev_ops.need_reset_before_config() |
||||
if need_rest: |
||||
dev_ops.reset_device(skip_fms_sync=need_skip_fms_sync) |
||||
# set device configurations |
||||
device_config.set_config(_MLX_CONFIG) |
||||
LOG.info("Device %s processed successfully.", pci_dev) |
||||
except Exception as e: |
||||
LOG.error("Failed to process device %s. %s", pci_dev, str(e)) |
||||
|
||||
|
||||
def main(): |
||||
check_prereq() |
||||
# discover devices |
||||
mlnx_devices = MlnxDevices(_DEV_WHITE_LIST) |
||||
mlnx_devices.discover() |
||||
# get binaries and prep psid map |
||||
psid_map = {} |
||||
if _BIN_DIR_URL: |
||||
binary_getter = MlnxFirmwareBinariesFetcher(_BIN_DIR_URL) |
||||
fw_binaries = binary_getter.get_firmware_binaries() |
||||
|
||||
for fw_bin in fw_binaries: |
||||
image_info = fw_bin.get_info() |
||||
psid_map[image_info['psid']] = image_info |
||||
# process devices |
||||
for pci_dev in mlnx_devices: |
||||
process_device(pci_dev, psid_map) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
||||
|
||||
params: |
||||
$BIN_DIR_URL: {get_param: BIN_DIR_URL} |
||||
$FORCE_UPDATE: {get_param: FORCE_UPDATE} |
||||
$DEV_WHITE_LIST: {get_param: DEV_WHITE_LIST} |
||||
$NUM_OF_VFS: {get_param: NUM_OF_VFS} |
||||
$SRIOV_EN: {get_param: SRIOV_EN} |
||||
$LINK_TYPE: {get_param: LINK_TYPE} |
||||
$ESWITCH_IPV4_TTL_MODIFY_ENABLE: {get_param: ESWITCH_IPV4_TTL_MODIFY_ENABLE} |
||||
$PRIO_TAG_REQUIRED_EN: {get_param: PRIO_TAG_REQUIRED_EN} |
||||
$ESWITCH_HAIRPIN_TOT_BUFFER_SIZE: {get_param: ESWITCH_HAIRPIN_TOT_BUFFER_SIZE} |
||||
$ESWITCH_HAIRPIN_DESCRIPTORS: {get_param: ESWITCH_HAIRPIN_DESCRIPTORS} |
||||
$BF_NIC_MODE: {get_param: BF_NIC_MODE} |
||||
$RESET_SYNC: {get_param: RESET_SYNC} |
||||
|
||||
|
||||
outputs: |
||||
# This means get_resource from the parent template will get the userdata, see: |
||||
# http://docs.openstack.org/developer/heat/template_guide/composition.html#making-your-template-resource-more-transparent |
||||
# Note this is new-for-kilo, an alternative is returning a value then using |
||||
# get_attr in the parent template instead. |
||||
OS::stack_id: |
||||
value: {get_resource: userdata} |
Loading…
Reference in new issue