Add sysinv-reset-n3000-fpgas cmd
When AIO runs a single manifest, the N3000 FPGA reset needs to complete without the docker local registry and other SM-managed services. This adds the sysinv-reset-n3000-fpgas cmd for puppet to reset N3000 FPGAs at host start-up. The sysinv-reset-n3000-fpgas cmd separates the function of resetting N3000 FPGAs from sysinv-fpgas-agent, as sysinv-fpgas-agent has a dependency on RabbitMQ, which is not available until the manifest completes. Change-Id: Ic3c4b2a00515d194793257729362f71e2951286c Partial-Bug: 1918139 Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
parent
6acd2e3564
commit
7ce3d16eea
@ -39,6 +39,7 @@ console_scripts =
|
||||
sysinv-helm = sysinv.cmd.helm:main
|
||||
sysinv-utils = sysinv.cmd.utils:main
|
||||
cert-mon = sysinv.cmd.cert_mon:main
|
||||
sysinv-reset-n3000-fpgas = sysinv.cmd.reset_n3000_fpgas:main
|
||||
|
||||
systemconfig.puppet_plugins =
|
||||
001_platform = sysinv.puppet.platform:PlatformPuppet
|
||||
|
41
sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py
Normal file
41
sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||
#
|
||||
# The right to copy, distribute, modify, or otherwise make use
|
||||
# of this software may be licensed only pursuant to the terms
|
||||
# of an applicable Wind River license agreement.
|
||||
#
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from sysinv.fpga_agent.reset_n3000_fpgas import reset_n3000_fpgas
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
def main():
    """Reset the N3000 FPGAs and report the outcome through the exit status.

    Registers the oslo logging options, parses the 'sysinv' project
    configuration for the 'reset-n3000-fpgas' program, sets up logging and
    then calls reset_n3000_fpgas().

    The process exits with status 0 when reset_n3000_fpgas() returns a
    truthy value and with status 1 otherwise.
    """
    # Imported locally so this entry point does not rely on the ``exit``
    # helper that the ``site`` module injects into builtins; that helper is
    # intended for interactive use and is absent when Python runs with -S.
    import sys

    logging.register_options(CONF)
    CONF(project='sysinv', prog='reset-n3000-fpgas')

    logging.set_defaults()
    logging.setup(CONF, 'reset-n3000-fpgas')

    sys.exit(0 if reset_n3000_fpgas() else 1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -22,3 +22,11 @@ N3000_DEVICES = [
|
||||
N3000_FEC_PF_DEVICE,
|
||||
N3000_DEFAULT_DEVICE,
|
||||
]
|
||||
|
||||
# TODO: Make this specified in the config file.
|
||||
# This is the docker image containing the OPAE tools to access the FPGA device.
|
||||
OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
||||
|
||||
# This is a flag file created by puppet after doing a "docker login".
|
||||
# We need to wait for it to exist before trying to run docker images.
|
||||
DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
|
||||
|
@ -81,17 +81,10 @@ CONF.register_opts(agent_opts, 'fpga_agent')
|
||||
# This is the docker image containing the OPAE tools to access the FPGA device.
|
||||
OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
||||
|
||||
# This is a flag file created by puppet after doing a "docker login".
|
||||
# We need to wait for it to exist before trying to run docker images.
|
||||
DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
|
||||
|
||||
# This is the location where we cache the device image file while
|
||||
# writing it to the hardware.
|
||||
DEVICE_IMAGE_CACHE_DIR = "/usr/local/share/applications/sysinv"
|
||||
|
||||
# Volatile flag file so we only reset the N3000s once after bootup.
|
||||
N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset")
|
||||
|
||||
SYSFS_DEVICE_PATH = "/sys/bus/pci/devices/"
|
||||
FME_PATH = "/fpga/intel-fpga-dev.*/intel-fpga-fme.*/"
|
||||
SPI_PATH = "spi-altera.*.auto/spi_master/spi*/spi*.*/"
|
||||
@ -110,7 +103,7 @@ BMC_BUILD_VER_PATH = "max10_version"
|
||||
def wait_for_docker_login():
|
||||
# TODO: add a timeout
|
||||
LOG.info("Waiting for docker login flag.")
|
||||
while not os.path.exists(DOCKER_LOGIN_FLAG):
|
||||
while not os.path.exists(constants.DOCKER_LOGIN_FLAG):
|
||||
time.sleep(1)
|
||||
LOG.info("Found docker login flag, continuing.")
|
||||
|
||||
@ -195,33 +188,6 @@ def write_device_image_n3000(filename, pci_addr):
|
||||
raise exception.SysinvException(msg)
|
||||
|
||||
|
||||
def reset_device_n3000(pci_addr):
|
||||
# Reset the N3000 FPGA at the specified PCI address.
|
||||
try:
|
||||
# Build up the command to perform the reset.
|
||||
# Note the hack to work around OPAE tool locale issues
|
||||
cmd = ("docker run -t --privileged -e LC_ALL=en_US.UTF-8 "
|
||||
"-e LANG=en_US.UTF-8 " + OPAE_IMG +
|
||||
" rsu bmcimg " + pci_addr)
|
||||
|
||||
# Issue the command to perform the firmware update.
|
||||
subprocess.check_output(shlex.split(cmd), # pylint: disable=not-callable
|
||||
stderr=subprocess.STDOUT)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
# "docker run" return code will be:
|
||||
# 125 if the error is with Docker daemon itself
|
||||
# 126 if the contained command cannot be invoked
|
||||
# 127 if the contained command cannot be found
|
||||
# Exit code of contained command otherwise
|
||||
msg = ("Failed to reset device %s, "
|
||||
"return code is %d, command output: %s." %
|
||||
(pci_addr, exc.returncode,
|
||||
exc.output.decode('utf-8')))
|
||||
LOG.error(msg)
|
||||
LOG.error("Check for intel-max10 kernel logs.")
|
||||
raise exception.SysinvException(msg)
|
||||
|
||||
|
||||
def read_n3000_sysfs_file(pattern):
|
||||
# Read a sysfs file related to the N3000.
|
||||
# The result should be an empty string if the file doesn't exist,
|
||||
@ -347,24 +313,6 @@ def get_n3000_devices():
|
||||
return fpga_addrs
|
||||
|
||||
|
||||
def reset_n3000_fpgas():
|
||||
# We only want to do this once after host startup.
|
||||
if not os.path.exists(N3000_RESET_FLAG):
|
||||
# Reset all N3000 FPGAs on the system.
|
||||
# TODO: make this run in parallel if there are multiple devices.
|
||||
LOG.info("Resetting N3000 FPGAs.")
|
||||
got_exception = False
|
||||
fpga_addrs = get_n3000_devices()
|
||||
for fpga_addr in fpga_addrs:
|
||||
try:
|
||||
reset_device_n3000(fpga_addr)
|
||||
except Exception:
|
||||
got_exception = True
|
||||
LOG.info("Done resetting N3000 FPGAs.")
|
||||
if not got_exception:
|
||||
utils.touch(N3000_RESET_FLAG)
|
||||
|
||||
|
||||
def get_n3000_pci_info():
|
||||
""" Query PCI information about N3000 PCI devices.
|
||||
|
||||
@ -465,11 +413,6 @@ class FpgaAgentManager(service.PeriodicService):
|
||||
|
||||
# Wait for puppet to log in to the local docker registry
|
||||
wait_for_docker_login()
|
||||
|
||||
# Trigger reset of N3000 FPGAs. This is needed because the PCI address
|
||||
# changes on the first reset after boot.
|
||||
reset_n3000_fpgas()
|
||||
|
||||
# Wait around until someone else updates the platform.conf file
|
||||
# with our host UUID.
|
||||
self.wait_for_host_uuid()
|
||||
|
102
sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py
Normal file
102
sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py
Normal file
@ -0,0 +1,102 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||
#
|
||||
# The right to copy, distribute, modify, or otherwise make use
|
||||
# of this software may be licensed only pursuant to the terms
|
||||
# of an applicable Wind River license agreement.
|
||||
#
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from eventlet.green import subprocess
|
||||
from oslo_log import log
|
||||
|
||||
from sysinv.common import utils
|
||||
from sysinv.common import exception
|
||||
from sysinv.fpga_agent.manager import get_n3000_devices
|
||||
from sysinv.fpga_agent import constants
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
# Volatile flag file so we only reset the N3000s once after bootup.
|
||||
N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset")
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
def n3000_img_accessible():
    """Check whether the OPAE docker image is listed by the local docker.

    Runs ``docker image list`` filtered on constants.OPAE_IMG and looks for
    an exact "repository:tag" match in the command output.

    :returns: True if the image is listed; False if it is not listed or if
              the docker command exits with a non-zero status.
    """
    cmd = 'docker image list "%s" --format "{{.Repository}}:{{.Tag}}"' % \
        constants.OPAE_IMG
    try:
        items = subprocess.check_output(shlex.split(cmd),  # pylint: disable=not-callable
                                        stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        # A failing docker command means we cannot confirm the image is
        # there; report it as not accessible instead of crashing the caller.
        LOG.error("Failed to list docker images, return code is %d, "
                  "command output: %s.",
                  exc.returncode, exc.output.decode('utf-8'))
        return False

    # check_output() returns bytes on Python 3, which never compare equal to
    # a str; decode first so the comparison below can actually match.
    if isinstance(items, bytes):
        items = items.decode('utf-8')

    for line in items.splitlines():
        if line.strip() == constants.OPAE_IMG:
            LOG.info('%s image found', constants.OPAE_IMG)
            return True

    LOG.info("%s image not found.", constants.OPAE_IMG)
    return False
|
||||
|
||||
|
||||
def reset_device_n3000(pci_addr):
    """Reset the N3000 FPGA at the specified PCI address.

    Runs ``rsu bmcimg <pci_addr>`` through ``docker run --privileged`` using
    the image named by constants.OPAE_IMG.

    :param pci_addr: PCI address string appended to the command line.
    :raises SysinvException: if the docker command exits with a non-zero
                             status; the message carries the return code
                             and the captured command output.
    """
    try:
        # Assemble the reset command line. LC_ALL/LANG are forced as a
        # workaround for OPAE tool locale issues.
        docker_cmd = "".join([
            "docker run -t --privileged -e LC_ALL=en_US.UTF-8 ",
            "-e LANG=en_US.UTF-8 ",
            constants.OPAE_IMG,
            " rsu bmcimg ",
            pci_addr,
        ])
        argv = shlex.split(docker_cmd)

        # Run the reset; stderr is folded into the captured output so it
        # ends up in the error message on failure.
        subprocess.check_output(argv,  # pylint: disable=not-callable
                                stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        # "docker run" return code will be:
        #   125 if the error is with Docker daemon itself
        #   126 if the contained command cannot be invoked
        #   127 if the contained command cannot be found
        #   Exit code of contained command otherwise
        cmd_output = exc.output.decode('utf-8')
        msg = ("Failed to reset device %s, "
               "return code is %d, command output: %s." %
               (pci_addr, exc.returncode, cmd_output))
        LOG.error(msg)
        LOG.error("Check for intel-max10 kernel logs.")
        raise exception.SysinvException(msg)
|
||||
|
||||
|
||||
def reset_n3000_fpgas():
    """Reset all N3000 FPGAs on the system, once per N3000_RESET_FLAG.

    The reset is skipped when the N3000_RESET_FLAG file already exists. It
    is attempted only if the OPAE docker image is accessible or the docker
    login flag file exists. The flag file is created only when every device
    reset completed without raising.

    :returns: True if the flag file already exists or all devices were
              reset; False if neither the docker image nor the docker login
              is ready, or if resetting any device raised an exception.
    """
    # We only want to do this once after host startup.
    if os.path.exists(N3000_RESET_FLAG):
        return True

    # Reset all N3000 FPGAs on the system.
    # TODO: make this run in parallel if there are multiple devices.
    LOG.info("Resetting N3000 FPGAs.")
    fpga_addrs = get_n3000_devices()

    # Give up only when BOTH ways of getting at the image are unavailable.
    if not n3000_img_accessible() and \
            not os.path.exists(constants.DOCKER_LOGIN_FLAG):
        LOG.info("Neither docker image nor docker login is ready, exit...")
        return False

    got_exception = False
    for fpga_addr in fpga_addrs:
        try:
            reset_device_n3000(fpga_addr)
        except Exception:
            # Keep going so the remaining devices still get reset, but make
            # sure every failure (not only docker errors) leaves a trace.
            LOG.exception("Failed to reset N3000 FPGA %s.", fpga_addr)
            got_exception = True

    LOG.info("Done resetting N3000 FPGAs.")

    if got_exception:
        # Leave the flag absent so a later run retries the reset.
        return False

    utils.touch(N3000_RESET_FLAG)
    return True
|
Loading…
Reference in New Issue
Block a user