Add sysinv-reset-n3000-fpgas cmd
When AIO runs a single manifest, the N3000 FPGA reset needs to complete without the docker local registry and other SM-managed services. This adds the sysinv-reset-n3000-fpgas cmd for puppet to reset the N3000 FPGAs at host start-up. The sysinv-reset-n3000-fpgas cmd separates the function of resetting the N3000 FPGAs from sysinv-fpgas-agent, as sysinv-fpgas-agent has a dependency on rabbit, which is not available until the manifest completes. Change-Id: Ic3c4b2a00515d194793257729362f71e2951286c Partial-Bug: 1918139 Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
parent
6acd2e3564
commit
7ce3d16eea
@ -39,6 +39,7 @@ console_scripts =
|
|||||||
sysinv-helm = sysinv.cmd.helm:main
|
sysinv-helm = sysinv.cmd.helm:main
|
||||||
sysinv-utils = sysinv.cmd.utils:main
|
sysinv-utils = sysinv.cmd.utils:main
|
||||||
cert-mon = sysinv.cmd.cert_mon:main
|
cert-mon = sysinv.cmd.cert_mon:main
|
||||||
|
sysinv-reset-n3000-fpgas = sysinv.cmd.reset_n3000_fpgas:main
|
||||||
|
|
||||||
systemconfig.puppet_plugins =
|
systemconfig.puppet_plugins =
|
||||||
001_platform = sysinv.puppet.platform:PlatformPuppet
|
001_platform = sysinv.puppet.platform:PlatformPuppet
|
||||||
|
41
sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py
Normal file
41
sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||||
|
#
|
||||||
|
# The right to copy, distribute, modify, or otherwise make use
|
||||||
|
# of this software may be licensed only pursuant to the terms
|
||||||
|
# of an applicable Wind River license agreement.
|
||||||
|
#
|
||||||
|
from oslo_config import cfg
|
||||||
|
from oslo_log import log as logging
|
||||||
|
from sysinv.fpga_agent.reset_n3000_fpgas import reset_n3000_fpgas
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
CONF = cfg.CONF
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point for the ``sysinv-reset-n3000-fpgas`` console script.

    Registers the oslo.log options, loads the sysinv configuration, sets up
    logging and then runs reset_n3000_fpgas().

    The process always terminates here: exit status 0 when
    reset_n3000_fpgas() returns a true value, exit status 1 otherwise.
    """
    # Local import keeps this function self-contained.
    import sys

    logging.register_options(CONF)
    CONF(project='sysinv', prog='reset-n3000-fpgas')

    logging.set_defaults()
    logging.setup(cfg.CONF, 'reset-n3000-fpgas')

    # Use sys.exit() instead of the exit() helper: exit() is injected by the
    # "site" module for interactive sessions and is not guaranteed to exist
    # (e.g. when the interpreter is started with -S).
    sys.exit(0 if reset_n3000_fpgas() else 1)


if __name__ == '__main__':
    main()
|
@ -22,3 +22,11 @@ N3000_DEVICES = [
|
|||||||
N3000_FEC_PF_DEVICE,
|
N3000_FEC_PF_DEVICE,
|
||||||
N3000_DEFAULT_DEVICE,
|
N3000_DEFAULT_DEVICE,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# TODO: Make this specified in the config file.
|
||||||
|
# This is the docker image containing the OPAE tools to access the FPGA device.
|
||||||
|
OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
||||||
|
|
||||||
|
# This is a flag file created by puppet after doing a "docker login".
|
||||||
|
# We need to wait for it to exist before trying to run docker images.
|
||||||
|
DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
|
||||||
|
@ -81,17 +81,10 @@ CONF.register_opts(agent_opts, 'fpga_agent')
|
|||||||
# This is the docker image containing the OPAE tools to access the FPGA device.
|
# This is the docker image containing the OPAE tools to access the FPGA device.
|
||||||
OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
||||||
|
|
||||||
# This is a flag file created by puppet after doing a "docker login".
|
|
||||||
# We need to wait for it to exist before trying to run docker images.
|
|
||||||
DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
|
|
||||||
|
|
||||||
# This is the location where we cache the device image file while
|
# This is the location where we cache the device image file while
|
||||||
# writing it to the hardware.
|
# writing it to the hardware.
|
||||||
DEVICE_IMAGE_CACHE_DIR = "/usr/local/share/applications/sysinv"
|
DEVICE_IMAGE_CACHE_DIR = "/usr/local/share/applications/sysinv"
|
||||||
|
|
||||||
# Volatile flag file so we only reset the N3000s once after bootup.
|
|
||||||
N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset")
|
|
||||||
|
|
||||||
SYSFS_DEVICE_PATH = "/sys/bus/pci/devices/"
|
SYSFS_DEVICE_PATH = "/sys/bus/pci/devices/"
|
||||||
FME_PATH = "/fpga/intel-fpga-dev.*/intel-fpga-fme.*/"
|
FME_PATH = "/fpga/intel-fpga-dev.*/intel-fpga-fme.*/"
|
||||||
SPI_PATH = "spi-altera.*.auto/spi_master/spi*/spi*.*/"
|
SPI_PATH = "spi-altera.*.auto/spi_master/spi*/spi*.*/"
|
||||||
@ -110,7 +103,7 @@ BMC_BUILD_VER_PATH = "max10_version"
|
|||||||
def wait_for_docker_login():
|
def wait_for_docker_login():
|
||||||
# TODO: add a timeout
|
# TODO: add a timeout
|
||||||
LOG.info("Waiting for docker login flag.")
|
LOG.info("Waiting for docker login flag.")
|
||||||
while not os.path.exists(DOCKER_LOGIN_FLAG):
|
while not os.path.exists(constants.DOCKER_LOGIN_FLAG):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
LOG.info("Found docker login flag, continuing.")
|
LOG.info("Found docker login flag, continuing.")
|
||||||
|
|
||||||
@ -195,33 +188,6 @@ def write_device_image_n3000(filename, pci_addr):
|
|||||||
raise exception.SysinvException(msg)
|
raise exception.SysinvException(msg)
|
||||||
|
|
||||||
|
|
||||||
def reset_device_n3000(pci_addr):
|
|
||||||
# Reset the N3000 FPGA at the specified PCI address.
|
|
||||||
try:
|
|
||||||
# Build up the command to perform the reset.
|
|
||||||
# Note the hack to work around OPAE tool locale issues
|
|
||||||
cmd = ("docker run -t --privileged -e LC_ALL=en_US.UTF-8 "
|
|
||||||
"-e LANG=en_US.UTF-8 " + OPAE_IMG +
|
|
||||||
" rsu bmcimg " + pci_addr)
|
|
||||||
|
|
||||||
# Issue the command to perform the firmware update.
|
|
||||||
subprocess.check_output(shlex.split(cmd), # pylint: disable=not-callable
|
|
||||||
stderr=subprocess.STDOUT)
|
|
||||||
except subprocess.CalledProcessError as exc:
|
|
||||||
# "docker run" return code will be:
|
|
||||||
# 125 if the error is with Docker daemon itself
|
|
||||||
# 126 if the contained command cannot be invoked
|
|
||||||
# 127 if the contained command cannot be found
|
|
||||||
# Exit code of contained command otherwise
|
|
||||||
msg = ("Failed to reset device %s, "
|
|
||||||
"return code is %d, command output: %s." %
|
|
||||||
(pci_addr, exc.returncode,
|
|
||||||
exc.output.decode('utf-8')))
|
|
||||||
LOG.error(msg)
|
|
||||||
LOG.error("Check for intel-max10 kernel logs.")
|
|
||||||
raise exception.SysinvException(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def read_n3000_sysfs_file(pattern):
|
def read_n3000_sysfs_file(pattern):
|
||||||
# Read a sysfs file related to the N3000.
|
# Read a sysfs file related to the N3000.
|
||||||
# The result should be an empty string if the file doesn't exist,
|
# The result should be an empty string if the file doesn't exist,
|
||||||
@ -347,24 +313,6 @@ def get_n3000_devices():
|
|||||||
return fpga_addrs
|
return fpga_addrs
|
||||||
|
|
||||||
|
|
||||||
def reset_n3000_fpgas():
|
|
||||||
# We only want to do this once after host startup.
|
|
||||||
if not os.path.exists(N3000_RESET_FLAG):
|
|
||||||
# Reset all N3000 FPGAs on the system.
|
|
||||||
# TODO: make this run in parallel if there are multiple devices.
|
|
||||||
LOG.info("Resetting N3000 FPGAs.")
|
|
||||||
got_exception = False
|
|
||||||
fpga_addrs = get_n3000_devices()
|
|
||||||
for fpga_addr in fpga_addrs:
|
|
||||||
try:
|
|
||||||
reset_device_n3000(fpga_addr)
|
|
||||||
except Exception:
|
|
||||||
got_exception = True
|
|
||||||
LOG.info("Done resetting N3000 FPGAs.")
|
|
||||||
if not got_exception:
|
|
||||||
utils.touch(N3000_RESET_FLAG)
|
|
||||||
|
|
||||||
|
|
||||||
def get_n3000_pci_info():
|
def get_n3000_pci_info():
|
||||||
""" Query PCI information about N3000 PCI devices.
|
""" Query PCI information about N3000 PCI devices.
|
||||||
|
|
||||||
@ -465,11 +413,6 @@ class FpgaAgentManager(service.PeriodicService):
|
|||||||
|
|
||||||
# Wait for puppet to log in to the local docker registry
|
# Wait for puppet to log in to the local docker registry
|
||||||
wait_for_docker_login()
|
wait_for_docker_login()
|
||||||
|
|
||||||
# Trigger reset of N3000 FPGAs. This is needed because the PCI address
|
|
||||||
# changes on the first reset after boot.
|
|
||||||
reset_n3000_fpgas()
|
|
||||||
|
|
||||||
# Wait around until someone else updates the platform.conf file
|
# Wait around until someone else updates the platform.conf file
|
||||||
# with our host UUID.
|
# with our host UUID.
|
||||||
self.wait_for_host_uuid()
|
self.wait_for_host_uuid()
|
||||||
|
102
sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py
Normal file
102
sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||||
|
#
|
||||||
|
# The right to copy, distribute, modify, or otherwise make use
|
||||||
|
# of this software may be licensed only pursuant to the terms
|
||||||
|
# of an applicable Wind River license agreement.
|
||||||
|
#
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shlex
|
||||||
|
from eventlet.green import subprocess
|
||||||
|
from oslo_log import log
|
||||||
|
|
||||||
|
from sysinv.common import utils
|
||||||
|
from sysinv.common import exception
|
||||||
|
from sysinv.fpga_agent.manager import get_n3000_devices
|
||||||
|
from sysinv.fpga_agent import constants
|
||||||
|
import tsconfig.tsconfig as tsc
|
||||||
|
|
||||||
|
# Volatile flag file so we only reset the N3000s once after bootup.
|
||||||
|
N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset")
|
||||||
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def n3000_img_accessible():
    """Check whether the OPAE docker image is available to docker locally.

    Runs ``docker image list`` filtered on constants.OPAE_IMG and looks for
    an output line that equals the image reference.

    :returns: True if the image is listed; False if it is not listed or if
              the docker command could not be run successfully.
    """
    cmd = 'docker image list "%s" --format "{{.Repository}}:{{.Tag}}"' % \
        constants.OPAE_IMG
    try:
        items = subprocess.check_output(shlex.split(cmd),  # pylint: disable=not-callable
                                        stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError) as exc:
        # A docker failure means we cannot confirm the image is usable;
        # report it as not accessible rather than dying with a traceback.
        LOG.warning("Unable to list docker images: %s", exc)
        return False

    # check_output() returns bytes on Python 3; comparing bytes with the
    # str image name would never match, so decode before comparing.
    if isinstance(items, bytes):
        items = items.decode('utf-8', 'replace')

    for line in items.splitlines():
        if line.strip() == constants.OPAE_IMG:
            LOG.info('%s image found', constants.OPAE_IMG)
            return True

    LOG.info("%s image not found.", constants.OPAE_IMG)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def reset_device_n3000(pci_addr):
    """Reset the N3000 FPGA at the specified PCI address.

    Runs ``rsu bmcimg <pci_addr>`` inside the OPAE docker image
    (constants.OPAE_IMG) as a privileged container.

    :param pci_addr: PCI address string of the FPGA to reset.
    :raises SysinvException: if the docker command exits with a non-zero
                             return code.
    """
    # Assemble the reset command. The LC_ALL/LANG settings are a hack to
    # work around OPAE tool locale issues.
    docker_cmd = "".join((
        "docker run -t --privileged -e LC_ALL=en_US.UTF-8 ",
        "-e LANG=en_US.UTF-8 ",
        constants.OPAE_IMG,
        " rsu bmcimg ",
        pci_addr,
    ))
    argv = shlex.split(docker_cmd)

    try:
        # Run the reset; stderr is folded into the captured output so it
        # is available for the error report below.
        subprocess.check_output(argv,  # pylint: disable=not-callable
                                stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        # "docker run" return code will be:
        #    125 if the error is with Docker daemon itself
        #    126 if the contained command cannot be invoked
        #    127 if the contained command cannot be found
        #    Exit code of contained command otherwise
        details = (pci_addr, err.returncode, err.output.decode('utf-8'))
        failure = ("Failed to reset device %s, "
                   "return code is %d, command output: %s." % details)
        LOG.error(failure)
        LOG.error("Check for intel-max10 kernel logs.")
        raise exception.SysinvException(failure)
|
||||||
|
|
||||||
|
|
||||||
|
def reset_n3000_fpgas():
    """Reset all N3000 FPGAs on this host, at most once per boot.

    The volatile flag file N3000_RESET_FLAG records a successful reset; when
    it already exists nothing is done.  Otherwise every device returned by
    get_n3000_devices() is reset through reset_device_n3000(), provided that
    the OPAE docker image is available locally or the docker login flag
    (constants.DOCKER_LOGIN_FLAG) exists.

    :returns: True if the FPGAs were already reset or every reset succeeded;
              False if docker is not ready yet or any device failed to reset.
    """
    # We only want to do this once after host startup.
    if os.path.exists(N3000_RESET_FLAG):
        return True

    # Reset all N3000 FPGAs on the system.
    # TODO: make this run in parallel if there are multiple devices.
    LOG.info("Resetting N3000 FPGAs.")
    fpga_addrs = get_n3000_devices()

    # The reset runs inside the OPAE container, so we need either the image
    # already present locally or a completed docker login to pull it.
    if not n3000_img_accessible() and \
            not os.path.exists(constants.DOCKER_LOGIN_FLAG):
        LOG.info("Neither docker image nor docker login is ready, exit...")
        return False

    got_exception = False
    for fpga_addr in fpga_addrs:
        try:
            reset_device_n3000(fpga_addr)
        except Exception:
            # Keep going so the remaining devices still get reset, but
            # record the failure (with traceback) instead of hiding it.
            LOG.exception("Failed to reset N3000 FPGA %s.", fpga_addr)
            got_exception = True

    LOG.info("Done resetting N3000 FPGAs.")
    if got_exception:
        # Leave the flag absent so the reset is retried on the next run.
        return False

    utils.touch(N3000_RESET_FLAG)
    return True
|
Loading…
Reference in New Issue
Block a user