diff --git a/sysinv/sysinv/sysinv/setup.cfg b/sysinv/sysinv/sysinv/setup.cfg index 35b201dfd2..fad80d55f1 100644 --- a/sysinv/sysinv/sysinv/setup.cfg +++ b/sysinv/sysinv/sysinv/setup.cfg @@ -39,6 +39,7 @@ console_scripts = sysinv-helm = sysinv.cmd.helm:main sysinv-utils = sysinv.cmd.utils:main cert-mon = sysinv.cmd.cert_mon:main + sysinv-reset-n3000-fpgas = sysinv.cmd.reset_n3000_fpgas:main systemconfig.puppet_plugins = 001_platform = sysinv.puppet.platform:PlatformPuppet diff --git a/sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py b/sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py new file mode 100644 index 0000000000..b915c85c71 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/cmd/reset_n3000_fpgas.py @@ -0,0 +1,41 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# The right to copy, distribute, modify, or otherwise make use +# of this software may be licensed only pursuant to the terms +# of an applicable Wind River license agreement. 
+#
+import sys
+
+from oslo_config import cfg
+from oslo_log import log as logging
+from sysinv.fpga_agent.reset_n3000_fpgas import reset_n3000_fpgas
+
+LOG = logging.getLogger(__name__)
+CONF = cfg.CONF
+
+
+def main():
+    """Reset the N3000 FPGAs; exit 0 on success and 1 on failure."""
+    logging.register_options(CONF)
+    CONF(project='sysinv', prog='reset-n3000-fpgas')
+    logging.set_defaults()
+    logging.setup(CONF, 'reset-n3000-fpgas')
+
+    # Use sys.exit(): the exit() builtin is only installed by the site module.
+    sys.exit(0 if reset_n3000_fpgas() else 1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sysinv/sysinv/sysinv/sysinv/fpga_agent/constants.py b/sysinv/sysinv/sysinv/sysinv/fpga_agent/constants.py
index 077a705a4f..a5d1193474 100644
--- a/sysinv/sysinv/sysinv/sysinv/fpga_agent/constants.py
+++ b/sysinv/sysinv/sysinv/sysinv/fpga_agent/constants.py
@@ -22,3 +22,11 @@ N3000_DEVICES = [
     N3000_FEC_PF_DEVICE,
     N3000_DEFAULT_DEVICE,
 ]
+
+# TODO: Make this specified in the config file.
+# This is the docker image containing the OPAE tools to access the FPGA device.
+OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
+
+# This is a flag file created by puppet after doing a "docker login".
+# We need to wait for it to exist before trying to run docker images.
+DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
diff --git a/sysinv/sysinv/sysinv/sysinv/fpga_agent/manager.py b/sysinv/sysinv/sysinv/sysinv/fpga_agent/manager.py
index 5b61d7a18c..e8c1966b6a 100644
--- a/sysinv/sysinv/sysinv/sysinv/fpga_agent/manager.py
+++ b/sysinv/sysinv/sysinv/sysinv/fpga_agent/manager.py
@@ -81,17 +81,10 @@ CONF.register_opts(agent_opts, 'fpga_agent')
 # This is the docker image containing the OPAE tools to access the FPGA device.
 OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
 
-# This is a flag file created by puppet after doing a "docker login".
-# We need to wait for it to exist before trying to run docker images.
-DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
-
 # This is the location where we cache the device image file while
 # writing it to the hardware.
DEVICE_IMAGE_CACHE_DIR = "/usr/local/share/applications/sysinv" -# Volatile flag file so we only reset the N3000s once after bootup. -N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset") - SYSFS_DEVICE_PATH = "/sys/bus/pci/devices/" FME_PATH = "/fpga/intel-fpga-dev.*/intel-fpga-fme.*/" SPI_PATH = "spi-altera.*.auto/spi_master/spi*/spi*.*/" @@ -110,7 +103,7 @@ BMC_BUILD_VER_PATH = "max10_version" def wait_for_docker_login(): # TODO: add a timeout LOG.info("Waiting for docker login flag.") - while not os.path.exists(DOCKER_LOGIN_FLAG): + while not os.path.exists(constants.DOCKER_LOGIN_FLAG): time.sleep(1) LOG.info("Found docker login flag, continuing.") @@ -195,33 +188,6 @@ def write_device_image_n3000(filename, pci_addr): raise exception.SysinvException(msg) -def reset_device_n3000(pci_addr): - # Reset the N3000 FPGA at the specified PCI address. - try: - # Build up the command to perform the reset. - # Note the hack to work around OPAE tool locale issues - cmd = ("docker run -t --privileged -e LC_ALL=en_US.UTF-8 " - "-e LANG=en_US.UTF-8 " + OPAE_IMG + - " rsu bmcimg " + pci_addr) - - # Issue the command to perform the firmware update. - subprocess.check_output(shlex.split(cmd), # pylint: disable=not-callable - stderr=subprocess.STDOUT) - except subprocess.CalledProcessError as exc: - # "docker run" return code will be: - # 125 if the error is with Docker daemon itself - # 126 if the contained command cannot be invoked - # 127 if the contained command cannot be found - # Exit code of contained command otherwise - msg = ("Failed to reset device %s, " - "return code is %d, command output: %s." % - (pci_addr, exc.returncode, - exc.output.decode('utf-8'))) - LOG.error(msg) - LOG.error("Check for intel-max10 kernel logs.") - raise exception.SysinvException(msg) - - def read_n3000_sysfs_file(pattern): # Read a sysfs file related to the N3000. 
# The result should be an empty string if the file doesn't exist, @@ -347,24 +313,6 @@ def get_n3000_devices(): return fpga_addrs -def reset_n3000_fpgas(): - # We only want to do this once after host startup. - if not os.path.exists(N3000_RESET_FLAG): - # Reset all N3000 FPGAs on the system. - # TODO: make this run in parallel if there are multiple devices. - LOG.info("Resetting N3000 FPGAs.") - got_exception = False - fpga_addrs = get_n3000_devices() - for fpga_addr in fpga_addrs: - try: - reset_device_n3000(fpga_addr) - except Exception: - got_exception = True - LOG.info("Done resetting N3000 FPGAs.") - if not got_exception: - utils.touch(N3000_RESET_FLAG) - - def get_n3000_pci_info(): """ Query PCI information about N3000 PCI devices. @@ -465,11 +413,6 @@ class FpgaAgentManager(service.PeriodicService): # Wait for puppet to log in to the local docker registry wait_for_docker_login() - - # Trigger reset of N3000 FPGAs. This is needed because the PCI address - # changes on the first reset after boot. - reset_n3000_fpgas() - # Wait around until someone else updates the platform.conf file # with our host UUID. self.wait_for_host_uuid() diff --git a/sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py b/sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py new file mode 100644 index 0000000000..e24716683d --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/fpga_agent/reset_n3000_fpgas.py @@ -0,0 +1,102 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+#
+# Copyright (c) 2021 Wind River Systems, Inc.
+#
+# The right to copy, distribute, modify, or otherwise make use
+# of this software may be licensed only pursuant to the terms
+# of an applicable Wind River license agreement.
+#
+
+import os
+import shlex
+from eventlet.green import subprocess
+from oslo_log import log
+
+from sysinv.common import utils
+from sysinv.common import exception
+from sysinv.fpga_agent.manager import get_n3000_devices
+from sysinv.fpga_agent import constants
+import tsconfig.tsconfig as tsc
+
+# Volatile flag file so we only reset the N3000s once after bootup.
+N3000_RESET_FLAG = os.path.join(tsc.VOLATILE_PATH, ".sysinv_n3000_reset")
+LOG = log.getLogger(__name__)
+
+
+def n3000_img_accessible():
+    """Return True if the OPAE image is in the local docker image cache."""
+    cmd = 'docker image list "%s" --format "{{.Repository}}:{{.Tag}}"' % \
+        constants.OPAE_IMG
+    items = subprocess.check_output(shlex.split(cmd),  # pylint: disable=not-callable
+                                    stderr=subprocess.STDOUT)
+    # check_output() returns bytes on python3; decode before comparing to str.
+    if constants.OPAE_IMG in items.decode('utf-8').splitlines():
+        LOG.info('%s image found' % constants.OPAE_IMG)
+        return True
+
+    LOG.info("%s image not found." % constants.OPAE_IMG)
+    return False
+
+
+def reset_device_n3000(pci_addr):
+    """Reset the N3000 FPGA at pci_addr; raise SysinvException on failure."""
+    try:
+        # Build up the command to perform the reset.
+        # Note the hack to work around OPAE tool locale issues
+        cmd = ("docker run -t --privileged -e LC_ALL=en_US.UTF-8 "
+               "-e LANG=en_US.UTF-8 " + constants.OPAE_IMG +
+               " rsu bmcimg " + pci_addr)
+
+        # Issue the command to perform the reset.
+        subprocess.check_output(shlex.split(cmd),  # pylint: disable=not-callable
+                                stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as exc:
+        # "docker run" return code will be:
+        #    125 if the error is with Docker daemon itself
+        #    126 if the contained command cannot be invoked
+        #    127 if the contained command cannot be found
+        #    Exit code of contained command otherwise
+        msg = ("Failed to reset device %s, "
+               "return code is %d, command output: %s." %
+               (pci_addr, exc.returncode, exc.output.decode('utf-8')))
+        LOG.error(msg)
+        LOG.error("Check for intel-max10 kernel logs.")
+        raise exception.SysinvException(msg)
+
+
+def reset_n3000_fpgas():
+    """Reset all N3000 FPGAs once per boot; return True on success."""
+    if os.path.exists(N3000_RESET_FLAG):
+        return True
+
+    # "docker run" needs the OPAE image cached locally, or else the docker
+    # login to be done, before it can be expected to work.
+    if not n3000_img_accessible() and \
+            not os.path.exists(constants.DOCKER_LOGIN_FLAG):
+        LOG.info("Neither docker image nor docker login is ready, exit...")
+        return False
+
+    # TODO: make this run in parallel if there are multiple devices.
+    LOG.info("Resetting N3000 FPGAs.")
+    got_exception = False
+    for fpga_addr in get_n3000_devices():
+        try:
+            reset_device_n3000(fpga_addr)
+        except Exception:
+            LOG.exception("Failed to reset N3000 FPGA %s" % fpga_addr)
+            got_exception = True
+
+    LOG.info("Done resetting N3000 FPGAs.")
+    if not got_exception:
+        utils.touch(N3000_RESET_FLAG)
+    return not got_exception