Files
distcloud/distributedcloud/dccommon/subcloud_install.py
Cristian Mondo 3615dd79dc Improve error handling for Ansible playbook timeout
Currently, when a timeout occurs during the execution of
the Ansible playbook, no specific errors are logged in
the Ansible log because the calling process (dcmanager)
stops execution after exceeding the configured timeout.
dcmanager uses a mechanism to search for error messages
from Ansible log files, but it doesn't map the error
found to the event that executed/failed. Instead, it
looks for the first occurrence of an error that may not
be related to the event. This results in incorrect error
information being displayed to the user.

This commit improves the timeout handling by providing
more context in the error messages for those scenarios
where timeout is configured, making it easier to
diagnose issues when they arise. The error message can
now be displayed to the user through 'dcmanager subcloud
errors' command.

The change involves updating several scenarios where
Ansible playbooks are executed, including:
- Subcloud installation
- Subcloud bootstrap
- Subcloud config
- Subcloud network reconfiguration
- Subcloud rehoming
- Subcloud backup
- Subcloud restore
- Subcloud backup deletion
- Subcloud enrollment
- Subcloud prestaging

Test Plan:
PASS - Install a subcloud, generate Ansible playbook error,
       and verify that the error message is correctly
       logged and displayed.

PASS - Repeat the above tests for the following scenarios:
       Subcloud bootstrap
       Subcloud config
       Subcloud network reconfiguration
       Subcloud backup
       Subcloud restore
       Subcloud backup deletion
       Subcloud prestaging
       Subcloud rehoming
       Subcloud enrollment

PASS - Prestage a subcloud, generate Ansible timeout error
       (configuring playbook_timeout parameter)
       and verify that the error message is correctly
       logged and displayed.

PASS - Restore a subcloud backup, generate Ansible timeout
       error (configuring playbook_timeout parameter)
       and verify that the error message is correctly
       logged and displayed.

PASS - Run the above scenarios without errors to ensure
       that normal operations are unaffected.

Closes-bug: 2121478

Change-Id: I12dccb5e4fec0b5cbd642549af26897ac3552b7f
Signed-off-by: Cristian Mondo <cristian.mondo@windriver.com>
2025-09-16 09:39:26 -03:00

760 lines
29 KiB
Python

# Copyright (c) 2021-2025 Wind River Systems, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import os
import pty
import shutil
import socket
import tempfile
import threading
import urllib.error as urllib_error
from urllib import parse
from urllib import request
from eventlet.green import subprocess
import netaddr
from oslo_config import cfg
from oslo_log import log as logging
from dccommon import consts
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import endpoint_cache
from dccommon import exceptions
from dccommon import ostree_mount
from dccommon import utils as cutils
from dcmanager.common import consts as dcmanager_consts
from dcmanager.common import utils
# Module-level logger and handle to the global oslo.config options.
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
# Passed as the "--timeout" argument of GEN_ISO_COMMAND when the ISO is built.
BOOT_MENU_TIMEOUT = "5"
SUBCLOUD_ISO_DOWNLOAD_PATH = "/var/www/pages/iso"
# Directory that receives the per-release "<version>_packages_list.txt" copy.
PACKAGE_LIST_PATH = "/usr/local/share/pkg-list"
# Script run to generate the subcloud boot ISO and, with "--delete", to
# remove a previously generated one.
GEN_ISO_COMMAND = "/usr/local/bin/gen-bootloader-iso.sh"
# Directory and file-name prefixes used when emitting the interface and
# route configuration files into the kickstart config.
NETWORK_SCRIPTS = "/etc/sysconfig/network-scripts"
NETWORK_INTERFACE_PREFIX = "ifcfg"
NETWORK_ROUTE_PREFIX = "route"
LOCAL_REGISTRY_PREFIX = "registry.local:9001/"
# install_type values for which SubcloudInstall.is_serial_console() is True.
SERIAL_CONSOLE_INSTALL_TYPES = (0, 2, 4)
# Minimum rvmc_debug_level at which IpmiLogger.is_enabled() turns capture on
# (when CONF.ipmi_capture does not force it globally on or off).
RVMC_DEBUG_LEVEL_IPMI_CAPTURE = 1
class SubcloudInstall(object):
    """Class to encapsulate the subcloud install operations

    An instance is bound to a single subcloud (``name``). ``prep`` builds the
    per-subcloud boot ISO plus the RVMC and install override files,
    ``install`` runs the install playbook and ``cleanup`` removes the
    generated artifacts.
    """

    def __init__(self, subcloud_name):
        """Create the install helper for the subcloud named subcloud_name."""
        # Initialize sysinv_client closer to
        # its actual usage to avoid token expiration.
        self.sysinv_client = None
        self.name = subcloud_name
        # Path of the ISO handed to GEN_ISO_COMMAND (set by update_iso).
        self.input_iso = None
        # Per-release directory holding the generated ISOs (set by prep).
        self.www_iso_root = None
        # Cached "https_enabled" system capability (see get_https_enabled).
        self.https_enabled = None
        # IpmiLogger instance when serial console capture is active.
        self.ipmi_logger = None

    @staticmethod
    def config_device(ks_cfg, interface, vlan=False):
        """Write the start of an interface config heredoc to ks_cfg.

        :param ks_cfg: writable file-like object receiving the text
        :param interface: interface name, used for the file name and DEVICE
        :param vlan: when true, a "VLAN=yes" line is appended
        """
        device_cfg = "%s/%s-%s" % (NETWORK_SCRIPTS, NETWORK_INTERFACE_PREFIX, interface)
        ks_cfg.write("\tcat << EOF > " + device_cfg + "\n")
        ks_cfg.write("DEVICE=" + interface + "\n")
        ks_cfg.write("BOOTPROTO=none\n")
        ks_cfg.write("ONBOOT=yes\n")
        if vlan:
            ks_cfg.write("VLAN=yes\n")

    @staticmethod
    def config_ip_address(ks_cfg, values):
        """Write the IPADDR and PREFIX lines taken from the bootstrap values."""
        ks_cfg.write("IPADDR=" + values["bootstrap_address"] + "\n")
        ks_cfg.write("PREFIX=" + str(values["bootstrap_address_prefix"]) + "\n")

    @staticmethod
    def config_default_route(ks_cfg, values, ip_version):
        """Write the default route lines for ip_version (4, otherwise IPv6)."""
        if ip_version == 4:
            ks_cfg.write("DEFROUTE=yes\n")
            ks_cfg.write("GATEWAY=" + values["nexthop_gateway"] + "\n")
        else:
            ks_cfg.write("IPV6INIT=yes\n")
            ks_cfg.write("IPV6_DEFROUTE=yes\n")
            ks_cfg.write("IPV6_DEFAULTGW=" + values["nexthop_gateway"] + "\n")

    @staticmethod
    def config_static_route(ks_cfg, interface, values, ip_version):
        """Write a complete static route heredoc for interface to ks_cfg.

        IPv4 uses the ADDRESS0/NETMASK0/GATEWAY0 form in a "route-<if>" file;
        any other ip_version writes a single "<net>/<mask> via <gw> dev <if>"
        line in a "route6-<if>" file. The heredoc is terminated here.
        """
        if ip_version == 4:
            route_cfg = "%s/%s-%s" % (NETWORK_SCRIPTS, NETWORK_ROUTE_PREFIX, interface)
            ks_cfg.write("\tcat << EOF > " + route_cfg + "\n")
            ks_cfg.write("ADDRESS0=" + values["network_address"] + "\n")
            ks_cfg.write("NETMASK0=" + str(values["network_mask"]) + "\n")
            ks_cfg.write("GATEWAY0=" + values["nexthop_gateway"] + "\n")
        else:
            route_cfg = "%s/%s6-%s" % (NETWORK_SCRIPTS, NETWORK_ROUTE_PREFIX, interface)
            ks_cfg.write("\tcat << EOF > " + route_cfg + "\n")
            route_args = "%s/%s via %s dev %s\n" % (
                values["network_address"],
                values["network_mask"],
                values["nexthop_gateway"],
                interface,
            )
            ks_cfg.write(route_args)
        ks_cfg.write("EOF\n\n")

    @staticmethod
    def get_sysinv_client():
        """Return a SysinvClient built from the admin session for region one."""
        admin_session = endpoint_cache.EndpointCache.get_admin_session()
        region_name = cutils.get_region_one_name()
        return SysinvClient(region_name, admin_session)

    @staticmethod
    def format_address(ip_address):
        """Return ip_address as a string, wrapping IPv6 addresses in brackets.

        :raises netaddr.AddrFormatError: if ip_address cannot be parsed
        """
        try:
            address = netaddr.IPAddress(ip_address)
        except netaddr.AddrFormatError:
            LOG.error("Failed to format the address: %s", ip_address)
            # Bare raise keeps the original traceback intact.
            raise
        if address.version == 6:
            return "[%s]" % address
        return str(address)

    def get_https_enabled(self):
        """Return the system "https_enabled" capability, querying it once.

        The value is fetched through self.sysinv_client on first use and
        cached on the instance afterwards.
        """
        if self.https_enabled is None:
            system = self.sysinv_client.get_system()
            self.https_enabled = system.capabilities.get("https_enabled", False)
        return self.https_enabled

    @staticmethod
    def get_image_base_url(
        https_enabled, sysinv_client, subcloud_primary_oam_ip_family
    ):
        """Build "<protocol>://<oam floating ip>:<port>" for image downloads.

        :param https_enabled: selects https/https_port over http/http_port
        :param sysinv_client: client used to read the port service parameter
            and the OAM address pools
        :param subcloud_primary_oam_ip_family: IP family used to pick the pool
        :raises Exception: if no OAM pool matches the requested IP family
        """
        # get the protocol and the configured http or https port
        protocol, value = (
            ("https", "https_port") if https_enabled else ("http", "http_port")
        )
        http_parameters = sysinv_client.get_service_parameters("name", value)
        port = http_parameters[0].value

        # system controller OAM pools can be either single-stack or dual-stack,
        # subcloud need to choose right IP family based upon subcloud primary
        # OAM IP family, as OAM communication between subcloud and
        # system controller is single stack only.
        oam_pools = sysinv_client.get_oam_address_pools()
        try:
            oam_pool = utils.get_pool_by_ip_family(
                oam_pools, subcloud_primary_oam_ip_family
            )
        except Exception as e:
            error_msg = (
                f"subcloud primary OAM IP family does not exist on system"
                f"controller OAM: {e}"
            )
            LOG.exception(error_msg)
            # Chain explicitly so the lookup failure stays attached as cause.
            raise Exception(error_msg) from e

        oam_floating_ip = SubcloudInstall.format_address(oam_pool.floating_address)
        return f"{protocol}://{oam_floating_ip}:{port}"

    @staticmethod
    def create_rvmc_config_file(override_path, payload):
        """Write the RVMC config file into override_path.

        Only payload keys listed in consts.BMC_INSTALL_VALUES or
        consts.OPTIONAL_BMC_INSTALL_VALUES, plus "image", are written, one
        "key: <json value>" line each.
        """
        # NOTE(review): the payload is logged verbatim; if it carries BMC
        # credentials they end up in the debug log - confirm this is intended.
        LOG.debug(
            "create rvmc config file, path: %s, payload: %s", override_path, payload
        )
        rvmc_config_file = os.path.join(override_path, consts.RVMC_CONFIG_FILE_NAME)

        with open(rvmc_config_file, "w") as f_out_rvmc_config_file:
            for k, v in payload.items():
                if (
                    k in consts.BMC_INSTALL_VALUES
                    or k in consts.OPTIONAL_BMC_INSTALL_VALUES
                    or k == "image"
                ):
                    f_out_rvmc_config_file.write(f"{k}: {json.dumps(v)}\n")

    def create_install_override_file(self, override_path, payload):
        """Write install_values.yml into override_path.

        The file starts with a fixed header (password_change, host_name of
        the local host, rvmc_config_dir) followed by one "key: <json value>"
        line per payload entry.
        """
        LOG.debug("create install override file")
        install_override_file = os.path.join(override_path, "install_values.yml")
        host_name = socket.gethostname()

        with open(install_override_file, "w") as f_out_override_file:
            f_out_override_file.write(
                "---"
                "\npassword_change: true"
                "\nhost_name: "
                + host_name
                + "\nrvmc_config_dir: "
                + override_path
                + "\n"
            )
            for k, v in payload.items():
                f_out_override_file.write("%s: %s\n" % (k, json.dumps(v)))

    def update_iso(
        self,
        override_path,
        values,
        subcloud_primary_oam_ip_family,
        include_paths=None,
        kickstart_uri=None,
    ):
        """Obtain the input ISO and run GEN_ISO_COMMAND to build the boot ISO.

        :param override_path: directory a downloaded ISO is stored in
        :param values: install values; must contain "software_version" and
            "image", other keys found in consts.GEN_ISO_OPTIONS become
            command-line options
        :param subcloud_primary_oam_ip_family: used to build the base URL
        :param include_paths: optional paths passed as "--include-path"
        :param kickstart_uri: optional value passed as "--kickstart-uri"
        :raises exceptions.DCCommonException: if the image cannot be obtained
        :raises Exception: if GEN_ISO_COMMAND exits with a non-zero code
        """
        if not os.path.isdir(self.www_iso_root):
            try:
                os.mkdir(self.www_iso_root, 0o755)
            except FileExistsError:
                # www_iso_root is per release, not per subcloud, so another
                # install may create it between the check and the mkdir.
                # Anything other than a directory is still an error.
                if not os.path.isdir(self.www_iso_root):
                    raise

        LOG.debug(
            "update_iso: www_iso_root: %s, values: %s, override_path: %s",
            self.www_iso_root,
            str(values),
            override_path,
        )
        path = None
        software_version = str(values["software_version"])
        try:
            if parse.urlparse(values["image"]).scheme:
                url = values["image"]
            else:
                path = os.path.abspath(values["image"])
                url = parse.urljoin("file:", request.pathname2url(path))
            filename = os.path.join(override_path, "bootimage.iso")

            if path and path.startswith(
                consts.SOFTWARE_VAULT_DIR + "/" + software_version
            ):
                if os.path.exists(path):
                    # Reference known load in vault
                    LOG.info("Setting input_iso to load vault path %s", path)
                    self.input_iso = path
                else:
                    raise exceptions.LoadNotInVault(path=path)
            else:
                LOG.info("Downloading %s to %s", url, override_path)
                self.input_iso, _ = request.urlretrieve(url, filename)
                LOG.info("Downloaded %s to %s", url, self.input_iso)
        except urllib_error.ContentTooShortError as e:
            msg = "Error: Downloading file %s may be interrupted: %s" % (
                values["image"],
                e,
            )
            LOG.error(msg)
            raise exceptions.DCCommonException(resource=self.name, msg=msg) from e
        except Exception as e:
            # Also reached for LoadNotInVault raised above.
            msg = "Error: Could not download file %s: %s" % (values["image"], e)
            LOG.error(msg)
            raise exceptions.DCCommonException(resource=self.name, msg=msg) from e

        LOG.info("Updating ISO file with %s", GEN_ISO_COMMAND)
        update_iso_cmd = [
            GEN_ISO_COMMAND,
            "--input",
            self.input_iso,
            "--www-root",
            self.www_iso_root,
            "--id",
            self.name,
            "--boot-hostname",
            self.name,
            "--timeout",
            BOOT_MENU_TIMEOUT,
            "--release",
            software_version,
        ]

        if include_paths:
            for include_path in include_paths:
                update_iso_cmd += ["--include-path", include_path]

        if kickstart_uri:
            update_iso_cmd += ["--kickstart-uri", kickstart_uri]

        # Translate every supplied install value that has a matching
        # GEN_ISO_COMMAND option into its command-line form.
        for key, option in consts.GEN_ISO_OPTIONS.items():
            if key not in values:
                continue
            value = values[key]
            LOG.debug(
                "Setting option from key=%s, option=%s, value=%s",
                key,
                option,
                value,
            )
            if key in ("bootstrap_address", "nexthop_gateway"):
                update_iso_cmd += [option, self.format_address(value)]
            elif key == "no_check_certificate":
                # Only meaningful when the image is served over https.
                if str(value) == "True" and self.get_https_enabled():
                    update_iso_cmd += [option, "inst.noverifyssl=True"]
            elif key in ("rootfs_device", "boot_device", "rd.net.timeout.ipv6dad"):
                update_iso_cmd += [option, (key + "=" + str(value))]
            elif key == "bootstrap_vlan":
                vlan_interface = "%s.%s:%s" % (
                    values["bootstrap_interface"],
                    values["bootstrap_vlan"],
                    values["bootstrap_interface"],
                )
                update_iso_cmd += [option, ("vlan=" + vlan_interface)]
            elif key == "bootstrap_interface" and "bootstrap_vlan" in values:
                boot_interface = "%s.%s" % (
                    values["bootstrap_interface"],
                    values["bootstrap_vlan"],
                )
                update_iso_cmd += [option, boot_interface]
            elif key == "persistent_size":
                update_iso_cmd += [option, ("persistent_size=%s" % str(value))]
            elif key == "hw_settle":
                # translate to 'insthwsettle' boot parameter
                update_iso_cmd += [option, ("insthwsettle=%s" % str(value))]
            elif key == "extra_boot_params":
                update_iso_cmd += [option, ("extra_boot_params=%s" % str(value))]
            elif key == "wipe_osds":
                update_iso_cmd += [option, "wipe_osds=%s" % (1 if value else 0)]
            else:
                update_iso_cmd += [option, str(value)]

        image_base_url = self.get_image_base_url(
            self.get_https_enabled(), self.sysinv_client, subcloud_primary_oam_ip_family
        )
        base_url = os.path.join(image_base_url, "iso", software_version)
        update_iso_cmd += ["--base-url", base_url]

        str_cmd = " ".join(update_iso_cmd)
        LOG.info("Running update_iso_cmd: %s", str_cmd)
        result = subprocess.run(
            update_iso_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        if result.returncode != 0:
            msg = f"Failed to update iso: {str_cmd}"
            LOG.error(
                "%s returncode: %s, output: %s",
                msg,
                result.returncode,
                result.stdout.decode("utf-8").replace("\n", ", "),
            )
            raise Exception(msg)

    def cleanup(self, software_version=None):
        """Remove the downloaded ISO and the generated per-subcloud ISO.

        A failing cleanup command is logged, not raised.

        :param software_version: not used by this method; kept so existing
            callers that pass it keep working
        """
        # Do not remove the input_iso if it is in the Load Vault
        if (
            self.input_iso is not None
            and not self.input_iso.startswith(consts.SOFTWARE_VAULT_DIR)
            and os.path.exists(self.input_iso)
        ):
            os.remove(self.input_iso)

        if self.www_iso_root is not None and os.path.isdir(self.www_iso_root):
            cleanup_cmd = [
                GEN_ISO_COMMAND,
                "--id",
                self.name,
                "--www-root",
                self.www_iso_root,
                "--delete",
            ]
            LOG.info("Running install cleanup: %s", self.name)
            result = subprocess.run(
                cleanup_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            output = result.stdout.decode("utf-8").replace("\n", ", ")
            if result.returncode == 0:
                # Note: watch for non-exit 0 errors in this output as well
                LOG.info(
                    "Finished install cleanup: %s returncode: %s, output: %s",
                    " ".join(cleanup_cmd),
                    result.returncode,
                    output,
                )
            else:
                LOG.error(
                    "Failed install cleanup: %s returncode: %s, output: %s",
                    " ".join(cleanup_cmd),
                    result.returncode,
                    output,
                )

    # TODO(kmacleod): utils.synchronized should be moved into dccommon
    @utils.synchronized("packages-list-from-bootimage", external=True)
    def _copy_packages_list_from_bootimage(self, software_version, pkg_file_src):
        """Extract package_checksums from the vault bootimage.iso.

        Does nothing when pkg_file_src already exists. Otherwise the
        bootimage.iso of software_version is loop-mounted read-only on a
        temporary directory, package_checksums is copied to pkg_file_src and
        then to PACKAGE_LIST_PATH/<software_version>_packages_list.txt.
        The image is unmounted and the temporary directory removed afterwards.

        :raises Exception: if the ISO cannot be mounted or does not contain
            the package_checksums file
        """
        # The source file (pkg_file_src) is not available.
        # So create a temporary directory in /mnt, mount the bootimage.iso
        # from /opt/dc-vault/rel-<version>/. Then copy the file from there to
        # the pkg_file_src location.
        if os.path.exists(pkg_file_src):
            LOG.info("Found existing package_checksums file at %s", pkg_file_src)
            return

        temp_bootimage_mnt_dir = tempfile.mkdtemp()
        bootimage_path = os.path.join(
            consts.SOFTWARE_VAULT_DIR, software_version, "bootimage.iso"
        )

        with open(os.devnull, "w") as fnull:
            try:
                # pylint: disable-next=not-callable
                subprocess.check_call(
                    [
                        "mount",
                        "-r",
                        "-o",
                        "loop",
                        bootimage_path,
                        temp_bootimage_mnt_dir,
                    ],
                    stdout=fnull,
                    stderr=fnull,
                )
            except Exception:
                os.rmdir(temp_bootimage_mnt_dir)
                raise Exception("Unable to mount bootimage.iso")

        # Now that the bootimage.iso has been mounted, copy package_checksums to
        # pkg_file_src.
        try:
            pkg_file = os.path.join(temp_bootimage_mnt_dir, "package_checksums")
            LOG.info("Copying %s to %s", pkg_file, pkg_file_src)
            shutil.copy(pkg_file, pkg_file_src)

            # now copy package_checksums to
            # /usr/local/share/pkg-list/<software_version>_packages_list.txt
            # This will only be done once by the first thread to access this code.

            # The directory PACKAGE_LIST_PATH may exist from a previous invocation
            # of this function (artifacts due to a previous failure).
            # Create the directory if it does not exist.
            if not os.path.exists(PACKAGE_LIST_PATH):
                os.mkdir(PACKAGE_LIST_PATH, 0o755)

            package_list_file = os.path.join(
                PACKAGE_LIST_PATH, software_version + "_packages_list.txt"
            )
            shutil.copy(pkg_file_src, package_list_file)
        except IOError:
            # bootimage.iso in /opt/dc-vault/<release-id>/ does not have the file.
            # this is an issue in bootimage.iso.
            msg = "Package_checksums not found in bootimage.iso"
            LOG.error(msg)
            raise Exception(msg)
        finally:
            # pylint: disable-next=not-callable
            subprocess.check_call(["umount", "-l", temp_bootimage_mnt_dir])
            os.rmdir(temp_bootimage_mnt_dir)

    @staticmethod
    def is_serial_console(install_type):
        """Return True if install_type is one of SERIAL_CONSOLE_INSTALL_TYPES."""
        return install_type is not None and install_type in SERIAL_CONSOLE_INSTALL_TYPES

    def prep(
        self,
        override_path,
        payload,
        subcloud_primary_oam_ip_family,
        include_paths=None,
        kickstart_uri=None,
    ):
        """Update the iso image and create the config files for the subcloud

        The payload is modified in place: the ISO-related values, then the
        BMC values and the image URL are removed as they are consumed, and
        whatever remains is written to the install override file.

        :param override_path: base directory; files go to <override_path>/<name>
        :param payload: install values, must contain "software_version" and
            "image"
        :param subcloud_primary_oam_ip_family: used to build the image URL
        :param include_paths: forwarded to update_iso
        :param kickstart_uri: forwarded to update_iso
        """
        LOG.info("Prepare for %s remote install", self.name)

        # IpmiLogger joins the subcloud name onto override_path itself, so it
        # is given the base directory, before override_path is extended below.
        if SubcloudInstall.is_serial_console(
            payload.get("install_type")
        ) and IpmiLogger.is_enabled(payload.get("rvmc_debug_level", 0)):
            self.ipmi_logger = IpmiLogger(self.name, override_path)

        iso_values = {}
        for k in consts.MANDATORY_INSTALL_VALUES:
            # A mandatory value is ISO-related when gen-iso has an option for
            # it or when it is not one of the BMC values.
            if k in consts.GEN_ISO_OPTIONS or k not in consts.BMC_INSTALL_VALUES:
                iso_values[k] = payload.get(k)

        for k in consts.OPTIONAL_INSTALL_VALUES:
            if k in payload:
                iso_values[k] = payload.get(k)

        software_version = str(payload["software_version"])
        iso_values["software_version"] = payload["software_version"]
        iso_values["image"] = payload["image"]

        override_path = os.path.join(override_path, self.name)
        if not os.path.isdir(override_path):
            os.mkdir(override_path, 0o755)

        self.www_iso_root = os.path.join(consts.SUBCLOUD_ISO_PATH, software_version)

        ostree_mount.validate_ostree_iso_mount(software_version)

        # Clean up iso directory if it already exists
        # This may happen if a previous installation attempt was abruptly
        # terminated
        iso_dir_path = os.path.join(self.www_iso_root, "nodes", self.name)
        if os.path.isdir(iso_dir_path):
            LOG.info(
                "Found preexisting iso dir for subcloud %s, cleaning up", self.name
            )
            self.cleanup(software_version)

        self.sysinv_client = self.get_sysinv_client()

        # Update the default iso image based on the install values
        # Runs gen-bootloader-iso.sh
        self.update_iso(
            override_path,
            iso_values,
            subcloud_primary_oam_ip_family,
            include_paths,
            kickstart_uri,
        )

        # remove the iso values from the payload
        for k in iso_values:
            payload.pop(k, None)

        # get the boot image url for bmc
        image_base_url = self.get_image_base_url(
            self.get_https_enabled(), self.sysinv_client, subcloud_primary_oam_ip_family
        )
        payload["image"] = os.path.join(
            image_base_url, "iso", software_version, "nodes", self.name, "bootimage.iso"
        )

        # create the rvmc config file
        self.create_rvmc_config_file(override_path, payload)

        # remove the bmc values from the payload
        for k in consts.BMC_INSTALL_VALUES + consts.OPTIONAL_BMC_INSTALL_VALUES:
            payload.pop(k, None)

        # remove the boot image url from the payload
        payload.pop("image", None)

        # create the install override file
        self.create_install_override_file(override_path, payload)

    def install(self, log_file_dir, install_command):
        """Run the install playbook, capturing the serial console if enabled.

        :param log_file_dir: directory for the "<name>_playbook_output.log"
            and "<name>_serial_console.log" files
        :param install_command: command handed to AnsiblePlaybook.run_playbook
        :returns: True if the playbook was aborted and False otherwise
        :raises exceptions.PlaybookExecutionFailed: re-raised after logging
            where the logs can be found
        """
        LOG.info("Start remote install %s", self.name)
        subcloud_log_base_path = os.path.join(log_file_dir, self.name)
        playbook_log_file = f"{subcloud_log_base_path}_playbook_output.log"
        console_log_file = f"{subcloud_log_base_path}_serial_console.log"
        if self.ipmi_logger:
            self.ipmi_logger.start_logging(console_log_file)
        try:
            # Since this is a long-running task we want to register
            # for cleanup on process restart/SWACT.
            ansible = cutils.AnsiblePlaybook(self.name)
            aborted = ansible.run_playbook(playbook_log_file, install_command)
            # Returns True if the playbook was aborted and False otherwise
            return aborted
        except exceptions.PlaybookExecutionFailed:
            msg = (
                f"Failed to install {self.name}, check individual "
                f"logs at {playbook_log_file}. "
            )
            if self.ipmi_logger:
                msg += f"Console log files are available at {console_log_file}. "
            msg += f"Run {dcmanager_consts.ERROR_DESC_CMD} for details"
            LOG.error(msg)
            raise
        finally:
            # Stop the console capture on success, failure and abort alike.
            if self.ipmi_logger:
                self.ipmi_logger.stop_logging()
class IpmiLogger(object):
    """Captures serial console log via external ipmitool script."""

    def __init__(self, subcloud_name, override_path):
        """Record the subcloud name and derive the RVMC config file path.

        :param subcloud_name: name of the subcloud being captured
        :param override_path: base directory; the subcloud name is joined
            onto it to locate the RVMC config file
        """
        self.name = subcloud_name
        self.override_path = os.path.join(override_path, subcloud_name)
        # Note: will not exist yet, but is created before ipmicap_start:
        self.rvmc_config_file = os.path.join(
            self.override_path, consts.RVMC_CONFIG_FILE_NAME
        )

    @staticmethod
    def is_enabled(rvmc_debug_level):
        """Determine if IPMI capture is enabled.

        Decision is based on the global CONF.ipmi_capture value and the given
        rvmc_debug_level. The global CONF.ipmi_capture value defaults to 1,
        which defers the configuration to the per-subcloud rvmc_debug_level
        install value. The CONF.ipmi_capture can be set in
        /etc/dcmanager/dcmanager.conf to override this setting for all
        subclouds.

        CONF.ipmi_capture options:
            0: globally disabled
            1: enabled based on rvmc_debug_level
            2: globally enabled

        :param rvmc_debug_level: per-subcloud debug level; anything that
            cannot be converted to an int (including None) disables capture
        :returns: True if the serial console should be captured
        """
        if CONF.ipmi_capture == 0:
            LOG.debug("IPMI capture is globally disabled")
            return False
        if CONF.ipmi_capture == 2:
            LOG.debug("IPMI capture is globally enabled")
            return True
        try:
            return int(rvmc_debug_level) >= RVMC_DEBUG_LEVEL_IPMI_CAPTURE
        except (TypeError, ValueError):
            # int() raises TypeError for None/containers and ValueError for
            # non-numeric strings; neither should abort the install prep.
            LOG.exception("Invalid rvmc_debug_level in payload: '%s'", rvmc_debug_level)
            return False

    def start_logging(self, log_file):
        """Run the IPMI capture script to capture the serial console logs.

        We must allocate a pty for the shell process for ipmitool to
        properly connect.

        The script is run in a separate thread so that we can wait for the
        process to end (and release the pty) while not blocking the caller.
        Failures are logged and never propagated to the caller.

        :param log_file: path passed to the capture script via "--log"
        """

        def ipmicap_start(log_file):
            """Thread function: Invoke the IPMI capture script.

            Wait for it to finish.
            """
            try:
                ipmi_cmd = [
                    "/usr/local/bin/ipmicap.sh",
                    "--force-deactivate",
                    "--redirect",
                    "--rvmc-config",
                    self.rvmc_config_file,
                    "--log",
                    log_file,
                ]
                msg = "IPMI capture"

                # Unless ipmitool has a console for stdin it fails with error:
                # tcgetattr: Inappropriate ioctl for device
                # Open a pty and use it for our process:
                master_fd, slave_fd = pty.openpty()
                try:
                    # Everything after openpty() runs under the finally below
                    # so both descriptors are always closed.
                    LOG.info(
                        "%s start %s: %s, pty:%s",
                        msg,
                        self.name,
                        " ".join(ipmi_cmd),
                        os.ttyname(slave_fd),
                    )
                    result = subprocess.run(
                        ipmi_cmd,
                        stdin=slave_fd,
                        # capture both streams in stdout:
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                    )
                    output = result.stdout.decode("utf-8").replace("\n", ", ")
                    if result.returncode == 0:
                        if output:
                            LOG.info(
                                "%s finished %s, output: %s",
                                msg,
                                self.name,
                                output,
                            )
                        else:
                            LOG.info("%s finished %s", msg, self.name)
                    else:
                        LOG.warning(
                            "%s failed %s, returncode: %s, output: %s",
                            msg,
                            self.name,
                            result.returncode,
                            output,
                        )
                finally:
                    try:
                        os.close(slave_fd)
                    except Exception:
                        LOG.exception("Close slave_fd failed: %s", slave_fd)
                    try:
                        os.close(master_fd)
                    except Exception:
                        LOG.exception("Close master_fd failed %s", master_fd)
            except Exception:
                LOG.exception("IPMI capture start failed: %s", self.name)

        try:
            capture_thread = threading.Thread(target=ipmicap_start, args=(log_file,))
            capture_thread.start()
        except Exception:
            LOG.exception("IPMI capture start threading failed: %s", self.name)

    def stop_logging(self):
        """Kill the IPMI capture script

        Runs the capture script with "--kill" and logs the outcome; a
        non-zero exit code or an exception is logged, never raised.
        """
        msg = "IPMI capture stop"
        try:
            ipmi_cmd = [
                "/usr/local/bin/ipmicap.sh",
                "--kill",
                "--rvmc-config",
                self.rvmc_config_file,
            ]
            LOG.info("%s invoking %s", msg, " ".join(ipmi_cmd))
            result = subprocess.run(
                ipmi_cmd,
                # capture both streams in stdout:
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            output = result.stdout.decode("utf-8").replace("\n", ", ")
            if result.returncode == 0:
                LOG.info("%s %s, output: %s", msg, self.name, output)
            else:
                LOG.warning(
                    "%s %s failed, returncode: %s, output: %s",
                    msg,
                    self.name,
                    result.returncode,
                    output,
                )
        except Exception:
            LOG.exception("%s %s failed with exception", msg, self.name)