This commit optimizes the SEL event monitoring script used during
subcloud enrollment. The script now runs a single ipmitool sel list -v
command instead of invoking ipmitool sel list followed by multiple
ipmitool sel get calls.
A new mode has been added to the monitoring script that monitors for
all success events, returning the last one, allowing the caller to
detect if multiple events were sent and skip directly to the latest
completed phase. If a failure event is detected, the script exits
immediately.
The IPMI codes used by auto-restore and enrollment have been updated
from OEM-reserved codes to the “System Event” sensor type, which the
BMC never interprets as an error.
Additionally, the script now stops sending the expected_region_name
restore override for factory restores, since the factory-installed
subcloud region name is not expected to match the system controller's
region name.
Test Plan:
01. PASS – Run subcloud enrollment and verify the following:
- Events sent sequentially with delays are returned individually.
- Events sent quickly without delays return only the latest event
captured in the polling interval.
- A failure event is returned immediately when detected.
- Success events followed by a failure event return the failure
event as soon as it appears.
- If no events are detected, the script exits with a timeout error.
02. PASS – Run subcloud auto-restore and factory restore to verify the
new codes are used and that both operations complete successfully.
03. PASS – Time the full enroll-init operation on a slow BMC and verify
it completes faster than the previous implementation.
Story: 2011455
Task: 52858
Change-Id: Ib379e4e4c8308361357914f21fd8d73ab5bafbb4
Signed-off-by: Gustavo Herzmann <gustavo.herzmann@windriver.com>
720 lines
23 KiB
Python
720 lines
23 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# Copyright (c) 2025 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
"""
|
|
Monitors the IPMI System Event Log (SEL) for a specific target event and
|
|
associated event data, or retrieves the last event ID.
|
|
|
|
The script operates in three modes:
|
|
- In monitoring mode, it periodically checks the SEL for the desired events.
|
|
If any of the events are found, the script returns a return code of 0 and the
|
|
matched event in json format. If an event is not found after the maximum
|
|
number of checks, a non-zero return code is returned.
|
|
- In success/failure monitoring mode, it'll return immediately on any success
|
|
event (returning the latest/most recent one found) or failure event. This
|
|
allows the playbook to call the script multiple times and get feedback about
|
|
which stage completed.
|
|
- Alternatively, using --get-last-event makes the script simply return the ID of
|
|
the most recent event in the SEL and exit. If the SEL is empty, it returns -1.
|
|
|
|
For the monitoring modes, it only monitors new events, existing events are
|
|
ignored unless --initial-event-id is specified to start monitoring from a
|
|
specific point.
|
|
"""
|
|
|
|
import argparse
|
|
import base64
|
|
import json
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from typing import Dict
|
|
from typing import List
|
|
from typing import Optional
|
|
from typing import Tuple
|
|
from typing import Union
|
|
|
|
import netaddr
|
|
import yaml
|
|
|
|
# Configure logging to stderr so stdout stays clean for Ansible JSON output
|
|
logging.basicConfig(
|
|
stream=sys.stderr,
|
|
level=logging.INFO,
|
|
format="%(asctime)s %(levelname)s: %(message)s",
|
|
)
|
|
|
|
|
|
def hex_to_int(hex_str: str) -> int:
|
|
return int(hex_str, 16)
|
|
|
|
|
|
def run_command(cmd: list[str]) -> Optional[str]:
|
|
"""Run a shell command and return its output"""
|
|
try:
|
|
result = subprocess.run(
|
|
cmd,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
text=True,
|
|
check=True,
|
|
)
|
|
return result.stdout
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
logging.error(f"Command failed: {' '.join(cmd)}")
|
|
logging.error(f"stderr: {e.stderr.strip()}")
|
|
return None
|
|
|
|
|
|
def exit_script(
|
|
success: bool,
|
|
message: str,
|
|
matched_data: Optional[str] = None,
|
|
result_data: Optional[Dict] = None,
|
|
) -> None:
|
|
if not success:
|
|
logging.error(message)
|
|
|
|
# Output as JSON to stdout for Ansible to parse
|
|
result = {"success": success, "message": message}
|
|
|
|
# Add additional info if an event was matched
|
|
if matched_data:
|
|
result["matched_data"] = matched_data
|
|
|
|
# Add extra result data for the success/failure mode
|
|
if result_data:
|
|
result.update(result_data)
|
|
|
|
print(json.dumps(result))
|
|
sys.exit(0 if success else 1)
|
|
|
|
|
|
class IpmiTool:
|
|
def __init__(self, host, user, password):
|
|
self.base_cmd = [
|
|
"ipmitool",
|
|
"-I",
|
|
"lanplus",
|
|
"-H",
|
|
host,
|
|
"-U",
|
|
user,
|
|
"-P",
|
|
password,
|
|
]
|
|
|
|
@classmethod
|
|
def from_config(cls, path: str) -> "IpmiTool":
|
|
"""Instantiates IpmiTool using a BMC config file"""
|
|
|
|
logging.info(f"Parsing BMC config from {path}")
|
|
|
|
if not os.path.isfile(path):
|
|
exit_script(False, f"BMC config file not found: {path}")
|
|
|
|
try:
|
|
with open(path, "r", encoding="utf-8") as yaml_config:
|
|
cfg = yaml.safe_load(yaml_config)
|
|
|
|
except Exception as ex:
|
|
exit_script(
|
|
False,
|
|
f"Unable to open or parse the BMC configuration file: {path} ({ex})",
|
|
)
|
|
|
|
for key in ("bmc_password", "bmc_address", "bmc_username"):
|
|
if key not in cfg:
|
|
exit_script(False, f"{key} is missing from the BMC configuration file")
|
|
try:
|
|
cfg["bmc_password"] = base64.b64decode(cfg["bmc_password"]).decode("utf-8")
|
|
except Exception as ex:
|
|
exit_script(
|
|
False, f"Failed to decode BMC password found in config file ({ex})"
|
|
)
|
|
|
|
# Validate IP address
|
|
try:
|
|
netaddr.IPAddress(cfg["bmc_address"])
|
|
except Exception as ex:
|
|
exit_script(
|
|
False,
|
|
"Invalid bmc_address found in the BMC configuration "
|
|
f"file: {cfg['bmc_address']} ({ex})",
|
|
)
|
|
|
|
return cls(cfg["bmc_address"], cfg["bmc_username"], cfg["bmc_password"])
|
|
|
|
@staticmethod
|
|
def get_id_from_line(event_line: str) -> int:
|
|
return hex_to_int(event_line.split("|")[0].strip())
|
|
|
|
@staticmethod
|
|
def parse_verbose_sel_output(output: str) -> Dict[int, Dict[str, str]]:
|
|
"""Parse the verbose SEL list output and return a dictionary of events
|
|
|
|
ipmitools sel list -v sample output:
|
|
|
|
SEL Record ID : 01ae
|
|
Record Type : 02
|
|
Timestamp : 09/26/2025 21:10:19
|
|
Generator ID : 0041
|
|
EvM Revision : 04
|
|
Sensor Type : System Event
|
|
Sensor Number : cc
|
|
Event Type : Reserved
|
|
Event Direction : Assertion Event
|
|
Event Data : cc10ef
|
|
Description :
|
|
|
|
SEL Record ID : 01af
|
|
Record Type : 02
|
|
Timestamp : 09/26/2025 21:20:51
|
|
Generator ID : 0020
|
|
EvM Revision : 04
|
|
Sensor Type : System ACPI Power State
|
|
Sensor Number : aa
|
|
Event Type : Sensor-specific Discrete
|
|
Event Direction : Assertion Event
|
|
Event Data : 00ffff
|
|
Description : S0/G0: working
|
|
"""
|
|
events = {}
|
|
current_event = {}
|
|
current_event_id = None
|
|
|
|
for line in output.splitlines():
|
|
line = line.strip()
|
|
|
|
# Skip empty lines and known error messages
|
|
if (
|
|
not line
|
|
or line.startswith("Running")
|
|
or line.startswith("Error")
|
|
or line.startswith("Invalid")
|
|
or line.startswith("Discovered")
|
|
):
|
|
continue
|
|
|
|
# If it's a new event line, store the old one and create a new one
|
|
if line.startswith("SEL Record ID"):
|
|
if current_event_id is not None and current_event:
|
|
events[current_event_id] = current_event
|
|
|
|
current_event_id = hex_to_int(line.split(":")[1].strip())
|
|
current_event = {}
|
|
continue
|
|
|
|
if ":" in line:
|
|
key, value = line.split(":", 1)
|
|
current_event[key.strip()] = value.strip()
|
|
|
|
# Handle the last event
|
|
if current_event_id is not None and current_event:
|
|
events[current_event_id] = current_event
|
|
|
|
return events
|
|
|
|
def get_all_events_detailed(self) -> Dict[int, Dict[str, str]]:
|
|
"""Get all events with detailed information using sel list -v"""
|
|
cmd = self.base_cmd + ["sel", "list", "-v"]
|
|
|
|
output = run_command(cmd)
|
|
if not output:
|
|
return {}
|
|
|
|
return self.parse_verbose_sel_output(output)
|
|
|
|
def get_all_events(self) -> list[str]:
|
|
"""Get all events in the SEL"""
|
|
cmd = self.base_cmd + ["sel", "list"]
|
|
|
|
output = run_command(cmd)
|
|
if not output:
|
|
return []
|
|
return output.strip().splitlines()
|
|
|
|
def get_last_event_id(self) -> Optional[int]:
|
|
"""Get the ID of the most recent event in the SEL"""
|
|
lines = self.get_all_events()
|
|
|
|
if not lines:
|
|
return None
|
|
|
|
return self.get_id_from_line(lines[-1])
|
|
|
|
|
|
def get_last_event_only(ipmi_tool: IpmiTool) -> None:
|
|
"""Get the last event ID and exit"""
|
|
last_event_id = ipmi_tool.get_last_event_id()
|
|
if last_event_id is not None:
|
|
message = f"Last event ID: {last_event_id}"
|
|
result = {"success": True, "message": message, "last_event_id": last_event_id}
|
|
else:
|
|
# SEL is empty, return -1 as the last event ID
|
|
message = "SEL is empty, returning -1 as last event ID"
|
|
result = {"success": True, "message": message, "last_event_id": -1}
|
|
|
|
print(json.dumps(result))
|
|
sys.exit(0)
|
|
|
|
|
|
def get_success_failure_events(
|
|
matching_events: Dict[int, Dict[str, str]],
|
|
failure_codes_lower: List[str],
|
|
success_codes_lower: List[str],
|
|
elapsed_time: float,
|
|
) -> Optional[Tuple[bool, str, Dict]]:
|
|
"""Process events in success/failure monitoring mode.
|
|
|
|
Returns immediately on first failure event, or returns the latest success event
|
|
after processing all events. Returns None if no success or failure events found.
|
|
"""
|
|
success_events = []
|
|
|
|
# Process events in ascending order by event ID
|
|
for event_id in sorted(matching_events.keys()):
|
|
event_details = matching_events[event_id]
|
|
event_data = event_details.get("Event Data", "")
|
|
if not event_data:
|
|
logging.warning(f"Event {event_id} has no Event Data field")
|
|
continue
|
|
|
|
event_data_lower = event_data.lower()
|
|
logging.info(f"Processing event ID {event_id}, data: {event_data}")
|
|
|
|
if event_data_lower in failure_codes_lower:
|
|
failure_event = {
|
|
"event_id": event_id,
|
|
"data": event_data,
|
|
"timestamp": event_details.get("Timestamp", ""),
|
|
"sensor_type": event_details.get("Sensor Type", ""),
|
|
"sensor_number": event_details.get("Sensor Number", ""),
|
|
}
|
|
|
|
message = (
|
|
f"FAILURE event detected! Event ID: {event_id}, "
|
|
f"Event Data: {event_data}"
|
|
)
|
|
|
|
failure_result = {
|
|
"failure_detected": True,
|
|
"failure_event": failure_event,
|
|
"elapsed_time": elapsed_time,
|
|
}
|
|
|
|
logging.error(message)
|
|
|
|
# Return immediately if we found any failure event
|
|
return False, message, failure_result
|
|
|
|
elif event_data_lower in success_codes_lower:
|
|
success_events.append(
|
|
{
|
|
"event_id": event_id,
|
|
"data": event_data,
|
|
"timestamp": event_details.get("Timestamp", ""),
|
|
"sensor_type": event_details.get("Sensor Type", ""),
|
|
"sensor_number": event_details.get("Sensor Number", ""),
|
|
}
|
|
)
|
|
logging.info(
|
|
f"SUCCESS event found! Event ID: {event_id}, "
|
|
f"Event Data: {event_data}"
|
|
)
|
|
|
|
# Return the latest detected success event
|
|
if success_events:
|
|
latest_success_event = success_events[-1]
|
|
message = (
|
|
"SUCCESS event detected! "
|
|
f"Event ID: {latest_success_event['event_id']}, "
|
|
f"Event Data: {latest_success_event['data']}"
|
|
)
|
|
|
|
success_result = {
|
|
"success_event_detected": True,
|
|
"success_event": latest_success_event,
|
|
"elapsed_time": elapsed_time,
|
|
}
|
|
|
|
return True, message, success_result
|
|
|
|
return None
|
|
|
|
|
|
def get_first_matching_event(
|
|
matching_events: Dict[int, Dict[str, str]],
|
|
target_codes_lower: List[str],
|
|
) -> Optional[Tuple[bool, str, str]]:
|
|
"""Process events in standard monitoring mode.
|
|
|
|
Returns the first matching event if found, returns None otherwise.
|
|
"""
|
|
for event_id, event_details in matching_events.items():
|
|
event_data = event_details.get("Event Data", "")
|
|
if not event_data:
|
|
logging.warning(f"Event {event_id} has no Event Data field")
|
|
continue
|
|
|
|
event_data_lower = event_data.lower()
|
|
logging.info(f"Processing event ID {event_id}, data: {event_data}")
|
|
|
|
if event_data_lower in target_codes_lower:
|
|
message = (
|
|
f"Target event found! Event ID: {event_id}, "
|
|
f"Event Data: {event_data}"
|
|
)
|
|
logging.info(message)
|
|
return True, message, event_data
|
|
|
|
logging.debug(
|
|
f"Event {event_id} data '{event_data}' doesn't match target codes"
|
|
)
|
|
|
|
return None
|
|
|
|
|
|
def monitor_events(
|
|
ipmi_tool: IpmiTool,
|
|
sensor_type_filter: Optional[str],
|
|
sensor_number_filter: Optional[str],
|
|
target_codes: List[str],
|
|
interval: float,
|
|
timeout: int,
|
|
initial_event_id: Optional[int] = None,
|
|
success_codes: Optional[List[str]] = None,
|
|
failure_codes: Optional[List[str]] = None,
|
|
) -> Union[Tuple[bool, str, Optional[str]], Tuple[bool, str, Dict]]:
|
|
"""Monitor IPMI SEL for target events
|
|
|
|
Monitoring mode (success_codes=None, failure_codes=None):
|
|
- Returns first matching event from target_codes
|
|
- Returns: (success, message, matched_data)
|
|
|
|
Success/failure monitoring mode (success_codes and failure_codes provided):
|
|
- Exits immediately on any failure_codes
|
|
- Exits immediately on any success_codes (returns latest one found)
|
|
- Returns: (success, message, result_dict)
|
|
"""
|
|
|
|
success_failure_mode = success_codes is not None and failure_codes is not None
|
|
|
|
if initial_event_id is not None:
|
|
last_event_id = initial_event_id
|
|
logging.info(f"Using provided initial event ID: {last_event_id}")
|
|
else:
|
|
last_event_id = ipmi_tool.get_last_event_id()
|
|
if last_event_id is None:
|
|
message = "Failed to get initial event ID, SEL might be empty"
|
|
logging.warning(message)
|
|
last_event_id = -1
|
|
|
|
logging.info(f"Starting monitoring from event ID: {last_event_id}")
|
|
|
|
target_codes_lower = [code.lower() for code in target_codes]
|
|
|
|
if success_failure_mode:
|
|
success_codes_lower = [code.lower() for code in success_codes]
|
|
failure_codes_lower = [code.lower() for code in failure_codes]
|
|
|
|
logging.info(
|
|
f"Success/failure mode: Success codes (return on any): {success_codes}"
|
|
)
|
|
logging.info(
|
|
f"Success/failure mode: Failure codes (exit on any): {failure_codes}"
|
|
)
|
|
else:
|
|
logging.info(f"Monitoring mode: Target codes: {target_codes}")
|
|
|
|
filter_info = []
|
|
if sensor_type_filter:
|
|
filter_info.append(f"sensor type: '{sensor_type_filter}'")
|
|
if sensor_number_filter:
|
|
filter_info.append(f"sensor number: '{sensor_number_filter}'")
|
|
if filter_info:
|
|
logging.info(f"Applying sensor filters: {', '.join(filter_info)}")
|
|
|
|
last_event_id_int = last_event_id
|
|
|
|
start_time = time.monotonic()
|
|
next_check_time = start_time
|
|
check_num = 0
|
|
|
|
while time.monotonic() - start_time < timeout:
|
|
check_num += 1
|
|
current_time = time.monotonic()
|
|
|
|
if current_time < next_check_time:
|
|
sleep_time = next_check_time - current_time
|
|
logging.debug(f"Waiting {sleep_time:.2f}s until next check")
|
|
time.sleep(sleep_time)
|
|
|
|
next_check_time += interval
|
|
|
|
elapsed_time = time.monotonic() - start_time
|
|
remaining_time = timeout - elapsed_time
|
|
|
|
logging.info(
|
|
f"Check {check_num}: Checking for new events... "
|
|
f"(elapsed: {elapsed_time:.1f}s, remaining: {remaining_time:.1f}s)"
|
|
)
|
|
|
|
check_start_time = time.monotonic()
|
|
|
|
all_events_detailed = ipmi_tool.get_all_events_detailed()
|
|
|
|
if not all_events_detailed:
|
|
logging.warning("Failed to get events or SEL is empty")
|
|
continue
|
|
|
|
current_event_id_int = max(all_events_detailed.keys())
|
|
logging.debug(
|
|
f"Current last event ID: {current_event_id_int}, "
|
|
f"previous: {last_event_id_int}"
|
|
)
|
|
|
|
if current_event_id_int == last_event_id_int:
|
|
check_duration = time.monotonic() - check_start_time
|
|
logging.debug(f"No new events found (check took {check_duration:.2f}s)")
|
|
continue
|
|
|
|
if current_event_id_int < last_event_id_int:
|
|
logging.info(
|
|
"Current last event ID is smaller than the previous ID, "
|
|
"assuming SEL was cleared..."
|
|
)
|
|
last_event_id_int = 0
|
|
|
|
new_events = {
|
|
event_id: details
|
|
for event_id, details in all_events_detailed.items()
|
|
if event_id > last_event_id_int
|
|
}
|
|
|
|
logging.info(f"Found {len(new_events)} new events")
|
|
|
|
# Filter events based on sensor type and number
|
|
matching_events = {}
|
|
for event_id, event_details in new_events.items():
|
|
sensor_type = event_details.get("Sensor Type", "")
|
|
sensor_number = event_details.get("Sensor Number", "")
|
|
|
|
if sensor_type_filter and sensor_type_filter.lower() != sensor_type.lower():
|
|
continue
|
|
|
|
if (
|
|
sensor_number_filter
|
|
and sensor_number_filter.lower() != sensor_number.lower()
|
|
):
|
|
continue
|
|
|
|
matching_events[event_id] = event_details
|
|
|
|
logging.info(f"Found {len(matching_events)} events matching sensor filters")
|
|
|
|
result = None
|
|
if success_failure_mode:
|
|
result = get_success_failure_events(
|
|
matching_events, failure_codes_lower, success_codes_lower, elapsed_time
|
|
)
|
|
else:
|
|
result = get_first_matching_event(matching_events, target_codes_lower)
|
|
|
|
# If we found a matching event during this poll interval, return immediately
|
|
if result is not None:
|
|
return result
|
|
|
|
last_event_id_int = current_event_id_int
|
|
|
|
check_duration = time.monotonic() - check_start_time
|
|
logging.debug(f"Check completed in {check_duration:.2f}s")
|
|
|
|
# Timeout reached
|
|
elapsed_time = time.monotonic() - start_time
|
|
|
|
if success_failure_mode:
|
|
message = (
|
|
f"Monitoring timed out after {elapsed_time:.1f}s without "
|
|
"detecting any success or failure events"
|
|
)
|
|
logging.warning(message)
|
|
|
|
timeout_result = {
|
|
"timeout_reached": True,
|
|
"elapsed_time": elapsed_time,
|
|
}
|
|
|
|
return False, message, timeout_result
|
|
|
|
message = f"Monitoring timed out after {elapsed_time:.1f} seconds"
|
|
logging.warning(message)
|
|
return False, message, None
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description=(
|
|
"Monitor new IPMI SEL entries for success/failure events or get "
|
|
"last event ID."
|
|
),
|
|
epilog="""\
|
|
Examples:
|
|
|
|
1) Query mode: get the last SEL event id (or -1 if empty) and exit.
|
|
ipmi_sel_event_monitor.py \\
|
|
--config-file rvmc-config.yaml \\
|
|
--get-last-event
|
|
|
|
2) Monitoring mode: wait for first matching event and return immediately
|
|
ipmi_sel_event_monitor.py \\
|
|
--config-file rvmc-config.yaml \\
|
|
--sensor-type "System Event" \\
|
|
--sensor-number "cc" \\
|
|
--data-values "ffffe6,ffffe7" \\
|
|
--interval 15 \\
|
|
--timeout 300
|
|
|
|
3) Success/failure monitoring mode: return immediately on ANY success event
|
|
(latest one) or failure event.
|
|
ipmi_sel_event_monitor.py \\
|
|
--config-file rvmc-config.yaml \\
|
|
--sensor-type "System Event" \\
|
|
--sensor-number "cc" \\
|
|
--success-codes "ffffe0,ffffe9,ffffe2,ffffea,ffffe4,ffffe6" \\
|
|
--failure-codes "ffffe1,ffffec,ffffed,ffffee,ffffe3,fffff0" \\
|
|
--interval 15 \\
|
|
--timeout 300
|
|
""",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
)
|
|
|
|
# Common args
|
|
parser.add_argument(
|
|
"--config-file",
|
|
required=True,
|
|
help="Path to BMC configuration file containing host, username, and password",
|
|
)
|
|
parser.add_argument(
|
|
"--sensor-type",
|
|
help="Filter by sensor type (e.g., 'Unknown', 'System Event')",
|
|
)
|
|
parser.add_argument(
|
|
"--sensor-number",
|
|
help="Filter by sensor number (e.g., '01', 'ff')",
|
|
)
|
|
parser.add_argument(
|
|
"--initial-event-id",
|
|
type=int,
|
|
help=(
|
|
"Initial event ID to start monitoring from "
|
|
"(only monitor events after this ID)"
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--interval", type=int, default=30, help="Check interval in seconds"
|
|
)
|
|
parser.add_argument("--timeout", type=int, default=3600, help="Timeout in seconds")
|
|
|
|
# Monitoring mode
|
|
parser.add_argument(
|
|
"--data-values",
|
|
help="Comma-separated list of event data values to match",
|
|
)
|
|
|
|
# Failure/success mode
|
|
parser.add_argument(
|
|
"--success-codes",
|
|
help=(
|
|
"Comma-separated list of success event codes "
|
|
"(return immediately on ANY of these)"
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--failure-codes",
|
|
help=(
|
|
"Comma-separated list of failure event codes "
|
|
"(exit immediately on ANY of these)"
|
|
),
|
|
)
|
|
|
|
# Query mode
|
|
parser.add_argument(
|
|
"--get-last-event",
|
|
action="store_true",
|
|
help="Get the last event ID and exit (ignores other monitoring parameters)",
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
if args.get_last_event or args.data_values:
|
|
# For query or monitoring mode, no extra validation is necessary
|
|
pass
|
|
elif args.success_codes or args.failure_codes:
|
|
if not args.success_codes:
|
|
parser.error("--success-codes is required when using success/failure mode")
|
|
if not args.failure_codes:
|
|
parser.error("--failure-codes is required when using success/failure mode")
|
|
else:
|
|
parser.error(
|
|
"Must specify either --get-last-event, --data-values, or both "
|
|
"--success-codes and --failure-codes"
|
|
)
|
|
|
|
try:
|
|
ipmi_tool = IpmiTool.from_config(args.config_file)
|
|
|
|
if args.get_last_event:
|
|
get_last_event_only(ipmi_tool)
|
|
|
|
elif args.success_codes and args.failure_codes:
|
|
success_codes = [code.strip() for code in args.success_codes.split(",")]
|
|
failure_codes = [code.strip() for code in args.failure_codes.split(",")]
|
|
all_target_codes = success_codes + failure_codes
|
|
|
|
success, message, result_data = monitor_events(
|
|
ipmi_tool,
|
|
args.sensor_type,
|
|
args.sensor_number,
|
|
all_target_codes,
|
|
args.interval,
|
|
args.timeout,
|
|
args.initial_event_id,
|
|
success_codes,
|
|
failure_codes,
|
|
)
|
|
|
|
exit_script(success, message, None, result_data)
|
|
|
|
else:
|
|
event_data_values = [d.strip() for d in args.data_values.split(",")]
|
|
|
|
success, message, matched_data = monitor_events(
|
|
ipmi_tool,
|
|
args.sensor_type,
|
|
args.sensor_number,
|
|
event_data_values,
|
|
args.interval,
|
|
args.timeout,
|
|
args.initial_event_id,
|
|
)
|
|
|
|
exit_script(success, message, matched_data)
|
|
|
|
except Exception as e:
|
|
exit_script(
|
|
False,
|
|
f"Unexpected exception while monitoring IPMI SEL for target events: {e}",
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|