Report Tool: Improve plugin handling

A recent update introduced an empty file (__init__.py) in the plugins
folder, which was causing report traceback failures for off-system runs.

Also, the current handling of the --plugin option is broken.
The fix for that issue led to a few additional, more general plugin
handling improvements.

Test Plan:

PASS: Verify ignore handling of empty plugin files.
PASS: Verify all python file permissions are set to executable
      on fresh pull in git and after on-system package install.
PASS: Verify all plugin file permissions are not executable
      on fresh pull in git and after on-system package install.
PASS: Verify general handling of the --plugin option with space
      delimited plugins that follow.
PASS: Verify correlator is not run if there is no plugin data
      to correlate.
PASS: Verify missing plugin output log files do not lead to a
      file not found error on the console.
PASS: Verify refactored plugin search handling success and
      error paths.
PASS: Verify refactored plugin search handling finds and adds
      built-in and localhost plugins with and without the --plugin
      option specified.
PASS: Verify that previous plugin data is removed prior to a rerun
      of the tool. This is helpful for localhost plugin development.
PASS: Verify handling of adding multiple plugins that span both
      built-in and localhost locations.
PASS: Verify handling of missing plugin(s) when specified with
      the --plugin option.

Regression:

PASS: Verify collector package builds and passes tox.
PASS: Verify both on-system and off-system Report handling.
PASS: Verify collect all using --report option
PASS: Verify logging with and without --debug option.
PASS: Verify no pep8 errors or warnings.

Story: 2010533
Task: 48433
Task: 48432
Task: 48443

Change-Id: I42616daad2de6b0785f11736ef20b11e19f19869
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2023-07-21 16:08:40 +00:00
parent 3b338a6ce3
commit 712187a496
32 changed files with 199 additions and 118 deletions

0
tools/collector/debian-scripts/report/__init__.py Normal file → Executable file

0
tools/collector/debian-scripts/report/algorithms.py Normal file → Executable file

@ -25,6 +25,9 @@ import logging
import os
import re
# internal imports
import algorithms
logger = logging.getLogger(__name__)
@ -47,15 +50,19 @@ class Correlator:
FileNotFoundError
"""
failures = []
try:
failures += self.uncontrolled_swact()
except FileNotFoundError as e:
logger.error(e)
if os.path.exists(os.path.join(
self.plugin_output_dir, algorithms.SWACT_ACTIVITY)):
try:
failures += self.uncontrolled_swact()
except FileNotFoundError as e:
logger.error(e)
try:
failures += self.mtc_errors()
except FileNotFoundError as e:
logger.error(e)
if os.path.exists(os.path.join(
self.plugin_output_dir, algorithms.MAINTENANCE_ERR)):
try:
failures += self.mtc_errors()
except FileNotFoundError as e:
logger.error(e)
events = []
try:
@ -70,10 +77,12 @@ class Correlator:
logger.error(e)
state_changes = []
try:
state_changes += self.get_state_changes(hostname)
except FileNotFoundError as e:
logger.error(e)
if os.path.exists(os.path.join(
self.plugin_output_dir, algorithms.STATE_CHANGES)):
try:
state_changes += self.get_state_changes(hostname)
except FileNotFoundError as e:
logger.error(e)
return (sorted(failures), sorted(events), sorted(alarms),
sorted(state_changes))
@ -96,8 +105,8 @@ class Correlator:
hb_loss = active_failed = go_active_failed = link_down = False
# Open output file from swact activity plugin and read it
file_path = os.path.join(self.plugin_output_dir, "swact_activity")
file_path = os.path.join(self.plugin_output_dir,
algorithms.SWACT_ACTIVITY)
with open(file_path, "r") as swact_activity:
for line in swact_activity:
if "Uncontrolled swact" in line and not start_time:
@ -201,8 +210,8 @@ class Correlator:
daemon_fail = comm_loss = auto_recov_dis = False
# Open output file from maintenance errors plugin and read it
file_path = os.path.join(self.plugin_output_dir, "maintenance_errors")
file_path = os.path.join(self.plugin_output_dir,
algorithms.MAINTENANCE_ERR)
with open(file_path, "r") as mtc:
for line in mtc:
if "auto recovery disabled" in line and not auto_recov_dis:
@ -376,8 +385,8 @@ class Correlator:
hb_loss = False
# Open output file from heartbeat loss plugin and read it
file_path = os.path.join(self.plugin_output_dir, "heartbeat_loss")
file_path = os.path.join(self.plugin_output_dir,
algorithms.HEARTBEAT_LOSS)
with open(file_path, "r") as heartbeat_loss:
for line in heartbeat_loss:
if (re.search("Error : " + host + " (.+) Heartbeat Loss ",
@ -401,8 +410,8 @@ class Correlator:
daemon_fail = False
# Open output file from daemon failures plugin and read it
file_path = os.path.join(self.plugin_output_dir, "daemon_failures")
file_path = os.path.join(self.plugin_output_dir,
algorithms.DAEMON_FAILURES)
with open(file_path, "r") as daemon_failures:
for line in daemon_failures:
if (re.search("\\d " + host +
@ -427,8 +436,8 @@ class Correlator:
puppet_log = None
# Open output file from puppet errors plugin and read it
file_path = os.path.join(self.plugin_output_dir, "puppet_errors")
file_path = os.path.join(self.plugin_output_dir,
algorithms.PUPPET_ERRORS)
with open(file_path, "r") as puppet_errors:
for line in puppet_errors:
if "Error: " in line:
@ -453,49 +462,52 @@ class Correlator:
mnfa_start, mnfa_hist = None, ""
# Open output file from maintenance errors plugin and read it
file_path = os.path.join(self.plugin_output_dir, "maintenance_errors")
with open(file_path, "r") as mtc:
for line in mtc:
if "force failed by SM" in line:
host = re.findall("Error : (.+) is being", line)[0]
if hostname == "all" or host == hostname:
data.append(line[0:19] + " " + host +
" force failed by SM\n")
elif "Graceful Recovery Failed" in line:
host = re.findall("Info : (.+) Task:", line)[0]
if hostname == "all" or host == hostname:
data.append(line[0:19] + " " + host +
" graceful recovery failed\n")
elif "MNFA ENTER" in line:
mnfa_start = datetime.strptime(line[0:19],
"%Y-%m-%dT%H:%M:%S")
elif "MNFA POOL" in line:
pool_hosts = len(line.split("MNFA POOL: ")[1].split())
if mnfa_start:
mnfa_hist += (" " + str(pool_hosts))
else:
data_len = len(data)
for n in range(0, data_len):
event = data[data_len - 1 - n]
if "Multi-node failure" in event:
temp = " " + str(pool_hosts) + ")\n"
data[data_len - 1 - n] = event[:-2] + temp
break
elif "MNFA EXIT" in line:
mnfa_duration = datetime.strptime(line[0:19],
"%Y-%m-%dT%H:%M:%S")
mnfa_duration -= mnfa_start
mnfa_start = mnfa_start.strftime("%Y-%m-%dT%H:%M:%S")
data.append(mnfa_start + " Multi-node failure avoidance " +
"(duration: " + str(mnfa_duration) +
"; history:" + mnfa_hist + ")\n")
mnfa_start, mnfa_hist = None, ""
file_path = os.path.join(self.plugin_output_dir,
algorithms.MAINTENANCE_ERR)
if os.path.exists(file_path):
with open(file_path, "r") as mtc:
for line in mtc:
if "force failed by SM" in line:
host = re.findall("Error : (.+) is being", line)[0]
if hostname == "all" or host == hostname:
data.append(line[0:19] + " " + host +
" force failed by SM\n")
elif "Graceful Recovery Failed" in line:
host = re.findall("Info : (.+) Task:", line)[0]
if hostname == "all" or host == hostname:
data.append(line[0:19] + " " + host +
" graceful recovery failed\n")
elif "MNFA ENTER" in line:
mnfa_start = datetime.strptime(line[0:19],
"%Y-%m-%dT%H:%M:%S")
elif "MNFA POOL" in line:
pool_hosts = len(line.split("MNFA POOL: ")[1].split())
if mnfa_start:
mnfa_hist += (" " + str(pool_hosts))
else:
data_len = len(data)
for n in range(0, data_len):
event = data[data_len - 1 - n]
if "Multi-node failure" in event:
temp = " " + str(pool_hosts) + ")\n"
data[data_len - 1 - n] = event[:-2] + temp
break
elif "MNFA EXIT" in line:
mnfa_duration = datetime.strptime(line[0:19],
"%Y-%m-%dT%H:%M:%S")
mnfa_duration -= mnfa_start
mnfa_start = mnfa_start.strftime("%Y-%m-%dT%H:%M:%S")
data.append(mnfa_start +
" Multi-node failure avoidance " +
"(duration: " + str(mnfa_duration) +
"; history:" + mnfa_hist + ")\n")
mnfa_start, mnfa_hist = None, ""
# Open output file from swact activity plugin and read it
file_path = os.path.join(self.plugin_output_dir, "swact_activity")
file_path = os.path.join(self.plugin_output_dir,
algorithms.SWACT_ACTIVITY)
if not os.path.exists(file_path):
return data
with open(file_path, "r") as swact_activity:
for line in swact_activity:
if (re.search("Service (.+) is failed and has reached max "
@ -523,7 +535,7 @@ class Correlator:
data = []
# Open 'alarm' output file from alarm plugin and read it
file_path = os.path.join(self.plugin_output_dir, "alarm")
file_path = os.path.join(self.plugin_output_dir, algorithms.ALARM)
if not os.path.exists(file_path):
logger.debug("No alarms found")
return data
@ -566,8 +578,8 @@ class Correlator:
data = []
# Open output file from state changes plugin and read it
file_path = os.path.join(self.plugin_output_dir, "state_changes")
file_path = os.path.join(self.plugin_output_dir,
algorithms.STATE_CHANGES)
with open(file_path, "r") as state_changes:
for line in state_changes:
if "is ENABLED" in line:

@ -416,10 +416,12 @@ class ExecutionEngine:
# Sort the lines based on the numeric value
sorted_lines = sorted(lines, key=lambda x: int(x.split()[0]),
reverse=True)
for line in sorted_lines:
logger.info(line)
if sorted_lines:
for line in sorted_lines:
logger.info(line)
else:
sys.exit("no plugin data found ; "
"nothing to correlate ... exiting")
if empty_files:
logger.info("")
logger.info("... nothing found by plugins: %s" % empty_files)
@ -443,10 +445,6 @@ class ExecutionEngine:
plugin_output_dir (string) : directory with output files from
plugins
"""
# logger.info("Correlator Output Dir: %s", output_dir)
# logger.info("Correlator Plugin Dir: %s", plugin_output_dir)
correlator = Correlator(plugin_output_dir)
failures, events, alarms, state_changes = correlator.run(
self.opts.hostname)

@ -45,13 +45,18 @@ class Plugin:
"start": None,
"end": None,
}
if file:
logger.debug("plugin init: %s", file)
if file and os.path.isfile(file):
try:
logger.debug("calling _file_set_attributes file: %s", file)
self._file_set_attributes()
except KeyError as e:
raise e
elif opts:
logger.debug("calling _opts_set_attributes opts: %s", self.opts)
self._opts_set_attributes()
else:
logger.debug("no plugin opts specified")
try:
self.verify()
@ -128,7 +133,7 @@ class Plugin:
"""
plugin_name = os.path.basename(self.file)
HOSTS_ERR = f"plugin: {plugin_name} should not have hosts specified"
HOSTS_ERR = f"plugin: '{plugin_name}' shouldn't have 'hosts' label"
if self.state["algorithm"] == algorithms.SUBSTRING:
self.validate_state(plugin_name, "files")
@ -169,33 +174,34 @@ class Plugin:
datetime.strptime(self.state["start"], "%Y-%m-%d %H:%M:%S")
except ValueError as e:
logger.error(
"plugin : %s needs a valid start time in YYYY-MM-DD \
"plugin '%s' needs a valid start time in YYYY-MM-DD \
HH:MM:SS format", plugin_name)
try:
datetime.strptime(self.state["end"], "%Y-%m-%d %H:%M:%S")
except ValueError as e:
logger.error(
"plugin : %s needs a valid end time in YYYY-MM-DD \
"plugin '%s' needs a valid end time in YYYY-MM-DD \
HH:MM:SS format", plugin_name)
else:
raise ValueError(
f"plugin: {plugin_name} unknown algorithm "
f"plugin: '{plugin_name}' algorithm label unsupported value: "
f"{self.state['algorithm']}"
)
for host in self.state["hosts"]:
if host not in ["controllers", "workers", "storages", "all"]:
raise ValueError(
f"host not recognized: '{host}', accepted hosts are "
f"hosts label has unsupported values: '{host}', "
f"accepted hosts are "
f"'controllers', 'workers', 'storages', 'all'"
)
def validate_state(self, plugin_name, key):
if len(self.state[key]) == 0:
raise ValueError(
f"plugin: {plugin_name} needs {key} specified for "
f"plugin: {plugin_name} needs '{key}' label specified for "
f"substring algorithm"
)

@ -29,6 +29,8 @@ def swact_activity(hosts, start, end):
hosts (dictionary): Paths to folders for each host
start (string): Start time for analysis
end (string): End time for analysis
Returns:
data (list): a list of logs that represent evidence of swact activity
"""
data = []
sm_files = []

0
tools/collector/debian-scripts/report/plugins/alarm Executable file → Normal file

@ -117,6 +117,8 @@ import tarfile
import tempfile
import time
# internal imports
import algorithms
from execution_engine import ExecutionEngine
from plugin import Plugin
@ -177,16 +179,16 @@ parser.add_argument(
parser.add_argument(
"--hostname",
default="all",
help="Specify host for correlator to find significant events and "
"state changes for (default: all hosts)",
help="Specify hostname to produce correlated results for "
"(default: all hosts)",
)
parser.add_argument(
"--plugin", "-p",
default=None,
nargs="*",
help="Specify comma separated list of plugins to run "
"(default: runs all found plugins)",
help="Specify a space delimited list of plugins to run "
"(default: all plugins)",
)
parser.add_argument(
@ -483,6 +485,7 @@ class BundleObject:
self.bundles = [] # list of bundles
self.tars = 0 # number of tar files found
self.tgzs = 0 # number of host tgz files found
self.plugins = []
def debug_state(self, func):
if args.state:
@ -790,6 +793,48 @@ class BundleObject:
self.debug_state("get_bundle_type")
def load_plugin(self, path_plugin=None):
    """Load one plugin file and add it to this object's plugin list.

    Parameters:
        path_plugin: string
            The full path and file name of the plugin to load

    A warning is logged, and nothing is added, when no path is given
    or when the given path does not exist.
    """
    if path_plugin is None:
        logger.warning("Warning: load_plugin failed ; no plugin specified")
        return

    # existence is re-checked here so the method is safe to call on its own
    if not os.path.exists(path_plugin):
        logger.warning("Warning: plugin '%s' not found", path_plugin)
        return

    logger.debug("adding plugin: %s", path_plugin)
    self.plugins.append(Plugin(path_plugin))
def load_plugins(self, path=None):
    """Load every plugin file found directly under a directory.

    Parameters:
        path: string
            The path to the directory of where to load plugins

    Entries that are not regular files (for example a sub-directory or
    a dangling symlink) and empty files like __init__.py are skipped.
    A warning is logged, and nothing is loaded, when path is None, does
    not exist, or is not a directory.
    """
    # isdir rather than exists: a regular file passed as 'path' would
    # otherwise make os.listdir raise NotADirectoryError
    if path is None or not os.path.isdir(path):
        logger.warning("unable to load plugins from %s ; "
                       "path does not exist or is not a directory", path)
        return

    for plugin in os.listdir(path):
        path_plugin = os.path.join(path, plugin)

        # only regular files can be plugins ; checking this first also
        # keeps os.path.getsize from raising on a dangling symlink
        if not os.path.isfile(path_plugin):
            logger.debug("skipping non-file plugin entry '%s'", plugin)
            continue

        # skip over empty files like __init__.py
        if os.path.getsize(path_plugin) == 0:
            logger.debug("skipping empty plugin '%s'", plugin)
            continue

        logger.debug("adding plugin: %s/%s", path, plugin)
        self.plugins.append(Plugin(path_plugin))
# Initialize the Bundle Object. Logging starts in /tmp
obj = BundleObject(input_dir)
@ -853,6 +898,12 @@ if not os.path.exists(output_dir):
logger.error(e)
sys.exit("Permission Error: Unable to create report")
# remove the plugin data if it already exists
plugin_data_output_dir = os.path.join(output_dir, "plugins")
if os.path.exists(plugin_data_output_dir):
logger.debug("cleaning up old plugin data: %s", plugin_data_output_dir)
shutil.rmtree(plugin_data_output_dir)
# relocate logging to the selected bundle directory
remove_logging()
new_log_file = output_dir + "/report.log"
@ -870,35 +921,46 @@ except ValueError as e:
logger.error(str(e))
logger.error("Confirm you are running the report tool on a collect bundle")
# Get the full path to the possible plugin dirs
builtin_plugins_path = os.path.join(report_dir, "plugins")
localhost_plugins_path = os.path.join("/etc/collect", "plugins")
logger.debug("vars(args) : %s", vars(args))
logger.debug("args.algorithm: %s", args.algorithm)
logger.debug("args.plugin : %s", args.plugin)
logger.debug("obj.plugins : %s", obj.plugins)
if args.algorithm:
plugins.append(Plugin(opts=vars(args)))
elif args.plugin:
logger.debug("plugin option specified")
system_info_plugin_added = False
for p in args.plugin:
path = os.path.join(report_dir, "plugins", p)
if os.path.exists(path):
try:
plugins.append(Plugin(path))
except Exception as e:
logger.error(str(e))
logger.debug("searching for plugin '%s'", p)
# look for the plugin
if os.path.exists(os.path.join(builtin_plugins_path, p)):
obj.load_plugin(os.path.join(builtin_plugins_path, p))
elif os.path.exists(os.path.join(localhost_plugins_path, p)):
obj.load_plugin(os.path.join(localhost_plugins_path, p))
else:
logger.warning("%s plugin does not exist", p)
logger.warning("Warning: specified plugin '%s' not found", p)
if p == algorithms.SYSTEM_INFO:
system_info_plugin_added = True
if not system_info_plugin_added:
obj.load_plugin(os.path.join(
builtin_plugins_path, algorithms.SYSTEM_INFO))
else:
# load builtin plugins
builtin_plugins = os.path.join(report_dir, "plugins")
if os.path.exists(builtin_plugins):
for file in os.listdir(builtin_plugins):
plugins.append(Plugin(os.path.join(builtin_plugins, file)))
logger.debug("loading built-in plugin: %s", file)
obj.load_plugins(builtin_plugins_path)
# add localhost plugins
localhost_plugins = os.path.join("/etc/collect", "plugins")
if os.path.exists(localhost_plugins):
for file in os.listdir(localhost_plugins):
plugins.append(Plugin(os.path.join(localhost_plugins, file)))
logger.debug("loading localhost plugin: %s", file)
obj.load_plugins(localhost_plugins_path)
# analyze the collect bundle
engine.execute(plugins, output_dir)
# analyze the collect bundle
engine.execute(obj.plugins, output_dir)
sys.exit()

@ -3,4 +3,5 @@ etc/collect.d/* /etc/collect.d
usr/local/sbin/* /usr/local/sbin
usr/local/bin/collect /usr/local/bin
usr/sbin/collect /usr/sbin
/usr/local/bin/report/* /usr/local/bin/report
usr/local/bin/report/* /usr/local/bin/report
etc/collect/plugins/* /etc/collect/plugins

@ -37,7 +37,7 @@ override_dh_auto_install:
install -m 755 -p report/algorithms.py $(ROOT)/usr/local/bin/report/algorithms.py
install -m 755 -p report/plugin.py $(ROOT)/usr/local/bin/report/plugin.py
install -m 755 -p report/correlator.py $(ROOT)/usr/local/bin/report/correlator.py
install -m 755 -p report/README $(ROOT)/usr/local/bin/report/README
install -m 644 -p report/README $(ROOT)/usr/local/bin/report/README
# Report Tool Plugin Algorithms
install -m 755 -p report/plugin_algs/alarm.py $(ROOT)/usr/local/bin/report/plugin_algs/alarm.py
@ -53,18 +53,18 @@ override_dh_auto_install:
install -m 755 -p report/plugin_algs/system_info.py $(ROOT)/usr/local/bin/report/plugin_algs/system_info.py
# Report Tool Plugins
install -m 755 -p report/plugins/alarm $(ROOT)/usr/local/bin/report/plugins/alarm
install -m 755 -p report/plugins/daemon_failures $(ROOT)/usr/local/bin/report/plugins/daemon_failures
install -m 755 -p report/plugins/heartbeat_loss $(ROOT)/usr/local/bin/report/plugins/heartbeat_loss
install -m 755 -p report/plugins/maintenance_errors $(ROOT)/usr/local/bin/report/plugins/maintenance_errors
install -m 755 -p report/plugins/process_failures $(ROOT)/usr/local/bin/report/plugins/process_failures
install -m 755 -p report/plugins/puppet_errors $(ROOT)/usr/local/bin/report/plugins/puppet_errors
install -m 755 -p report/plugins/sm_errors $(ROOT)/usr/local/bin/report/plugins/sm_errors
install -m 755 -p report/plugins/state_changes $(ROOT)/usr/local/bin/report/plugins/state_changes
install -m 755 -p report/plugins/substring $(ROOT)/usr/local/bin/report/plugins/substring
install -m 755 -p report/plugins/swact_activity $(ROOT)/usr/local/bin/report/plugins/swact_activity
install -m 755 -p report/plugins/system_info $(ROOT)/usr/local/bin/report/plugins/system_info
install -m 755 -p report/plugins/substring_hosts $(SYSCONFDIR)/collect/plugins/substring_hosts
install -m 644 -p report/plugins/alarm $(ROOT)/usr/local/bin/report/plugins/alarm
install -m 644 -p report/plugins/daemon_failures $(ROOT)/usr/local/bin/report/plugins/daemon_failures
install -m 644 -p report/plugins/heartbeat_loss $(ROOT)/usr/local/bin/report/plugins/heartbeat_loss
install -m 644 -p report/plugins/maintenance_errors $(ROOT)/usr/local/bin/report/plugins/maintenance_errors
install -m 644 -p report/plugins/process_failures $(ROOT)/usr/local/bin/report/plugins/process_failures
install -m 644 -p report/plugins/puppet_errors $(ROOT)/usr/local/bin/report/plugins/puppet_errors
install -m 644 -p report/plugins/sm_errors $(ROOT)/usr/local/bin/report/plugins/sm_errors
install -m 644 -p report/plugins/state_changes $(ROOT)/usr/local/bin/report/plugins/state_changes
install -m 644 -p report/plugins/substring $(ROOT)/usr/local/bin/report/plugins/substring
install -m 644 -p report/plugins/swact_activity $(ROOT)/usr/local/bin/report/plugins/swact_activity
install -m 644 -p report/plugins/system_info $(ROOT)/usr/local/bin/report/plugins/system_info
install -m 644 -p report/plugins/substring_hosts $(SYSCONFDIR)/collect/plugins/substring_hosts
# Collect Plugins
install -m 755 -p collect_sysinv.sh $(SYSCONFDIR)/collect.d/collect_sysinv