Add consistent init and config complete checks to collectd plugins

Some of the collectd plugins do not wait for host configuration to
complete before starting to monitor or communicate with external
services such as fm. This leads to the collectd networking plugin
being triggered to run before or while the host is being configured,
which has been seen to lead to segfaults/coredumps within collectd's
internal networking plugin.

To solve this issue, and to reduce startup thrash and a slew of
plugin startup error logs, this update adds consistent initialization
and configuration complete checks to all of the StarlingX plugins so
that monitoring and external service access are not performed until
the host configuration is complete.

Test Plan:

PASS: Verify no plugin sampling till after config is complete
PASS: Verify alarm assert and clear cycle for all plugins
PASS: Verify AIO SX system install
PASS: Verify AIO DX system install
PEND: Verify Standard system install
PASS: Verify logging

Change-Id: I90a5d1c8c3be77269a571738c9499b2e908e1fc5
Closes-Bug: 1872979
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2020-06-18 15:44:32 -04:00
parent 1a5e6c4c3d
commit 63c8d1e55a
10 changed files with 184 additions and 139 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
# Copyright (c) 2018-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -48,15 +48,15 @@ re_keyquoteval = re.compile(r'^\s*(\S+)\s*[=:]\s*\"(\S+)\"\s*')
# Plugin specific control class and object.
class CPU_object:
class CPU_object(pc.PluginObject):
def __init__(self):
super(CPU_object, self).__init__(PLUGIN, '')
self.debug = True
self.verbose = True
self._cache = {}
self._k8s_client = pc.K8sClient()
self.k8s_pods = set()
self.hostname = ''
self.schedstat_version = 0
self.schedstat_supported = True
@ -72,6 +72,7 @@ class CPU_object:
self._data[PLATFORM_CPU_PERCENT] = 0.0
self.elapsed_ms = 0.0
# Instantiate the class
obj = CPU_object()
@ -410,8 +411,11 @@ def config_func(config):
def init_func():
"""Init the plugin."""
# do nothing till config is complete.
if obj.config_complete() is False:
return False
obj.hostname = socket.gethostname()
collectd.info('%s init function for %s' % (PLUGIN, obj.hostname))
# Determine the full list of logical cpus for this host
obj.logical_cpus = get_logical_cpus()
@ -459,14 +463,17 @@ def init_func():
# Gather initial cputime state information.
update_cpu_data(init=True)
collectd.info('%s initialization complete' % PLUGIN)
obj.init_completed()
return pc.PLUGIN_PASS
# Calculate the CPU usage sample
def read_func():
if obj.init_complete is False:
init_func()
return 0
# epoch time in floating seconds
now0 = time.time()

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
# Copyright (c) 2018-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -7,19 +7,17 @@
import os
import random
import collectd
import plugin_common as pc
PLUGIN = 'random number plugin'
# static variables
# define a class here that will persist over read calls
class ExampleObject:
hostname = ""
class ExampleObject(pc.PluginObject):
plugin_data = ['1', '100']
obj = ExampleObject()
obj = ExampleObject(PLUGIN, '')
# The config function - called once on collectd process startup
@ -45,14 +43,24 @@ def config_func(config):
# The init function - called once on collectd process startup
def init_func():
# do nothing till config is complete.
if obj.config_complete() is False:
return False
# get current hostname
obj.hostname = os.uname()[1]
obj.hostname = obj.gethostname()
obj.init_completed()
return 0
# The sample read function - called on every audit interval
def read_func():
if obj.init_complete is False:
init_func()
return 0
# do the work to create the sample
low = int(obj.plugin_data[0])
high = int(obj.plugin_data[1])

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
# Copyright (c) 2018-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -237,6 +237,9 @@ ALARM_ID__TO__PLUGIN_DICT = {ALARM_ID__CPU: PLUGIN__CPU,
ALARM_ID__VSWITCH_PORT: PLUGIN__VSWITCH_PORT,
ALARM_ID__VSWITCH_IFACE: PLUGIN__VSWITCH_IFACE}
# Common plugin object
pluginObject = pc.PluginObject(PLUGIN, '')
#########################################
# The collectd Maintenance Degrade Object
@ -547,8 +550,8 @@ class DegradeObject:
mtcDegradeObj = DegradeObject(MTCE_CMD_RX_PORT)
# PluginObject Class
class PluginObject:
# fmAlarmObject Class
class fmAlarmObject:
dbObj = None # shared database connection obj
host = None # saved hostname
@ -561,7 +564,7 @@ class PluginObject:
fm_connectivity = False
def __init__(self, id, plugin):
"""PluginObject Class constructor"""
"""fmAlarmObject Class constructor"""
# plugin specific static class members.
self.id = id # alarm id ; 100.1??
@ -1121,7 +1124,7 @@ class PluginObject:
try:
collectd.debug("%s %s Get Lock ..." % (PLUGIN, self.plugin))
with PluginObject.lock:
with fmAlarmObject.lock:
obj = self.instance_objects[eid]
return obj
except:
@ -1147,7 +1150,7 @@ class PluginObject:
"""
try:
collectd.debug("%s %s Add Lock ..." % (PLUGIN, self.plugin))
with PluginObject.lock:
with fmAlarmObject.lock:
self.instance_objects[eid] = obj
except:
collectd.error("%s failed to add instance to %s object list" %
@ -1188,7 +1191,7 @@ class PluginObject:
try:
# create a new plugin object
inst_obj = PluginObject(self.id, self.plugin)
inst_obj = fmAlarmObject(self.id, self.plugin)
self._copy_instance_object(inst_obj)
# initialize the object with instance specific data
@ -1259,7 +1262,7 @@ class PluginObject:
# loop over the mount points
for mp in mountpoints:
# create a new plugin object
inst_obj = PluginObject(ALARM_ID__DF, PLUGIN__DF)
inst_obj = fmAlarmObject(ALARM_ID__DF, PLUGIN__DF)
# initialize the object with instance specific data
inst_obj.resource_name = self.resource_name
@ -1282,24 +1285,21 @@ class PluginObject:
(PLUGIN, inst_obj.instance))
PluginObject.host = os.uname()[1]
# ADD_NEW_PLUGIN: add plugin to this table
# This instantiates the plugin objects
PLUGINS = {
PLUGIN__CPU: PluginObject(ALARM_ID__CPU, PLUGIN__CPU),
PLUGIN__MEM: PluginObject(ALARM_ID__MEM, PLUGIN__MEM),
PLUGIN__DF: PluginObject(ALARM_ID__DF, PLUGIN__DF),
PLUGIN__VSWITCH_CPU: PluginObject(ALARM_ID__VSWITCH_CPU,
PLUGIN__VSWITCH_CPU),
PLUGIN__VSWITCH_MEM: PluginObject(ALARM_ID__VSWITCH_MEM,
PLUGIN__VSWITCH_MEM),
PLUGIN__VSWITCH_PORT: PluginObject(ALARM_ID__VSWITCH_PORT,
PLUGIN__VSWITCH_PORT),
PLUGIN__VSWITCH_IFACE: PluginObject(ALARM_ID__VSWITCH_IFACE,
PLUGIN__VSWITCH_IFACE),
PLUGIN__EXAMPLE: PluginObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)}
PLUGIN__CPU: fmAlarmObject(ALARM_ID__CPU, PLUGIN__CPU),
PLUGIN__MEM: fmAlarmObject(ALARM_ID__MEM, PLUGIN__MEM),
PLUGIN__DF: fmAlarmObject(ALARM_ID__DF, PLUGIN__DF),
PLUGIN__VSWITCH_CPU: fmAlarmObject(ALARM_ID__VSWITCH_CPU,
PLUGIN__VSWITCH_CPU),
PLUGIN__VSWITCH_MEM: fmAlarmObject(ALARM_ID__VSWITCH_MEM,
PLUGIN__VSWITCH_MEM),
PLUGIN__VSWITCH_PORT: fmAlarmObject(ALARM_ID__VSWITCH_PORT,
PLUGIN__VSWITCH_PORT),
PLUGIN__VSWITCH_IFACE: fmAlarmObject(ALARM_ID__VSWITCH_IFACE,
PLUGIN__VSWITCH_IFACE),
PLUGIN__EXAMPLE: fmAlarmObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)}
#####################################################################
@ -1359,7 +1359,7 @@ def _build_entity_id(plugin, plugin_instance):
inst_error = False
entity_id = 'host='
entity_id += PluginObject.host
entity_id += fmAlarmObject.host
if plugin == PLUGIN__MEM:
if plugin_instance != 'platform':
@ -1498,7 +1498,7 @@ def _print_state(obj=None):
objs.append(obj)
collectd.debug("%s _print_state Lock ..." % PLUGIN)
with PluginObject.lock:
with fmAlarmObject.lock:
for o in objs:
_print_obj(o)
if len(o.instance_objects):
@ -1520,10 +1520,10 @@ def _database_setup(database):
# http://influxdb-python.readthedocs.io/en/latest/examples.html
# http://influxdb-python.readthedocs.io/en/latest/api-documentation.html
PluginObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database)
if PluginObject.dbObj:
fmAlarmObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database)
if fmAlarmObject.dbObj:
try:
PluginObject.dbObj.create_database('collectd')
fmAlarmObject.dbObj.create_database('collectd')
############################################################
#
@ -1544,14 +1544,14 @@ def _database_setup(database):
#
############################################################
PluginObject.dbObj.create_retention_policy(
fmAlarmObject.dbObj.create_retention_policy(
DATABASE_NAME, '1w', 1, database, True)
except Exception as ex:
if str(ex) == 'database already exists':
try:
collectd.info("%s influxdb:collectd %s" %
(PLUGIN, str(ex)))
PluginObject.dbObj.create_retention_policy(
fmAlarmObject.dbObj.create_retention_policy(
DATABASE_NAME, '1w', 1, database, True)
except Exception as ex:
if str(ex) == 'retention policy already exists':
@ -1568,7 +1568,7 @@ def _database_setup(database):
if not error_str:
found = False
retention = \
PluginObject.dbObj.get_list_retention_policies(database)
fmAlarmObject.dbObj.get_list_retention_policies(database)
for r in range(len(retention)):
if retention[r]["name"] == DATABASE_NAME:
collectd.info("%s influxdb:%s samples retention "
@ -1577,7 +1577,7 @@ def _database_setup(database):
found = True
if found is True:
collectd.info("%s influxdb:%s is setup" % (PLUGIN, database))
PluginObject.database_setup = True
fmAlarmObject.database_setup = True
else:
collectd.error("%s influxdb:%s retention policy NOT setup" %
(PLUGIN, database))
@ -1625,14 +1625,14 @@ def init_func():
"""Collectd FM Notifier Initialization Function"""
mtcDegradeObj.port = MTCE_CMD_RX_PORT
collectd.error("%s mtce port %d" %
(PLUGIN, mtcDegradeObj.port))
collectd.info("%s mtce port %d" %
(PLUGIN, mtcDegradeObj.port))
PluginObject.lock = Lock()
fmAlarmObject.lock = Lock()
PluginObject.host = os.uname()[1]
fmAlarmObject.host = pluginObject.gethostname()
collectd.info("%s %s:%s init function" %
(PLUGIN, tsc.nodetype, PluginObject.host))
(PLUGIN, tsc.nodetype, fmAlarmObject.host))
# Constant CPU Plugin Object Settings
obj = PLUGINS[PLUGIN__CPU]
@ -1737,20 +1737,28 @@ def init_func():
# ...
if tsc.nodetype == 'controller':
PluginObject.database_setup_in_progress = True
fmAlarmObject.database_setup_in_progress = True
_database_setup('collectd')
PluginObject.database_setup_in_progress = False
fmAlarmObject.database_setup_in_progress = False
pluginObject.init_completed()
return 0
# The notifier function inspects the collectd notification and determines if
# the representative alarm needs to be asserted, severity changed, or cleared.
def notifier_func(nObject):
if PluginObject.fm_connectivity is False:
# do nothing till config is complete.
if pluginObject._config_complete is False:
if pluginObject.config_complete() is False:
return 0
if fmAlarmObject.fm_connectivity is False:
# handle multi threading startup
with PluginObject.lock:
if PluginObject.fm_connectivity is True:
with fmAlarmObject.lock:
if fmAlarmObject.fm_connectivity is True:
return 0
##################################################################
@ -1774,7 +1782,7 @@ def notifier_func(nObject):
want_alarm_clear = False
eid = alarm.entity_instance_id
# ignore alarms not for this host
if PluginObject.host not in eid:
if fmAlarmObject.host not in eid:
continue
base_obj = get_base_object(alarm_id)
@ -1834,8 +1842,8 @@ def notifier_func(nObject):
"startup alarm %s" %
(PLUGIN_DEGRADE, ap, alarm_id))
PluginObject.fm_connectivity = True
collectd.info("%s initialization complete" % PLUGIN)
fmAlarmObject.fm_connectivity = True
collectd.info("%s connectivity with fm complete" % PLUGIN)
collectd.debug('%s notification: %s %s:%s - %s %s %s [%s]' % (
PLUGIN,
@ -1867,11 +1875,11 @@ def notifier_func(nObject):
return 0
if tsc.nodetype == 'controller':
if PluginObject.database_setup is False:
if PluginObject.database_setup_in_progress is False:
PluginObject.database_setup_in_progress = True
if fmAlarmObject.database_setup is False:
if fmAlarmObject.database_setup_in_progress is False:
fmAlarmObject.database_setup_in_progress = True
_database_setup('collectd')
PluginObject.database_setup_in_progress = False
fmAlarmObject.database_setup_in_progress = False
# get plugin object
if nObject.plugin in PLUGINS:
@ -1900,7 +1908,7 @@ def notifier_func(nObject):
eid = _build_entity_id(nObject.plugin, nObject.plugin_instance)
try:
# Need lock when reading/writing any obj.instance_objects list
with PluginObject.lock:
with fmAlarmObject.lock:
# we will take an exception if this object is not
# in the list. The exception handling code below will

View File

@ -811,19 +811,13 @@ def config_func(config):
def init_func():
"""Init the plugin"""
if obj.config_done is False:
collectd.info("%s configuration failed" % PLUGIN)
time.sleep(300)
return False
if obj.init_done is False:
if obj.init_ready() is False:
return 0
# do nothing till config is complete.
if obj.config_complete() is False:
return 0
obj.hostname = obj.gethostname()
obj.init_done = True
collectd.info("%s initialization complete" % PLUGIN)
obj.init_completed()
return 0
@ -831,7 +825,7 @@ def init_func():
def read_func():
"""collectd interface monitor plugin read function"""
if obj.init_done is False:
if obj.init_complete is False:
init_func()
return 0

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
# Copyright (c) 2018-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -53,15 +53,15 @@ re_base_mem = re.compile('\"node\d+:(\d+)MB:\d+\"')
# Plugin specific control class and object.
class MEM_object:
class MEM_object(pc.PluginObject):
def __init__(self):
super(MEM_object, self).__init__(PLUGIN, '')
self.debug = False
self.verbose = False
self._cache = {}
self._k8s_client = pc.K8sClient()
self.k8s_pods = set()
self.hostname = ''
self.reserved_MiB = 0.0
self.reserve_all = False
self.strict_memory_accounting = False
@ -369,6 +369,10 @@ def config_func(config):
def init_func():
"""Init the plugin."""
# do nothing till config is complete.
if obj.config_complete() is False:
return 0
obj.hostname = socket.gethostname()
collectd.info('%s: init function for %s' % (PLUGIN, obj.hostname))
@ -382,8 +386,7 @@ def init_func():
collectd.info('%s: reserve_all: %s, reserved_MiB: %d'
% (PLUGIN, obj.reserve_all, obj.reserved_MiB))
collectd.info('%s: initialization complete' % PLUGIN)
obj.init_completed()
return pc.PLUGIN_PASS
@ -391,6 +394,10 @@ def init_func():
def read_func():
"""collectd memory monitor plugin read function"""
if obj.init_complete is False:
init_func()
return 0
# Get epoch time in floating seconds
now0 = time.time()

View File

@ -66,6 +66,9 @@ import collectd
from fm_api import constants as fm_constants
from fm_api import fm_api
import tsconfig.tsconfig as tsc
import plugin_common as pc
import socket
api = fm_api.FaultAPIsV2()
@ -79,12 +82,8 @@ PLUGIN_ALARMID = "100.114"
# define a class here that will persist over read calls
class NtpqObject:
class NtpqObject(pc.PluginObject):
# static variables set in init
hostname = '' # the name of this host
base_eid = '' # the eid for the major alarm
init_complete = False # set to true once config is complete
alarm_raised = False # True when the major alarm is asserted
server_list_conf = [] # list of servers in the /etc/ntp.conf file
@ -106,7 +105,7 @@ class NtpqObject:
# This plugin's class object - persists over read calls
obj = NtpqObject()
obj = NtpqObject(PLUGIN, '')
###############################################################################
@ -278,7 +277,7 @@ def _clear_base_alarm():
###############################################################################
def _remove_ip_from_unreachable_list(ip):
"""Remove an IP address from the unreachable list and clear its NTP alarms"""
"""Remove IP address from the unreachable list and clear its NTP alarms"""
# remove from unreachable list if it's there
if ip and ip in obj.unreachable_servers:
@ -553,12 +552,11 @@ def init_func():
return 0
# do nothing till config is complete.
# init_func will be called again by read_func once config is complete.
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False:
if obj.config_complete() is False:
return 0
# get current hostname
obj.hostname = os.uname()[1]
obj.hostname = obj.gethostname()
if not obj.hostname:
collectd.error("%s failed to get hostname" % PLUGIN)
return 1
@ -617,8 +615,7 @@ def init_func():
else:
collectd.info("%s no major startup alarms found" % PLUGIN)
obj.init_complete = True
obj.init_completed()
return 0
@ -650,9 +647,7 @@ def read_func():
return 0
if obj.init_complete is False:
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True:
collectd.info("%s re-running init" % PLUGIN)
init_func()
init_func()
return 0
# get a list of provisioned ntp servers

View File

@ -855,14 +855,14 @@ def config_func(config):
def init_func():
"""Init the plugin"""
if obj.init_done is False:
if obj.init_ready() is False:
return 0
# Only runs on worker nodes
if 'worker' not in tsc.subfunctions:
return 0
# do nothing till config is complete.
if obj.config_complete() is False:
return 0
# Check whether this host is openstack worker node or not
# OVS and OVSDPDK will only run on openstack worker node
# For non openstack worker node, pid file won't exist
@ -878,10 +878,9 @@ def init_func():
global OVS_VSWITCHD_SOCKET
OVS_VSWITCHD_SOCKET = \
"".join([OVS_VSWITCHD_PATH, ".", pid, ".ctl"])
obj.init_done = True
obj.hostname = obj.gethostname()
collectd.info("%s initialization complete" % PLUGIN)
obj.error_logged = False
obj.init_completed()
elif obj.error_logged is False:
collectd.info("%s failed to retrieve pid for ovs-vswitchd in "
@ -900,7 +899,7 @@ def init_func():
def read_func():
"""collectd ovs interface/port monitor plugin read function"""
if obj.init_done is False:
if obj.init_complete is False:
init_func()
return 0

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
# Copyright (c) 2019-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -15,6 +15,7 @@ import json
import uuid
import httplib2
import socket
import time
import os
from oslo_concurrency import processutils
from fm_api import constants as fm_constants
@ -56,7 +57,7 @@ GROUP_OVERALL = 'overall'
GROUP_FIRST = 'first'
GROUP_PODS = 'pods'
# Overall cpuacct groupings
# Overall cpuacct groupings
GROUP_TOTAL = 'cgroup-total'
GROUP_PLATFORM = 'platform'
GROUP_BASE = 'base'
@ -99,7 +100,7 @@ PLUGIN_FAIL = 1
class PluginObject(object):
def __init__(self, plugin, url):
def __init__(self, plugin, url=""):
# static variables set in init_func
self.plugin = plugin # the name of this plugin
@ -110,9 +111,9 @@ class PluginObject(object):
# dynamic gate variables
self.virtual = False # set to True if host is virtual
self.config_complete = False # set to True once config is complete
self._config_complete = False # set to True once config is complete
self.config_done = False # set true if config_func completed ok
self.init_done = False # set true if init_func completed ok
self.init_complete = False # set true if init_func completed ok
self.fm_connectivity = False # set true when fm connectivity ok
self.alarm_type = fm_constants.FM_ALARM_TYPE_7 # OPERATIONAL
@ -141,6 +142,7 @@ class PluginObject(object):
self.error_logged = False # used to prevent log flooding
self.log_throttle_count = 0 # used to count throttle logs
self.INIT_LOG_THROTTLE = 10 # the init log throttle threshold
self.CONFIG_LOG_THROTTLE = 50 # the config log throttle threshold
self.http_retry_count = 0 # track http error cases
self.HTTP_RETRY_THROTTLE = 6 # http retry threshold
self.phase = 0 # tracks current phase; init, sampling
@ -150,28 +152,57 @@ class PluginObject(object):
###########################################################################
#
# Name : init_ready
# Name : init_completed
#
# Description: Test for init ready condition
# Description: Declare init completed
#
# Parameters : plugin name
#
# Returns : False if initial config complete is not done
# True if initial config complete is done
###########################################################################
def init_completed(self):
"""Declare plugin init complete"""
collectd.info("%s initialization completed" % self.plugin)
self.init_complete = True
###########################################################################
#
# Name : config_complete
#
# Description: Test for config complete condition
#
# Parameters : plugin name
#
# Returns : False if config is not complete
# True if config is complete
#
###########################################################################
def init_ready(self):
"""Test for system init ready state"""
def config_complete(self):
"""Test for config complete state"""
if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False:
self.log_throttle_count += 1
if self.log_throttle_count > self.INIT_LOG_THROTTLE:
collectd.info("%s initialization needs retry" % self.plugin)
if self._config_complete is False:
if tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions:
flag_file = tsc.VOLATILE_WORKER_CONFIG_COMPLETE
elif tsc.nodetype == 'storage':
flag_file = tsc.VOLATILE_STORAGE_CONFIG_COMPLETE
else:
flag_file = tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE
if os.path.exists(flag_file) is False:
self._config_complete = False
self.log_throttle_count += 1
if self.log_throttle_count > self.CONFIG_LOG_THROTTLE:
collectd.info("%s configuration check needs retry" %
self.plugin)
self.log_throttle_count = 0
time.sleep(1)
return False
else:
self._config_complete = True
self.log_throttle_count = 0
return False
else:
self.log_throttle_count = 0
collectd.info("%s configuration completed" % self.plugin)
return True

View File

@ -584,7 +584,8 @@ def read_timestamp_mode():
#####################################################################
def init_func():
if obj.init_ready() is False:
# do nothing till config is complete.
if obj.config_complete() is False:
return False
obj.hostname = obj.gethostname()
@ -630,9 +631,9 @@ def init_func():
obj.controller = True
obj.virtual = obj.is_virtual()
obj.init_done = True
obj.log_throttle_count = 0
collectd.info("%s initialization complete" % PLUGIN)
obj.init_completed()
return 0
#####################################################################
@ -643,7 +644,6 @@ def init_func():
#
# Assumptions: collectd calls init_func one time.
#
#
# retry init if needed
# retry fm connect if needed
# check service enabled state
@ -657,11 +657,7 @@ def read_func():
if obj.virtual is True:
return 0
# check and run init until it reports init_done True
if obj.init_done is False:
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
collectd.info("%s re-running init" % PLUGIN)
obj.log_throttle_count += 1
if obj.init_complete is False:
init_func()
return 0
@ -727,7 +723,6 @@ def read_func():
else:
collectd.info("%s no startup alarms found" % PLUGIN)
obj.config_complete = True
obj.fm_connectivity = True
# assert_all_alarms()
@ -813,7 +808,8 @@ def read_func():
#
# sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET'
#
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE,
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f',
PLUGIN_CONF_FILE,
'-u', '-b', '0', 'GET PORT_DATA_SET'])
port_locked = False
@ -829,7 +825,8 @@ def read_func():
#
# sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP'
#
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE,
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f',
PLUGIN_CONF_FILE,
'-u', '-b', '0', 'GET TIME_STATUS_NP'])
got_master_offset = False

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
# Copyright (c) 2019-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -146,15 +146,14 @@ def init_func():
if tsc.nodetype != 'controller':
return 0
if obj.init_done is False:
if obj.init_ready() is False:
return False
# do nothing till config is complete.
if obj.config_complete() is False:
return 0
obj.hostname = obj.gethostname()
obj.base_eid = 'host=' + obj.hostname
obj.init_done = True
collectd.info("%s initialization complete" % PLUGIN)
obj.init_completed()
return True
@ -166,7 +165,7 @@ def read_func():
if tsc.nodetype != 'controller':
return 0
if obj.init_done is False:
if obj.init_complete is False:
init_func()
return 0