From 63c8d1e55aecfb8aed4f98e29ccf0dc6ccd18cf3 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Thu, 18 Jun 2020 15:44:32 -0400 Subject: [PATCH] Add consistent init and config complete checks to collectd plugins Some of the collectd plugins do not wait for configuration to complete before starting to monitor or communicate with external services such as fm. This leads to the collectd networking plugin being triggered to run before or while the host is being configured, which has been seen to lead to collectd segfaults/coredumps within collectd's internal networking plugin. To solve this issue, and to reduce startup thrash and the slew of plugin startup error logs, this update adds consistent initialization and configuration complete checks to all of the StarlingX plugins so monitoring and external service access is not performed until the host configuration is complete. Test Plan: PASS: Verify no plugin sampling till after config is complete PASS: Verify alarm assert and clear cycle for all plugins PASS: Install AIO SX system install PASS: Install AIO DX system install PEND: Verify Standard system install PASS: Verify logging Change-Id: I90a5d1c8c3be77269a571738c9499b2e908e1fc5 Closes-Bug: 1872979 Signed-off-by: Eric MacDonald --- collectd-extensions/src/cpu.py | 19 ++-- collectd-extensions/src/example.py | 22 +++-- collectd-extensions/src/fm_notifier.py | 108 ++++++++++++----------- collectd-extensions/src/interface.py | 16 ++-- collectd-extensions/src/memory.py | 17 ++-- collectd-extensions/src/ntpq.py | 25 +++--- collectd-extensions/src/ovs_interface.py | 13 ++- collectd-extensions/src/plugin_common.py | 67 ++++++++++---- collectd-extensions/src/ptp.py | 23 +++-- collectd-extensions/src/remotels.py | 13 ++- 10 files changed, 184 insertions(+), 139 deletions(-) diff --git a/collectd-extensions/src/cpu.py b/collectd-extensions/src/cpu.py index 5b366f2..b135463 100755 --- a/collectd-extensions/src/cpu.py +++ b/collectd-extensions/src/cpu.py @@ -1,5 +1,5 @@ # -# 
Copyright (c) 2018-2019 Wind River Systems, Inc. +# Copyright (c) 2018-2020 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -48,15 +48,15 @@ re_keyquoteval = re.compile(r'^\s*(\S+)\s*[=:]\s*\"(\S+)\"\s*') # Plugin specific control class and object. -class CPU_object: +class CPU_object(pc.PluginObject): def __init__(self): + super(CPU_object, self).__init__(PLUGIN, '') self.debug = True self.verbose = True self._cache = {} self._k8s_client = pc.K8sClient() self.k8s_pods = set() - self.hostname = '' self.schedstat_version = 0 self.schedstat_supported = True @@ -72,6 +72,7 @@ class CPU_object: self._data[PLATFORM_CPU_PERCENT] = 0.0 self.elapsed_ms = 0.0 + # Instantiate the class obj = CPU_object() @@ -410,8 +411,11 @@ def config_func(config): def init_func(): """Init the plugin.""" + # do nothing till config is complete. + if obj.config_complete() is False: + return False + obj.hostname = socket.gethostname() - collectd.info('%s init function for %s' % (PLUGIN, obj.hostname)) # Determine the full list of logical cpus for this host obj.logical_cpus = get_logical_cpus() @@ -459,14 +463,17 @@ def init_func(): # Gather initial cputime state information. update_cpu_data(init=True) - collectd.info('%s initialization complete' % PLUGIN) - + obj.init_completed() return pc.PLUGIN_PASS # Calculate the CPU usage sample def read_func(): + if obj.init_complete is False: + init_func() + return 0 + # epoch time in floating seconds now0 = time.time() diff --git a/collectd-extensions/src/example.py b/collectd-extensions/src/example.py index f865145..50ad8c4 100755 --- a/collectd-extensions/src/example.py +++ b/collectd-extensions/src/example.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2020 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -7,19 +7,17 @@ import os import random import collectd +import plugin_common as pc PLUGIN = 'random number plugin' -# static variables - # define a class here that will persist over read calls -class ExampleObject: - hostname = "" +class ExampleObject(pc.PluginObject): plugin_data = ['1', '100'] -obj = ExampleObject() +obj = ExampleObject(PLUGIN, '') # The config function - called once on collectd process startup @@ -45,14 +43,24 @@ def config_func(config): # The init function - called once on collectd process startup def init_func(): + # do nothing till config is complete. + if obj.config_complete() is False: + return False + # get current hostname - obj.hostname = os.uname()[1] + obj.hostname = obj.gethostname() + + obj.init_completed() return 0 # The sample read function - called on every audit interval def read_func(): + if obj.init_complete is False: + init_func() + return 0 + # do the work to create the sample low = int(obj.plugin_data[0]) high = int(obj.plugin_data[1]) diff --git a/collectd-extensions/src/fm_notifier.py b/collectd-extensions/src/fm_notifier.py index 0f031d3..c84e136 100755 --- a/collectd-extensions/src/fm_notifier.py +++ b/collectd-extensions/src/fm_notifier.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2019 Wind River Systems, Inc. +# Copyright (c) 2018-2020 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -237,6 +237,9 @@ ALARM_ID__TO__PLUGIN_DICT = {ALARM_ID__CPU: PLUGIN__CPU, ALARM_ID__VSWITCH_PORT: PLUGIN__VSWITCH_PORT, ALARM_ID__VSWITCH_IFACE: PLUGIN__VSWITCH_IFACE} +# Common plugin object +pluginObject = pc.PluginObject(PLUGIN, '') + ######################################### # The collectd Maintenance Degrade Object @@ -547,8 +550,8 @@ class DegradeObject: mtcDegradeObj = DegradeObject(MTCE_CMD_RX_PORT) -# PluginObject Class -class PluginObject: +# fmAlarmObject Class +class fmAlarmObject: dbObj = None # shared database connection obj host = None # saved hostname @@ -561,7 +564,7 @@ class PluginObject: fm_connectivity = False def __init__(self, id, plugin): - """PluginObject Class constructor""" + """fmAlarmObject Class constructor""" # plugin specific static class members. self.id = id # alarm id ; 100.1?? @@ -1121,7 +1124,7 @@ class PluginObject: try: collectd.debug("%s %s Get Lock ..." % (PLUGIN, self.plugin)) - with PluginObject.lock: + with fmAlarmObject.lock: obj = self.instance_objects[eid] return obj except: @@ -1147,7 +1150,7 @@ class PluginObject: """ try: collectd.debug("%s %s Add Lock ..." 
% (PLUGIN, self.plugin)) - with PluginObject.lock: + with fmAlarmObject.lock: self.instance_objects[eid] = obj except: collectd.error("%s failed to add instance to %s object list" % @@ -1188,7 +1191,7 @@ class PluginObject: try: # create a new plugin object - inst_obj = PluginObject(self.id, self.plugin) + inst_obj = fmAlarmObject(self.id, self.plugin) self._copy_instance_object(inst_obj) # initialize the object with instance specific data @@ -1259,7 +1262,7 @@ class PluginObject: # loop over the mount points for mp in mountpoints: # create a new plugin object - inst_obj = PluginObject(ALARM_ID__DF, PLUGIN__DF) + inst_obj = fmAlarmObject(ALARM_ID__DF, PLUGIN__DF) # initialize the object with instance specific data inst_obj.resource_name = self.resource_name @@ -1282,24 +1285,21 @@ class PluginObject: (PLUGIN, inst_obj.instance)) -PluginObject.host = os.uname()[1] - - # ADD_NEW_PLUGIN: add plugin to this table # This instantiates the plugin objects PLUGINS = { - PLUGIN__CPU: PluginObject(ALARM_ID__CPU, PLUGIN__CPU), - PLUGIN__MEM: PluginObject(ALARM_ID__MEM, PLUGIN__MEM), - PLUGIN__DF: PluginObject(ALARM_ID__DF, PLUGIN__DF), - PLUGIN__VSWITCH_CPU: PluginObject(ALARM_ID__VSWITCH_CPU, - PLUGIN__VSWITCH_CPU), - PLUGIN__VSWITCH_MEM: PluginObject(ALARM_ID__VSWITCH_MEM, - PLUGIN__VSWITCH_MEM), - PLUGIN__VSWITCH_PORT: PluginObject(ALARM_ID__VSWITCH_PORT, - PLUGIN__VSWITCH_PORT), - PLUGIN__VSWITCH_IFACE: PluginObject(ALARM_ID__VSWITCH_IFACE, - PLUGIN__VSWITCH_IFACE), - PLUGIN__EXAMPLE: PluginObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)} + PLUGIN__CPU: fmAlarmObject(ALARM_ID__CPU, PLUGIN__CPU), + PLUGIN__MEM: fmAlarmObject(ALARM_ID__MEM, PLUGIN__MEM), + PLUGIN__DF: fmAlarmObject(ALARM_ID__DF, PLUGIN__DF), + PLUGIN__VSWITCH_CPU: fmAlarmObject(ALARM_ID__VSWITCH_CPU, + PLUGIN__VSWITCH_CPU), + PLUGIN__VSWITCH_MEM: fmAlarmObject(ALARM_ID__VSWITCH_MEM, + PLUGIN__VSWITCH_MEM), + PLUGIN__VSWITCH_PORT: fmAlarmObject(ALARM_ID__VSWITCH_PORT, + PLUGIN__VSWITCH_PORT), + 
PLUGIN__VSWITCH_IFACE: fmAlarmObject(ALARM_ID__VSWITCH_IFACE, + PLUGIN__VSWITCH_IFACE), + PLUGIN__EXAMPLE: fmAlarmObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)} ##################################################################### @@ -1359,7 +1359,7 @@ def _build_entity_id(plugin, plugin_instance): inst_error = False entity_id = 'host=' - entity_id += PluginObject.host + entity_id += fmAlarmObject.host if plugin == PLUGIN__MEM: if plugin_instance != 'platform': @@ -1498,7 +1498,7 @@ def _print_state(obj=None): objs.append(obj) collectd.debug("%s _print_state Lock ..." % PLUGIN) - with PluginObject.lock: + with fmAlarmObject.lock: for o in objs: _print_obj(o) if len(o.instance_objects): @@ -1520,10 +1520,10 @@ def _database_setup(database): # http://influxdb-python.readthedocs.io/en/latest/examples.html # http://influxdb-python.readthedocs.io/en/latest/api-documentation.html - PluginObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database) - if PluginObject.dbObj: + fmAlarmObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database) + if fmAlarmObject.dbObj: try: - PluginObject.dbObj.create_database('collectd') + fmAlarmObject.dbObj.create_database('collectd') ############################################################ # @@ -1544,14 +1544,14 @@ def _database_setup(database): # ############################################################ - PluginObject.dbObj.create_retention_policy( + fmAlarmObject.dbObj.create_retention_policy( DATABASE_NAME, '1w', 1, database, True) except Exception as ex: if str(ex) == 'database already exists': try: collectd.info("%s influxdb:collectd %s" % (PLUGIN, str(ex))) - PluginObject.dbObj.create_retention_policy( + fmAlarmObject.dbObj.create_retention_policy( DATABASE_NAME, '1w', 1, database, True) except Exception as ex: if str(ex) == 'retention policy already exists': @@ -1568,7 +1568,7 @@ def _database_setup(database): if not error_str: found = False retention = \ - PluginObject.dbObj.get_list_retention_policies(database) + 
fmAlarmObject.dbObj.get_list_retention_policies(database) for r in range(len(retention)): if retention[r]["name"] == DATABASE_NAME: collectd.info("%s influxdb:%s samples retention " @@ -1577,7 +1577,7 @@ def _database_setup(database): found = True if found is True: collectd.info("%s influxdb:%s is setup" % (PLUGIN, database)) - PluginObject.database_setup = True + fmAlarmObject.database_setup = True else: collectd.error("%s influxdb:%s retention policy NOT setup" % (PLUGIN, database)) @@ -1625,14 +1625,14 @@ def init_func(): """Collectd FM Notifier Initialization Function""" mtcDegradeObj.port = MTCE_CMD_RX_PORT - collectd.error("%s mtce port %d" % - (PLUGIN, mtcDegradeObj.port)) + collectd.info("%s mtce port %d" % + (PLUGIN, mtcDegradeObj.port)) - PluginObject.lock = Lock() + fmAlarmObject.lock = Lock() - PluginObject.host = os.uname()[1] + fmAlarmObject.host = pluginObject.gethostname() collectd.info("%s %s:%s init function" % - (PLUGIN, tsc.nodetype, PluginObject.host)) + (PLUGIN, tsc.nodetype, fmAlarmObject.host)) # Constant CPU Plugin Object Settings obj = PLUGINS[PLUGIN__CPU] @@ -1737,20 +1737,28 @@ def init_func(): # ... if tsc.nodetype == 'controller': - PluginObject.database_setup_in_progress = True + fmAlarmObject.database_setup_in_progress = True _database_setup('collectd') - PluginObject.database_setup_in_progress = False + fmAlarmObject.database_setup_in_progress = False + + pluginObject.init_completed() + return 0 # The notifier function inspects the collectd notification and determines if # the representative alarm needs to be asserted, severity changed, or cleared. def notifier_func(nObject): - if PluginObject.fm_connectivity is False: + # do nothing till config is complete. 
+ if pluginObject._config_complete is False: + if pluginObject.config_complete() is False: + return 0 + + if fmAlarmObject.fm_connectivity is False: # handle multi threading startup - with PluginObject.lock: - if PluginObject.fm_connectivity is True: + with fmAlarmObject.lock: + if fmAlarmObject.fm_connectivity is True: return 0 ################################################################## @@ -1774,7 +1782,7 @@ def notifier_func(nObject): want_alarm_clear = False eid = alarm.entity_instance_id # ignore alarms not for this host - if PluginObject.host not in eid: + if fmAlarmObject.host not in eid: continue base_obj = get_base_object(alarm_id) @@ -1834,8 +1842,8 @@ def notifier_func(nObject): "startup alarm %s" % (PLUGIN_DEGRADE, ap, alarm_id)) - PluginObject.fm_connectivity = True - collectd.info("%s initialization complete" % PLUGIN) + fmAlarmObject.fm_connectivity = True + collectd.info("%s connectivity with fm complete" % PLUGIN) collectd.debug('%s notification: %s %s:%s - %s %s %s [%s]' % ( PLUGIN, @@ -1867,11 +1875,11 @@ def notifier_func(nObject): return 0 if tsc.nodetype == 'controller': - if PluginObject.database_setup is False: - if PluginObject.database_setup_in_progress is False: - PluginObject.database_setup_in_progress = True + if fmAlarmObject.database_setup is False: + if fmAlarmObject.database_setup_in_progress is False: + fmAlarmObject.database_setup_in_progress = True _database_setup('collectd') - PluginObject.database_setup_in_progress = False + fmAlarmObject.database_setup_in_progress = False # get plugin object if nObject.plugin in PLUGINS: @@ -1900,7 +1908,7 @@ def notifier_func(nObject): eid = _build_entity_id(nObject.plugin, nObject.plugin_instance) try: # Need lock when reading/writing any obj.instance_objects list - with PluginObject.lock: + with fmAlarmObject.lock: # we will take an exception if this object is not # in the list. 
The exception handling code below will diff --git a/collectd-extensions/src/interface.py b/collectd-extensions/src/interface.py index 3f9a472..0a2022c 100755 --- a/collectd-extensions/src/interface.py +++ b/collectd-extensions/src/interface.py @@ -811,19 +811,13 @@ def config_func(config): def init_func(): """Init the plugin""" - if obj.config_done is False: - collectd.info("%s configuration failed" % PLUGIN) - time.sleep(300) - return False - - if obj.init_done is False: - if obj.init_ready() is False: - return 0 + # do nothing till config is complete. + if obj.config_complete() is False: + return 0 obj.hostname = obj.gethostname() - obj.init_done = True - collectd.info("%s initialization complete" % PLUGIN) + obj.init_completed() return 0 @@ -831,7 +825,7 @@ def init_func(): def read_func(): """collectd interface monitor plugin read function""" - if obj.init_done is False: + if obj.init_complete is False: init_func() return 0 diff --git a/collectd-extensions/src/memory.py b/collectd-extensions/src/memory.py index 968abc9..a163a7f 100755 --- a/collectd-extensions/src/memory.py +++ b/collectd-extensions/src/memory.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2019 Wind River Systems, Inc. +# Copyright (c) 2018-2020 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -53,15 +53,15 @@ re_base_mem = re.compile('\"node\d+:(\d+)MB:\d+\"') # Plugin specific control class and object. -class MEM_object: +class MEM_object(pc.PluginObject): def __init__(self): + super(MEM_object, self).__init__(PLUGIN, '') self.debug = False self.verbose = False self._cache = {} self._k8s_client = pc.K8sClient() self.k8s_pods = set() - self.hostname = '' self.reserved_MiB = 0.0 self.reserve_all = False self.strict_memory_accounting = False @@ -369,6 +369,10 @@ def config_func(config): def init_func(): """Init the plugin.""" + # do nothing till config is complete. 
+ if obj.config_complete() is False: + return 0 + obj.hostname = socket.gethostname() collectd.info('%s: init function for %s' % (PLUGIN, obj.hostname)) @@ -382,8 +386,7 @@ def init_func(): collectd.info('%s: reserve_all: %s, reserved_MiB: %d' % (PLUGIN, obj.reserve_all, obj.reserved_MiB)) - collectd.info('%s: initialization complete' % PLUGIN) - + obj.init_completed() return pc.PLUGIN_PASS @@ -391,6 +394,10 @@ def init_func(): def read_func(): """collectd memory monitor plugin read function""" + if obj.init_complete is False: + init_func() + return 0 + # Get epoch time in floating seconds now0 = time.time() diff --git a/collectd-extensions/src/ntpq.py b/collectd-extensions/src/ntpq.py index e5abb57..166f513 100755 --- a/collectd-extensions/src/ntpq.py +++ b/collectd-extensions/src/ntpq.py @@ -66,6 +66,9 @@ import collectd from fm_api import constants as fm_constants from fm_api import fm_api import tsconfig.tsconfig as tsc + +import plugin_common as pc + import socket api = fm_api.FaultAPIsV2() @@ -79,12 +82,8 @@ PLUGIN_ALARMID = "100.114" # define a class here that will persist over read calls -class NtpqObject: +class NtpqObject(pc.PluginObject): - # static variables set in init - hostname = '' # the name of this host - base_eid = '' # the eid for the major alarm - init_complete = False # set to true once config is complete alarm_raised = False # True when the major alarm is asserted server_list_conf = [] # list of servers in the /etc/ntp.conf file @@ -106,7 +105,7 @@ class NtpqObject: # This plugin's class object - persists over read calls -obj = NtpqObject() +obj = NtpqObject(PLUGIN, '') ############################################################################### @@ -278,7 +277,7 @@ def _clear_base_alarm(): ############################################################################### def _remove_ip_from_unreachable_list(ip): - """Remove an IP address from the unreachable list and clear its NTP alarms""" + """Remove IP address from the unreachable list and 
clear its NTP alarms""" # remove from unreachable list if its there if ip and ip in obj.unreachable_servers: @@ -553,12 +552,11 @@ def init_func(): return 0 # do nothing till config is complete. - # init_func will be called again by read_func once config is complete. - if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False: + if obj.config_complete() is False: return 0 # get current hostname - obj.hostname = os.uname()[1] + obj.hostname = obj.gethostname() if not obj.hostname: collectd.error("%s failed to get hostname" % PLUGIN) return 1 @@ -617,8 +615,7 @@ def init_func(): else: collectd.info("%s no major startup alarms found" % PLUGIN) - obj.init_complete = True - + obj.init_completed() return 0 @@ -650,9 +647,7 @@ def read_func(): return 0 if obj.init_complete is False: - if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True: - collectd.info("%s re-running init" % PLUGIN) - init_func() + init_func() return 0 # get a list if provisioned ntp servers diff --git a/collectd-extensions/src/ovs_interface.py b/collectd-extensions/src/ovs_interface.py index a1c4eda..d2877c6 100755 --- a/collectd-extensions/src/ovs_interface.py +++ b/collectd-extensions/src/ovs_interface.py @@ -855,14 +855,14 @@ def config_func(config): def init_func(): """Init the plugin""" - if obj.init_done is False: - if obj.init_ready() is False: - return 0 - # Only runs on worker nodes if 'worker' not in tsc.subfunctions: return 0 + # do nothing till config is complete. 
+ if obj.config_complete() is False: + return 0 + # Check whether this host is openstack worker node or not # OVS and OVSDPDK will only run on openstack worker node # For non openstack worker node, pid file won't exist @@ -878,10 +878,9 @@ def init_func(): global OVS_VSWITCHD_SOCKET OVS_VSWITCHD_SOCKET = \ "".join([OVS_VSWITCHD_PATH, ".", pid, ".ctl"]) - obj.init_done = True obj.hostname = obj.gethostname() - collectd.info("%s initialization complete" % PLUGIN) obj.error_logged = False + obj.init_completed() elif obj.error_logged is False: collectd.info("%s failed to retrieve pid for ovs-vswitchd in " @@ -900,7 +899,7 @@ def init_func(): def read_func(): """collectd ovs interface/port monitor plugin read function""" - if obj.init_done is False: + if obj.init_complete is False: init_func() return 0 diff --git a/collectd-extensions/src/plugin_common.py b/collectd-extensions/src/plugin_common.py index 9dab4ee..934de28 100644 --- a/collectd-extensions/src/plugin_common.py +++ b/collectd-extensions/src/plugin_common.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2020 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -15,6 +15,7 @@ import json import uuid import httplib2 import socket +import time import os from oslo_concurrency import processutils from fm_api import constants as fm_constants @@ -56,7 +57,7 @@ GROUP_OVERALL = 'overall' GROUP_FIRST = 'first' GROUP_PODS = 'pods' -# Overall cpuacct groupings +# Overall cpuacct groupings GROUP_TOTAL = 'cgroup-total' GROUP_PLATFORM = 'platform' GROUP_BASE = 'base' @@ -99,7 +100,7 @@ PLUGIN_FAIL = 1 class PluginObject(object): - def __init__(self, plugin, url): + def __init__(self, plugin, url=""): # static variables set in init_func self.plugin = plugin # the name of this plugin @@ -110,9 +111,9 @@ class PluginObject(object): # dynamic gate variables self.virtual = False # set to True if host is virtual - self.config_complete = False # set to True once config is complete + self._config_complete = False # set to True once config is complete self.config_done = False # set true if config_func completed ok - self.init_done = False # set true if init_func completed ok + self.init_complete = False # set true if init_func completed ok self.fm_connectivity = False # set true when fm connectivity ok self.alarm_type = fm_constants.FM_ALARM_TYPE_7 # OPERATIONAL @@ -141,6 +142,7 @@ class PluginObject(object): self.error_logged = False # used to prevent log flooding self.log_throttle_count = 0 # used to count throttle logs self.INIT_LOG_THROTTLE = 10 # the init log throttle threshold + self.CONFIG_LOG_THROTTLE = 50 # the config log throttle threshold self.http_retry_count = 0 # track http error cases self.HTTP_RETRY_THROTTLE = 6 # http retry threshold self.phase = 0 # tracks current phase; init, sampling @@ -150,28 +152,57 @@ class PluginObject(object): ########################################################################### # - # Name : init_ready + # Name : init_completed # - # Description: Test for init ready condition + # Description: Declare init completed # # Parameters : plugin name # - # 
Returns : False if initial config complete is not done - # True if initial config complete is done + ########################################################################### + + def init_completed(self): + """Declare plugin init complete""" + + collectd.info("%s initialization completed" % self.plugin) + self.init_complete = True + + ########################################################################### + # + # Name : config_complete + # + # Description: Test for config complete condition + # + # Parameters : plugin name + # + # Returns : False if config is not complete + # True if config is complete # ########################################################################### - def init_ready(self): - """Test for system init ready state""" + def config_complete(self): + """Test for config complete state""" - if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False: - self.log_throttle_count += 1 - if self.log_throttle_count > self.INIT_LOG_THROTTLE: - collectd.info("%s initialization needs retry" % self.plugin) + if self._config_complete is False: + if tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions: + flag_file = tsc.VOLATILE_WORKER_CONFIG_COMPLETE + elif tsc.nodetype == 'storage': + flag_file = tsc.VOLATILE_STORAGE_CONFIG_COMPLETE + else: + flag_file = tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE + + if os.path.exists(flag_file) is False: + self._config_complete = False + self.log_throttle_count += 1 + if self.log_throttle_count > self.CONFIG_LOG_THROTTLE: + collectd.info("%s configuration check needs retry" % + self.plugin) + self.log_throttle_count = 0 + time.sleep(1) + return False + else: + self._config_complete = True self.log_throttle_count = 0 - return False - else: - self.log_throttle_count = 0 + collectd.info("%s configuration completed" % self.plugin) return True diff --git a/collectd-extensions/src/ptp.py b/collectd-extensions/src/ptp.py index 6d9187d..5d3ad93 100755 --- a/collectd-extensions/src/ptp.py +++ 
b/collectd-extensions/src/ptp.py @@ -584,7 +584,8 @@ def read_timestamp_mode(): ##################################################################### def init_func(): - if obj.init_ready() is False: + # do nothing till config is complete. + if obj.config_complete() is False: return False obj.hostname = obj.gethostname() @@ -630,9 +631,9 @@ def init_func(): obj.controller = True obj.virtual = obj.is_virtual() - obj.init_done = True - obj.log_throttle_count = 0 - collectd.info("%s initialization complete" % PLUGIN) + + obj.init_completed() + return 0 ##################################################################### @@ -643,7 +644,6 @@ def init_func(): # # Assumptions: collectd calls init_func one time. # -# # retry init if needed # retry fm connect if needed # check service enabled state @@ -657,11 +657,7 @@ def read_func(): if obj.virtual is True: return 0 - # check and run init until it reports init_done True - if obj.init_done is False: - if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE): - collectd.info("%s re-running init" % PLUGIN) - obj.log_throttle_count += 1 + if obj.init_complete is False: init_func() return 0 @@ -727,7 +723,6 @@ def read_func(): else: collectd.info("%s no startup alarms found" % PLUGIN) - obj.config_complete = True obj.fm_connectivity = True # assert_all_alarms() @@ -813,7 +808,8 @@ def read_func(): # # sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET' # - data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE, + data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', + PLUGIN_CONF_FILE, '-u', '-b', '0', 'GET PORT_DATA_SET']) port_locked = False @@ -829,7 +825,8 @@ def read_func(): # # sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP' # - data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE, + data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', + PLUGIN_CONF_FILE, '-u', '-b', '0', 'GET TIME_STATUS_NP']) got_master_offset = False diff --git 
a/collectd-extensions/src/remotels.py b/collectd-extensions/src/remotels.py index 95c3cda..1330220 100755 --- a/collectd-extensions/src/remotels.py +++ b/collectd-extensions/src/remotels.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2020 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -146,15 +146,14 @@ def init_func(): if tsc.nodetype != 'controller': return 0 - if obj.init_done is False: - if obj.init_ready() is False: - return False + # do nothing till config is complete. + if obj.config_complete() is False: + return 0 obj.hostname = obj.gethostname() obj.base_eid = 'host=' + obj.hostname - obj.init_done = True - collectd.info("%s initialization complete" % PLUGIN) + obj.init_completed() return True @@ -166,7 +165,7 @@ def read_func(): if tsc.nodetype != 'controller': return 0 - if obj.init_done is False: + if obj.init_complete is False: init_func() return 0