Add consistent init and config complete checks to collectd plugins
Some of the collectd plugins do not wait for host configuration to complete before they start monitoring or communicating with external services such as FM. As a result, the collectd networking plugin can be triggered before or while the host is being configured, which has been seen to cause collectd segfaults/coredumps inside collectd's internal networking plugin.

To solve this issue, and to reduce startup thrash and a slew of plugin startup error logs, this update adds consistent initialization and configuration-complete checks to all of the StarlingX plugins so that monitoring and external service access are not performed until the host configuration is complete.

Test Plan:

PASS: Verify no plugin sampling until after config is complete
PASS: Verify alarm assert and clear cycle for all plugins
PASS: Verify AIO SX system install
PASS: Verify AIO DX system install
PEND: Verify Standard system install
PASS: Verify logging

Change-Id: I90a5d1c8c3be77269a571738c9499b2e908e1fc5
Closes-Bug: 1872979
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
1a5e6c4c3d
commit
63c8d1e55a
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -48,15 +48,15 @@ re_keyquoteval = re.compile(r'^\s*(\S+)\s*[=:]\s*\"(\S+)\"\s*')
|
|||
|
||||
|
||||
# Plugin specific control class and object.
|
||||
class CPU_object:
|
||||
class CPU_object(pc.PluginObject):
|
||||
|
||||
def __init__(self):
|
||||
super(CPU_object, self).__init__(PLUGIN, '')
|
||||
self.debug = True
|
||||
self.verbose = True
|
||||
self._cache = {}
|
||||
self._k8s_client = pc.K8sClient()
|
||||
self.k8s_pods = set()
|
||||
self.hostname = ''
|
||||
|
||||
self.schedstat_version = 0
|
||||
self.schedstat_supported = True
|
||||
|
@ -72,6 +72,7 @@ class CPU_object:
|
|||
self._data[PLATFORM_CPU_PERCENT] = 0.0
|
||||
self.elapsed_ms = 0.0
|
||||
|
||||
|
||||
# Instantiate the class
|
||||
obj = CPU_object()
|
||||
|
||||
|
@ -410,8 +411,11 @@ def config_func(config):
|
|||
def init_func():
|
||||
"""Init the plugin."""
|
||||
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return False
|
||||
|
||||
obj.hostname = socket.gethostname()
|
||||
collectd.info('%s init function for %s' % (PLUGIN, obj.hostname))
|
||||
|
||||
# Determine the full list of logical cpus for this host
|
||||
obj.logical_cpus = get_logical_cpus()
|
||||
|
@ -459,14 +463,17 @@ def init_func():
|
|||
# Gather initial cputime state information.
|
||||
update_cpu_data(init=True)
|
||||
|
||||
collectd.info('%s initialization complete' % PLUGIN)
|
||||
|
||||
obj.init_completed()
|
||||
return pc.PLUGIN_PASS
|
||||
|
||||
|
||||
# Calculate the CPU usage sample
|
||||
def read_func():
|
||||
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# epoch time in floating seconds
|
||||
now0 = time.time()
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -7,19 +7,17 @@
|
|||
import os
|
||||
import random
|
||||
import collectd
|
||||
import plugin_common as pc
|
||||
|
||||
PLUGIN = 'random number plugin'
|
||||
|
||||
# static variables
|
||||
|
||||
|
||||
# define a class here that will persist over read calls
|
||||
class ExampleObject:
|
||||
hostname = ""
|
||||
class ExampleObject(pc.PluginObject):
|
||||
plugin_data = ['1', '100']
|
||||
|
||||
|
||||
obj = ExampleObject()
|
||||
obj = ExampleObject(PLUGIN, '')
|
||||
|
||||
|
||||
# The config function - called once on collectd process startup
|
||||
|
@ -45,14 +43,24 @@ def config_func(config):
|
|||
# The init function - called once on collectd process startup
|
||||
def init_func():
|
||||
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return False
|
||||
|
||||
# get current hostname
|
||||
obj.hostname = os.uname()[1]
|
||||
obj.hostname = obj.gethostname()
|
||||
|
||||
obj.init_completed()
|
||||
return 0
|
||||
|
||||
|
||||
# The sample read function - called on every audit interval
|
||||
def read_func():
|
||||
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# do the work to create the sample
|
||||
low = int(obj.plugin_data[0])
|
||||
high = int(obj.plugin_data[1])
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -237,6 +237,9 @@ ALARM_ID__TO__PLUGIN_DICT = {ALARM_ID__CPU: PLUGIN__CPU,
|
|||
ALARM_ID__VSWITCH_PORT: PLUGIN__VSWITCH_PORT,
|
||||
ALARM_ID__VSWITCH_IFACE: PLUGIN__VSWITCH_IFACE}
|
||||
|
||||
# Common plugin object
|
||||
pluginObject = pc.PluginObject(PLUGIN, '')
|
||||
|
||||
|
||||
#########################################
|
||||
# The collectd Maintenance Degrade Object
|
||||
|
@ -547,8 +550,8 @@ class DegradeObject:
|
|||
mtcDegradeObj = DegradeObject(MTCE_CMD_RX_PORT)
|
||||
|
||||
|
||||
# PluginObject Class
|
||||
class PluginObject:
|
||||
# fmAlarmObject Class
|
||||
class fmAlarmObject:
|
||||
|
||||
dbObj = None # shared database connection obj
|
||||
host = None # saved hostname
|
||||
|
@ -561,7 +564,7 @@ class PluginObject:
|
|||
fm_connectivity = False
|
||||
|
||||
def __init__(self, id, plugin):
|
||||
"""PluginObject Class constructor"""
|
||||
"""fmAlarmObject Class constructor"""
|
||||
|
||||
# plugin specific static class members.
|
||||
self.id = id # alarm id ; 100.1??
|
||||
|
@ -1121,7 +1124,7 @@ class PluginObject:
|
|||
|
||||
try:
|
||||
collectd.debug("%s %s Get Lock ..." % (PLUGIN, self.plugin))
|
||||
with PluginObject.lock:
|
||||
with fmAlarmObject.lock:
|
||||
obj = self.instance_objects[eid]
|
||||
return obj
|
||||
except:
|
||||
|
@ -1147,7 +1150,7 @@ class PluginObject:
|
|||
"""
|
||||
try:
|
||||
collectd.debug("%s %s Add Lock ..." % (PLUGIN, self.plugin))
|
||||
with PluginObject.lock:
|
||||
with fmAlarmObject.lock:
|
||||
self.instance_objects[eid] = obj
|
||||
except:
|
||||
collectd.error("%s failed to add instance to %s object list" %
|
||||
|
@ -1188,7 +1191,7 @@ class PluginObject:
|
|||
|
||||
try:
|
||||
# create a new plugin object
|
||||
inst_obj = PluginObject(self.id, self.plugin)
|
||||
inst_obj = fmAlarmObject(self.id, self.plugin)
|
||||
self._copy_instance_object(inst_obj)
|
||||
|
||||
# initialize the object with instance specific data
|
||||
|
@ -1259,7 +1262,7 @@ class PluginObject:
|
|||
# loop over the mount points
|
||||
for mp in mountpoints:
|
||||
# create a new plugin object
|
||||
inst_obj = PluginObject(ALARM_ID__DF, PLUGIN__DF)
|
||||
inst_obj = fmAlarmObject(ALARM_ID__DF, PLUGIN__DF)
|
||||
|
||||
# initialize the object with instance specific data
|
||||
inst_obj.resource_name = self.resource_name
|
||||
|
@ -1282,24 +1285,21 @@ class PluginObject:
|
|||
(PLUGIN, inst_obj.instance))
|
||||
|
||||
|
||||
PluginObject.host = os.uname()[1]
|
||||
|
||||
|
||||
# ADD_NEW_PLUGIN: add plugin to this table
|
||||
# This instantiates the plugin objects
|
||||
PLUGINS = {
|
||||
PLUGIN__CPU: PluginObject(ALARM_ID__CPU, PLUGIN__CPU),
|
||||
PLUGIN__MEM: PluginObject(ALARM_ID__MEM, PLUGIN__MEM),
|
||||
PLUGIN__DF: PluginObject(ALARM_ID__DF, PLUGIN__DF),
|
||||
PLUGIN__VSWITCH_CPU: PluginObject(ALARM_ID__VSWITCH_CPU,
|
||||
PLUGIN__VSWITCH_CPU),
|
||||
PLUGIN__VSWITCH_MEM: PluginObject(ALARM_ID__VSWITCH_MEM,
|
||||
PLUGIN__VSWITCH_MEM),
|
||||
PLUGIN__VSWITCH_PORT: PluginObject(ALARM_ID__VSWITCH_PORT,
|
||||
PLUGIN__VSWITCH_PORT),
|
||||
PLUGIN__VSWITCH_IFACE: PluginObject(ALARM_ID__VSWITCH_IFACE,
|
||||
PLUGIN__VSWITCH_IFACE),
|
||||
PLUGIN__EXAMPLE: PluginObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)}
|
||||
PLUGIN__CPU: fmAlarmObject(ALARM_ID__CPU, PLUGIN__CPU),
|
||||
PLUGIN__MEM: fmAlarmObject(ALARM_ID__MEM, PLUGIN__MEM),
|
||||
PLUGIN__DF: fmAlarmObject(ALARM_ID__DF, PLUGIN__DF),
|
||||
PLUGIN__VSWITCH_CPU: fmAlarmObject(ALARM_ID__VSWITCH_CPU,
|
||||
PLUGIN__VSWITCH_CPU),
|
||||
PLUGIN__VSWITCH_MEM: fmAlarmObject(ALARM_ID__VSWITCH_MEM,
|
||||
PLUGIN__VSWITCH_MEM),
|
||||
PLUGIN__VSWITCH_PORT: fmAlarmObject(ALARM_ID__VSWITCH_PORT,
|
||||
PLUGIN__VSWITCH_PORT),
|
||||
PLUGIN__VSWITCH_IFACE: fmAlarmObject(ALARM_ID__VSWITCH_IFACE,
|
||||
PLUGIN__VSWITCH_IFACE),
|
||||
PLUGIN__EXAMPLE: fmAlarmObject(ALARM_ID__EXAMPLE, PLUGIN__EXAMPLE)}
|
||||
|
||||
|
||||
#####################################################################
|
||||
|
@ -1359,7 +1359,7 @@ def _build_entity_id(plugin, plugin_instance):
|
|||
inst_error = False
|
||||
|
||||
entity_id = 'host='
|
||||
entity_id += PluginObject.host
|
||||
entity_id += fmAlarmObject.host
|
||||
|
||||
if plugin == PLUGIN__MEM:
|
||||
if plugin_instance != 'platform':
|
||||
|
@ -1498,7 +1498,7 @@ def _print_state(obj=None):
|
|||
objs.append(obj)
|
||||
|
||||
collectd.debug("%s _print_state Lock ..." % PLUGIN)
|
||||
with PluginObject.lock:
|
||||
with fmAlarmObject.lock:
|
||||
for o in objs:
|
||||
_print_obj(o)
|
||||
if len(o.instance_objects):
|
||||
|
@ -1520,10 +1520,10 @@ def _database_setup(database):
|
|||
|
||||
# http://influxdb-python.readthedocs.io/en/latest/examples.html
|
||||
# http://influxdb-python.readthedocs.io/en/latest/api-documentation.html
|
||||
PluginObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database)
|
||||
if PluginObject.dbObj:
|
||||
fmAlarmObject.dbObj = InfluxDBClient('127.0.0.1', '8086', database)
|
||||
if fmAlarmObject.dbObj:
|
||||
try:
|
||||
PluginObject.dbObj.create_database('collectd')
|
||||
fmAlarmObject.dbObj.create_database('collectd')
|
||||
|
||||
############################################################
|
||||
#
|
||||
|
@ -1544,14 +1544,14 @@ def _database_setup(database):
|
|||
#
|
||||
############################################################
|
||||
|
||||
PluginObject.dbObj.create_retention_policy(
|
||||
fmAlarmObject.dbObj.create_retention_policy(
|
||||
DATABASE_NAME, '1w', 1, database, True)
|
||||
except Exception as ex:
|
||||
if str(ex) == 'database already exists':
|
||||
try:
|
||||
collectd.info("%s influxdb:collectd %s" %
|
||||
(PLUGIN, str(ex)))
|
||||
PluginObject.dbObj.create_retention_policy(
|
||||
fmAlarmObject.dbObj.create_retention_policy(
|
||||
DATABASE_NAME, '1w', 1, database, True)
|
||||
except Exception as ex:
|
||||
if str(ex) == 'retention policy already exists':
|
||||
|
@ -1568,7 +1568,7 @@ def _database_setup(database):
|
|||
if not error_str:
|
||||
found = False
|
||||
retention = \
|
||||
PluginObject.dbObj.get_list_retention_policies(database)
|
||||
fmAlarmObject.dbObj.get_list_retention_policies(database)
|
||||
for r in range(len(retention)):
|
||||
if retention[r]["name"] == DATABASE_NAME:
|
||||
collectd.info("%s influxdb:%s samples retention "
|
||||
|
@ -1577,7 +1577,7 @@ def _database_setup(database):
|
|||
found = True
|
||||
if found is True:
|
||||
collectd.info("%s influxdb:%s is setup" % (PLUGIN, database))
|
||||
PluginObject.database_setup = True
|
||||
fmAlarmObject.database_setup = True
|
||||
else:
|
||||
collectd.error("%s influxdb:%s retention policy NOT setup" %
|
||||
(PLUGIN, database))
|
||||
|
@ -1625,14 +1625,14 @@ def init_func():
|
|||
"""Collectd FM Notifier Initialization Function"""
|
||||
|
||||
mtcDegradeObj.port = MTCE_CMD_RX_PORT
|
||||
collectd.error("%s mtce port %d" %
|
||||
(PLUGIN, mtcDegradeObj.port))
|
||||
collectd.info("%s mtce port %d" %
|
||||
(PLUGIN, mtcDegradeObj.port))
|
||||
|
||||
PluginObject.lock = Lock()
|
||||
fmAlarmObject.lock = Lock()
|
||||
|
||||
PluginObject.host = os.uname()[1]
|
||||
fmAlarmObject.host = pluginObject.gethostname()
|
||||
collectd.info("%s %s:%s init function" %
|
||||
(PLUGIN, tsc.nodetype, PluginObject.host))
|
||||
(PLUGIN, tsc.nodetype, fmAlarmObject.host))
|
||||
|
||||
# Constant CPU Plugin Object Settings
|
||||
obj = PLUGINS[PLUGIN__CPU]
|
||||
|
@ -1737,20 +1737,28 @@ def init_func():
|
|||
# ...
|
||||
|
||||
if tsc.nodetype == 'controller':
|
||||
PluginObject.database_setup_in_progress = True
|
||||
fmAlarmObject.database_setup_in_progress = True
|
||||
_database_setup('collectd')
|
||||
PluginObject.database_setup_in_progress = False
|
||||
fmAlarmObject.database_setup_in_progress = False
|
||||
|
||||
pluginObject.init_completed()
|
||||
return 0
|
||||
|
||||
|
||||
# The notifier function inspects the collectd notification and determines if
|
||||
# the representative alarm needs to be asserted, severity changed, or cleared.
|
||||
def notifier_func(nObject):
|
||||
|
||||
if PluginObject.fm_connectivity is False:
|
||||
# do nothing till config is complete.
|
||||
if pluginObject._config_complete is False:
|
||||
if pluginObject.config_complete() is False:
|
||||
return 0
|
||||
|
||||
if fmAlarmObject.fm_connectivity is False:
|
||||
|
||||
# handle multi threading startup
|
||||
with PluginObject.lock:
|
||||
if PluginObject.fm_connectivity is True:
|
||||
with fmAlarmObject.lock:
|
||||
if fmAlarmObject.fm_connectivity is True:
|
||||
return 0
|
||||
|
||||
##################################################################
|
||||
|
@ -1774,7 +1782,7 @@ def notifier_func(nObject):
|
|||
want_alarm_clear = False
|
||||
eid = alarm.entity_instance_id
|
||||
# ignore alarms not for this host
|
||||
if PluginObject.host not in eid:
|
||||
if fmAlarmObject.host not in eid:
|
||||
continue
|
||||
|
||||
base_obj = get_base_object(alarm_id)
|
||||
|
@ -1834,8 +1842,8 @@ def notifier_func(nObject):
|
|||
"startup alarm %s" %
|
||||
(PLUGIN_DEGRADE, ap, alarm_id))
|
||||
|
||||
PluginObject.fm_connectivity = True
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
fmAlarmObject.fm_connectivity = True
|
||||
collectd.info("%s connectivity with fm complete" % PLUGIN)
|
||||
|
||||
collectd.debug('%s notification: %s %s:%s - %s %s %s [%s]' % (
|
||||
PLUGIN,
|
||||
|
@ -1867,11 +1875,11 @@ def notifier_func(nObject):
|
|||
return 0
|
||||
|
||||
if tsc.nodetype == 'controller':
|
||||
if PluginObject.database_setup is False:
|
||||
if PluginObject.database_setup_in_progress is False:
|
||||
PluginObject.database_setup_in_progress = True
|
||||
if fmAlarmObject.database_setup is False:
|
||||
if fmAlarmObject.database_setup_in_progress is False:
|
||||
fmAlarmObject.database_setup_in_progress = True
|
||||
_database_setup('collectd')
|
||||
PluginObject.database_setup_in_progress = False
|
||||
fmAlarmObject.database_setup_in_progress = False
|
||||
|
||||
# get plugin object
|
||||
if nObject.plugin in PLUGINS:
|
||||
|
@ -1900,7 +1908,7 @@ def notifier_func(nObject):
|
|||
eid = _build_entity_id(nObject.plugin, nObject.plugin_instance)
|
||||
try:
|
||||
# Need lock when reading/writing any obj.instance_objects list
|
||||
with PluginObject.lock:
|
||||
with fmAlarmObject.lock:
|
||||
|
||||
# we will take an exception if this object is not
|
||||
# in the list. The exception handling code below will
|
||||
|
|
|
@ -811,19 +811,13 @@ def config_func(config):
|
|||
def init_func():
|
||||
"""Init the plugin"""
|
||||
|
||||
if obj.config_done is False:
|
||||
collectd.info("%s configuration failed" % PLUGIN)
|
||||
time.sleep(300)
|
||||
return False
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_ready() is False:
|
||||
return 0
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return 0
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
obj.init_done = True
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
obj.init_completed()
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -831,7 +825,7 @@ def init_func():
|
|||
def read_func():
|
||||
"""collectd interface monitor plugin read function"""
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -53,15 +53,15 @@ re_base_mem = re.compile('\"node\d+:(\d+)MB:\d+\"')
|
|||
|
||||
|
||||
# Plugin specific control class and object.
|
||||
class MEM_object:
|
||||
class MEM_object(pc.PluginObject):
|
||||
|
||||
def __init__(self):
|
||||
super(MEM_object, self).__init__(PLUGIN, '')
|
||||
self.debug = False
|
||||
self.verbose = False
|
||||
self._cache = {}
|
||||
self._k8s_client = pc.K8sClient()
|
||||
self.k8s_pods = set()
|
||||
self.hostname = ''
|
||||
self.reserved_MiB = 0.0
|
||||
self.reserve_all = False
|
||||
self.strict_memory_accounting = False
|
||||
|
@ -369,6 +369,10 @@ def config_func(config):
|
|||
def init_func():
|
||||
"""Init the plugin."""
|
||||
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return 0
|
||||
|
||||
obj.hostname = socket.gethostname()
|
||||
collectd.info('%s: init function for %s' % (PLUGIN, obj.hostname))
|
||||
|
||||
|
@ -382,8 +386,7 @@ def init_func():
|
|||
collectd.info('%s: reserve_all: %s, reserved_MiB: %d'
|
||||
% (PLUGIN, obj.reserve_all, obj.reserved_MiB))
|
||||
|
||||
collectd.info('%s: initialization complete' % PLUGIN)
|
||||
|
||||
obj.init_completed()
|
||||
return pc.PLUGIN_PASS
|
||||
|
||||
|
||||
|
@ -391,6 +394,10 @@ def init_func():
|
|||
def read_func():
|
||||
"""collectd memory monitor plugin read function"""
|
||||
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# Get epoch time in floating seconds
|
||||
now0 = time.time()
|
||||
|
||||
|
|
|
@ -66,6 +66,9 @@ import collectd
|
|||
from fm_api import constants as fm_constants
|
||||
from fm_api import fm_api
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
import plugin_common as pc
|
||||
|
||||
import socket
|
||||
|
||||
api = fm_api.FaultAPIsV2()
|
||||
|
@ -79,12 +82,8 @@ PLUGIN_ALARMID = "100.114"
|
|||
|
||||
|
||||
# define a class here that will persist over read calls
|
||||
class NtpqObject:
|
||||
class NtpqObject(pc.PluginObject):
|
||||
|
||||
# static variables set in init
|
||||
hostname = '' # the name of this host
|
||||
base_eid = '' # the eid for the major alarm
|
||||
init_complete = False # set to true once config is complete
|
||||
alarm_raised = False # True when the major alarm is asserted
|
||||
|
||||
server_list_conf = [] # list of servers in the /etc/ntp.conf file
|
||||
|
@ -106,7 +105,7 @@ class NtpqObject:
|
|||
|
||||
|
||||
# This plugin's class object - persists over read calls
|
||||
obj = NtpqObject()
|
||||
obj = NtpqObject(PLUGIN, '')
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -278,7 +277,7 @@ def _clear_base_alarm():
|
|||
###############################################################################
|
||||
|
||||
def _remove_ip_from_unreachable_list(ip):
|
||||
"""Remove an IP address from the unreachable list and clear its NTP alarms"""
|
||||
"""Remove IP address from the unreachable list and clear its NTP alarms"""
|
||||
|
||||
# remove from unreachable list if it's there
|
||||
if ip and ip in obj.unreachable_servers:
|
||||
|
@ -553,12 +552,11 @@ def init_func():
|
|||
return 0
|
||||
|
||||
# do nothing till config is complete.
|
||||
# init_func will be called again by read_func once config is complete.
|
||||
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False:
|
||||
if obj.config_complete() is False:
|
||||
return 0
|
||||
|
||||
# get current hostname
|
||||
obj.hostname = os.uname()[1]
|
||||
obj.hostname = obj.gethostname()
|
||||
if not obj.hostname:
|
||||
collectd.error("%s failed to get hostname" % PLUGIN)
|
||||
return 1
|
||||
|
@ -617,8 +615,7 @@ def init_func():
|
|||
else:
|
||||
collectd.info("%s no major startup alarms found" % PLUGIN)
|
||||
|
||||
obj.init_complete = True
|
||||
|
||||
obj.init_completed()
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -650,9 +647,7 @@ def read_func():
|
|||
return 0
|
||||
|
||||
if obj.init_complete is False:
|
||||
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True:
|
||||
collectd.info("%s re-running init" % PLUGIN)
|
||||
init_func()
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# get a list of provisioned ntp servers
|
||||
|
|
|
@ -855,14 +855,14 @@ def config_func(config):
|
|||
def init_func():
|
||||
"""Init the plugin"""
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_ready() is False:
|
||||
return 0
|
||||
|
||||
# Only runs on worker nodes
|
||||
if 'worker' not in tsc.subfunctions:
|
||||
return 0
|
||||
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return 0
|
||||
|
||||
# Check whether this host is openstack worker node or not
|
||||
# OVS and OVSDPDK will only run on openstack worker node
|
||||
# For non openstack worker node, pid file won't exist
|
||||
|
@ -878,10 +878,9 @@ def init_func():
|
|||
global OVS_VSWITCHD_SOCKET
|
||||
OVS_VSWITCHD_SOCKET = \
|
||||
"".join([OVS_VSWITCHD_PATH, ".", pid, ".ctl"])
|
||||
obj.init_done = True
|
||||
obj.hostname = obj.gethostname()
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
obj.error_logged = False
|
||||
obj.init_completed()
|
||||
|
||||
elif obj.error_logged is False:
|
||||
collectd.info("%s failed to retrieve pid for ovs-vswitchd in "
|
||||
|
@ -900,7 +899,7 @@ def init_func():
|
|||
def read_func():
|
||||
"""collectd ovs interface/port monitor plugin read function"""
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -15,6 +15,7 @@ import json
|
|||
import uuid
|
||||
import httplib2
|
||||
import socket
|
||||
import time
|
||||
import os
|
||||
from oslo_concurrency import processutils
|
||||
from fm_api import constants as fm_constants
|
||||
|
@ -56,7 +57,7 @@ GROUP_OVERALL = 'overall'
|
|||
GROUP_FIRST = 'first'
|
||||
GROUP_PODS = 'pods'
|
||||
|
||||
# Overall cpuacct groupings
|
||||
# Overall cpuacct groupings
|
||||
GROUP_TOTAL = 'cgroup-total'
|
||||
GROUP_PLATFORM = 'platform'
|
||||
GROUP_BASE = 'base'
|
||||
|
@ -99,7 +100,7 @@ PLUGIN_FAIL = 1
|
|||
|
||||
class PluginObject(object):
|
||||
|
||||
def __init__(self, plugin, url):
|
||||
def __init__(self, plugin, url=""):
|
||||
|
||||
# static variables set in init_func
|
||||
self.plugin = plugin # the name of this plugin
|
||||
|
@ -110,9 +111,9 @@ class PluginObject(object):
|
|||
|
||||
# dynamic gate variables
|
||||
self.virtual = False # set to True if host is virtual
|
||||
self.config_complete = False # set to True once config is complete
|
||||
self._config_complete = False # set to True once config is complete
|
||||
self.config_done = False # set true if config_func completed ok
|
||||
self.init_done = False # set true if init_func completed ok
|
||||
self.init_complete = False # set true if init_func completed ok
|
||||
self.fm_connectivity = False # set true when fm connectivity ok
|
||||
|
||||
self.alarm_type = fm_constants.FM_ALARM_TYPE_7 # OPERATIONAL
|
||||
|
@ -141,6 +142,7 @@ class PluginObject(object):
|
|||
self.error_logged = False # used to prevent log flooding
|
||||
self.log_throttle_count = 0 # used to count throttle logs
|
||||
self.INIT_LOG_THROTTLE = 10 # the init log throttle threshold
|
||||
self.CONFIG_LOG_THROTTLE = 50 # the config log throttle threshold
|
||||
self.http_retry_count = 0 # track http error cases
|
||||
self.HTTP_RETRY_THROTTLE = 6 # http retry threshold
|
||||
self.phase = 0 # tracks current phase; init, sampling
|
||||
|
@ -150,28 +152,57 @@ class PluginObject(object):
|
|||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : init_ready
|
||||
# Name : init_completed
|
||||
#
|
||||
# Description: Test for init ready condition
|
||||
# Description: Declare init completed
|
||||
#
|
||||
# Parameters : plugin name
|
||||
#
|
||||
# Returns : False if initial config complete is not done
|
||||
# True if initial config complete is done
|
||||
###########################################################################
|
||||
|
||||
def init_completed(self):
|
||||
"""Declare plugin init complete"""
|
||||
|
||||
collectd.info("%s initialization completed" % self.plugin)
|
||||
self.init_complete = True
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : config_complete
|
||||
#
|
||||
# Description: Test for config complete condition
|
||||
#
|
||||
# Parameters : plugin name
|
||||
#
|
||||
# Returns : False if config is not complete
|
||||
# True if config is complete
|
||||
#
|
||||
###########################################################################
|
||||
|
||||
def init_ready(self):
|
||||
"""Test for system init ready state"""
|
||||
def config_complete(self):
|
||||
"""Test for config complete state"""
|
||||
|
||||
if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False:
|
||||
self.log_throttle_count += 1
|
||||
if self.log_throttle_count > self.INIT_LOG_THROTTLE:
|
||||
collectd.info("%s initialization needs retry" % self.plugin)
|
||||
if self._config_complete is False:
|
||||
if tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions:
|
||||
flag_file = tsc.VOLATILE_WORKER_CONFIG_COMPLETE
|
||||
elif tsc.nodetype == 'storage':
|
||||
flag_file = tsc.VOLATILE_STORAGE_CONFIG_COMPLETE
|
||||
else:
|
||||
flag_file = tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE
|
||||
|
||||
if os.path.exists(flag_file) is False:
|
||||
self._config_complete = False
|
||||
self.log_throttle_count += 1
|
||||
if self.log_throttle_count > self.CONFIG_LOG_THROTTLE:
|
||||
collectd.info("%s configuration check needs retry" %
|
||||
self.plugin)
|
||||
self.log_throttle_count = 0
|
||||
time.sleep(1)
|
||||
return False
|
||||
else:
|
||||
self._config_complete = True
|
||||
self.log_throttle_count = 0
|
||||
return False
|
||||
else:
|
||||
self.log_throttle_count = 0
|
||||
collectd.info("%s configuration completed" % self.plugin)
|
||||
|
||||
return True
|
||||
|
||||
|
|
|
@ -584,7 +584,8 @@ def read_timestamp_mode():
|
|||
#####################################################################
|
||||
def init_func():
|
||||
|
||||
if obj.init_ready() is False:
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return False
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
|
@ -630,9 +631,9 @@ def init_func():
|
|||
obj.controller = True
|
||||
|
||||
obj.virtual = obj.is_virtual()
|
||||
obj.init_done = True
|
||||
obj.log_throttle_count = 0
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
obj.init_completed()
|
||||
return 0
|
||||
|
||||
|
||||
#####################################################################
|
||||
|
@ -643,7 +644,6 @@ def init_func():
|
|||
#
|
||||
# Assumptions: collectd calls init_func one time.
|
||||
#
|
||||
#
|
||||
# retry init if needed
|
||||
# retry fm connect if needed
|
||||
# check service enabled state
|
||||
|
@ -657,11 +657,7 @@ def read_func():
|
|||
if obj.virtual is True:
|
||||
return 0
|
||||
|
||||
# check and run init until it reports init_done True
|
||||
if obj.init_done is False:
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s re-running init" % PLUGIN)
|
||||
obj.log_throttle_count += 1
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
|
@ -727,7 +723,6 @@ def read_func():
|
|||
else:
|
||||
collectd.info("%s no startup alarms found" % PLUGIN)
|
||||
|
||||
obj.config_complete = True
|
||||
obj.fm_connectivity = True
|
||||
# assert_all_alarms()
|
||||
|
||||
|
@ -813,7 +808,8 @@ def read_func():
|
|||
#
|
||||
# sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET'
|
||||
#
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE,
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f',
|
||||
PLUGIN_CONF_FILE,
|
||||
'-u', '-b', '0', 'GET PORT_DATA_SET'])
|
||||
|
||||
port_locked = False
|
||||
|
@ -829,7 +825,8 @@ def read_func():
|
|||
#
|
||||
# sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP'
|
||||
#
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f', PLUGIN_CONF_FILE,
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC, '-f',
|
||||
PLUGIN_CONF_FILE,
|
||||
'-u', '-b', '0', 'GET TIME_STATUS_NP'])
|
||||
|
||||
got_master_offset = False
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
@ -146,15 +146,14 @@ def init_func():
|
|||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_ready() is False:
|
||||
return False
|
||||
# do nothing till config is complete.
|
||||
if obj.config_complete() is False:
|
||||
return 0
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
obj.base_eid = 'host=' + obj.hostname
|
||||
obj.init_done = True
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
obj.init_completed()
|
||||
return True
|
||||
|
||||
|
||||
|
@ -166,7 +165,7 @@ def read_func():
|
|||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
|
|
Loading…
Reference in New Issue