Merge "Add Remote Logging Server connectivity monitoring to collectd"

This commit is contained in:
Zuul 2019-03-15 20:11:43 +00:00 committed by Gerrit Code Review
commit 15a451434c
6 changed files with 369 additions and 1 deletions

View File

@ -16,7 +16,9 @@ COPY_LIST="$PKG_BASE/src/LICENSE \
$PKG_BASE/src/ntpq.conf \
$PKG_BASE/src/interface.py \
$PKG_BASE/src/interface.conf \
$PKG_BASE/src/remotels.py \
$PKG_BASE/src/remotels.conf \
$PKG_BASE/src/example.py \
$PKG_BASE/src/example.conf"
TIS_PATCH_VER=7
TIS_PATCH_VER=8

View File

@ -23,6 +23,7 @@ Source12: memory.py
Source14: example.py
Source15: ntpq.py
Source16: interface.py
Source17: remotels.py
# collectd plugin conf files into /etc/collectd.d
Source100: python_plugins.conf
@ -32,6 +33,7 @@ Source103: df.conf
Source104: example.conf
Source105: ntpq.conf
Source106: interface.conf
Source107: remotels.conf
BuildRequires: systemd-devel
@ -75,6 +77,7 @@ install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE16} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE17} %{buildroot}%{local_python_extensions_dir}
# collectd plugin conf files into /etc/collectd.d
@ -85,6 +88,7 @@ install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE106} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE107} %{buildroot}%{local_plugin_dir}
%clean
rm -rf $RPM_BUILD_ROOT

View File

@ -33,6 +33,7 @@ class PluginObject(object):
self.plugin = plugin # the name of this plugin
self.hostname = '' # the name of this host
self.port = 0 # the port number for this plugin
self.base_eid = '' # the base entity id host=<hostname>
# dynamic gate variables
self.config_complete = False # set to True once config is complete
@ -42,6 +43,8 @@ class PluginObject(object):
# dynamic variables set in read_func
self.usage = float(0) # last usage value recorded as float
self.audits = 0 # number of audit since init
self.enabled = False # tracks a plugin's enabled state
self.alarmed = False # tracks the current alarmed state
# http and json specific variables
self.url = url # target url

View File

@ -14,6 +14,7 @@ LoadPlugin python
<Module "interface">
Port 2122
</Module>
Import "remotels"
LogTraces = true
Encoding "utf-8"
</Plugin>

View File

@ -0,0 +1,13 @@
<Plugin "threshold">
<Plugin "remotels">
<Type "absolute">
Instance "reachable"
Persist true
PersistOK true
WarningMin 1
FailureMin 0
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -0,0 +1,345 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Remote Logging Server plugin for collectd.
#
# The Remote Logging Server is enabled if /etc/syslog-ng/syslog-ng.conf
# contains '@include remotelogging.conf'
#
# There is no asynchronous notification of remote logging server
# configuration enable/disable state changes. Therefore, each audit
# interval needs to check whether its enabled or not.
#
# every audit interval ...
#
# read_func:
# check enabled:
# if disabled and alarmed:
# clear alarm
# if enabled:
# get ip and port
# query status
# if connected and alarmed:
# clear alarm
# if not connected and not alarmed:
# raise alarm
#
# system remotelogging-modify --ip_address <ip address>
# --transport tcp
# --enabled True
#
############################################################################
import os
import collectd
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from oslo_concurrency import processutils
from fm_api import fm_api
# Fault manager API Object
api = fm_api.FaultAPIs()
# name of the plugin
PLUGIN_NAME = 'remotels'
# all logs produced by this plugin are prefixed with this
PLUGIN = 'remote logging server'
# Interface Monitoring Interval in seconds
PLUGIN_AUDIT_INTERVAL = 60
# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'absolute'
PLUGIN_TYPE_INSTANCE = 'reachable'
# Remote Logging Connectivity Alarm ID
PLUGIN_ALARMID = '100.118'
# The file where this plugin learns if remote logging is enabled
SYSLOG_CONF_FILE = '/etc/syslog-ng/syslog-ng.conf'
# Plugin Control Object
obj = pc.PluginObject(PLUGIN, "")
# Raise Remote Logging Server Alarm
def raise_alarm():
""" Raise Remote Logging Server Alarm. """
repair = 'Ensure Remote Log Server IP is reachable from '
repair += 'Controller through OAM interface; otherwise '
repair += 'contact next level of support.'
reason = 'Controller cannot establish connection with '
reason += 'remote logging server.'
try:
fault = fm_api.Fault(
alarm_id=PLUGIN_ALARMID,
alarm_state=fm_constants.FM_ALARM_STATE_SET,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=obj.base_eid,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=reason,
alarm_type=fm_constants.FM_ALARM_TYPE_1,
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
proposed_repair_action=repair,
service_affecting=False,
suppression=False)
alarm_uuid = api.set_fault(fault)
if pc.is_uuid_like(alarm_uuid) is False:
collectd.error("%s %s:%s set_fault failed:%s" %
(PLUGIN, PLUGIN_ALARMID,
obj.base_eid, alarm_uuid))
else:
collectd.info("%s %s:%s alarm raised" %
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
obj.alarmed = True
except:
collectd.error("%s %s:%s set_fault exception" %
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
# Clear remote logging server alarm
def clear_alarm():
""" Clear remote logging server alarm """
try:
if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is True:
collectd.info("%s alarm cleared" % PLUGIN)
obj.alarmed = False
return True
except:
collectd.error("%s %s:%s clear failed ; will retry" %
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
return False
# The config function - called once on collectd process startup
def config_func(config):
""" Configure the plugin """
# all configuration is learned during normal monitoring
obj.config_done = True
return 0
# The init function - called once on collectd process startup
def init_func():
""" Init the plugin """
# remote logging server monitoring is for controllers only
if tsc.nodetype != 'controller':
return 0
if obj.init_done is False:
if obj.init_ready() is False:
return False
obj.hostname = obj.gethostname()
obj.base_eid = 'host=' + obj.hostname
obj.init_done = True
collectd.info("%s initialization complete" % PLUGIN)
return True
# The sample read function - called on every audit interval
def read_func():
""" Remote logging server connectivity plugin read function """
# remote logging server monitoring is for controllers only
if tsc.nodetype != 'controller':
return 0
if obj.init_done is False:
init_func()
return 0
# get current state
current_enabled_state = obj.enabled
# check to see if remote logging is enabled
obj.enabled = False # assume disabled
if os.path.exists(SYSLOG_CONF_FILE) is True:
with open(SYSLOG_CONF_FILE, 'r') as infile:
for line in infile:
if line.startswith('@include '):
service = line.rstrip().split(' ')[1]
if service == '"remotelogging.conf"':
obj.enabled = True
break
if current_enabled_state == obj.enabled:
logit = False
else:
if obj.enabled is False:
collectd.info("%s is disabled" % PLUGIN)
else:
collectd.info("%s is enabled" % PLUGIN)
logit = True
# Handle startup case by clearing existing alarm if its raised.
# Its runtime cheaper and simpler to issue a blind clear than query.
if obj.audits == 0:
if clear_alarm() is False:
# if clear fails then retry next time
return 0
if obj.enabled is False:
collectd.info("%s is disabled" % PLUGIN)
obj.audits = 1
if obj.enabled is False:
if obj.alarmed is True:
clear_alarm()
return 0
# If we get here then the server is enabled ...
# Need to query it
# Get the ip and port from line that looks like this
#
# tag proto address port
# ----------------------------- --- -------------- ---
# destination remote_log_server {tcp("128.224.186.65" port(514));};
#
address = protocol = port = ''
with open(SYSLOG_CONF_FILE, 'r') as infile:
for line in infile:
if line.startswith('destination remote_log_server'):
try:
if len(line.split('{')) > 1:
protocol = line.split('{')[1][0:3]
address = line.split('{')[1].split('"')[1]
port = line.split('{')[1].split('(')[2].split(')')[0]
if not protocol or not address or not port:
collectd.error("%s remote log server credentials "
"parse error ; (%s:%s:%s)" %
(PLUGIN, protocol, address, port))
return 1
else:
# line parsed ; move on ...
break
else:
collectd.error("%s remote log server line parse error"
" ; %s" % (PLUGIN, line))
except Exception as ex:
collectd.error("%s remote log server credentials "
"parse exception ; (%s)" % (PLUGIN, line))
if ':' in address:
ipv = 6
protocol += 6
# Monitoring of IPV6 is not currently supported
return 0
else:
ipv = 4
# This plugin detects server connectivity through its socket status.
# To get that construct the remote logging server IP string.
# The files being looked at(/proc/net/tcp(udp)) use hex values,
# so convert the string caps hex value with reverse ordering of
# the "ipv4" values
index = 3
addr = [0, 0, 0, 0]
# swap order
for tup in address.split('.'):
addr[index] = int(tup)
index -= 1
# build the CAPs HEX address
UPPER_HEX_IP = ''
for tup in addr:
val = hex(int(tup)).split('x')[-1].upper()
if len(val) == 1:
UPPER_HEX_IP += '0'
UPPER_HEX_IP += val
UPPER_HEX_IP += ':'
tmp = hex(int(port)).split('x')[-1].upper()
for i in range(4-len(tmp)):
UPPER_HEX_IP += '0'
UPPER_HEX_IP += tmp
# log example tcp:ipv4:128.224.186.65:514 : IP:41BAE080:0202
collectd.debug("%s %s:ipv%d:%s:%s : IP:%s" %
(PLUGIN, protocol, ipv, address, port, UPPER_HEX_IP))
cmd = "cat /proc/net/" + protocol
cmd += " | awk '{print $3 \" \" $4}' | grep " + UPPER_HEX_IP
cmd += " | awk '{print $2}'"
res, err = processutils.execute(cmd, shell=True)
if err:
collectd.error("%s processutils error:%s" % (PLUGIN, err))
# cmd example:
# cat /proc/net/tcp | awk '{print $3 " " $4}'
# | grep 41BAE080:0202
# | awk '{print $2}'
collectd.debug("%s Cmd:%s" % (PLUGIN, cmd))
return 0
if res and res.rstrip() == '01':
# connected state reads 01
# Example log: Res:[01]
# clear alarm if
# - currently alarmed and
# - debounced by 1 ; need 2 connected readings in a row
if obj.alarmed is True:
clear_alarm()
# Only log on state change
if obj.usage != 1:
logit = True
obj.usage = 1
conn = ''
else:
# res typically reads 02 when notr connected
# Example log: Res:[02]
collectd.debug("%s Res:[%s] " % (PLUGIN, res.rstrip()))
# raise alarm if
# - not already alarmed
# - debounced by 1 ; need 2 failures in a row
if obj.alarmed is False and obj.usage == 0:
raise_alarm()
# only log on state change
if obj.usage == 1 or obj.audits == 1:
logit = True
obj.usage = 0
conn = 'not '
if logit is True:
collectd.info("%s is %sconnected [%s ipv%d %s:%s]" %
(PLUGIN, conn, protocol, ipv, address, port))
obj.audits += 1
# Dispatch usage value to collectd
val = collectd.Values(host=obj.hostname)
val.plugin = PLUGIN_NAME
val.type = PLUGIN_TYPE
val.type_instance = PLUGIN_TYPE_INSTANCE
val.dispatch(values=[obj.usage])
return 0
# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)