Remove alarm query before clear in NTP plugin

The issue titled 'NTP 100.14 alarm is not cleared' exposed
a problem where the NTP plugin's alarm clear operation is
circumvented when its precursor fm_api.get_fault call
returns None because the fm process is not running.
From the caller's point of view, the None return suggests
that the alarm to be cleared does not exist, so the code
skips the call to clear.

This update works around the problem by simply issuing the
clear without the preceding query.

Change-Id: Idcc05bb0e7e1aa1082af1e8ecdcb1a5463b19440
Closes-Bug: 1812440
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-01-18 16:29:56 -05:00
parent 9fe8574234
commit abaff6b275
2 changed files with 21 additions and 25 deletions

View File

@ -16,4 +16,4 @@ COPY_LIST="$PKG_BASE/src/LICENSE \
$PKG_BASE/src/example.py \ $PKG_BASE/src/example.py \
$PKG_BASE/src/example.conf" $PKG_BASE/src/example.conf"
TIS_PATCH_VER=5 TIS_PATCH_VER=6

View File

@ -222,15 +222,14 @@ def _raise_alarm(ip=None):
def _clear_base_alarm(): def _clear_base_alarm():
""" Clear the NTP base alarm """ """ Clear the NTP base alarm """
if api.get_fault(PLUGIN_ALARMID, obj.base_eid) is not None: if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False:
if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False: collectd.error("%s failed to clear alarm %s:%s" %
collectd.error("%s failed to clear alarm %s:%s" % (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
(PLUGIN, PLUGIN_ALARMID, obj.base_eid)) return True
return True else:
else: collectd.info("%s cleared alarm %s:%s" %
collectd.info("%s cleared alarm %s:%s" % (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
(PLUGIN, PLUGIN_ALARMID, obj.base_eid)) obj.alarm_raised = False
obj.alarm_raised = False
return False return False
@ -263,23 +262,20 @@ def _remove_ip_from_unreachable_list(ip):
if ip and ip in obj.unreachable_servers: if ip and ip in obj.unreachable_servers:
eid = obj.base_eid + '=' + ip eid = obj.base_eid + '=' + ip
collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid)) collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid))
# clear the alarm if its asserted # clear the alarm if its asserted
if api.get_fault(PLUGIN_ALARMID, eid) is not None: if api.clear_fault(PLUGIN_ALARMID, eid) is True:
if api.clear_fault(PLUGIN_ALARMID, eid) is True: collectd.info("%s cleared %s:%s alarm" %
collectd.info("%s cleared %s:%s alarm" % (PLUGIN, PLUGIN_ALARMID, eid))
(PLUGIN, PLUGIN_ALARMID, eid))
obj.unreachable_servers.remove(ip)
else:
# Handle clear failure by not removing the IP from the list.
# It will retry on next audit.
# Error should only occur if FM is not running at the time
# this get or clear is called
collectd.error("%s failed alarm clear %s:%s" %
(PLUGIN, PLUGIN_ALARMID, eid))
return True
else:
obj.unreachable_servers.remove(ip) obj.unreachable_servers.remove(ip)
collectd.info("%s alarm %s not raised" % (PLUGIN, eid)) else:
# Handle clear failure by not removing the IP from the list.
# It will retry on next audit.
# Error should only occur if FM is not running at the time
# this get or clear is called
collectd.error("%s failed alarm clear %s:%s" %
(PLUGIN, PLUGIN_ALARMID, eid))
return True
return False return False