From 3b0dbfa17ab7d40adc08347e2a28987581a43b5d Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 18 Jan 2019 16:29:56 -0500 Subject: [PATCH] Remove alarm query before clear in NTP plugin Issue titled 'NTP 100.14 alarm is not cleared' exposed an issue where the NTP plugin alarm clear operation is circumvented when its precursor fm_api.get_fault call returns None if the fm process is not running. From the caller's point of view the None return suggests that the alarm to be cleared does not exist so the code skips the call to clear. This update works around this by simply issuing the clear without the query. Change-Id: Idcc05bb0e7e1aa1082af1e8ecdcb1a5463b19440 Closes-Bug: 1812440 Signed-off-by: Eric MacDonald --- collectd-extensions/centos/build_srpm.data | 2 +- collectd-extensions/src/ntpq.py | 44 ++++++++++------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/collectd-extensions/centos/build_srpm.data b/collectd-extensions/centos/build_srpm.data index e5b3c50..82cafe8 100644 --- a/collectd-extensions/centos/build_srpm.data +++ b/collectd-extensions/centos/build_srpm.data @@ -16,4 +16,4 @@ COPY_LIST="$PKG_BASE/src/LICENSE \ $PKG_BASE/src/example.py \ $PKG_BASE/src/example.conf" -TIS_PATCH_VER=5 +TIS_PATCH_VER=6 diff --git a/collectd-extensions/src/ntpq.py b/collectd-extensions/src/ntpq.py index 7b6f343..3f79646 100755 --- a/collectd-extensions/src/ntpq.py +++ b/collectd-extensions/src/ntpq.py @@ -222,15 +222,14 @@ def _raise_alarm(ip=None): def _clear_base_alarm(): """ Clear the NTP base alarm """ - if api.get_fault(PLUGIN_ALARMID, obj.base_eid) is not None: - if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False: - collectd.error("%s failed to clear alarm %s:%s" % - (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) - return True - else: - collectd.info("%s cleared alarm %s:%s" % - (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) - obj.alarm_raised = False + if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False: + collectd.error("%s failed to clear 
alarm %s:%s" % + (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) + return True + else: + collectd.info("%s cleared alarm %s:%s" % + (PLUGIN, PLUGIN_ALARMID, obj.base_eid)) + obj.alarm_raised = False return False @@ -263,23 +262,20 @@ def _remove_ip_from_unreachable_list(ip): if ip and ip in obj.unreachable_servers: eid = obj.base_eid + '=' + ip collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid)) + # clear the alarm if its asserted - if api.get_fault(PLUGIN_ALARMID, eid) is not None: - if api.clear_fault(PLUGIN_ALARMID, eid) is True: - collectd.info("%s cleared %s:%s alarm" % - (PLUGIN, PLUGIN_ALARMID, eid)) - obj.unreachable_servers.remove(ip) - else: - # Handle clear failure by not removing the IP from the list. - # It will retry on next audit. - # Error should only occur if FM is not running at the time - # this get or clear is called - collectd.error("%s failed alarm clear %s:%s" % - (PLUGIN, PLUGIN_ALARMID, eid)) - return True - else: + if api.clear_fault(PLUGIN_ALARMID, eid) is True: + collectd.info("%s cleared %s:%s alarm" % + (PLUGIN, PLUGIN_ALARMID, eid)) obj.unreachable_servers.remove(ip) - collectd.info("%s alarm %s not raised" % (PLUGIN, eid)) + else: + # Handle clear failure by not removing the IP from the list. + # It will retry on next audit. + # Error should only occur if FM is not running at the time + # this get or clear is called + collectd.error("%s failed alarm clear %s:%s" % + (PLUGIN, PLUGIN_ALARMID, eid)) + return True return False