Protect FM API shared data with thread locking

Collectd runs all its python plugins concurrently.

With the expansion of collectd python plugins due
to the obsolesence of rmon collectd core dumps are
being reported during collectd startup when the FM
service on the controller is not running.

Debug of the issue revealed that the core dumps are
due to having no mutex around FM API's shared data.

The required mutex is provided by this update by
adding a while locked expression to the start of
each API.

Also fixed 3 pep8 errors.

Closes-Bug: 1819473

Test Plan:
PASS: Test before and after cases to confirm that without
      the change we see core dumps but with the change the
      API and collectd plugin behavior is correct without
      the core dumps.
PASS: System install with current collectd plugins and fm's
      python API enhanced with locking.
PASS: Have sm stop managing the fmManager process, kill it
      and then restart collectd over and over.
      Should not see any collectd core dumps.
PASS: Verify nfv alarming still works

Change-Id: I3d5ef0bd9cb774299b4c0f3b9e33cddb7c0f776c
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-04-03 15:50:41 -04:00
parent b7e7f911e5
commit 7ae75e24c8
1 changed files with 132 additions and 112 deletions

View File

@ -14,6 +14,9 @@ import copy
from . import constants
import six
import fm_core
import threading
fm_api_lock = threading.Lock()
class ClientException(Exception):
@ -91,7 +94,8 @@ class FaultAPIsBase(object):
sep + data.severity + sep + self._check_val(data.reason_text) +
sep + data.alarm_type + sep + data.probable_cause + sep +
self._check_val(data.proposed_repair_action) + sep +
str(data.service_affecting) + sep + str(data.suppression) + sep)
str(data.service_affecting) + sep +
str(data.suppression) + sep)
@staticmethod
def _str_to_alarm(alarm_str):
@ -172,6 +176,7 @@ class FaultAPIsBase(object):
class FaultAPIs(FaultAPIsBase):
def set_fault(self, data):
with fm_api_lock:
self._check_required_attributes(data)
self._validate_attributes(data)
buff = self._alarm_to_str(data)
@ -181,13 +186,16 @@ class FaultAPIs(FaultAPIsBase):
return None
def clear_fault(self, alarm_id, entity_instance_id):
with fm_api_lock:
sep = constants.FM_CLIENT_STR_SEP
buff = (sep + self._check_val(alarm_id) + sep +
self._check_val(entity_instance_id) + sep)
try:
resp = fm_core.clear(buff)
# resp may be True/False/None after FaultAPIsV2 implementation.
# to keep FaultAPIs the same as before, return False for None case.
# resp may be True/False/None after FaultAPIsV2
# implementation.
# To keep FaultAPIs the same as before,
# return False for None case.
if resp is True:
return True
else:
@ -196,6 +204,7 @@ class FaultAPIs(FaultAPIsBase):
return False
def get_fault(self, alarm_id, entity_instance_id):
with fm_api_lock:
sep = constants.FM_CLIENT_STR_SEP
buff = (sep + self._check_val(alarm_id) + sep +
self._check_val(entity_instance_id) + sep)
@ -208,10 +217,13 @@ class FaultAPIs(FaultAPIsBase):
return None
def clear_all(self, entity_instance_id):
with fm_api_lock:
try:
resp = fm_core.clear_all(entity_instance_id)
# resp may be True/False/None after FaultAPIsV2 implementation.
# to keep FaultAPIs the same as before, return False for None case.
# resp may be True/False/None after FaultAPIsV2
# implementation.
# To keep FaultAPIs the same as before,
# return False for None case.
if resp is True:
return True
else:
@ -220,6 +232,7 @@ class FaultAPIs(FaultAPIsBase):
return False
def get_faults(self, entity_instance_id):
with fm_api_lock:
try:
resp = fm_core.get_by_eid(entity_instance_id)
if resp:
@ -232,6 +245,7 @@ class FaultAPIs(FaultAPIsBase):
return None
def get_faults_by_id(self, alarm_id):
with fm_api_lock:
try:
resp = fm_core.get_by_aid(alarm_id)
if resp:
@ -251,6 +265,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Exception: 1. Input Alarm format is not valid
# 2. When there is operation failure
def set_fault(self, data):
with fm_api_lock:
self._check_required_attributes(data)
self._validate_attributes(data)
buff = self._alarm_to_str(data)
@ -264,6 +279,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Alarm doesn't exist: False
# Exception: When there is operation failure
def clear_fault(self, alarm_id, entity_instance_id):
with fm_api_lock:
sep = constants.FM_CLIENT_STR_SEP
buff = (sep + self._check_val(alarm_id) + sep +
self._check_val(entity_instance_id) + sep)
@ -282,6 +298,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Alarm doesn't exist: None
# Exception: When there is operation failure
def get_fault(self, alarm_id, entity_instance_id):
with fm_api_lock:
sep = constants.FM_CLIENT_STR_SEP
buff = (sep + self._check_val(alarm_id) + sep +
self._check_val(entity_instance_id) + sep)
@ -296,6 +313,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Alarm doesn't exist: False
# Exception: When there is operation failure
def clear_all(self, entity_instance_id):
with fm_api_lock:
resp = fm_core.clear_all(entity_instance_id)
if resp is False:
# There is operation failure
@ -311,6 +329,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Alarm doesn't exist: None
# Exception: When there is operation failure
def get_faults(self, entity_instance_id):
with fm_api_lock:
resp = fm_core.get_by_eid(entity_instance_id)
if resp is False:
raise APIException("Failed to execute get_faults.")
@ -327,6 +346,7 @@ class FaultAPIsV2(FaultAPIsBase):
# Alarm doesn't exist: None
# Exception: When there is operation failure
def get_faults_by_id(self, alarm_id):
with fm_api_lock:
resp = fm_core.get_by_aid(alarm_id)
if resp is False:
raise APIException("Failed to execute get_faults_by_id.")