Protect FM API shared data with thread locking
Collectd runs all its python plugins concurrently. With the expansion of collectd python plugins due to the obsolesence of rmon collectd core dumps are being reported during collectd startup when the FM service on the controller is not running. Debug of the issue revealed that the core dumps are due to having no mutex around FM API's shared data. The required mutex is provided by this update by adding a while locked expression to the start of each API. Also fixed 3 pep8 errors. Closes-Bug: 1819473 Test Plan: PASS: Test before and after cases to confirm that without the change we see core dumps but with the change the API and collectd plugin behavior is correct without the core dumps. PASS: System install with current collectd plugins and fm's python API enhanced with locking. PASS: Have sm stop managing the fmManager process, kill it and then restart collectd over and over. Should not see any collectd core dumps. PASS: Verify nfv alarming still works Change-Id: I3d5ef0bd9cb774299b4c0f3b9e33cddb7c0f776c Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
b7e7f911e5
commit
7ae75e24c8
@ -14,6 +14,9 @@ import copy
|
|||||||
from . import constants
|
from . import constants
|
||||||
import six
|
import six
|
||||||
import fm_core
|
import fm_core
|
||||||
|
import threading
|
||||||
|
|
||||||
|
fm_api_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
class ClientException(Exception):
|
class ClientException(Exception):
|
||||||
@ -91,7 +94,8 @@ class FaultAPIsBase(object):
|
|||||||
sep + data.severity + sep + self._check_val(data.reason_text) +
|
sep + data.severity + sep + self._check_val(data.reason_text) +
|
||||||
sep + data.alarm_type + sep + data.probable_cause + sep +
|
sep + data.alarm_type + sep + data.probable_cause + sep +
|
||||||
self._check_val(data.proposed_repair_action) + sep +
|
self._check_val(data.proposed_repair_action) + sep +
|
||||||
str(data.service_affecting) + sep + str(data.suppression) + sep)
|
str(data.service_affecting) + sep +
|
||||||
|
str(data.suppression) + sep)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _str_to_alarm(alarm_str):
|
def _str_to_alarm(alarm_str):
|
||||||
@ -172,6 +176,7 @@ class FaultAPIsBase(object):
|
|||||||
class FaultAPIs(FaultAPIsBase):
|
class FaultAPIs(FaultAPIsBase):
|
||||||
|
|
||||||
def set_fault(self, data):
|
def set_fault(self, data):
|
||||||
|
with fm_api_lock:
|
||||||
self._check_required_attributes(data)
|
self._check_required_attributes(data)
|
||||||
self._validate_attributes(data)
|
self._validate_attributes(data)
|
||||||
buff = self._alarm_to_str(data)
|
buff = self._alarm_to_str(data)
|
||||||
@ -181,13 +186,16 @@ class FaultAPIs(FaultAPIsBase):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def clear_fault(self, alarm_id, entity_instance_id):
|
def clear_fault(self, alarm_id, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
sep = constants.FM_CLIENT_STR_SEP
|
sep = constants.FM_CLIENT_STR_SEP
|
||||||
buff = (sep + self._check_val(alarm_id) + sep +
|
buff = (sep + self._check_val(alarm_id) + sep +
|
||||||
self._check_val(entity_instance_id) + sep)
|
self._check_val(entity_instance_id) + sep)
|
||||||
try:
|
try:
|
||||||
resp = fm_core.clear(buff)
|
resp = fm_core.clear(buff)
|
||||||
# resp may be True/False/None after FaultAPIsV2 implementation.
|
# resp may be True/False/None after FaultAPIsV2
|
||||||
# to keep FaultAPIs the same as before, return False for None case.
|
# implementation.
|
||||||
|
# To keep FaultAPIs the same as before,
|
||||||
|
# return False for None case.
|
||||||
if resp is True:
|
if resp is True:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -196,6 +204,7 @@ class FaultAPIs(FaultAPIsBase):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_fault(self, alarm_id, entity_instance_id):
|
def get_fault(self, alarm_id, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
sep = constants.FM_CLIENT_STR_SEP
|
sep = constants.FM_CLIENT_STR_SEP
|
||||||
buff = (sep + self._check_val(alarm_id) + sep +
|
buff = (sep + self._check_val(alarm_id) + sep +
|
||||||
self._check_val(entity_instance_id) + sep)
|
self._check_val(entity_instance_id) + sep)
|
||||||
@ -208,10 +217,13 @@ class FaultAPIs(FaultAPIsBase):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def clear_all(self, entity_instance_id):
|
def clear_all(self, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
try:
|
try:
|
||||||
resp = fm_core.clear_all(entity_instance_id)
|
resp = fm_core.clear_all(entity_instance_id)
|
||||||
# resp may be True/False/None after FaultAPIsV2 implementation.
|
# resp may be True/False/None after FaultAPIsV2
|
||||||
# to keep FaultAPIs the same as before, return False for None case.
|
# implementation.
|
||||||
|
# To keep FaultAPIs the same as before,
|
||||||
|
# return False for None case.
|
||||||
if resp is True:
|
if resp is True:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -220,6 +232,7 @@ class FaultAPIs(FaultAPIsBase):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_faults(self, entity_instance_id):
|
def get_faults(self, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
try:
|
try:
|
||||||
resp = fm_core.get_by_eid(entity_instance_id)
|
resp = fm_core.get_by_eid(entity_instance_id)
|
||||||
if resp:
|
if resp:
|
||||||
@ -232,6 +245,7 @@ class FaultAPIs(FaultAPIsBase):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def get_faults_by_id(self, alarm_id):
|
def get_faults_by_id(self, alarm_id):
|
||||||
|
with fm_api_lock:
|
||||||
try:
|
try:
|
||||||
resp = fm_core.get_by_aid(alarm_id)
|
resp = fm_core.get_by_aid(alarm_id)
|
||||||
if resp:
|
if resp:
|
||||||
@ -251,6 +265,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Exception: 1. Input Alarm format is not valid
|
# Exception: 1. Input Alarm format is not valid
|
||||||
# 2. When there is operation failure
|
# 2. When there is operation failure
|
||||||
def set_fault(self, data):
|
def set_fault(self, data):
|
||||||
|
with fm_api_lock:
|
||||||
self._check_required_attributes(data)
|
self._check_required_attributes(data)
|
||||||
self._validate_attributes(data)
|
self._validate_attributes(data)
|
||||||
buff = self._alarm_to_str(data)
|
buff = self._alarm_to_str(data)
|
||||||
@ -264,6 +279,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Alarm doesn't exist: False
|
# Alarm doesn't exist: False
|
||||||
# Exception: When there is operation failure
|
# Exception: When there is operation failure
|
||||||
def clear_fault(self, alarm_id, entity_instance_id):
|
def clear_fault(self, alarm_id, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
sep = constants.FM_CLIENT_STR_SEP
|
sep = constants.FM_CLIENT_STR_SEP
|
||||||
buff = (sep + self._check_val(alarm_id) + sep +
|
buff = (sep + self._check_val(alarm_id) + sep +
|
||||||
self._check_val(entity_instance_id) + sep)
|
self._check_val(entity_instance_id) + sep)
|
||||||
@ -282,6 +298,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Alarm doesn't exist: None
|
# Alarm doesn't exist: None
|
||||||
# Exception: When there is operation failure
|
# Exception: When there is operation failure
|
||||||
def get_fault(self, alarm_id, entity_instance_id):
|
def get_fault(self, alarm_id, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
sep = constants.FM_CLIENT_STR_SEP
|
sep = constants.FM_CLIENT_STR_SEP
|
||||||
buff = (sep + self._check_val(alarm_id) + sep +
|
buff = (sep + self._check_val(alarm_id) + sep +
|
||||||
self._check_val(entity_instance_id) + sep)
|
self._check_val(entity_instance_id) + sep)
|
||||||
@ -296,6 +313,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Alarm doesn't exist: False
|
# Alarm doesn't exist: False
|
||||||
# Exception: When there is operation failure
|
# Exception: When there is operation failure
|
||||||
def clear_all(self, entity_instance_id):
|
def clear_all(self, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
resp = fm_core.clear_all(entity_instance_id)
|
resp = fm_core.clear_all(entity_instance_id)
|
||||||
if resp is False:
|
if resp is False:
|
||||||
# There is operation failure
|
# There is operation failure
|
||||||
@ -311,6 +329,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Alarm doesn't exist: None
|
# Alarm doesn't exist: None
|
||||||
# Exception: When there is operation failure
|
# Exception: When there is operation failure
|
||||||
def get_faults(self, entity_instance_id):
|
def get_faults(self, entity_instance_id):
|
||||||
|
with fm_api_lock:
|
||||||
resp = fm_core.get_by_eid(entity_instance_id)
|
resp = fm_core.get_by_eid(entity_instance_id)
|
||||||
if resp is False:
|
if resp is False:
|
||||||
raise APIException("Failed to execute get_faults.")
|
raise APIException("Failed to execute get_faults.")
|
||||||
@ -327,6 +346,7 @@ class FaultAPIsV2(FaultAPIsBase):
|
|||||||
# Alarm doesn't exist: None
|
# Alarm doesn't exist: None
|
||||||
# Exception: When there is operation failure
|
# Exception: When there is operation failure
|
||||||
def get_faults_by_id(self, alarm_id):
|
def get_faults_by_id(self, alarm_id):
|
||||||
|
with fm_api_lock:
|
||||||
resp = fm_core.get_by_aid(alarm_id)
|
resp = fm_core.get_by_aid(alarm_id)
|
||||||
if resp is False:
|
if resp is False:
|
||||||
raise APIException("Failed to execute get_faults_by_id.")
|
raise APIException("Failed to execute get_faults_by_id.")
|
||||||
|
Loading…
Reference in New Issue
Block a user