From 7ae75e24c8c425bb1b2741a55d203c5069072990 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Wed, 3 Apr 2019 15:50:41 -0400 Subject: [PATCH] Protect FM API shared data with thread locking Collectd runs all its python plugins concurrently. With the expansion of collectd python plugins due to the obsolesence of rmon collectd core dumps are being reported during collectd startup when the FM service on the controller is not running. Debug of the issue revealed that the core dumps are due to having no mutex around FM API's shared data. The required mutex is provided by this update by adding a while locked expression to the start of each API. Also fixed 3 pep8 errors. Closes-Bug: 1819473 Test Plan: PASS: Test before and after cases to confirm that without the change we see core dumps but with the change the API and collectd plugin behavior is correct without the core dumps. PASS: System install with current collectd plugins and fm's python API enhanced with locking. PASS: Have sm stop managing the fmManager process, kill it and then restart collectd over and over. Should not see any collectd core dumps. PASS: Verify nfv alarming still works Change-Id: I3d5ef0bd9cb774299b4c0f3b9e33cddb7c0f776c Signed-off-by: Eric MacDonald --- fm-api/fm_api/fm_api.py | 244 ++++++++++++++++++++++------------------ 1 file changed, 132 insertions(+), 112 deletions(-) diff --git a/fm-api/fm_api/fm_api.py b/fm-api/fm_api/fm_api.py index 3c070e9c..d2cd34d8 100755 --- a/fm-api/fm_api/fm_api.py +++ b/fm-api/fm_api/fm_api.py @@ -14,6 +14,9 @@ import copy from . import constants import six import fm_core +import threading + +fm_api_lock = threading.Lock() class ClientException(Exception): @@ -91,7 +94,8 @@ class FaultAPIsBase(object): sep + data.severity + sep + self._check_val(data.reason_text) + sep + data.alarm_type + sep + data.probable_cause + sep + self._check_val(data.proposed_repair_action) + sep + - str(data.service_affecting) + sep + str(data.suppression) + sep) + str(data.service_affecting) + sep + + str(data.suppression) + sep) @staticmethod def _str_to_alarm(alarm_str): @@ -172,76 +176,86 @@ class FaultAPIsBase(object): class FaultAPIs(FaultAPIsBase): def set_fault(self, data): - self._check_required_attributes(data) - self._validate_attributes(data) - buff = self._alarm_to_str(data) - try: - return fm_core.set(buff) - except (RuntimeError, SystemError, TypeError): - return None + with fm_api_lock: + self._check_required_attributes(data) + self._validate_attributes(data) + buff = self._alarm_to_str(data) + try: + return fm_core.set(buff) + except (RuntimeError, SystemError, TypeError): + return None def clear_fault(self, alarm_id, entity_instance_id): - sep = constants.FM_CLIENT_STR_SEP - buff = (sep + self._check_val(alarm_id) + sep + - self._check_val(entity_instance_id) + sep) - try: - resp = fm_core.clear(buff) - # resp may be True/False/None after FaultAPIsV2 implementation. - # to keep FaultAPIs the same as before, return False for None case. - if resp is True: - return True - else: + with fm_api_lock: + sep = constants.FM_CLIENT_STR_SEP + buff = (sep + self._check_val(alarm_id) + sep + + self._check_val(entity_instance_id) + sep) + try: + resp = fm_core.clear(buff) + # resp may be True/False/None after FaultAPIsV2 + # implementation. + # To keep FaultAPIs the same as before, + # return False for None case. + if resp is True: + return True + else: + return False + except (RuntimeError, SystemError, TypeError): return False - except (RuntimeError, SystemError, TypeError): - return False def get_fault(self, alarm_id, entity_instance_id): - sep = constants.FM_CLIENT_STR_SEP - buff = (sep + self._check_val(alarm_id) + sep + - self._check_val(entity_instance_id) + sep) - try: - resp = fm_core.get(buff) - if resp: - return self._str_to_alarm(resp) - except (RuntimeError, SystemError, TypeError): - pass - return None + with fm_api_lock: + sep = constants.FM_CLIENT_STR_SEP + buff = (sep + self._check_val(alarm_id) + sep + + self._check_val(entity_instance_id) + sep) + try: + resp = fm_core.get(buff) + if resp: + return self._str_to_alarm(resp) + except (RuntimeError, SystemError, TypeError): + pass + return None def clear_all(self, entity_instance_id): - try: - resp = fm_core.clear_all(entity_instance_id) - # resp may be True/False/None after FaultAPIsV2 implementation. - # to keep FaultAPIs the same as before, return False for None case. - if resp is True: - return True - else: + with fm_api_lock: + try: + resp = fm_core.clear_all(entity_instance_id) + # resp may be True/False/None after FaultAPIsV2 + # implementation. + # To keep FaultAPIs the same as before, + # return False for None case. + if resp is True: + return True + else: + return False + except (RuntimeError, SystemError, TypeError): return False - except (RuntimeError, SystemError, TypeError): - return False def get_faults(self, entity_instance_id): - try: - resp = fm_core.get_by_eid(entity_instance_id) - if resp: - data = [] - for i in resp: - data.append(self._str_to_alarm(i)) - return data - except (RuntimeError, SystemError, TypeError): - pass - return None + with fm_api_lock: + try: + resp = fm_core.get_by_eid(entity_instance_id) + if resp: + data = [] + for i in resp: + data.append(self._str_to_alarm(i)) + return data + except (RuntimeError, SystemError, TypeError): + pass + return None def get_faults_by_id(self, alarm_id): - try: - resp = fm_core.get_by_aid(alarm_id) - if resp: - data = [] - for i in resp: - data.append(self._str_to_alarm(i)) - return data - except (RuntimeError, SystemError, TypeError): - pass - return None + with fm_api_lock: + try: + resp = fm_core.get_by_aid(alarm_id) + if resp: + data = [] + for i in resp: + data.append(self._str_to_alarm(i)) + return data + except (RuntimeError, SystemError, TypeError): + pass + return None class FaultAPIsV2(FaultAPIsBase): @@ -251,10 +265,11 @@ class FaultAPIsV2(FaultAPIsBase): # Exception: 1. Input Alarm format is not valid # 2. When there is operation failure def set_fault(self, data): - self._check_required_attributes(data) - self._validate_attributes(data) - buff = self._alarm_to_str(data) - uuid = fm_core.set(buff) + with fm_api_lock: + self._check_required_attributes(data) + self._validate_attributes(data) + buff = self._alarm_to_str(data) + uuid = fm_core.set(buff) if uuid is None: raise APIException("Failed to execute set_fault.") return uuid @@ -264,76 +279,81 @@ class FaultAPIsV2(FaultAPIsBase): # Alarm doesn't exist: False # Exception: When there is operation failure def clear_fault(self, alarm_id, entity_instance_id): - sep = constants.FM_CLIENT_STR_SEP - buff = (sep + self._check_val(alarm_id) + sep + - self._check_val(entity_instance_id) + sep) - resp = fm_core.clear(buff) - if resp is False: - # There is operation failure - raise APIException("Failed to execute clear_fault.") - elif resp is None: - # alarm is not found - return False - else: - return True + with fm_api_lock: + sep = constants.FM_CLIENT_STR_SEP + buff = (sep + self._check_val(alarm_id) + sep + + self._check_val(entity_instance_id) + sep) + resp = fm_core.clear(buff) + if resp is False: + # There is operation failure + raise APIException("Failed to execute clear_fault.") + elif resp is None: + # alarm is not found + return False + else: + return True # Input: alarm_id, entity_instance_id # Return: Success: Alarm # Alarm doesn't exist: None # Exception: When there is operation failure def get_fault(self, alarm_id, entity_instance_id): - sep = constants.FM_CLIENT_STR_SEP - buff = (sep + self._check_val(alarm_id) + sep + - self._check_val(entity_instance_id) + sep) - resp = fm_core.get(buff) - if resp is False: - raise APIException("Failed to execute get_fault.") - else: - return self._str_to_alarm(resp) if resp else None + with fm_api_lock: + sep = constants.FM_CLIENT_STR_SEP + buff = (sep + self._check_val(alarm_id) + sep + + self._check_val(entity_instance_id) + sep) + resp = fm_core.get(buff) + if resp is False: + raise APIException("Failed to execute get_fault.") + else: + return self._str_to_alarm(resp) if resp else None # Input: entity_instance_id # Return: Success: True # Alarm doesn't exist: False # Exception: When there is operation failure def clear_all(self, entity_instance_id): - resp = fm_core.clear_all(entity_instance_id) - if resp is False: - # There is operation failure - raise APIException("Failed to execute clear_all.") - elif resp is None: - # alarm is not found - return False - else: - return True + with fm_api_lock: + resp = fm_core.clear_all(entity_instance_id) + if resp is False: + # There is operation failure + raise APIException("Failed to execute clear_all.") + elif resp is None: + # alarm is not found + return False + else: + return True # Input: entity_instance_id # Return: Success: Alarm list # Alarm doesn't exist: None # Exception: When there is operation failure def get_faults(self, entity_instance_id): - resp = fm_core.get_by_eid(entity_instance_id) - if resp is False: - raise APIException("Failed to execute get_faults.") - elif resp: - data = [] - for i in resp: - data.append(self._str_to_alarm(i)) - return data - else: - return None + with fm_api_lock: + resp = fm_core.get_by_eid(entity_instance_id) + if resp is False: + raise APIException("Failed to execute get_faults.") + elif resp: + data = [] + for i in resp: + data.append(self._str_to_alarm(i)) + return data + else: + return None # Input: alarm_id # Return: Success: Alarm list # Alarm doesn't exist: None # Exception: When there is operation failure def get_faults_by_id(self, alarm_id): - resp = fm_core.get_by_aid(alarm_id) - if resp is False: - raise APIException("Failed to execute get_faults_by_id.") - elif resp: - data = [] - for i in resp: - data.append(self._str_to_alarm(i)) - return data - else: - return None + with fm_api_lock: + resp = fm_core.get_by_aid(alarm_id) + if resp is False: + raise APIException("Failed to execute get_faults_by_id.") + elif resp: + data = [] + for i in resp: + data.append(self._str_to_alarm(i)) + return data + else: + return None