From 86681b75985a457112db57b4d59469be3fb8a73e Mon Sep 17 00:00:00 2001 From: Girish Subramanya Date: Mon, 3 Oct 2022 10:04:55 -0400 Subject: [PATCH] Alarm Hostname controller function has in-service failure reported When compute services remain healthy: - listing alarms shall not refer to the obsoleted alarm below - 200.012 alarm: hostname controller function has an in-service failure. This update deletes the definition of the obsoleted alarm and any references to it. 200.012 is removed in the events.yaml file. Also updated any reference to this alarm definition. Need to also raise a Bug to track the Doc change. Test Plan: Verify on a Standard configuration that no alarms are listed for hostname controller in-service failure. Code (removal) changes exercised with fix prior to ansible bootstrap and host-unlock and verify no unexpected alarms. Regression: There is no need to test the alarm referred to here as it is obsolete. Closes-Bug: 1991531 Signed-off-by: Girish Subramanya Change-Id: I255af68155c5392ea42244b931516f742fa838c3 --- mtce-common/src/common/alarmUtil.h | 1 - mtce/src/alarm/alarm.h | 1 - mtce/src/common/nodeClass.cpp | 3 +-- mtce/src/maintenance/mtcAlarm.cpp | 32 ------------------------------ mtce/src/maintenance/mtcAlarm.h | 1 - 5 files changed, 1 insertion(+), 37 deletions(-) diff --git a/mtce-common/src/common/alarmUtil.h b/mtce-common/src/common/alarmUtil.h index 25100e95..e4aebfca 100644 --- a/mtce-common/src/common/alarmUtil.h +++ b/mtce-common/src/common/alarmUtil.h @@ -38,7 +38,6 @@ #define CLSTR_HB_ALARM_ID ((const char *)"200.009") #define BM_ALARM_ID ((const char *)"200.010") #define CONFIG_ALARM_ID ((const char *)"200.011") -#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */ #define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */ #define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. 
could not add */ #define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */ diff --git a/mtce/src/alarm/alarm.h b/mtce/src/alarm/alarm.h index 2ce56391..58e02e13 100644 --- a/mtce/src/alarm/alarm.h +++ b/mtce/src/alarm/alarm.h @@ -36,7 +36,6 @@ #define CLSTR_HB_ALARM_ID ((const char *)"200.009") #define BM_ALARM_ID ((const char *)"200.010") #define CONFIG_ALARM_ID ((const char *)"200.011") -#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */ #define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */ #define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */ #define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */ diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index e2320430..90dc3c68 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -9133,12 +9133,11 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s\n", node_ptr->hostname.c_str(), node_ptr->alarms[MTC_ALARM_ID__LOCK ] ? " Locked" : " .", node_ptr->alarms[MTC_ALARM_ID__CONFIG ] ? " Config" : " .", node_ptr->alarms[MTC_ALARM_ID__ENABLE ] ? " Enable" : " .", - node_ptr->alarms[MTC_ALARM_ID__CH_CONT ] ? " Control" : " .", node_ptr->alarms[MTC_ALARM_ID__CH_COMP ] ? " Compute" : " .", node_ptr->alarms[MTC_ALARM_ID__BM ] ? 
" Brd Mgmt" : " ."); mem_log (str); diff --git a/mtce/src/maintenance/mtcAlarm.cpp b/mtce/src/maintenance/mtcAlarm.cpp index 28d1b6bc..67c7e61e 100644 --- a/mtce/src/maintenance/mtcAlarm.cpp +++ b/mtce/src/maintenance/mtcAlarm.cpp @@ -155,36 +155,6 @@ void mtcAlarm_init ( void ) snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "Check Host's board management config and connectivity."); - /** Init Controller Failure Alarm Entry **********************************/ - - ptr = &alarm_list[MTC_ALARM_ID__CH_CONT]; - memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); - snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_CONT_ALARM_ID); - - ptr->name = "Controller Function" ; - ptr->instc_prefix = "" ; - - ptr->critl_reason = - ptr->major_reason = - ptr->minor_reason = "controller function has in-service failure while compute services " - "remain healthy."; - ptr->clear_reason = "controller function has recovered"; - - ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; - ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ; - ptr->alarm.inhibit_alarms = FM_FALSE ; - ptr->alarm.service_affecting = FM_TRUE ; - ptr->alarm.suppression = FM_TRUE ; - - ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ - ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ - - snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, - "Lock and then Unlock host to recover. " - "Avoid using 'Force Lock' action as that will impact compute services " - "running on this host. 
If lock action fails then contact next level " - "of support to investigate and recover."); - /** Init Compute Failure Alarm Entry *************************************/ ptr = &alarm_list[MTC_ALARM_ID__CH_COMP]; @@ -344,7 +314,6 @@ string _getIdentity ( mtc_alarm_id_enum id ) case MTC_ALARM_ID__CONFIG: return (CONFIG_ALARM_ID); case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID); case MTC_ALARM_ID__BM: return (BM_ALARM_ID); - case MTC_ALARM_ID__CH_CONT: return (CH_CONT_ALARM_ID); case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID); case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID); case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID); @@ -466,7 +435,6 @@ void nodeLinkClass::mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr ) else if (( id == MTC_ALARM_ID__CONFIG ) || ( id == MTC_ALARM_ID__ENABLE ) || ( id == MTC_ALARM_ID__BM ) || - ( id == MTC_ALARM_ID__CH_CONT) || ( id == MTC_ALARM_ID__CH_COMP)) { EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id); diff --git a/mtce/src/maintenance/mtcAlarm.h b/mtce/src/maintenance/mtcAlarm.h index 6e93f659..3d998ff3 100644 --- a/mtce/src/maintenance/mtcAlarm.h +++ b/mtce/src/maintenance/mtcAlarm.h @@ -29,7 +29,6 @@ typedef enum MTC_ALARM_ID__CONFIG, MTC_ALARM_ID__ENABLE, MTC_ALARM_ID__BM, - MTC_ALARM_ID__CH_CONT, /* Combo Host Controller Failure - with Active Compute */ MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */ MTC_LOG_ID__EVENT,