Alarm Hostname controller function has in-service failure reported

When compute services remain healthy:
 - listing alarms shall not refer to the obsolete alarm below
 - 200.012: <hostname> controller function has in-service failure

This update deletes the definition of the obsolete alarm and any
references to it.
Alarm 200.012 is removed from the events.yaml file.
All references to this alarm definition are also updated.
A separate bug also needs to be raised to track the documentation change.

Test Plan:
Verify on a Standard configuration that no alarms are listed for
hostname controller in-service failure.
Exercise the code (removal) changes with the fix applied prior to ansible
bootstrap and host-unlock, and verify that no unexpected alarms are raised.
Regression:
There is no need to test the alarm referred to here, as it is obsolete.

Closes-Bug: 1991531

Signed-off-by: Girish Subramanya <girish.subramanya@windriver.com>

Change-Id: I255af68155c5392ea42244b931516f742fa838c3
This commit is contained in:
Girish Subramanya 2022-10-03 10:04:55 -04:00
parent b763229079
commit 86681b7598
5 changed files with 1 additions and 37 deletions

View File

@ -38,7 +38,6 @@
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
#define BM_ALARM_ID ((const char *)"200.010")
#define CONFIG_ALARM_ID ((const char *)"200.011")
#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */

View File

@ -36,7 +36,6 @@
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
#define BM_ALARM_ID ((const char *)"200.010")
#define CONFIG_ALARM_ID ((const char *)"200.011")
#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */

View File

@ -9133,12 +9133,11 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s\n",
node_ptr->hostname.c_str(),
node_ptr->alarms[MTC_ALARM_ID__LOCK ] ? " Locked" : " .",
node_ptr->alarms[MTC_ALARM_ID__CONFIG ] ? " Config" : " .",
node_ptr->alarms[MTC_ALARM_ID__ENABLE ] ? " Enable" : " .",
node_ptr->alarms[MTC_ALARM_ID__CH_CONT ] ? " Control" : " .",
node_ptr->alarms[MTC_ALARM_ID__CH_COMP ] ? " Compute" : " .",
node_ptr->alarms[MTC_ALARM_ID__BM ] ? " Brd Mgmt" : " .");
mem_log (str);

View File

@ -155,36 +155,6 @@ void mtcAlarm_init ( void )
snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
"Check Host's board management config and connectivity.");
/** Init Controller Failure Alarm Entry **********************************/
ptr = &alarm_list[MTC_ALARM_ID__CH_CONT];
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_CONT_ALARM_ID);
ptr->name = "Controller Function" ;
ptr->instc_prefix = "" ;
ptr->critl_reason =
ptr->major_reason =
ptr->minor_reason = "controller function has in-service failure while compute services "
"remain healthy.";
ptr->clear_reason = "controller function has recovered";
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
ptr->alarm.inhibit_alarms = FM_FALSE ;
ptr->alarm.service_affecting = FM_TRUE ;
ptr->alarm.suppression = FM_TRUE ;
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
"Lock and then Unlock host to recover. "
"Avoid using 'Force Lock' action as that will impact compute services "
"running on this host. If lock action fails then contact next level "
"of support to investigate and recover.");
/** Init Compute Failure Alarm Entry *************************************/
ptr = &alarm_list[MTC_ALARM_ID__CH_COMP];
@ -344,7 +314,6 @@ string _getIdentity ( mtc_alarm_id_enum id )
case MTC_ALARM_ID__CONFIG: return (CONFIG_ALARM_ID);
case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID);
case MTC_ALARM_ID__BM: return (BM_ALARM_ID);
case MTC_ALARM_ID__CH_CONT: return (CH_CONT_ALARM_ID);
case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID);
case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID);
case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID);
@ -466,7 +435,6 @@ void nodeLinkClass::mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr )
else if (( id == MTC_ALARM_ID__CONFIG ) ||
( id == MTC_ALARM_ID__ENABLE ) ||
( id == MTC_ALARM_ID__BM ) ||
( id == MTC_ALARM_ID__CH_CONT) ||
( id == MTC_ALARM_ID__CH_COMP))
{
EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id);

View File

@ -29,7 +29,6 @@ typedef enum
MTC_ALARM_ID__CONFIG,
MTC_ALARM_ID__ENABLE,
MTC_ALARM_ID__BM,
MTC_ALARM_ID__CH_CONT, /* Combo Host Controller Failure - with Active Compute */
MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */
MTC_LOG_ID__EVENT,