Alarm Hostname controller function has in-service failure reported
When compute services remain healthy:
- listing alarms shall not refer to the obsoleted alarm below
- 200.012: alarm "hostname controller function has an in-service failure"

This update deletes the definition of the obsoleted alarm and any references to it. Alarm 200.012 is removed from the events.yaml file, and every reference to this alarm definition is updated as well. A bug also needs to be raised to track the documentation change.

Test Plan:
- Verify on a Standard configuration that no alarms are listed for the hostname controller in-service failure.
- Exercise the code (removal) changes with the fix applied prior to ansible bootstrap and host-unlock, and verify that there are no unexpected alarms.

Regression: There is no need to test the alarm referred to here, as it is obsolete.

Closes-Bug: 1991531
Signed-off-by: Girish Subramanya <girish.subramanya@windriver.com>
Change-Id: I255af68155c5392ea42244b931516f742fa838c3
This commit is contained in:
parent
b763229079
commit
86681b7598
|
@ -38,7 +38,6 @@
|
||||||
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
|
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
|
||||||
#define BM_ALARM_ID ((const char *)"200.010")
|
#define BM_ALARM_ID ((const char *)"200.010")
|
||||||
#define CONFIG_ALARM_ID ((const char *)"200.011")
|
#define CONFIG_ALARM_ID ((const char *)"200.011")
|
||||||
#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */
|
|
||||||
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
||||||
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
||||||
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
||||||
|
|
|
@ -36,7 +36,6 @@
|
||||||
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
|
#define CLSTR_HB_ALARM_ID ((const char *)"200.009")
|
||||||
#define BM_ALARM_ID ((const char *)"200.010")
|
#define BM_ALARM_ID ((const char *)"200.010")
|
||||||
#define CONFIG_ALARM_ID ((const char *)"200.011")
|
#define CONFIG_ALARM_ID ((const char *)"200.011")
|
||||||
#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */
|
|
||||||
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
||||||
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
||||||
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
||||||
|
|
|
@ -9133,12 +9133,11 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
|
||||||
void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
|
void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
|
||||||
{
|
{
|
||||||
char str[MAX_MEM_LOG_DATA] ;
|
char str[MAX_MEM_LOG_DATA] ;
|
||||||
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n",
|
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s\n",
|
||||||
node_ptr->hostname.c_str(),
|
node_ptr->hostname.c_str(),
|
||||||
node_ptr->alarms[MTC_ALARM_ID__LOCK ] ? " Locked" : " .",
|
node_ptr->alarms[MTC_ALARM_ID__LOCK ] ? " Locked" : " .",
|
||||||
node_ptr->alarms[MTC_ALARM_ID__CONFIG ] ? " Config" : " .",
|
node_ptr->alarms[MTC_ALARM_ID__CONFIG ] ? " Config" : " .",
|
||||||
node_ptr->alarms[MTC_ALARM_ID__ENABLE ] ? " Enable" : " .",
|
node_ptr->alarms[MTC_ALARM_ID__ENABLE ] ? " Enable" : " .",
|
||||||
node_ptr->alarms[MTC_ALARM_ID__CH_CONT ] ? " Control" : " .",
|
|
||||||
node_ptr->alarms[MTC_ALARM_ID__CH_COMP ] ? " Compute" : " .",
|
node_ptr->alarms[MTC_ALARM_ID__CH_COMP ] ? " Compute" : " .",
|
||||||
node_ptr->alarms[MTC_ALARM_ID__BM ] ? " Brd Mgmt" : " .");
|
node_ptr->alarms[MTC_ALARM_ID__BM ] ? " Brd Mgmt" : " .");
|
||||||
mem_log (str);
|
mem_log (str);
|
||||||
|
|
|
@ -155,36 +155,6 @@ void mtcAlarm_init ( void )
|
||||||
snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
||||||
"Check Host's board management config and connectivity.");
|
"Check Host's board management config and connectivity.");
|
||||||
|
|
||||||
/** Init Controller Failure Alarm Entry **********************************/
|
|
||||||
|
|
||||||
ptr = &alarm_list[MTC_ALARM_ID__CH_CONT];
|
|
||||||
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
||||||
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_CONT_ALARM_ID);
|
|
||||||
|
|
||||||
ptr->name = "Controller Function" ;
|
|
||||||
ptr->instc_prefix = "" ;
|
|
||||||
|
|
||||||
ptr->critl_reason =
|
|
||||||
ptr->major_reason =
|
|
||||||
ptr->minor_reason = "controller function has in-service failure while compute services "
|
|
||||||
"remain healthy.";
|
|
||||||
ptr->clear_reason = "controller function has recovered";
|
|
||||||
|
|
||||||
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
||||||
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
|
|
||||||
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
||||||
ptr->alarm.service_affecting = FM_TRUE ;
|
|
||||||
ptr->alarm.suppression = FM_TRUE ;
|
|
||||||
|
|
||||||
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
||||||
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
||||||
|
|
||||||
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
||||||
"Lock and then Unlock host to recover. "
|
|
||||||
"Avoid using 'Force Lock' action as that will impact compute services "
|
|
||||||
"running on this host. If lock action fails then contact next level "
|
|
||||||
"of support to investigate and recover.");
|
|
||||||
|
|
||||||
/** Init Compute Failure Alarm Entry *************************************/
|
/** Init Compute Failure Alarm Entry *************************************/
|
||||||
|
|
||||||
ptr = &alarm_list[MTC_ALARM_ID__CH_COMP];
|
ptr = &alarm_list[MTC_ALARM_ID__CH_COMP];
|
||||||
|
@ -344,7 +314,6 @@ string _getIdentity ( mtc_alarm_id_enum id )
|
||||||
case MTC_ALARM_ID__CONFIG: return (CONFIG_ALARM_ID);
|
case MTC_ALARM_ID__CONFIG: return (CONFIG_ALARM_ID);
|
||||||
case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID);
|
case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID);
|
||||||
case MTC_ALARM_ID__BM: return (BM_ALARM_ID);
|
case MTC_ALARM_ID__BM: return (BM_ALARM_ID);
|
||||||
case MTC_ALARM_ID__CH_CONT: return (CH_CONT_ALARM_ID);
|
|
||||||
case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID);
|
case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID);
|
||||||
case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID);
|
case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID);
|
||||||
case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID);
|
case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID);
|
||||||
|
@ -466,7 +435,6 @@ void nodeLinkClass::mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr )
|
||||||
else if (( id == MTC_ALARM_ID__CONFIG ) ||
|
else if (( id == MTC_ALARM_ID__CONFIG ) ||
|
||||||
( id == MTC_ALARM_ID__ENABLE ) ||
|
( id == MTC_ALARM_ID__ENABLE ) ||
|
||||||
( id == MTC_ALARM_ID__BM ) ||
|
( id == MTC_ALARM_ID__BM ) ||
|
||||||
( id == MTC_ALARM_ID__CH_CONT) ||
|
|
||||||
( id == MTC_ALARM_ID__CH_COMP))
|
( id == MTC_ALARM_ID__CH_COMP))
|
||||||
{
|
{
|
||||||
EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id);
|
EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id);
|
||||||
|
|
|
@ -29,7 +29,6 @@ typedef enum
|
||||||
MTC_ALARM_ID__CONFIG,
|
MTC_ALARM_ID__CONFIG,
|
||||||
MTC_ALARM_ID__ENABLE,
|
MTC_ALARM_ID__ENABLE,
|
||||||
MTC_ALARM_ID__BM,
|
MTC_ALARM_ID__BM,
|
||||||
MTC_ALARM_ID__CH_CONT, /* Combo Host Controller Failure - with Active Compute */
|
|
||||||
MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */
|
MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */
|
||||||
|
|
||||||
MTC_LOG_ID__EVENT,
|
MTC_LOG_ID__EVENT,
|
||||||
|
|
Loading…
Reference in New Issue