Merge "Failure case handling of LUKS service"
This commit is contained in:
commit
125601c2f9
@ -41,6 +41,7 @@
|
|||||||
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
||||||
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
||||||
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
||||||
|
#define LUKS_ALARM_ID ((const char *)"200.016") /* LUKS volume failure alarm */
|
||||||
|
|
||||||
#define EVENT_LOG_ID ((const char *)"200.020")
|
#define EVENT_LOG_ID ((const char *)"200.020")
|
||||||
#define COMMAND_LOG_ID ((const char *)"200.021")
|
#define COMMAND_LOG_ID ((const char *)"200.021")
|
||||||
|
@ -86,6 +86,7 @@ void daemon_exit ( void );
|
|||||||
#define MTC_FLAG__SM_DEGRADED (0x00000080)
|
#define MTC_FLAG__SM_DEGRADED (0x00000080)
|
||||||
#define MTC_FLAG__PATCHING (0x00000100) /* Patching in progress */
|
#define MTC_FLAG__PATCHING (0x00000100) /* Patching in progress */
|
||||||
#define MTC_FLAG__PATCHED (0x00000200) /* Patched but not reset */
|
#define MTC_FLAG__PATCHED (0x00000200) /* Patched but not reset */
|
||||||
|
#define MTC_FLAG__LUKS_VOL_FAILED (0x00000400)
|
||||||
#define MTC_FLAG__SM_UNHEALTHY (0x00001000)
|
#define MTC_FLAG__SM_UNHEALTHY (0x00001000)
|
||||||
|
|
||||||
#define MTC_UNHEALTHY_THRESHOLD (3)
|
#define MTC_UNHEALTHY_THRESHOLD (3)
|
||||||
@ -289,6 +290,7 @@ typedef enum
|
|||||||
#define MTC_TASK_AR_DISABLED_SERVICES "Service Failure, threshold reached, Lock/Unlock to retry"
|
#define MTC_TASK_AR_DISABLED_SERVICES "Service Failure, threshold reached, Lock/Unlock to retry"
|
||||||
#define MTC_TASK_AR_DISABLED_ENABLE "Enable Failure, threshold reached, Lock/Unlock to retry"
|
#define MTC_TASK_AR_DISABLED_ENABLE "Enable Failure, threshold reached, Lock/Unlock to retry"
|
||||||
#define MTC_TASK_AR_DISABLED_HEARTBEAT "Heartbeat Failure, threshold reached, Lock/Unlock to retry"
|
#define MTC_TASK_AR_DISABLED_HEARTBEAT "Heartbeat Failure, threshold reached, Lock/Unlock to retry"
|
||||||
|
#define MTC_TASK_AR_DISABLED_LUKS "LUKS volume failure, threshold reached, Lock/Unlock to retry"
|
||||||
|
|
||||||
#define MTC_TASK_RESET_FAIL "Reset Failed"
|
#define MTC_TASK_RESET_FAIL "Reset Failed"
|
||||||
#define MTC_TASK_RESET_QUEUE "Reset Failed, retrying (%d of %d)"
|
#define MTC_TASK_RESET_QUEUE "Reset Failed, retrying (%d of %d)"
|
||||||
@ -1020,7 +1022,7 @@ string get_configStages_str ( mtc_configStages_enum stage );
|
|||||||
#define DEGRADE_MASK_CONFIG 0x00000400
|
#define DEGRADE_MASK_CONFIG 0x00000400
|
||||||
#define DEGRADE_MASK_COLLECTD 0x00000800
|
#define DEGRADE_MASK_COLLECTD 0x00000800
|
||||||
#define DEGRADE_MASK_ENABLE 0x00001000
|
#define DEGRADE_MASK_ENABLE 0x00001000
|
||||||
#define DEGRADE_MASK_RES4 0x00002000
|
#define DEGRADE_MASK_LUKS 0x00002000
|
||||||
#define DEGRADE_MASK_RES5 0x00004000
|
#define DEGRADE_MASK_RES5 0x00004000
|
||||||
#define DEGRADE_MASK_RES6 0x00008000
|
#define DEGRADE_MASK_RES6 0x00008000
|
||||||
|
|
||||||
@ -1261,6 +1263,7 @@ typedef enum
|
|||||||
MTC_AR_DISABLE_CAUSE__GOENABLE,
|
MTC_AR_DISABLE_CAUSE__GOENABLE,
|
||||||
MTC_AR_DISABLE_CAUSE__HOST_SERVICES,
|
MTC_AR_DISABLE_CAUSE__HOST_SERVICES,
|
||||||
MTC_AR_DISABLE_CAUSE__HEARTBEAT,
|
MTC_AR_DISABLE_CAUSE__HEARTBEAT,
|
||||||
|
MTC_AR_DISABLE_CAUSE__LUKS,
|
||||||
MTC_AR_DISABLE_CAUSE__LAST,
|
MTC_AR_DISABLE_CAUSE__LAST,
|
||||||
MTC_AR_DISABLE_CAUSE__NONE,
|
MTC_AR_DISABLE_CAUSE__NONE,
|
||||||
} autorecovery_disable_cause_enum ;
|
} autorecovery_disable_cause_enum ;
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
|
||||||
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */
|
||||||
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
|
||||||
|
#define LUKS_ALARM_ID ((const char *)"200.016") /* LUKS volume failure alarm */
|
||||||
|
|
||||||
#define EVENT_LOG_ID ((const char *)"200.020")
|
#define EVENT_LOG_ID ((const char *)"200.020")
|
||||||
#define COMMAND_LOG_ID ((const char *)"200.021")
|
#define COMMAND_LOG_ID ((const char *)"200.021")
|
||||||
|
@ -1640,6 +1640,41 @@ int nodeLinkClass::lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_p
|
|||||||
return (FAIL);
|
return (FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Generate a log and a critical alarm if the LUKS volume config failed */
|
||||||
|
int nodeLinkClass::alarm_luks_failure ( struct nodeLinkClass::node * node_ptr )
|
||||||
|
{
|
||||||
|
if ( (node_ptr->degrade_mask & DEGRADE_MASK_LUKS) == 0 )
|
||||||
|
{
|
||||||
|
node_ptr->degrade_mask |= DEGRADE_MASK_LUKS ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( node_ptr->alarms[MTC_ALARM_ID__LUKS] != FM_ALARM_SEVERITY_CRITICAL )
|
||||||
|
{
|
||||||
|
elog ("%s critical luks filesystem config failure\n", node_ptr->hostname.c_str());
|
||||||
|
|
||||||
|
mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__LUKS );
|
||||||
|
node_ptr->alarms[MTC_ALARM_ID__LUKS] = FM_ALARM_SEVERITY_CRITICAL ;
|
||||||
|
}
|
||||||
|
return (PASS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clear the luks alarm and degrade flag */
|
||||||
|
int nodeLinkClass::alarm_luks_clear ( struct nodeLinkClass::node * node_ptr )
|
||||||
|
{
|
||||||
|
if ( node_ptr->degrade_mask & DEGRADE_MASK_LUKS )
|
||||||
|
{
|
||||||
|
node_ptr->degrade_mask &= ~DEGRADE_MASK_LUKS ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( node_ptr->alarms[MTC_ALARM_ID__LUKS] != FM_ALARM_SEVERITY_CLEAR )
|
||||||
|
{
|
||||||
|
ilog ("%s luks config alarm clear\n", node_ptr->hostname.c_str());
|
||||||
|
|
||||||
|
mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__LUKS );
|
||||||
|
node_ptr->alarms[MTC_ALARM_ID__LUKS] = FM_ALARM_SEVERITY_CLEAR ;
|
||||||
|
}
|
||||||
|
return (PASS);
|
||||||
|
}
|
||||||
|
|
||||||
/* Generate a log and a critical alarm if the node config failed */
|
/* Generate a log and a critical alarm if the node config failed */
|
||||||
int nodeLinkClass::alarm_config_failure ( struct nodeLinkClass::node * node_ptr )
|
int nodeLinkClass::alarm_config_failure ( struct nodeLinkClass::node * node_ptr )
|
||||||
|
@ -1106,6 +1106,9 @@ private:
|
|||||||
int alarm_config_clear ( struct nodeLinkClass::node * node_ptr );
|
int alarm_config_clear ( struct nodeLinkClass::node * node_ptr );
|
||||||
int alarm_config_failure ( struct nodeLinkClass::node * node_ptr );
|
int alarm_config_failure ( struct nodeLinkClass::node * node_ptr );
|
||||||
|
|
||||||
|
int alarm_luks_clear ( struct nodeLinkClass::node * node_ptr );
|
||||||
|
int alarm_luks_failure ( struct nodeLinkClass::node * node_ptr );
|
||||||
|
|
||||||
int alarm_compute_clear ( struct nodeLinkClass::node * node_ptr, bool force );
|
int alarm_compute_clear ( struct nodeLinkClass::node * node_ptr, bool force );
|
||||||
int alarm_compute_failure ( struct nodeLinkClass::node * node_ptr , EFmAlarmSeverityT sev );
|
int alarm_compute_failure ( struct nodeLinkClass::node * node_ptr , EFmAlarmSeverityT sev );
|
||||||
|
|
||||||
|
@ -184,6 +184,34 @@ void mtcAlarm_init ( void )
|
|||||||
"and Switch Activity (Swact) to it as soon as possible. If the alarm "
|
"and Switch Activity (Swact) to it as soon as possible. If the alarm "
|
||||||
"persists then Lock/Unlock host to recover its local compute service.");
|
"persists then Lock/Unlock host to recover its local compute service.");
|
||||||
|
|
||||||
|
/** LUKS volume config failure Alarm Entry *************************************/
|
||||||
|
|
||||||
|
ptr = &alarm_list[MTC_ALARM_ID__LUKS];
|
||||||
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
||||||
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", LUKS_ALARM_ID);
|
||||||
|
|
||||||
|
ptr->name = "LUKS volume failure" ;
|
||||||
|
ptr->instc_prefix = "" ;
|
||||||
|
|
||||||
|
ptr->minor_reason =
|
||||||
|
ptr->major_reason =
|
||||||
|
ptr->critl_reason = "LUKS volume is not active or functioning properly.";
|
||||||
|
ptr->clear_reason = "'LUKS volume' has been successfully unsealed and service is functioning properly.";
|
||||||
|
|
||||||
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
||||||
|
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
|
||||||
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
||||||
|
ptr->alarm.service_affecting = FM_FALSE ;
|
||||||
|
ptr->alarm.suppression = FM_TRUE ;
|
||||||
|
|
||||||
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
||||||
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
||||||
|
|
||||||
|
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
||||||
|
"If this alarm does not automatically clear after some time and "
|
||||||
|
"continues to be asserted after Host is locked and unlocked then "
|
||||||
|
"contact next level of support for root cause analysis and recovery.");
|
||||||
|
|
||||||
/** Init Event Log Entry *************************************************/
|
/** Init Event Log Entry *************************************************/
|
||||||
|
|
||||||
ptr = &alarm_list[MTC_LOG_ID__EVENT];
|
ptr = &alarm_list[MTC_LOG_ID__EVENT];
|
||||||
@ -315,6 +343,7 @@ string _getIdentity ( mtc_alarm_id_enum id )
|
|||||||
case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID);
|
case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID);
|
||||||
case MTC_ALARM_ID__BM: return (BM_ALARM_ID);
|
case MTC_ALARM_ID__BM: return (BM_ALARM_ID);
|
||||||
case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID);
|
case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID);
|
||||||
|
case MTC_ALARM_ID__LUKS: return (LUKS_ALARM_ID);
|
||||||
case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID);
|
case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID);
|
||||||
case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID);
|
case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID);
|
||||||
case MTC_LOG_ID__STATECHANGE: return (STATECHANGE_LOG_ID);
|
case MTC_LOG_ID__STATECHANGE: return (STATECHANGE_LOG_ID);
|
||||||
|
@ -30,6 +30,7 @@ typedef enum
|
|||||||
MTC_ALARM_ID__ENABLE,
|
MTC_ALARM_ID__ENABLE,
|
||||||
MTC_ALARM_ID__BM,
|
MTC_ALARM_ID__BM,
|
||||||
MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */
|
MTC_ALARM_ID__CH_COMP, /* Combo Host Compute Failure - on last Controller */
|
||||||
|
MTC_ALARM_ID__LUKS,
|
||||||
|
|
||||||
MTC_LOG_ID__EVENT,
|
MTC_LOG_ID__EVENT,
|
||||||
MTC_LOG_ID__COMMAND,
|
MTC_LOG_ID__COMMAND,
|
||||||
|
@ -771,6 +771,11 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
|
|||||||
|
|
||||||
/* Insert the mtce flags */
|
/* Insert the mtce flags */
|
||||||
msg.parm[MTC_PARM_FLAGS_IDX] = 0 ;
|
msg.parm[MTC_PARM_FLAGS_IDX] = 0 ;
|
||||||
|
|
||||||
|
//Check if LUKS FS manager service is active
|
||||||
|
int exitstatus = system("cryptsetup status luks_encrypted_vault");
|
||||||
|
if ( 0 != exitstatus )
|
||||||
|
msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__LUKS_VOL_FAILED ;
|
||||||
if ( daemon_is_file_present ( CONFIG_COMPLETE_FILE ) )
|
if ( daemon_is_file_present ( CONFIG_COMPLETE_FILE ) )
|
||||||
msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_CONFIGURED ;
|
msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_CONFIGURED ;
|
||||||
if ( daemon_is_file_present ( CONFIG_FAIL_FILE ) )
|
if ( daemon_is_file_present ( CONFIG_FAIL_FILE ) )
|
||||||
|
@ -406,6 +406,8 @@ static int mtc_config_handler ( void * user,
|
|||||||
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value);
|
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value);
|
||||||
else if (MATCH("agent", "ar_heartbeat_threshold"))
|
else if (MATCH("agent", "ar_heartbeat_threshold"))
|
||||||
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value);
|
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value);
|
||||||
|
else if (MATCH("agent", "ar_luks_threshold"))
|
||||||
|
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__LUKS] = atoi(value);
|
||||||
|
|
||||||
else if (MATCH("agent", "ar_config_interval"))
|
else if (MATCH("agent", "ar_config_interval"))
|
||||||
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__CONFIG] = atoi(value);
|
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__CONFIG] = atoi(value);
|
||||||
@ -415,6 +417,8 @@ static int mtc_config_handler ( void * user,
|
|||||||
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value);
|
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value);
|
||||||
else if (MATCH("agent", "ar_heartbeat_interval"))
|
else if (MATCH("agent", "ar_heartbeat_interval"))
|
||||||
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value);
|
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value);
|
||||||
|
else if (MATCH("agent", "ar_luks_interval"))
|
||||||
|
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__LUKS] = atoi(value);
|
||||||
|
|
||||||
|
|
||||||
else
|
else
|
||||||
@ -757,6 +761,9 @@ int daemon_configure ( void )
|
|||||||
ilog("AR Heartbeat: %d (threshold) %d sec (retry interval)",
|
ilog("AR Heartbeat: %d (threshold) %d sec (retry interval)",
|
||||||
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT],
|
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT],
|
||||||
mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__HEARTBEAT]);
|
mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__HEARTBEAT]);
|
||||||
|
ilog("AR luks : %d (threshold) %d sec (retry interval)",
|
||||||
|
mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__LUKS],
|
||||||
|
mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__LUKS]);
|
||||||
|
|
||||||
/* Get this Controller Activity State */
|
/* Get this Controller Activity State */
|
||||||
mtc_config.active = daemon_get_run_option ("active") ;
|
mtc_config.active = daemon_get_run_option ("active") ;
|
||||||
|
@ -1099,8 +1099,27 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
|
|||||||
node_ptr->hbsClient_ready = false ;
|
node_ptr->hbsClient_ready = false ;
|
||||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||||
|
|
||||||
|
/* Check for LUKS volume availability */
|
||||||
|
if ( node_ptr->mtce_flags & MTC_FLAG__LUKS_VOL_FAILED )
|
||||||
|
{
|
||||||
|
elog ("%s LUKS volume failure (oob:%x)\n",
|
||||||
|
node_ptr->hostname.c_str(),
|
||||||
|
node_ptr->mtce_flags)
|
||||||
|
|
||||||
|
/* raise an alarm for the failure of the config */
|
||||||
|
alarm_luks_failure ( node_ptr );
|
||||||
|
|
||||||
|
mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_CONFIG_FAIL );
|
||||||
|
enableStageChange ( node_ptr, MTC_ENABLE__FAILURE );
|
||||||
|
|
||||||
|
/* handle auto recovery for this failure */
|
||||||
|
if ( ar_manage ( node_ptr,
|
||||||
|
MTC_AR_DISABLE_CAUSE__LUKS,
|
||||||
|
MTC_TASK_AR_DISABLED_LUKS ) != PASS )
|
||||||
|
break ;
|
||||||
|
}
|
||||||
/* Check to see if the host is/got configured correctly */
|
/* Check to see if the host is/got configured correctly */
|
||||||
if ((( !node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED )) ||
|
else if ((( !node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED )) ||
|
||||||
(( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY )))
|
(( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY )))
|
||||||
{
|
{
|
||||||
elog ("%s configuration failed or incomplete (oob:%x)\n",
|
elog ("%s configuration failed or incomplete (oob:%x)\n",
|
||||||
@ -6341,7 +6360,8 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
|
|||||||
(( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG)) ||
|
(( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG)) ||
|
||||||
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_GOENABLE))||
|
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_GOENABLE))||
|
||||||
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_SERVICES))||
|
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_SERVICES))||
|
||||||
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_HEARTBEAT))))
|
( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_HEARTBEAT))||
|
||||||
|
(!node_ptr->task.compare(MTC_TASK_AR_DISABLED_LUKS))))
|
||||||
{
|
{
|
||||||
if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG ))
|
if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG ))
|
||||||
{
|
{
|
||||||
@ -6362,6 +6382,11 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
|
|||||||
{
|
{
|
||||||
node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__HEARTBEAT ;
|
node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__HEARTBEAT ;
|
||||||
}
|
}
|
||||||
|
else if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_LUKS ))
|
||||||
|
{
|
||||||
|
node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__LUKS ;
|
||||||
|
alarm_luks_failure ( node_ptr );
|
||||||
|
}
|
||||||
node_ptr->ar_disabled = true ;
|
node_ptr->ar_disabled = true ;
|
||||||
|
|
||||||
if ( THIS_HOST )
|
if ( THIS_HOST )
|
||||||
@ -7949,10 +7974,19 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
|
|||||||
{
|
{
|
||||||
/* clear the SM degrade flag */
|
/* clear the SM degrade flag */
|
||||||
node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ;
|
node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ;
|
||||||
|
|
||||||
ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str());
|
ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* In-service luks volume config failure handling */
|
||||||
|
if ( !(node_ptr->mtce_flags & MTC_FLAG__LUKS_VOL_FAILED))
|
||||||
|
{
|
||||||
|
alarm_luks_clear ( node_ptr );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
alarm_luks_failure ( node_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In-service Config Failure/Alarm handling
|
* In-service Config Failure/Alarm handling
|
||||||
*/
|
*/
|
||||||
|
@ -56,6 +56,7 @@ ar_config_threshold = 2
|
|||||||
ar_goenable_threshold = 2
|
ar_goenable_threshold = 2
|
||||||
ar_hostservices_threshold = 2
|
ar_hostservices_threshold = 2
|
||||||
ar_heartbeat_threshold = 2
|
ar_heartbeat_threshold = 2
|
||||||
|
ar_luks_threshold = 2
|
||||||
|
|
||||||
; Service specific Auto Recovery retry interval.
|
; Service specific Auto Recovery retry interval.
|
||||||
;
|
;
|
||||||
@ -68,6 +69,7 @@ ar_config_interval = 30
|
|||||||
ar_goenable_interval = 30
|
ar_goenable_interval = 30
|
||||||
ar_hostservices_interval = 30
|
ar_hostservices_interval = 30
|
||||||
ar_heartbeat_interval = 600
|
ar_heartbeat_interval = 600
|
||||||
|
ar_luks_interval = 30
|
||||||
|
|
||||||
api_retries = 10 ; number of API retries b4 failure
|
api_retries = 10 ; number of API retries b4 failure
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user