18922761a6
Signed-off-by: Dean Troyer <dtroyer@gmail.com>
660 lines
25 KiB
C++
660 lines
25 KiB
C++
/*
|
|
* Copyright (c) 2013-2016 Wind River Systems, Inc.
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
*/
|
|
|
|
/****************************************************************************
|
|
* @file
|
|
* Wind River CGTS Platform Node "Handlers" Implementation
|
|
*
|
|
* Description: This file contains the handlers that implement the X.731 FSM.
|
|
|
|
* Interfaces:
|
|
*
|
|
 *   nodeLinkClass::enable_subf_handler
|
|
* nodeLinkClass:: disable_handler
|
|
* nodeLinkClass:: delete_handler
|
|
* nodeLinkClass::degrade_handler
|
|
* nodeLinkClass::reset_handler
|
|
* nodeLinkClass::event_handler
|
|
* nodeLinkClass::recovery_handler
|
|
|
|
****************************************************************************/
|
|
|
|
using namespace std;
|
|
|
|
#define __AREA__ "hdl"
|
|
|
|
#include "nodeClass.h" /* All base stuff */
|
|
#include "mtcAlarm.h" /* for ... mtcAlarm_<severity> */
|
|
#include "mtcNodeMsg.h" /* for ... send_mtc_cmd */
|
|
#include "nodeTimers.h" /* for ... mtcTimer_start/stop */
|
|
#include "jsonUtil.h" /* for ... jsonApi_array_value */
|
|
#include "mtcNodeHdlrs.h" /* for ... mtcTimer_handler */
|
|
#include "mtcInvApi.h" /* for ... SYSINV API */
|
|
#include "mtcSmgrApi.h" /* for ... SM API */
|
|
#include "mtcVimApi.h" /* for ... VIm API */
|
|
|
|
#include "daemon_common.h"
|
|
|
|
/*****************************************************************************
 *
 * Name       : nodeLinkClass::enable_subf_handler
 *
 * Purpose    : Service one pass of the subfunction (compute) enable FSM for
 *              the specified host.
 *
 * Description: Dispatches on node_ptr->handlerStage.enable. A stage either
 *              leaves the stage unchanged (keep waiting) or moves the FSM
 *              on with enableStageChange. Every failure detected by this
 *              handler is routed through MTC_ENABLE__SUBF_FAILED, which in
 *              turn hands off to MTC_ENABLE__FAILURE_WAIT.
 *
 * Parameters : node_ptr - the host whose subfunction is being enabled.
 *                         Dereferenced unconditionally.
 *
 * Returns    : PASS by default. The FAILURE_WAIT, HOST_SERVICES_WAIT and
 *              WORKQUEUE_WAIT stages return whatever workQueue_done or
 *              host_services_handler reported on this pass.
 *              FAIL_BAD_CASE is returned for a stage not handled here.
 *
 *****************************************************************************/
int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
{
    int rc = PASS ;

    /* Every log produced here is prefixed with '<hostname>-compute' */
    string subf_name ( node_ptr->hostname );
    subf_name += "-compute" ;
    const char * const prefix = subf_name.c_str() ;

    /* Set when SIMPLEX is true, or when servicing THIS_HOST while either
     * of the inactive controller in-service checks reports false.
     * Used below to select the failure handling path and task text. */
    const bool no_failover =
        ( SIMPLEX ) ||
        (( THIS_HOST ) &&
         !(( this->is_inactive_controller_main_insv() ) &&
           ( this->is_inactive_controller_subf_insv() )));

    switch ( static_cast<int>( node_ptr->handlerStage.enable ) )
    {
        /* Entered from MTC_ENABLE__SUBF_FAILED with the timer running */
        case MTC_ENABLE__FAILURE_WAIT:
        {
            if ( node_ptr->mtcTimer.ring == true )
            {
                wlog ("%s workQueue empty timeout, purging ...\n", prefix );
            }
            else if (( rc = workQueue_done ( node_ptr )) == RETRY )
            {
                /* the work queue is still busy ; check again next pass */
                break ;
            }

            workQueue_purge ( node_ptr );
            doneQueue_purge ( node_ptr );

            enableStageChange ( node_ptr, MTC_ENABLE__START );

            if ( !no_failover )
            {
                /* another controller is available so this one can go
                 * through a full enable */
                force_full_enable ( node_ptr ) ;
                break ;
            }

            /* No failover candidate ; do not fail this controller.
             * Hand off to autorecovery management when it is enabled. */
            if ( this->autorecovery_enabled == true )
            {
                adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
                enableStageChange ( node_ptr, MTC_ENABLE__START );

                manage_autorecovery ( node_ptr );
            }

            wlog ("%s is ENABLED-degraded (failed subfunction)\n", prefix );
            break ;
        }

        case MTC_ENABLE__START:
        {
            plog ("%s Subf Enable FSM (from start)\n", prefix );

            node_ptr->unknown_health_reported = false ;
            node_ptr->goEnabled_failed_subf   = false ;

            /* Pick up the current compute subfunction alarm severity and,
             * when it is not clear, record it and flag the degrade */
            EFmAlarmSeverityT subf_sev = mtcAlarm_state ( node_ptr->hostname,
                                                          MTC_ALARM_ID__CH_COMP );
            if ( subf_sev != FM_ALARM_SEVERITY_CLEAR )
            {
                node_ptr->alarms[MTC_ALARM_ID__CH_COMP] = subf_sev ;
                node_ptr->degrade_mask |= DEGRADE_MASK_SUBF;
            }

            /* Bound the wait for the subfunction configured indication */
            mtcTimer_reset ( node_ptr->mtcTimer );
            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_COMPUTE_CONFIG_TIMEOUT );

            enableStageChange ( node_ptr, MTC_ENABLE__CONFIG_COMPLETE_WAIT );
            break ;
        }

        /* Wait for the host to report its subfunction as configured,
         * report itself unhealthy, or for the config timer to ring */
        case MTC_ENABLE__CONFIG_COMPLETE_WAIT:
        {
            if ( node_ptr->mtce_flags & MTC_FLAG__SUBF_CONFIGURED )
            {
                mtcTimer_reset ( node_ptr->mtcTimer );
                plog ("%s Subf Configured OK\n", prefix );
                enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_TIMER );
                alarm_config_clear ( node_ptr );
            }
            else if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY )
            {
                mtcTimer_reset ( node_ptr->mtcTimer );
                elog ("%s configuration failed (oob:%x:%x)\n",
                          prefix,
                          node_ptr->mtce_flags,
                          MTC_FLAG__I_AM_NOT_HEALTHY);

                alarm_config_failure ( node_ptr );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_SUBF_CONFIG_FAIL_ : MTC_TASK_SUBF_CONFIG_FAIL );

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }
            else if ( node_ptr->mtcTimer.ring == true )
            {
                /* neither flag showed up before the timer rang */
                elog ("%s configuration timeout (%d secs)\n",
                          prefix,
                          MTC_COMPUTE_CONFIG_TIMEOUT );

                alarm_config_failure ( node_ptr );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_SUBF_CONFIG_TO_ : MTC_TASK_SUBF_CONFIG_TO );

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }
            /* otherwise keep waiting */
            break ;
        }

        case MTC_ENABLE__GOENABLED_TIMER:
        {
            /*****************************************************************
             *
             * issue: the subfunction go-enable patching script fails and
             *        maintenance reboots the active controller during
             *        no-reboot patching maintenance in CPE.
             *
             * The fix is to skip the subfunction go-enabled tests on self
             * while either patching flag file is present.
             *
             ****************************************************************/
            const bool self_patching =
                (( THIS_HOST ) &&
                 (( daemon_is_file_present ( PATCHING_IN_PROG_FILE )) ||
                  ( daemon_is_file_present ( NODE_IS_PATCHED_FILE ))));

            if ( self_patching )
            {
                ilog ("%s skipping out-of-service tests while self patching\n", prefix );

                /* record the skipped tests in the subfunction pass file */
                daemon_log ( GOENABLED_SUBF_PASS, "out-of-service tests skipped due to patching");
                node_ptr->goEnabled_failed_subf = false ;

                alarm_compute_clear ( node_ptr, true );

                /* treated as a pass ; go straight to starting host services */
                enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_START );
                break ;
            }

            ilog ("%s running out-of-service tests\n", prefix );

            /* Request the subfunction go-enabled tests over the
             * management interface */
            send_mtc_cmd ( node_ptr->hostname, MTC_REQ_SUBF_GOENABLED, MGMNT_INTERFACE );

            /* Bound the wait for the test result ; the timeout is
             * handled in MTC_ENABLE__GOENABLED_WAIT */
            mtcTimer_reset ( node_ptr->mtcTimer );
            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, this->goenabled_timeout );

            enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_WAIT );

            /* start the wait with both result flags cleared */
            node_ptr->goEnabled_subf        = false ;
            node_ptr->goEnabled_failed_subf = false ;

            break ;
        }

        case MTC_ENABLE__GOENABLED_WAIT:
        {
            /* the tests were reported as failed */
            if ( node_ptr->goEnabled_failed_subf == true )
            {
                mtcTimer_reset ( node_ptr->mtcTimer );
                elog ("%s one or more out-of-service tests failed\n", prefix );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_INTEST_FAIL_ : MTC_TASK_INTEST_FAIL );

                /* Need thresholded auto recovery for this failure mode */
                if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
                {
                    this->autorecovery_enabled = true ;
                }

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }

            /* the tests were reported as passed */
            else if ( node_ptr->goEnabled_subf == true )
            {
                mtcTimer_reset ( node_ptr->mtcTimer );

                alarm_enabled_clear ( node_ptr, false );

                plog ("%s passed out-of-service tests\n", prefix );

                node_ptr->goEnabled_failed_subf = false ;

                /* When start_services_needed_subf is set the inline host
                 * services start is bypassed so that it can be serviced
                 * as a scheduled background operation. */
                enableStageChange ( node_ptr,
                                    ( node_ptr->start_services_needed_subf == true ) ?
                                        MTC_ENABLE__HEARTBEAT_CHECK :
                                        MTC_ENABLE__HOST_SERVICES_START );
            }

            /* no result before the go-enabled timer rang */
            else if ( node_ptr->mtcTimer.ring == true )
            {
                elog ("%s out-of-service test execution timeout\n", prefix );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_INTEST_FAIL_TO_ : MTC_TASK_INTEST_FAIL_TO );

                /* Need thresholded auto recovery for this failure mode */
                if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
                {
                    this->autorecovery_enabled = true ;
                }

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }
            /* otherwise keep waiting */
            break ;
        }

        case MTC_ENABLE__HOST_SERVICES_START:
        {
            bool start_services = true ;
            bool subfunction    = true ;

            plog ("%s %s host services\n",
                      prefix,
                      node_ptr->start_services_needed_subf ? "scheduling start compute" :
                                                             "starting compute");

            if ( node_ptr->start_services_needed_subf == true )
            {
                /* Bypass inline execution ; the start is left to be
                 * serviced as a scheduled background operation. */
                enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_CHECK );
                alarm_compute_clear ( node_ptr, true );
            }
            else if ( launch_host_services_cmd ( node_ptr, start_services, subfunction ) != PASS )
            {
                node_ptr->hostservices_failed_subf = true ;

                wlog ("%s %s failed ; launch\n",
                          prefix,
                          node_ptr->host_services_req.name.c_str());

                /* Need thresholded auto recovery for this failure mode */
                if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
                {
                    this->autorecovery_enabled = true ;
                }

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }
            else
            {
                /* launched ; wait for the result */
                enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_WAIT );
            }
            break ;
        }

        case MTC_ENABLE__HOST_SERVICES_WAIT:
        {
            /* Poll for host services completion - pass or fail.
             * The host_services_handler manages timeout. */
            rc = host_services_handler ( node_ptr );
            if ( rc == RETRY )
            {
                /* still waiting for the mtcClient's response ... */
                break ;
            }

            if ( rc == PASS )
            {
                alarm_compute_clear ( node_ptr, true );
                node_ptr->hostservices_failed_subf = false ;
                enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_CHECK );
                break ;
            }

            /* Anything else is a failure.
             * Need thresholded auto recovery for this failure mode */
            if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
            {
                this->autorecovery_enabled = true ;
            }

            node_ptr->hostservices_failed_subf = true ;
            if ( rc == FAIL_TIMEOUT )
            {
                elog ("%s %s failed ; timeout\n",
                          prefix,
                          node_ptr->host_services_req.name.c_str());

                /* post the subfunction enable timeout task */
                mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_TO );
            }
            else
            {
                elog ("%s %s failed ; rc:%d\n",
                          prefix,
                          node_ptr->host_services_req.name.c_str(),
                          rc);

                /* post the subfunction enable failure task */
                mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_FAIL );
            }
            enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            break ;
        }

        case MTC_ENABLE__HEARTBEAT_CHECK:
        {
            if ( THIS_HOST )
            {
                /* the heartbeat wait and soak stages are skipped for self */
                enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
            }
            else
            {
                /* allow the fsm to wait for up to 1 minute for the
                 * hbsClient's ready event before starting the heartbeat
                 * test. */
                mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 );
                enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_WAIT );
            }
            break ;
        }

        case MTC_ENABLE__HEARTBEAT_WAIT:
        {
            if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
            {
                /* proceed without the ready event */
                wlog ("%s hbsClient ready event timeout\n", prefix );
            }
            else
            {
                if ( !node_ptr->hbsClient_ready )
                {
                    /* keep waiting for the ready event */
                    break ;
                }
                mtcTimer_reset ( node_ptr->mtcTimer );
            }

            plog ("%s Starting %d sec Heartbeat Soak (with%s)\n",
                      prefix,
                      MTC_HEARTBEAT_SOAK_BEFORE_ENABLE,
                      node_ptr->hbsClient_ready ? " ready event" : "out ready event" );

            /* Send the start host command to the heartbeat service */
            send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );

            /* let heartbeat run for the soak period before the enable
             * is allowed to continue */
            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE );
            enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_SOAK );
            break ;
        }

        case MTC_ENABLE__HEARTBEAT_SOAK:
        {
            if ( node_ptr->mtcTimer.ring != true )
            {
                /* soak period still in progress */
                break ;
            }

            plog ("%s heartbeating\n", prefix );

            /* if heartbeat is not working then we will
             * never get here and enable the host */
            enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
            break ;
        }

        case MTC_ENABLE__STATE_CHANGE:
        {
            /* force the task to an empty string */
            mtcInvApi_force_task ( node_ptr, "" );

            /* With no admin action in progress, select the enable action
             * to force the final part of the enable sequence through */
            if ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE )
            {
                adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE );
            }
            enableStageChange ( node_ptr, MTC_ENABLE__WORKQUEUE_WAIT );

            /* Start the timer that bounds the work queue wait */
            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout );

            break ;
        }

        case MTC_ENABLE__WORKQUEUE_WAIT:
        {
            rc = workQueue_done ( node_ptr );
            if ( rc == RETRY )
            {
                /* the work queue is still busy ; check again next pass */
                break ;
            }

            bool enable_failed = false ;

            if ( rc == FAIL_WORKQ_TIMEOUT )
            {
                elog ("%s enable failed ; Enable workQueue timeout, purging ...\n", prefix );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_ENABLE_WORK_TO_ : MTC_TASK_ENABLE_WORK_TO );

                enable_failed = true ;
            }
            else if ( rc != PASS )
            {
                elog ("%s enable failed ; Enable doneQueue has failed commands\n", prefix );

                mtcInvApi_update_task ( node_ptr, no_failover ? MTC_TASK_ENABLE_WORK_FAIL_ : MTC_TASK_ENABLE_WORK_FAIL );

                enable_failed = true ;
            }
            else if ( this->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
            {
                /* Fail the enable if any heartbeat interface is failing ;
                 * each failing interface is logged and posted */
                for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
                {
                    if ( node_ptr->heartbeat_failed[iface] == true )
                    {
                        elog ("%s Enable failure due to %s Network *** Heartbeat Loss ***\n",
                                  prefix,
                                  get_iface_name_str ((iface_enum)iface));

                        mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB );
                        enable_failed = true ;
                    }
                }
            }

            if ( enable_failed == true )
            {
                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
                break ;
            }

            /* latch the 'was in dor recovery' indication for the report
             * made in the stages that follow */
            if ( node_ptr->dor_recovery_mode || node_ptr->was_dor_recovery_mode )
            {
                node_ptr->dor_recovery_mode     = false ;
                node_ptr->was_dor_recovery_mode = true  ;
            }

            /* Select the final stage ; any of the three alarms being
             * raised takes the failure path */
            if (( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != FM_ALARM_SEVERITY_CLEAR ) ||
                ( node_ptr->alarms[MTC_ALARM_ID__ENABLE]  != FM_ALARM_SEVERITY_CLEAR ) ||
                ( node_ptr->alarms[MTC_ALARM_ID__CONFIG]  != FM_ALARM_SEVERITY_CLEAR ))
            {
                wlog ("%s enable to degraded migration due to alarm [%d:%d:%d]\n",
                          prefix,
                          node_ptr->alarms[MTC_ALARM_ID__CH_COMP],
                          node_ptr->alarms[MTC_ALARM_ID__ENABLE],
                          node_ptr->alarms[MTC_ALARM_ID__CONFIG] );

                enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED );
            }
            else if ( node_ptr->degrade_mask )
            {
                enableStageChange ( node_ptr, MTC_ENABLE__DEGRADED );
            }
            else
            {
                enableStageChange ( node_ptr, MTC_ENABLE__ENABLED );
            }
            break ;
        }

        case MTC_ENABLE__ENABLED:
        {
            /* Set the node as unlocked-enabled-available */
            allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
                                       MTC_OPER_STATE__ENABLED,
                                       MTC_AVAIL_STATUS__AVAILABLE );

            /* ... and the subfunction as enabled-available */
            subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED,
                                        MTC_AVAIL_STATUS__AVAILABLE );

            node_ptr->subf_enabled          = true  ;
            node_ptr->inservice_failed_subf = false ;

            if ( node_ptr->was_dor_recovery_mode )
            {
                report_dor_recovery ( node_ptr , "is ENABLED" );
            }
            else
            {
                plog ("%s is ENABLED\n", prefix );
            }

            /* when start_services_needed_subf is set the alarm was
             * already cleared in MTC_ENABLE__HOST_SERVICES_START */
            if ( node_ptr->start_services_needed_subf != true )
            {
                alarm_compute_clear ( node_ptr, true );
            }

            enableStageChange ( node_ptr, MTC_ENABLE__DONE );

            break ;
        }

        /* Allow the host to come up in the degraded state */
        case MTC_ENABLE__DEGRADED:
        {
            /* the subfunction state follows the compute alarm */
            if ( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != FM_ALARM_SEVERITY_CLEAR )
            {
                subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
                                            MTC_AVAIL_STATUS__FAILED );
            }
            else
            {
                subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED,
                                            MTC_AVAIL_STATUS__AVAILABLE );
            }

            /* Set the node as unlocked-enabled-degraded */
            allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
                                       MTC_OPER_STATE__ENABLED,
                                       MTC_AVAIL_STATUS__DEGRADED );

            if ( node_ptr->was_dor_recovery_mode )
            {
                report_dor_recovery ( node_ptr , "is ENABLED-degraded" );
            }
            else
            {
                wlog ("%s is ENABLED-degraded\n", prefix );
            }
            enableStageChange ( node_ptr, MTC_ENABLE__DONE );

            break ;
        }

        /* Common landing stage for every subfunction enable failure */
        case MTC_ENABLE__SUBF_FAILED:
        {
            /* the subfunction is disabled-failed ... */
            subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
                                        MTC_AVAIL_STATUS__FAILED );

            /* ... while the node is set unlocked-enabled-degraded */
            allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
                                       MTC_OPER_STATE__ENABLED,
                                       MTC_AVAIL_STATUS__DEGRADED );

            if ( node_ptr->was_dor_recovery_mode )
            {
                report_dor_recovery ( node_ptr , "is DISABLED-failed" );
            }
            else
            {
                elog ("%s is DISABLED-failed (subfunction failed)\n",
                          prefix );
            }
            this->dor_mode_active = false ;

            alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL ) ;

            /* Start the timer that bounds how long MTC_ENABLE__FAILURE_WAIT
             * waits on the work queue before purging it */
            mtcTimer_reset ( node_ptr->mtcTimer );
            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout );
            enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_WAIT );

            break ;
        }

        case MTC_ENABLE__DONE:
        {
            mtcTimer_reset ( node_ptr->mtcTimer );

            /* Report UNLOCKED rather than ENABLED when the admin action
             * in progress is an unlock */
            mtc_cmd_enum cmd =
                ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK ) ?
                    CONTROLLER_UNLOCKED : CONTROLLER_ENABLED ;

            mtcSmgrApi_request     ( node_ptr, cmd, SMGR_MAX_RETRIES );
            mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 );

            /* return the FSM to its idle condition */
            adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
            enableStageChange ( node_ptr, MTC_ENABLE__START );

            node_ptr->enabled_count++ ;
            node_ptr->health_threshold_counter = 0 ;

            node_ptr->was_dor_recovery_mode = false ;
            node_ptr->dor_recovery_mode     = false ;
            this->dor_mode_active           = false ;

            mtcInvApi_force_task ( node_ptr, "" );
            break ;
        }

        default:
        {
            /* stage not handled by this FSM */
            rc = FAIL_BAD_CASE ;
            break ;
        }
    }
    return (rc);
}
|