Merge "Fix AIO-DX failover issues" into r/stx.5.0
This commit is contained in:
commit
e8f2c3dc42
@ -206,8 +206,8 @@ SmFailoverFailPendingState::~SmFailoverFailPendingState()
|
||||
|
||||
SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
//SmFSMEventDataTypeT event_data_type = event_data->get_event_data_type();
|
||||
bool duplex = false;
|
||||
bool blind_guess = false;
|
||||
switch (event)
|
||||
{
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
@ -249,13 +249,10 @@ SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const
|
||||
if(healthy)
|
||||
{
|
||||
blind_guess_scenario_start();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->fsm.set_state(SM_FAILOVER_STATE_FAILED);
|
||||
blind_guess = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( !blind_guess )
|
||||
{
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "sm_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_node_utils.h"
|
||||
#include "sm_node_api.h"
|
||||
#include "sm_failover.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
#include "sm_failover_ss.h"
|
||||
@ -183,9 +184,54 @@ static bool sm_failover_failed_recovery_criteria_met( void )
|
||||
return (criteria_met);
|
||||
}
|
||||
|
||||
// Runs the recovery sequence used when leaving the 'Failover Failed' state:
//   1. look up the peer's name and delete the peer node,
//   2. look up the local host name and call sm_node_api_recover_node on it,
//   3. reset the unhealthy flag.
// Returns SM_OKAY when every lookup/delete/recover call succeeds. On the
// first failing call the error is logged and returned immediately, and the
// remaining steps (including the unhealthy flag reset) are not attempted.
SmErrorT proceed_recovery()
{
    char peer_name[SM_NODE_NAME_MAX_CHAR];
    char host_name[SM_NODE_NAME_MAX_CHAR];

    // Step 1: delete peer node
    SmErrorT error = sm_node_api_get_peername(peer_name);
    if(SM_OKAY != error)
    {
        DPRINTFI("Cannot retrieve peer's hostname, error %s", sm_error_str(error));
        return error;
    }

    error = sm_node_api_delete_node(peer_name);
    if(SM_OKAY != error)
    {
        DPRINTFI("Failed to delete peer %s, error %s", peer_name, sm_error_str(error));
        return error;
    }
    DPRINTFI("Peer %s is deleted.", peer_name);

    // Step 2: enable host
    error = sm_node_api_get_hostname(host_name);
    if(SM_OKAY != error)
    {
        DPRINTFI("Cannot retrieve hostname, error %s", sm_error_str(error));
        return error;
    }

    error = sm_node_api_recover_node(host_name);
    if(SM_OKAY != error)
    {
        DPRINTFI("Failed to recover %s, error %s", host_name, sm_error_str(error));
        return error;
    }
    DPRINTFI("Host %s is recovered.", host_name);

    // Step 3: clear the unhealthy flag now that peer and host are handled
    sm_node_utils_reset_unhealthy_flag();
    DPRINTFI("Unhealthy flag is removed");

    return SM_OKAY;
}
|
||||
|
||||
// The 'Failover Failed' state recovery audit handler
|
||||
SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
SmErrorT error;
|
||||
event_data=event_data;
|
||||
switch (event)
|
||||
{
|
||||
@ -197,8 +243,12 @@ SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmF
|
||||
DPRINTFI("************************************");
|
||||
DPRINTFI("** Failover Failed state recovery **");
|
||||
DPRINTFI("************************************");
|
||||
sm_node_utils_reset_unhealthy_flag();
|
||||
sm_failover_failed_process_restart(PROCESS_HBSAGENT);
|
||||
error = proceed_recovery();
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Cannot recover from failed state");
|
||||
}else
|
||||
{
|
||||
sm_failover_failed_process_restart(PROCESS_SM);
|
||||
for ( int i = 0 ; i < 10 ; i++ )
|
||||
{
|
||||
@ -208,6 +258,7 @@ SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmF
|
||||
DPRINTFE("Restart did not occur ; reinstating unhealthy flag ; recovery will retry");
|
||||
sm_node_utils_set_unhealthy();
|
||||
}
|
||||
}
|
||||
else if ( ++_log_throttle > 1 )
|
||||
{
|
||||
if ( _log_throttle > SM_FAILOVER_FAILED_LOG_THROTTLE_THLD )
|
||||
|
@ -742,6 +742,44 @@ SmErrorT sm_node_api_fail_node( char node_name[] )
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node API - Recover Node
|
||||
// ======================
|
||||
// Reads the named node from the database and updates it to operational
// state ENABLED and availability status AVAILABLE, keeping the
// administrative state that was read.
//
// node_name: name of the node to recover.
// Returns the error from sm_db_nodes_read if the node cannot be read,
// otherwise the result of sm_node_api_update_node (SM_OKAY on success).
SmErrorT sm_node_api_recover_node( char node_name[] )
{
    SmDbNodeT node;
    SmErrorT error;

    error = sm_db_nodes_read( _sm_db_handle, node_name, &node );
    if( SM_OKAY != error )
    {
        DPRINTFE( "Failed to read node (%s) information, error=%s.",
                  node_name, sm_error_str( error ) );
        return( error );
    }

    if( node.oper_state != SM_NODE_OPERATIONAL_STATE_DISABLED ||
        node.avail_status != SM_NODE_AVAIL_STATUS_FAILED )
    {
        // NOTE(review): the node is not recorded as disabled-failed here, yet
        // the update below still runs; only a debug log is emitted. Confirm
        // whether an early return was intended.
        DPRINTFD("Not in failure mode %s", node_name);
    }

    // NOTE(review): this is a progress message but is logged at error level;
    // consider an info-level macro if one is available in this file.
    DPRINTFE("Node %s is to recover from failure mode.", node_name);

    error = sm_node_api_update_node(
                node_name,
                node.admin_state,
                SM_NODE_OPERATIONAL_STATE_ENABLED,
                SM_NODE_AVAIL_STATUS_AVAILABLE);

    if( SM_OKAY != error )
    {
        // Message describes the recover operation (previously it reported
        // "set node failed", copied from the fail-node path).
        DPRINTFE( "Failed to recover node (%s), error=%s.",
                  node_name, sm_error_str( error ) );
    }

    return( error );
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node API - Delete Node
|
||||
// ======================
|
||||
|
@ -54,6 +54,12 @@ extern SmErrorT sm_node_api_update_node( char node_name[],
|
||||
SmErrorT sm_node_api_fail_node( char node_name[] );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node API - Recover Node
|
||||
// ======================
|
||||
SmErrorT sm_node_api_recover_node( char node_name[] );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node API - Delete Node
|
||||
// ======================
|
||||
|
Loading…
Reference in New Issue
Block a user