// // Copyright (c) 2014-2017 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // #include "sm_node_api.h" #include #include #include #include #include #include #include #include #include #include #include #include "sm_limits.h" #include "sm_types.h" #include "sm_debug.h" #include "sm_selobj.h" #include "sm_time.h" #include "sm_timer.h" #include "sm_uuid.h" #include "sm_msg.h" #include "sm_db.h" #include "sm_db_foreach.h" #include "sm_db_nodes.h" #include "sm_db_node_history.h" #include "sm_node_utils.h" #include "sm_node_fsm.h" #include "sm_service_domain_scheduler.h" #include "sm_service_domain_interface_table.h" #include "sm_log.h" #include "sm_alarm.h" #include "sm_log.h" #include "sm_troubleshoot.h" #include "sm_node_swact_monitor.h" #define SM_REBOOT_DELAY_IN_MS 30000 #define SM_REBOOT_TIMEOUT_IN_MINS 8 #define SM_REBOOT_TIMEOUT_IN_MS (SM_REBOOT_TIMEOUT_IN_MINS*60*1000) static SmDbHandleT* _sm_db_handle = NULL; static SmMsgCallbacksT _msg_callbacks = {0}; static SmTimerIdT _reboot_delay_timer_id = SM_TIMER_ID_INVALID; static SmTimerIdT _reboot_timer_id = SM_TIMER_ID_INVALID; // **************************************************************************** // Node API - Get Host Name // ======================== SmErrorT sm_node_api_get_hostname( char node_name[] ) { return( sm_node_utils_get_hostname( node_name ) ); } // **************************************************************************** // **************************************************************************** // Node API - Get Peer Name // ======================== SmErrorT sm_node_api_get_peername(char peer_name[SM_NODE_NAME_MAX_CHAR]) { char node_name[SM_NODE_NAME_MAX_CHAR]; SmErrorT error = sm_node_api_get_hostname(node_name); if(SM_OKAY != error) { return error; } char format[] = "name <> '%s'"; char query[SM_NODE_NAME_MAX_CHAR + sizeof(format)]; unsigned int cnt = snprintf(query, sizeof(query), format, node_name); if(cnt < sizeof(query) - 1) { SmDbNodeT node; error = sm_db_nodes_query(_sm_db_handle, query, &node); if(SM_OKAY != error) { return error; } strncpy(peer_name, node.name, SM_NODE_NAME_MAX_CHAR); return SM_OKAY; } return SM_FAILED; } // **************************************************************************** // **************************************************************************** // Node API - Configuration Complete // ================================= SmErrorT sm_node_api_config_complete( char node_name[], bool* complete ) { return( sm_node_utils_config_complete( complete ) ); } // **************************************************************************** // **************************************************************************** // Node API - Interface Send Node Hello // ==================================== static void sm_node_api_interface_send_node_hello( void* user_data[], SmServiceDomainInterfaceT* interface ) { SmDbNodeT* node = (SmDbNodeT*) user_data[0]; long uptime = *(long*) user_data[1]; SmErrorT error; if(( SM_INTERFACE_STATE_ENABLED == interface->interface_state )&& ( SM_PATH_TYPE_STATUS_ONLY != interface->path_type )) { error = sm_msg_send_node_hello( node->name, node->admin_state, node->oper_state, node->avail_status, node->ready_state, node->state_uuid, uptime, interface ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) hello, error=%s.", node->name, sm_error_str( error ) ); return; } } } // **************************************************************************** // **************************************************************************** // Node API - Send Node Hello // ========================== static SmErrorT sm_node_api_send_node_hello( SmDbNodeT* node ) { long uptime; void* user_data[] = {node, &uptime}; SmErrorT error; error = sm_node_utils_get_uptime( &uptime ); if( SM_OKAY != error ) { DPRINTFE( "Failed to to get local node uptime, error=%s.", sm_error_str( error ) ); return( error ); } sm_msg_increment_seq_num(); sm_service_domain_interface_table_foreach( user_data, sm_node_api_interface_send_node_hello ); return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Interface Send Node Update // ===================================== static void sm_node_api_interface_send_node_update( void* user_data[], SmServiceDomainInterfaceT* interface ) { SmUuidPtrT old_state_uuid = (SmUuidPtrT) user_data[0]; SmDbNodeT* node = (SmDbNodeT*) user_data[1]; long uptime = *(long*) user_data[2]; bool force = *(bool*) user_data[3]; SmErrorT error; if(( SM_INTERFACE_STATE_ENABLED == interface->interface_state )&& ( SM_PATH_TYPE_STATUS_ONLY != interface->path_type )) { error = sm_msg_send_node_update( node->name, node->admin_state, node->oper_state, node->avail_status, node->ready_state, old_state_uuid, node->state_uuid, uptime, force, interface ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) update, error=%s.", node->name, sm_error_str( error ) ); return; } } } // **************************************************************************** // **************************************************************************** // Node API - Send Node Update // =========================== static SmErrorT sm_node_api_send_node_update( SmUuidT old_state_uuid, SmDbNodeT* node, bool force ) { long uptime; void* user_data[] = {old_state_uuid, node, &uptime, &force}; SmErrorT error; error = sm_node_utils_get_uptime( &uptime ); if( SM_OKAY != error ) { DPRINTFE( "Failed to to get local node uptime, error=%s.", sm_error_str( error ) ); return( error ); } sm_msg_increment_seq_num(); sm_service_domain_interface_table_foreach( user_data, sm_node_api_interface_send_node_update ); return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Interface Send Node Swact // ==================================== static void sm_node_api_interface_send_node_swact( void* user_data[], SmServiceDomainInterfaceT* interface ) { SmUuidPtrT request_uuid = (SmUuidPtrT) user_data[0]; SmDbNodeT* node = (SmDbNodeT*) user_data[1]; bool force = *(bool*) user_data[2]; SmErrorT error; if(( SM_INTERFACE_STATE_ENABLED == interface->interface_state )&& ( SM_PATH_TYPE_STATUS_ONLY != interface->path_type )) { error = sm_msg_send_node_swact( node->name, force, request_uuid, interface ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) swact, error=%s.", node->name, sm_error_str( error ) ); return; } } } // **************************************************************************** // **************************************************************************** // Node API - Send Node Swact // ========================== static SmErrorT sm_node_api_send_node_swact( SmDbNodeT* node, bool force, SmUuidT request_uuid ) { void* user_data[] = {request_uuid, node, &force}; sm_msg_increment_seq_num(); sm_service_domain_interface_table_foreach( user_data, sm_node_api_interface_send_node_swact ); return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Node Hello Callback // ============================== static void sm_node_api_node_hello_callback( SmNetworkAddressT* network_address, int network_port, int version, int revision, char node_name[], SmNodeAdminStateT admin_state, SmNodeOperationalStateT oper_state, SmNodeAvailStatusT avail_status, SmNodeReadyStateT ready_state, SmUuidT state_uuid, long uptime ) { long local_uptime; char db_query[SM_DB_QUERY_STATEMENT_MAX_CHAR]; SmUuidT new_state_uuid; SmDbNodeT node; SmDbNodeHistoryT node_history; SmErrorT error; sm_uuid_create( new_state_uuid ); error = sm_node_utils_get_uptime( &local_uptime ); if( SM_OKAY != error ) { DPRINTFE( "Failed to to get local node uptime, error=%s.", sm_error_str( error ) ); return; } error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY == error ) { if( 0 == strcmp( state_uuid, node.state_uuid ) ) { if(( admin_state == node.admin_state )&& ( oper_state == node.oper_state ) && ( ready_state == node.ready_state )) { DPRINTFD( "Node (%s) info up to date.", node_name ); return; } if( admin_state != node.admin_state ) { sm_log_node_state_change( node_name, sm_node_state_str( node.admin_state, node.ready_state ), sm_node_state_str( admin_state, node.ready_state ), "customer action" ); } node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; node.ready_state = ready_state; error = sm_db_nodes_update( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to update node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } return; } else { DPRINTFI( "Node (%s) received=%s, have=%s.", node_name, state_uuid, node.state_uuid ); } snprintf( db_query, sizeof(db_query), "%s = '%s' and %s = '%s'", SM_NODES_TABLE_COLUMN_NAME, node_name, SM_NODES_TABLE_COLUMN_STATE_UUID, state_uuid ); error = sm_db_node_history_query( _sm_db_handle, db_query, &node_history ); if( SM_OKAY == error ) { DPRINTFI( "Send update for node (%s) uuid=%s.", node_name, state_uuid ); error = sm_node_api_send_node_update( state_uuid, &node, false ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) update, error=%s.", node_name, sm_error_str( error ) ); return; } } else if( SM_NOT_FOUND ) { if( uptime <= local_uptime ) { DPRINTFI( "Send update for node (%s) uuid=%s, uptime=%ld, " "local_uptime=%ld.", node_name, state_uuid, uptime, local_uptime ); error = sm_node_api_send_node_update( state_uuid, &node, false ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) update, error=%s.", node_name, sm_error_str( error ) ); return; } } else { DPRINTFI( "Update node (%s) uuid=%s, was=%s.", node_name, state_uuid, node.state_uuid ); snprintf( node_history.name, sizeof(node_history.name), "%s", node.name ); node_history.admin_state = node.admin_state; node_history.oper_state = node.oper_state; node_history.avail_status = node.avail_status; node_history.ready_state = node.ready_state; snprintf( node_history.state_uuid, sizeof(node_history.state_uuid), "%s", node.state_uuid ); error = sm_db_node_history_insert( _sm_db_handle, &node_history ); if( SM_OKAY != error ) { DPRINTFE( "Failed to save node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } if( admin_state != node.admin_state ) { sm_log_node_state_change( node_name, sm_node_state_str( node.admin_state, node.ready_state ), sm_node_state_str( admin_state, node.ready_state ), "customer action" ); } node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; node.ready_state = ready_state; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", state_uuid ); error = sm_db_nodes_update( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to update node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } } } else { DPRINTFE( "Failed to read node history (%s), error=%s.", node_name, sm_error_str( error ) ); return; } } else if( SM_NOT_FOUND == error ) { DPRINTFI( "Inserting node (%s), uuid=%s.", node_name, new_state_uuid ); snprintf( node.name, SM_NODE_NAME_MAX_CHAR, "%s", node_name ); node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; node.ready_state = ready_state; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", new_state_uuid ); error = sm_db_nodes_insert( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to insert node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } sm_log_node_state_change( node_name, sm_node_state_str( SM_NODE_ADMIN_STATE_UNKNOWN, SM_NODE_READY_STATE_UNKNOWN ), sm_node_state_str( node.admin_state, node.ready_state ), "customer action" ); } else { DPRINTFE( "Failed to read node (%s), error=%s.", node_name, sm_error_str( error ) ); return; } } // **************************************************************************** // **************************************************************************** // Node API - Node Update Callback // =============================== static void sm_node_api_node_update_callback( SmNetworkAddressT* network_address, int network_port, int version, int revision, char node_name[], SmNodeAdminStateT admin_state, SmNodeOperationalStateT oper_state, SmNodeAvailStatusT avail_status, SmNodeReadyStateT ready_state, SmUuidT old_state_uuid, SmUuidT state_uuid, long uptime, bool force ) { bool update = false; char db_query[SM_DB_QUERY_STATEMENT_MAX_CHAR]; SmDbNodeT node; SmDbNodeHistoryT node_history; SmErrorT error; if( force ) { error = sm_db_nodes_query( _sm_db_handle, NULL, &node ); if( SM_OKAY == error ) { update = true; DPRINTFI("Updating node (%s) info.", node_name ); } else if( SM_NOT_FOUND != error ) { DPRINTFE( "Failed to read node (%s), error=%s.", node_name, sm_error_str( error ) ); return; } } else { snprintf( db_query, sizeof(db_query), "%s = '%s' and %s = '%s'", SM_NODES_TABLE_COLUMN_NAME, node_name, SM_NODES_TABLE_COLUMN_STATE_UUID, old_state_uuid ); error = sm_db_nodes_query( _sm_db_handle, db_query, &node ); if( SM_OKAY == error ) { update = true; DPRINTFI("Updating node (%s) info.", node_name ); } else if( SM_NOT_FOUND != error ) { DPRINTFE( "Failed to read node (%s), error=%s.", node_name, sm_error_str( error ) ); return; } } if( update ) { if( admin_state != node.admin_state ) { sm_log_node_state_change( node_name, sm_node_state_str( node.admin_state, node.ready_state ), sm_node_state_str( admin_state, node.ready_state ), "customer action" ); } snprintf( node_history.name, sizeof(node_history.name), "%s", node.name ); node_history.admin_state = node.admin_state; node_history.oper_state = node.oper_state; node_history.avail_status = node.avail_status; node_history.ready_state = node.ready_state; snprintf( node_history.state_uuid, sizeof(node_history.state_uuid), "%s", node.state_uuid ); error = sm_db_node_history_insert( _sm_db_handle, &node_history ); if( SM_OKAY != error ) { DPRINTFE( "Failed to save node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; node.ready_state = ready_state; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", state_uuid ); error = sm_db_nodes_update( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to update node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return; } } } // **************************************************************************** // **************************************************************************** // Node API - Add Node // =================== SmErrorT sm_node_api_add_node( char node_name[] ) { SmDbNodeT node; SmUuidT state_uuid; SmErrorT error; sm_uuid_create( state_uuid ); error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY == error ) { DPRINTFD( "Already added node (%s).", node_name ); } else if( SM_NOT_FOUND == error ) { snprintf( node.name, SM_NODE_NAME_MAX_CHAR, "%s", node_name ); node.admin_state = SM_NODE_ADMIN_STATE_UNLOCKED; node.oper_state = SM_NODE_OPERATIONAL_STATE_ENABLED; node.avail_status = SM_NODE_AVAIL_STATUS_AVAILABLE; node.ready_state = SM_NODE_READY_STATE_UNKNOWN; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", state_uuid ); error = sm_db_nodes_insert( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to insert node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return( error ); } DPRINTFI( "Added node (%s).", node.name ); sm_log_node_state_change( node_name, sm_node_state_str( SM_NODE_ADMIN_STATE_UNKNOWN, SM_NODE_READY_STATE_UNKNOWN ), sm_node_state_str( node.admin_state, node.ready_state ), "customer action" ); } else { DPRINTFE( "Failed to read node (%s) information, error=%s.", node_name, sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Update Node // ====================== SmErrorT sm_node_api_update_node( char node_name[], SmNodeAdminStateT admin_state, SmNodeOperationalStateT oper_state, SmNodeAvailStatusT avail_status ) { bool send_update = false; SmUuidT old_state_uuid; SmUuidT new_state_uuid; SmDbNodeT node; SmDbNodeHistoryT node_history; SmErrorT error; sm_uuid_create( new_state_uuid ); error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY == error ) { if( admin_state != node.admin_state ) { sm_log_node_state_change( node_name, sm_node_state_str( node.admin_state, node.ready_state ), sm_node_state_str( admin_state, node.ready_state ), "customer action" ); } if( oper_state != node.oper_state ) { sm_log_node_state_change( node_name, sm_node_oper_state_str( node.oper_state ), sm_node_oper_state_str( oper_state ), "oper state changed" ); } if(( admin_state != node.admin_state )|| ( oper_state != node.oper_state )) { memcpy( old_state_uuid, node.state_uuid, sizeof(old_state_uuid) ); snprintf( node_history.name, sizeof(node_history.name), "%s", node.name ); node_history.admin_state = node.admin_state; node_history.oper_state = node.oper_state; node_history.avail_status = node.avail_status; node_history.ready_state = node.ready_state; snprintf( node_history.state_uuid, sizeof(node_history.state_uuid), "%s", node.state_uuid ); error = sm_db_node_history_insert( _sm_db_handle, &node_history ); if( SM_OKAY != error ) { DPRINTFE( "Failed to save node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return( error ); } node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", new_state_uuid ); error = sm_db_nodes_update( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to update node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return( error ); } send_update = true; } } else if( SM_NOT_FOUND == error ) { memset( old_state_uuid, 0, sizeof(old_state_uuid) ); snprintf( node.name, SM_NODE_NAME_MAX_CHAR, "%s", node_name ); node.admin_state = admin_state; node.oper_state = oper_state; node.avail_status = avail_status; node.ready_state = SM_NODE_READY_STATE_DISABLED; snprintf( node.state_uuid, sizeof(node.state_uuid), "%s", new_state_uuid ); error = sm_db_nodes_insert( _sm_db_handle, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to insert node (%s) info, error=%s.", node_name, sm_error_str( error ) ); return( error ); } sm_log_node_state_change( node_name, sm_node_state_str( SM_NODE_ADMIN_STATE_UNKNOWN, SM_NODE_READY_STATE_UNKNOWN ), sm_node_state_str( node.admin_state, node.ready_state ), "customer action" ); send_update = true; } else { DPRINTFE( "Failed to read node (%s) information, error=%s.", node_name, sm_error_str( error ) ); return( error ); } if( send_update ) { error = sm_node_api_send_node_update( old_state_uuid, &node, true ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) node update, error=%s.", node_name, sm_error_str(error) ); return( error ); } } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Fail Node // ====================== SmErrorT sm_node_api_fail_node( char node_name[] ) { SmDbNodeT node; SmErrorT error; error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to read node (%s) information, error=%s.", node_name, sm_error_str( error ) ); return( error ); } if( node.oper_state == SM_NODE_OPERATIONAL_STATE_DISABLED && node.avail_status == SM_NODE_AVAIL_STATUS_FAILED ) { DPRINTFD("Already in failure mode %s", node_name); } DPRINTFE("Node %s is entering to failure mode.", node_name); error = sm_node_api_update_node( node_name, node.admin_state, SM_NODE_OPERATIONAL_STATE_DISABLED, SM_NODE_AVAIL_STATUS_FAILED); if( SM_OKAY != error ) { DPRINTFE( "Failed to set node (%s) failed, error=%s.", node_name, sm_error_str( error ) ); } return( error ); } // **************************************************************************** // **************************************************************************** // Node API - Recover Node // ====================== SmErrorT sm_node_api_recover_node( char node_name[] ) { SmDbNodeT node; SmErrorT error; error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to read node (%s) information, error=%s.", node_name, sm_error_str( error ) ); return( error ); } if( node.oper_state != SM_NODE_OPERATIONAL_STATE_DISABLED || node.avail_status != SM_NODE_AVAIL_STATUS_FAILED ) { DPRINTFD("Not in failure mode %s", node_name); } DPRINTFE("Node %s is to recover from failure mode.", node_name); error = sm_node_api_update_node( node_name, node.admin_state, SM_NODE_OPERATIONAL_STATE_ENABLED, SM_NODE_AVAIL_STATUS_AVAILABLE); if( SM_OKAY != error ) { DPRINTFE( "Failed to set node (%s) failed, error=%s.", node_name, sm_error_str( error ) ); } return( error ); } // **************************************************************************** // **************************************************************************** // Node API - Delete Node // ====================== SmErrorT sm_node_api_delete_node( char node_name[] ) { SmErrorT error; error = sm_db_nodes_delete( _sm_db_handle, node_name ); if(( SM_OKAY != error )&&( SM_NOT_FOUND != error )) { DPRINTFE( "Failed to delete node, error=%s.", sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Swact // ================ SmErrorT sm_node_api_swact( char node_name[], bool force ) { SmUuidT request_uuid; SmDbNodeT node; SmErrorT error; sm_uuid_create( request_uuid ); error = sm_db_nodes_read( _sm_db_handle, node_name, &node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to read node (%s) information, error=%s.", node_name, sm_error_str( error ) ); return( error ); } error = sm_node_api_send_node_swact( &node, force, request_uuid ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send node (%s) swact, error=%s.", node_name, sm_error_str( error ) ); return( error ); } error = sm_service_domain_scheduler_swact_node( node_name, force, request_uuid ); if( SM_OKAY != error ) { DPRINTFE( "Failed to set node scheduling state, error=%s.", sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Reboot Timeout // ========================= static bool sm_node_api_reboot_timeout( SmTimerIdT timer_id, int64_t user_data ) { int sysrq_handler_fd; int sysrq_tigger_fd; char hostname[SM_NODE_NAME_MAX_CHAR] = ""; char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR] = ""; SmErrorT error; error = sm_node_api_get_hostname( hostname ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get hostname, error=%s.", sm_error_str( error ) ); } // Enable sysrq handling. sysrq_handler_fd = open( "/proc/sys/kernel/sysrq", O_RDWR | O_CLOEXEC ); if( 0 > sysrq_handler_fd ) { DPRINTFE( "Failed to open sysrq handler file, error=%s.", strerror(errno) ); return( true ); } write( sysrq_handler_fd, "1", 1 ); close( sysrq_handler_fd ); // Trigger sysrq command. sysrq_tigger_fd = open( "/proc/sysrq-trigger", O_RDWR | O_CLOEXEC ); if( 0 > sysrq_tigger_fd ) { DPRINTFE( "Failed to open sysrq trigger file, error=%s.", strerror(errno) ); return( true ); } snprintf( reason_text, sizeof(reason_text), "timed out after %i minute%s " "waiting for a controlled reboot, escalating to a forced reboot", SM_REBOOT_TIMEOUT_IN_MINS, (1 == SM_REBOOT_TIMEOUT_IN_MINS) ? "" : "s" ); sm_log_node_reboot( hostname, reason_text, true ); DPRINTFI( "******************************************************" "************************************" ); DPRINTFI( "** Issuing an immediate reboot of the system, without " "unmounting or syncing filesystems **" ); DPRINTFI( "******************************************************" "************************************" ); sleep(5); // wait 5 seconds before a forced reboot. write( sysrq_tigger_fd, "b", 1 ); close( sysrq_tigger_fd ); return( true ); } // **************************************************************************** // **************************************************************************** // Node API - Reboot Delay // ======================= static bool sm_node_api_reboot_delay( SmTimerIdT timer_id, int64_t user_data ) { pid_t pid; char hostname[SM_NODE_NAME_MAX_CHAR] = ""; char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR] = ""; SmErrorT error; error = sm_node_api_get_hostname( hostname ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get hostname, error=%s.", sm_error_str( error ) ); } snprintf( reason_text, sizeof(reason_text), "reboot delay expired, " "issuing reboot" ); sm_log_node_reboot( hostname, reason_text, false ); DPRINTFI( "***********************************************" ); DPRINTFI( "** Issuing a controlled reboot of the system **" ); DPRINTFI( "***********************************************" ); pid = fork(); if( 0 > pid ) { DPRINTFE( "Failed to fork process for reboot, error=%s.", strerror( errno ) ); return( true ); } else if( 0 == pid ) { // Child process. struct rlimit file_limits; char reboot_cmd[] = "reboot"; char* reboot_argv[] = {reboot_cmd, NULL}; char* reboot_env[] = {NULL}; setpgid( 0, 0 ); if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) ) { unsigned int fd_i; for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i ) { close( fd_i ); } open( "/dev/null", O_RDONLY ); // stdin open( "/dev/null", O_WRONLY ); // stdout open( "/dev/null", O_WRONLY ); // stderr } execve( "/sbin/reboot", reboot_argv, reboot_env ); // Shouldn't get this far, else there was an error. exit(-1); } else { // Parent process. SmErrorT error; DPRINTFI( "Child process (%i) created for reboot.", (int) pid ); error = sm_timer_register( "reboot force", SM_REBOOT_TIMEOUT_IN_MS, sm_node_api_reboot_timeout, 0, &_reboot_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to create reboot timer, error=%s.", sm_error_str( error ) ); return( true ); } } return( false ); } // **************************************************************************** // **************************************************************************** // Node API - Reboot // ================= SmErrorT sm_node_api_reboot( char reason_text[] ) { if( SM_TIMER_ID_INVALID == _reboot_delay_timer_id ) { char hostname[SM_NODE_NAME_MAX_CHAR] = ""; SmErrorT error; error = sm_node_api_get_hostname( hostname ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get hostname, error=%s.", sm_error_str( error ) ); } DPRINTFI( "Reboot of %s requested, reason=%s.", hostname, reason_text ); sm_log_node_reboot( hostname, reason_text, false ); sm_troubleshoot_dump_data( reason_text ); // Give some time to allow the dump data to finish before // actually attempting a reboot. error = sm_timer_register( "reboot delay", SM_REBOOT_DELAY_IN_MS, sm_node_api_reboot_delay, 0, &_reboot_delay_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to create reboot delay timer, error=%s.", sm_error_str( error ) ); return( error ); } } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Send Event // ===================== static SmErrorT sm_node_api_send_event( void* user_data[], void* record ) { SmNodeEventT* event = (SmNodeEventT*) user_data[0]; const char* reason_text = (char*) user_data[1]; SmDbNodeT* node = (SmDbNodeT*) record; SmErrorT error; error = sm_node_api_send_node_hello( node ); if( SM_OKAY != error ) { DPRINTFE( "Failed to send hello for node (%s), error=%s.", node->name, sm_error_str( error ) ); } error = sm_node_fsm_event_handler( node->name, *event, NULL, reason_text ); if( SM_OKAY != error ) { DPRINTFE( "Event (%s) not handled for node (%s), error=%s.", sm_node_event_str( *event ), node->name, sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Audit // ================ SmErrorT sm_node_api_audit( void ) { char hostname[SM_NODE_NAME_MAX_CHAR]; char db_query[SM_DB_QUERY_STATEMENT_MAX_CHAR]; char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR] = "audit requested"; SmNodeEventT event = SM_NODE_EVENT_AUDIT; SmDbNodeT node; SmErrorT error; void* user_data[] = { &event, reason_text }; error = sm_node_utils_get_hostname( hostname ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get hostname, error=%s.", sm_error_str( error ) ); return( error ); } snprintf( db_query, sizeof(db_query), "%s = '%s'", SM_NODES_TABLE_COLUMN_NAME, hostname ); error = sm_db_foreach( SM_DATABASE_NAME, SM_NODES_TABLE_NAME, db_query, &node, sm_db_nodes_convert, sm_node_api_send_event, user_data ); if( SM_OKAY != error ) { DPRINTFE( "Failed to loop over nodes, error=%s.", sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** void sm_node_api_node_swact_callback( SmNetworkAddressT* network_address, int network_port, int version, int revision, char node_name[], bool force, SmUuidT request_uuid ) { SmNodeSwactMonitor::SwactStart(SM_NODE_STATE_ACTIVE); } // **************************************************************************** // Node API - Initialize // ===================== SmErrorT sm_node_api_initialize( void ) { SmErrorT error; error = sm_db_connect( SM_DATABASE_NAME, &_sm_db_handle ); if( SM_OKAY != error ) { DPRINTFE( "Failed to connect to database (%s), error=%s.", SM_DATABASE_NAME, sm_error_str( error ) ); return( error ); } error = sm_node_fsm_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize node fsm, error=%s.", sm_error_str( error ) ); return( error ); } memset( &_msg_callbacks, 0, sizeof(_msg_callbacks) ); _msg_callbacks.node_hello = sm_node_api_node_hello_callback; _msg_callbacks.node_update = sm_node_api_node_update_callback; _msg_callbacks.node_swact = sm_node_api_node_swact_callback; error = sm_msg_register_callbacks( &_msg_callbacks ); if( SM_OKAY != error ) { DPRINTFE( "Failed to register messaging callbacks, error=%s.", sm_error_str( error ) ); return( error ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Node API - Finalize // =================== SmErrorT sm_node_api_finalize( void ) { SmErrorT error; error = sm_msg_deregister_callbacks( &_msg_callbacks ); if( SM_OKAY != error ) { DPRINTFE( "Failed to deregister messaging callbacks, error=%s.", sm_error_str( error ) ); } memset( &_msg_callbacks, 0, sizeof(_msg_callbacks) ); error = sm_node_fsm_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize node fsm, error=%s.", sm_error_str( error ) ); } if( NULL != _sm_db_handle ) { error = sm_db_disconnect( _sm_db_handle ); if( SM_OKAY != error ) { DPRINTFE( "Failed to disconnect from database (%s), error=%s.", SM_DATABASE_NAME, sm_error_str( error ) ); } _sm_db_handle = NULL; } return( SM_OKAY ); } // ****************************************************************************