diff --git a/mtce-common/src/common/nodeUtil.cpp b/mtce-common/src/common/nodeUtil.cpp index 460a3fe7..bc245703 100755 --- a/mtce-common/src/common/nodeUtil.cpp +++ b/mtce-common/src/common/nodeUtil.cpp @@ -1835,33 +1835,69 @@ int execute_pipe_cmd(const char *command, char *result, unsigned int result_size #define PIPE_COMMAND_RESPON_LEN (100) #endif -system_state_enum get_system_state ( void ) +const char * get_system_state_str ( system_state_enum system_state ) { - char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ; - execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN ); - if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 ) + switch(system_state) { - ilog ("systemctl reports host as '%s'\n", pipe_cmd_output ); - string temp = pipe_cmd_output ; - if ( temp.find ("stopping") != string::npos ) - return MTC_SYSTEM_STATE__STOPPING; - if ( temp.find ("running") != string::npos ) - return MTC_SYSTEM_STATE__RUNNING; - if ( temp.find ("degraded") != string::npos ) - return MTC_SYSTEM_STATE__DEGRADED; - if ( temp.find ("starting") != string::npos ) - return MTC_SYSTEM_STATE__STARTING; - if ( temp.find ("initializing") != string::npos ) - return MTC_SYSTEM_STATE__INITIALIZING; - if ( temp.find ("offline") != string::npos ) - return MTC_SYSTEM_STATE__OFFLINE; - if ( temp.find ("maintenance") != string::npos ) - return MTC_SYSTEM_STATE__MAINTENANCE; - slog ("unexpected response: <%s>\n", temp.c_str()); + case MTC_SYSTEM_STATE__RUNNING: return("running"); + case MTC_SYSTEM_STATE__STOPPING: return("stopping"); + case MTC_SYSTEM_STATE__STARTING: return("starting"); + case MTC_SYSTEM_STATE__DEGRADED: return("degraded"); + case MTC_SYSTEM_STATE__INITIALIZING: return("initializing"); + case MTC_SYSTEM_STATE__OFFLINE: return("offline"); + case MTC_SYSTEM_STATE__MAINTENANCE: return("maintenance"); + default: return("unknown"); } - else - { - wlog ("systemctl is-system-running yielded no response\n"); - } - return MTC_SYSTEM_STATE__UNKNOWN ; +} + +system_state_enum get_system_state ( bool verbose ) +{ + + int retry = 0 ; + bool unexpected_response = false ; + string temp = "" ; + system_state_enum system_state = MTC_SYSTEM_STATE__UNKNOWN ; + for ( ; retry < 3 ; retry++ ) + { + char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ; + execute_pipe_cmd ( "systemctl is-system-running", + &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN ); + if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 ) + { + temp = pipe_cmd_output ; + if ( temp.find ("stopping") != string::npos ) + system_state = MTC_SYSTEM_STATE__STOPPING; + else if ( temp.find ("running") != string::npos ) + system_state = MTC_SYSTEM_STATE__RUNNING; + else if ( temp.find ("degraded") != string::npos ) + system_state = MTC_SYSTEM_STATE__DEGRADED; + else if ( temp.find ("starting") != string::npos ) + system_state = MTC_SYSTEM_STATE__STARTING; + else if ( temp.find ("initializing") != string::npos ) + system_state = MTC_SYSTEM_STATE__INITIALIZING; + else if ( temp.find ("offline") != string::npos ) + system_state = MTC_SYSTEM_STATE__OFFLINE; + else if ( temp.find ("maintenance") != string::npos ) + system_state = MTC_SYSTEM_STATE__MAINTENANCE; + else + unexpected_response = true ; + } + + if ( system_state != MTC_SYSTEM_STATE__UNKNOWN ) + break ; + } + + if ( verbose || unexpected_response ) + { + if ( unexpected_response ) + { + ilog ("systemctl provided unexpected response:'%s'", temp.c_str()); + } + else + { + ilog ("systemctl reports host in '%s' state (%d)", + get_system_state_str(system_state), retry); + } + } + return system_state ; } diff --git a/mtce-common/src/common/nodeUtil.h b/mtce-common/src/common/nodeUtil.h index 00a6828b..c56f3796 100755 --- a/mtce-common/src/common/nodeUtil.h +++ b/mtce-common/src/common/nodeUtil.h @@ -127,6 +127,7 @@ typedef enum MTC_SYSTEM_STATE__UNKNOWN } system_state_enum ; -system_state_enum get_system_state ( void ); +system_state_enum get_system_state ( bool verbose=true ); +const char * get_system_state_str ( system_state_enum system_state ); #endif diff --git a/mtce/src/pmon/pmonHdlr.cpp b/mtce/src/pmon/pmonHdlr.cpp index 7b5977d1..7ab0a8ee 100644 --- a/mtce/src/pmon/pmonHdlr.cpp +++ b/mtce/src/pmon/pmonHdlr.cpp @@ -1807,7 +1807,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period ); mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period ); - ilog ("Starting 'Pulse' timer (%d secs)\n", pulse_period ); + ilog ("Starting 'Pulse' timer (%d msecs)\n", pulse_period ); mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period ); ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period ); @@ -1887,17 +1887,6 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) } } - /* Avoid pmond thrashing trying to recover processes during - * system shutdown. */ - if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING ) - { - wlog_throttled ( shutdown_log_throttle, 500, - "process monitoring disabled during system shutdown\n"); - usleep (500); - continue ; - } - if ( shutdown_log_throttle ) shutdown_log_throttle = 0 ; - if ( inotify_fault == false ) { if ( get_inotify_events ( ctrl_ptr->fd ) == true ) @@ -1992,9 +1981,48 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) _get_events ( ); } + /* Check system state before managing processes. + * + * Prevent process recoverty while not in the + * running or degraded state. */ + if (( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__RUNNING ) && + ( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__DEGRADED )) + { + system_state_enum system_state = get_system_state(false); + if ( system_state != _pmon_ctrl_ptr->system_state ) + { + _pmon_ctrl_ptr->system_state = system_state ; + if (( system_state != MTC_SYSTEM_STATE__RUNNING ) && + ( system_state != MTC_SYSTEM_STATE__DEGRADED )) + { + /* log every state change that is not running / degraded */ + wlog ("process monitoring disabled while in '%s' state", + get_system_state_str(system_state)); + } + else + { + /* log every state change that is not running / degraded */ + wlog ("process monitoring re-enabled while in '%s' state", + get_system_state_str(system_state)); + } + } + + /* throttle the disabled state during shutdown log */ + if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING ) + { + wlog_throttled ( shutdown_log_throttle, 60, + "process monitoring disabled during system shutdown\n"); + } + sleep (1); + continue ; + } + else if ( shutdown_log_throttle ) + shutdown_log_throttle = 0 ; + /* Monitor Processes */ for ( int i = 0 ; i < ctrl_ptr->processes ; i++ ) { + /* Allow a process to be ignored */ if ( process_config[i].ignore == true ) {