Merge "Prevent pmond process recovery when system is not running"

This commit is contained in:
Zuul 2020-06-18 16:25:57 +00:00 committed by Gerrit Code Review
commit a316fea461
3 changed files with 104 additions and 39 deletions

View File

@ -1835,33 +1835,69 @@ int execute_pipe_cmd(const char *command, char *result, unsigned int result_size
#define PIPE_COMMAND_RESPON_LEN (100)
#endif
system_state_enum get_system_state ( void )
const char * get_system_state_str ( system_state_enum system_state )
{
char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ;
execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );
if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 )
switch(system_state)
{
ilog ("systemctl reports host as '%s'\n", pipe_cmd_output );
string temp = pipe_cmd_output ;
if ( temp.find ("stopping") != string::npos )
return MTC_SYSTEM_STATE__STOPPING;
if ( temp.find ("running") != string::npos )
return MTC_SYSTEM_STATE__RUNNING;
if ( temp.find ("degraded") != string::npos )
return MTC_SYSTEM_STATE__DEGRADED;
if ( temp.find ("starting") != string::npos )
return MTC_SYSTEM_STATE__STARTING;
if ( temp.find ("initializing") != string::npos )
return MTC_SYSTEM_STATE__INITIALIZING;
if ( temp.find ("offline") != string::npos )
return MTC_SYSTEM_STATE__OFFLINE;
if ( temp.find ("maintenance") != string::npos )
return MTC_SYSTEM_STATE__MAINTENANCE;
slog ("unexpected response: <%s>\n", temp.c_str());
case MTC_SYSTEM_STATE__RUNNING: return("running");
case MTC_SYSTEM_STATE__STOPPING: return("stopping");
case MTC_SYSTEM_STATE__STARTING: return("starting");
case MTC_SYSTEM_STATE__DEGRADED: return("degraded");
case MTC_SYSTEM_STATE__INITIALIZING: return("initializing");
case MTC_SYSTEM_STATE__OFFLINE: return("offline");
case MTC_SYSTEM_STATE__MAINTENANCE: return("maintenance");
default: return("unknown");
}
else
{
wlog ("systemctl is-system-running yielded no response\n");
}
return MTC_SYSTEM_STATE__UNKNOWN ;
}
/**
 * Query systemd for the overall system state.
 *
 * Runs 'systemctl is-system-running' up to 3 times, stopping at the first
 * attempt whose output contains a recognized state keyword, and maps that
 * keyword to the corresponding system_state_enum value.
 *
 * @param verbose when true the resulting state is always logged ;
 *                when false only an unrecognized response is logged.
 *
 * @return the matched state, or MTC_SYSTEM_STATE__UNKNOWN when no attempt
 *         produced a recognized response.
 */
system_state_enum get_system_state ( bool verbose )
{
    /* Keywords are tested in this order ; the first one found in the
     * command output decides the state. */
    static const struct
    {
        const char      * keyword ;
        system_state_enum state   ;
    } state_table [] =
    {
        { "stopping",     MTC_SYSTEM_STATE__STOPPING     },
        { "running",      MTC_SYSTEM_STATE__RUNNING      },
        { "degraded",     MTC_SYSTEM_STATE__DEGRADED     },
        { "starting",     MTC_SYSTEM_STATE__STARTING     },
        { "initializing", MTC_SYSTEM_STATE__INITIALIZING },
        { "offline",      MTC_SYSTEM_STATE__OFFLINE      },
        { "maintenance",  MTC_SYSTEM_STATE__MAINTENANCE  },
    };
    const unsigned int state_table_size =
        sizeof(state_table) / sizeof(state_table[0]) ;

    int    retry = 0 ;
    bool   unexpected_response = false ;
    string temp = "" ;
    system_state_enum system_state = MTC_SYSTEM_STATE__UNKNOWN ;

    for ( ; retry < 3 ; retry++ )
    {
        /* Zero the buffer so that a failed command, whose return code is
         * not examined here, reads back as an empty response rather than
         * as uninitialized stack data. */
        char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] = { 0 } ;
        execute_pipe_cmd ( "systemctl is-system-running",
                           &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );

        size_t len = strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN );
        if ( len > 0 )
        {
            /* Bounded copy ; does not rely on NUL termination */
            temp.assign ( pipe_cmd_output, len );

            for ( unsigned int i = 0 ; i < state_table_size ; i++ )
            {
                if ( temp.find ( state_table[i].keyword ) != string::npos )
                {
                    system_state = state_table[i].state ;
                    break ;
                }
            }
            if ( system_state == MTC_SYSTEM_STATE__UNKNOWN )
                unexpected_response = true ;
        }
        if ( system_state != MTC_SYSTEM_STATE__UNKNOWN )
            break ;
    }

    /* A recognized response on a later attempt supersedes an earlier
     * unrecognized one ; otherwise the 'unexpected response' log below
     * would be printed with the valid response text. */
    if ( system_state != MTC_SYSTEM_STATE__UNKNOWN )
        unexpected_response = false ;

    if ( unexpected_response )
    {
        ilog ("systemctl provided unexpected response:'%s'", temp.c_str());
    }
    else if ( verbose )
    {
        /* retry is the zero based index of the attempt that succeeded,
         * or 3 when every attempt failed. */
        ilog ("systemctl reports host in '%s' state (%d)",
               get_system_state_str(system_state), retry);
    }
    return system_state ;
}

View File

@ -127,6 +127,7 @@ typedef enum
MTC_SYSTEM_STATE__UNKNOWN
} system_state_enum ;
system_state_enum get_system_state ( void );
system_state_enum get_system_state ( bool verbose=true );
const char * get_system_state_str ( system_state_enum system_state );
#endif

View File

@ -1807,7 +1807,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period );
mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period );
ilog ("Starting 'Pulse' timer (%d secs)\n", pulse_period );
ilog ("Starting 'Pulse' timer (%d msecs)\n", pulse_period );
mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period );
ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period );
@ -1887,17 +1887,6 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
}
}
/* Avoid pmond thrashing trying to recover processes during
* system shutdown. */
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
{
wlog_throttled ( shutdown_log_throttle, 500,
"process monitoring disabled during system shutdown\n");
usleep (500);
continue ;
}
if ( shutdown_log_throttle ) shutdown_log_throttle = 0 ;
if ( inotify_fault == false )
{
if ( get_inotify_events ( ctrl_ptr->fd ) == true )
@ -1992,9 +1981,48 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
_get_events ( );
}
/* Check system state before managing processes.
*
* Prevent process recovery while not in the
* running or degraded state. */
if (( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__RUNNING ) &&
( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__DEGRADED ))
{
system_state_enum system_state = get_system_state(false);
if ( system_state != _pmon_ctrl_ptr->system_state )
{
_pmon_ctrl_ptr->system_state = system_state ;
if (( system_state != MTC_SYSTEM_STATE__RUNNING ) &&
( system_state != MTC_SYSTEM_STATE__DEGRADED ))
{
/* log every state change that is not running / degraded */
wlog ("process monitoring disabled while in '%s' state",
get_system_state_str(system_state));
}
else
{
/* log the state change back to running / degraded */
wlog ("process monitoring re-enabled while in '%s' state",
get_system_state_str(system_state));
}
}
/* throttle the disabled state during shutdown log */
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
{
wlog_throttled ( shutdown_log_throttle, 60,
"process monitoring disabled during system shutdown\n");
}
sleep (1);
continue ;
}
else if ( shutdown_log_throttle )
shutdown_log_throttle = 0 ;
/* Monitor Processes */
for ( int i = 0 ; i < ctrl_ptr->processes ; i++ )
{
/* Allow a process to be ignored */
if ( process_config[i].ignore == true )
{