6a5e10492c
This decouples the build and packaging of guest-server, guest-agent from mtce, by splitting guest component into stx-nfv repo. This leaves existing C++ code, scripts, and resource files untouched, so there is no functional change. Code refactoring is beyond the scope of this update. Makefiles were modified to include devel headers directories /usr/include/mtce-common and /usr/include/mtce-daemon. This ensures there is no contamination with other system headers. The cgts-mtce-common package is renamed and split into: - repo stx-metal: mtce-common, mtce-common-dev - repo stx-metal: mtce - repo stx-nfv: mtce-guest - repo stx-ha: updates package dependencies to mtce-pmon for service-mgmt, sm, and sm-api mtce-common: - contains common and daemon shared source utility code mtce-common-dev: - based on mtce-common, contains devel package required to build mtce-guest and mtce - contains common library archives and headers mtce: - contains components: alarm, fsmon, fsync, heartbeat, hostw, hwmon, maintenance, mtclog, pmon, public, rmon mtce-guest: - contains guest component guest-server, guest-agent Story: 2002829 Task: 22748 Change-Id: I9c7a9b846fd69fd566b31aa3f12a043c08f19f1f Signed-off-by: Jim Gauld <james.gauld@windriver.com>
818 lines
27 KiB
C++
818 lines
27 KiB
C++
/*
|
|
* Copyright (c) 2016-2017 Wind River Systems, Inc.
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
*/
|
|
|
|
/****************************************************************************
|
|
*
|
|
* @file
|
|
* Wind River Titanium Cloud - Threading Base Implementation Module"
|
|
*
|
|
* This module implements the common thread utility module that can be used
|
|
* by any maintenance process to
|
|
*
|
|
* - launch threads
|
|
* - monitor thread execution
|
|
* - detect thread completion
|
|
* - kill a thread if needed
|
|
*
|
|
* - thread exeuction FSM
|
|
*
|
|
* For more complehensive description please refer
|
|
* to the module header threadUtil.h
|
|
*
|
|
****************************************************************************/
|
|
|
|
#include "daemon_common.h" /* for ... daemon_health_test */
|
|
#include "nodeBase.h" /* for ... mtce node common definitions */
|
|
#include "hostUtil.h" /* for ... mtce host common definitions */
|
|
#include "threadUtil.h" /* for ... this module header */
|
|
|
|
/* Stores the parent process's timer handler */
|
|
static void (*thread_timer_handler)(int, siginfo_t*, void*) = NULL ;
|
|
|
|
static pthread_attr_t __attr;
|
|
static sigset_t __enabled_mask ;
|
|
static sigset_t __disabled_mask;
|
|
static unsigned int __thread_init_sig ;
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : threadUtil_init
|
|
*
|
|
* Description: Module init with caller specified timer handler.
|
|
*
|
|
****************************************************************************/
|
|
|
|
static std::string threadStages_str[THREAD_STAGE__STAGES+1];
|
|
|
|
int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ))
|
|
{
|
|
/* preserve parent process timer handler */
|
|
thread_timer_handler = handler ;
|
|
|
|
/* setup the stage strings */
|
|
threadStages_str[THREAD_STAGE__IDLE] = "Idle" ;
|
|
threadStages_str[THREAD_STAGE__IGNORE] = "Ignore" ;
|
|
threadStages_str[THREAD_STAGE__LAUNCH] = "Launch" ;
|
|
threadStages_str[THREAD_STAGE__MONITOR] = "Monitor" ;
|
|
threadStages_str[THREAD_STAGE__DONE] = "Done" ;
|
|
threadStages_str[THREAD_STAGE__KILL] = "Kill" ;
|
|
threadStages_str[THREAD_STAGE__WAIT] = "Wait" ;
|
|
|
|
/* setup to create a 'detached' threads */
|
|
pthread_attr_init(&__attr);
|
|
pthread_attr_setdetachstate(&__attr, PTHREAD_CREATE_DETACHED);
|
|
threadUtil_setstack_size ();
|
|
|
|
__thread_init_sig = THREAD_INIT_SIG ;
|
|
|
|
return (PASS);
|
|
}
|
|
|
|
#define MTCE_PTHREAD_MAX_STACK_SIZE (0x20000) /* 128K */
|
|
void threadUtil_setstack_size ( void )
|
|
{
|
|
size_t stack_size_before = 0 ;
|
|
size_t stack_size_after = 0 ;
|
|
/* manage pthread stack size */
|
|
if ( pthread_attr_getstacksize (&__attr,&stack_size_before) == PASS )
|
|
{
|
|
if ( stack_size_before > MTCE_PTHREAD_MAX_STACK_SIZE )
|
|
{
|
|
if ( pthread_attr_setstacksize ( &__attr, MTCE_PTHREAD_MAX_STACK_SIZE ) == PASS )
|
|
{
|
|
if ( pthread_attr_getstacksize (&__attr,&stack_size_after) == PASS )
|
|
{
|
|
ilog ("thread Stack: %zu KB (was %zu)\n",
|
|
stack_size_after/1024,
|
|
stack_size_before/1024 );
|
|
}
|
|
else
|
|
{
|
|
elog ("failed to set pthread stack size (%d:%m)\n", errno );
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ilog ("pthread stack size is %zu bytes\n", stack_size_before );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
elog ("failed to get pthread stack size (%d:%m)\n", errno );
|
|
}
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : threadUtil_fini
|
|
*
|
|
* Description: Module cleanup
|
|
*
|
|
****************************************************************************/
|
|
|
|
void threadUtil_fini ( void )
|
|
{
|
|
; // ilog ("called\n");
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : _stage_change
|
|
*
|
|
* Description: Change thread FSM stage.
|
|
*
|
|
* See thread_stages_enum in threadUtil.h for a list of stage enums
|
|
*
|
|
****************************************************************************/
|
|
|
|
void _stage_change ( thread_ctrl_type & ctrl, thread_stages_enum newStage )
|
|
{
|
|
if ( newStage < THREAD_STAGE__STAGES )
|
|
{
|
|
clog ("%s %s thread stage from %s -> %s\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
threadStages_str[ctrl.stage].c_str(),
|
|
threadStages_str[newStage].c_str());
|
|
ctrl.stage = newStage ;
|
|
}
|
|
else
|
|
{
|
|
slog ("%s %s thread stage change to '%d' is invalid ; switching to KILL\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
newStage );
|
|
ctrl.stage = THREAD_STAGE__KILL ;
|
|
}
|
|
ctrl.stage_log_throttle = 0 ;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : thread_stage
|
|
*
|
|
* Description: Returns a string representing the current thread stage.
|
|
*
|
|
*****************************************************************************/
|
|
|
|
string thread_stage ( thread_ctrl_type & ctrl )
|
|
{
|
|
if ( ctrl.stage < THREAD_STAGE__STAGES )
|
|
return(threadStages_str[ctrl.stage]);
|
|
return("out-of-range thread stage");
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : thread_init
|
|
*
|
|
* Description: Default a thread 'ctrl' and 'info' structs.
|
|
*
|
|
* Assumptions: Called at init time once.
|
|
*
|
|
* Warning : Thread should be running when this is called.
|
|
* Warning : Should not be called more than once or else might create
|
|
* an orphan timer.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* - reference to the ctrl and info structs for a specified thread
|
|
* - pointer to thread specific extra data
|
|
* - the thread function pointer itself
|
|
* - thread execution timeout in seconds
|
|
* - reference to the host and thread names.
|
|
*
|
|
* Returns : nothing
|
|
*
|
|
*****************************************************************************/
|
|
|
|
void thread_init ( thread_ctrl_type & thread_ctrl,
|
|
thread_info_type & thread_info,
|
|
void* extra_data_ptr,
|
|
void* (*thread) (void*),
|
|
int timeout,
|
|
string & hostname,
|
|
string threadname )
|
|
{
|
|
/* default the ctrl struct */
|
|
thread_ctrl.stage = THREAD_STAGE__IDLE ;
|
|
thread_ctrl.done = true ;
|
|
thread_ctrl.idle = true ;
|
|
thread_ctrl.id = 0 ;
|
|
thread_ctrl.thread = thread ;
|
|
thread_ctrl.hostname = hostname ;
|
|
thread_ctrl.name = threadname ;
|
|
|
|
thread_ctrl.timeout = timeout ;
|
|
mtcTimer_init ( thread_ctrl.timer, hostname, threadname );
|
|
|
|
thread_ctrl.status = PASS ;
|
|
thread_ctrl.runcount = 0 ;
|
|
thread_ctrl.retries = 0 ;
|
|
|
|
thread_ctrl.stage_log_throttle = 0 ;
|
|
|
|
/* Init the thread's info struct - the only non-stack memory the
|
|
* thread can look at or touch */
|
|
thread_info.hostname = hostname ;
|
|
thread_info.name = threadname ;
|
|
thread_info.id = 0 ;
|
|
thread_info.command = 0 ;
|
|
thread_info.runcount = 0 ;
|
|
thread_info.progress = 0 ;
|
|
thread_info.signal = 0 ;
|
|
thread_info.data.clear() ;
|
|
thread_info.extra_info_ptr = extra_data_ptr ;
|
|
thread_info.pw_file_fd = 0 ;
|
|
thread_info.password_file.clear() ;
|
|
|
|
/* command execution status */
|
|
thread_info.status_string.clear();
|
|
thread_info.status = 0 ;
|
|
|
|
snprintf ( thread_info.log_prefix, MAX_LOG_PREFIX_LEN, "%s %s thread",
|
|
thread_ctrl.hostname.data(), thread_ctrl.name.data());
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* Name : thread_done
|
|
*
|
|
* Description: Return true if we are in the DONE stage.
|
|
*
|
|
****************************************************************************/
|
|
|
|
bool thread_done ( thread_ctrl_type & ctrl )
|
|
{
|
|
if ( ctrl.stage == THREAD_STAGE__DONE )
|
|
{
|
|
return (true) ;
|
|
}
|
|
return (false);
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* Name : thread_idle
|
|
*
|
|
* Description: Return true if we are in the IDLE stage.
|
|
*
|
|
****************************************************************************/
|
|
|
|
bool thread_idle ( thread_ctrl_type & ctrl )
|
|
{
|
|
if ( ctrl.stage == THREAD_STAGE__IDLE )
|
|
{
|
|
return (true) ;
|
|
}
|
|
return (false);
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* Name : thread_launch
|
|
*
|
|
* Description: Perform prechecks that verify the ctrl struct is ready for
|
|
* thread launch and if so change stage to THREAD_STAGE__LAUNCH.
|
|
*
|
|
****************************************************************************/
|
|
|
|
int thread_launch ( thread_ctrl_type & ctrl, thread_info_type & info )
|
|
{
|
|
int rc = FAIL ;
|
|
if ( ! thread_timer_handler )
|
|
{
|
|
slog ("%s no thread timer handler bound in\n",
|
|
ctrl.hostname.c_str());
|
|
rc = FAIL_NULL_POINTER ;
|
|
}
|
|
|
|
else if ( ctrl.thread == NULL )
|
|
{
|
|
slog ("%s %s no thread bound in\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
rc = FAIL_NULL_POINTER ;
|
|
}
|
|
|
|
else if ( ctrl.stage != THREAD_STAGE__IDLE )
|
|
{
|
|
wlog ("%s %s not in IDLE stage (in %s stage)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
threadStages_str[ctrl.stage].c_str());
|
|
thread_kill ( ctrl, info );
|
|
rc = FAIL_BAD_STATE ;
|
|
}
|
|
|
|
else if ( ctrl.id )
|
|
{
|
|
slog ("%s %s thread may be running ; id is not null and should be\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
thread_kill ( ctrl, info );
|
|
rc = FAIL_THREAD_RUNNING ;
|
|
}
|
|
|
|
else
|
|
{
|
|
_stage_change ( ctrl, THREAD_STAGE__LAUNCH );
|
|
rc = PASS ;
|
|
}
|
|
return (rc);
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
*
|
|
* Name : thread_kill
|
|
*
|
|
* Description: put the FSM in the kill state.
|
|
*
|
|
****************************************************************************/
|
|
|
|
void thread_kill ( thread_ctrl_type & ctrl, thread_info_type & info )
|
|
{
|
|
info.signal = SIGKILL ;
|
|
|
|
/* only go to kill if not already handling kill */
|
|
if (( ctrl.stage != THREAD_STAGE__KILL ) &&
|
|
( ctrl.stage != THREAD_STAGE__WAIT ) &&
|
|
( ctrl.stage != THREAD_STAGE__IDLE ))
|
|
{
|
|
blog ("%s kill request\n", ctrl.hostname.c_str() );
|
|
_stage_change ( ctrl, THREAD_STAGE__KILL );
|
|
}
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Name : thread_handler
|
|
*
|
|
* Description: finite state machine to manage a pthread execution life cycle
|
|
*
|
|
* The parent must periodically run this thread_handler to service and make
|
|
* forward progress in the FSM.
|
|
*
|
|
* Thread FSM life cycle and responsibilities:
|
|
*
|
|
* Parent calls thread_init once before any launch which sets up the ctrl
|
|
* and info structs. Default state THREAD_STAGE__IDLE IDLE
|
|
*
|
|
* When there is a thread to be launched ...
|
|
*
|
|
* 1. Parent FSM calls thread_launch to launch the thread
|
|
* - Thread FSM performs thread launch pre-checks
|
|
* - check for timer handler binding
|
|
* - rc = FAIL_NULL_POINTER
|
|
* - check that there is a thread bound in
|
|
* - rc = FAIL_NULL_POINTER
|
|
* - verify we are in the correct stage for launch
|
|
* - rc = FAIL_BAD_STATE
|
|
* - verify the thread is not already running
|
|
* - rc = FAIL_THREAD_RUNNING
|
|
*
|
|
* - if rc == PASS change state to THREAD_STAGE__LAUNCH
|
|
* - if rc != PASS change state to THREAD_STAGE__IDLE
|
|
*
|
|
* - Parent FSM handles thread_launch return status
|
|
*
|
|
* if ( thread_launch == PASS )
|
|
* - start a parent timer ; a longer umbrella timer
|
|
* else
|
|
* - fail operation or retry
|
|
*
|
|
* 2. Thread FSM launches the thread in THREAD_STAGE__LAUNCH stage
|
|
* - preserves parent signal mask
|
|
* - clears signal mask so that thread does not inherit signal handling
|
|
* - launch the thread
|
|
* - restore signal mask
|
|
*
|
|
* if launch failed
|
|
* - change ctrl.status = FAIL_THREAD_CREATE
|
|
* - change ctrl.stage = THREAD_STAGE__DONE
|
|
*
|
|
* if launch passed
|
|
* - start the thread timeout timer if timeout is !0
|
|
* - change ctrl.stage = THREAD_STAGE__MONITOR
|
|
*
|
|
* 3. Thread FSM monitors thread execution in THREAD_STAGE__MONITOR stage
|
|
* - waits for done conditions or thread timeout
|
|
* - ctrl.timer.ring or incremented info.runcount
|
|
* Note: thread increments info.runcount on exit/done
|
|
*
|
|
* if ( thread timeout )
|
|
* - sets ctrl.status = FAIL_TIMEOUT
|
|
* - sets ctrl.stage = THREAD_STAGE__KILL
|
|
*
|
|
* if ( info.runcount > ctrl.runcount )
|
|
* - stop thread timer
|
|
* - change ctrl.stage = THREAD_STAGE__DONE
|
|
*
|
|
* 4. Parent FSM Monitors for thread done or parent timer timeout
|
|
* - has started its own umbrella timeout timer that is a
|
|
* few seconds longer than the actual thread timeout.
|
|
* - thread_done returns true when ctrl.stage == THREAD_STAGE__DONE
|
|
*
|
|
* if ( parent timeout )
|
|
* - sets ctrl.status = FAIL_TIMEOUT
|
|
* - sets trl.stage = THREAD_STAGE__KILL
|
|
*
|
|
* if ( thread_done )
|
|
* - interprets ctrl.status
|
|
* - interprets info.status
|
|
* - consumes info.data which contains thread execution result
|
|
* - changes ctrl.done = true once data is consumed.
|
|
* - Parent FSM is done with this thread
|
|
*
|
|
* 5. Thread FSM monitors for Parent FSM done in THREAD_STAGE__DONE
|
|
* - Parent FSM changes ctrl.done to true once it has consumed the thread data
|
|
* - Thread FSM polls ctrl.done
|
|
*
|
|
* if ( ctrl.done == true )
|
|
* - changes ctrl.stage = THREAD_STAGE__IDLE
|
|
* - Thread FSM is done with this thread
|
|
*
|
|
* Note: The ctrl and info structs are intentionally kept separate for two
|
|
* reasons ...
|
|
*
|
|
* 1. distinguish between parent process (ctrl) and thread (info) data.
|
|
* 2. the parent might want them to occupying completely differnet memory
|
|
* spaces in the future.
|
|
*
|
|
*****************************************************************************/
|
|
|
|
int thread_handler ( thread_ctrl_type & ctrl, thread_info_type & info )
|
|
{
|
|
int rc = PASS ;
|
|
|
|
switch ( ctrl.stage )
|
|
{
|
|
case THREAD_STAGE__IGNORE:
|
|
{
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__IDLE:
|
|
{
|
|
if ( ctrl.idle == false )
|
|
{
|
|
ctrl.idle = true ;
|
|
dlog ("%s IDLE\n", info.log_prefix);
|
|
if (( ctrl.id ) || ( info.id ) || ( ctrl.done == false ))
|
|
{
|
|
slog ("%s bad thread state [%lu:%lu:%d]\n", info.log_prefix, ctrl.id, info.id, ctrl.done );
|
|
}
|
|
}
|
|
|
|
/******************** Garbage Collection *****************/
|
|
|
|
|
|
/* remove previous password file if it somehow did not get removed before */
|
|
if ( info.pw_file_fd )
|
|
{
|
|
wlog ("%s closing pw fd (%d) ; garbage collected\n",
|
|
info.hostname.c_str(),
|
|
info.pw_file_fd );
|
|
|
|
close(info.pw_file_fd);
|
|
info.pw_file_fd = 0 ;
|
|
}
|
|
|
|
if ( ! info.password_file.empty() )
|
|
{
|
|
if ( daemon_is_file_present ( info.password_file.data() ))
|
|
{
|
|
wlog ("%s removing pw file (%s) ; garbage collected\n",
|
|
info.hostname.c_str(),
|
|
info.password_file.c_str());
|
|
|
|
unlink(info.password_file.data());
|
|
daemon_remove_file (info.password_file.data());
|
|
info.password_file.clear();
|
|
}
|
|
}
|
|
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__WAIT:
|
|
{
|
|
if ( mtcTimer_expired ( ctrl.timer ) )
|
|
{
|
|
ctrl.timer.ring = false ;
|
|
ctrl.done = true ;
|
|
ctrl.id = 0 ;
|
|
info.id = 0 ;
|
|
info.command = 0 ;
|
|
_stage_change ( ctrl, THREAD_STAGE__IDLE );
|
|
}
|
|
else if ( ctrl.done == true )
|
|
{
|
|
/* force wait completed */
|
|
mtcTimer_reset ( ctrl.timer );
|
|
info.command = 0 ;
|
|
_stage_change ( ctrl, THREAD_STAGE__IDLE );
|
|
}
|
|
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__DONE:
|
|
{
|
|
if ( ctrl.done == true )
|
|
{
|
|
if (( info.signal_handling == 0 ) && ( info.status == PASS ))
|
|
{
|
|
wlog ("%s %s thread not servicing pthread_signal_handler\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
}
|
|
dlog ("%s %s thread data was consumed by parent ; switching to IDLE\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
ctrl.id = 0 ;
|
|
info.id = 0 ;
|
|
|
|
dlog ("%s %s done\n", ctrl.hostname.c_str(), ctrl.name.c_str());
|
|
|
|
_stage_change ( ctrl, THREAD_STAGE__IDLE );
|
|
}
|
|
else if ( info.signal == SIGKILL )
|
|
{
|
|
wlog ("%s %s thread completed ; waiting on DONE but got SIGKILL ; forcing DONE\n", ctrl.hostname.c_str(), ctrl.name.c_str() );
|
|
ctrl.done = true ;
|
|
}
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__LAUNCH:
|
|
{
|
|
/*
|
|
* pre-check should never this this come in as non-null but just
|
|
* to be sure a thread is actually created properly we set it to null
|
|
*/
|
|
if ( ctrl.id )
|
|
{
|
|
slog ("%s %s thread id should be 0\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
|
|
ctrl.id = 0 ;
|
|
}
|
|
/*
|
|
* Prepare thread complete criteria.
|
|
*
|
|
* When info.runcount > ctrl.runcount then the thread is done.
|
|
*/
|
|
ctrl.runcount = info.runcount ;
|
|
|
|
/* thread updates this stuff */
|
|
info.status_string.clear() ;
|
|
info.status = -1 ;
|
|
info.progress = 0 ;
|
|
info.signal = 0 ;
|
|
info.id = 0 ;
|
|
info.data.clear() ;
|
|
info.signal_handling = 0 ;
|
|
|
|
ctrl.idle = false ; /* not idle - for idle log throttle */
|
|
ctrl.done = false ; /* declare the thread as running */
|
|
|
|
daemon_signal_hdlr ();
|
|
|
|
/* Block signals */
|
|
sigfillset(&__disabled_mask);
|
|
|
|
// sigemptyset(&__enabled_mask); /* maybe not needed */
|
|
pthread_sigmask(SIG_SETMASK, &__disabled_mask, NULL );
|
|
pthread_sigmask(SIG_BLOCK, &__disabled_mask, &__enabled_mask);
|
|
|
|
rc = pthread_create(&ctrl.id, &__attr, ctrl.thread, (void*)&info);
|
|
|
|
if ( sigismember (&__enabled_mask, SIGINT ) == 0 )
|
|
{
|
|
slog ("%s SIGINT signal was not enabled ; enabling\n", ctrl.hostname.c_str());
|
|
sigaddset(&__enabled_mask, SIGINT);
|
|
}
|
|
if ( sigismember (&__enabled_mask, SIGTERM ) == 0 )
|
|
{
|
|
slog ("%s SIGTERM signal was not enabled ; enabling\n", ctrl.hostname.c_str());
|
|
sigaddset(&__enabled_mask, SIGTERM);
|
|
}
|
|
if ( sigismember (&__enabled_mask, SIGUSR1 ) == 0 )
|
|
{
|
|
slog ("%s SIGUSR1 signal was not enabled ; enabling\n", ctrl.hostname.c_str());
|
|
sigaddset(&__enabled_mask, SIGUSR1);
|
|
}
|
|
|
|
/* restore signal mask */
|
|
pthread_sigmask(SIG_SETMASK, &__enabled_mask, NULL );
|
|
pthread_sigmask(SIG_UNBLOCK, &__enabled_mask, NULL );
|
|
|
|
/* The above disables signal handling for a short period while a
|
|
* thread is started. In the meantime the only signal that is
|
|
* crutial not to miss is USR1.
|
|
* Work Around: run the USR1 signal handler immediately following
|
|
* the launch just in case it was requested during the launch
|
|
* while the signals were masked. */
|
|
daemon_health_test ();
|
|
|
|
if (rc != PASS)
|
|
{
|
|
elog ("%s %s thread launch failed (%d:%d:%m]",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
rc, errno );
|
|
|
|
ctrl.status = info.status = FAIL_THREAD_CREATE ;
|
|
_stage_change ( ctrl, THREAD_STAGE__DONE );
|
|
}
|
|
else if ( ctrl.id == 0 )
|
|
{
|
|
elog ("%s %s thread id is null\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
|
|
ctrl.status = info.status = FAIL_THREAD_CREATE ;
|
|
_stage_change ( ctrl, THREAD_STAGE__DONE );
|
|
}
|
|
else
|
|
{
|
|
dlog ("%s %s thread launched with command:%d\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
info.command );
|
|
ctrl.status = PASS ;
|
|
|
|
if ( ctrl.timeout )
|
|
{
|
|
mtcTimer_start ( ctrl.timer, thread_timer_handler, ctrl.timeout );
|
|
}
|
|
|
|
/* start monitoring */
|
|
_stage_change ( ctrl, THREAD_STAGE__MONITOR );
|
|
}
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__MONITOR:
|
|
{
|
|
/* provide subtle indication that the thread ids don't match */
|
|
if (( ctrl.id != info.id ) && ( info.id != 0 ))
|
|
{
|
|
ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread [%ld:%ld] monitoring (progress:%d)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
ctrl.id,
|
|
info.id,
|
|
info.progress);
|
|
}
|
|
#ifdef WANT_THROTTLED_PROGRESS_LOG
|
|
else
|
|
{
|
|
ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread monitoring (progress:%d)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
info.progress);
|
|
}
|
|
#endif
|
|
if (( ctrl.timeout ) && ( mtcTimer_expired ( ctrl.timer ) ))
|
|
{
|
|
elog ("%s %s thread timeout\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str());
|
|
|
|
ctrl.status = FAIL_TIMEOUT ;
|
|
_stage_change ( ctrl, THREAD_STAGE__KILL );
|
|
}
|
|
else if ( info.runcount > ctrl.runcount )
|
|
{
|
|
mtcTimer_reset ( ctrl.timer );
|
|
|
|
if ( info.runcount != (ctrl.runcount+1))
|
|
{
|
|
wlog ("%s %s thread runcount jumped from %d to %d (rc:%d)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
ctrl.runcount,
|
|
info.runcount,
|
|
info.status);
|
|
}
|
|
else
|
|
{
|
|
if ( info.status )
|
|
{
|
|
wlog ("%s %s thread completed (rc:%d)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
info.status);
|
|
}
|
|
}
|
|
ctrl.id = 0 ;
|
|
info.id = 0 ;
|
|
_stage_change ( ctrl, THREAD_STAGE__DONE );
|
|
}
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__KILL:
|
|
{
|
|
info.signal = SIGKILL ;
|
|
if ( info.id != 0 )
|
|
{
|
|
wlog ("%s %s thread kill req (rc:%d)\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str(),
|
|
info.status);
|
|
}
|
|
if ( ctrl.id != 0 )
|
|
{
|
|
/* Tell the thread ; by way of cancellation points ; to exit
|
|
*
|
|
* WARNING: Cannot send a cancel to the thread because if the
|
|
* thread is already gone then, although the Linux man page says
|
|
* it will just return an error, in fact after testing with 0 and
|
|
* invalid numbers, causes the calling process to segfault.
|
|
* Too dangerous !! Need cooperative exit */
|
|
// pthread_cancel(ctrl.id);
|
|
ctrl.id = 0 ;
|
|
}
|
|
|
|
mtcTimer_reset ( ctrl.timer );
|
|
mtcTimer_start ( ctrl.timer, thread_timer_handler, THREAD_POST_KILL_WAIT );
|
|
_stage_change ( ctrl, THREAD_STAGE__WAIT );
|
|
|
|
break ;
|
|
}
|
|
case THREAD_STAGE__STAGES:
|
|
default:
|
|
{
|
|
slog ("%s %s has invalid stage ; changing to IDLE\n",
|
|
ctrl.hostname.c_str(),
|
|
ctrl.name.c_str() );
|
|
|
|
_stage_change ( ctrl , THREAD_STAGE__IDLE );
|
|
rc = FAIL ;
|
|
break ;
|
|
}
|
|
}
|
|
return (rc);
|
|
}
|
|
|
|
/* called by the thread */
|
|
void pthread_signal_handler ( thread_info_type * info_ptr )
|
|
{
|
|
switch ( info_ptr->signal )
|
|
{
|
|
case SIGKILL:
|
|
ilog ("%s SIGKILL ; exiting ...\n", info_ptr->log_prefix );
|
|
|
|
/* avoid touching data after the sigkill is received */
|
|
// info_ptr->data = "thread SIGKILL" ;
|
|
// info_ptr->status = FAIL_THREAD_EXIT ;
|
|
// info_ptr->runcount++ ;
|
|
pthread_exit(&info_ptr->status );
|
|
exit (FAIL_THREAD_EXIT);
|
|
break ;
|
|
default:
|
|
info_ptr->signal_handling++ ;
|
|
}
|
|
|
|
/* check for a cancel request - handled internally */
|
|
/* Note: No pint using pthread_testcancel since we don't
|
|
* use pthread_cancel because if the risk of crashing the
|
|
* parent process
|
|
* pthread_testcancel ();
|
|
*/
|
|
}
|
|
|
|
|
|
pthread_t thread_launch_thread (void*(thread)(void*), void * arg)
|
|
{
|
|
pthread_t id ;
|
|
int rc = FAIL ;
|
|
if ( __thread_init_sig == THREAD_INIT_SIG )
|
|
{
|
|
rc = pthread_create(&id, &__attr, thread, (void*)arg);
|
|
}
|
|
else
|
|
{
|
|
slog ("cannot launch thread ; threading not initialized yet\n");
|
|
}
|
|
if ( rc )
|
|
return 0 ;
|
|
return id ;
|
|
}
|
|
|
|
void * thread_test ( void * arg )
|
|
{
|
|
UNUSED(arg);
|
|
for ( ; ; )
|
|
sleep (1);
|
|
|
|
return NULL ;
|
|
}
|