18922761a6
Signed-off-by: Dean Troyer <dtroyer@gmail.com>
522 lines
14 KiB
Bash
Executable File
522 lines
14 KiB
Bash
Executable File
#!/bin/sh
|
|
#
|
|
# Copyright (c) 2013-2017 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
#
|
|
# Support: www.windriver.com
|
|
#
|
|
# Purpose: This resource agent manages
|
|
#
|
|
# .... the Titanium Cloud Controller Maintenance Daemon
|
|
#
|
|
# RA Spec:
|
|
#
|
|
# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD
|
|
#
|
|
#######################################################################
|
|
# Initialization:
|
|
|
|
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
|
|
|
|
#######################################################################
|
|
|
|
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="mtcAgent"
OCF_RESKEY_config_default="/etc/mtc.ini"
OCF_RESKEY_dbg_default="false"
OCF_RESKEY_logging_default="true"
OCF_RESKEY_mode_default="normal"
OCF_RESKEY_user_default="admin"
OCF_RESKEY_pid_default="/var/run/mtcAgent.pid"
OCF_RESKEY_state_default="standby"

# Each parameter takes its default only when it is unset (a parameter that
# is set but empty is left empty).  The expansions are quoted so that the
# resulting words handed to the ':' no-op are never word-split or
# pathname-expanded.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_logging=${OCF_RESKEY_logging_default}}"
: "${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}}"
: "${OCF_RESKEY_mode=${OCF_RESKEY_mode_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_state=${OCF_RESKEY_state_default}}"

# Full path of the daemon executable and of the info file that is used
# by the status and start checks as proof that the daemon responded.
mydaemon="/usr/local/bin/${OCF_RESKEY_binary}"
statusfile="/var/run/${OCF_RESKEY_binary}.info"
|
|
|
|
#######################################################################
|
|
|
|
# Print the command synopsis and a one-line description of every
# supported action on stdout.  $0 is expanded so the text shows the
# name this agent was invoked as.
usage() {
    printf '%s\n' \
        "" \
        "usage: $0 (start|stop|reload|status|monitor|validate-all|meta-data)" \
        "" \
        "$0 manages the Platform's Controller Maintenance (mtcAgent) process as an HA resource" \
        "" \
        "The 'start' ..... operation starts the maintenance service in the active state." \
        "The 'stop' ...... operation stops the maintenance service." \
        "The 'reload' .... operation stops and then starts the maintenance service." \
        "The 'status' .... operation checks the status of the maintenance service." \
        "The 'monitor' ... operation indicates the in-service status of the maintenance service." \
        "The 'validate-all' operation reports whether the parameters are valid." \
        "The 'meta-data' . operation reports the mtcAgent's meta-data information." \
        ""
}
|
|
|
|
#######################################################################
|
|
|
|
# Emit the resource agent's XML meta-data on stdout.
#
# Globals read: OCF_RESKEY_dbg and the OCF_RESKEY_*_default values that
#               are substituted into the here-document.
# Returns:      OCF_SUCCESS
meta_data() {
    # Quoted so that an empty or unset dbg value is simply "not true"
    # rather than a test(1) syntax error.
    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "mtcAgent:meta_data"
    fi

    # The delimiter is unquoted on purpose: the default="..." attributes
    # below are filled in from the shell variables.
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mtcAgent">
<version>1.0</version>

<longdesc lang="en">
This 'mtcAgent' is an OCF Compliant Resource Agent that manages start, stop
and in-service monitoring of the Host Maintenance Process on Wind River's
Titanium Cloud in the active mode.
</longdesc>

<shortdesc lang="en">
Manages the Titanium Cloud's Maintenance (mtcAgent) Daemon.
</shortdesc>


<parameters>

<parameter name="state" unique="0" required="0">
<longdesc lang="en">
state = standby ... run maintenance daemon in 'standby' mode (default)
state = active ... run maintenance daemon in 'active' mode
</longdesc>
<shortdesc lang="en">Maintenance Activity State Option</shortdesc>
<content type="string" default="${OCF_RESKEY_state_default}"/>
</parameter>

<parameter name="mode" unique="0" required="0">
<longdesc lang="en">
mode = normal ... run maintenance daemon in 'normal' mode (default)
mode = passive ... run maintenance daemon in 'passive' mode
</longdesc>
<shortdesc lang="en">Maintenance Mode Option</shortdesc>
<content type="string" default="${OCF_RESKEY_mode_default}"/>
</parameter>


<parameter name="logging" unique="0" required="0">
<longdesc lang="en">
This option is used to direct the mtcAgent daemon log stream.

logging = true ... /var/log/mtcAgent.log (default)
logging = false ... /dev/null

See also debug option which sets the verbosity of logging.
</longdesc>
<shortdesc lang="en">Service Logging Control Option</shortdesc>
<content type="boolean" default="${OCF_RESKEY_logging_default}"/>
</parameter>


<parameter name="dbg" unique="0" required="0">
<longdesc lang="en">
dbg = false ... info, warn and err logs sent to output stream (default)
dbg = true ... Additional dbg logs are also sent to the output stream
</longdesc>
<shortdesc lang="en">Service Debug Control Option</shortdesc>
<content type="boolean" default="${OCF_RESKEY_dbg_default}"/>
</parameter>

</parameters>


<actions>
<action name="start" timeout="10s" />
<action name="stop" timeout="10s" />
<action name="monitor" timeout="10s" interval="300s" />
<action name="meta-data" timeout="10s" />
<action name="validate-all" timeout="10s" />
</actions>
</resource-agent>
END
    return "${OCF_SUCCESS}"
}
|
|
|
|
# Check that everything the agent depends on is present.
#
# Globals read: OCF_RESKEY_dbg, OCF_RESKEY_binary, OCF_RESKEY_config
# Returns:      OCF_SUCCESS when the configuration file exists,
#               OCF_ERR_CONFIGURED when it is missing or the path is empty.
# NOTE(review): check_binary is not defined in this file (it comes from the
#               sourced ocf-shellfuncs); it presumably terminates the agent
#               itself when a program is missing - confirm.
mtcAgent_validate() {
    local msg

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "mtcAgent:validate"
    fi

    check_binary "/usr/local/bin/${OCF_RESKEY_binary}"
    check_binary "/usr/local/bin/hbsAgent"
    check_binary "/usr/local/bin/mtcClient"
    check_binary "/usr/local/bin/hbsClient"
    check_binary sysinv-api
    check_binary pidof

    # The path must be quoted: unquoted, an empty value collapses the test
    # to '[ ! -f ]', which is false, so a missing path passed validation.
    if [ ! -f "${OCF_RESKEY_config}" ] ; then
        msg="${OCF_RESKEY_binary} ini file missing ${OCF_RESKEY_config}"
        ocf_log err "${msg}"
        return "${OCF_ERR_CONFIGURED}"
    fi

    return "${OCF_SUCCESS}"
}
|
|
|
|
# Log the scheduler and kernel-stack procfs entries of the process named
# in the pid file, one ocf_log info line per line of each file.
#
# Globals read: OCF_RESKEY_pid
# Returns:      0 always; this is best-effort diagnostics only.
#
# Declared without the 'function' keyword so it parses under /bin/sh, and
# every working variable is local so the caller's 'pid' is not overwritten.
# The stack file path is reported through ocf_log like every other message
# in this agent instead of being echoed to stdout.
log_procfs() {
    local pid="" name file line

    if [ -r "${OCF_RESKEY_pid}" ] ; then
        pid=$(cat "${OCF_RESKEY_pid}")
    fi

    # No pid file (or an empty one): there is nothing to look up.
    if [ -z "${pid}" ] ; then
        return 0
    fi

    for name in sched stack ; do
        file="/proc/${pid}/${name}"

        if [ "${name}" = "stack" ] ; then
            ocf_log info "stack file: ${file}"
        fi

        if [ -r "${file}" ] ; then
            # stderr is captured too, so a read failure ends up in the log.
            printf '%s\n' "$(cat "${file}" 2>&1)" | while read -r line ; do
                ocf_log info "${name}: ${line}"
            done
        fi
    done

    return 0
}
|
|
|
|
# In-service test of the daemon.
#
# Removes the status (info) file, signals the daemon with USR1 and then
# polls for the file to reappear.
#
# The total worst case time of this check is 13 seconds: up to 3 seconds
# waiting for the pid file plus 10 one-second polls for the status file.
# This is 2 seconds under SM's default 15 second timeout.
#
# Globals read: OCF_RESKEY_dbg, OCF_RESKEY_pid, OCF_RESKEY_binary, statusfile
# Returns:      OCF_SUCCESS     the status file appeared
#               OCF_NOT_RUNNING no pid file / the pid is not alive
#               OCF_ERR_GENERIC the process is alive but never answered
#
# All working variables are local so callers' variables of the same name
# (notably 'proc' and 'pid') are left alone, and the counting loops use
# POSIX 'while' so the function parses under /bin/sh.
# NOTE(review): ocf_run comes from the sourced ocf-shellfuncs; it is assumed
#               to execute the command it is given - confirm.
mtcAgent_status () {
    local proc="mtcAgent:status"
    local log_sig pid="" pid_stat loop

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${proc}"
    fi

    # remove the status file before we request a new
    rm -f "${statusfile}"

    # Verify the pid file exists as part of status
    loop=0
    while [ "${loop}" -lt 3 ] ; do
        if [ -f "${OCF_RESKEY_pid}" ] ; then
            break
        fi
        sleep 1
        loop=$((loop + 1))
    done

    # See if the daemon is running.  A missing or empty pid file is
    # reported as "not running" without invoking cat or kill on nothing.
    if [ -f "${OCF_RESKEY_pid}" ] ; then
        pid=$(cat "${OCF_RESKEY_pid}")
    fi
    if [ -z "${pid}" ] || ! kill -0 "${pid}" 2> /dev/null ; then
        return "${OCF_NOT_RUNNING}"
    fi

    log_sig="${OCF_RESKEY_binary} In-Service Active Monitor Test"

    # Ask the daemon to produce status
    ocf_run kill -s USR1 "${pid}"

    # Wait for the response
    loop=0
    while [ "${loop}" -lt 10 ] ; do
        sleep 1
        if [ -f "${statusfile}" ] ; then

            ocf_log info "${log_sig} Passed ($loop)"
            return "${OCF_SUCCESS}"

        elif [ "${loop}" -eq 5 ] ; then

            # send the signal again
            ocf_run kill -s USR1 "${pid}"

            pid_stat=$(cat "/proc/${pid}/stat")
            ocf_log notice "${log_sig} is slow to respond"
            ocf_log notice "$pid_stat"

        elif [ "${loop}" -eq 8 ] ; then

            pid_stat=$(cat "/proc/${pid}/stat")
            ocf_log warn "${log_sig} is very slow to respond"
            ocf_log warn "$pid_stat"

        fi
        loop=$((loop + 1))
    done

    # No answer: dump procfs diagnostics before reporting the failure.
    log_procfs
    ocf_log err "${log_sig} Failed"
    return "${OCF_ERR_GENERIC}"
}
|
|
|
|
# OCF 'monitor' action.
#
# Reports OCF_NOT_RUNNING when the pid file is missing, empty or names a
# process that is not alive; otherwise hands over to mtcAgent_status and
# returns its result.
#
# Globals read: OCF_RESKEY_dbg, OCF_RESKEY_pid, OCF_RESKEY_binary
#
# The pid file is only read when it exists, so a stopped resource no longer
# produces a 'cat: No such file' message on every monitor interval.
mtcAgent_monitor () {
    local proc="mtcAgent:monitor"
    local pid=""

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${proc}"
    fi

    # Uncomment if you want the monitor function to force-pass
    # return ${OCF_SUCCESS}

    if [ -f "${OCF_RESKEY_pid}" ] ; then
        pid=$(cat "${OCF_RESKEY_pid}")
    fi

    if [ -z "${pid}" ] || ! kill -0 "${pid}" 2> /dev/null ; then
        if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
            ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running."
        fi
        return "${OCF_NOT_RUNNING}"
    fi

    mtcAgent_status
    return $?
}
|
|
|
|
|
|
# Succeeds when the pid file exists, is non-empty and names a process
# that accepts signal 0.  Private helper of mtcAgent_start.
_mtcAgent_start_pid_alive () {
    local p=""

    if [ -f "${OCF_RESKEY_pid}" ] ; then
        p=$(cat "${OCF_RESKEY_pid}")
    fi
    [ -n "${p}" ] && kill -0 "${p}" 2> /dev/null
}

# OCF 'start' action.
#
# If the daemon is already running and passes mtcAgent_status, that is a
# success.  If it is running but fails the status test it is stopped
# first.  The daemon is then launched with options derived from the
# state / logging / mode / dbg parameters, and the start is verified by
# the pid file, a live process and the presence of the status file.
#
# Globals read: OCF_RESKEY_dbg, OCF_RESKEY_pid, OCF_RESKEY_binary,
#               OCF_RESKEY_state, OCF_RESKEY_mode, OCF_RESKEY_logging,
#               mydaemon, statusfile
# Returns:      OCF_SUCCESS        started, or already running and healthy
#               OCF_RUNNING_MASTER an existing instance could not be stopped
#               OCF_NOT_RUNNING    any other start failure
#
# NOTE(review): the two 'rc = ...' statements in the previous version had
#               spaces around '=' and therefore ran a command called 'rc'
#               instead of assigning.  As a result a missing status file
#               after launch was still reported as a successful start.
#               The check is now effective; confirm the daemon writes the
#               status file during start-up.
mtcAgent_start () {
    local rc pid="" loop
    local start_proc="mtcAgent:start"

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${start_proc}"
    fi

    # Uncomment if you want the start function to force-pass without starting
    # return ${OCF_SUCCESS}

    # If running then issue a ping test
    if _mtcAgent_start_pid_alive ; then
        mtcAgent_status
        rc=$?
        if [ "${rc}" -ne "${OCF_SUCCESS}" ] ; then
            ocf_log err "${start_proc} ping test failed rc=${rc}"
            mtcAgent_stop
        elif _mtcAgent_start_pid_alive ; then
            # Spec says to return success if process is already running for start
            ocf_log info "${start_proc} called while ${OCF_RESKEY_binary} is already running"
            return "${OCF_SUCCESS}"
        fi
    fi

    # should not be running now or error
    if _mtcAgent_start_pid_alive ; then
        ocf_log err "${start_proc} cannot kill off existing instance of ${OCF_RESKEY_binary}"
        return "${OCF_RUNNING_MASTER}"
    fi

    rm -f "${statusfile}"

    # Build the daemon's argument list in the positional parameters (this
    # function takes no arguments of its own), in the order
    # state, logging, mode, debug.
    set --
    if [ "${OCF_RESKEY_state}" = "active" ] ; then
        set -- "$@" -a
    fi
    if [ "${OCF_RESKEY_logging}" = "true" ] ; then
        set -- "$@" -l
    fi
    if [ "${OCF_RESKEY_mode}" = "passive" ] ; then
        set -- "$@" -p
    fi
    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        set -- "$@" -d debug
    fi

    # Try to Start the daemon
    "${mydaemon}" "$@"
    rc=$?

    # verify it was started and set return code appropriately
    if [ "${rc}" -eq "${OCF_SUCCESS}" ] ; then
        # Give the daemon up to 3 seconds to create its pid file
        loop=0
        while [ "${loop}" -lt 3 ] ; do
            if [ -f "${OCF_RESKEY_pid}" ] ; then
                break
            fi
            ocf_log info "${start_proc} waiting ... loop=${loop}"
            sleep 1
            loop=$((loop + 1))
        done

        if [ -f "${OCF_RESKEY_pid}" ] ; then
            pid=$(cat "${OCF_RESKEY_pid}")
        fi

        if [ -z "${pid}" ] || ! kill -0 "${pid}" 2> /dev/null ; then
            rc=${OCF_FAILED_MASTER}
        elif [ ! -f "${statusfile}" ] ; then
            ocf_log info "mtcAgent: Startup Health Test Failed - missing info"
            rc=${OCF_ERR_GENERIC}
        fi
    else
        ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}"
        rc=${OCF_ERR_GENERIC}
    fi

    # Record success or failure and return status
    if [ "${rc}" -eq "${OCF_SUCCESS}" ] ; then
        ocf_log info "${start_proc}ed pid=${pid}"
    else
        ocf_log err "${start_proc} failed rc=${rc}"
        # Every start failure is reported to the caller as "not running".
        rc=${OCF_NOT_RUNNING}
    fi
    return "${rc}"
}
|
|
|
|
# Make sure no instance of the daemon survives a stop.
#
# Looks the daemon up by name with pidof; anything still alive is sent
# SIGKILL.  The pid file is removed in every case.
#
# Globals read: OCF_RESKEY_binary, OCF_RESKEY_pid
# Returns:      the exit status of the final 'rm -f'.
#
# 'proc' and 'pid' are local: callers use the same variable names and
# must not find them overwritten when this function returns.
mtcAgent_confirm_stop () {
    local proc="mtcAgent:confirm_stop"
    local pid

    ocf_log info "${proc}"

    pid=$(pidof "${OCF_RESKEY_binary}")

    # ${pid} is deliberately unquoted in the kill commands: pidof may
    # print several pids and each one has to be its own argument.
    # shellcheck disable=SC2086
    if [ -n "${pid}" ] && kill -0 ${pid} 2> /dev/null ; then
        ocf_log info "${proc} 'kill -9 ${pid}'"
        # shellcheck disable=SC2086
        kill -9 ${pid}
        ocf_log info "${proc}ed (by emergency kill -9 ${pid})"
        sleep 1
    fi

    rm -f "${OCF_RESKEY_pid}"
}
|
|
|
|
# OCF 'stop' action.
#
# If pidof finds no live daemon the stop is already complete.  Otherwise
# the process named in the pid file is sent SIGINT up to three times, one
# second apart, until it is gone.  mtcAgent_confirm_stop is always called
# last to clean up whatever is left.
#
# Globals read: OCF_RESKEY_binary, OCF_RESKEY_pid
# Returns:      OCF_SUCCESS always.
#
# The empty-pid test uses '-z' on a quoted value; the previous unquoted
# '[ ${pid} = "" ]' was a test(1) syntax error whenever the pid file was
# empty, so its branch could never be taken.  The signal is sent with the
# portable 'kill -s INT' spelling, the loop is a POSIX 'while', and the
# working variables are local so callers' 'proc'/'pid' are not clobbered.
mtcAgent_stop () {
    local proc="mtcAgent:stop"
    local pid loop
    local max=3

    # See if the process is running (looked up by name, not by pid file)
    pid=$(pidof "${OCF_RESKEY_binary}")
    ocf_log info "${proc} PID:${pid}"

    # ${pid} is unquoted on purpose: pidof may return more than one pid.
    # shellcheck disable=SC2086
    if [ -z "${pid}" ] || ! kill -0 ${pid} 2> /dev/null ; then
        ocf_log info "${proc} called while already stopped (no process)"
        mtcAgent_confirm_stop
        return "${OCF_SUCCESS}"
    fi

    loop=0
    while [ "${loop}" -lt "${max}" ] ; do

        # verify stop with pidfile
        if [ -f "${OCF_RESKEY_pid}" ] ; then

            pid=$(cat "${OCF_RESKEY_pid}")

            if [ -z "${pid}" ] ; then
                # the pid file is empty: there is nothing left to signal
                ocf_log info "${proc}ped (by -int)"
                break

            elif ! kill -0 "${pid}" 2> /dev/null ; then
                # the process named in the pid file is gone
                ocf_log info "${proc}ped (by pid)"
                break

            else
                # still alive: ask it to exit and give it a second
                ocf_log info "${proc}ping (by -int - loop:${loop})"
                kill -s INT "${pid}"
                sleep 1
            fi
        fi
        loop=$((loop + 1))
    done

    mtcAgent_confirm_stop
    return "${OCF_SUCCESS}"
}
|
|
|
|
# OCF 'reload' action: stop the daemon and, when that succeeded, start
# it again.
#
# Globals read: OCF_RESKEY_dbg, OCF_RESKEY_binary
# Returns:      the status of mtcAgent_stop when it failed, otherwise the
#               status of mtcAgent_start.
#
# The log prefix is kept in a local variable with its own name because
# the stop/start helpers assign the shared 'proc' variable.  Both result
# messages were previously logged through the misspelled '${mgs}' and so
# were always empty; the failure is now also logged at 'err' level like
# the other failures in this agent.
mtcAgent_reload () {
    local rc
    local reload_proc="mtcAgent:reload"

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${reload_proc}"
    fi

    mtcAgent_stop
    rc=$?
    if [ "${rc}" -eq "${OCF_SUCCESS}" ] ; then
        mtcAgent_start
        rc=$?
        if [ "${rc}" -eq "${OCF_SUCCESS}" ] ; then
            ocf_log info "${reload_proc}ed"
        fi
    fi

    if [ "${rc}" -ne "${OCF_SUCCESS}" ] ; then
        ocf_log err "${OCF_RESKEY_binary}: failed to restart rc=${rc}"
    fi

    return "${rc}"
}
|
|
|
|
# Informational actions are answered first; they need no validation.
case ${__OCF_ACTION} in
    meta-data)
        meta_data
        exit ${OCF_SUCCESS}
        ;;
    usage|help)
        usage
        exit ${OCF_SUCCESS}
        ;;
esac

ocf_log info "mtcAgent:${__OCF_ACTION} action"

# Anything except meta-data and help must pass validation
mtcAgent_validate || exit $?

# Run the requested action.  For the supported actions the script ends
# with the status of the handler; anything else prints the usage text
# and exits with OCF_ERR_UNIMPLEMENTED.
case ${__OCF_ACTION} in
    start)
        mtcAgent_start
        ;;
    stop)
        mtcAgent_stop
        ;;
    status)
        mtcAgent_status
        ;;
    reload)
        mtcAgent_reload
        ;;
    monitor)
        mtcAgent_monitor
        ;;
    validate-all)
        mtcAgent_validate
        ;;
    *)
        usage
        exit ${OCF_ERR_UNIMPLEMENTED}
        ;;
esac
|