#!/bin/sh
#
# Copyright (c) 2013-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# Support: www.windriver.com
#
# Purpose: This resource agent manages
#
#     .... the Titanium Cloud Controller Maintenance Daemon
#
# RA Spec:
#
# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD
#
#######################################################################
# Initialization:

: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#######################################################################

# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="mtcAgent"
OCF_RESKEY_config_default="/etc/mtc.ini"
OCF_RESKEY_dbg_default="false"
OCF_RESKEY_logging_default="true"
OCF_RESKEY_mode_default="normal"
OCF_RESKEY_user_default="admin"
OCF_RESKEY_pid_default="/var/run/mtcAgent.pid"
OCF_RESKEY_state_default="standby"

# ${VAR=default} only applies the default when VAR is unset, so a value
# exported by the cluster manager always wins.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_logging=${OCF_RESKEY_logging_default}}"
: "${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}}"
: "${OCF_RESKEY_mode=${OCF_RESKEY_mode_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_state=${OCF_RESKEY_state_default}}"

mydaemon="/usr/local/bin/${OCF_RESKEY_binary}"
statusfile="/var/run/${OCF_RESKEY_binary}.info"

#######################################################################

# Print a short usage summary to stdout.
#
# NOTE(review): the original usage text was lost when this file was
# flattened; the action list below is taken from the action dispatcher at
# the bottom of the file. Confirm the wording against the packaged agent.
usage() {
    cat <<END
usage: $0 {start|stop|reload|status|monitor|validate-all|meta-data}

$0 manages the Titanium Cloud Controller Maintenance (${OCF_RESKEY_binary}) daemon.
END
}

# Print the OCF resource agent meta-data (XML) to stdout.
#
# Returns: OCF_SUCCESS
#
# NOTE(review): the XML markup was stripped from the source and has been
# rebuilt around the surviving description text. The element layout follows
# the OCF RA API; the parameter attributes (unique/required/type) and the
# action timeouts/interval are assumptions - confirm them against the
# packaged agent before shipping.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mtcAgent">
<version>1.0</version>

<longdesc lang="en">
This 'mtcAgent' is an OCF Compliant Resource Agent that manages start, stop
and in-service monitoring of the Host Maintenance Process on Wind River's
Titanium Cloud in the active mode.
</longdesc>

<shortdesc lang="en">
Manages the Titanium Cloud's Maintenance (mtcAgent) Daemon.
</shortdesc>

<parameters>

<parameter name="state" unique="0" required="0">
<longdesc lang="en">
state = standby ... run maintenance daemon in 'standby' mode (default)
state = active  ... run maintenance daemon in 'active' mode
</longdesc>
<shortdesc lang="en">Maintenance Activity State Option</shortdesc>
<content type="string" default="${OCF_RESKEY_state_default}"/>
</parameter>

<parameter name="mode" unique="0" required="0">
<longdesc lang="en">
mode = normal  ... run maintenance daemon in 'normal' mode (default)
mode = passive ... run maintenance daemon in 'passive' mode
</longdesc>
<shortdesc lang="en">Maintenance Mode Option</shortdesc>
<content type="string" default="${OCF_RESKEY_mode_default}"/>
</parameter>

<parameter name="logging" unique="0" required="0">
<longdesc lang="en">
This option is used to direct the mtcAgent daemon log stream.

logging = true  ... /var/log/mtcAgent.log  (default)
logging = false ... /dev/null

See also debug option which sets the verbosity of logging.
</longdesc>
<shortdesc lang="en">Service Logging Control Option</shortdesc>
<content type="boolean" default="${OCF_RESKEY_logging_default}"/>
</parameter>

<parameter name="dbg" unique="0" required="0">
<longdesc lang="en">
dbg = false ... info, warn and err logs sent to output stream (default)
dbg = true  ... Additional dbg logs are also sent to the output stream
</longdesc>
<shortdesc lang="en">Service Debug Control Option</shortdesc>
<content type="boolean" default="${OCF_RESKEY_dbg_default}"/>
</parameter>

</parameters>

<actions>
<action name="start"        timeout="15s" />
<action name="stop"         timeout="15s" />
<action name="reload"       timeout="15s" />
<action name="status"       timeout="15s" />
<action name="monitor"      timeout="15s" interval="10s" />
<action name="validate-all" timeout="15s" />
<action name="meta-data"    timeout="15s" />
</actions>
</resource-agent>
END
    return ${OCF_SUCCESS}
}

# Verify that the binaries this agent depends on are present and that the
# daemon's ini file exists.
#
# Returns: OCF_SUCCESS, or OCF_ERR_CONFIGURED when the ini file is missing.
#          What happens on a missing binary is decided by check_binary
#          (provided by ocf-shellfuncs).
mtcAgent_validate() {
    local msg

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "mtcAgent:validate"
    fi

    check_binary "/usr/local/bin/${OCF_RESKEY_binary}"
    check_binary "/usr/local/bin/hbsAgent"
    check_binary "/usr/local/bin/mtcClient"
    check_binary "/usr/local/bin/hbsClient"
    check_binary sysinv-api
    check_binary pidof

    # Quoted so an empty config path is reported as "missing" instead of
    # turning the test into '[ ! -f ]', which is always false.
    if [ ! -f "${OCF_RESKEY_config}" ] ; then
        msg="${OCF_RESKEY_binary} ini file missing ${OCF_RESKEY_config}"
        ocf_log err "${msg}"
        return ${OCF_ERR_CONFIGURED}
    fi

    return ${OCF_SUCCESS}
}

# Log the /proc/<pid>/sched and /proc/<pid>/stack contents of the process
# named in the pid file, one ocf_log info record per line. Used as a
# diagnostic aid when the daemon stops answering status requests.
# Does nothing when the pid file is missing or empty.
log_procfs() {
    local pid="" line procfile

    if [ -r "${OCF_RESKEY_pid}" ] ; then
        # 'read' takes the first word only, so stray whitespace is dropped
        read -r pid _ < "${OCF_RESKEY_pid}"
    fi
    if [ -z "${pid}" ] ; then
        return
    fi

    procfile="/proc/${pid}/sched"
    if [ -r "${procfile}" ] ; then
        # 2>&1 so a read error ends up in the log as well
        cat "${procfile}" 2>&1 | while read -r line; do
            ocf_log info "sched: ${line}"
        done
    fi

    procfile="/proc/${pid}/stack"
    ocf_log info "stack file: ${procfile}"
    if [ -r "${procfile}" ] ; then
        cat "${procfile}" 2>&1 | while read -r line; do
            ocf_log info "stack: ${line}"
        done
    fi
}

# total worst case timeout of this status check is 13 seconds.
# This is 2 seconds under SM's default 15 second timeout.
# Print the PID recorded in the pid file (first word only).
# Prints nothing when the pid file is missing, unreadable or empty.
mtcAgent_pidfile_pid () {
    local p=""

    if [ -r "${OCF_RESKEY_pid}" ] ; then
        read -r p _ < "${OCF_RESKEY_pid}"
    fi
    printf '%s' "${p}"
}

# Succeed when $1 is a non-empty PID that accepts signal 0.
mtcAgent_pid_alive () {
    [ -n "$1" ] && kill -0 "$1" 2> /dev/null
}

# In-service health check.
#
# Removes the status file, waits up to 3 seconds for the pid file, sends
# SIGUSR1 to the daemon and then polls once a second, for up to 10 seconds,
# for the daemon to re-create the status file. The signal is re-sent after
# the 6th unanswered poll; progressively louder logs are emitted on the
# way. Worst case is therefore 3 + 10 = 13 seconds.
#
# Returns: OCF_SUCCESS      status file appeared
#          OCF_ERR_GENERIC  daemon is alive but never produced status
#          OCF_NOT_RUNNING  no live process behind the pid file
mtcAgent_status () {
    local proc="mtcAgent:status"
    local pid pid_stat log_sig loop

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${proc}"
    fi

    # remove the status file before we request a new one
    rm -f "${statusfile}"

    # Verify the pid file exists as part of status
    loop=0
    while [ ${loop} -lt 3 ] ; do
        if [ -f "${OCF_RESKEY_pid}" ] ; then
            break
        fi
        sleep 1
        loop=$((loop + 1))
    done

    # See if the daemon is running
    pid=$(mtcAgent_pidfile_pid)
    if ! mtcAgent_pid_alive "${pid}" ; then
        return ${OCF_NOT_RUNNING}
    fi

    log_sig="${OCF_RESKEY_binary} In-Service Active Monitor Test"

    # Ask the daemon to produce status
    ocf_run kill -s USR1 "${pid}"

    # Wait for the response
    loop=0
    while [ ${loop} -lt 10 ] ; do
        sleep 1
        if [ -f "${statusfile}" ] ; then
            ocf_log info "${log_sig} Passed ($loop)"
            return ${OCF_SUCCESS}
        elif [ ${loop} -eq 5 ] ; then
            # send the signal again
            ocf_run kill -s USR1 "${pid}"
            pid_stat=$(cat "/proc/${pid}/stat")
            ocf_log notice "${log_sig} is slow to respond"
            ocf_log notice "$pid_stat"
        elif [ ${loop} -eq 8 ] ; then
            pid_stat=$(cat "/proc/${pid}/stat")
            ocf_log warn "${log_sig} is very slow to respond"
            ocf_log warn "$pid_stat"
        fi
        loop=$((loop + 1))
    done

    # No answer: capture scheduler/stack info before reporting the failure
    log_procfs
    ocf_log err "${log_sig} Failed"
    return ${OCF_ERR_GENERIC}
}

# Monitor action: quick liveness check on the pid file, then the full
# status handshake.
#
# Returns: OCF_NOT_RUNNING when no live process is behind the pid file,
#          otherwise whatever mtcAgent_status returns.
mtcAgent_monitor () {
    local proc="mtcAgent:monitor"
    local pid

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${proc}"
    fi

    # Uncomment if you want the monitor function to force-pass
    # return ${OCF_SUCCESS}

    pid=$(mtcAgent_pidfile_pid)
    if ! mtcAgent_pid_alive "${pid}" ; then
        if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
            ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running."
        fi
        return ${OCF_NOT_RUNNING}
    fi

    mtcAgent_status
    return $?
}

# Start action.
#
# If a daemon is already running it is health-checked first: a healthy
# instance is left alone (start is then a successful no-op), an unhealthy
# one is stopped and replaced. The daemon is launched with options derived
# from the state/logging/mode/dbg resource parameters.
#
# Returns: OCF_SUCCESS         daemon running (new or pre-existing)
#          OCF_RUNNING_MASTER  an existing instance could not be stopped
#          OCF_NOT_RUNNING     launch or post-launch verification failed
mtcAgent_start () {
    local rc pid loop msg
    local start_proc="mtcAgent:start"
    local run_opt_state="" run_opt_debug="" run_opt_mode="" run_opt_log=""

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${start_proc}"
    fi

    # Uncomment if you want the start function to force-pass without starting
    # return ${OCF_SUCCESS}

    # If running then issue a ping test
    pid=$(mtcAgent_pidfile_pid)
    if mtcAgent_pid_alive "${pid}" ; then
        mtcAgent_status
        rc=$?
        if [ ${rc} -ne ${OCF_SUCCESS} ] ; then
            msg="${start_proc} ping test failed rc=${rc}"
            ocf_log err "${msg}"
            mtcAgent_stop
        else
            # Spec says to return success if process is already running for start
            pid=$(mtcAgent_pidfile_pid)
            if mtcAgent_pid_alive "${pid}" ; then
                ocf_log info "${start_proc} called while ${OCF_RESKEY_binary} is already running"
                return ${OCF_SUCCESS}
            fi
        fi
    fi

    # should not be running now or error
    pid=$(mtcAgent_pidfile_pid)
    if mtcAgent_pid_alive "${pid}" ; then
        msg="${start_proc} cannot kill off existing instance of ${OCF_RESKEY_binary}"
        ocf_log err "${msg}"
        return ${OCF_RUNNING_MASTER}
    fi

    rm -f "${statusfile}"

    if [ "${OCF_RESKEY_state}" = "active" ] ; then
        run_opt_state="-a"
    fi
    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        run_opt_debug="-d debug"
    fi
    if [ "${OCF_RESKEY_mode}" = "passive" ] ; then
        run_opt_mode="-p"
    fi
    if [ "${OCF_RESKEY_logging}" = "true" ] ; then
        run_opt_log="-l"
    fi

    # default PID to null
    pid=""

    # Try to Start the daemon.
    # The option variables are deliberately unquoted: empty ones must
    # vanish and "-d debug" must split into two arguments.
    "${mydaemon}" ${run_opt_state} ${run_opt_log} ${run_opt_mode} ${run_opt_debug}
    rc=$?

    # verify it was started and set return code appropriately
    if [ ${rc} -eq ${OCF_SUCCESS} ] ; then
        # Verify the pid file exists as part of status
        loop=0
        while [ ${loop} -lt 3 ] ; do
            if [ -f "${OCF_RESKEY_pid}" ] ; then
                break
            fi
            ocf_log info "${start_proc} waiting ... loop=${loop}"
            sleep 1
            loop=$((loop + 1))
        done

        pid=$(mtcAgent_pidfile_pid)
        if ! mtcAgent_pid_alive "${pid}" ; then
            rc=${OCF_FAILED_MASTER}
        elif [ ! -f "${statusfile}" ] ; then
            # NOTE(review): this used to read 'rc = ...', which runs a
            # command named 'rc' instead of assigning, so the failed health
            # test was logged but start still reported success. The check
            # is now enforced; confirm the daemon always writes its status
            # file during startup.
            ocf_log info "mtcAgent: Startup Health Test Failed - missing info"
            rc=${OCF_ERR_GENERIC}
        fi
    else
        ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}"
        rc=${OCF_ERR_GENERIC}
    fi

    # Record success or failure and return status
    if [ ${rc} -eq ${OCF_SUCCESS} ] ; then
        msg="${start_proc}ed pid=${pid}"
        ocf_log info "${msg}"
    else
        msg="${start_proc} failed rc=${rc}"
        ocf_log err "${msg}"
        # every start failure is reported to the caller as "not running"
        rc=${OCF_NOT_RUNNING}
    fi

    return ${rc}
}

# Last-resort cleanup: SIGKILL whatever pidof still reports for the daemon
# binary, then remove the pid file.
mtcAgent_confirm_stop () {
    local proc="mtcAgent:confirm_stop"
    local pids

    ocf_log info "${proc}"

    pids=$(pidof "${OCF_RESKEY_binary}")

    # ${pids} is deliberately unquoted: pidof may report several PIDs
    if [ -n "${pids}" ] && kill -0 ${pids} 2> /dev/null ; then
        ocf_log info "${proc} 'kill -9 ${pids}'"
        kill -9 ${pids}
        ocf_log info "${proc}ed (by emergency kill -9 ${pids})"
        sleep 1
    fi
    rm -f "${OCF_RESKEY_pid}"
}

# Stop action.
#
# Sends SIGINT to the PID in the pid file up to 3 times, one second apart,
# until the process is gone, then always finishes with
# mtcAgent_confirm_stop.
#
# Returns: OCF_SUCCESS (always)
mtcAgent_stop () {
    local proc="mtcAgent:stop"
    local pid loop
    local max=3

    # See if the process is running (by name, via pidof)
    pid=$(pidof "${OCF_RESKEY_binary}")
    ocf_log info "${proc} PID:${pid}"

    # ${pid} is deliberately unquoted: pidof may report several PIDs
    if [ -z "${pid}" ] || ! kill -0 ${pid} 2> /dev/null ; then
        ocf_log info "${proc} called while already stopped (no process)"
        mtcAgent_confirm_stop
        return ${OCF_SUCCESS}
    fi

    loop=0
    while [ ${loop} -lt ${max} ] ; do
        # verify stop with pidfile; if the pid file is gone we are done
        if [ ! -f "${OCF_RESKEY_pid}" ] ; then
            break
        fi

        pid=$(mtcAgent_pidfile_pid)
        if [ -z "${pid}" ] ; then
            # pid file is empty: nothing left to signal
            ocf_log info "${proc}ped (by -int)"
            break
        fi

        if ! kill -0 "${pid}" 2> /dev/null ; then
            ocf_log info "${proc}ped (by pid)"
            break
        fi

        ocf_log info "${proc}ping (by -int - loop:${loop})"
        kill -s INT "${pid}"
        sleep 1
        loop=$((loop + 1))
    done

    mtcAgent_confirm_stop
    return ${OCF_SUCCESS}
}

# Reload action: stop followed by start.
#
# Returns: OCF_SUCCESS, or the return code of the step that failed.
mtcAgent_reload () {
    local rc msg
    # 'local' matters here: stop/start set their own 'proc', which would
    # otherwise overwrite this one before it is used in the log below.
    local proc="mtcAgent:reload"

    if [ "${OCF_RESKEY_dbg}" = "true" ] ; then
        ocf_log info "${proc}"
    fi

    mtcAgent_stop
    rc=$?
    if [ ${rc} -eq ${OCF_SUCCESS} ] ; then
        #sleep 1
        mtcAgent_start
        rc=$?
        if [ ${rc} -eq ${OCF_SUCCESS} ] ; then
            msg="${proc}ed"
            ocf_log info "${msg}"
        fi
    fi

    if [ ${rc} -ne ${OCF_SUCCESS} ] ; then
        msg="${OCF_RESKEY_binary}: failed to restart rc=${rc}"
        ocf_log err "${msg}"
    fi

    return ${rc}
}

# meta-data and usage/help are answered without validating the environment
case "${__OCF_ACTION}" in
    meta-data)   meta_data
                 exit ${OCF_SUCCESS}
                 ;;
    usage|help)  usage
                 exit ${OCF_SUCCESS}
                 ;;
esac

# monitor runs periodically, so keep its banner at debug level
if [ "${__OCF_ACTION}" = "monitor" ] ; then
    ocf_log debug "mtcAgent:${__OCF_ACTION} action"
else
    ocf_log info "mtcAgent:${__OCF_ACTION} action"
fi

# Anything except meta-data and help must pass validation
mtcAgent_validate || exit $?

case "${__OCF_ACTION}" in
    start)        mtcAgent_start
                  ;;
    stop)         mtcAgent_stop
                  ;;
    status)       mtcAgent_status
                  ;;
    reload)       mtcAgent_reload
                  ;;
    monitor)      mtcAgent_monitor
                  ;;
    validate-all) mtcAgent_validate
                  ;;
    *)            usage
                  exit ${OCF_ERR_UNIMPLEMENTED}
                  ;;
esac
# the agent's exit status is the status of the action that just ran
exit $?