fuel-library/files/fuel-ha-utils/ocf/ceilometer-agent-compute
Alex Schultz 7c210ee68d Update OCF stop actions to use procfs
This change adds a set of fuel functions that can be used to stop
processes using a pid file or a pid. These functions allow for the same
method of stopping processes to be used by all ocf scripts. The process
stopping logic leverages procfs to check to make sure the process has
been stopped.

This change adds a default retry of 5 times with a 2 second sleep
between sending SIGTERM signals to the processes. This retry count is
dynamic based on the timeout for the stopping command. If the SIGTERM
does not work, a SIGKILL will also be tried one time.

This change does not include the ocf script for rabbitmq as that is the
upstream version and won't be leveraging the ocf-fuel-funcs library.

This change does not include the ocf script for mysql as that needs
further work.

Change-Id: I8dcc1c37d17068a9c29480b886d4f1a051f28894
Partial-Bug: 1425579
2015-11-16 16:36:29 +00:00

309 lines
10 KiB
Bash

#!/bin/bash
#
#
# OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute)
#
# Description: Manages an OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute) process as an HA resource
#
# Authors: Emilien Macchi
# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han
#
# Support: openstack@lists.launchpad.net
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_monitor_binary (NOTE: not referenced anywhere in the code below)
# OCF_RESKEY_amqp_server_port
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
# Load the generic OCF shell helpers. The directory can be overridden through
# the environment; "=" (not ":=") assigns the default only when the variable
# is unset, so a value that is set but empty is left untouched.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Load the Fuel helper library (presumably the provider of proc_stop, which the
# stop action calls but this file does not define -- verify against ocf-fuel-funcs).
: ${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}
. ${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs
#######################################################################
# Fill in some defaults if no values are specified.
# The *_default variables are also interpolated into the meta-data XML.
OCF_RESKEY_binary_default="ceilometer-polling"
OCF_RESKEY_config_default="/etc/ceilometer/ceilometer.conf"
OCF_RESKEY_user_default="ceilometer"
# HA_RSCTMP and OCF_RESOURCE_INSTANCE are not assigned in this file; they are
# expected to come from the environment or from the libraries sourced above,
# which is why this default is computed only after the libraries are loaded.
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
OCF_RESKEY_amqp_server_port_default="5673"
# Logging tag and facility defaults (only applied when not already set).
: ${HA_LOGTAG="ocf-ceilometer-agent-compute"}
: ${HA_LOGFACILITY="daemon"}
# Apply the defaults to every instance parameter the caller did not set.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
#######################################################################
#######################################
# Print a short usage summary for this resource agent.
# Arguments: none ($0 is interpolated as the script name)
# Outputs:   usage text on stdout
#######################################
usage() {
    # The text previously referred to a "scheduler service" (left over from
    # the agent this script was derived from); it now names the service that
    # is actually managed here.
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages an OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute) process as an HA resource
The 'start' operation starts the compute agent service.
The 'stop' operation stops the compute agent service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the compute agent service is running
The 'monitor' operation reports whether the compute agent service seems to be working
UEND
}
#######################################
# Emit the OCF resource-agent meta-data XML for this agent.
# Globals:   OCF_RESKEY_*_default (read) - interpolated as parameter defaults
# Arguments: none
# Outputs:   XML document on stdout
#######################################
meta_data() {
    # The here-document is unquoted on purpose so the *_default variables are
    # expanded into the XML. The description of the "config" parameter used to
    # contain copy-paste leftovers ("Compute Agent Compute", "registry").
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ceilometer-agent-compute">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute)
May manage a ceilometer-agent-compute instance or a clone set that
creates a distributed ceilometer-agent-compute cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute)</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Ceilometer Compute Agent server binary (ceilometer-agent-compute)
</longdesc>
<shortdesc lang="en">OpenStack Ceilometer Compute Agent server binary (ceilometer-agent-compute)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute)
</longdesc>
<shortdesc lang="en">OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute) instance
</longdesc>
<shortdesc lang="en">OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="amqp_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Use for monitoring purposes
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the OpenStack Ceilometer Compute Agent Service (ceilometer-agent-compute)
</longdesc>
<shortdesc lang="en">Additional parameters for ceilometer-agent-compute</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
#######################################
# Validate the AMQP port parameter.
#
# A port is accepted only when it consists of exactly four decimal digits and
# is not "0000".
#   valid:   "1080", "0080"
#   invalid: "1080bad", "0", "0000", "", "1:23", "1,23"
#
# The earlier implementation used a port-list regular expression, which let
# four-character values such as "1:23" through, and it accepted "0000".
#
# NOTE(review): the four-digit rule is kept as the documented contract; it
# also rejects legitimate five-digit ports (e.g. 15672) -- confirm whether
# that restriction is still wanted.
#
# Arguments: $1 - port value to check
# Outputs:   an error through ocf_log when the value is rejected
# Returns:   0 when the port is valid
# Exits:     with $OCF_ERR_CONFIGURED when the port is invalid (the whole
#            script terminates; the function does not return in that case)
#######################################
ceilometer_agent_compute_check_port() {
    local port="$1"

    case "$port" in
        0000)
            # All zeroes is not a usable port; fall through to the error.
            ;;
        [0-9][0-9][0-9][0-9])
            return 0
            ;;
    esac

    ocf_log err "Invalid port number: $1"
    exit $OCF_ERR_CONFIGURED
}
#######################################
# Validate the environment and the instance parameters of this resource.
#
# Checks, in order: the agent binary and netstat are available, the AMQP port
# is well formed, the configuration file exists, and the service user exists.
#
# Globals:   OCF_RESKEY_binary, OCF_RESKEY_amqp_server_port,
#            OCF_RESKEY_config, OCF_RESKEY_user (read)
# Arguments: none
# Returns:   0 when everything is valid
#            $OCF_ERR_INSTALLED when the config file (outside of a probe) or
#            the user is missing
# Exits:     ceilometer_agent_compute_check_port terminates the script on an
#            invalid port; check_binary is defined outside this file and is
#            presumably also fatal when the binary is missing -- verify.
#######################################
ceilometer_agent_compute_validate() {
    check_binary "$OCF_RESKEY_binary"
    check_binary netstat
    ceilometer_agent_compute_check_port "$OCF_RESKEY_amqp_server_port"

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # The severity is an argument of ocf_log; the former "ocf_log_warn"
        # spelling is not a defined command, so the warning was never logged.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return 0
}
#######################################
# Report whether the process recorded in the pid file is alive.
#
# Globals:   OCF_RESKEY_pid (read) - path of the pid file
# Arguments: none
# Returns:   $OCF_SUCCESS     when the recorded pid answers to signal 0
#            $OCF_NOT_RUNNING when the pid file is missing, does not hold a
#                             positive integer, or the pid is not alive
#######################################
ceilometer_agent_compute_status() {
    local pid

    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) is not running"
        return $OCF_NOT_RUNNING
    fi

    pid=$(cat "$OCF_RESKEY_pid")

    # Only a positive integer is a meaningful pid. An empty or garbage value
    # must not reach kill, and "0" would probe the caller's own process group,
    # which always succeeds and would report a dead daemon as running.
    # 10# forces base 10 so a value with leading zeroes is not read as octal.
    if ! [[ "$pid" =~ ^[0-9]+$ ]] || (( 10#$pid == 0 )); then
        ocf_log info "Old PID file found, but OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) is not running"
        return $OCF_NOT_RUNNING
    fi

    # Signal 0 performs the existence check without delivering a signal.
    if ocf_run -warn kill -s 0 "$pid"; then
        return $OCF_SUCCESS
    fi

    ocf_log info "Old PID file found, but OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) is not running"
    return $OCF_NOT_RUNNING
}
#######################################
# Monitor action: report the health of the compute agent.
#
# Globals:   OCF_RESKEY_pid, OCF_RESKEY_amqp_server_port (read)
# Arguments: none
# Returns:   whatever ceilometer_agent_compute_status returned when that is
#            not $OCF_SUCCESS; $OCF_SUCCESS otherwise
#######################################
ceilometer_agent_compute_monitor() {
    local status_rc
    local agent_pid

    # Anything but success from the status check is handed back unchanged.
    ceilometer_agent_compute_status
    status_rc=$?
    if [ $status_rc -ne $OCF_SUCCESS ]; then
        return $status_rc
    fi

    # Look for an ESTABLISHED connection that mentions both the AMQP port and
    # the agent's pid, so that only this agent's own connections can match.
    # NOTE(review): the outcome of this probe is not acted upon -- the
    # function reports success whenever the status check passed. Confirm
    # whether a missing AMQP connection is meant to fail the monitor.
    agent_pid=$(cat $OCF_RESKEY_pid)
    netstat -punt \
        | grep -s "$OCF_RESKEY_amqp_server_port" \
        | grep -s "$agent_pid" \
        | grep -qs "ESTABLISHED"

    ocf_log debug "OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) monitor succeeded"
    return $OCF_SUCCESS
}
#######################################
# Start action: launch the compute agent and wait until monitor succeeds.
#
# Globals:   OCF_RESKEY_user, OCF_RESKEY_binary, OCF_RESKEY_config,
#            OCF_RESKEY_additional_parameters (read)
#            OCF_RESKEY_pid (read; the file it names is overwritten)
# Arguments: none
# Returns:   $OCF_SUCCESS when the agent was already running or came up
# Exits:     with $OCF_ERR_GENERIC when monitor reports anything other than
#            success or "not running" while waiting
#######################################
ceilometer_agent_compute_start() {
    local probe_rc
    local daemon_cmd

    # Nothing to do when the agent is already up.
    ceilometer_agent_compute_status
    probe_rc=$?
    if [ $probe_rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) already running"
        return $OCF_SUCCESS
    fi

    # Launch the daemon as the service user. ocf_run is deliberately not used:
    # the daemon's output is discarded anyway and ocf_run would interfere with
    # the stdout redirection that captures the pid. The single-quoted tail is
    # evaluated by the inner /bin/sh, so "$!" is the pid of the backgrounded
    # daemon, and it is what ends up in the pid file.
    daemon_cmd="${OCF_RESKEY_binary} --polling-namespaces compute --config-file=$OCF_RESKEY_config $OCF_RESKEY_additional_parameters"
    su ${OCF_RESKEY_user} -s /bin/sh -c "${daemon_cmd}"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid

    # Poll once per second until monitor is happy. There is no upper bound
    # inside this loop.
    while :; do
        ceilometer_agent_compute_monitor
        probe_rc=$?
        if [ $probe_rc -eq $OCF_SUCCESS ]; then
            break
        fi
        if [ $probe_rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "OpenStack Ceilometer Compute Agent (ceilometer-agent-compute) started"
    return $OCF_SUCCESS
}
#######################################
# Stop action: stop the compute agent recorded in the pid file.
#
# Globals:   OCF_RESKEY_pid, OCF_RESKEY_binary (read)
#            OCF_RESKEY_CRM_meta_timeout (read, optional) - operation timeout
#            in milliseconds; when set it replaces the 15 second default
# Arguments: none
# Returns:   $OCF_SUCCESS when the agent is already stopped; otherwise the
#            status of proc_stop. proc_stop is not defined in this file
#            (presumably it comes from the sourced ocf-fuel-funcs library --
#            verify its return-code contract there).
#######################################
ceilometer_agent_compute_stop() {
    local rc
    local shutdown_timeout=15

    # Convert the cluster-supplied timeout from milliseconds to seconds.
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( ($OCF_RESKEY_CRM_meta_timeout/1000) ))
    fi

    ceilometer_agent_compute_status
    rc="${?}"
    if [ "${rc}" -eq "${OCF_NOT_RUNNING}" ]; then
        # The message used to read "OpenStack Compute Compute Agent".
        ocf_log info "OpenStack Ceilometer Compute Agent (${OCF_RESKEY_binary}) already stopped"
        return "${OCF_SUCCESS}"
    fi

    proc_stop "${OCF_RESKEY_pid}" "${OCF_RESKEY_binary}" "${shutdown_timeout}"
    return "${?}"
}
#######################################################################
# Informational actions are answered before any validation takes place, so
# they work even on a node where the service is not installed.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit $OCF_SUCCESS
fi

# Every other action requires a valid configuration; the validation status is
# used as the exit code when it fails.
ceilometer_agent_compute_validate || exit $?

# Dispatch the requested action. The status of the selected handler becomes
# the exit status of the script.
case "$1" in
    start)
        ceilometer_agent_compute_start
        ;;
    stop)
        ceilometer_agent_compute_stop
        ;;
    status)
        ceilometer_agent_compute_status
        ;;
    monitor)
        ceilometer_agent_compute_monitor
        ;;
    validate-all)
        # Validation already ran above; nothing more to do.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac