Add AMQP conn check for heat-engine ocf

Improve the status check by tracking the AMQP port connection.
The heat-engine service will be restarted by Pacemaker
if the AMQP connection is lost, and the monitor action will
report failure until the AMQP connection is re-established.
Source: https://github.com/madkiss/openstack-resource-agents
0263ab71d319d3c398c5d29c4c0eb261db358ab1

DocImpact
Related blueprint pacemaker-improvements

Change-Id: I0a2246051212332acc40b8a82d42b585188d5d73
Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
Bogdan Dobrelya 2014-10-06 14:58:13 +02:00
parent 8814b0224d
commit 07c23e3a7d
2 changed files with 116 additions and 9 deletions

View File

@ -16,6 +16,7 @@
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_amqp_server_port
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
@ -33,12 +34,14 @@ LOG="/var/log/heat/engine.log"
# Built-in defaults for the resource parameters.
OCF_RESKEY_binary_default="/usr/bin/heat-engine"
OCF_RESKEY_config_default="/etc/heat/heat.conf"
OCF_RESKEY_pid_default="/var/run/heat/heat-engine.pid"
# NOTE(review): 5673 is not the IANA-registered AMQP port (5672); presumably
# it matches the broker port used by this deployment - confirm.
OCF_RESKEY_amqp_server_port_default="5673"
OCF_RESKEY_user_default="heat"
# Apply each default only when the corresponding parameter is unset.
# The ${var=default} form leaves an explicitly set (even empty) value alone.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
#######################################################################
@ -107,6 +110,14 @@ The pid file to use for this process
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="amqp_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Use for monitoring purposes
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the Heat engine if you do need them
@ -132,10 +143,31 @@ END
#######################################################################
# Functions invoked by resource manager actions
# Validate that $1 is a single TCP port number.
#
# Arguments: $1 - candidate port (decimal digits; leading zeros are tolerated)
# Returns:   0 when the port is valid
# Exits:     with $OCF_ERR_CONFIGURED (after logging an error) otherwise
#
# Examples of valid port:   "1080", "0080", "443", "15672"
# Examples of invalid port: "1080bad", "1:23", "1,23", "0", "0000", "65536", ""
heat_engine_check_port() {
    local port
    local value

    port="$1"

    # Only plain decimal digits are allowed: no ranges, lists or suffixes.
    case "$port" in
        ''|*[!0-9]*)
            ocf_log err "Invalid port number: $1"
            exit $OCF_ERR_CONFIGURED
            ;;
    esac

    # Strip leading zeros so that "0080" is compared as 80 and an all-zero
    # string collapses to the empty string.
    value="$port"
    while [ "${value#0}" != "$value" ]; do
        value="${value#0}"
    done

    # The length test runs first so the numeric comparison never sees a
    # value too large for the shell's integer arithmetic.
    if [ -z "$value" ] || [ ${#value} -gt 5 ] || [ "$value" -gt 65535 ]; then
        ocf_log err "Invalid port number: $1"
        exit $OCF_ERR_CONFIGURED
    fi

    return 0
}
service_validate() {
local rc
check_binary $OCF_RESKEY_binary
check_binary netstat
heat_engine_check_port $OCF_RESKEY_amqp_server_port
# A config file on shared storage that is not available
# during probes is OK.
@ -165,7 +197,7 @@ service_status() {
ocf_log info "${SERVICE_NAME} is not running"
return $OCF_NOT_RUNNING
else
pid=`cat $OCF_RESKEY_pid`
pid=`cat $OCF_RESKEY_pid`
fi
ocf_run -warn kill -s 0 $pid
@ -179,9 +211,31 @@ service_status() {
}
# Monitor action: run the basic status check, then verify that the process
# named in the pid file holds an ESTABLISHED connection to the AMQP port.
#
# Globals:  OCF_RESKEY_pid, OCF_RESKEY_amqp_server_port, SERVICE_NAME (read)
# Returns:  the service_status result when it is not $OCF_SUCCESS,
#           $OCF_NOT_RUNNING when no matching AMQP connection is found,
#           $OCF_SUCCESS otherwise
service_monitor() {
    local rc
    local pid
    local rc_amqp

    service_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Check the connections according to the PID, so that only this
    # heat-engine process is considered and not another process with the
    # same connection behavior (for example heat-api).
    pid=$(cat "$OCF_RESKEY_pid")

    # Expected "netstat -punt" columns:
    #   Proto Recv-Q Send-Q Local-Address Foreign-Address State PID/Program
    # The foreign port and the PID are compared as whole fields; substring
    # matching would let PID 12 match "4123/python" and port 5673 match
    # ":15673" or an unrelated local port.
    netstat -punt | awk -v port="$OCF_RESKEY_amqp_server_port" -v pid="$pid" '
        NF >= 7 && $6 == "ESTABLISHED" {
            n = split($5, addr, ":")
            split($7, owner, "/")
            if (addr[n] == port && owner[1] == pid) {
                found = 1
            }
        }
        END { exit(found ? 0 : 1) }'
    rc_amqp=$?

    if [ $rc_amqp -ne 0 ]; then
        ocf_log err "${SERVICE_NAME} is not connected to the AMQP server: AMQP connection test returned $rc_amqp"
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded"
    return $OCF_SUCCESS
}
service_start() {

View File

@ -16,6 +16,7 @@
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_amqp_server_port
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
@ -33,12 +34,14 @@ LOG="/var/log/heat/engine.log"
# Built-in defaults for the resource parameters.
OCF_RESKEY_binary_default="/usr/bin/heat-engine"
OCF_RESKEY_config_default="/etc/heat/heat.conf"
OCF_RESKEY_pid_default="/var/run/heat-engine.pid"
# NOTE(review): 5673 is not the IANA-registered AMQP port (5672); presumably
# it matches the broker port used by this deployment - confirm.
OCF_RESKEY_amqp_server_port_default="5673"
OCF_RESKEY_user_default="heat"
# Apply each default only when the corresponding parameter is unset.
# The ${var=default} form leaves an explicitly set (even empty) value alone.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
#######################################################################
@ -107,6 +110,14 @@ The pid file to use for this process
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="amqp_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Use for monitoring purposes
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the Heat engine if you do need them
@ -132,10 +143,31 @@ END
#######################################################################
# Functions invoked by resource manager actions
# Validate that $1 is a single TCP port number.
#
# Arguments: $1 - candidate port (decimal digits; leading zeros are tolerated)
# Returns:   0 when the port is valid
# Exits:     with $OCF_ERR_CONFIGURED (after logging an error) otherwise
#
# Examples of valid port:   "1080", "0080", "443", "15672"
# Examples of invalid port: "1080bad", "1:23", "1,23", "0", "0000", "65536", ""
heat_engine_check_port() {
    local port
    local value

    port="$1"

    # Only plain decimal digits are allowed: no ranges, lists or suffixes.
    case "$port" in
        ''|*[!0-9]*)
            ocf_log err "Invalid port number: $1"
            exit $OCF_ERR_CONFIGURED
            ;;
    esac

    # Strip leading zeros so that "0080" is compared as 80 and an all-zero
    # string collapses to the empty string.
    value="$port"
    while [ "${value#0}" != "$value" ]; do
        value="${value#0}"
    done

    # The length test runs first so the numeric comparison never sees a
    # value too large for the shell's integer arithmetic.
    if [ -z "$value" ] || [ ${#value} -gt 5 ] || [ "$value" -gt 65535 ]; then
        ocf_log err "Invalid port number: $1"
        exit $OCF_ERR_CONFIGURED
    fi

    return 0
}
service_validate() {
local rc
check_binary $OCF_RESKEY_binary
check_binary netstat
heat_engine_check_port $OCF_RESKEY_amqp_server_port
# A config file on shared storage that is not available
# during probes is OK.
@ -165,7 +197,7 @@ service_status() {
ocf_log info "${SERVICE_NAME} is not running"
return $OCF_NOT_RUNNING
else
pid=`cat $OCF_RESKEY_pid`
pid=`cat $OCF_RESKEY_pid`
fi
ocf_run -warn kill -s 0 $pid
@ -179,9 +211,31 @@ service_status() {
}
# Monitor action: run the basic status check, then verify that the process
# named in the pid file holds an ESTABLISHED connection to the AMQP port.
#
# Globals:  OCF_RESKEY_pid, OCF_RESKEY_amqp_server_port, SERVICE_NAME (read)
# Returns:  the service_status result when it is not $OCF_SUCCESS,
#           $OCF_NOT_RUNNING when no matching AMQP connection is found,
#           $OCF_SUCCESS otherwise
service_monitor() {
    local rc
    local pid
    local rc_amqp

    service_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Check the connections according to the PID, so that only this
    # heat-engine process is considered and not another process with the
    # same connection behavior (for example heat-api).
    pid=$(cat "$OCF_RESKEY_pid")

    # Expected "netstat -punt" columns:
    #   Proto Recv-Q Send-Q Local-Address Foreign-Address State PID/Program
    # The foreign port and the PID are compared as whole fields; substring
    # matching would let PID 12 match "4123/python" and port 5673 match
    # ":15673" or an unrelated local port.
    netstat -punt | awk -v port="$OCF_RESKEY_amqp_server_port" -v pid="$pid" '
        NF >= 7 && $6 == "ESTABLISHED" {
            n = split($5, addr, ":")
            split($7, owner, "/")
            if (addr[n] == port && owner[1] == pid) {
                found = 1
            }
        }
        END { exit(found ? 0 : 1) }'
    rc_amqp=$?

    if [ $rc_amqp -ne 0 ]; then
        ocf_log err "${SERVICE_NAME} is not connected to the AMQP server: AMQP connection test returned $rc_amqp"
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded"
    return $OCF_SUCCESS
}
service_start() {
@ -197,7 +251,6 @@ service_start() {
# source init and venv
. /lib/lsb/init-functions
# run the actual daemon.
#daemon --user "${OCF_RESKEY_user}" --pidfile "${OCF_RESKEY_pid}" "${OCF_RESKEY_binary} --config-file ${OCF_RESKEY_config} --logfile ${LOG} &>/dev/null & echo \$! > ${OCF_RESKEY_pid}"
start-stop-daemon --start --background --quiet --chuid "${OCF_RESKEY_user}:${OCF_RESKEY_user}" --make-pidfile --pidfile "${OCF_RESKEY_pid}" --startas "${OCF_RESKEY_binary}"
# Spin waiting for the server to come up.