Add AMQP conn check for heat-engine ocf
Improve the status check by tracking the connection to the AMQP port. Pacemaker will restart the heat-engine service if its AMQP connection is lost, and the monitor action will keep reporting failure until the AMQP connection is re-established. Source: https://github.com/madkiss/openstack-resource-agents 0263ab71d319d3c398c5d29c4c0eb261db358ab1 DocImpact Related blueprint pacemaker-improvements Change-Id: I0a2246051212332acc40b8a82d42b585188d5d73 Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
parent
8814b0224d
commit
07c23e3a7d
@ -16,6 +16,7 @@
|
||||
# OCF_RESKEY_config
|
||||
# OCF_RESKEY_user
|
||||
# OCF_RESKEY_pid
|
||||
# OCF_RESKEY_amqp_server_port
|
||||
# OCF_RESKEY_additional_parameters
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
@ -33,12 +34,14 @@ LOG="/var/log/heat/engine.log"
|
||||
OCF_RESKEY_binary_default="/usr/bin/heat-engine"
|
||||
OCF_RESKEY_config_default="/etc/heat/heat.conf"
|
||||
OCF_RESKEY_pid_default="/var/run/heat/heat-engine.pid"
|
||||
OCF_RESKEY_amqp_server_port_default="5673"
|
||||
OCF_RESKEY_user_default="heat"
|
||||
|
||||
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
|
||||
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
|
||||
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
|
||||
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
||||
: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
|
||||
|
||||
#######################################################################
|
||||
|
||||
@ -107,6 +110,14 @@ The pid file to use for this process
|
||||
<content type="string" default="${OCF_RESKEY_pid_default}" />
|
||||
</parameter>
|
||||
|
||||
<parameter name="amqp_server_port" unique="0" required="0">
|
||||
<longdesc lang="en">
|
||||
The listening port number of the AMQP server. Use for monitoring purposes
|
||||
</longdesc>
|
||||
<shortdesc lang="en">AMQP listening port</shortdesc>
|
||||
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
|
||||
</parameter>
|
||||
|
||||
<parameter name="additional_parameters" unique="0" required="0">
|
||||
<longdesc lang="en">
|
||||
Additional parameters to pass on to the Heat engine if you do need them
|
||||
@ -132,10 +143,31 @@ END
|
||||
#######################################################################
|
||||
# Functions invoked by resource manager actions
|
||||
|
||||
heat_engine_check_port() {
    # Validate a TCP port number supplied as a resource parameter.
    #
    # Arguments: $1 - the port number to validate
    # Returns:   0 when the port is valid; otherwise logs an error and
    #            exits the agent with $OCF_ERR_CONFIGURED.
    #
    # A valid port consists of decimal digits only and has a value in the
    # range 1..65535. Leading zeros are tolerated.
    # Examples of valid port:   "1080", "0080", "5673", "15672"
    # Examples of invalid port: "1080bad", "1:23", "0", "0000", "70000", ""

    local port
    local digits
    local valid

    port="$1"
    valid=1

    # Reject the empty string and anything containing a non-digit.
    case "$port" in
        ''|*[!0-9]*) valid=0 ;;
    esac

    if [ $valid -eq 1 ]; then
        # Strip leading zeros so that the value is compared as decimal and
        # an all-zero port collapses to the empty string.
        digits="${port#"${port%%[!0]*}"}"

        # The length guard keeps the numeric test from overflowing on
        # absurdly long digit strings.
        if [ -z "$digits" ] || [ ${#digits} -gt 5 ] || [ "$digits" -gt 65535 ]; then
            valid=0
        fi
    fi

    if [ $valid -ne 1 ]; then
        ocf_log err "Invalid port number: $1"
        exit $OCF_ERR_CONFIGURED
    fi
}
|
||||
|
||||
service_validate() {
|
||||
local rc
|
||||
|
||||
check_binary $OCF_RESKEY_binary
|
||||
check_binary netstat
|
||||
heat_engine_check_port $OCF_RESKEY_amqp_server_port
|
||||
|
||||
# A config file on shared storage that is not available
|
||||
# during probes is OK.
|
||||
@ -165,7 +197,7 @@ service_status() {
|
||||
ocf_log info "${SERVICE_NAME} is not running"
|
||||
return $OCF_NOT_RUNNING
|
||||
else
|
||||
pid=`cat $OCF_RESKEY_pid`
|
||||
pid=`cat $OCF_RESKEY_pid`
|
||||
fi
|
||||
|
||||
ocf_run -warn kill -s 0 $pid
|
||||
@ -179,9 +211,31 @@ service_status() {
|
||||
}
|
||||
|
||||
service_monitor() {
    # Monitor action: the service is healthy only when the process is
    # alive AND it holds an established connection to the AMQP server.
    #
    # Globals:  OCF_RESKEY_pid (read), OCF_RESKEY_amqp_server_port (read),
    #           SERVICE_NAME (read)
    # Returns:  $OCF_SUCCESS when both checks pass; the service_status
    #           return code when the status check fails; $OCF_NOT_RUNNING
    #           when no established AMQP connection is found.

    local rc
    local pid
    local rc_amqp

    service_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Check the connections according to the PID, so that only the
    # heat-engine process is considered and not another heat process with
    # the same connection behavior (for example heat-api).
    pid=$(cat "$OCF_RESKEY_pid")

    # Compare whole netstat columns instead of grepping for substrings:
    # the port must be the last ":"-separated part of the foreign address
    # ($5), the state ($6) must be ESTABLISHED and the PID must be the part
    # of the "PID/Program name" column ($7) before the slash. This avoids
    # false positives such as the port matching a PID or a longer port
    # number, and an empty PID matching every line.
    netstat -punt | awk -v port="$OCF_RESKEY_amqp_server_port" -v pid="$pid" '
        pid != "" && NF >= 7 && $6 == "ESTABLISHED" {
            n = split($5, addr, ":")
            split($7, owner, "/")
            if (addr[n] == port && owner[1] == pid) {
                found = 1
            }
        }
        END { exit !found }'
    rc_amqp=$?

    if [ $rc_amqp -ne 0 ]; then
        ocf_log err "${SERVICE_NAME} is not connected to the AMQP server: AMQP connection test returned $rc_amqp"
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded"
    return $OCF_SUCCESS
}
|
||||
|
||||
service_start() {
|
||||
|
@ -16,6 +16,7 @@
|
||||
# OCF_RESKEY_config
|
||||
# OCF_RESKEY_user
|
||||
# OCF_RESKEY_pid
|
||||
# OCF_RESKEY_amqp_server_port
|
||||
# OCF_RESKEY_additional_parameters
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
@ -33,12 +34,14 @@ LOG="/var/log/heat/engine.log"
|
||||
OCF_RESKEY_binary_default="/usr/bin/heat-engine"
|
||||
OCF_RESKEY_config_default="/etc/heat/heat.conf"
|
||||
OCF_RESKEY_pid_default="/var/run/heat-engine.pid"
|
||||
OCF_RESKEY_amqp_server_port_default="5673"
|
||||
OCF_RESKEY_user_default="heat"
|
||||
|
||||
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
|
||||
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
|
||||
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
|
||||
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
||||
: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
|
||||
|
||||
#######################################################################
|
||||
|
||||
@ -107,6 +110,14 @@ The pid file to use for this process
|
||||
<content type="string" default="${OCF_RESKEY_pid_default}" />
|
||||
</parameter>
|
||||
|
||||
<parameter name="amqp_server_port" unique="0" required="0">
|
||||
<longdesc lang="en">
|
||||
The listening port number of the AMQP server. Use for monitoring purposes
|
||||
</longdesc>
|
||||
<shortdesc lang="en">AMQP listening port</shortdesc>
|
||||
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
|
||||
</parameter>
|
||||
|
||||
<parameter name="additional_parameters" unique="0" required="0">
|
||||
<longdesc lang="en">
|
||||
Additional parameters to pass on to the Heat engine if you do need them
|
||||
@ -132,10 +143,31 @@ END
|
||||
#######################################################################
|
||||
# Functions invoked by resource manager actions
|
||||
|
||||
heat_engine_check_port() {
    # Validate a TCP port number supplied as a resource parameter.
    #
    # Arguments: $1 - the port number to validate
    # Returns:   0 when the port is valid; otherwise logs an error and
    #            exits the agent with $OCF_ERR_CONFIGURED.
    #
    # A valid port consists of decimal digits only and has a value in the
    # range 1..65535. Leading zeros are tolerated.
    # Examples of valid port:   "1080", "0080", "5673", "15672"
    # Examples of invalid port: "1080bad", "1:23", "0", "0000", "70000", ""

    local port
    local digits
    local valid

    port="$1"
    valid=1

    # Reject the empty string and anything containing a non-digit.
    case "$port" in
        ''|*[!0-9]*) valid=0 ;;
    esac

    if [ $valid -eq 1 ]; then
        # Strip leading zeros so that the value is compared as decimal and
        # an all-zero port collapses to the empty string.
        digits="${port#"${port%%[!0]*}"}"

        # The length guard keeps the numeric test from overflowing on
        # absurdly long digit strings.
        if [ -z "$digits" ] || [ ${#digits} -gt 5 ] || [ "$digits" -gt 65535 ]; then
            valid=0
        fi
    fi

    if [ $valid -ne 1 ]; then
        ocf_log err "Invalid port number: $1"
        exit $OCF_ERR_CONFIGURED
    fi
}
|
||||
|
||||
service_validate() {
|
||||
local rc
|
||||
|
||||
check_binary $OCF_RESKEY_binary
|
||||
check_binary netstat
|
||||
heat_engine_check_port $OCF_RESKEY_amqp_server_port
|
||||
|
||||
# A config file on shared storage that is not available
|
||||
# during probes is OK.
|
||||
@ -165,7 +197,7 @@ service_status() {
|
||||
ocf_log info "${SERVICE_NAME} is not running"
|
||||
return $OCF_NOT_RUNNING
|
||||
else
|
||||
pid=`cat $OCF_RESKEY_pid`
|
||||
pid=`cat $OCF_RESKEY_pid`
|
||||
fi
|
||||
|
||||
ocf_run -warn kill -s 0 $pid
|
||||
@ -179,9 +211,31 @@ service_status() {
|
||||
}
|
||||
|
||||
service_monitor() {
    # Monitor action: the service is healthy only when the process is
    # alive AND it holds an established connection to the AMQP server.
    #
    # Globals:  OCF_RESKEY_pid (read), OCF_RESKEY_amqp_server_port (read),
    #           SERVICE_NAME (read)
    # Returns:  $OCF_SUCCESS when both checks pass; the service_status
    #           return code when the status check fails; $OCF_NOT_RUNNING
    #           when no established AMQP connection is found.

    local rc
    local pid
    local rc_amqp

    service_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Check the connections according to the PID, so that only the
    # heat-engine process is considered and not another heat process with
    # the same connection behavior (for example heat-api).
    pid=$(cat "$OCF_RESKEY_pid")

    # Compare whole netstat columns instead of grepping for substrings:
    # the port must be the last ":"-separated part of the foreign address
    # ($5), the state ($6) must be ESTABLISHED and the PID must be the part
    # of the "PID/Program name" column ($7) before the slash. This avoids
    # false positives such as the port matching a PID or a longer port
    # number, and an empty PID matching every line.
    netstat -punt | awk -v port="$OCF_RESKEY_amqp_server_port" -v pid="$pid" '
        pid != "" && NF >= 7 && $6 == "ESTABLISHED" {
            n = split($5, addr, ":")
            split($7, owner, "/")
            if (addr[n] == port && owner[1] == pid) {
                found = 1
            }
        }
        END { exit !found }'
    rc_amqp=$?

    if [ $rc_amqp -ne 0 ]; then
        ocf_log err "${SERVICE_NAME} is not connected to the AMQP server: AMQP connection test returned $rc_amqp"
        return $OCF_NOT_RUNNING
    fi

    ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded"
    return $OCF_SUCCESS
}
|
||||
|
||||
service_start() {
|
||||
@ -197,7 +251,6 @@ service_start() {
|
||||
# source init and venv
|
||||
. /lib/lsb/init-functions
|
||||
# run the actual daemon.
|
||||
#daemon --user "${OCF_RESKEY_user}" --pidfile "${OCF_RESKEY_pid}" "${OCF_RESKEY_binary} --config-file ${OCF_RESKEY_config} --logfile ${LOG} &>/dev/null & echo \$! > ${OCF_RESKEY_pid}"
|
||||
start-stop-daemon --start --background --quiet --chuid "${OCF_RESKEY_user}:${OCF_RESKEY_user}" --make-pidfile --pidfile "${OCF_RESKEY_pid}" --startas "${OCF_RESKEY_binary}"
|
||||
|
||||
# Spin waiting for the server to come up.
|
||||
|
Loading…
Reference in New Issue
Block a user