1468 lines
44 KiB
Diff
1468 lines
44 KiB
Diff
---
|
|
ocf/aodh-api | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
ocf/aodh-evaluator | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
ocf/aodh-listener | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
ocf/aodh-notifier | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
4 files changed, 1448 insertions(+)
|
|
|
|
--- /dev/null
|
|
+++ b/ocf/aodh-api
|
|
@@ -0,0 +1,368 @@
|
|
+#!/bin/sh
|
|
+#
|
|
+#
|
|
+# OpenStack Alarming API Service (aodh-api)
|
|
+#
|
|
+# Description: Manages an OpenStack Alarming API Service (aodh-api) process as an HA resource
|
|
+#
|
|
+# Authors: Emilien Macchi
|
|
+#
|
|
+# Support: openstack@lists.launchpad.net
|
|
+# License: Apache Software License (ASL) 2.0
|
|
+#
|
|
+# Copyright (c) 2014 Wind River Systems, Inc.
|
|
+# SPDX-License-Identifier: Apache-2.0
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+# See usage() function below for more details ...
|
|
+#
|
|
+# OCF instance parameters:
|
|
+# OCF_RESKEY_binary
|
|
+# OCF_RESKEY_config
|
|
+# OCF_RESKEY_user
|
|
+# OCF_RESKEY_pid
|
|
+# OCF_RESKEY_monitor_binary
|
|
+# OCF_RESKEY_server_port
|
|
+# OCF_RESKEY_additional_parameters
|
|
+#######################################################################
|
|
+# Initialization:
|
|
+
|
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Per-parameter defaults. Each OCF_RESKEY_* variable that is already set
+# (even to the empty string) is left untouched; only unset ones receive
+# the default defined right above them.
+
+# Executable that is launched for this resource
+OCF_RESKEY_binary_default="aodh-api"
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+# Configuration file handed to the binary via --config-file
+OCF_RESKEY_config_default="/etc/aodh/aodh.conf"
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+
+# Account the daemon is started under (through su)
+OCF_RESKEY_user_default="root"
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+
+# File in which the PID of the launched process is recorded
+OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+# Port validated by aodh_api_check_port
+OCF_RESKEY_server_port_default="8042"
+: ${OCF_RESKEY_server_port=${OCF_RESKEY_server_port_default}}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Print the command synopsis followed by a one-line summary of every
+# supported action. $0 is expanded inside the here-document.
+usage() {
+    cat <<USAGE_TEXT
+ usage: $0 (start|stop|validate-all|meta-data|status|monitor)
+
+ $0 manages an OpenStack Alarming API Service (aodh-api) process as an HA resource
+
+ The 'start' operation starts the aodh-api service.
+ The 'stop' operation stops the aodh-api service.
+ The 'validate-all' operation reports whether the parameters are valid
+ The 'meta-data' operation reports this RA's meta-data information
+ The 'status' operation reports whether the aodh-api service is running
+ The 'monitor' operation reports whether the aodh-api service seems to be working
+
+USAGE_TEXT
+}
|
|
+
|
|
+# Emit the resource-agent meta-data XML on stdout. The here-document is
+# unquoted on purpose: the ${OCF_RESKEY_*_default} references are expanded
+# so the advertised defaults are the ones defined at the top of this script.
+meta_data() {
+    cat <<META_XML
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="aodh-api">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent for the OpenStack Alarming API Service (aodh-api)
+May manage a aodh-api instance or a clone set that
+creates a distributed aodh-api cluster.
+</longdesc>
+<shortdesc lang="en">Manages the OpenStack Alarming API Service (aodh-api)</shortdesc>
+<parameters>
+
+<parameter name="binary" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming API server binary (aodh-api)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming API server binary (aodh-api)</shortdesc>
+<content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming API Service (aodh-api) configuration file
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming API (aodh-api) config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User running OpenStack Alarming API Service (aodh-api)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming API Service (aodh-api) user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="pid" unique="0" required="0">
+<longdesc lang="en">
+The pid file to use for this OpenStack Alarming API Service (aodh-api) instance
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming API Service (aodh-api) pid file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+<parameter name="server_port" unique="0" required="0">
+<longdesc lang="en">
+The listening port number of the aodh-api server.
+
+</longdesc>
+<shortdesc lang="en">aodh-api listening port</shortdesc>
+<content type="integer" default="${OCF_RESKEY_server_port_default}" />
+</parameter>
+
+<parameter name="additional_parameters" unique="0" required="0">
+<longdesc lang="en">
+Additional parameters to pass on to the OpenStack Alarming API Service (aodh-api)
+</longdesc>
+<shortdesc lang="en">Additional parameters for aodh-api</shortdesc>
+<content type="string" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" timeout="30" interval="20" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+META_XML
+}
|
|
+
|
|
+#######################################################################
|
|
+# Functions invoked by resource manager actions
|
|
+
|
|
+# Validate a single TCP port number.
+#
+#   $1  candidate port
+#
+# Accepted: a purely decimal value between 1 and 65535; leading zeros are
+# tolerated so the historically valid "0080" keeps working.
+#   valid:   "8042", "1080", "0080", "80", "18042"
+#   invalid: "", "0", "0000", "1080bad", "1:22", "1,22", "70000"
+#
+# Returns 0 for a valid port. Otherwise logs an error and terminates the
+# agent with $OCF_ERR_CONFIGURED.
+aodh_api_check_port() {
+    local port
+    local value
+
+    port="$1"
+
+    # Digits only (at most five): this rejects the empty string, trailing
+    # garbage and the "a:b" / "a,b" list syntax the former pattern let through.
+    if printf '%s\n' "$port" | grep -Eqx '[0-9]{1,5}'; then
+        # expr evaluates the operand as decimal, which strips leading zeros.
+        value=`expr "$port" + 0`
+        if [ "$value" -ge 1 ] && [ "$value" -le 65535 ]; then
+            return 0
+        fi
+    fi
+
+    ocf_log err "Invalid port number: $1"
+    exit $OCF_ERR_CONFIGURED
+}
|
|
+
|
|
+# Check that the agent can operate with the current parameters.
+#
+# Verifies, in order: the service binary and netstat (via check_binary),
+# the server port (aodh_api_check_port exits on a bad value), the
+# configuration file, and the run-as user.
+#
+# Returns 0 when everything is usable, $OCF_ERR_INSTALLED when the config
+# file is missing outside of a probe or when the user is unknown.
+aodh_api_validate() {
+    check_binary "$OCF_RESKEY_binary"
+    check_binary netstat
+    aodh_api_check_port "$OCF_RESKEY_server_port"
+
+    # A config file on shared storage that is not available
+    # during probes is OK.
+    if [ ! -f "$OCF_RESKEY_config" ]; then
+        if ! ocf_is_probe; then
+            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
+            return $OCF_ERR_INSTALLED
+        fi
+        # Logged through ocf_log with the "warn" level, like every other
+        # message in this agent.
+        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
+    fi
+
+    # Quoted so that an empty user name is looked up (and fails) instead of
+    # turning the call into "list every account".
+    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
+        ocf_log err "User $OCF_RESKEY_user doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    return 0
+}
|
|
+
|
|
+# Report whether the process recorded in the pid file is alive.
+#
+# Returns $OCF_SUCCESS when signal 0 can be delivered to the recorded PID,
+# $OCF_NOT_RUNNING when there is no pid file or the PID is gone (a stale
+# pid file is removed in that case).
+aodh_api_status() {
+    local pid
+
+    # No pid file: nothing was started by this agent.
+    if [ ! -f $OCF_RESKEY_pid ]; then
+        ocf_log info "OpenStack Alarming API (aodh-api) is not running"
+        return $OCF_NOT_RUNNING
+    fi
+
+    pid=$(cat $OCF_RESKEY_pid)
+
+    # Signal 0 only probes for existence, nothing is delivered.
+    if ocf_run -warn kill -s 0 $pid; then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log info "Old PID file found, but OpenStack Alarming API (aodh-api) is not running"
+    rm -f $OCF_RESKEY_pid
+    return $OCF_NOT_RUNNING
+}
|
|
+
|
|
+# Health check: the process must be alive AND a socket on the configured
+# server port must be in LISTEN state.
+#
+# Returns whatever aodh_api_status returned when that is not $OCF_SUCCESS,
+# $OCF_NOT_RUNNING when nothing listens on $OCF_RESKEY_server_port, and
+# $OCF_SUCCESS otherwise.
+aodh_api_monitor() {
+    local rc
+
+    aodh_api_status
+    rc=$?
+
+    # If status returned anything but success, return that immediately
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        return $rc
+    fi
+
+    # Check the server is listening on the server port. The parameter is
+    # server_port; the previously referenced console_port is not defined
+    # anywhere in this agent, which made the first grep match every line.
+    netstat -an | grep -s "$OCF_RESKEY_server_port" | grep -qs "LISTEN"
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log err "aodh-api is not listening on $OCF_RESKEY_server_port: $rc"
+        return $OCF_NOT_RUNNING
+    fi
+
+    ocf_log debug "OpenStack Alarming API (aodh-api) monitor succeeded"
+    return $OCF_SUCCESS
+}
|
|
+
|
|
+# Start the daemon unless it is already running, then poll the monitor
+# once per second until it reports success.
+#
+# Returns $OCF_SUCCESS once the monitor succeeds. Exits with
+# $OCF_ERR_GENERIC if the monitor reports anything other than success or
+# "not running". The polling loop itself has no upper bound.
+aodh_api_start() {
+    local rc
+    local launch_cmd
+
+    aodh_api_status
+    rc=$?
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_log info "OpenStack Alarming API (aodh-api) already running"
+        return $OCF_SUCCESS
+    fi
+
+    # ocf_run is deliberately not used: the daemon's output is discarded
+    # and stdout must carry only the PID printed by "echo $!", which is
+    # redirected into the pid file.
+    launch_cmd="${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config $OCF_RESKEY_additional_parameters"
+    su ${OCF_RESKEY_user} -s /bin/sh -c "${launch_cmd}"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
+
+    # Spin waiting for the server to come up.
+    while :; do
+        aodh_api_monitor
+        rc=$?
+        case $rc in
+            $OCF_SUCCESS)
+                break
+                ;;
+            $OCF_NOT_RUNNING)
+                sleep 1
+                ;;
+            *)
+                ocf_log err "OpenStack Alarming API (aodh-api) start failed"
+                exit $OCF_ERR_GENERIC
+                ;;
+        esac
+    done
+
+    ocf_log info "OpenStack Alarming API (aodh-api) started"
+    return $OCF_SUCCESS
+}
|
|
+
|
|
+# Last-resort cleanup: SIGKILL any python process whose command line starts
+# with the resolved path of the service binary.
+#
+# Does nothing (and logs a warning) when the binary cannot be resolved,
+# because an empty path would make the pattern match unrelated python
+# processes. Always returns 0 in that case; otherwise the status of the
+# last command run.
+aodh_api_confirm_stop() {
+    local my_binary
+    local my_pattern
+    local my_processes
+
+    my_binary=`which "${OCF_RESKEY_binary}"`
+    if [ -z "${my_binary}" ]; then
+        ocf_log warn "Unable to resolve ${OCF_RESKEY_binary}; skipping leftover process cleanup"
+        return 0
+    fi
+
+    # The path must be followed by end-of-line or by a character that cannot
+    # continue the program name. A POSIX class is used because "\w" inside a
+    # bracket expression is just the two literal characters '\' and 'w'.
+    my_pattern="^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^[:alnum:]_-]|\$)"
+    my_processes=`pgrep -l -f "${my_pattern}"`
+
+    if [ -n "${my_processes}" ]
+    then
+        ocf_log info "About to SIGKILL the following: ${my_processes}"
+        pkill -KILL -f "${my_pattern}"
+    fi
+}
|
|
+
|
|
+# Stop the daemon: SIGTERM first, wait, then SIGKILL, and finally sweep
+# for leftovers with aodh_api_confirm_stop.
+#
+# The wait is 15 seconds unless OCF_RESKEY_CRM_meta_timeout is set, in
+# which case it is that value divided by 1000, minus 5.
+#
+# Returns $OCF_SUCCESS; exits with $OCF_ERR_GENERIC when SIGTERM cannot be
+# sent.
+aodh_api_stop() {
+    local rc
+    local pid
+
+    aodh_api_status
+    rc=$?
+    if [ $rc -eq $OCF_NOT_RUNNING ]; then
+        ocf_log info "OpenStack Alarming API (aodh-api) already stopped"
+        aodh_api_confirm_stop
+        return $OCF_SUCCESS
+    fi
+
+    # Try SIGTERM
+    pid=$(cat $OCF_RESKEY_pid)
+    if ! ocf_run kill -s TERM $pid; then
+        ocf_log err "OpenStack Alarming API (aodh-api) couldn't be stopped"
+        aodh_api_confirm_stop
+        exit $OCF_ERR_GENERIC
+    fi
+
+    # stop waiting
+    shutdown_timeout=15
+    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
+    fi
+
+    count=0
+    while [ $count -lt $shutdown_timeout ]; do
+        aodh_api_status
+        rc=$?
+        [ $rc -eq $OCF_NOT_RUNNING ] && break
+        count=$((count + 1))
+        sleep 1
+        ocf_log debug "OpenStack Alarming API (aodh-api) still hasn't stopped yet. Waiting ..."
+    done
+
+    aodh_api_status
+    rc=$?
+    if [ $rc -ne $OCF_NOT_RUNNING ]; then
+        # SIGTERM didn't help either, try SIGKILL
+        ocf_log info "OpenStack Alarming API (aodh-api) failed to stop after ${shutdown_timeout}s \
+ using SIGTERM. Trying SIGKILL ..."
+        ocf_run kill -s KILL $pid
+    fi
+    aodh_api_confirm_stop
+
+    ocf_log info "OpenStack Alarming API (aodh-api) stopped"
+
+    rm -f $OCF_RESKEY_pid
+
+    return $OCF_SUCCESS
+}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Actions that must work without a validated environment are served first.
+if [ "$1" = "meta-data" ]; then
+    meta_data
+    exit $OCF_SUCCESS
+elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
+    usage
+    exit $OCF_SUCCESS
+fi
+
+# Anything except meta-data and help must pass validation
+aodh_api_validate || exit $?
+
+# What kind of method was invoked?
+case "$1" in
+    start)
+        aodh_api_start
+        ;;
+    stop)
+        aodh_api_stop
+        ;;
+    status)
+        aodh_api_status
+        ;;
+    monitor)
+        aodh_api_monitor
+        ;;
+    validate-all)
+        # Validation already ran above; nothing more to do.
+        ;;
+    *)
+        usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
|
|
+
|
|
--- /dev/null
|
|
+++ b/ocf/aodh-evaluator
|
|
@@ -0,0 +1,360 @@
|
|
+#!/bin/sh
|
|
+#
|
|
+#
|
|
+# OpenStack Alarming Evaluator Service (aodh-evaluator)
|
|
+#
|
|
+# Description: Manages an OpenStack Alarming Evaluator Service (aodh-evaluator) process as an HA resource
|
|
+#
|
|
+# Authors: Emilien Macchi
|
|
+# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han
|
|
+#
|
|
+# Support: openstack@lists.launchpad.net
|
|
+# License: Apache Software License (ASL) 2.0
|
|
+#
|
|
+# Copyright (c) 2014-2016 Wind River Systems, Inc.
|
|
+# SPDX-License-Identifier: Apache-2.0
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+# See usage() function below for more details ...
|
|
+#
|
|
+# OCF instance parameters:
|
|
+# OCF_RESKEY_binary
|
|
+# OCF_RESKEY_config
|
|
+# OCF_RESKEY_user
|
|
+# OCF_RESKEY_pid
|
|
+# OCF_RESKEY_monitor_binary
|
|
+# OCF_RESKEY_amqp_server_port
|
|
+# OCF_RESKEY_additional_parameters
|
|
+#######################################################################
|
|
+# Initialization:
|
|
+
|
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
+. /usr/bin/tsconfig
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Per-parameter defaults. Each OCF_RESKEY_* variable that is already set
+# (even to the empty string) is left untouched; only unset ones receive
+# the default defined right above them.
+
+# Executable that is launched for this resource
+OCF_RESKEY_binary_default="aodh-evaluator"
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+# Configuration file handed to the binary via --config-file
+OCF_RESKEY_config_default="/etc/aodh/aodh.conf"
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+
+# Account the daemon is started under (through su)
+OCF_RESKEY_user_default="root"
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+
+# File in which the PID of the launched process is recorded
+OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+# Port validated by aodh_evaluator_check_port
+OCF_RESKEY_amqp_server_port_default="5672"
+: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Print the command synopsis followed by a one-line summary of every
+# supported action. $0 is expanded inside the here-document.
+# The per-action lines name the aodh-evaluator service (they used to say
+# "scheduler", a leftover from the agent this one was derived from).
+usage() {
+    cat <<UEND
+ usage: $0 (start|stop|validate-all|meta-data|status|monitor)
+
+ $0 manages an OpenStack Alarming Evaluator Service (aodh-evaluator) process as an HA resource
+
+ The 'start' operation starts the aodh-evaluator service.
+ The 'stop' operation stops the aodh-evaluator service.
+ The 'validate-all' operation reports whether the parameters are valid
+ The 'meta-data' operation reports this RA's meta-data information
+ The 'status' operation reports whether the aodh-evaluator service is running
+ The 'monitor' operation reports whether the aodh-evaluator service seems to be working
+
+UEND
+}
|
|
+
|
|
+# Emit the resource-agent meta-data XML on stdout. The here-document is
+# unquoted on purpose: the ${OCF_RESKEY_*_default} references are expanded
+# so the advertised defaults are the ones defined at the top of this script.
+meta_data() {
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="aodh-evaluator">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent for the OpenStack Alarming Evaluator Service (aodh-evaluator)
+May manage an aodh-evaluator instance or a clone set that
+creates a distributed aodh-evaluator cluster.
+</longdesc>
+<shortdesc lang="en">Manages the OpenStack Alarming Evaluator Service (aodh-evaluator)</shortdesc>
+<parameters>
+
+<parameter name="binary" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming Evaluator server binary (aodh-evaluator)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Evaluator server binary (aodh-evaluator)</shortdesc>
+<content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming Evaluator Service (aodh-evaluator) configuration file
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Evaluator (aodh-evaluator) config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User running OpenStack Alarming Evaluator Service (aodh-evaluator)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Evaluator Service (aodh-evaluator) user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="pid" unique="0" required="0">
+<longdesc lang="en">
+The pid file to use for this OpenStack Alarming Evaluator Service (aodh-evaluator) instance
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Evaluator Service (aodh-evaluator) pid file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+<parameter name="amqp_server_port" unique="0" required="0">
+<longdesc lang="en">
+The listening port number of the AMQP server. Use for monitoring purposes
+</longdesc>
+<shortdesc lang="en">AMQP listening port</shortdesc>
+<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
+</parameter>
+
+
+<parameter name="additional_parameters" unique="0" required="0">
+<longdesc lang="en">
+Additional parameters to pass on to the OpenStack Alarming Evaluator Service (aodh-evaluator)
+</longdesc>
+<shortdesc lang="en">Additional parameters for aodh-evaluator</shortdesc>
+<content type="string" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" timeout="30" interval="20" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
|
|
+
|
|
+#######################################################################
|
|
+# Functions invoked by resource manager actions
|
|
+
|
|
+# Validate a single TCP port number.
+#
+#   $1  candidate port
+#
+# Accepted: a purely decimal value between 1 and 65535; leading zeros are
+# tolerated so the historically valid "0080" keeps working.
+#   valid:   "5672", "1080", "0080", "80", "15672"
+#   invalid: "", "0", "0000", "1080bad", "1:22", "1,22", "70000"
+#
+# Returns 0 for a valid port. Otherwise logs an error and terminates the
+# agent with $OCF_ERR_CONFIGURED.
+aodh_evaluator_check_port() {
+    local port
+    local value
+
+    port="$1"
+
+    # Digits only (at most five): this rejects the empty string, trailing
+    # garbage and the "a:b" / "a,b" list syntax the former pattern let through.
+    if printf '%s\n' "$port" | grep -Eqx '[0-9]{1,5}'; then
+        # expr evaluates the operand as decimal, which strips leading zeros.
+        value=`expr "$port" + 0`
+        if [ "$value" -ge 1 ] && [ "$value" -le 65535 ]; then
+            return 0
+        fi
+    fi
+
+    ocf_log err "Invalid port number: $1"
+    exit $OCF_ERR_CONFIGURED
+}
|
|
+
|
|
+# Check that the agent can operate with the current parameters.
+#
+# Verifies, in order: the service binary and netstat (via check_binary),
+# the AMQP port (aodh_evaluator_check_port exits on a bad value), the
+# configuration file, and the run-as user.
+#
+# Returns 0 when everything is usable, $OCF_ERR_INSTALLED when the config
+# file is missing outside of a probe or when the user is unknown.
+aodh_evaluator_validate() {
+    check_binary "$OCF_RESKEY_binary"
+    check_binary netstat
+    aodh_evaluator_check_port "$OCF_RESKEY_amqp_server_port"
+
+    # A config file on shared storage that is not available
+    # during probes is OK.
+    if [ ! -f "$OCF_RESKEY_config" ]; then
+        if ! ocf_is_probe; then
+            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
+            return $OCF_ERR_INSTALLED
+        fi
+        # Logged through ocf_log with the "warn" level, like every other
+        # message in this agent.
+        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
+    fi
+
+    # Quoted so that an empty user name is looked up (and fails) instead of
+    # turning the call into "list every account".
+    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
+        ocf_log err "User $OCF_RESKEY_user doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    return 0
+}
|
|
+
|
|
+# Report whether the process recorded in the pid file is alive.
+#
+# Returns $OCF_SUCCESS when signal 0 can be delivered to the recorded PID,
+# $OCF_NOT_RUNNING when there is no pid file or the PID is gone (a stale
+# pid file is removed in that case).
+aodh_evaluator_status() {
+    local pid
+
+    # No pid file: nothing was started by this agent.
+    if [ ! -f $OCF_RESKEY_pid ]; then
+        ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) is not running"
+        return $OCF_NOT_RUNNING
+    fi
+
+    pid=$(cat $OCF_RESKEY_pid)
+
+    # Signal 0 only probes for existence, nothing is delivered.
+    if ocf_run -warn kill -s 0 $pid; then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log info "Old PID file found, but OpenStack Alarming Evaluator (aodh-evaluator) is not running"
+    rm -f $OCF_RESKEY_pid
+    return $OCF_NOT_RUNNING
+}
|
|
+
|
|
+# Health check. For this service it is the process-liveness test only:
+# the result of aodh_evaluator_status is passed straight through, with a
+# debug log line added on success.
+aodh_evaluator_monitor() {
+    local rc
+
+    aodh_evaluator_status
+    rc=$?
+
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_log debug "OpenStack Alarming Evaluator (aodh-evaluator) monitor succeeded"
+        return $OCF_SUCCESS
+    fi
+
+    # Anything but success is handed back to the caller unchanged.
+    return $rc
+}
|
|
+
|
|
+# Start the daemon unless it is already running, then poll the monitor
+# once per second until it reports success.
+#
+# Returns $OCF_SUCCESS once the monitor succeeds. Exits with
+# $OCF_ERR_GENERIC if the monitor reports anything other than success or
+# "not running". The polling loop itself has no upper bound.
+aodh_evaluator_start() {
+    local rc
+    local launch_cmd
+
+    aodh_evaluator_status
+    rc=$?
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) already running"
+        return $OCF_SUCCESS
+    fi
+
+    # ocf_run is deliberately not used: the daemon's output is discarded
+    # and stdout must carry only the PID printed by "echo $!", which is
+    # redirected into the pid file.
+    launch_cmd="${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config $OCF_RESKEY_additional_parameters"
+    su ${OCF_RESKEY_user} -s /bin/sh -c "${launch_cmd}"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
+
+    # Spin waiting for the server to come up.
+    while :; do
+        aodh_evaluator_monitor
+        rc=$?
+        case $rc in
+            $OCF_SUCCESS)
+                break
+                ;;
+            $OCF_NOT_RUNNING)
+                sleep 1
+                ;;
+            *)
+                ocf_log err "OpenStack Alarming Evaluator (aodh-evaluator) start failed"
+                exit $OCF_ERR_GENERIC
+                ;;
+        esac
+    done
+
+    ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) started"
+    return $OCF_SUCCESS
+}
|
|
+
|
|
+# Last-resort cleanup: SIGKILL any python process whose command line starts
+# with the resolved path of the service binary.
+#
+# Does nothing (and logs a warning) when the binary cannot be resolved,
+# because an empty path would make the pattern match unrelated python
+# processes. Always returns 0 in that case; otherwise the status of the
+# last command run.
+aodh_evaluator_confirm_stop() {
+    local my_binary
+    local my_pattern
+    local my_processes
+
+    my_binary=`which "${OCF_RESKEY_binary}"`
+    if [ -z "${my_binary}" ]; then
+        ocf_log warn "Unable to resolve ${OCF_RESKEY_binary}; skipping leftover process cleanup"
+        return 0
+    fi
+
+    # The path must be followed by end-of-line or by a character that cannot
+    # continue the program name. A POSIX class is used because "\w" inside a
+    # bracket expression is just the two literal characters '\' and 'w'.
+    my_pattern="^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^[:alnum:]_-]|\$)"
+    my_processes=`pgrep -l -f "${my_pattern}"`
+
+    if [ -n "${my_processes}" ]
+    then
+        ocf_log info "About to SIGKILL the following: ${my_processes}"
+        pkill -KILL -f "${my_pattern}"
+    fi
+}
|
|
+
|
|
+# Stop the daemon: SIGTERM first, wait, then SIGKILL, and finally sweep
+# for leftovers with aodh_evaluator_confirm_stop.
+#
+# The wait is 2 seconds unless OCF_RESKEY_CRM_meta_timeout is set, in
+# which case it is that value divided by 1000, minus 5.
+#
+# Returns $OCF_SUCCESS; exits with $OCF_ERR_GENERIC when SIGTERM cannot be
+# sent.
+aodh_evaluator_stop() {
+    local rc
+    local pid
+
+    aodh_evaluator_status
+    rc=$?
+    if [ $rc -eq $OCF_NOT_RUNNING ]; then
+        ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) already stopped"
+        aodh_evaluator_confirm_stop
+        return $OCF_SUCCESS
+    fi
+
+    # Try SIGTERM
+    pid=$(cat $OCF_RESKEY_pid)
+    if ! ocf_run kill -s TERM $pid; then
+        ocf_log err "OpenStack Alarming Evaluator (aodh-evaluator) couldn't be stopped"
+        aodh_evaluator_confirm_stop
+        exit $OCF_ERR_GENERIC
+    fi
+
+    # stop waiting
+    shutdown_timeout=2
+    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
+    fi
+
+    count=0
+    while [ $count -lt $shutdown_timeout ]; do
+        aodh_evaluator_status
+        rc=$?
+        [ $rc -eq $OCF_NOT_RUNNING ] && break
+        count=$((count + 1))
+        sleep 1
+        ocf_log debug "OpenStack Alarming Evaluator (aodh-evaluator) still hasn't stopped yet. Waiting ..."
+    done
+
+    aodh_evaluator_status
+    rc=$?
+    if [ $rc -ne $OCF_NOT_RUNNING ]; then
+        # SIGTERM didn't help either, try SIGKILL
+        ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) failed to stop after ${shutdown_timeout}s \
+ using SIGTERM. Trying SIGKILL ..."
+        ocf_run kill -s KILL $pid
+    fi
+    aodh_evaluator_confirm_stop
+
+    ocf_log info "OpenStack Alarming Evaluator (aodh-evaluator) stopped"
+
+    rm -f $OCF_RESKEY_pid
+
+    return $OCF_SUCCESS
+}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Actions that must work without a validated environment are served first.
+if [ "$1" = "meta-data" ]; then
+    meta_data
+    exit $OCF_SUCCESS
+elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
+    usage
+    exit $OCF_SUCCESS
+fi
+
+# Anything except meta-data and help must pass validation
+aodh_evaluator_validate || exit $?
+
+# What kind of method was invoked?
+case "$1" in
+    start)
+        aodh_evaluator_start
+        ;;
+    stop)
+        aodh_evaluator_stop
+        ;;
+    status)
+        aodh_evaluator_status
+        ;;
+    monitor)
+        aodh_evaluator_monitor
+        ;;
+    validate-all)
+        # Validation already ran above; nothing more to do.
+        ;;
+    *)
+        usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
|
|
--- /dev/null
|
|
+++ b/ocf/aodh-listener
|
|
@@ -0,0 +1,360 @@
|
|
+#!/bin/sh
|
|
+#
|
|
+#
|
|
+# OpenStack Alarming Listener Service (aodh-listener)
|
|
+#
|
|
+# Description: Manages an OpenStack Alarming Listener Service (aodh-listener) process as an HA resource
|
|
+#
|
|
+# Authors: Emilien Macchi
|
|
+# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han
|
|
+#
|
|
+# Support: openstack@lists.launchpad.net
|
|
+# License: Apache Software License (ASL) 2.0
|
|
+#
|
|
+# Copyright (c) 2014-2016 Wind River Systems, Inc.
|
|
+# SPDX-License-Identifier: Apache-2.0
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+# See usage() function below for more details ...
|
|
+#
|
|
+# OCF instance parameters:
|
|
+# OCF_RESKEY_binary
|
|
+# OCF_RESKEY_config
|
|
+# OCF_RESKEY_user
|
|
+# OCF_RESKEY_pid
|
|
+# OCF_RESKEY_monitor_binary
|
|
+# OCF_RESKEY_amqp_server_port
|
|
+# OCF_RESKEY_additional_parameters
|
|
+#######################################################################
|
|
+# Initialization:
|
|
+
|
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
+. /usr/bin/tsconfig
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Per-parameter defaults. Each OCF_RESKEY_* variable that is already set
+# (even to the empty string) is left untouched; only unset ones receive
+# the default defined right above them.
+
+# Executable that is launched for this resource
+OCF_RESKEY_binary_default="aodh-listener"
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+# Configuration file handed to the binary via --config-file
+OCF_RESKEY_config_default="/etc/aodh/aodh.conf"
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+
+# Account the daemon is started under (through su)
+OCF_RESKEY_user_default="root"
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+
+# File in which the PID of the launched process is recorded
+OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+# Port validated by aodh_listener_check_port
+OCF_RESKEY_amqp_server_port_default="5672"
+: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Print the command synopsis followed by a one-line summary of every
+# supported action. $0 is expanded inside the here-document.
+# The per-action lines name the aodh-listener service (they used to say
+# "scheduler", a leftover from the agent this one was derived from).
+usage() {
+    cat <<UEND
+ usage: $0 (start|stop|validate-all|meta-data|status|monitor)
+
+ $0 manages an OpenStack Alarming Listener Service (aodh-listener) process as an HA resource
+
+ The 'start' operation starts the aodh-listener service.
+ The 'stop' operation stops the aodh-listener service.
+ The 'validate-all' operation reports whether the parameters are valid
+ The 'meta-data' operation reports this RA's meta-data information
+ The 'status' operation reports whether the aodh-listener service is running
+ The 'monitor' operation reports whether the aodh-listener service seems to be working
+
+UEND
+}
|
|
+
|
|
+# Emit the resource-agent meta-data XML on stdout. The here-document is
+# unquoted on purpose: the ${OCF_RESKEY_*_default} references are expanded
+# so the advertised defaults are the ones defined at the top of this script.
+meta_data() {
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="aodh-listener">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent for the OpenStack Alarming Listener Service (aodh-listener)
+May manage an aodh-listener instance or a clone set that
+creates a distributed aodh-listener cluster.
+</longdesc>
+<shortdesc lang="en">Manages the OpenStack Alarming Listener Service (aodh-listener)</shortdesc>
+<parameters>
+
+<parameter name="binary" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming Listener server binary (aodh-listener)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Listener server binary (aodh-listener)</shortdesc>
+<content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config" unique="0" required="0">
+<longdesc lang="en">
+Location of the OpenStack Alarming Listener Service (aodh-listener) configuration file
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Listener (aodh-listener) config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User running OpenStack Alarming Listener Service (aodh-listener)
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Listener Service (aodh-listener) user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="pid" unique="0" required="0">
+<longdesc lang="en">
+The pid file to use for this OpenStack Alarming Listener Service (aodh-listener) instance
+</longdesc>
+<shortdesc lang="en">OpenStack Alarming Listener Service (aodh-listener) pid file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+<parameter name="amqp_server_port" unique="0" required="0">
+<longdesc lang="en">
+The listening port number of the AMQP server. Use for monitoring purposes
+</longdesc>
+<shortdesc lang="en">AMQP listening port</shortdesc>
+<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
+</parameter>
+
+
+<parameter name="additional_parameters" unique="0" required="0">
+<longdesc lang="en">
+Additional parameters to pass on to the OpenStack Alarming Listener Service (aodh-listener)
+</longdesc>
+<shortdesc lang="en">Additional parameters for aodh-listener</shortdesc>
+<content type="string" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" timeout="30" interval="20" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
|
|
+
|
|
+#######################################################################
|
|
+# Functions invoked by resource manager actions
|
|
+
|
|
+# Validate a single TCP port number.
+#
+#   $1  candidate port
+#
+# Accepted: a purely decimal value between 1 and 65535; leading zeros are
+# tolerated so the historically valid "0080" keeps working.
+#   valid:   "5672", "1080", "0080", "80", "15672"
+#   invalid: "", "0", "0000", "1080bad", "1:22", "1,22", "70000"
+#
+# Returns 0 for a valid port. Otherwise logs an error and terminates the
+# agent with $OCF_ERR_CONFIGURED.
+aodh_listener_check_port() {
+    local port
+    local value
+
+    port="$1"
+
+    # Digits only (at most five): this rejects the empty string, trailing
+    # garbage and the "a:b" / "a,b" list syntax the former pattern let through.
+    if printf '%s\n' "$port" | grep -Eqx '[0-9]{1,5}'; then
+        # expr evaluates the operand as decimal, which strips leading zeros.
+        value=`expr "$port" + 0`
+        if [ "$value" -ge 1 ] && [ "$value" -le 65535 ]; then
+            return 0
+        fi
+    fi
+
+    ocf_log err "Invalid port number: $1"
+    exit $OCF_ERR_CONFIGURED
+}
|
|
+
|
|
+# Check that the agent can operate with the current parameters.
+#
+# Verifies, in order: the service binary and netstat (via check_binary),
+# the AMQP port (aodh_listener_check_port exits on a bad value), the
+# configuration file, and the run-as user.
+#
+# Returns 0 when everything is usable, $OCF_ERR_INSTALLED when the config
+# file is missing outside of a probe or when the user is unknown.
+aodh_listener_validate() {
+    check_binary "$OCF_RESKEY_binary"
+    check_binary netstat
+    aodh_listener_check_port "$OCF_RESKEY_amqp_server_port"
+
+    # A config file on shared storage that is not available
+    # during probes is OK.
+    if [ ! -f "$OCF_RESKEY_config" ]; then
+        if ! ocf_is_probe; then
+            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
+            return $OCF_ERR_INSTALLED
+        fi
+        # Logged through ocf_log with the "warn" level, like every other
+        # message in this agent.
+        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
+    fi
+
+    # Quoted so that an empty user name is looked up (and fails) instead of
+    # turning the call into "list every account".
+    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
+        ocf_log err "User $OCF_RESKEY_user doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    return 0
+}
|
|
+
|
|
+# Report whether the process recorded in the pid file is alive.
+#
+# Returns $OCF_SUCCESS when signal 0 can be delivered to the recorded PID,
+# $OCF_NOT_RUNNING when there is no pid file or the PID is gone (a stale
+# pid file is removed in that case).
+aodh_listener_status() {
+    local pid
+
+    # No pid file: nothing was started by this agent.
+    if [ ! -f $OCF_RESKEY_pid ]; then
+        ocf_log info "OpenStack Alarming Listener (aodh-listener) is not running"
+        return $OCF_NOT_RUNNING
+    fi
+
+    pid=$(cat $OCF_RESKEY_pid)
+
+    # Signal 0 only probes for existence, nothing is delivered.
+    if ocf_run -warn kill -s 0 $pid; then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log info "Old PID file found, but OpenStack Alarming Listener (aodh-listener) is not running"
+    rm -f $OCF_RESKEY_pid
+    return $OCF_NOT_RUNNING
+}
|
|
+
|
|
+# Health check. For this service it is the process-liveness test only:
+# the result of aodh_listener_status is passed straight through, with a
+# debug log line added on success.
+aodh_listener_monitor() {
+    local rc
+
+    aodh_listener_status
+    rc=$?
+
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_log debug "OpenStack Alarming Listener (aodh-listener) monitor succeeded"
+        return $OCF_SUCCESS
+    fi
+
+    # Anything but success is handed back to the caller unchanged.
+    return $rc
+}
|
|
+
|
|
+aodh_listener_start() {
|
|
+ local rc
|
|
+
|
|
+ aodh_listener_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_SUCCESS ]; then
|
|
+ ocf_log info "OpenStack Alarming Listener (aodh-listener) already running"
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+
|
|
+ # run the actual aodh-listener daemon. Don't use ocf_run as we're sending the tool's output
|
|
+ # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
|
|
+ su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
|
|
+ $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
|
|
+
|
|
+ # Spin waiting for the server to come up.
|
|
+ while true; do
|
|
+ aodh_listener_monitor
|
|
+ rc=$?
|
|
+ [ $rc -eq $OCF_SUCCESS ] && break
|
|
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
+ ocf_log err "OpenStack Alarming Listener (aodh-listener) start failed"
|
|
+ exit $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ sleep 1
|
|
+ done
|
|
+
|
|
+ ocf_log info "OpenStack Alarming Listener (aodh-listener) started"
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+aodh_listener_confirm_stop() {
|
|
+ local my_bin
|
|
+ local my_processes
|
|
+
|
|
+ my_binary=`which ${OCF_RESKEY_binary}`
|
|
+ my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`
|
|
+
|
|
+ if [ -n "${my_processes}" ]
|
|
+ then
|
|
+ ocf_log info "About to SIGKILL the following: ${my_processes}"
|
|
+ pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
|
|
+ fi
|
|
+}
|
|
+
|
|
+aodh_listener_stop() {
|
|
+ local rc
|
|
+ local pid
|
|
+
|
|
+ aodh_listener_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
+ ocf_log info "OpenStack Alarming Listener (aodh-listener) already stopped"
|
|
+ aodh_listener_confirm_stop
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+
|
|
+ # Try SIGTERM
|
|
+ pid=`cat $OCF_RESKEY_pid`
|
|
+ ocf_run kill -s TERM $pid
|
|
+ rc=$?
|
|
+ if [ $rc -ne 0 ]; then
|
|
+ ocf_log err "OpenStack Alarming Listener (aodh-listener) couldn't be stopped"
|
|
+ aodh_listener_confirm_stop
|
|
+ exit $OCF_ERR_GENERIC
|
|
+ fi
|
|
+
|
|
+ # stop waiting
|
|
+ shutdown_timeout=2
|
|
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
|
|
+ shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
|
|
+ fi
|
|
+ count=0
|
|
+ while [ $count -lt $shutdown_timeout ]; do
|
|
+ aodh_listener_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
+ break
|
|
+ fi
|
|
+ count=`expr $count + 1`
|
|
+ sleep 1
|
|
+ ocf_log debug "OpenStack Alarming Listener (aodh-listener) still hasn't stopped yet. Waiting ..."
|
|
+ done
|
|
+
|
|
+ aodh_listener_status
|
|
+ rc=$?
|
|
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
+ # SIGTERM didn't help either, try SIGKILL
|
|
+ ocf_log info "OpenStack Alarming Listener (aodh-listener) failed to stop after ${shutdown_timeout}s \
|
|
+ using SIGTERM. Trying SIGKILL ..."
|
|
+ ocf_run kill -s KILL $pid
|
|
+ fi
|
|
+ aodh_listener_confirm_stop
|
|
+
|
|
+ ocf_log info "OpenStack Alarming Listener (aodh-listener) stopped"
|
|
+
|
|
+ rm -f $OCF_RESKEY_pid
|
|
+
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+case "$1" in
|
|
+ meta-data) meta_data
|
|
+ exit $OCF_SUCCESS;;
|
|
+ usage|help) usage
|
|
+ exit $OCF_SUCCESS;;
|
|
+esac
|
|
+
|
|
+# Anything except meta-data and help must pass validation
|
|
+aodh_listener_validate || exit $?
|
|
+
|
|
+# What kind of method was invoked?
|
|
+case "$1" in
|
|
+ start) aodh_listener_start;;
|
|
+ stop) aodh_listener_stop;;
|
|
+ status) aodh_listener_status;;
|
|
+ monitor) aodh_listener_monitor;;
|
|
+ validate-all) ;;
|
|
+ *) usage
|
|
+ exit $OCF_ERR_UNIMPLEMENTED;;
|
|
+esac
|
|
--- /dev/null
|
|
+++ b/ocf/aodh-notifier
|
|
@@ -0,0 +1,360 @@
|
|
+#!/bin/sh
|
|
+#
|
|
+#
|
|
+# OpenStack Alarming Notifier Service (aodh-notifier)
|
|
+#
|
|
+# Description: Manages an OpenStack Alarming Notifier Service (aodh-notifier) process as an HA resource
|
|
+#
|
|
+# Authors: Emilien Macchi
|
|
+# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han
|
|
+#
|
|
+# Support: openstack@lists.launchpad.net
|
|
+# License: Apache Software License (ASL) 2.0
|
|
+#
|
|
+# Copyright (c) 2014-2016 Wind River Systems, Inc.
|
|
+# SPDX-License-Identifier: Apache-2.0
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+#
|
|
+# See usage() function below for more details ...
|
|
+#
|
|
+# OCF instance parameters:
|
|
+# OCF_RESKEY_binary
|
|
+# OCF_RESKEY_config
|
|
+# OCF_RESKEY_user
|
|
+# OCF_RESKEY_pid
|
|
+# OCF_RESKEY_monitor_binary
|
|
+# OCF_RESKEY_amqp_server_port
|
|
+# OCF_RESKEY_additional_parameters
|
|
+#######################################################################
|
|
+# Initialization:
|
|
+
|
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
+. /usr/bin/tsconfig
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+# Fill in some defaults if no values are specified
|
|
+
|
|
+OCF_RESKEY_binary_default="aodh-notifier"
|
|
+OCF_RESKEY_config_default="/etc/aodh/aodh.conf"
|
|
+OCF_RESKEY_user_default="root"
|
|
+OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
|
|
+OCF_RESKEY_amqp_server_port_default="5672"
|
|
+
|
|
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
|
|
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
|
|
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
|
|
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
|
+: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+usage() {
|
|
+ cat <<UEND
|
|
+ usage: $0 (start|stop|validate-all|meta-data|status|monitor)
|
|
+
|
|
+ $0 manages an OpenStack Alarming Notifier Service (aodh-notifier) process as an HA resource
|
|
+
|
|
+ The 'start' operation starts the scheduler service.
|
|
+ The 'stop' operation stops the scheduler service.
|
|
+ The 'validate-all' operation reports whether the parameters are valid
|
|
+ The 'meta-data' operation reports this RA's meta-data information
|
|
+ The 'status' operation reports whether the scheduler service is running
|
|
+ The 'monitor' operation reports whether the scheduler service seems to be working
|
|
+
|
|
+UEND
|
|
+}
|
|
+
|
|
+meta_data() {
|
|
+ cat <<END
|
|
+<?xml version="1.0"?>
|
|
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
|
+<resource-agent name="aodh-notifier">
|
|
+<version>1.0</version>
|
|
+
|
|
+<longdesc lang="en">
|
|
+Resource agent for the OpenStack Alarming Notifier Service (aodh-notifier)
|
|
+May manage a aodh-notifier instance or a clone set that
|
|
+creates a distributed aodh-notifier cluster.
|
|
+</longdesc>
|
|
+<shortdesc lang="en">Manages the OpenStack Alarming Notifier Service (aodh-notifier)</shortdesc>
|
|
+<parameters>
|
|
+
|
|
+<parameter name="binary" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+Location of the OpenStack Alarming Notifier server binary (aodh-notifier)
|
|
+</longdesc>
|
|
+<shortdesc lang="en">OpenStack Alarming Notifier server binary (aodh-notifier)</shortdesc>
|
|
+<content type="string" default="${OCF_RESKEY_binary_default}" />
|
|
+</parameter>
|
|
+
|
|
+<parameter name="config" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+Location of the OpenStack Alarming Notifier Service (aodh-notifier) configuration file
|
|
+</longdesc>
|
|
+<shortdesc lang="en">OpenStack Alarming Notifier (aodh-notifier registry) config file</shortdesc>
|
|
+<content type="string" default="${OCF_RESKEY_config_default}" />
|
|
+</parameter>
|
|
+
|
|
+<parameter name="user" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+User running OpenStack Alarming Notifier Service (aodh-notifier)
|
|
+</longdesc>
|
|
+<shortdesc lang="en">OpenStack Alarming Notifier Service (aodh-notifier) user</shortdesc>
|
|
+<content type="string" default="${OCF_RESKEY_user_default}" />
|
|
+</parameter>
|
|
+
|
|
+<parameter name="pid" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+The pid file to use for this OpenStack Alarming Notifier Service (aodh-notifier) instance
|
|
+</longdesc>
|
|
+<shortdesc lang="en">OpenStack Alarming Notifier Service (aodh-notifier) pid file</shortdesc>
|
|
+<content type="string" default="${OCF_RESKEY_pid_default}" />
|
|
+</parameter>
|
|
+
|
|
+<parameter name="amqp_server_port" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+The listening port number of the AMQP server. Use for monitoring purposes
|
|
+</longdesc>
|
|
+<shortdesc lang="en">AMQP listening port</shortdesc>
|
|
+<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
|
|
+</parameter>
|
|
+
|
|
+
|
|
+<parameter name="additional_parameters" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+Additional parameters to pass on to the OpenStack Alarming Notifier Service (aodh-notifier)
|
|
+</longdesc>
|
|
+<shortdesc lang="en">Additional parameters for aodh-notifier</shortdesc>
|
|
+<content type="string" />
|
|
+</parameter>
|
|
+
|
|
+</parameters>
|
|
+
|
|
+<actions>
|
|
+<action name="start" timeout="20" />
|
|
+<action name="stop" timeout="20" />
|
|
+<action name="status" timeout="20" />
|
|
+<action name="monitor" timeout="30" interval="20" />
|
|
+<action name="validate-all" timeout="5" />
|
|
+<action name="meta-data" timeout="5" />
|
|
+</actions>
|
|
+</resource-agent>
|
|
+END
|
|
+}
|
|
+
|
|
+#######################################################################
|
|
+# Functions invoked by resource manager actions
|
|
+
|
|
+aodh_notifier_check_port() {
|
|
+# This function has been taken from the squid RA and improved a bit
|
|
+# The length of the integer must be 4
|
|
+# Examples of valid port: "1080", "0080"
|
|
+# Examples of invalid port: "1080bad", "0", "0000", ""
|
|
+
|
|
+ local int
|
|
+ local cnt
|
|
+
|
|
+ int="$1"
|
|
+ cnt=${#int}
|
|
+ echo $int |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*'
|
|
+
|
|
+ if [ $? -ne 0 ] || [ $cnt -ne 4 ]; then
|
|
+ ocf_log err "Invalid port number: $1"
|
|
+ exit $OCF_ERR_CONFIGURED
|
|
+ fi
|
|
+}
|
|
+
|
|
+aodh_notifier_validate() {
|
|
+ local rc
|
|
+
|
|
+ check_binary $OCF_RESKEY_binary
|
|
+ check_binary netstat
|
|
+ aodh_notifier_check_port $OCF_RESKEY_amqp_server_port
|
|
+
|
|
+ # A config file on shared storage that is not available
|
|
+ # during probes is OK.
|
|
+ if [ ! -f $OCF_RESKEY_config ]; then
|
|
+ if ! ocf_is_probe; then
|
|
+ ocf_log err "Config $OCF_RESKEY_config doesn't exist"
|
|
+ return $OCF_ERR_INSTALLED
|
|
+ fi
|
|
+ ocf_log_warn "Config $OCF_RESKEY_config not available during a probe"
|
|
+ fi
|
|
+
|
|
+ getent passwd $OCF_RESKEY_user >/dev/null 2>&1
|
|
+ rc=$?
|
|
+ if [ $rc -ne 0 ]; then
|
|
+ ocf_log err "User $OCF_RESKEY_user doesn't exist"
|
|
+ return $OCF_ERR_INSTALLED
|
|
+ fi
|
|
+
|
|
+ true
|
|
+}
|
|
+
|
|
+aodh_notifier_status() {
|
|
+ local pid
|
|
+ local rc
|
|
+
|
|
+ if [ ! -f $OCF_RESKEY_pid ]; then
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) is not running"
|
|
+ return $OCF_NOT_RUNNING
|
|
+ else
|
|
+ pid=`cat $OCF_RESKEY_pid`
|
|
+ fi
|
|
+
|
|
+ ocf_run -warn kill -s 0 $pid
|
|
+ rc=$?
|
|
+ if [ $rc -eq 0 ]; then
|
|
+ return $OCF_SUCCESS
|
|
+ else
|
|
+ ocf_log info "Old PID file found, but OpenStack Alarming Notifier (aodh-notifier) is not running"
|
|
+ rm -f $OCF_RESKEY_pid
|
|
+ return $OCF_NOT_RUNNING
|
|
+ fi
|
|
+}
|
|
+
|
|
+aodh_notifier_monitor() {
|
|
+ local rc
|
|
+ local pid
|
|
+ local scheduler_amqp_check
|
|
+
|
|
+ aodh_notifier_status
|
|
+ rc=$?
|
|
+
|
|
+ # If status returned anything but success, return that immediately
|
|
+ if [ $rc -ne $OCF_SUCCESS ]; then
|
|
+ return $rc
|
|
+ fi
|
|
+
|
|
+ ocf_log debug "OpenStack Alarming Notifier (aodh-notifier) monitor succeeded"
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+aodh_notifier_start() {
|
|
+ local rc
|
|
+
|
|
+ aodh_notifier_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_SUCCESS ]; then
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) already running"
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+
|
|
+ # run the actual aodh-notifier daemon. Don't use ocf_run as we're sending the tool's output
|
|
+ # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
|
|
+ su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
|
|
+ $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
|
|
+
|
|
+ # Spin waiting for the server to come up.
|
|
+ while true; do
|
|
+ aodh_notifier_monitor
|
|
+ rc=$?
|
|
+ [ $rc -eq $OCF_SUCCESS ] && break
|
|
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
+ ocf_log err "OpenStack Alarming Notifier (aodh-notifier) start failed"
|
|
+ exit $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ sleep 1
|
|
+ done
|
|
+
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) started"
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+aodh_notifier_confirm_stop() {
|
|
+ local my_bin
|
|
+ local my_processes
|
|
+
|
|
+ my_binary=`which ${OCF_RESKEY_binary}`
|
|
+ my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`
|
|
+
|
|
+ if [ -n "${my_processes}" ]
|
|
+ then
|
|
+ ocf_log info "About to SIGKILL the following: ${my_processes}"
|
|
+ pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
|
|
+ fi
|
|
+}
|
|
+
|
|
+aodh_notifier_stop() {
|
|
+ local rc
|
|
+ local pid
|
|
+
|
|
+ aodh_notifier_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) already stopped"
|
|
+ aodh_notifier_confirm_stop
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+
|
|
+ # Try SIGTERM
|
|
+ pid=`cat $OCF_RESKEY_pid`
|
|
+ ocf_run kill -s TERM $pid
|
|
+ rc=$?
|
|
+ if [ $rc -ne 0 ]; then
|
|
+ ocf_log err "OpenStack Alarming Notifier (aodh-notifier) couldn't be stopped"
|
|
+ aodh_notifier_confirm_stop
|
|
+ exit $OCF_ERR_GENERIC
|
|
+ fi
|
|
+
|
|
+ # stop waiting
|
|
+ shutdown_timeout=2
|
|
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
|
|
+ shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
|
|
+ fi
|
|
+ count=0
|
|
+ while [ $count -lt $shutdown_timeout ]; do
|
|
+ aodh_notifier_status
|
|
+ rc=$?
|
|
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
+ break
|
|
+ fi
|
|
+ count=`expr $count + 1`
|
|
+ sleep 1
|
|
+ ocf_log debug "OpenStack Alarming Notifier (aodh-notifier) still hasn't stopped yet. Waiting ..."
|
|
+ done
|
|
+
|
|
+ aodh_notifier_status
|
|
+ rc=$?
|
|
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
+ # SIGTERM didn't help either, try SIGKILL
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) failed to stop after ${shutdown_timeout}s \
|
|
+ using SIGTERM. Trying SIGKILL ..."
|
|
+ ocf_run kill -s KILL $pid
|
|
+ fi
|
|
+ aodh_notifier_confirm_stop
|
|
+
|
|
+ ocf_log info "OpenStack Alarming Notifier (aodh-notifier) stopped"
|
|
+
|
|
+ rm -f $OCF_RESKEY_pid
|
|
+
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+#######################################################################
|
|
+
|
|
+case "$1" in
|
|
+ meta-data) meta_data
|
|
+ exit $OCF_SUCCESS;;
|
|
+ usage|help) usage
|
|
+ exit $OCF_SUCCESS;;
|
|
+esac
|
|
+
|
|
+# Anything except meta-data and help must pass validation
|
|
+aodh_notifier_validate || exit $?
|
|
+
|
|
+# What kind of method was invoked?
|
|
+case "$1" in
|
|
+ start) aodh_notifier_start;;
|
|
+ stop) aodh_notifier_stop;;
|
|
+ status) aodh_notifier_status;;
|
|
+ monitor) aodh_notifier_monitor;;
|
|
+ validate-all) ;;
|
|
+ *) usage
|
|
+ exit $OCF_ERR_UNIMPLEMENTED;;
|
|
+esac
|