fuel-library/files/fuel-ha-utils/ocf/ocf-neutron-l3-agent
2015-08-21 13:37:55 +00:00

506 lines
17 KiB
Bash

#!/bin/bash
#
#
# OpenStack L3 Service (neutron-l3-agent)
#
# Description: Manages an OpenStack L3 Service (neutron-l3-agent) process as an HA resource
#
# Authors: Emilien Macchi
# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han
#
# Support: openstack@lists.launchpad.net
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_plugin_config
# OCF_RESKEY_log_file
# OCF_RESKEY_check_state_reports
# OCF_RESKEY_state_reports_file
# OCF_RESKEY_state_reports_timeout
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_external_bridge
# OCF_RESKEY_remove_artifacts_on_stop_start
#######################################################################
# Initialization:
# Locate and load the OCF shell function library.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#######################################################################
# Built-in defaults, used for every instance parameter that is not set.
PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Agent process and its configuration
OCF_RESKEY_binary_default='neutron-l3-agent'
OCF_RESKEY_user_default='neutron'
OCF_RESKEY_config_default='/etc/neutron/neutron.conf'
OCF_RESKEY_plugin_config_default='/etc/neutron/l3_agent.ini'
OCF_RESKEY_log_file_default='/var/log/neutron/l3-agent.log'
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
OCF_RESKEY_external_bridge_default='br-ex'

# Local state report monitoring
OCF_RESKEY_check_state_reports_default='false'
OCF_RESKEY_state_reports_file_default='/var/lib/neutron/l3_agent_report.log'
OCF_RESKEY_state_reports_timeout_default='60'

# Artifact cleanup around stop/start
OCF_RESKEY_remove_artifacts_on_stop_start_default='true'

# Logging identity (kept if already defined)
: "${HA_LOGTAG=ocf-neutron-l3-agent}"
: "${HA_LOGFACILITY=daemon}"

# Apply a default only where the parameter is unset.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}}"
: "${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_external_bridge=${OCF_RESKEY_external_bridge_default}}"
: "${OCF_RESKEY_check_state_reports=${OCF_RESKEY_check_state_reports_default}}"
: "${OCF_RESKEY_state_reports_file=${OCF_RESKEY_state_reports_file_default}}"
: "${OCF_RESKEY_state_reports_timeout=${OCF_RESKEY_state_reports_timeout_default}}"
: "${OCF_RESKEY_remove_artifacts_on_stop_start=${OCF_RESKEY_remove_artifacts_on_stop_start_default}}"
#######################################################################
# Print a short help text to stdout: the command synopsis followed by a
# one-line description of every supported action.
usage() {
    cat <<UEND
usage: $0 (start|stop|reload|validate-all|meta-data|status|monitor)
$0 manages an OpenStack L3 Service (neutron-l3-agent) process as an HA resource
The 'start' operation starts the networking service.
The 'stop' operation stops the networking service.
The 'reload' operation restarts the networking service without removing any artifacts
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the networking service is running
The 'monitor' operation reports whether the networking service seems to be working
UEND
}
# Print the resource agent meta-data XML to stdout.
# The heredoc is unquoted on purpose: each ${OCF_RESKEY_*_default} is
# expanded so the advertised defaults always match the ones this script
# actually applies. Every parameter that has a built-in default now
# advertises it, including external_bridge and
# remove_artifacts_on_stop_start.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-l3-agent">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the OpenStack L3 agent (neutron-l3-agent)
May manage a neutron-l3-agent instance or a clone set that
creates a distributed neutron-l3-agent cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack L3 Service (neutron-l3-agent)</shortdesc>
<parameters>
<parameter name="dummy" unique="1">
<longdesc lang="en">
This is a dummy parameter.
Pacemaker needs it to enable reload operation for the resource
</longdesc>
<shortdesc lang="en">Dummy parameter</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack L3 agent server binary (neutron-l3-agent)
</longdesc>
<shortdesc lang="en">OpenStack L3 agent server binary (neutron-l3-agent)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack L3 agent (neutron-server) configuration file
</longdesc>
<shortdesc lang="en">OpenStack L3 agent (neutron-server) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="plugin_config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack L3 Service (neutron-l3-agent) configuration file
</longdesc>
<shortdesc lang="en">OpenStack L3 agent (neutron-l3-agent) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_plugin_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack L3 Service (neutron-l3-agent)
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-l3-agent) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack L3 Service (neutron-l3-agent) instance
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-l3-agent) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="log_file" unique="0" required="0">
<longdesc lang="en">
The log file to use for this OpenStack L3 Service (neutron-l3-agent) instance
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-l3-agent) log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_file_default}" />
</parameter>
<parameter name="check_state_reports" unique="0" required="0">
<longdesc lang="en">
The flag, which enables or disables additional monitoring
based on agent's local state reports
</longdesc>
<shortdesc lang="en">Enable or disable local state reports based monitoring</shortdesc>
<content type="string" default="${OCF_RESKEY_check_state_reports_default}" />
</parameter>
<parameter name="state_reports_file" unique="0" required="0">
<longdesc lang="en">
This file contains L3 agent local state report information.
There're three section in it: STARTUP, RPC_STATE_REPORT and SYNC_STATE.
</longdesc>
<shortdesc lang="en">L3 agent local state report file</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_file_default}" />
</parameter>
<parameter name="state_reports_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout value for L3 agent to update its local state report.
If it takes more time than the specified value agent is considered to be dead.
</longdesc>
<shortdesc lang="en">L3 agent local state reports timeout</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_timeout_default}" />
</parameter>
<parameter name="external_bridge" unique="0" required="0">
<longdesc lang="en">
External bridge for l3-agent
</longdesc>
<shortdesc lang="en">External bridge</shortdesc>
<content type="string" default="${OCF_RESKEY_external_bridge_default}" />
</parameter>
<parameter name="remove_artifacts_on_stop_start" unique="0" required="0">
<longdesc lang="en">
Clean up all resources created by Neutron L3 agent, such as additional processes,
network namespaces, created interfaces, on agent stop and start.
</longdesc>
<shortdesc lang="en">Clean up all resources created by L3 agent on its start and stop</shortdesc>
<content type="string" default="${OCF_RESKEY_remove_artifacts_on_stop_start_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="reload" timeout="30" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Print the PID(s) of the running agent worker on one line.
# Candidates are the processes owned by OCF_RESKEY_user whose full command
# line matches OCF_RESKEY_binary; only those listed as
# "python /usr/bin..." are kept. Prints an empty line when none is found.
get_worker_pid() {
    local pgrep_flags='flo'
    local worker_pids

    # FIXME: Drop the version probe and always use 'falo' once Fuel
    # discontinues support of Ubuntu 12.04 and CentOS 6.x, where pgrep
    # does not know the -a option yet.
    if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then
        pgrep_flags='falo'
    fi

    worker_pids=$(pgrep -u ${OCF_RESKEY_user} -${pgrep_flags} ${OCF_RESKEY_binary} \
        | awk '/python \/usr\/bin/ {print $1}')

    # Left unquoted on purpose: several PIDs are joined with single spaces.
    echo $worker_pids
}
#######################################################################
# Functions invoked by resource manager actions
# Validate the environment the agent needs.
# Checks (in order): the agent binary and netstat are available, the
# main config file exists, the service user exists.
# Returns: 0 when everything is fine,
#          $OCF_ERR_INSTALLED when the config file is missing outside of
#          a probe or the user does not exist.
# Note: check_binary comes from ocf-shellfuncs and handles a missing
# binary itself.
neutron_l3_agent_validate() {
    local rc

    check_binary "$OCF_RESKEY_binary"
    check_binary netstat

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # 'ocf_log warn' is the logging call used everywhere else in this
        # file; the previous 'ocf_log_warn' spelling is not a known
        # helper, so the warning was never emitted.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    true
}
# Report whether the agent worker is running and keep the PID file in
# sync with what pgrep reports.
# Returns: $OCF_NOT_RUNNING when no worker process is found,
#          $OCF_SUCCESS when a worker runs (the PID file is created or
#          refreshed if it was missing or stale),
#          $OCF_ERR_GENERIC when the PID file names another live process.
neutron_l3_agent_status() {
    local worker_pid
    local recorded_pid
    local pid_dir

    # Make sure the directory holding the PID file exists and is owned
    # by the service user.
    pid_dir="$( dirname ${OCF_RESKEY_pid} )"
    if [ ! -d "${pid_dir}" ]; then
        ocf_log debug "Create pid file dir: ${pid_dir} and chown to ${OCF_RESKEY_user}"
        mkdir -p "${pid_dir}"
        chown -R ${OCF_RESKEY_user} "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    worker_pid=$(get_worker_pid)
    if [ -z "$worker_pid" ]; then
        ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running."
        return $OCF_NOT_RUNNING
    fi

    if [ -f $OCF_RESKEY_pid ]; then
        # First whitespace-separated token of the PID file.
        recorded_pid=$(cat $OCF_RESKEY_pid | tr '\n' ' ' | awk '{print $1}')

        # PID file agrees with pgrep: nothing to do.
        if [[ "$worker_pid" == "$recorded_pid" ]]; then
            return $OCF_SUCCESS
        fi

        # PID file disagrees and points at some other live process.
        if [ -d "/proc/$recorded_pid" ] && [ -n "$recorded_pid" ]; then
            ocf_log warn "Another daemon (with PID=$recorded_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$worker_pid"
            return $OCF_ERR_GENERIC
        fi

        # PID file is stale (or empty): rewrite it.
        ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$recorded_pid found."
        ocf_log warn "PID-file will be re-created (with PID=$worker_pid)."
        echo $worker_pid > $OCF_RESKEY_pid
        return $OCF_SUCCESS
    fi

    # Worker is running but nothing recorded it yet.
    ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found."
    ocf_log warn "Writing PID='$worker_pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..."
    echo $worker_pid > $OCF_RESKEY_pid
    return $OCF_SUCCESS
}
# Print the value stored under a key inside one section of the state
# reports file.
# Arguments: $1 - section name (used as an awk pattern)
#            $2 - key name (used as a grep pattern)
# The section is taken as the lines from the first match of $1 up to the
# next line containing '}'. From every line in it that matches $2 the
# first field (the key) is dropped and double quotes and commas are
# removed from the rest.
get_local_reports_value() {
    local section_name=$1
    local key_name=$2
    local extracted

    extracted=$(awk "/$section_name/,/}/" $OCF_RESKEY_state_reports_file \
        | grep $key_name \
        | awk '{$1=""; print $0}' \
        | tr -d "\",")

    # Left unquoted on purpose: surrounding whitespace is squeezed away.
    echo $extracted
}
# Judge agent health from its local state reports file.
# Returns: $OCF_SUCCESS when the file is absent or nothing is overdue,
#          $OCF_ERR_GENERIC when the RPC state report timestamp is older
#          than OCF_RESKEY_state_reports_timeout seconds, or when any of
#          the STARTUP / RPC_STATE_REPORT / SYNC_STATE sections has been
#          in "failure" status for longer than that timeout.
# A missing or unparsable timestamp/date is logged and skipped rather
# than fed into shell arithmetic.
check_local_reports() {
    local section
    local status
    local since
    local since_epoch
    local now
    local rpc_timestamp

    if [ ! -f "$OCF_RESKEY_state_reports_file" ]; then
        ocf_log warn "State reports file wasn't found"
        return $OCF_SUCCESS
    fi

    now=$(date +%s)

    # The timestamp carries a fractional part; keep whole seconds only.
    rpc_timestamp=$(get_local_reports_value RPC_STATE_REPORT Timestamp)
    rpc_timestamp=${rpc_timestamp%.*}
    if [[ "$rpc_timestamp" =~ ^[0-9]+$ ]]; then
        if [[ $((now - rpc_timestamp)) -gt $OCF_RESKEY_state_reports_timeout ]]; then
            ocf_log err "Agent is not reporting for too long"
            return $OCF_ERR_GENERIC
        fi
    else
        ocf_log warn "RPC state report timestamp is missing or malformed, skipping its check"
    fi

    for section in STARTUP RPC_STATE_REPORT SYNC_STATE; do
        status=$(get_local_reports_value "$section" Status)
        [[ $status =~ failure ]] || continue

        # Use the 'Since' of the section being examined, so the duration
        # matches the section named in the error message below.
        since=$(get_local_reports_value "$section" Since)
        since_epoch=""
        if [ -n "$since" ]; then
            since_epoch=$(date --date="$since" +%s 2>/dev/null)
        fi
        if [[ ! "$since_epoch" =~ ^[0-9]+$ ]]; then
            ocf_log warn "$section report is in failure status, but its 'Since' date is missing or malformed"
            continue
        fi

        if [[ $((now - since_epoch)) -gt $OCF_RESKEY_state_reports_timeout ]]; then
            ocf_log err "$section report is in failure status for too long"
            return $OCF_ERR_GENERIC
        fi
    done

    return $OCF_SUCCESS
}
# Monitor action: process/PID-file status first, then - only when the
# process check passed and check_state_reports is enabled - the agent's
# local state reports.
# Returns the status code of the last check that was run.
neutron_l3_agent_monitor() {
    # Keep the result private so callers' variables are not overwritten.
    local rc

    neutron_l3_agent_status
    rc=$?

    if [ $rc -eq $OCF_SUCCESS ] && ocf_is_true "$OCF_RESKEY_check_state_reports"; then
        check_local_reports
        rc=$?
    fi

    return $rc
}
# Start action.
# Arguments: $1 - optional override for remove_artifacts_on_stop_start
#                 (the reload action passes 'false' for a fast restart)
# Returns: $OCF_SUCCESS once the monitor reports the agent healthy (or it
#          was already running),
#          $OCF_ERR_GENERIC when the monitor reports anything other than
#          "not running" while waiting.
# There is no internal timeout: the cluster manager's operation timeout
# bounds the wait loop.
neutron_l3_agent_start() {
    local rc
    # This variable is overridden by reload operation
    # to perform fast resource restart
    local remove_artifacts_on_stop_start=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}

    neutron_l3_agent_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack neutron-l3-agent already running"
        return $OCF_SUCCESS
    fi

    if ocf_is_true "$remove_artifacts_on_stop_start"; then
        neutron-netns-cleanup --agent-type=l3 --force --config-file "$OCF_RESKEY_config"
    fi

    # Drop the previous run's state report.
    rm -f "$OCF_RESKEY_state_reports_file"

    # Run the agent detached in the background as the service user.
    # Don't use ocf_run as we're sending the tool's output to /dev/null.
    # The whole command is ONE double-quoted string: the PID file path is
    # expanded here (the variable need not be exported), while \$! is
    # passed through so the inner shell records the PID of the job it
    # has just backgrounded.
    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
        --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file \
        >> /dev/null 2>&1 & echo \$! > $OCF_RESKEY_pid"
    ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid} 2>/dev/null)"

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        neutron_l3_agent_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack neutron-l3-agent start failed"
            # return (not exit), like the stop action does; the action
            # dispatcher ends the script with this status anyway.
            return $OCF_ERR_GENERIC
        fi
        sleep 3
    done

    ocf_log info "OpenStack L3 agent (neutron-l3-agent) started"
    return $OCF_SUCCESS
}
# Helper: succeed when at least one of the given PIDs still has a /proc
# entry. With no arguments it fails, i.e. "nothing is alive".
_neutron_l3_agent_any_alive() {
    local candidate
    for candidate in "$@"; do
        [ -d "/proc/${candidate}" ] && return 0
    done
    return 1
}

# Stop action.
# Arguments: $1 - optional override for remove_artifacts_on_stop_start
#                 (the reload action passes 'false' for a fast restart)
# Sends SIGTERM once per second until the worker is gone or the grace
# period is over, then SIGKILL. The grace period is 15 s, or the
# operation timeout given in OCF_RESKEY_CRM_meta_timeout (milliseconds)
# minus 6 s, but never less than 1 s so SIGTERM is always tried first.
# Returns: $OCF_SUCCESS when the agent is (already) stopped,
#          $OCF_ERR_GENERIC when it survives SIGKILL.
neutron_l3_agent_stop() {
    local rc
    local pid
    local shutdown_timeout=15
    local iteration_time=1
    local clock=0
    # This variable is overridden by reload operation
    # to perform fast resource restart
    local remove_artifacts_on_stop_start=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}

    neutron_l3_agent_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        if ocf_is_true "$remove_artifacts_on_stop_start"; then
            neutron-netns-cleanup --agent-type=l3 --force --config-file "$OCF_RESKEY_config"
        fi
        ocf_log info "OpenStack L3 agent ($OCF_RESKEY_binary) already stopped"
        return $OCF_SUCCESS
    fi

    # Terminate agent daemon.
    # $pid may hold several PIDs and may be empty if the worker exited
    # right after the status check; it is expanded unquoted below so each
    # PID becomes its own argument.
    pid=$(get_worker_pid)
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout / 1000) - 6 ))
        if [ $shutdown_timeout -lt 1 ]; then
            shutdown_timeout=1
        fi
    fi

    # Try to terminate gracefully
    while _neutron_l3_agent_any_alive $pid && [ $clock -lt $shutdown_timeout ]; do
        ocf_log debug "Stopping L3 agent (${OCF_RESKEY_binary}) gracefully with SIGTERM"
        ocf_run kill -s TERM ${pid}
        sleep $iteration_time
        clock=$((clock + iteration_time))
    done

    # Send kill signal if process is still up
    if _neutron_l3_agent_any_alive $pid; then
        ocf_log debug "Killing L3 agent (${OCF_RESKEY_binary}) with SIGKILL"
        ocf_run kill -s KILL ${pid}
        sleep 1
        if _neutron_l3_agent_any_alive $pid; then
            ocf_log err "OpenStack L3 agent (${OCF_RESKEY_binary}) stop failed"
            return $OCF_ERR_GENERIC
        fi
    fi

    ocf_log info "OpenStack L3 agent ($OCF_RESKEY_binary) stopped"
    ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid} 2>/dev/null)"
    rm -f "$OCF_RESKEY_pid"

    if ocf_is_true "$remove_artifacts_on_stop_start"; then
        neutron-netns-cleanup --agent-type=l3 --force --config-file "$OCF_RESKEY_config"
    fi
    return $OCF_SUCCESS
}
# Reload action: restart the agent without removing its artifacts
# ('false' overrides remove_artifacts_on_stop_start for both steps).
# Returns the stop status if stopping failed - starting then would find
# the old process still running and report a restart that never
# happened - otherwise the status of the start.
neutron_l3_agent_reload() {
    local rc

    neutron_l3_agent_stop false
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_log err "OpenStack L3 agent (${OCF_RESKEY_binary}) reload failed: unable to stop the agent"
        return $rc
    fi

    neutron_l3_agent_start false
}
#######################################################################
# Informational actions are answered right away, without validation.
if [[ "$1" == "meta-data" ]]; then
    meta_data
    exit $OCF_SUCCESS
elif [[ "$1" == "usage" || "$1" == "help" ]]; then
    usage
    exit $OCF_SUCCESS
fi

# Every other action requires a valid environment.
neutron_l3_agent_validate || exit $?

umask 0022

# Run the requested action; its status becomes the script's exit status.
case "$1" in
    start)
        neutron_l3_agent_start
        ;;
    stop)
        neutron_l3_agent_stop
        ;;
    reload)
        neutron_l3_agent_reload
        ;;
    status)
        neutron_l3_agent_status
        ;;
    monitor)
        neutron_l3_agent_monitor
        ;;
    validate-all)
        # Validation has already passed above.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac