df082efdb3
- Move port validation *_check_port from all OCF script to OCF library (validate_port) - ceilometer_agent_central_check_port() - ceilometer_alarm_evaluator_check_port() - heat_engine_check_port() - nova_compute_check_port() - write bats unit tests for validate_port - Remove 'ocf-' from neutron OCF scripts names to make them typical to all OpenStack services - Rename ocf-neutron-dhcp-agent to neutron-dhcp-agent - Rename ocf-neutron-l3-agent to neutron-l3-agent - Rename ocf-neutron-ovs-agent to neutron-ovs-agent - Some minor bash typos and old comments clean-up Change-Id: Ic4bcecdbc05f6306be63dea211df96a104cb2d36 Signed-off-by: Sergii Golovatiuk <sgolovatiuk@mirantis.com>
492 lines
16 KiB
Bash
492 lines
16 KiB
Bash
#!/bin/bash
|
|
#
|
|
#
|
|
# OpenStack Neutron DHCP Service
|
|
#
|
|
# Description: Manages an OpenStack Neutron DHCP Service process as an HA resource
|
|
#
|
|
# Authors: Emilien Macchi
|
|
# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han
|
|
#
|
|
# Support: openstack@lists.launchpad.net
|
|
# License: Apache Software License (ASL) 2.0
|
|
#
|
|
#
|
|
# See usage() function below for more details ...
|
|
#
|
|
# OCF instance parameters:
|
|
# OCF_RESKEY_binary
|
|
# OCF_RESKEY_config
|
|
# OCF_RESKEY_plugin_config
|
|
# OCF_RESKEY_log_file
|
|
# OCF_RESKEY_check_state_reports
|
|
# OCF_RESKEY_state_reports_file
|
|
# OCF_RESKEY_state_reports_timeout
|
|
# OCF_RESKEY_user
|
|
# OCF_RESKEY_pid
|
|
# OCF_RESKEY_remove_artifacts_on_stop_start
|
|
#######################################################################
|
|
# Initialization:
|
|
|
|
# Load the OCF helper libraries. Each directory may be preset in the
# environment; when unset it is derived from OCF_ROOT. The expansions are
# quoted so a root path containing whitespace or glob characters is neither
# split nor expanded.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
# shellcheck source=/dev/null
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
: "${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}"
# shellcheck source=/dev/null
. "${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs"
|
|
|
|
#######################################################################
|
|
|
|
# Fill in some defaults if no values are specified
|
|
|
|
# Restrict the command search path to the system directories.
PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Built-in defaults for every OCF instance parameter. They are kept in
# separate *_default variables because meta_data() advertises them.
OCF_RESKEY_binary_default="neutron-dhcp-agent"
OCF_RESKEY_config_default="/etc/neutron/neutron.conf"
OCF_RESKEY_plugin_config_default="/etc/neutron/dhcp_agent.ini"
OCF_RESKEY_log_file_default="/var/log/neutron/dhcp-agent.log"
OCF_RESKEY_check_state_reports_default=false
OCF_RESKEY_state_reports_file_default="/var/lib/neutron/dhcp_agent_report.log"
OCF_RESKEY_state_reports_timeout_default=60
OCF_RESKEY_user_default="neutron"
# HA_RSCTMP and __SCRIPT_NAME are not set in this file; presumably they come
# from the sourced OCF library -- TODO confirm.
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
OCF_RESKEY_remove_artifacts_on_stop_start_default='true'

# Apply a default only when the variable is unset (an explicitly empty value
# is kept as is). The expansions are quoted so the resulting value is never
# word-split or glob-expanded as arguments of ':'.
: "${HA_LOGTAG="ocf-neutron-dhcp-agent"}"
: "${HA_LOGFACILITY="daemon"}"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}}"
: "${OCF_RESKEY_check_state_reports=${OCF_RESKEY_check_state_reports_default}}"
: "${OCF_RESKEY_state_reports_file=${OCF_RESKEY_state_reports_file_default}}"
: "${OCF_RESKEY_state_reports_timeout=${OCF_RESKEY_state_reports_timeout_default}}"
: "${OCF_RESKEY_remove_artifacts_on_stop_start=${OCF_RESKEY_remove_artifacts_on_stop_start_default}}"
|
|
|
|
|
|
|
|
#######################################################################
|
|
|
|
usage() {
    # Print the list of supported actions followed by a one-line description
    # of each. One printf argument corresponds to one output line.
    printf '%s\n' \
        "usage: $0 (start|stop|reload|validate-all|meta-data|status|monitor)" \
        "" \
        "$0 manages an OpenStack DHCP Service (${OCF_RESKEY_binary}) process as an HA resource" \
        "" \
        "The 'start' operation starts the networking service." \
        "The 'stop' operation stops the networking service." \
        "The 'reload' operation restarts the networking service without removing any artifacts" \
        "The 'validate-all' operation reports whether the parameters are valid" \
        "The 'meta-data' operation reports this RA's meta-data information" \
        "The 'status' operation reports whether the networking service is running" \
        "The 'monitor' operation reports whether the networking service seems to be working" \
        ""
}
|
|
|
|
meta_data() {
    # Print the resource agent meta-data XML on stdout.
    # The heredoc is unquoted on purpose: the current binary name and the
    # *_default values are substituted into the document. The XML declaration
    # has to be the very first output line, so the heredoc body starts at
    # column 0. Every parameter, including remove_artifacts_on_stop_start,
    # advertises its built-in default.
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-dhcp-agent">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the OpenStack Neutron DHCP Service (${OCF_RESKEY_binary})
May manage a neutron-dhcp-agent instance or a clone set that
creates a distributed neutron-dhcp-agent cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack DHCP Service (${OCF_RESKEY_binary})</shortdesc>
<parameters>

<parameter name="dummy" unique="1">
<longdesc lang="en">
This is a dummy parameter.
Pacemaker needs it to enable reload operation for the resource
</longdesc>
<shortdesc lang="en">Dummy parameter</shortdesc>
<content type="boolean" default="false" />
</parameter>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack DHCP Agent server binary (${OCF_RESKEY_binary})
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent server binary (${OCF_RESKEY_binary})</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Neutron Service (neutron-server) configuration file
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent (neutron-server) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="plugin_config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack DHCP Service (${OCF_RESKEY_binary}) configuration file
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent (${OCF_RESKEY_binary}) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_plugin_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack DHCP Service (${OCF_RESKEY_binary})
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="log_file" unique="0" required="0">
<longdesc lang="en">
The log file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_file_default}" />
</parameter>

<parameter name="check_state_reports" unique="0" required="0">
<longdesc lang="en">
The flag, which enables or disables additional monitoring
based on agent's local state reports
</longdesc>
<shortdesc lang="en">Enable or disable local state reports based monitoring</shortdesc>
<content type="string" default="${OCF_RESKEY_check_state_reports_default}" />
</parameter>

<parameter name="state_reports_file" unique="0" required="0">
<longdesc lang="en">
This file contains DHCP agent local state report information.
There're three section in it: STARTUP, RPC_STATE_REPORT and SYNC_STATE.
</longdesc>
<shortdesc lang="en">DHCP agent local state report file</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_file_default}" />
</parameter>

<parameter name="state_reports_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout value for DHCP agent to update its local state report.
If it takes more time than the specified value agent is considered to be dead.
</longdesc>
<shortdesc lang="en">DHCP agent local state reports timeout</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_timeout_default}" />
</parameter>

<parameter name="remove_artifacts_on_stop_start" unique="0" required="0">
<longdesc lang="en">
Clean up all resources created by Neutron DHCP agent, such as additional processes,
network namespaces, created interfaces, on agent stop and start.
</longdesc>
<shortdesc lang="en">Clean up all resources created by DHCP agent on its start and stop</shortdesc>
<content type="string" default="${OCF_RESKEY_remove_artifacts_on_stop_start_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="reload" timeout="30" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
|
|
|
|
get_worker_pid() {
    # Print the PID(s) of the agent processes owned by OCF_RESKEY_user whose
    # full command line matches OCF_RESKEY_binary, joined on a single line.
    # Prints an empty line when nothing matches.
    local options
    local pid

    # FIXME: Remove if condition and set 'falo' statically once Fuel
    # discontinue support of Ubuntu 12.04 and CentOs 6.x where -a was not defined.
    #
    # The version reported by 'pgrep -V' is split into major and minor parts
    # and compared numerically. A plain '<' on the matched text would be a
    # string comparison and would misorder versions such as 10.x against 3.3.
    # awk exits 1 (selecting the fallback options) only for versions older
    # than 3.3; output without a recognisable version selects 'falo'.
    if pgrep -V | awk '
        match($0, /[0-9]+\.[0-9]+/) {
            split(substr($0, RSTART, RLENGTH), ver, ".")
            if (ver[1] + 0 < 3 || (ver[1] + 0 == 3 && ver[2] + 0 < 3)) {
                exit 1
            }
        }'; then
        options='falo'
    else
        options='flo'
    fi

    pid=$(pgrep -u "${OCF_RESKEY_user}" "-${options}" "${OCF_RESKEY_binary}" | awk '{print $1}')
    # Intentionally unquoted: several PIDs (one per line) are collapsed into
    # one space-separated line. The value consists of PIDs only.
    # shellcheck disable=SC2086
    echo $pid
}
|
|
|
|
#######################################################################
|
|
# Functions invoked by resource manager actions
|
|
|
|
neutron_dhcp_agent_validate() {
    # Check the prerequisites of the agent.
    # Returns OCF_SUCCESS when everything is in place and OCF_ERR_INSTALLED
    # when the main config file (outside of a probe) or the service user is
    # missing.
    local rc

    # check_binary is not defined in this file; presumably the sourced OCF
    # library aborts the agent when the binary is missing -- TODO confirm.
    check_binary "$OCF_RESKEY_binary"
    check_binary netstat

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # Logged through 'ocf_log warn' like every other warning in this file.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return ${OCF_SUCCESS}
}
|
|
|
|
neutron_dhcp_agent_status() {
    # Report whether the agent is running and keep the pid file in sync with
    # the PID found by get_worker_pid.
    # Returns:
    #   OCF_NOT_RUNNING  no matching process was found
    #   OCF_SUCCESS      the process runs; the pid file matches or was (re)written
    #   OCF_ERR_GENERIC  the pid file names another, still running process
    local pid
    local f_pid
    local pid_dir

    # check and make PID file dir
    pid_dir=$(dirname "${OCF_RESKEY_pid}")
    if [ ! -d "${pid_dir}" ]; then
        ocf_log debug "Create pid file dir: ${pid_dir} and chown to ${OCF_RESKEY_user}"
        mkdir -p "${pid_dir}"
        chown -R "${OCF_RESKEY_user}" "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    pid=$(get_worker_pid)
    if [ -z "$pid" ]; then
        ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running."
        return $OCF_NOT_RUNNING
    fi

    # Check PID file and create if need
    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found."
        ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # compare PID from file with PID from `pgrep...`;
    # only the first whitespace-separated token of the file is used.
    f_pid=$(tr '\n' ' ' < "$OCF_RESKEY_pid" | awk '{print $1}')
    if [ "$pid" = "$f_pid" ]; then
        return $OCF_SUCCESS
    fi

    # at this point we have a PID file whose PID differs from the one
    # reported by `pgrep...`
    if [ -z "$f_pid" ] || [ ! -d "/proc/$f_pid" ]; then
        # process with PID from PID-file not found
        ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found."
        ocf_log warn "PID-file will be re-created (with PID=$pid)."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # at this point we have an alien PID-file and a running process with this PID.
    ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid"
    return $OCF_ERR_GENERIC
}
|
|
|
|
get_local_reports_value() {
    # Print the value stored under a key of one section of the local state
    # reports file (OCF_RESKEY_state_reports_file).
    # Arguments:
    #   $1 - section name, used as an awk regular expression
    #   $2 - key name, used as a grep pattern
    # Output: the value with double quotes and commas removed and whitespace
    #         collapsed to single spaces; an empty line when nothing matches.
    local section=$1
    local key=$2
    local raw
    local -a words
    local IFS=$' \t\n'

    # Take the lines from the section header up to the next closing brace,
    # keep those mentioning the key, drop the first field (the key itself)
    # and strip the quoting characters.
    raw=$(awk "/${section}/,/}/" "$OCF_RESKEY_state_reports_file" \
        | grep -- "$key" \
        | awk '{$1=""; print $0}' \
        | tr -d '",')

    # Split on whitespace without letting the shell glob-expand the value,
    # then join the words back with single spaces. read returns non-zero at
    # end of input, which is expected here.
    read -r -d '' -a words <<<"$raw"
    printf '%s\n' "${words[*]}"
}
|
|
|
|
check_local_reports() {
    # Evaluate the agent's local state reports file.
    # Returns:
    #   OCF_SUCCESS      the file is missing, or the reports look healthy
    #   OCF_ERR_GENERIC  the RPC state report timestamp is missing, malformed
    #                    or older than OCF_RESKEY_state_reports_timeout
    #                    seconds, or a section has been in failure status for
    #                    longer than that timeout
    local section
    local systime
    local rpc_state_timestamp
    local status
    local since

    if [ ! -f "$OCF_RESKEY_state_reports_file" ]; then
        ocf_log warn "State reports file wasn't found"
        return $OCF_SUCCESS
    fi

    systime=$(date +%s)
    rpc_state_timestamp=$(get_local_reports_value RPC_STATE_REPORT Timestamp)
    # Drop the fractional part of the timestamp.
    rpc_state_timestamp=${rpc_state_timestamp%.*}

    # An empty or non-numeric timestamp would be a syntax error inside the
    # arithmetic expansion below, so it is reported explicitly.
    if ! [[ $rpc_state_timestamp =~ ^[0-9]+$ ]]; then
        ocf_log err "Agent state report timestamp is missing or malformed"
        return $OCF_ERR_GENERIC
    fi

    if [[ $(( systime - 10#$rpc_state_timestamp )) -gt $OCF_RESKEY_state_reports_timeout ]]; then
        ocf_log err "Agent is not reporting for too long"
        return $OCF_ERR_GENERIC
    fi

    for section in STARTUP RPC_STATE_REPORT SYNC_STATE; do
        status=$(get_local_reports_value "$section" Status)
        if [[ $status =~ failure ]]; then
            # Use the failing section's own 'Since'. When that section does
            # not carry one, fall back to the SYNC_STATE value.
            since=$(get_local_reports_value "$section" Since)
            if [ -z "$since" ]; then
                since=$(get_local_reports_value SYNC_STATE Since)
            fi
            since=$(date --date="$since" +%s)
            if ! [[ $since =~ ^-?[0-9]+$ ]]; then
                ocf_log err "Can't tell since when $section report is in failure status"
                return $OCF_ERR_GENERIC
            fi
            if [[ $(( systime - since )) -gt $OCF_RESKEY_state_reports_timeout ]]; then
                ocf_log err "$section report is in failure status for too long"
                return $OCF_ERR_GENERIC
            fi
        fi
    done
    return $OCF_SUCCESS
}
|
|
|
|
neutron_dhcp_agent_monitor() {
    # Monitor action: the process check from neutron_dhcp_agent_status,
    # optionally followed by the local state reports check.
    # Returns the first non-success code of either check, else OCF_SUCCESS.
    local rc

    neutron_dhcp_agent_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # The reports check runs only when enabled through the
    # check_state_reports parameter.
    if ocf_is_true "$OCF_RESKEY_check_state_reports"; then
        check_local_reports
        rc=$?
        if [ $rc -ne $OCF_SUCCESS ]; then
            return $rc
        fi
    fi

    ocf_log debug "OpenStack DHCP Agent (neutron-dhcp-agent) monitor succeeded"
    return $OCF_SUCCESS
}
|
|
|
|
|
|
neutron_dhcp_agent_start() {
    # Start action.
    # Arguments:
    #   $1 - optional override of OCF_RESKEY_remove_artifacts_on_stop_start
    # Returns OCF_SUCCESS once the monitor check passes. Exits the agent with
    # OCF_ERR_GENERIC when the monitor reports anything other than "not
    # running" while waiting for the agent to come up.
    local rc
    # This variable is overridden by reload operation
    # to perform fast resource restart
    local remove_artifacts_on_stop_start=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}

    neutron_dhcp_agent_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already running"
        return $OCF_SUCCESS
    fi

    if ocf_is_true "$remove_artifacts_on_stop_start"; then
        neutron-netns-cleanup --agent-type=dhcp --force --config-file "${OCF_RESKEY_config}"
    fi
    # Remove the report of the previous run before the agent is started.
    rm -f "$OCF_RESKEY_state_reports_file"

    # run and detach to background agent as daemon.
    # Don't use ocf_run as we're sending the tool's output to /dev/null
    #
    # Quoting: everything is expanded by this shell except '\$!', which
    # reaches the inner shell as '$!' and expands there to the PID of the
    # backgrounded agent. The inner shell prints that PID and this shell
    # redirects it into the pid file.
    su "${OCF_RESKEY_user}" -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
        --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file \
        >> /dev/null 2>&1 & echo \$!" > "$OCF_RESKEY_pid"
    ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}")"

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        neutron_dhcp_agent_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 3
    done

    ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) started"
    return $OCF_SUCCESS
}
|
|
|
|
|
|
neutron_dhcp_agent_stop() {
    # Stop action.
    # Arguments:
    #   $1 - optional override of OCF_RESKEY_remove_artifacts_on_stop_start
    # Returns OCF_SUCCESS when the agent is (already) stopped and
    # OCF_ERR_GENERIC when proc_stop reports a generic error.
    local rc
    local pid
    local shutdown_timeout=15

    # OCF_RESKEY_CRM_meta_timeout is divided by 1000 to get the value handed
    # to proc_stop (presumably milliseconds to seconds -- TODO confirm).
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( OCF_RESKEY_CRM_meta_timeout / 1000 ))
    fi

    # This variable is overridden by reload operation
    # to perform fast resource restart
    local remove_artifacts_on_stop_start=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}

    neutron_dhcp_agent_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        # Leftovers are cleaned up even when the agent itself is not running.
        if ocf_is_true "$remove_artifacts_on_stop_start"; then
            neutron-netns-cleanup --agent-type=dhcp --force --config-file "${OCF_RESKEY_config}"
        fi
        ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already stopped"
        return $OCF_SUCCESS
    fi

    # Terminate agent daemon
    # proc_stop is not defined in this file; presumably it comes from the
    # sourced Fuel function library.
    pid=$(get_worker_pid)
    proc_stop "${pid}" "${OCF_RESKEY_binary}" "$shutdown_timeout"
    rc=$?
    if [ "${rc}" -eq "${OCF_ERR_GENERIC}" ]; then
        ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) stop failed"
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}")"
    # Quoted so that a path with whitespace removes exactly the pid file.
    rm -f "$OCF_RESKEY_pid"

    if ocf_is_true "$remove_artifacts_on_stop_start"; then
        neutron-netns-cleanup --agent-type=dhcp --force --config-file "${OCF_RESKEY_config}"
    fi

    return $OCF_SUCCESS
}
|
|
|
|
|
|
neutron_dhcp_agent_reload() {
    # Reload action: call stop and start without removing artifacts.
    # Returns the stop result when stopping fails, otherwise the start result.
    local rc

    neutron_dhcp_agent_stop false
    rc=$?
    # Starting after a failed stop would find the old process still running
    # and report success without any restart, so the failure is returned.
    if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) reload failed: agent could not be stopped"
        return $rc
    fi

    neutron_dhcp_agent_start false
}
|
|
|
|
#######################################################################
|
|
|
|
# Informational actions are answered before validation runs.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit $OCF_SUCCESS
fi

# Every other action requires a valid setup; the validation result becomes
# the exit code when it fails.
neutron_dhcp_agent_validate || exit $?
umask 0022

# Run the requested action. Its status is the exit status of the agent.
case "$1" in
    start)
        neutron_dhcp_agent_start
        ;;
    stop)
        neutron_dhcp_agent_stop
        ;;
    reload)
        neutron_dhcp_agent_reload
        ;;
    status)
        neutron_dhcp_agent_status
        ;;
    monitor)
        neutron_dhcp_agent_monitor
        ;;
    validate-all)
        # Validation has already passed above; nothing more to do.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
|