openstack-resource-agents/ocf/neutron-ha-tool

330 lines
10 KiB
Bash

#!/bin/sh
#
#
# OpenStack HA tool for Neutron (neutron-ha-tool)
#
# This resource agent wraps the neutron-ha-tool Python script.
# It can be used to monitor neutron for the availability of the
# l3-agents and migrate routers away from agents that are
# currently offline.
# The neutron-ha-tool was originally part of the openstack-network
# cookbook for Chef. However as of icehouse it got dropped
# from upstream, and is now maintained here:
#
# https://github.com/SUSE-Cloud/cookbook-openstack-network/blob/neutron-ha-tool-maintenance/files/default/neutron-ha-tool.py
#
# You can see a brief explanation of how this RA works in this
# video:
#
# https://youtu.be/vBZgtHgSdOY?t=33m39s
#
# Authors: Ralf Haferkamp
# Mainly inspired by the Neutron L3 resource agent written by Emilien Macchi
#
# Support: openstack@lists.openstack.org
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_os_auth_url
# OCF_RESKEY_os_region_name
# OCF_RESKEY_os_username
# OCF_RESKEY_os_password
# OCF_RESKEY_os_tenant_name
# OCF_RESKEY_os_insecure
# OCF_RESKEY_os_cacert
#######################################################################
# Initialization:
# Locate and load the OCF shell function library. OCF_FUNCTIONS_DIR is only
# assigned when it is unset, so a value already present in the environment
# wins. Both expansions are quoted so the path is never word-split or
# glob-expanded.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
#######################################################################
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="neutron-ha-tool"
OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2"
OCF_RESKEY_os_region_name_default=""
OCF_RESKEY_os_username_default="admin"
OCF_RESKEY_os_password_default=""
OCF_RESKEY_os_tenant_name_default="admin"
OCF_RESKEY_os_insecure_default="0"
OCF_RESKEY_os_cacert_default=""
# Apply each default only when the parameter is unset (the "=" form, not
# ":="), so an explicitly empty value is kept as given. The expansions are
# quoted so the resulting value is not word-split or globbed when handed to
# the ":" no-op.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}}"
: "${OCF_RESKEY_os_region_name=${OCF_RESKEY_os_region_name_default}}"
: "${OCF_RESKEY_os_tenant_name=${OCF_RESKEY_os_tenant_name_default}}"
: "${OCF_RESKEY_os_username=${OCF_RESKEY_os_username_default}}"
: "${OCF_RESKEY_os_password=${OCF_RESKEY_os_password_default}}"
: "${OCF_RESKEY_os_insecure=${OCF_RESKEY_os_insecure_default}}"
: "${OCF_RESKEY_os_cacert=${OCF_RESKEY_os_cacert_default}}"
#######################################################################
# Print a short summary of the agent and its supported actions on stdout.
# The heredoc delimiter is deliberately unquoted so that $0 expands to the
# name this script was invoked as.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages the Neutron HA tool (neutron-ha-tool) as an HA resource
The 'start' operation triggers a migration of all routers on offline
l3-agents to l3-agents that are actually online.
The 'stop' is basically noop.
The 'validate-all' operation reports whether the parameters are valid.
The 'meta-data' operation reports this RA's meta-data information.
The 'status' operation always reports success, as there is no daemon to check.
The 'monitor' operation reports whether there are some routers assigned
to l3-agents that are currently offline.
UEND
}
# Emit the OCF resource-agent metadata (XML) for this agent on stdout.
# The heredoc delimiter is unquoted so the advertised defaults are filled in
# from the OCF_RESKEY_*_default variables.
# os_cacert is advertised as a string because it holds a file name;
# os_insecure is the only boolean parameter.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-ha-tool">
<version>1.0</version>
<longdesc lang="en">
This resource agent wraps the Neutron HA Tool (neutron-ha-tool)
and can be used to check neutron for offline l3-agents that
have routers assigned and migrate those routers to a different
(online) l3-agent.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Neutron HA Tool (neutron-ha-tool)</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Neutron HA Tool binary (neutron-ha-tool)
</longdesc>
<shortdesc lang="en">OpenStack Neutron HA Tool binary (neutron-ha-tool)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="os_auth_url" unique="0" required="0">
<longdesc lang="en">
The URL pointing to the Keystone instance to use for authentication.
</longdesc>
<shortdesc lang="en">Keystone URL</shortdesc>
<content type="string" default="${OCF_RESKEY_os_auth_url_default}" />
</parameter>
<parameter name="os_region_name" unique="0" required="0">
<longdesc lang="en">
The region name to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Keystone region name</shortdesc>
<content type="string" default="${OCF_RESKEY_os_region_name_default}" />
</parameter>
<parameter name="os_password" unique="0" required="0">
<longdesc lang="en">
The password to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Password for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_password_default}" />
</parameter>
<parameter name="os_tenant_name" unique="0" required="0">
<longdesc lang="en">
The Tenant to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Tenant name for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_tenant_name_default}" />
</parameter>
<parameter name="os_username" unique="0" required="0">
<longdesc lang="en">
OpenStack Username for authentication.
</longdesc>
<shortdesc lang="en">OpenStack Username</shortdesc>
<content type="string" default="${OCF_RESKEY_os_username_default}" />
</parameter>
<parameter name="os_insecure" unique="0" required="0">
<longdesc lang="en">
Disable SSL certificate verification.
</longdesc>
<shortdesc lang="en">Disable SSL certificate verification</shortdesc>
<content type="boolean" default="${OCF_RESKEY_os_insecure_default}" />
</parameter>
<parameter name="os_cacert" unique="0" required="0">
<longdesc lang="en">
Filename of a SSL CA Certificate Bundle to use for Server Certificate
verification.
</longdesc>
<shortdesc lang="en">SSL CA Bundle file</shortdesc>
<content type="string" default="${OCF_RESKEY_os_cacert_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
# Validate the resource configuration.
# Globals: OCF_RESKEY_binary, OCF_RESKEY_os_cacert (read)
# Returns: OCF_SUCCESS when the configuration is usable, OCF_ERR_GENERIC
#          when a CA bundle is configured but is not a regular file.
neutron_ha_tool_validate() {
    # The value is passed unquoted, so one containing whitespace reaches
    # check_binary as several arguments.
    check_binary $OCF_RESKEY_binary
    # The CA bundle is optional; it is only checked when one is configured.
    if [ -n "$OCF_RESKEY_os_cacert" ] && [ ! -f "$OCF_RESKEY_os_cacert" ]; then
        ocf_log err "Failed to verify CA Certificate Bundle" \
            "($OCF_RESKEY_os_cacert)"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
# Report the resource status.
# Returns: always OCF_SUCCESS.
neutron_ha_tool_status() {
    # No daemon backs this resource, so there is nothing to probe here;
    # unconditionally claim to be running fine.
    return $OCF_SUCCESS
}
# Check whether any routers are assigned to offline l3-agents.
# Globals: statefile, OCF_RESKEY_binary, OCF_RESKEY_os_insecure (read);
#          INSECURE, rc (written)
# Returns: OCF_NOT_RUNNING when the state file is absent,
#          OCF_ERR_GENERIC when the tool exits with status 2,
#          OCF_SUCCESS otherwise.
neutron_ha_tool_monitor() {
    if ! [ -e "$statefile" ]; then
        # neutron-ha-tool is run on a single node at a time, i.e. in
        # active/passive mode. So we use this state file to keep
        # track of whether it's active on the current node, and if
        # Pacemaker does a probe on a node where it's not active, we
        # skip the l3-agent check and always return OCF_NOT_RUNNING,
        # otherwise we'd get messages from pengine like:
        #
        # error: Resource neutron-ha-tool (ocf::neutron-ha-tool) is active on
        # 2 nodes attempting recovery
        # warning: See http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active
        # for more information.
        #
        # and Pacemaker could attempt unnecessary recovery according to the
        # value of the cluster-wide "multiple-active" option.
        ocf_log debug "neutron-ha-tool not currently active on this node; " \
            "skipping l3-agent check"
        return $OCF_NOT_RUNNING
    fi
    INSECURE=""
    if ocf_is_true $OCF_RESKEY_os_insecure; then
        INSECURE="--insecure"
    fi
    # $INSECURE is intentionally unquoted: when empty it must vanish rather
    # than be passed as an empty argument.
    ${OCF_RESKEY_binary} --l3-agent-check --quiet $INSECURE
    rc=$?
    case "$rc" in
        0)
            ;;
        2)
            ocf_log err "Some Neutron routers need migration."
            return $OCF_ERR_GENERIC
            ;;
        *)
            # Any other status means the check itself did not complete
            # normally. The result is still reported as success, as before,
            # so a failing check alone does not trigger recovery, but the
            # failure is no longer logged as a successful monitor.
            ocf_log warn "Neutron HA Tool (neutron-ha-tool) l3-agent check" \
                "exited with unexpected status $rc; unable to determine" \
                "router state"
            return $OCF_SUCCESS
            ;;
    esac
    ocf_log debug "Neutron HA Tool (neutron-ha-tool) monitor succeeded"
    return $OCF_SUCCESS
}
# Mark this node as active and migrate routers away from offline l3-agents.
# Globals: statefile, OCF_RESKEY_binary, OCF_RESKEY_os_insecure (read);
#          INSECURE, retry, rc (written)
# Returns: OCF_SUCCESS when the migration run exits 0, OCF_ERR_GENERIC when
#          the state file cannot be created or the migration run fails.
neutron_ha_tool_start() {
    # The state file is what the monitor action uses to tell that the
    # resource is active on this node.
    touch "$statefile"
    if [ ! -e "$statefile" ]; then
        ocf_log err "Failed to create $statefile - aborting!"
        return $OCF_ERR_GENERIC
    fi

    INSECURE=""
    ocf_is_true $OCF_RESKEY_os_insecure && INSECURE="--insecure"

    # Older neutron-ha-tool.py versions lack --retry, so only pass the
    # option when the tool's own help output advertises it.
    retry=""
    ${OCF_RESKEY_binary} --help | grep -q -- --retry && retry="--retry"

    # Empty option variables are left unquoted so they drop out entirely.
    ${OCF_RESKEY_binary} --l3-agent-migrate $retry --now $INSECURE
    rc=$?
    if [ $rc -eq 0 ]; then
        ocf_log debug "Neutron HA Tool (neutron-ha-tool) router migration" \
            "succeeded."
        return $OCF_SUCCESS
    fi
    ocf_log err "Neutron HA Tool failed to migrate routers away from" \
        "offline L3 agents."
    return $OCF_ERR_GENERIC
}
# Mark this node as no longer active by removing the state file.
# Globals: statefile (read)
# Returns: OCF_SUCCESS once the state file is gone, OCF_ERR_GENERIC if it
#          still exists after the removal attempt.
neutron_ha_tool_stop() {
    rm -f "$statefile"
    # Nothing left behind: the stop is complete.
    [ -e "$statefile" ] || return $OCF_SUCCESS

    ocf_log err "Uh-oh - failed to remove $statefile!"
    # Per the original author, the state file is expected to live in tmpfs
    # (/run); failing to delete it there means something is *really* badly
    # wrong, and the intent of reporting a failed stop is to get the node
    # fenced.
    return $OCF_ERR_GENERIC
}
#######################################################################
# Informational actions are served first and exit immediately; they run
# before the configuration is validated.
case "$1" in
    meta-data)  meta_data; exit $OCF_SUCCESS ;;
    usage|help) usage; exit $OCF_SUCCESS ;;
esac
# Every other action is only attempted once validation has passed; on
# failure the script exits with the validator's own status.
neutron_ha_tool_validate || exit $?
# OPENSTACK env variables
# Hand the credentials to neutron-ha-tool through the environment.
# The values are quoted because this script runs under /bin/sh: shells that
# treat the operands of "export" as ordinary words field-split an unquoted
# "NAME=$value", which would mangle e.g. a password containing whitespace.
export OS_AUTH_URL="$OCF_RESKEY_os_auth_url"
export OS_REGION_NAME="$OCF_RESKEY_os_region_name"
export OS_TENANT_NAME="$OCF_RESKEY_os_tenant_name"
export OS_USERNAME="$OCF_RESKEY_os_username"
export OS_PASSWORD="$OCF_RESKEY_os_password"
# OS_CACERT is only exported when a CA bundle has been configured.
if [ -n "$OCF_RESKEY_os_cacert" ]; then
    export OS_CACERT="$OCF_RESKEY_os_cacert"
fi
# Per-instance marker file whose existence the start/stop/monitor actions
# use to track whether the resource is active on this node.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
# Dispatch on the requested action. The status of the selected handler is
# the last status produced by this statement.
case "$1" in
    start)   neutron_ha_tool_start ;;
    stop)    neutron_ha_tool_stop ;;
    status)  neutron_ha_tool_status ;;
    monitor) neutron_ha_tool_monitor ;;
    validate-all)
        # Nothing further to do: validation already ran above.
        ;;
    *)
        # Unknown action: show the help text and report it as unimplemented.
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac