openstack-resource-agents/ocf/NovaEvacuate

383 lines
11 KiB
Bash

#!/bin/bash
#
#
# NovaEvacuate agent tracks compute nodes and evacuates the ones flagged for evacuation.
#
# Copyright (c) 2015
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
###
# Locate and load the OCF shell function library.
# OCF_FUNCTIONS_DIR is only defaulted when it is not already set; OCF_ROOT is
# not set anywhere in this file and is presumably supplied by the caller's
# environment. The expansions are quoted so that a path containing
# whitespace or glob characters is used verbatim.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
###
# The requested action defaults to the first command-line argument unless
# __OCF_ACTION is already set.
: "${__OCF_ACTION=$1}"
#######################################################################
#
# meta_data
#
# Prints the resource agent's XML metadata (parameters and supported
# actions) on stdout. The heredoc delimiter is quoted so the XML is emitted
# literally and is never subject to shell expansion.
#
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="NovaEvacuate" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Facility for tracking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged.
</longdesc>
<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc>
<parameters>
<parameter name="auth_url" unique="0" required="1">
<longdesc lang="en">
Authorization URL for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Authorization URL</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="username" unique="0" required="1">
<longdesc lang="en">
Username for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Username</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="password" unique="0" required="1">
<longdesc lang="en">
Password for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Password</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tenant_name" unique="0" required="1">
<longdesc lang="en">
Tenant name for connecting to keystone in admin context.
Note that with Keystone V3 tenant names are only unique within a domain.
</longdesc>
<shortdesc lang="en">Tenant name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="domain" unique="0" required="0">
<longdesc lang="en">
DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
</longdesc>
<shortdesc lang="en">DNS domain</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="endpoint_type" unique="0" required="0">
<longdesc lang="en">
Nova API location (internal, public or admin URL)
</longdesc>
<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="region_name" unique="0" required="0">
<longdesc lang="en">
Region name for connecting to nova.
</longdesc>
<shortdesc lang="en">Region name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="insecure" unique="0" required="0">
<longdesc lang="en">
Explicitly allow client to perform "insecure" TLS (https) requests.
The server's certificate will not be verified against any certificate authorities.
This option should be used with caution.
</longdesc>
<shortdesc lang="en">Allow insecure TLS requests</shortdesc>
<content type="boolean" default="0" />
</parameter>
<parameter name="no_shared_storage" unique="0" required="0">
<longdesc lang="en">
Disable shared storage recovery for instances. Use at your own risk!
</longdesc>
<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
<content type="boolean" default="0" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" timeout="600" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
#
# sigterm_handler
#
# SIGTERM handler: logs the signal and returns, so the agent keeps running.
# This is deliberate - the agent does not exit on TERM, to test that lrmd
# makes sure that we do exit.
#
sigterm_handler() {
    ocf_log info "They use TERM to bring us down. No such luck."
    return
}
trap sigterm_handler TERM
#
# evacuate_usage
#
# Prints a two-line usage summary on stdout, naming the script as it was
# invoked ($0) and the supported actions.
#
evacuate_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
#
# evacuate_stop
#
# Stop action: removes the state file named by the global $statefile.
# Always returns $OCF_SUCCESS, whatever the outcome of the removal.
#
evacuate_stop() {
    local marker=$statefile

    rm -f "${marker}"
    return $OCF_SUCCESS
}
#
# evacuate_start
#
# Start action: creates (or refreshes) the state file named by the global
# $statefile and returns the exit status of touch.
#
evacuate_start() {
    # Monitor is intentionally not invoked here so that the start timeout
    # can be low.
    touch "${statefile}"
}
#
# update_evacuation NODE VALUE
#
# Sets the "evacuate" node attribute of NODE to VALUE through attrd_updater.
#
# Arguments: $1 - node name
#            $2 - new attribute value (this file passes "yes", "no" or
#                 "HOST@EPOCH")
# Returns:   the exit status of attrd_updater; a warning is logged when it
#            is non-zero.
#
# Both arguments are quoted so that a value containing whitespace, or an
# empty value, reaches attrd_updater as exactly one argument.
#
update_evacuation() {
    local arc

    attrd_updater -p -n evacuate -Q -N "${1}" -U "${2}"
    arc=$?
    if [ "${arc}" != 0 ]; then
        ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
    fi
    return "${arc}"
}
#
# handle_evacuations NODE STATE [NODE STATE ...]
#
# Walks (node, state) pairs and evacuates every node whose state asks for it.
#
# Recognised states:
#   "" / no      nothing to do
#   yes          evacuation requested
#   HOST@EPOCH   evacuation claimed by HOST at EPOCH (seconds); picked up
#                again once the claim is more than 60 seconds old
#
# For a node that needs evacuating:
#   1. skip it unless "fence_compute -o list" names it,
#   2. record "<this host>@<now>" via update_evacuation,
#   3. run "fence_compute -o off -n NODE",
#   4. set the attribute to "no" on success, or back to "yes" on failure.
#
# Globals:  fence_options (read). It is expanded unquoted on purpose so
#           that it splits into separate fence_compute arguments.
# Returns:  always $OCF_SUCCESS. Processing stops early (still reporting
#           success) when the claim in step 2 cannot be recorded.
#
handle_evacuations() {
    local node state need_evacuate where when now found known rc

    while [ $# -gt 0 ]; do
        node=$1
        state=$2
        # Two single shifts rather than "shift 2": with an odd number of
        # arguments "shift 2" would shift nothing and the loop would never
        # terminate.
        shift; shift
        need_evacuate=0

        case $state in
            "")
                ;;
            no)
                ocf_log debug "$node is either fine or already handled"
                ;;
            yes)
                need_evacuate=1
                ;;
            *@*)
                # Split HOST@EPOCH with parameter expansion: text before the
                # first "@" is the host, text between the first and second
                # "@" (if any) is the timestamp.
                where=${state%%@*}
                when=${state#*@}
                when=${when%%@*}
                now=$(date +%s)
                case $when in
                    ""|*[!0-9]*)
                        # A malformed timestamp would make the arithmetic
                        # below fail; treat the claim as stale so the node
                        # does not stay stuck in this state.
                        ocf_log warn "Malformed evacuation timestamp for" \
                            "$node: $state; treating the claim as stale"
                        need_evacuate=1
                        ;;
                    *)
                        # 10# forces base 10 so leading zeros are not read
                        # as octal.
                        if [ $((now - 10#$when)) -gt 60 ]; then
                            ocf_log info "Processing partial evacuation of $node by" \
                                "$where at $when"
                            need_evacuate=1
                        else
                            # Give some time for any in-flight evacuations to
                            # either complete or fail. Nova won't react well
                            # if there are two overlapping requests.
                            ocf_log info "Deferring processing partial evacuation of" \
                                "$node by $where at $when"
                        fi
                        ;;
                esac
                ;;
        esac

        [ "$need_evacuate" = 1 ] || continue

        found=0
        ocf_log notice "Initiating evacuation of $node"
        for known in $(fence_compute ${fence_options} -o list | tr -d ','); do
            if [ "${known}" = "${node}" ]; then
                found=1
                break
            fi
        done
        if [ "$found" = 0 ]; then
            ocf_log info "Nova does not know about ${node}"
            # Don't mark as no because perhaps nova is unavailable right now
            continue
        fi

        # Record who is working on this node and since when.
        if ! update_evacuation "${node}" "$(uname -n)@$(date +%s)"; then
            return $OCF_SUCCESS
        fi

        fence_compute ${fence_options} -o off -n "$node"
        rc=$?
        if [ "$rc" = 0 ]; then
            update_evacuation "${node}" no
            ocf_log notice "Completed evacuation of $node"
        else
            ocf_log warn "Evacuation of $node failed: $rc"
            update_evacuation "${node}" yes
        fi
    done
    return $OCF_SUCCESS
}
#
# evacuate_monitor
#
# Monitor action. Returns $OCF_NOT_RUNNING when the state file is missing.
# Otherwise it queries the "evacuate" attribute with attrd_updater and
# passes the resulting node/state words to handle_evacuations, then returns
# $OCF_SUCCESS.
#
evacuate_monitor() {
    local pairs

    if [ ! -f "$statefile" ]; then
        return $OCF_NOT_RUNNING
    fi

    # Pipeline stages:
    #  - stderr lines containing "attribute does not exist" are dropped,
    #    anything else on stderr is passed through;
    #  - an empty value is rewritten to "no";
    #  - '=' and '"' become spaces, after which fields 4 and 6 are printed
    #    (presumably the host and value of name="..." host="..." value="..."
    #    lines - confirm against attrd_updater output).
    pairs=$(
        attrd_updater -n evacuate -A \
            2> >(grep -v "attribute does not exist" 1>&2) |
            sed 's/ value=""/ value="no"/' |
            tr '="' ' ' |
            awk '{print $4" "$6}'
    )

    # Unquoted on purpose: each node and each state must be its own argument.
    handle_evacuations $pairs
    return $OCF_SUCCESS
}
#
# evacuate_validate
#
# Checks the environment and the OCF_RESKEY_* parameters and rebuilds the
# global fence_options string from them.
#
# Globals:  statefile (read), OCF_RESKEY_* (read), fence_options (written)
# Returns:  $OCF_SUCCESS when everything is in order;
#           $OCF_ERR_ARGS when the state directory is not writable.
# Exits:    $OCF_ERR_CONFIGURED when a required parameter is missing or
#           endpoint_type is not one of the accepted values.
#
# NOTE(review): fence_options is a flat string whose consumers in this file
# expand it unquoted, so a parameter value containing whitespace would be
# split into several arguments.
#
evacuate_validate() {
    local state_dir param flag value_var value

    fence_options=""

    check_binary fence_compute

    # Is the state directory writable? Probe it with a file named after
    # our PID and remove the probe again.
    state_dir=$(dirname "$statefile")
    if ! touch "$state_dir/$$"; then
        ocf_exit_reason "Invalid state directory: $state_dir"
        return $OCF_ERR_ARGS
    fi
    rm -f "$state_dir/$$"

    # Required parameters, as "<parameter>:<fence_compute option>", in the
    # order in which they are appended to fence_options.
    for param in auth_url:-k username:-l password:-p tenant_name:-t; do
        flag=${param#*:}
        param=${param%%:*}
        value_var="OCF_RESKEY_${param}"
        value=${!value_var}
        if [ -z "${value}" ]; then
            ocf_exit_reason "${param} not configured"
            exit $OCF_ERR_CONFIGURED
        fi
        fence_options="${fence_options} ${flag} ${value}"
    done

    # Optional parameters.
    if [ -n "${OCF_RESKEY_domain}" ]; then
        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
    fi

    if [ -n "${OCF_RESKEY_region_name}" ]; then
        fence_options="${fence_options} --region-name ${OCF_RESKEY_region_name}"
    fi

    if [ -n "${OCF_RESKEY_insecure}" ] && ocf_is_true "${OCF_RESKEY_insecure}"; then
        fence_options="${fence_options} --insecure"
    fi

    if [ -n "${OCF_RESKEY_no_shared_storage}" ] \
        && ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
        fence_options="${fence_options} --no-shared-storage"
    fi

    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
        case ${OCF_RESKEY_endpoint_type} in
            adminURL|publicURL|internalURL)
                ;;
            *)
                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
                    "not valid. Use adminURL or publicURL or internalURL"
                exit $OCF_ERR_CONFIGURED
                ;;
        esac
        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
    fi

    return $OCF_SUCCESS
}
# Marker file whose presence means "this resource instance is started".
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# Dispatch on the requested action. Branches that do not exit themselves
# fall through to the common epilogue below, which logs and exits with the
# status of the last command run in the branch.
case "$__OCF_ACTION" in
    start)
        evacuate_validate
        evacuate_start
        ;;
    stop)
        evacuate_stop
        ;;
    monitor)
        evacuate_validate
        evacuate_monitor
        ;;
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        evacuate_usage
        exit $OCF_SUCCESS
        ;;
    validate-all)
        # Actually validate the configuration instead of reporting success
        # unconditionally.
        evacuate_validate
        ;;
    *)
        evacuate_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc