c71703128e
In order to allow for faster subcloud audits, introduce a new "audit-worker" process. By default there will be four worker processes to perform the actual subcloud audits. The main audit process will scan the DB for subclouds which need auditing based on the new audit start/end timestamps. It will then send out RPC messages to the "audit-worker" processes to request an audit for a list of subcloud IDs. When the "audit-worker" process receives the "audit_subclouds" RPC message, it loops over each of the specified subclouds. For each subcloud it updates the DB to indicate that the audit is starting for that subcloud, then basically does the exact same audit that is currently done in the audit process, then updates the DB to indicate that the audit has been completed for that subcloud. Story: 2007267 Task: 41336 Signed-off-by: Chris Friesen <chris.friesen@windriver.com> Change-Id: Ifb3dd363fd337d24f2c3f7aaa3549624fffaceca
323 lines
9.8 KiB
Bash
323 lines
9.8 KiB
Bash
#!/bin/sh
|
|
# OpenStack DC Manager Audit Worker Service (dcmanager-audit-worker)
|
|
#
|
|
# Description:
|
|
# Manages an OpenStack DC Manager Audit-Worker Service (dcmanager-audit-worker)
|
|
# process as an HA resource
|
|
#
|
|
# Copyright (c) 2021 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
#
|
|
# See usage() function below for more details ...
|
|
#
|
|
# OCF instance parameters:
|
|
# OCF_RESKEY_binary
|
|
# OCF_RESKEY_config
|
|
# OCF_RESKEY_user
|
|
# OCF_RESKEY_pid
|
|
# OCF_RESKEY_additional_parameters
|
|
#######################################################################
|
|
# Initialization:
|
|
|
|
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
|
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
|
|
|
#######################################################################
|
|
|
|
# Fill in some defaults if no values are specified
|
|
|
|
OCF_RESKEY_binary_default="/usr/bin/dcmanager-audit-worker"
|
|
OCF_RESKEY_config_default="/etc/dcmanager/dcmanager.conf"
|
|
OCF_RESKEY_user_default="root"
|
|
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
|
|
|
|
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
|
|
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
|
|
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
|
|
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
|
|
|
#######################################################################
|
|
|
|
usage() {
|
|
cat <<UEND
|
|
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
|
|
|
|
$0 manages an OpenStack DC Manager Audit-worker service (dcmanager-audit-worker) process as an HA resource
|
|
|
|
The 'start' operation starts the dcmanager-audit-worker service.
|
|
The 'stop' operation stops the dcmanager-audit-worker service.
|
|
The 'validate-all' operation reports whether the parameters are valid
|
|
The 'meta-data' operation reports this RA's meta-data information
|
|
The 'status' operation reports whether the dcmanager-audit-worker service is running
|
|
The 'monitor' operation reports whether the dcmanager-audit-worker service seems to be working
|
|
|
|
UEND
|
|
}
|
|
|
|
meta_data() {
|
|
cat <<END
|
|
<?xml version="1.0"?>
|
|
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
|
<resource-agent name="dcmanager-audit-worker">
|
|
<version>1.0</version>
|
|
|
|
<longdesc lang="en">
|
|
Resource agent for the DC Manager service (dcmanager-audit-worker)
|
|
</longdesc>
|
|
<shortdesc lang="en">Manages the OpenStack DC Manager Audit-worker Service (dcmanager-audit-worker)</shortdesc>
|
|
<parameters>
|
|
|
|
<parameter name="binary" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
Location of the DC Manager Audit-worker Service binary (dcmanager-audit-worker)
|
|
</longdesc>
|
|
<shortdesc lang="en">DC Manager Audit-worker Service binary (dcmanager-audit-worker)</shortdesc>
|
|
<content type="string" default="${OCF_RESKEY_binary_default}" />
|
|
</parameter>
|
|
|
|
<parameter name="config" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
Location of the DC Manager Audit-worker Service (dcmanager-audit-worker) configuration file
|
|
</longdesc>
|
|
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker registry) config file</shortdesc>
|
|
<content type="string" default="${OCF_RESKEY_config_default}" />
|
|
</parameter>
|
|
|
|
<parameter name="user" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
User running DC Manager Audit-worker Service (dcmanager-audit-worker)
|
|
</longdesc>
|
|
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker) user</shortdesc>
|
|
<content type="string" default="${OCF_RESKEY_user_default}" />
|
|
</parameter>
|
|
|
|
<parameter name="pid" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
The pid file to use for this DC Manager Audit-worker Service (dcmanager-audit-worker) instance
|
|
</longdesc>
|
|
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker) pid file</shortdesc>
|
|
<content type="string" default="${OCF_RESKEY_pid_default}" />
|
|
</parameter>
|
|
|
|
<parameter name="additional_parameters" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
Additional parameters to pass on to the dcmanager-audit-worker
|
|
</longdesc>
|
|
<shortdesc lang="en">Additional parameters for dcmanager-audit-worker</shortdesc>
|
|
<content type="string" />
|
|
</parameter>
|
|
|
|
</parameters>
|
|
|
|
<actions>
|
|
<action name="start" timeout="20" />
|
|
<action name="stop" timeout="20" />
|
|
<action name="status" timeout="20" />
|
|
<action name="monitor" timeout="10" interval="5" />
|
|
<action name="validate-all" timeout="5" />
|
|
<action name="meta-data" timeout="5" />
|
|
</actions>
|
|
</resource-agent>
|
|
END
|
|
}
|
|
|
|
#######################################################################
|
|
# Functions invoked by resource manager actions
|
|
|
|
dcmanager_audit_validate() {
|
|
local rc
|
|
|
|
check_binary $OCF_RESKEY_binary
|
|
check_binary curl
|
|
check_binary tr
|
|
check_binary grep
|
|
check_binary cut
|
|
check_binary head
|
|
|
|
# A config file on shared storage that is not available
|
|
# during probes is OK.
|
|
if [ ! -f $OCF_RESKEY_config ]; then
|
|
if ! ocf_is_probe; then
|
|
ocf_log err "Config $OCF_RESKEY_config doesn't exist"
|
|
return $OCF_ERR_INSTALLED
|
|
fi
|
|
ocf_log_warn "Config $OCF_RESKEY_config not available during a probe"
|
|
fi
|
|
|
|
getent passwd $OCF_RESKEY_user >/dev/null 2>&1
|
|
rc=$?
|
|
if [ $rc -ne 0 ]; then
|
|
ocf_log err "User $OCF_RESKEY_user doesn't exist"
|
|
return $OCF_ERR_INSTALLED
|
|
fi
|
|
|
|
true
|
|
}
|
|
|
|
dcmanager_audit_status() {
|
|
local pid
|
|
local rc
|
|
|
|
if [ ! -f $OCF_RESKEY_pid ]; then
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) is not running"
|
|
return $OCF_NOT_RUNNING
|
|
else
|
|
pid=`cat $OCF_RESKEY_pid`
|
|
fi
|
|
|
|
ocf_run -warn kill -s 0 $pid
|
|
rc=$?
|
|
if [ $rc -eq 0 ]; then
|
|
return $OCF_SUCCESS
|
|
else
|
|
ocf_log info "Old PID file found, but DC Manager Audit-worker Service (dcmanager-audit-worker) is not running"
|
|
rm -f $OCF_RESKEY_pid
|
|
return $OCF_NOT_RUNNING
|
|
fi
|
|
}
|
|
|
|
dcmanager_audit_monitor() {
|
|
local rc
|
|
|
|
dcmanager_audit_status
|
|
rc=$?
|
|
|
|
# If status returned anything but success, return that immediately
|
|
if [ $rc -ne $OCF_SUCCESS ]; then
|
|
return $rc
|
|
fi
|
|
|
|
ocf_log debug "DC Manager Audit-worker Service (dcmanager-audit-worker) monitor succeeded"
|
|
return $OCF_SUCCESS
|
|
}
|
|
|
|
dcmanager_audit_start() {
|
|
local rc
|
|
|
|
dcmanager_audit_status
|
|
rc=$?
|
|
if [ $rc -eq $OCF_SUCCESS ]; then
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) already running"
|
|
return $OCF_SUCCESS
|
|
fi
|
|
|
|
# Change the working dir to /, to be sure it's accesible
|
|
cd /
|
|
|
|
# run the actual dcmanager-audit-worker daemon. Don't use ocf_run as we're sending the tool's output
|
|
# straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
|
|
su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
|
|
$OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
|
|
|
|
# Spin waiting for the server to come up.
|
|
# Let the CRM/LRM time us out if required
|
|
while true; do
|
|
dcmanager_audit_monitor
|
|
rc=$?
|
|
[ $rc -eq $OCF_SUCCESS ] && break
|
|
if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
ocf_log err "DC Manager Audit-worker Service (dcmanager-audit-worker) start failed"
|
|
exit $OCF_ERR_GENERIC
|
|
fi
|
|
sleep 1
|
|
done
|
|
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) started"
|
|
return $OCF_SUCCESS
|
|
}
|
|
|
|
dcmanager_audit_confirm_stop() {
|
|
local my_bin
|
|
local my_processes
|
|
|
|
my_binary=`which ${OCF_RESKEY_binary}`
|
|
my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`
|
|
|
|
if [ -n "${my_processes}" ]
|
|
then
|
|
ocf_log info "About to SIGKILL the following: ${my_processes}"
|
|
pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
|
|
fi
|
|
}
|
|
|
|
dcmanager_audit_stop() {
|
|
local rc
|
|
local pid
|
|
|
|
dcmanager_audit_status
|
|
rc=$?
|
|
if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) already stopped"
|
|
dcmanager_audit_confirm_stop
|
|
return $OCF_SUCCESS
|
|
fi
|
|
|
|
# Try SIGTERM
|
|
pid=`cat $OCF_RESKEY_pid`
|
|
ocf_run kill -s TERM $pid
|
|
rc=$?
|
|
if [ $rc -ne 0 ]; then
|
|
ocf_log err "DC Manager Audit-worker Service (dcmanager-audit-worker) couldn't be stopped"
|
|
dcmanager_audit_confirm_stop
|
|
exit $OCF_ERR_GENERIC
|
|
fi
|
|
|
|
# stop waiting
|
|
shutdown_timeout=15
|
|
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
|
|
shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
|
|
fi
|
|
count=0
|
|
while [ $count -lt $shutdown_timeout ]; do
|
|
dcmanager_audit_status
|
|
rc=$?
|
|
if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
break
|
|
fi
|
|
count=`expr $count + 1`
|
|
sleep 1
|
|
ocf_log debug "DC Manager Audit-worker Service (dcmanager-audit-worker) still hasn't stopped yet. Waiting ..."
|
|
done
|
|
|
|
dcmanager_audit_status
|
|
rc=$?
|
|
if [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
# SIGTERM didn't help either, try SIGKILL
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) failed to stop after ${shutdown_timeout}s \
|
|
using SIGTERM. Trying SIGKILL ..."
|
|
ocf_run kill -s KILL $pid
|
|
fi
|
|
dcmanager_audit_confirm_stop
|
|
|
|
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) stopped"
|
|
|
|
rm -f $OCF_RESKEY_pid
|
|
|
|
return $OCF_SUCCESS
|
|
}
|
|
|
|
#######################################################################
|
|
|
|
case "$1" in
|
|
meta-data) meta_data
|
|
exit $OCF_SUCCESS;;
|
|
usage|help) usage
|
|
exit $OCF_SUCCESS;;
|
|
esac
|
|
|
|
# Anything except meta-data and help must pass validation
|
|
dcmanager_audit_validate || exit $?
|
|
|
|
# What kind of method was invoked?
|
|
case "$1" in
|
|
start) dcmanager_audit_start;;
|
|
stop) dcmanager_audit_stop;;
|
|
status) dcmanager_audit_status;;
|
|
monitor) dcmanager_audit_monitor;;
|
|
validate-all) ;;
|
|
*) usage
|
|
exit $OCF_ERR_UNIMPLEMENTED;;
|
|
esac
|