From 949ab0e0dfa238165cf6bb65dee76d205de89a48 Mon Sep 17 00:00:00 2001
From: Victor Romano
Date: Thu, 4 Jul 2024 12:04:44 -0300
Subject: [PATCH] Add OCF file for dcagent

This commit adds the OCF file necessary for high availability
management of dcagent service by SM.

Note: This commit was tested alongside [1] to add the service into
SM database.

[1]: https://review.opendev.org/c/starlingx/ha/+/923698

Test plan:
  - PASS: Configure and provision the service using SM and verify
          it has correctly started and can be restarted with
          'sm-restart'.

Story: 2011106
Task: 50598

Change-Id: I002007beed550c55f4f5601d1450bf8e9a6435ad
Signed-off-by: Victor Romano
---
 distributedcloud/files/dcagent.conf |   1 +
 distributedcloud/ocf/dcagent-api    | 354 ++++++++++++++++++++++++++++
 2 files changed, 355 insertions(+)
 create mode 100644 distributedcloud/files/dcagent.conf
 create mode 100644 distributedcloud/ocf/dcagent-api

diff --git a/distributedcloud/files/dcagent.conf b/distributedcloud/files/dcagent.conf
new file mode 100644
index 000000000..e867e1f2a
--- /dev/null
+++ b/distributedcloud/files/dcagent.conf
@@ -0,0 +1 @@
+d /var/run/dcagent 0755 root root -
diff --git a/distributedcloud/ocf/dcagent-api b/distributedcloud/ocf/dcagent-api
new file mode 100644
index 000000000..553557ea6
--- /dev/null
+++ b/distributedcloud/ocf/dcagent-api
@@ -0,0 +1,354 @@
+#!/bin/sh
+# OpenStack DC Agent API Service (dcagent-api)
+#
+# Description:  Manages a DC Agent API Service
+#               (dcagent-api) process as an HA resource
+#
+# Copyright (c) 2024 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#
+# See usage() function below for more details ...
+#
+# OCF instance parameters:
+#   OCF_RESKEY_binary
+#   OCF_RESKEY_config
+#   OCF_RESKEY_user
+#   OCF_RESKEY_pid
+#   OCF_RESKEY_additional_parameters
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+# Fill in some defaults if no values are specified
+
+OCF_RESKEY_binary_default="dcagent-api"
+OCF_RESKEY_config_default="/etc/dcagent/dcagent.conf"
+OCF_RESKEY_user_default="root"
+OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+#######################################################################
+
+# Print a short command-line help text for this resource agent.
+# NOTE(review): heredoc wording follows the usual OCF agent template;
+# confirm against the originally submitted text.
+usage() {
+    cat <<UEND
+        usage: $0 (start|stop|validate-all|meta-data|status|monitor)
+
+        $0 manages an OpenStack DC Agent API (dcagent-api) process as an HA resource
+
+        The 'start' operation starts the dcagent-api service.
+        The 'stop' operation stops the dcagent-api service.
+        The 'validate-all' operation reports whether the parameters are valid
+        The 'meta-data' operation reports this RA's meta-data information
+        The 'status' operation reports whether the dcagent-api service is running
+        The 'monitor' operation reports whether the dcagent-api service seems to be working
+
+UEND
+}
+
+# Emit the OCF resource-agent meta-data XML on stdout.
+# NOTE(review): the element/attribute markup (including the action
+# timeouts) follows the usual OCF agent template; confirm the values
+# against the originally submitted file.
+meta_data() {
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="dcagent-api">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent for the DC Agent API Service (dcagent-api)
+</longdesc>
+<shortdesc lang="en">Manages the DC Agent API
+Service(dcagent-api)</shortdesc>
+<parameters>
+
+<parameter name="binary" unique="0" required="0">
+<longdesc lang="en">
+Location of the DC Agent API binary (dcagent-api)
+</longdesc>
+<shortdesc lang="en">DC Agent API binary (dcagent-api)</shortdesc>
+<content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config" unique="0" required="0">
+<longdesc lang="en">
+Location of the DC Agent API (dcagent-api) configuration file
+</longdesc>
+<shortdesc lang="en">DC Agent API (dcagent-api registry) config file</shortdesc>
+<content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User running DC Agent API (dcagent-api)
+</longdesc>
+<shortdesc lang="en">DC Agent API (dcagent-api) user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="pid" unique="0" required="0">
+<longdesc lang="en">
+The pid file to use for this DC Agent API (dcagent-api) instance
+</longdesc>
+<shortdesc lang="en">DC Agent API (dcagent-api) pid file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+<parameter name="additional_parameters" unique="0" required="0">
+<longdesc lang="en">
+Additional parameters to pass on to the DC Agent API (dcagent-api)
+</longdesc>
+<shortdesc lang="en">Additional parameters for dcagent-api</shortdesc>
+<content type="string" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" timeout="10" interval="5" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+# Functions invoked by resource manager actions
+
+# Check that everything the agent needs is present on this node.
+# Returns $OCF_ERR_INSTALLED when the config file is missing (outside of
+# a probe) or the configured user does not exist; otherwise succeeds.
+dcagent_api_validate() {
+    local rc
+
+    check_binary $OCF_RESKEY_binary
+    check_binary curl
+    check_binary tr
+    check_binary grep
+    check_binary cut
+    check_binary head
+
+    # A config file on shared storage that is not available
+    # during probes is OK.
+    if [ ! -f "$OCF_RESKEY_config" ]; then
+        if ! ocf_is_probe; then
+            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
+            return $OCF_ERR_INSTALLED
+        fi
+        # The level is an argument to ocf_log, as in the other calls here.
+        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
+    fi
+
+    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log err "User $OCF_RESKEY_user doesn't exist"
+        return $OCF_ERR_INSTALLED
+    fi
+
+    true
+}
+
+# Report whether the process recorded in the pid file is alive.
+# Returns $OCF_SUCCESS if it is, $OCF_NOT_RUNNING otherwise; a pid file
+# whose process is gone is removed.
+dcagent_api_status() {
+    local pid
+    local rc
+
+    if [ ! -f "$OCF_RESKEY_pid" ]; then
+        ocf_log info "DC Agent API (dcagent-api) is not running"
+        return $OCF_NOT_RUNNING
+    else
+        pid=$(cat "$OCF_RESKEY_pid")
+    fi
+
+    ocf_run -warn kill -s 0 $pid
+    rc=$?
+    if [ $rc -eq 0 ]; then
+        return $OCF_SUCCESS
+    else
+        ocf_log info "Old PID file found, but DC Agent API (dcagent-api) is not running"
+        rm -f "$OCF_RESKEY_pid"
+        return $OCF_NOT_RUNNING
+    fi
+}
+
+# Monitor action: currently just the pid-based status check.
+dcagent_api_monitor() {
+    local rc
+
+    dcagent_api_status
+    rc=$?
+
+    # If status returned anything but success, return that immediately
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        return $rc
+    fi
+
+    ocf_log debug "DC Agent API (dcagent-api) monitor succeeded"
+    return $OCF_SUCCESS
+}
+
+# Start the service as $OCF_RESKEY_user, record its pid and wait until
+# the monitor reports it running.
+dcagent_api_start() {
+    local rc
+
+    dcagent_api_status
+    rc=$?
+    if [ $rc -eq $OCF_SUCCESS ]; then
+        ocf_log info "DC Agent API (dcagent-api) already running"
+        return $OCF_SUCCESS
+    fi
+
+    # Change the working dir to /, to be sure it's accessible
+    cd /
+
+    # run the actual dcagent-api daemon. Don't use ocf_run as we're sending the tool's output
+    # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
+    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
+        $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > "$OCF_RESKEY_pid"
+
+    # Spin waiting for the server to come up.
+    # Let the CRM/LRM time us out if required
+    while true; do
+        dcagent_api_monitor
+        rc=$?
+        [ $rc -eq $OCF_SUCCESS ] && break
+        if [ $rc -ne $OCF_NOT_RUNNING ]; then
+            ocf_log err "DC Agent API (dcagent-api) start failed"
+            exit $OCF_ERR_GENERIC
+        fi
+        sleep 1
+    done
+
+    ocf_log info "DC Agent API (dcagent-api) started"
+    return $OCF_SUCCESS
+}
+
+# Last-resort cleanup: SIGKILL any python process still running the
+# configured binary, whether or not the pid file knew about it.
+dcagent_api_confirm_stop() {
+    local my_binary
+    local my_pattern
+    local my_processes
+
+    my_binary=$(which ${OCF_RESKEY_binary})
+    # Without a resolved path the pattern below could match unrelated
+    # python processes, so never reach pkill in that case.
+    if [ -z "${my_binary}" ]; then
+        return 0
+    fi
+
+    # POSIX classes: "\w" is not an escape inside an ERE bracket expression.
+    my_pattern="^(python|/usr/bin/python|/usr/bin/python3) ${my_binary}([^[:alnum:]_-]|$)"
+    my_processes=$(pgrep -l -f "${my_pattern}")
+
+    if [ -n "${my_processes}" ]
+    then
+        ocf_log info "About to SIGKILL the following: ${my_processes}"
+        pkill -KILL -f "${my_pattern}"
+    fi
+}
+
+# Stop the service: SIGTERM, wait up to the shutdown timeout, then
+# SIGKILL; finally sweep for stragglers and drop the pid file.
+dcagent_api_stop() {
+    local rc
+    local pid
+    local shutdown_timeout
+    local count
+
+    dcagent_api_status
+    rc=$?
+    if [ $rc -eq $OCF_NOT_RUNNING ]; then
+        ocf_log info "DC Agent API (dcagent-api) already stopped"
+        dcagent_api_confirm_stop
+        return $OCF_SUCCESS
+    fi
+
+    # Try SIGTERM
+    pid=$(cat "$OCF_RESKEY_pid")
+    ocf_run kill -s TERM $pid
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log err "DC Agent API (dcagent-api) couldn't be stopped"
+        dcagent_api_confirm_stop
+        exit $OCF_ERR_GENERIC
+    fi
+
+    # stop waiting
+    shutdown_timeout=15
+    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
+    fi
+    count=0
+    while [ $count -lt $shutdown_timeout ]; do
+        dcagent_api_status
+        rc=$?
+        if [ $rc -eq $OCF_NOT_RUNNING ]; then
+            break
+        fi
+        count=$(($count + 1))
+        sleep 1
+        ocf_log debug "DC Agent API (dcagent-api) still hasn't stopped yet. Waiting ..."
+    done
+
+    dcagent_api_status
+    rc=$?
+    if [ $rc -ne $OCF_NOT_RUNNING ]; then
+        # SIGTERM didn't help either, try SIGKILL
+        ocf_log info "DC Agent API (dcagent-api) failed to stop after ${shutdown_timeout}s \
+          using SIGTERM. Trying SIGKILL ..."
+        ocf_run kill -s KILL $pid
+    fi
+    dcagent_api_confirm_stop
+
+    ocf_log info "DC Agent API (dcagent-api) stopped"
+
+    rm -f "$OCF_RESKEY_pid"
+
+    return $OCF_SUCCESS
+}
+
+#######################################################################
+
+case "$1" in
+  meta-data)    meta_data
+                exit $OCF_SUCCESS;;
+  usage|help)   usage
+                exit $OCF_SUCCESS;;
+esac
+
+# Anything except meta-data and help must pass validation
+dcagent_api_validate || exit $?
+
+# What kind of method was invoked?
+case "$1" in
+  start)        dcagent_api_start;;
+  stop)         dcagent_api_stop;;
+  status)       dcagent_api_status;;
+  monitor)      dcagent_api_monitor;;
+  validate-all) ;;
+  *)            usage
+                exit $OCF_ERR_UNIMPLEMENTED;;
+esac