fuel-library/files/fuel-ha-utils/ocf/ocf-fuel-funcs
Alex Schultz 7c210ee68d Update OCF stop actions to use procfs
This change adds a set of fuel functions that can be used to stop
processes using a pid file or a pid. These functions allow for the same
method of stopping processes to be used by all ocf scripts. The process
stopping logic leverages procfs to check to make sure the process has
been stopped.

This change adds a default retry of 5 times with a 2 second sleep
between sending SIGTERM signals to the processes. This retry count is
dynamic based on the timeout for the stopping command. If the SIGTERM
does not work, a SIGKILL will also be tried one time.

This change does not include the ocf script for rabbitmq as that is the
upstream version and won't be leveraging the ocf-fuel-funcs library.

This change does not include the ocf script for mysql as that needs
further work.

Change-Id: I8dcc1c37d17068a9c29480b886d4f1a051f28894
Partial-Bug: 1425579
2015-11-16 16:36:29 +00:00

151 lines
4.7 KiB
Bash

#!/bin/bash
#
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Shared resource script for common functions (ocf-fuel-funcs)
#
# Authors: Alex Schultz <aschultz@mirantis.com>
#
###########################################################
# Attempts to kill a process with retries and checks procfs
# to make sure the process is stopped. The signal is sent
# with pkill to the process group that the pid belongs to.
#
# Globals:
#   LL
# Arguments:
#   $1 - pid of the process to try and kill
#   $2 - service name used for logging
#   $3 - signal to use, defaults to SIGTERM
#   $4 - number of retries, defaults to 5
#   $5 - time to sleep between retries, defaults to 2
# Returns:
#   0 - if successful
#   1 - if process is still running according to procfs
#   2 - if invalid parameters passed in (empty pid, or a pid
#       whose process group is 0 or 1)
###########################################################
proc_kill()
{
    local pid="${1}"
    local service_name="${2}"
    local signal="${3:-SIGTERM}"
    local count="${4:-5}"
    local process_sleep="${5:-2}"
    local LH="${LL} proc_kill():"
    local pgrp=""

    # Reject an empty pid up front: "/proc/${pid}" would collapse to
    # "/proc/", which always exists and would look like a live process.
    if [ -z "${pid}" ]; then
        ocf_log info "${LH} pid provided to kill was empty or for init"
        return 2
    fi

    pgrp=$(ps -o pgid= -p "${pid}" | tr -d '[:space:]')

    # ps prints nothing when the pid does not exist. Keep pgrp only if it is
    # a plain number so the integer comparison below can never fail.
    case "${pgrp}" in
        ''|*[!0-9]*) pgrp="" ;;
    esac

    # Never signal init's process group, nor group 0, which pkill(1)
    # translates into its own process group.
    if [ -n "${pgrp}" ] && [ "${pgrp}" -le 1 ]; then
        ocf_log info "${LH} pid provided to kill was empty or for init"
        return 2
    fi

    while [ "${count}" -gt 0 ]; do
        if [ ! -d "/proc/${pid}" ]; then
            ocf_log debug "${LH} Stopped ${service_name} with ${signal}"
            return 0
        fi
        # Without a known process group there is nothing to signal; just
        # wait and re-check procfs on the next pass.
        if [ -n "${pgrp}" ]; then
            ocf_log debug "${LH} Stopping ${service_name} with ${signal}..."
            ocf_run pkill "-${signal}" -g "${pgrp}"
            if [ ! -d "/proc/${pid}" ]; then
                ocf_log debug "${LH} Stopped ${service_name} with ${signal}"
                return 0
            fi
        fi
        sleep "${process_sleep}"
        count=$(( count - 1 ))
    done

    # Check if the process ended after the last sleep
    if [ ! -d "/proc/${pid}" ]; then
        ocf_log debug "${LH} Stopped ${service_name} with ${signal}"
        return 0
    fi

    ocf_log debug "${LH} Failed to stop ${service_name} with ${signal}"
    return 1
}
###########################################################
# Attempts to kill a process with the given pid or pid file
# using proc_kill and will retry with SIGKILL if SIGTERM is
# unsuccessful. When a pid file was given it is removed once
# the process is stopped (or when the file held no pid).
#
# Globals:
#   OCF_ERR_GENERIC
#   OCF_SUCCESS
#   LL
# Arguments:
#   $1 - pidfile or pid
#   $2 - service name used for logging
#   $3 - stop process timeout (in sec), used to determine how many times we try
#        SIGTERM and an upper limit on how long this function should try and
#        stop the process. Defaults to 15.
# Returns:
#   OCF_SUCCESS - if successful
#   OCF_ERR_GENERIC - if process is still running according to procfs, or
#                     if $1 is neither a number nor an existing file
###########################################################
proc_stop()
{
    local pid_param="${1}"
    local service_name="${2}"
    local timeout="${3:-15}"
    local LH="${LL} proc_stop():"
    local pid=""
    local pidfile=""
    local stop_count

    case "${pid_param}" in
        ''|*[!0-9]*)
            # not a bare number, so it has to be an existing pid file
            if [ -e "${pid_param}" ]; then
                pidfile="${pid_param}"
                pid=$(cat "${pidfile}" 2>/dev/null)
            else
                # nothing to do here...
                ocf_log err "${LH} ERROR: pid param ${pid_param} is not a file or a number"
                return "${OCF_ERR_GENERIC}"
            fi
            ;;
        *)
            pid="${pid_param}"
            ;;
    esac

    # number of times to try a SIGTERM is (timeout - 5 seconds) / 2 seconds
    stop_count=$(( (${timeout} - 5) / 2 ))
    # make sure we stop at least once
    if [ "${stop_count}" -le 0 ]; then
        stop_count=1
    fi

    if [ -n "${pid}" ]; then
        ocf_log info "${LH} Stopping ${service_name}"
        # proc_kill takes the signal as $3 and the retry count as $4, so the
        # signal must be named explicitly for the count to land in $4.
        if ! proc_kill "${pid}" "${service_name}" SIGTERM "${stop_count}"; then
            # SIGTERM failed, send a single SIGKILL
            if ! proc_kill "${pid}" "${service_name}" SIGKILL 1 2; then
                ocf_log err "${LH} ERROR: could not stop ${service_name}"
                return "${OCF_ERR_GENERIC}"
            fi
        fi
    else
        ocf_log warn "${LH} unable to get PID from ${pidfile}"
    fi

    # Remove the pid file here which will remove empty pid files as well
    if [ -n "${pidfile}" ]; then
        rm -f "${pidfile}"
    fi

    ocf_log info "${LH} Stopped ${service_name}"
    return "${OCF_SUCCESS}"
}