Manage Apache and Nagios with Pacemaker
This change implements the OCF resource agents that manage the Apache and Nagios services in the network namespace where the VIP address is running. It configures the necessary Pacemaker resources and makes sure that the Apache and Nagios services are always co-located with the VIP. Change-Id: I524157498537fa4a652f2f59e267a0ceb12f8192
This commit is contained in:
parent
008c8c786f
commit
e57f7261d2
122
deployment_scripts/puppet/manifests/ha_services.pp
Normal file
122
deployment_scripts/puppet/manifests/ha_services.pp
Normal file
@ -0,0 +1,122 @@
|
||||
# Copyright 2016 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# Install the Apache OCF resource agent script into the 'fuel' provider
# directory, executable and owned by root.
file { 'ocf-ns_apache':
  ensure => present,
  owner  => 'root',
  group  => 'root',
  mode   => '0755',
  source => 'puppet:///modules/lma_infra_alerting/ocf-ns_apache',
  path   => '/usr/lib/ocf/resource.d/fuel/ocf-ns_apache',
}
|
||||
|
||||
# Install the Nagios OCF resource agent script into the 'fuel' provider
# directory, executable and owned by root.
file { 'ocf-ns_nagios':
  ensure => present,
  owner  => 'root',
  group  => 'root',
  mode   => '0755',
  source => 'puppet:///modules/lma_infra_alerting/ocf-ns_nagios',
  path   => '/usr/lib/ocf/resource.d/fuel/ocf-ns_nagios',
}
|
||||
|
||||
# Apache and Nagios must be able to bind to the VIP address, which requires
# net.ipv4.ip_nonlocal_bind to be enabled. The sysctl is only written when
# its current value does not already match '1'.
exec { 'net.ipv4.ip_nonlocal_bind':
  unless  => '/sbin/sysctl -n net.ipv4.ip_nonlocal_bind | /bin/grep 1',
  command => '/sbin/sysctl -w net.ipv4.ip_nonlocal_bind=1',
}
|
||||
|
||||
# Apache2 resources for Pacemaker
|
||||
# Declare 'apache2' to pacemaker_wrappers::service using the ocf-ns_apache
# primitive type. The agent script and the nonlocal-bind sysctl must be in
# place first.
pacemaker_wrappers::service { 'apache2':
  require        => [File['ocf-ns_apache'], Exec['net.ipv4.ip_nonlocal_bind']],
  primitive_type => 'ocf-ns_apache',
  prefix         => false,
  use_handler    => false,
  # Parameters handed to the resource agent (namespace and health-check URL).
  parameters     => {
    'ns'         => 'infrastructure_alerting',
    'status_url' => 'http://localhost:8001/server-status',
  },
  metadata       => {
    'migration-threshold' => '3',
    'failure-timeout'     => '120',
  },
  # Per-operation timeouts and monitor interval.
  operations     => {
    'monitor' => { 'interval' => '30', 'timeout' => '60' },
    'start'   => { 'timeout' => '60' },
    'stop'    => { 'timeout' => '60' },
  },
}
|
||||
|
||||
# Colocation constraint with an INFINITY score between the alerting VIP
# primitive and the apache2 primitive; created after the apache2 Cs_resource.
cs_rsc_colocation { 'infrastructure_alerting_vip-with-apache2':
  ensure     => present,
  require    => Cs_resource['apache2'],
  score      => 'INFINITY',
  primitives => ['vip__infrastructure_alerting_mgmt_vip', 'apache2'],
}
|
||||
|
||||
# Ensure apache2 is running, but only after its colocation constraint with
# the VIP has been declared.
service { 'apache2':
  require => Cs_rsc_colocation['infrastructure_alerting_vip-with-apache2'],
  ensure  => 'running',
}
|
||||
|
||||
# Nagios resources for Pacemaker
|
||||
# Declare 'nagios3' to pacemaker_wrappers::service using the ocf-ns_nagios
# primitive type. The agent script and the nonlocal-bind sysctl must be in
# place first.
pacemaker_wrappers::service { 'nagios3':
  require        => [File['ocf-ns_nagios'], Exec['net.ipv4.ip_nonlocal_bind']],
  primitive_type => 'ocf-ns_nagios',
  prefix         => false,
  use_handler    => false,
  # Parameters handed to the resource agent (network namespace only).
  parameters     => {
    'ns' => 'infrastructure_alerting',
  },
  metadata       => {
    'migration-threshold' => '3',
    'failure-timeout'     => '120',
  },
  # Per-operation timeouts and monitor interval.
  operations     => {
    'monitor' => { 'interval' => '30', 'timeout' => '60' },
    'start'   => { 'timeout' => '60' },
    'stop'    => { 'timeout' => '60' },
  },
}
|
||||
|
||||
# Colocation constraint with an INFINITY score between the alerting VIP
# primitive and the nagios3 primitive; created after the nagios3 Cs_resource.
cs_rsc_colocation { 'infrastructure_alerting_vip-with-nagios':
  ensure     => present,
  require    => Cs_resource['nagios3'],
  score      => 'INFINITY',
  primitives => ['vip__infrastructure_alerting_mgmt_vip', 'nagios3'],
}
|
||||
|
||||
# Ensure nagios3 is running, but only after its own colocation constraint
# with the VIP has been declared. (The dependency previously pointed at the
# apache2 constraint, which left the ordering against the Nagios constraint
# undefined.)
service { 'nagios3':
  ensure  => 'running',
  require => Cs_rsc_colocation['infrastructure_alerting_vip-with-nagios'],
}
|
@ -14,11 +14,15 @@
|
||||
#
|
||||
$hiera_dir = '/etc/hiera/plugins'
|
||||
$plugin_name = 'lma_infrastructure_alerting'
|
||||
$network_metadata = hiera('network_metadata')
|
||||
$alerting_vip = $network_metadata['vips']['infrastructure_alerting_mgmt_vip']['ipaddr']
|
||||
|
||||
$calculated_content = inline_template('
|
||||
---
|
||||
lma::corosync_roles:
|
||||
- infrastructure_alerting
|
||||
lma::infrastructure_alerting::vip: <%= @alerting_vip %>
|
||||
lma::infrastructure_alerting::vip_ns: infrastructure_alerting
|
||||
')
|
||||
|
||||
file { "${hiera_dir}/${plugin_name}.yaml":
|
||||
|
300
deployment_scripts/puppet/modules/lma_infra_alerting/files/ocf-ns_apache
Executable file
300
deployment_scripts/puppet/modules/lma_infra_alerting/files/ocf-ns_apache
Executable file
@ -0,0 +1,300 @@
|
||||
#!/bin/bash
|
||||
# Copyright 2016 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# See usage() function below for more details ...
|
||||
#
|
||||
# OCF instance parameters:
|
||||
# OCF_RESKEY_binary
|
||||
# OCF_RESKEY_config
|
||||
# OCF_RESKEY_ns
|
||||
# OCF_RESKEY_status_url
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
|
||||
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Fill in some defaults if no values are specified
|
||||
|
||||
# Human-readable service name used in log and usage messages.
SERVICE_NAME="Apache"

# Defaults used when the matching OCF_RESKEY_* variable is not set by the
# caller.
OCF_RESKEY_binary_default="/usr/sbin/apache2"
OCF_RESKEY_config_default="/etc/apache2/apache2.conf"
OCF_RESKEY_ns_default=
# Apache's mod_status handler is conventionally exposed at /server-status
# (not /status-server), so that is the path probed by default.
OCF_RESKEY_status_url_default="http://localhost/server-status"

: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_ns=${OCF_RESKEY_ns_default}}"
: "${OCF_RESKEY_status_url=${OCF_RESKEY_status_url_default}}"

# RUN_IN_NS always carries the "ip netns exec <ns>" prefix; RUN is the prefix
# used to launch commands and is empty when no namespace is configured.
RUN_IN_NS="ip netns exec $OCF_RESKEY_ns "
if [ -z "${OCF_RESKEY_ns}" ] ; then
    RUN=''
else
    RUN="$RUN_IN_NS "
fi
APACHE_PID_FILE="/var/run/apache2/apache2.pid"
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Print the list of supported actions and a one-line description of each to
# stdout.
usage() {
    cat <<EOF
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages the ${SERVICE_NAME} process as an HA resource

The 'start' operation starts the ${SERVICE_NAME}
The 'stop' operation stops the ${SERVICE_NAME}
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the ${SERVICE_NAME} is running
The 'monitor' operation reports whether the ${SERVICE_NAME} is running

EOF
}
|
||||
|
||||
# Emit the resource agent's XML meta-data on stdout: description, the
# binary/config/ns/status_url parameters with their defaults, and the
# supported actions with their advertised timeouts.
meta_data() {
    cat <<XML
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="apache">
<version>1.0</version>

<longdesc lang="en">
Manages the Apache daemon in a network namespace as a Pacemaker Resource.
</longdesc>
<shortdesc lang="en">Manages Apache</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Path of the Apache binary file that will be run.
</longdesc>
<shortdesc lang="en">Apache binary file</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Path to the Apache configuration file
</longdesc>
<shortdesc lang="en">Apache configuration</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="ns" unique="0" required="0">
<longdesc lang="en">
Network namespace in which Apache will be run
</longdesc>
<shortdesc lang="en">Apache namespace</shortdesc>
<content type="string" default="${OCF_RESKEY_ns_default}" />
</parameter>

<parameter name="status_url" unique="0" required="0">
<longdesc lang="en">
The status URL for checking Apache
</longdesc>
<shortdesc lang="en">Apache status URL</shortdesc>
<content type="string" default="${OCF_RESKEY_status_url_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
XML
}
|
||||
|
||||
#######################################################################
|
||||
# Functions invoked by resource manager actions
|
||||
|
||||
# Check the prerequisites for managing Apache: the binary (via check_binary),
# the configuration file and the network namespace. Also makes sure the
# directory holding the PID file exists.
#
# Returns OCF_ERR_INSTALLED when the config file or namespace is missing,
# 0 otherwise.
service_validate() {
    check_binary "$OCF_RESKEY_binary"

    if ! [[ -f $OCF_RESKEY_config ]]; then
        ocf_log err "Config $OCF_RESKEY_config doesn't exist"
        return "$OCF_ERR_INSTALLED"
    fi

    # Listing the PIDs of the namespace doubles as an existence/access check.
    if ! ip netns pids "$OCF_RESKEY_ns" > /dev/null 2>&1; then
        ocf_log err "Namespace $OCF_RESKEY_ns doesn't exist or can't be accessed"
        return "$OCF_ERR_INSTALLED"
    fi

    # The outcome of mkdir is deliberately not reflected in the return code.
    mkdir -p "$(dirname "${APACHE_PID_FILE}")"
    return 0
}
|
||||
|
||||
# Report whether Apache is running and healthy.
#
# Returns:
#   OCF_NOT_RUNNING  - no PID file, or the recorded PID is not alive
#   OCF_ERR_GENERIC  - the PID file is empty, or the status URL did not
#                      answer with HTTP 200
#   OCF_SUCCESS      - the process is alive and (when a status URL is set)
#                      the URL answered with HTTP 200
service_status() {
    local pid

    if [ ! -f "$APACHE_PID_FILE" ]; then
        ocf_log info "Apache is not running"
        return "$OCF_NOT_RUNNING"
    fi

    pid=$(cat "$APACHE_PID_FILE")
    if [ -z "${pid}" ]; then
        ocf_log err "PID file ${APACHE_PID_FILE} is empty!"
        return "$OCF_ERR_GENERIC"
    fi

    # Signal 0 only tests that the process exists; nothing is delivered.
    if ! ocf_run -warn kill -s 0 "$pid"; then
        ocf_log info "Old PID file found, but Apache process isn't running"
        return "$OCF_NOT_RUNNING"
    fi

    if [ -n "${OCF_RESKEY_status_url}" ] ; then
        # Probe with the same prefix used to launch the daemon (${RUN}): it is
        # empty when no namespace is configured, whereas ${RUN_IN_NS} would
        # expand to "ip netns exec /usr/bin/curl ..." in that case.
        if ! ${RUN} /usr/bin/curl -sL -w "%{http_code}" -XGET "${OCF_RESKEY_status_url}" -o /dev/null | grep -q 200; then
            return "$OCF_ERR_GENERIC"
        fi
    fi

    return "$OCF_SUCCESS"
}
|
||||
|
||||
# Monitor action: delegates to service_status and passes its return code
# through unchanged.
service_monitor() {
    service_status
}
|
||||
|
||||
# Start Apache (inside the configured network namespace, if any) and wait
# until service_monitor reports success.
#
# Returns OCF_SUCCESS when Apache is already running or has come up.
# Returns OCF_ERR_GENERIC when the start command itself fails, and exits with
# OCF_ERR_GENERIC when the monitor reports an error other than "not running".
service_start() {
    local rc

    service_monitor
    rc=$?
    if [ $rc -eq "$OCF_SUCCESS" ]; then
        ocf_log info "${SERVICE_NAME} is already running"
        return "$OCF_SUCCESS"
    fi

    if [ -n "${OCF_RESKEY_ns}" ]; then
        # This is required for Linux kernels >= 3.19. Previously the
        # net.ipv4.ip_nonlocal_bind setting was global to all namespaces but
        # starting with this version, it is per namespace.
        ocf_run ${RUN_IN_NS} /sbin/sysctl -w net.ipv4.ip_nonlocal_bind=1
        ocf_run ${RUN_IN_NS} ip link set up dev lo
    fi

    # The binary and the configured config file are passed as positional
    # arguments ($0 and $1 of the inner shell) so that paths are not
    # re-parsed by the shell. Passing -f makes the 'config' parameter
    # effective; its default is the stock Debian/Ubuntu apache2.conf path.
    if ! ocf_run ${RUN} bash -c '. /etc/apache2/envvars && "$0" -f "$1" -k start' \
            "${OCF_RESKEY_binary}" "${OCF_RESKEY_config}"; then
        # Fail right away instead of polling for a daemon that never started.
        ocf_log err "${SERVICE_NAME} start command failed"
        return "$OCF_ERR_GENERIC"
    fi

    # Spin waiting for the server to come up. The loop has no limit of its
    # own (presumably the cluster manager's start timeout bounds it).
    while true; do
        service_monitor
        rc=$?
        [ $rc -eq "$OCF_SUCCESS" ] && break
        if [ $rc -ne "$OCF_NOT_RUNNING" ]; then
            ocf_log err "${SERVICE_NAME} start failed"
            exit "$OCF_ERR_GENERIC"
        fi
        sleep 3
    done

    ocf_log info "${SERVICE_NAME} started"
    return "$OCF_SUCCESS"
}
|
||||
|
||||
# Stop Apache: send SIGTERM to the PID from the PID file, wait up to
# shutdown_timeout seconds for the monitor to report "not running", then fall
# back to SIGKILL. The PID file is removed at the end.
#
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC if SIGTERM cannot be sent.
service_stop() {
    local rc
    local pid

    service_monitor
    rc=$?
    if [ $rc -eq "$OCF_NOT_RUNNING" ]; then
        ocf_log info "${SERVICE_NAME} is already stopped"
        return "$OCF_SUCCESS"
    fi

    # Try SIGTERM
    pid=$(cat "$APACHE_PID_FILE")
    if ! ocf_run kill -s TERM "$pid"; then
        ocf_log err "${SERVICE_NAME} couldn't be stopped"
        exit "$OCF_ERR_GENERIC"
    fi

    # Grace period: 15s by default, otherwise the operation timeout
    # (OCF_RESKEY_CRM_meta_timeout divided by 1000) minus 5 seconds.
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout/1000)-5 ))
    fi

    for (( count = 0; count < shutdown_timeout; count++ )); do
        service_monitor
        [ $? -eq "$OCF_NOT_RUNNING" ] && break
        sleep 1
        ocf_log debug "${SERVICE_NAME} still hasn't stopped yet. Waiting ..."
    done

    service_monitor
    rc=$?
    if [ "${rc}" -ne "${OCF_NOT_RUNNING}" ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "${SERVICE_NAME} failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL "${pid}"
    fi

    ocf_log info "${SERVICE_NAME} stopped"
    rm -f "${APACHE_PID_FILE}"

    return "${OCF_SUCCESS}"
}
|
||||
|
||||
#######################################################################
|
||||
|
||||
# meta-data and usage/help are answered without validating the environment.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit "$OCF_SUCCESS"
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit "$OCF_SUCCESS"
fi

# Anything except meta-data and help must pass validation
service_validate || exit $?

# Dispatch the requested action; the script's exit status is the status of
# the selected function. validate-all has nothing left to do at this point.
case "$1" in
    start)
        service_start
        ;;
    stop)
        service_stop
        ;;
    status)
        service_status
        ;;
    monitor)
        service_monitor
        ;;
    validate-all)
        ;;
    *)
        usage
        exit "$OCF_ERR_UNIMPLEMENTED"
        ;;
esac
|
288
deployment_scripts/puppet/modules/lma_infra_alerting/files/ocf-ns_nagios
Executable file
288
deployment_scripts/puppet/modules/lma_infra_alerting/files/ocf-ns_nagios
Executable file
@ -0,0 +1,288 @@
|
||||
#!/bin/bash
|
||||
# Copyright 2016 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# See usage() function below for more details ...
|
||||
#
|
||||
# OCF instance parameters:
|
||||
# OCF_RESKEY_binary
|
||||
# OCF_RESKEY_config
|
||||
# OCF_RESKEY_ns
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
|
||||
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Fill in some defaults if no values are specified
|
||||
|
||||
# Human-readable service name used in log and usage messages.
SERVICE_NAME="Nagios"

# Defaults used when the matching OCF_RESKEY_* variable is not set by the
# caller.
OCF_RESKEY_binary_default="/usr/sbin/nagios3"
OCF_RESKEY_config_default="/etc/nagios3/nagios.cfg"
OCF_RESKEY_ns_default=

: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_ns=${OCF_RESKEY_ns_default}}

# RUN_IN_NS always carries the "ip netns exec <ns>" prefix; RUN is the prefix
# used to launch the daemon and stays empty when no namespace is configured.
RUN_IN_NS="ip netns exec $OCF_RESKEY_ns "
RUN=''
if [ -n "${OCF_RESKEY_ns}" ] ; then
    RUN="$RUN_IN_NS "
fi
NAGIOS_PID_FILE="/var/run/nagios3/nagios3.pid"
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Print the list of supported actions and a one-line description of each to
# stdout.
usage() {
    cat <<EOF
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages the ${SERVICE_NAME} process as an HA resource

The 'start' operation starts the ${SERVICE_NAME}
The 'stop' operation stops the ${SERVICE_NAME}
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the ${SERVICE_NAME} is running
The 'monitor' operation reports whether the ${SERVICE_NAME} is running

EOF
}
|
||||
|
||||
# Emit the resource agent's XML meta-data on stdout: description, the
# binary/config/ns parameters with their defaults, and the supported actions
# with their advertised timeouts.
meta_data() {
    cat <<XML
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nagios">
<version>1.0</version>

<longdesc lang="en">
Manages the Nagios daemon in a network namespace as a Pacemaker Resource.
</longdesc>
<shortdesc lang="en">Manages Nagios</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Path of the Nagios binary file that will be run.
</longdesc>
<shortdesc lang="en">Nagios binary file</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Path to the Nagios configuration file
</longdesc>
<shortdesc lang="en">Nagios configuration</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="ns" unique="0" required="0">
<longdesc lang="en">
Network namespace in which Nagios will be run
</longdesc>
<shortdesc lang="en">Nagios namespace</shortdesc>
<content type="string" default="${OCF_RESKEY_ns_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
XML
}
|
||||
|
||||
#######################################################################
|
||||
# Functions invoked by resource manager actions
|
||||
|
||||
# Check the prerequisites for managing Nagios: the binary (via check_binary),
# the configuration file and the network namespace. Also makes sure the
# directory holding the PID file exists.
#
# Returns OCF_ERR_INSTALLED when the config file or namespace is missing,
# 0 otherwise.
service_validate() {
    check_binary "$OCF_RESKEY_binary"

    if ! [[ -f $OCF_RESKEY_config ]]; then
        ocf_log err "Config $OCF_RESKEY_config doesn't exist"
        return "$OCF_ERR_INSTALLED"
    fi

    # Listing the PIDs of the namespace doubles as an existence/access check.
    if ! ip netns pids "$OCF_RESKEY_ns" > /dev/null 2>&1; then
        ocf_log err "Namespace $OCF_RESKEY_ns doesn't exist or can't be accessed"
        return "$OCF_ERR_INSTALLED"
    fi

    # The outcome of mkdir is deliberately not reflected in the return code.
    mkdir -p "$(dirname "${NAGIOS_PID_FILE}")"
    return 0
}
|
||||
|
||||
# Report whether Nagios is running, based on its PID file.
#
# Returns:
#   OCF_NOT_RUNNING  - no PID file, or the recorded PID is not alive
#   OCF_ERR_GENERIC  - the PID file exists but is empty
#   OCF_SUCCESS      - the recorded PID is alive
service_status() {
    local pid

    if [ ! -f "$NAGIOS_PID_FILE" ]; then
        ocf_log info "Nagios is not running"
        return "$OCF_NOT_RUNNING"
    fi

    pid=$(cat "$NAGIOS_PID_FILE")
    if [ -z "${pid}" ]; then
        ocf_log err "PID file ${NAGIOS_PID_FILE} is empty!"
        return "$OCF_ERR_GENERIC"
    fi

    # Signal 0 only tests that the process exists; nothing is delivered.
    if ! ocf_run -warn kill -s 0 "$pid"; then
        ocf_log info "Old PID file found, but Nagios process isn't running"
        return "$OCF_NOT_RUNNING"
    fi

    return "$OCF_SUCCESS"
}
|
||||
|
||||
# Monitor action: delegates to service_status and passes its return code
# through unchanged.
service_monitor() {
    service_status
}
|
||||
|
||||
# Start Nagios (inside the configured network namespace, if any) after
# verifying its configuration, then wait until service_monitor reports
# success.
#
# Returns OCF_SUCCESS when Nagios is already running or has come up.
# Returns OCF_ERR_GENERIC when the daemon cannot be launched, and exits with
# OCF_ERR_GENERIC when the configuration is invalid or the monitor reports an
# error other than "not running".
service_start() {
    local rc

    service_monitor
    rc=$?
    if [ $rc -eq "$OCF_SUCCESS" ]; then
        ocf_log info "${SERVICE_NAME} is already running"
        return "$OCF_SUCCESS"
    fi

    if [ -n "${OCF_RESKEY_ns}" ]; then
        # This is required for Linux kernels >= 3.19. Previously the
        # net.ipv4.ip_nonlocal_bind setting was global to all namespaces but
        # starting with this version, it is per namespace.
        ocf_run ${RUN_IN_NS} /sbin/sysctl -w net.ipv4.ip_nonlocal_bind=1
        ocf_run ${RUN_IN_NS} ip link set up dev lo
    fi

    # Refuse to start with a configuration that fails Nagios' own check (-v).
    if ! ocf_run "${OCF_RESKEY_binary}" -v "${OCF_RESKEY_config}"; then
        # 'err' is the level name used by every other error message here.
        ocf_log err "Nagios configuration is invalid"
        exit "$OCF_ERR_GENERIC"
    fi

    if ! ocf_run ${RUN} "${OCF_RESKEY_binary}" -d "${OCF_RESKEY_config}"; then
        # Fail right away instead of polling for a daemon that never started.
        ocf_log err "${SERVICE_NAME} start command failed"
        return "$OCF_ERR_GENERIC"
    fi

    # Spin waiting for the server to come up. The loop has no limit of its
    # own (presumably the cluster manager's start timeout bounds it).
    while true; do
        service_monitor
        rc=$?
        [ $rc -eq "$OCF_SUCCESS" ] && break
        if [ $rc -ne "$OCF_NOT_RUNNING" ]; then
            ocf_log err "${SERVICE_NAME} start failed"
            exit "$OCF_ERR_GENERIC"
        fi
        sleep 3
    done

    ocf_log info "${SERVICE_NAME} started"
    return "$OCF_SUCCESS"
}
|
||||
|
||||
# Stop Nagios: send SIGTERM to the PID from the PID file, wait up to
# shutdown_timeout seconds for the monitor to report "not running", then fall
# back to SIGKILL. The PID file is removed at the end.
#
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC if SIGTERM cannot be sent.
service_stop() {
    local rc
    local pid

    service_monitor
    rc=$?
    if [ $rc -eq "$OCF_NOT_RUNNING" ]; then
        ocf_log info "${SERVICE_NAME} is already stopped"
        return "$OCF_SUCCESS"
    fi

    # Try SIGTERM
    pid=$(cat "$NAGIOS_PID_FILE")
    if ! ocf_run kill -s TERM "$pid"; then
        ocf_log err "${SERVICE_NAME} couldn't be stopped"
        exit "$OCF_ERR_GENERIC"
    fi

    # Grace period: 15s by default, otherwise the operation timeout
    # (OCF_RESKEY_CRM_meta_timeout divided by 1000) minus 5 seconds.
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout/1000)-5 ))
    fi

    for (( count = 0; count < shutdown_timeout; count++ )); do
        service_monitor
        [ $? -eq "$OCF_NOT_RUNNING" ] && break
        sleep 1
        ocf_log debug "${SERVICE_NAME} still hasn't stopped yet. Waiting ..."
    done

    service_monitor
    rc=$?
    if [ "${rc}" -ne "${OCF_NOT_RUNNING}" ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "${SERVICE_NAME} failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL "${pid}"
    fi

    ocf_log info "${SERVICE_NAME} stopped"
    rm -f "${NAGIOS_PID_FILE}"

    return "${OCF_SUCCESS}"
}
|
||||
|
||||
#######################################################################
|
||||
|
||||
# meta-data and usage/help are answered without validating the environment.
if [ "$1" = "meta-data" ]; then
    meta_data
    exit "$OCF_SUCCESS"
elif [ "$1" = "usage" ] || [ "$1" = "help" ]; then
    usage
    exit "$OCF_SUCCESS"
fi

# Anything except meta-data and help must pass validation
service_validate || exit $?

# Dispatch the requested action; the script's exit status is the status of
# the selected function. validate-all has nothing left to do at this point.
case "$1" in
    start)
        service_start
        ;;
    stop)
        service_stop
        ;;
    status)
        service_status
        ;;
    monitor)
        service_monitor
        ;;
    validate-all)
        ;;
    *)
        usage
        exit "$OCF_ERR_UNIMPLEMENTED"
        ;;
esac
|
@ -8,3 +8,11 @@
|
||||
puppet_manifest: puppet/manifests/nagios.pp
|
||||
puppet_modules: puppet/modules
|
||||
timeout: 600
|
||||
|
||||
# Apply the ha_services.pp manifest on nodes holding the
# infrastructure_alerting role (stage post_deployment/8101).
- role:
    - infrastructure_alerting
  stage: post_deployment/8101
  type: puppet
  parameters:
    puppet_manifest: puppet/manifests/ha_services.pp
    puppet_modules: puppet/modules
    timeout: 600
|
||||
|
Loading…
Reference in New Issue
Block a user