Merge "add nagios checks"
This commit is contained in:
commit
661a788e22
@ -14,3 +14,4 @@ include:
|
|||||||
- contrib.network.ip
|
- contrib.network.ip
|
||||||
- contrib.python
|
- contrib.python
|
||||||
- contrib.openstack.policyd
|
- contrib.openstack.policyd
|
||||||
|
- contrib.charmsupport
|
||||||
|
16
config.yaml
16
config.yaml
@ -457,3 +457,19 @@ options:
|
|||||||
description: |
|
description: |
|
||||||
Allow the charm and packages to restart services automatically when
|
Allow the charm and packages to restart services automatically when
|
||||||
required.
|
required.
|
||||||
|
nagios_context:
|
||||||
|
default: "juju"
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
Used by the nrpe subordinate charms.
|
||||||
|
A string that will be prepended to instance name to set the host name
|
||||||
|
in nagios. So for instance the hostname would be something like:
|
||||||
|
juju-myservice-0
|
||||||
|
If you're running multiple environments with the same services in them
|
||||||
|
this allows you to differentiate between them.
|
||||||
|
nagios_servicegroups:
|
||||||
|
default: ""
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
A comma-separated list of nagios servicegroups.
|
||||||
|
If left empty, the nagios_context will be used as the servicegroup
|
||||||
|
51
files/ovs_vsctl/check_ovs_vsctl.py
Normal file
51
files/ovs_vsctl/check_ovs_vsctl.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: us-ascii -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from nagios_plugin3 import (
|
||||||
|
CriticalError,
|
||||||
|
UnknownError,
|
||||||
|
try_check,
|
||||||
|
check_file_freshness,
|
||||||
|
)
|
||||||
|
|
||||||
|
INPUT_FILE = "/var/lib/nagios/ovs_vsctl.out"
|
||||||
|
INPUT_RC = "/var/lib/nagios/ovs_vsctl.rc"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_output():
    """Evaluate the cached ``ovs-vsctl list-br`` result and report on it.

    Reads the return code from INPUT_RC and the command output from
    INPUT_FILE, then prints an ``OK:`` line listing every output line.

    :raises UnknownError: if either cache file does not exist
    :raises CriticalError: if the cached return code is not "0" or the
        cached output contains no lines
    """
    # Both cache files must be present before anything can be evaluated;
    # the output file is checked first, then the return-code file.
    for required in (INPUT_FILE, INPUT_RC):
        if not os.path.exists(required):
            raise UnknownError(
                "UNKNOWN: {} does not exist (yet?)".format(required))

    # Staleness detection is delegated to nagios_plugin3.
    try_check(check_file_freshness, INPUT_FILE)

    with open(INPUT_RC) as rc_handle:
        exit_status = rc_handle.readline().strip()
    if exit_status != "0":
        raise CriticalError("CRITICAL: ovs-vsctl list-br returns error")

    with open(INPUT_FILE) as output_handle:
        bridge_lines = output_handle.readlines()
    if not bridge_lines:
        raise CriticalError(
            "CRITICAL: ovs-vsctl list-br returned no bridges")

    bridges = ", ".join(line.strip() for line in bridge_lines)
    print("OK: {}".format(bridges))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: run parse_output through nagios_plugin3's try_check.

    NOTE(review): try_check presumably maps the raised check errors onto
    Nagios exit states -- confirm against nagios_plugin3.
    """
    try_check(parse_output)


# Only run the check when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
35
files/ovs_vsctl/cron_ovs_vsctl.sh
Normal file
35
files/ovs_vsctl/cron_ovs_vsctl.sh
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Cache the output and return code of 'ovs-vsctl list-br' so that the
# check_ovs_vsctl.py nagios check can read them from /var/lib/nagios.
#
#   stdout + stderr -> ${FILE}
#   return code     -> ${RC_FILE}
set -eu

PATH="/snap/bin:/usr/local/bin:$PATH"
FILE=/var/lib/nagios/ovs_vsctl.out
RC_FILE=/var/lib/nagios/ovs_vsctl.rc
LOCK_FILE=/var/lib/nagios/ovs_vsctl.lock
CMD="ovs-vsctl list-br"
GROUP="nagios"
# Created only once the lock is held; kept empty until then so that the
# cleanup handler is safe under 'set -u'.
TMP_FILE=""

if [ $# -gt 0 ]; then
    echo "This program will cache the output of '${CMD}' as follows"
    echo " stdout + stderr -> ${FILE}"
    echo " return code -> ${RC_FILE}"
    echo
    echo "It does not accept any option or argument"
    exit 0
fi

sleep "$(( RANDOM % 60 + 10 ))s"  # sleep 10-69s

# With noclobber the redirection fails if the lock file already exists, so
# checking for the lock and taking it happen in one atomic step.
if ! ( set -o noclobber; : > "${LOCK_FILE}" ) 2> /dev/null; then
    if [ -e "${LOCK_FILE}" ]; then
        echo "Lock file (${LOCK_FILE}) in use, abandoning" >&2
    else
        echo "Unable to create lock file (${LOCK_FILE})" >&2
    fi
    exit 1
fi

# Always release the lock (and any leftover temp file), even when a command
# below fails and 'set -e' aborts the script; otherwise a stale lock would
# make every later run abandon.
cleanup() {
    rm -f "${LOCK_FILE}"
    if [ -n "${TMP_FILE}" ]; then
        rm -f "${TMP_FILE}"
    fi
}
trap cleanup EXIT

# Create the temp file next to the destination so the final mv is a rename
# on the same filesystem.
TMP_FILE="$(mktemp "${FILE}.XXXXXX")"

# '|| RC=$?' keeps 'set -e' from aborting before the return code is stored.
# The '2>&1' must follow the file redirection for stderr to be captured too.
RC=0
# shellcheck disable=SC2086  # CMD is intentionally word-split
$CMD > "${TMP_FILE}" 2>&1 || RC=$?
echo "${RC}" > "${RC_FILE}"

# Fix ownership and mode before publishing so the check never sees an
# unreadable output file.
chown ":${GROUP}" "${TMP_FILE}"
chmod 644 "${TMP_FILE}"
mv "${TMP_FILE}" "${FILE}"
exit 0
|
13
hooks/charmhelpers/contrib/charmsupport/__init__.py
Normal file
13
hooks/charmhelpers/contrib/charmsupport/__init__.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright 2014-2015 Canonical Limited.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
522
hooks/charmhelpers/contrib/charmsupport/nrpe.py
Normal file
522
hooks/charmhelpers/contrib/charmsupport/nrpe.py
Normal file
@ -0,0 +1,522 @@
|
|||||||
|
# Copyright 2012-2021 Canonical Limited.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""Compatibility with the nrpe-external-master charm"""
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import grp
|
||||||
|
import os
|
||||||
|
import pwd
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from charmhelpers.core.hookenv import (
|
||||||
|
config,
|
||||||
|
hook_name,
|
||||||
|
local_unit,
|
||||||
|
log,
|
||||||
|
relation_get,
|
||||||
|
relation_ids,
|
||||||
|
relation_set,
|
||||||
|
relations_of_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
from charmhelpers.core.host import service
|
||||||
|
from charmhelpers.core import host
|
||||||
|
|
||||||
|
# This module adds compatibility with the nrpe-external-master and plain nrpe
|
||||||
|
# subordinate charms. To use it in your charm:
|
||||||
|
#
|
||||||
|
# 1. Update metadata.yaml
|
||||||
|
#
|
||||||
|
# provides:
|
||||||
|
# (...)
|
||||||
|
# nrpe-external-master:
|
||||||
|
# interface: nrpe-external-master
|
||||||
|
# scope: container
|
||||||
|
#
|
||||||
|
# and/or
|
||||||
|
#
|
||||||
|
# provides:
|
||||||
|
# (...)
|
||||||
|
# local-monitors:
|
||||||
|
# interface: local-monitors
|
||||||
|
# scope: container
|
||||||
|
|
||||||
|
#
|
||||||
|
# 2. Add the following to config.yaml
|
||||||
|
#
|
||||||
|
# nagios_context:
|
||||||
|
# default: "juju"
|
||||||
|
# type: string
|
||||||
|
# description: |
|
||||||
|
# Used by the nrpe subordinate charms.
|
||||||
|
# A string that will be prepended to instance name to set the host name
|
||||||
|
# in nagios. So for instance the hostname would be something like:
|
||||||
|
# juju-myservice-0
|
||||||
|
# If you're running multiple environments with the same services in them
|
||||||
|
# this allows you to differentiate between them.
|
||||||
|
# nagios_servicegroups:
|
||||||
|
# default: ""
|
||||||
|
# type: string
|
||||||
|
# description: |
|
||||||
|
# A comma-separated list of nagios servicegroups.
|
||||||
|
# If left empty, the nagios_context will be used as the servicegroup
|
||||||
|
#
|
||||||
|
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
|
||||||
|
#
|
||||||
|
# 4. Update your hooks.py with something like this:
|
||||||
|
#
|
||||||
|
# from charmhelpers.contrib.charmsupport.nrpe import NRPE
|
||||||
|
# (...)
|
||||||
|
# def update_nrpe_config():
|
||||||
|
# nrpe_compat = NRPE()
|
||||||
|
# nrpe_compat.add_check(
|
||||||
|
# shortname = "myservice",
|
||||||
|
# description = "Check MyService",
|
||||||
|
# check_cmd = "check_http -w 2 -c 10 http://localhost"
|
||||||
|
# )
|
||||||
|
# nrpe_compat.add_check(
|
||||||
|
# "myservice_other",
|
||||||
|
# "Check for widget failures",
|
||||||
|
# check_cmd = "/srv/myapp/scripts/widget_check"
|
||||||
|
# )
|
||||||
|
# nrpe_compat.write()
|
||||||
|
#
|
||||||
|
# def config_changed():
|
||||||
|
# (...)
|
||||||
|
# update_nrpe_config()
|
||||||
|
#
|
||||||
|
# def nrpe_external_master_relation_changed():
|
||||||
|
# update_nrpe_config()
|
||||||
|
#
|
||||||
|
# def local_monitors_relation_changed():
|
||||||
|
# update_nrpe_config()
|
||||||
|
#
|
||||||
|
# 4.a If your charm is a subordinate charm set primary=False
|
||||||
|
#
|
||||||
|
# from charmhelpers.contrib.charmsupport.nrpe import NRPE
|
||||||
|
# (...)
|
||||||
|
# def update_nrpe_config():
|
||||||
|
# nrpe_compat = NRPE(primary=False)
|
||||||
|
#
|
||||||
|
# 5. ln -s hooks.py nrpe-external-master-relation-changed
|
||||||
|
# ln -s hooks.py local-monitors-relation-changed
|
||||||
|
|
||||||
|
|
||||||
|
class CheckException(Exception):
    """Error raised when a check definition is rejected."""
|
||||||
|
|
||||||
|
|
||||||
|
class Check(object):
    """One NRPE check plus the Nagios service definition exported for it.

    A check is identified by ``shortname``; the NRPE command name is derived
    from it as ``check_<shortname>``.
    """

    # Pattern a shortname must match: re.match anchors it at the start and
    # the trailing '$' anchors it at the end.
    shortname_re = '[A-Za-z0-9-_.@]+$'
    # Nagios service definition rendered with str.format(); doubled braces
    # come out as literal braces.
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
use active-service
host_name {nagios_hostname}
service_description {nagios_hostname}[{shortname}] """
                        """{description}
check_command check_nrpe!{command}
servicegroups {nagios_servicegroup}
{service_config_overrides}
}}
""")

    def __init__(self, shortname, description, check_cmd, max_check_attempts=None):
        """Validate the shortname and resolve the check command.

        :param str shortname: identifier of the check; must match shortname_re
        :param str description: text appended to the service description
        :param str check_cmd: plugin name (looked up in the plugin
            directories) followed by its arguments
        :param max_check_attempts: optional value written as a
            ``max_check_attempts`` override in the service definition
        :raises CheckException: if shortname does not match shortname_re
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)
        self.max_check_attempts = max_check_attempts

    def _get_check_filename(self):
        """Return the path of this check's NRPE command definition file."""
        return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command))

    def _get_service_filename(self, hostname):
        """Return the path of the exported service file for ``hostname``."""
        return os.path.join(NRPE.nagios_exportdir,
                            'service__{}_{}.cfg'.format(hostname, self.command))

    def _locate_cmd(self, check_cmd):
        """Resolve the plugin in ``check_cmd`` to an absolute command line.

        The first word of ``check_cmd`` is looked up in the plugin
        directories; the remaining words are kept as arguments.

        :param str check_cmd: plugin name followed by its arguments
        :returns: the full command line, or '' if the plugin was not found
        """
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        if not parts:
            # Nothing to look up; avoid an IndexError on parts[0].
            log('Check command is empty')
            return ''
        for path in search_path:
            candidate = os.path.join(path, parts[0])
            if os.path.exists(candidate):
                # shlex.split() removed the caller's quoting, so re-quote
                # every argument; a plain join would split or corrupt
                # arguments containing spaces or shell metacharacters.
                return " ".join(
                    [candidate] + [shlex.quote(arg) for arg in parts[1:]])
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def _remove_service_files(self):
        """Delete every exported service file belonging to this command."""
        if not os.path.exists(NRPE.nagios_exportdir):
            return
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith('_{}.cfg'.format(self.command)):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

    def remove(self, hostname):
        """Remove the NRPE command file and the exported service files.

        :param hostname: accepted for interface compatibility; not used
        """
        nrpe_check_file = self._get_check_filename()
        if os.path.exists(nrpe_check_file):
            os.remove(nrpe_check_file)
        self._remove_service_files()

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the NRPE command file and, if possible, the service file.

        :param nagios_context: passed through to write_service_config
        :param hostname: nagios host name used in the service definition
        :param nagios_servicegroups: servicegroups recorded in both files
        """
        nrpe_check_file = self._get_check_filename()
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            if nagios_servicegroups:
                nrpe_check_config.write(
                    "# The following header was added automatically by juju\n")
                nrpe_check_config.write(
                    "# Modifying it will affect nagios monitoring and alerting\n")
                nrpe_check_config.write(
                    "# servicegroups: {}\n".format(nagios_servicegroups))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Render service_template into the export directory.

        Existing service files for this command are removed first.

        :param nagios_context: not used by this method
        :param hostname: nagios host name used in the file name and body
        :param nagios_servicegroups: value of the ``servicegroups`` directive
        """
        self._remove_service_files()

        if self.max_check_attempts:
            service_config_overrides = ' max_check_attempts {}'.format(
                self.max_check_attempts
            )  # Note indentation is here rather than in the template to avoid trailing spaces
        else:
            service_config_overrides = ''  # empty string to avoid printing 'None'
        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
            'service_config_overrides': service_config_overrides,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = self._get_service_filename(hostname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the resolved check command once; its status is ignored."""
        if not self.check_cmd:
            log('No command located for check {}; nothing to run'.format(
                getattr(self, 'shortname', '')))
            return
        # check_cmd is a full command line. Without a shell, subprocess
        # treats a plain string as the program name, so split it into an
        # argument list first.
        subprocess.call(shlex.split(self.check_cmd))
|
||||||
|
|
||||||
|
|
||||||
|
class NRPE(object):
    """Collect Check objects and publish them to the nrpe subordinate.

    Checks are written to the NRPE configuration directory and advertised on
    the 'local-monitors' and 'nrpe-external-master' relations.
    """

    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'
    homedir = '/var/lib/nagios'  # home dir provided by nagios-nrpe-server

    def __init__(self, hostname=None, primary=True):
        """Read the charm config and work out the nagios host name.

        :param hostname: explicit nagios host name; when falsy the name from
            the relation is used, falling back to "<nagios_context>-<unit>"
        :param bool primary: value published as 'primary' on every
            nrpe-external-master relation
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.primary = primary
        self.nagios_context = self.config['nagios_context']

        # An unset or empty servicegroups option falls back to the context.
        servicegroups = None
        if 'nagios_servicegroups' in self.config:
            servicegroups = self.config['nagios_servicegroups']
        self.nagios_servicegroups = servicegroups or self.nagios_context

        self.unit_name = local_unit().replace('/', '-')
        self.hostname = (
            hostname
            or get_nagios_hostname()
            or "{}-{}".format(self.nagios_context, self.unit_name)
        )
        self.checks = []
        self.remove_check_queue = set()

        # Iff in an nrpe-external-master relation hook, set primary status
        relation = relation_ids('nrpe-external-master')
        if relation:
            log("Setting charm primary status {}".format(primary))
        for rid in relation:
            relation_set(relation_id=rid, relation_settings={'primary': self.primary})

    @classmethod
    def does_nrpe_conf_dir_exist(cls):
        """Return True if the nrpe_confdir directory exists."""
        return os.path.isdir(cls.nrpe_confdir)

    def add_check(self, *args, **kwargs):
        """Register a new Check built from the given arguments.

        The shortname (keyword, or first positional argument) is also taken
        off the pending-removal queue.
        """
        shortname = kwargs.get('shortname')
        if shortname is None and args:
            shortname = args[0]

        self.checks.append(Check(*args, **kwargs))
        self.remove_check_queue.discard(shortname)

    def remove_check(self, *args, **kwargs):
        """Delete a check's files and queue its shortname for removal.

        :raises ValueError: if no 'shortname' keyword argument is given
        """
        if kwargs.get('shortname') is None:
            raise ValueError('shortname of check must be specified')

        # Use sensible defaults if they're not specified - these are not
        # actually used during removal, but they're required for constructing
        # the Check object; check_disk is chosen because it's part of the
        # nagios-plugins-basic package.
        for key, fallback in (('check_cmd', 'check_disk'), ('description', '')):
            if kwargs.get(key) is None:
                kwargs[key] = fallback

        Check(*args, **kwargs).remove(self.hostname)
        self.remove_check_queue.add(kwargs['shortname'])

    def write(self):
        """Write all registered checks and publish them on the relations.

        Returns early, without writing anything, when the nagios user/group
        cannot be looked up or the NRPE configuration directory is missing.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except Exception:
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}

        # check that the charm can write to the conf dir. If not, then nagios
        # probably isn't installed, and we can defer.
        if not self.does_nrpe_conf_dir_exist():
            return

        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            entry = {"command": nrpecheck.command}
            nrpe_monitors[nrpecheck.shortname] = entry
            # If we were passed max_check_attempts, add that to the relation data
            if nrpecheck.max_check_attempts is not None:
                entry['max_check_attempts'] = nrpecheck.max_check_attempts

        # update-status hooks fire every 5 minutes by default. Restarting
        # nagios-nrpe-server makes the nagios server report failing checks
        # and raise unnecessary alerts, so skip the restart on update-status.
        if hook_name() != 'update-status':
            service('restart', 'nagios-nrpe-server')

        monitor_ids = (relation_ids("local-monitors")
                       + relation_ids("nrpe-external-master"))
        for rid in monitor_ids:
            reldata = relation_get(unit=local_unit(), rid=rid)
            if 'monitors' not in reldata:
                # write a brand new set of monitors, as no existing ones.
                relation_set(relation_id=rid, monitors=yaml.dump(monitors))
                continue

            # Merge into the monitors already published: drop the entries
            # queued for removal, then add/overwrite with the new data.
            published = yaml.safe_load(reldata['monitors'])
            merged = {
                name: spec
                for name, spec in published['monitors']['remote']['nrpe'].items()
                if name not in self.remove_check_queue
            }
            merged.update(nrpe_monitors)
            published['monitors']['remote']['nrpe'] = merged
            # write back to the relation
            relation_set(relation_id=rid, monitors=yaml.dump(published))

        self.remove_check_queue.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Look up 'nagios_host_context' on the relation with the nrpe subordinate.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: the value from the first relation entry that carries the key,
        or None when no entry does
    """
    return next(
        (rel['nagios_host_context']
         for rel in relations_of_type(relation_name)
         if 'nagios_host_context' in rel),
        None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up 'nagios_hostname' on the relation with the nrpe subordinate.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: the value from the first relation entry that carries the key,
        or None when no entry does
    """
    return next(
        (rel['nagios_hostname']
         for rel in relations_of_type(relation_name)
         if 'nagios_hostname' in rel),
        None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Build the nagios unit name for the local unit.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: "<host_context>:<unit>" when the relation provides a host
        context, otherwise the plain local unit name
    """
    host_context = get_nagios_hostcontext(relation_name)
    if not host_context:
        return local_unit()
    return "%s:%s" % (host_context, local_unit())
|
||||||
|
|
||||||
|
|
||||||
|
def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
    """
    Add a check for each service in the list, based on its init system.

    systemd and upstart services get a direct process check. For sysv init
    scripts a cron job records the init script status in a file and the
    check reads that file.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    :param bool immediate_check: For sysv init, run the service check immediately
    """
    for svc in services:
        # Don't add a check for these services from neutron-gateway
        if svc in ('ext-port', 'os-charm-phy-nic-mtu'):
            # A bare `next` here is a no-op expression in Python and does
            # not skip the service; `continue` is what actually skips it.
            continue

        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc

        if host.init_is_systemd(service_name=svc):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_systemd.py %s' % svc
            )
        elif os.path.exists(upstart_init):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_upstart_job %s' % svc
            )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
            croncmd = (
                '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                '-e -s /etc/init.d/%s status' % svc
            )
            cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
            # Context managers close the files even if a write or the
            # subprocess call raises.
            with open(cronpath, 'w') as cron_handle:
                cron_handle.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='service check {%s}' % unit_name,
                check_cmd='check_status_file.py -f %s' % checkpath,
            )
            # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
            # (LP: #1670223).
            if immediate_check and os.path.isdir(nrpe.homedir):
                with open(checkpath, 'w') as status_handle:
                    subprocess.call(
                        croncmd.split(),
                        stdout=status_handle,
                        stderr=subprocess.STDOUT
                    )
                os.chmod(checkpath, 0o644)
|
||||||
|
|
||||||
|
|
||||||
|
def copy_nrpe_checks(nrpe_files_dir=None):
    """
    Copy the bundled ``check_*`` plugins into the local nagios plugin dir.

    :param nrpe_files_dir: directory holding the check files; when None it
        is located under $CHARM_DIR (either directly or below 'hooks')
    :raises RuntimeError: if no charmhelpers files directory can be found
    """
    plugins_dir = '/usr/local/lib/nagios/plugins'

    if nrpe_files_dir is None:
        # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks
        candidates = (
            os.path.abspath(os.path.join(
                os.getenv('CHARM_DIR'), segment,
                'charmhelpers', 'contrib', 'openstack', 'files'))
            for segment in ('.', 'hooks')
        )
        nrpe_files_dir = next(
            (path for path in candidates if os.path.isdir(path)), None)
        if nrpe_files_dir is None:
            raise RuntimeError("Couldn't find charmhelpers directory")

    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)

    for source in glob.glob(os.path.join(nrpe_files_dir, "check_*")):
        if not os.path.isfile(source):
            continue
        target = os.path.join(plugins_dir, os.path.basename(source))
        shutil.copy2(source, target)
|
||||||
|
|
||||||
|
|
||||||
|
def add_haproxy_checks(nrpe, unit_name):
    """
    Add the two HAProxy checks (server state and queue depth).

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    haproxy_checks = (
        ('haproxy_servers',
         'Check HAProxy {%s}',
         'check_haproxy.sh'),
        ('haproxy_queue',
         'Check HAProxy queue depth {%s}',
         'check_haproxy_queue_depth.sh'),
    )
    for shortname, description, check_cmd in haproxy_checks:
        nrpe.add_check(
            shortname=shortname,
            description=description % unit_name,
            check_cmd=check_cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_deprecated_check(nrpe, deprecated_services):
    """
    Log and remove the check of every deprecated service.

    :param nrpe: NRPE object to remove check from
    :type nrpe: NRPE
    :param deprecated_services: List of deprecated services that are removed
    :type deprecated_services: list
    """
    for service_name in deprecated_services:
        log('Deprecated service: {}'.format(service_name))
        nrpe.remove_check(shortname=service_name)
|
173
hooks/charmhelpers/contrib/charmsupport/volumes.py
Normal file
173
hooks/charmhelpers/contrib/charmsupport/volumes.py
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
# Copyright 2014-2021 Canonical Limited.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
'''
|
||||||
|
Functions for managing volumes in juju units. One volume is supported per unit.
|
||||||
|
Subordinates may have their own storage, provided it is on its own partition.
|
||||||
|
|
||||||
|
Configuration stanzas::
|
||||||
|
|
||||||
|
volume-ephemeral:
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
description: >
|
||||||
|
If false, a volume is mounted as specified in "volume-map"
|
||||||
|
If true, ephemeral storage will be used, meaning that log data
|
||||||
|
will only exist as long as the machine. YOU HAVE BEEN WARNED.
|
||||||
|
volume-map:
|
||||||
|
type: string
|
||||||
|
default: {}
|
||||||
|
description: >
|
||||||
|
YAML map of units to device names, e.g:
|
||||||
|
"{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
|
||||||
|
Service units will raise a configure-error if volume-ephemeral
|
||||||
|
is 'false' and no volume-map value is set. Use 'juju set' to set a
|
||||||
|
value and 'juju resolved' to complete configuration.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
from charmhelpers.contrib.charmsupport.volumes import configure_volume, VolumeConfigurationError
|
||||||
|
from charmhelpers.core.hookenv import log, ERROR
|
||||||
|
def pre_mount_hook():
|
||||||
|
stop_service('myservice')
|
||||||
|
def post_mount_hook():
|
||||||
|
start_service('myservice')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
try:
|
||||||
|
configure_volume(before_change=pre_mount_hook,
|
||||||
|
after_change=post_mount_hook)
|
||||||
|
except VolumeConfigurationError:
|
||||||
|
log('Storage could not be configured', ERROR)
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
# XXX: Known limitations
|
||||||
|
# - fstab is neither consulted nor updated
|
||||||
|
|
||||||
|
import os
|
||||||
|
from charmhelpers.core import hookenv
|
||||||
|
from charmhelpers.core import host
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
MOUNT_BASE = '/srv/juju/volumes'
|
||||||
|
|
||||||
|
|
||||||
|
class VolumeConfigurationError(Exception):
    """Raised when volume configuration data is missing or invalid."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_config():
    '''Gather and sanity-check volume configuration data.

    Reads the 'volume-ephemeral' and 'volume-map' charm options.

    :returns: dict with the keys 'ephemeral' (bool), 'device' (the entry of
        volume-map for this unit, or None) and 'mountpoint', or None when
        the configuration is invalid (the reason is logged)
    '''
    config = hookenv.config()

    volume_config = {
        'ephemeral': config.get('volume-ephemeral') in (
            True, 'True', 'true', 'Yes', 'yes'),
    }

    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        # Without a parsed map there is nothing further to validate;
        # continuing would hit an unbound volume_map.
        return None

    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)), hookenv.ERROR)
        # A non-dict has no usable per-unit lookup, so stop here rather
        # than failing on .get() below.
        return None

    errors = False
    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log('A volume is defined for this unit, but ephemeral '
                    'storage was requested', hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for permanent storage but did not define volume ID
        hookenv.log('Persistent storage was requested, but there is no volume '
                    'defined for this unit.', hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config
|
||||||
|
|
||||||
|
|
||||||
|
def mount_volume(config):
    '''Mount config['device'] on config['mountpoint'] persistently.

    The mountpoint directory is created when missing, and anything already
    mounted there is unmounted first.

    :param dict config: volume configuration with 'device' and 'mountpoint'
    :raises VolumeConfigurationError: if the mountpoint exists but is not a
        directory, or if mounting fails
    '''
    mountpoint = config['mountpoint']

    if not os.path.exists(mountpoint):
        host.mkdir(mountpoint)
    elif not os.path.isdir(mountpoint):
        hookenv.log('Not a directory: {}'.format(mountpoint))
        raise VolumeConfigurationError()

    if os.path.ismount(mountpoint):
        unmount_volume(config)

    mounted = host.mount(config['device'], mountpoint, persist=True)
    if not mounted:
        raise VolumeConfigurationError()
|
||||||
|
|
||||||
|
|
||||||
|
def unmount_volume(config):
    '''Unmount config['mountpoint'] if something is mounted there.

    Does nothing when the path is not a mount point.  Raises
    VolumeConfigurationError when host.umount() reports failure.
    '''
    mountpoint = config['mountpoint']
    if not os.path.ismount(mountpoint):
        return

    unmounted = host.umount(mountpoint, persist=True)
    if not unmounted:
        raise VolumeConfigurationError()
|
||||||
|
|
||||||
|
|
||||||
|
def managed_mounts():
    '''Return the host.mounts() entries located under MOUNT_BASE.

    An entry is kept when its first element starts with MOUNT_BASE.
    '''
    def _under_mount_base(entry):
        return entry[0].startswith(MOUNT_BASE)

    return filter(_under_mount_base, host.mounts())
|
||||||
|
|
||||||
|
|
||||||
|
def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Apply the charm's volume configuration to this unit.

    Returns the string "ephemeral" when ephemeral storage is configured,
    otherwise the mount point of the persistent volume.

    before_change and after_change are optional callables; they are invoked
    around any unmount/mount performed here and are not called when the
    current mount state already matches the configuration.

    Raises VolumeConfigurationError when the volume configuration cannot be
    read or a mount/unmount step fails.
    '''
    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    ephemeral = config['ephemeral']
    mountpoint = config['mountpoint']
    is_mounted = os.path.ismount(mountpoint)

    if ephemeral:
        # Ephemeral storage: make sure no managed volume stays mounted.
        if is_mounted:
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'

    # persistent storage
    if not is_mounted:
        before_change()
        mount_volume(config)
        after_change()
    else:
        active = dict(managed_mounts())
        # Remount only when a different device occupies the mount point.
        if active.get(mountpoint) != config['device']:
            before_change()
            unmount_volume(config)
            mount_volume(config)
            after_change()

    return mountpoint
|
@ -14,8 +14,10 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import uuid
|
import uuid
|
||||||
|
import shutil
|
||||||
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
@ -41,6 +43,7 @@ from charmhelpers.core.hookenv import (
|
|||||||
log,
|
log,
|
||||||
relation_set,
|
relation_set,
|
||||||
relation_ids,
|
relation_ids,
|
||||||
|
charm_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
from charmhelpers.core.sysctl import create as create_sysctl
|
from charmhelpers.core.sysctl import create as create_sysctl
|
||||||
@ -50,6 +53,8 @@ from charmhelpers.core.host import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from charmhelpers.core.unitdata import kv
|
from charmhelpers.core.unitdata import kv
|
||||||
|
from charmhelpers.fetch import apt_install
|
||||||
|
from charmhelpers.contrib.charmsupport import nrpe
|
||||||
|
|
||||||
from neutron_ovs_utils import (
|
from neutron_ovs_utils import (
|
||||||
DHCP_PACKAGES,
|
DHCP_PACKAGES,
|
||||||
@ -62,6 +67,7 @@ from neutron_ovs_utils import (
|
|||||||
get_shared_secret,
|
get_shared_secret,
|
||||||
register_configs,
|
register_configs,
|
||||||
restart_map,
|
restart_map,
|
||||||
|
services,
|
||||||
use_dvr,
|
use_dvr,
|
||||||
use_l3ha,
|
use_l3ha,
|
||||||
enable_nova_metadata,
|
enable_nova_metadata,
|
||||||
@ -178,6 +184,8 @@ def config_changed(check_deferred_restarts=True):
|
|||||||
relation_id=rid,
|
relation_id=rid,
|
||||||
request_restart=request_nova_compute_restart)
|
request_restart=request_nova_compute_restart)
|
||||||
|
|
||||||
|
update_nrpe_config()
|
||||||
|
|
||||||
|
|
||||||
@hooks.hook('neutron-plugin-api-relation-changed')
|
@hooks.hook('neutron-plugin-api-relation-changed')
|
||||||
# NOTE(fnordahl): we need to act immediately to changes to OVS_DEFAULT in-line
|
# NOTE(fnordahl): we need to act immediately to changes to OVS_DEFAULT in-line
|
||||||
@ -287,6 +295,47 @@ def post_series_upgrade():
|
|||||||
resume_unit_helper, CONFIGS)
|
resume_unit_helper, CONFIGS)
|
||||||
|
|
||||||
|
|
||||||
|
def install_nrpe_cron():
    """Install the ovs_vsctl cron script and register it in /etc/cron.d.

    Copies files/ovs_vsctl/cron_ovs_vsctl.sh from the charm directory into
    the nagios plugins directory, sets its mode and root ownership, and
    writes a cron.d entry that runs it as root.

    Returns the path of the installed script.
    """
    script = shutil.copy(
        os.path.join(charm_dir(), "files", "ovs_vsctl", "cron_ovs_vsctl.sh"),
        "/usr/local/lib/nagios/plugins/",
    )
    os.chmod(script, 0o100755)
    os.chown(script, uid=0, gid=0)

    # cron.d format: schedule (minute 3 of every hour), user, command.
    with open("/etc/cron.d/neutron_openvswitch_ovs_vsctl", "w") as cron_file:
        cron_file.write("3 * * * * root {cmd}\n".format(cmd=script))

    return script
|
||||||
|
|
||||||
|
|
||||||
|
def install_nrpe_plugin():
    """Install the ovs_vsctl nagios check into the nagios plugins directory.

    Copies files/ovs_vsctl/check_ovs_vsctl.py from the charm directory,
    then sets its mode and root ownership.

    Returns the path of the installed plugin.
    """
    src = os.path.join(charm_dir(), "files", "ovs_vsctl", "check_ovs_vsctl.py")
    # The trailing slash marks the destination as a directory: if it is
    # missing, the copy fails instead of silently creating a regular file
    # named "plugins".  This also matches install_nrpe_cron().
    dst = shutil.copy(src, "/usr/local/lib/nagios/plugins/")
    os.chmod(dst, 0o100755)
    os.chown(dst, uid=0, gid=0)
    return dst
|
||||||
|
|
||||||
|
|
||||||
|
@hooks.hook('nrpe-external-master-relation-joined',
            'nrpe-external-master-relation-changed')
def update_nrpe_config():
    """Set up the NRPE checks for this unit.

    Installs python-dbus, copies the stock NRPE checks, adds init-service
    checks for everything returned by services(), installs the ovs_vsctl
    cron job and plugin, registers the ovs_vsctl check and finally calls
    write() on the NRPE object.
    """
    # check_upstart_job depends on python-dbus
    apt_install('python-dbus')

    nagios_host = nrpe.get_nagios_hostname()
    nagios_unit = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checks, services(), nagios_unit)

    # The cron job is installed before the plugin that the check runs.
    install_nrpe_cron()
    checks.add_check(
        shortname="ovs_vsctl",
        description="Check ovs-vsctl list-br for predictable operation.",
        check_cmd=install_nrpe_plugin(),
    )
    checks.write()
|
||||||
|
|
||||||
|
|
||||||
@hooks.hook('update-status')
|
@hooks.hook('update-status')
|
||||||
def dummy_update_status():
|
def dummy_update_status():
|
||||||
"""Dummy function to silence missing hook log entry"""
|
"""Dummy function to silence missing hook log entry"""
|
||||||
|
1
hooks/nrpe-external-master-relation-changed
Symbolic link
1
hooks/nrpe-external-master-relation-changed
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
neutron_ovs_hooks.py
|
1
hooks/nrpe-external-master-relation-joined
Symbolic link
1
hooks/nrpe-external-master-relation-joined
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
neutron_ovs_hooks.py
|
@ -22,6 +22,9 @@ series:
|
|||||||
extra-bindings:
|
extra-bindings:
|
||||||
data:
|
data:
|
||||||
provides:
|
provides:
|
||||||
|
nrpe-external-master:
|
||||||
|
interface: nrpe-external-master
|
||||||
|
scope: container
|
||||||
neutron-plugin:
|
neutron-plugin:
|
||||||
interface: neutron-plugin
|
interface: neutron-plugin
|
||||||
scope: container
|
scope: container
|
||||||
|
@ -53,6 +53,7 @@ TO_PATCH = [
|
|||||||
'determine_purge_packages',
|
'determine_purge_packages',
|
||||||
'is_container',
|
'is_container',
|
||||||
'is_hook_allowed',
|
'is_hook_allowed',
|
||||||
|
'update_nrpe_config',
|
||||||
]
|
]
|
||||||
NEUTRON_CONF_DIR = "/etc/neutron"
|
NEUTRON_CONF_DIR = "/etc/neutron"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user