Merge "add nagios checks"

This commit is contained in:
Zuul 2022-07-05 08:10:13 +00:00 committed by Gerrit Code Review
commit 661a788e22
12 changed files with 866 additions and 0 deletions

View File

@ -14,3 +14,4 @@ include:
- contrib.network.ip - contrib.network.ip
- contrib.python - contrib.python
- contrib.openstack.policyd - contrib.openstack.policyd
- contrib.charmsupport

View File

@ -457,3 +457,19 @@ options:
description: | description: |
Allow the charm and packages to restart services automatically when Allow the charm and packages to restart services automatically when
required. required.
nagios_context:
default: "juju"
type: string
description: |
Used by the nrpe subordinate charms.
A string that will be prepended to instance name to set the host name
in nagios. So for instance the hostname would be something like:
juju-myservice-0
If you're running multiple environments with the same services in them
this allows you to differentiate between them.
nagios_servicegroups:
default: ""
type: string
description: |
A comma-separated list of nagios servicegroups.
If left empty, the nagios_context will be used as the servicegroup

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: us-ascii -*-
import os
from nagios_plugin3 import (
CriticalError,
UnknownError,
try_check,
check_file_freshness,
)
INPUT_FILE = "/var/lib/nagios/ovs_vsctl.out"
INPUT_RC = "/var/lib/nagios/ovs_vsctl.rc"
def parse_output():
    """Report on the cached result of ``ovs-vsctl list-br``.

    Reads the command output from INPUT_FILE and its recorded return code
    from INPUT_RC, then prints an ``OK:`` line listing the bridges found.

    :raises UnknownError: if INPUT_FILE or INPUT_RC does not exist
    :raises CriticalError: if the recorded return code is not "0", or the
        output file contains no lines
    """
    # Both cache files must be present before anything can be evaluated.
    for required in (INPUT_FILE, INPUT_RC):
        if not os.path.exists(required):
            raise UnknownError(
                "UNKNOWN: {} does not exist (yet?)".format(required))

    # Freshness of the output file is delegated to nagios_plugin3.
    try_check(check_file_freshness, INPUT_FILE)

    with open(INPUT_RC) as rc_fd:
        return_code = rc_fd.readline().strip()
    if return_code != "0":
        raise CriticalError("CRITICAL: ovs-vsctl list-br returns error")

    with open(INPUT_FILE) as out_fd:
        bridges = out_fd.readlines()
    if not bridges:
        raise CriticalError(
            "CRITICAL: ovs-vsctl list-br returned no bridges")

    summary = ", ".join(line.strip() for line in bridges)
    print("OK: {}".format(summary))
def main():
    """Script entry point: run parse_output() through try_check()."""
    try_check(parse_output)


if __name__ == "__main__":
    # Only run the check when executed as a script, not when imported.
    main()

View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Cache the output and exit status of `ovs-vsctl list-br` so that an
# unprivileged nagios check can read them from /var/lib/nagios.
set -eu

PATH="/snap/bin:/usr/local/bin:$PATH"
FILE=/var/lib/nagios/ovs_vsctl.out
RC_FILE=/var/lib/nagios/ovs_vsctl.rc
LOCK_FILE=/var/lib/nagios/ovs_vsctl.lock
CMD="ovs-vsctl list-br"
GROUP="nagios"

if [ $# -gt 0 ]; then
    echo "This program will cache the output of '${CMD}' as follows"
    echo " stdout + stderr -> ${FILE}"
    echo " return code -> ${RC_FILE}"
    echo
    echo "It does not accept any option or argument"
    exit 0
fi

sleep "$((RANDOM % 60 + 10))s"  # sleep 10-69s

# Scratch file lives next to the target so the final mv is a plain rename.
TMP_FILE="$(mktemp "${FILE}.XXXXXX")"
trap 'rm -f "${TMP_FILE}"' EXIT

if [ -f "${LOCK_FILE}" ]; then
    echo "Lock file (${LOCK_FILE}) in use, abandoning" >&2
    exit 1
fi

touch "${LOCK_FILE}"
# From here on, always release the lock, whatever way the script exits;
# otherwise one failed run would block every later run.
trap 'rm -f "${TMP_FILE}" "${LOCK_FILE}"' EXIT

# Capture stdout AND stderr (the file redirection must come before 2>&1),
# and keep going on failure so the non-zero status is recorded instead of
# `set -e` aborting the script.
RC=0
# CMD is deliberately left unquoted so it splits into command + argument.
$CMD > "${TMP_FILE}" 2>&1 || RC=$?
echo "${RC}" > "${RC_FILE}"

# Fix ownership and mode before publishing so readers never see a
# root-only file at the final path.
chown ":${GROUP}" "${TMP_FILE}"
chmod 644 "${TMP_FILE}"
mv "${TMP_FILE}" "${FILE}"

exit 0

View File

@ -0,0 +1,13 @@
# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,522 @@
# Copyright 2012-2021 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compatibility with the nrpe-external-master charm"""
#
# Authors:
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
import glob
import grp
import os
import pwd
import re
import shlex
import shutil
import subprocess
import yaml
from charmhelpers.core.hookenv import (
config,
hook_name,
local_unit,
log,
relation_get,
relation_ids,
relation_set,
relations_of_type,
)
from charmhelpers.core.host import service
from charmhelpers.core import host
# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
# provides:
# (...)
# nrpe-external-master:
# interface: nrpe-external-master
# scope: container
#
# and/or
#
# provides:
# (...)
# local-monitors:
# interface: local-monitors
# scope: container
#
# 2. Add the following to config.yaml
#
# nagios_context:
# default: "juju"
# type: string
# description: |
# Used by the nrpe subordinate charms.
# A string that will be prepended to instance name to set the host name
# in nagios. So for instance the hostname would be something like:
# juju-myservice-0
# If you're running multiple environments with the same services in them
# this allows you to differentiate between them.
# nagios_servicegroups:
# default: ""
# type: string
# description: |
# A comma-separated list of nagios servicegroups.
# If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
# from charmsupport.nrpe import NRPE
# (...)
# def update_nrpe_config():
# nrpe_compat = NRPE()
# nrpe_compat.add_check(
# shortname = "myservice",
# description = "Check MyService",
# check_cmd = "check_http -w 2 -c 10 http://localhost"
# )
# nrpe_compat.add_check(
# "myservice_other",
# "Check for widget failures",
# check_cmd = "/srv/myapp/scripts/widget_check"
# )
# nrpe_compat.write()
#
# def config_changed():
# (...)
# update_nrpe_config()
#
# def nrpe_external_master_relation_changed():
# update_nrpe_config()
#
# def local_monitors_relation_changed():
# update_nrpe_config()
#
# 4.a If your charm is a subordinate charm set primary=False
#
# from charmsupport.nrpe import NRPE
# (...)
# def update_nrpe_config():
# nrpe_compat = NRPE(primary=False)
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
# ln -s hooks.py local-monitors-relation-changed
class CheckException(Exception):
    """Raised when a check cannot be defined, e.g. for an invalid shortname."""
class Check(object):
    """A single NRPE check plus the Nagios service definition exported for it.

    A check owns two files: an NRPE command definition under
    ``NRPE.nrpe_confdir`` and, when ``NRPE.nagios_exportdir`` exists, a
    service definition rendered from ``service_template``.
    """

    # Pattern a shortname has to satisfy (applied with re.match).
    shortname_re = '[A-Za-z0-9-_.@]+$'
    # Service definition text; placeholders are filled in by
    # write_service_config().
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
use active-service
host_name {nagios_hostname}
service_description {nagios_hostname}[{shortname}] """
                        """{description}
check_command check_nrpe!{command}
servicegroups {nagios_servicegroup}
{service_config_overrides}
}}
""")

    def __init__(self, shortname, description, check_cmd, max_check_attempts=None):
        """Validate the shortname and resolve the check command.

        :param str shortname: identifier of the check; must match shortname_re
        :param str description: text used in the service description
        :param str check_cmd: plugin name (or path) followed by its arguments
        :param max_check_attempts: optional value written as a service override
        :raises CheckException: if shortname does not match shortname_re
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)
        self.max_check_attempts = max_check_attempts

    def _get_check_filename(self):
        """Return the path of this check's NRPE command file."""
        return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command))

    def _get_service_filename(self, hostname):
        """Return the path of the exported service file for ``hostname``."""
        return os.path.join(NRPE.nagios_exportdir,
                            'service__{}_{}.cfg'.format(hostname, self.command))

    def _locate_cmd(self, check_cmd):
        """Resolve the plugin named in ``check_cmd`` against the plugin dirs.

        :returns: the located executable followed by the original arguments,
            or an empty string (after logging) when it is not found
        """
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        for path in search_path:
            if os.path.exists(os.path.join(path, parts[0])):
                command = os.path.join(path, parts[0])
                if len(parts) > 1:
                    command += " " + " ".join(parts[1:])
                return command
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def _remove_service_files(self):
        """Delete every exported service file that belongs to this check."""
        if not os.path.exists(NRPE.nagios_exportdir):
            return
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith('_{}.cfg'.format(self.command)):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

    def remove(self, hostname):
        """Remove the NRPE command file and any exported service files.

        ``hostname`` is accepted but not used by the removal itself.
        """
        nrpe_check_file = self._get_check_filename()
        if os.path.exists(nrpe_check_file):
            os.remove(nrpe_check_file)
        self._remove_service_files()

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the NRPE command file and, if possible, the service file."""
        nrpe_check_file = self._get_check_filename()
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            if nagios_servicegroups:
                nrpe_check_config.write(
                    "# The following header was added automatically by juju\n")
                nrpe_check_config.write(
                    "# Modifying it will affect nagios monitoring and alerting\n")
                nrpe_check_config.write(
                    "# servicegroups: {}\n".format(nagios_servicegroups))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Render service_template for ``hostname`` into the export dir.

        Previously exported service files for this check are removed first.
        """
        self._remove_service_files()

        if self.max_check_attempts:
            service_config_overrides = ' max_check_attempts {}'.format(
                self.max_check_attempts
            )  # Note indentation is here rather than in the template to avoid trailing spaces
        else:
            service_config_overrides = ''  # empty string to avoid printing 'None'
        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
            'service_config_overrides': service_config_overrides,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = self._get_service_filename(hostname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the check command and return its exit status.

        check_cmd is a single string ("<plugin> <args...>"); it is split
        into an argument list because subprocess treats a bare string as
        the name of the executable when no shell is involved.
        """
        return subprocess.call(shlex.split(self.check_cmd))
class NRPE(object):
    """Collects checks for a unit and publishes them to the nrpe subordinate.

    Checks are queued with add_check(), deleted with remove_check() and
    flushed to disk and to the monitoring relations by write().
    """

    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'
    homedir = '/var/lib/nagios'  # home dir provided by nagios-nrpe-server

    def __init__(self, hostname=None, primary=True):
        """Read charm config, work out the nagios hostname, flag primary.

        :param str hostname: nagios hostname to use; when empty it is taken
            from get_nagios_hostname(), falling back to
            "<nagios_context>-<unit name>"
        :param bool primary: value published as 'primary' on every
            nrpe-external-master relation
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.primary = primary
        self.nagios_context = self.config['nagios_context']

        # An unset or empty servicegroups option falls back to the context.
        groups = None
        if 'nagios_servicegroups' in self.config:
            groups = self.config['nagios_servicegroups']
        self.nagios_servicegroups = groups if groups else self.nagios_context

        self.unit_name = local_unit().replace('/', '-')
        self.hostname = (
            hostname
            or get_nagios_hostname()
            or "{}-{}".format(self.nagios_context, self.unit_name)
        )
        self.checks = []

        # Iff in an nrpe-external-master relation hook, set primary status
        external_master_rids = relation_ids('nrpe-external-master')
        if external_master_rids:
            log("Setting charm primary status {}".format(primary))
        for rid in external_master_rids:
            relation_set(relation_id=rid,
                         relation_settings={'primary': self.primary})
        self.remove_check_queue = set()

    @classmethod
    def does_nrpe_conf_dir_exist(cls):
        """Return True if the nrpe_confdir directory exists."""
        return os.path.isdir(cls.nrpe_confdir)

    def add_check(self, *args, **kwargs):
        """Queue a Check built from the given arguments.

        The shortname (keyword, else first positional argument) is also
        dropped from the pending-removal queue.
        """
        shortname = kwargs.get('shortname')
        if shortname is None and args:
            shortname = args[0]

        self.checks.append(Check(*args, **kwargs))
        self.remove_check_queue.discard(shortname)

    def remove_check(self, *args, **kwargs):
        """Delete a check's files and queue its shortname for removal.

        :raises ValueError: if no 'shortname' keyword argument is given
        """
        if kwargs.get('shortname') is None:
            raise ValueError('shortname of check must be specified')

        # Use sensible defaults if they're not specified - these are not
        # actually used during removal, but they're required for constructing
        # the Check object; check_disk is chosen because it's part of the
        # nagios-plugins-basic package.
        for key, fallback in (('check_cmd', 'check_disk'),
                              ('description', '')):
            if kwargs.get(key) is None:
                kwargs[key] = fallback

        doomed = Check(*args, **kwargs)
        doomed.remove(self.hostname)
        self.remove_check_queue.add(kwargs['shortname'])

    def write(self):
        """Write all queued checks and publish them on the relations.

        Returns early (after logging) when the nagios user or group cannot
        be looked up, and silently when the NRPE config directory is absent.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except Exception:
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}

        # check that the charm can write to the conf dir. If not, then nagios
        # probably isn't installed, and we can defer.
        if not self.does_nrpe_conf_dir_exist():
            return

        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            entry = {"command": nrpecheck.command}
            nrpe_monitors[nrpecheck.shortname] = entry
            # If we were passed max_check_attempts, add that to the relation data
            if nrpecheck.max_check_attempts is not None:
                entry['max_check_attempts'] = nrpecheck.max_check_attempts

        # update-status hooks are configured to firing every 5 minutes by
        # default. When nagios-nrpe-server is restarted, the nagios server
        # reports checks failing causing unnecessary alerts. Let's not restart
        # on update-status hooks.
        if hook_name() != 'update-status':
            service('restart', 'nagios-nrpe-server')

        monitor_ids = (relation_ids("local-monitors") +
                       relation_ids("nrpe-external-master"))
        for rid in monitor_ids:
            reldata = relation_get(unit=local_unit(), rid=rid)
            if 'monitors' not in reldata:
                # write a brand new set of monitors, as no existing ones.
                relation_set(relation_id=rid, monitors=yaml.dump(monitors))
                continue

            # update the existing set of monitors with the new data
            published = yaml.safe_load(reldata['monitors'])
            # remove keys that are in the remove_check_queue
            merged = {
                name: data
                for name, data in published['monitors']['remote']['nrpe'].items()
                if name not in self.remove_check_queue
            }
            # update/add nrpe_monitors
            merged.update(nrpe_monitors)
            published['monitors']['remote']['nrpe'] = merged
            # write back to the relation
            relation_set(relation_id=rid, monitors=yaml.dump(published))

        self.remove_check_queue.clear()
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_host_context

    The value comes from the first relation entry that carries the key;
    None is returned when no entry does.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_host_context'
    return next(
        (rel[key] for rel in relations_of_type(relation_name) if key in rel),
        None,
    )
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_hostname

    The value comes from the first relation entry that carries the key;
    None is returned when no entry does.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_hostname'
    return next(
        (rel[key] for rel in relations_of_type(relation_name) if key in rel),
        None,
    )
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Return the nagios unit name prepended with host_context if needed

    With a host context the result is "<host_context>:<local unit>",
    otherwise just the local unit name.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    host_context = get_nagios_hostcontext(relation_name)
    if not host_context:
        return local_unit()
    return "%s:%s" % (host_context, local_unit())
def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
    """
    Add checks for each service in list

    Each service is matched, in order, against systemd, an upstart job file
    and a sysv init script; the first match decides which check is added.
    For sysv services a cron job is also written that refreshes a status
    file read by the check.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    :param bool immediate_check: For sysv init, run the service check immediately
    """
    for svc in services:
        # Don't add a check for these services from neutron-gateway
        if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
            continue

        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc

        if host.init_is_systemd(service_name=svc):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_systemd.py %s' % svc
            )
        elif os.path.exists(upstart_init):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_upstart_job %s' % svc
            )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
            croncmd = (
                '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                '-e -s /etc/init.d/%s status' % svc
            )
            cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
            with open(cronpath, 'w') as cron_fd:
                cron_fd.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='service check {%s}' % unit_name,
                check_cmd='check_status_file.py -f %s' % checkpath,
            )
            # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
            # (LP: #1670223).
            if immediate_check and os.path.isdir(nrpe.homedir):
                # Seed the status file now rather than waiting for cron.
                with open(checkpath, 'w') as status_fd:
                    subprocess.call(
                        croncmd.split(),
                        stdout=status_fd,
                        stderr=subprocess.STDOUT
                    )
                os.chmod(checkpath, 0o644)
def copy_nrpe_checks(nrpe_files_dir=None):
    """
    Copy the nrpe checks into place

    Every regular file matching ``check_*`` in the source directory is copied
    to /usr/local/lib/nagios/plugins, which is created if missing.

    :param str nrpe_files_dir: directory holding the check files; when None
        it is looked up under $CHARM_DIR and $CHARM_DIR/hooks
    :raises RuntimeError: if no charmhelpers files directory can be found
    """
    plugins_dir = '/usr/local/lib/nagios/plugins'

    if nrpe_files_dir is None:
        # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks
        for segment in ('.', 'hooks'):
            candidate = os.path.abspath(os.path.join(
                os.getenv('CHARM_DIR'),
                segment,
                'charmhelpers',
                'contrib',
                'openstack',
                'files'))
            if os.path.isdir(candidate):
                nrpe_files_dir = candidate
                break
        if nrpe_files_dir is None:
            raise RuntimeError("Couldn't find charmhelpers directory")

    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)

    pattern = os.path.join(nrpe_files_dir, "check_*")
    for source in glob.glob(pattern):
        if not os.path.isfile(source):
            continue
        target = os.path.join(plugins_dir, os.path.basename(source))
        shutil.copy2(source, target)
def add_haproxy_checks(nrpe, unit_name):
    """
    Add the two HAProxy checks (server state and queue depth)

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    haproxy_checks = (
        ('haproxy_servers',
         'Check HAProxy {%s}' % unit_name,
         'check_haproxy.sh'),
        ('haproxy_queue',
         'Check HAProxy queue depth {%s}' % unit_name,
         'check_haproxy_queue_depth.sh'),
    )
    for shortname, description, check_cmd in haproxy_checks:
        nrpe.add_check(
            shortname=shortname,
            description=description,
            check_cmd=check_cmd)
def remove_deprecated_check(nrpe, deprecated_services):
    """
    Remove checks for deprecated services in list

    Each service name is logged and then passed to ``nrpe.remove_check``
    as the shortname.

    :param nrpe: NRPE object to remove check from
    :type nrpe: NRPE
    :param deprecated_services: List of deprecated services that are removed
    :type deprecated_services: list
    """
    for service_name in deprecated_services:
        message = 'Deprecated service: {}'.format(service_name)
        log(message)
        nrpe.remove_check(shortname=service_name)

View File

@ -0,0 +1,173 @@
# Copyright 2014-2021 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Functions for managing volumes in juju units. One volume is supported per unit.
Subordinates may have their own storage, provided it is on its own partition.
Configuration stanzas::
volume-ephemeral:
type: boolean
default: true
description: >
If false, a volume is mounted as specified in "volume-map"
If true, ephemeral storage will be used, meaning that log data
will only exist as long as the machine. YOU HAVE BEEN WARNED.
volume-map:
type: string
default: {}
description: >
YAML map of units to device names, e.g:
"{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
Service units will raise a configure-error if volume-ephemeral
is 'false' and no volume-map value is set. Use 'juju set' to set a
value and 'juju resolved' to complete configuration.
Usage::
from charmsupport.volumes import configure_volume, VolumeConfigurationError
from charmsupport.hookenv import log, ERROR
def pre_mount_hook():
stop_service('myservice')
def post_mount_hook():
start_service('myservice')
if __name__ == '__main__':
try:
configure_volume(before_change=pre_mount_hook,
after_change=post_mount_hook)
except VolumeConfigurationError:
log('Storage could not be configured', ERROR)
'''
# XXX: Known limitations
# - fstab is neither consulted nor updated
import os
from charmhelpers.core import hookenv
from charmhelpers.core import host
import yaml
MOUNT_BASE = '/srv/juju/volumes'
class VolumeConfigurationError(Exception):
    '''Volume configuration data is missing or invalid'''
def get_config():
    '''Gather and sanity-check volume configuration data

    Reads 'volume-ephemeral' and 'volume-map' from the charm config and
    returns a dict with the keys 'ephemeral', 'device' and 'mountpoint'.
    Every problem found is logged; if there was any, None is returned
    instead of the dict.
    '''
    volume_config = {}
    config = hookenv.config()

    errors = False

    volume_config['ephemeral'] = config.get('volume-ephemeral') in (
        True, 'True', 'true', 'Yes', 'yes')

    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        errors = True
        # Keep going with an empty map so the remaining checks can run.
        volume_map = {}
    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)))
        errors = True
        # A non-dict value has no .get(); treat it as "nothing mapped".
        volume_map = {}

    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log('A volume is defined for this unit, but ephemeral '
                    'storage was requested', hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for permanent storage but did not define volume ID
        hookenv.log('Persistent storage was requested, but there is no volume '
                    'defined for this unit.', hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config
def mount_volume(config):
    '''Mount config['device'] on config['mountpoint'].

    The mountpoint is created when missing, and anything already mounted
    there is unmounted first. Raises VolumeConfigurationError when the
    mountpoint exists but is not a directory, or when mounting fails.
    '''
    mountpoint = config['mountpoint']

    if not os.path.exists(mountpoint):
        host.mkdir(mountpoint)
    elif not os.path.isdir(mountpoint):
        hookenv.log('Not a directory: {}'.format(mountpoint))
        raise VolumeConfigurationError()

    if os.path.ismount(mountpoint):
        unmount_volume(config)

    mounted = host.mount(config['device'], mountpoint, persist=True)
    if not mounted:
        raise VolumeConfigurationError()
def unmount_volume(config):
    '''Unmount config['mountpoint'] if something is mounted there.

    Raises VolumeConfigurationError when the unmount fails.
    '''
    mountpoint = config['mountpoint']
    if not os.path.ismount(mountpoint):
        return
    if host.umount(mountpoint, persist=True):
        return
    raise VolumeConfigurationError()
def managed_mounts():
    '''List of all mounted managed volumes

    Returns the entries of host.mounts() whose first element starts with
    MOUNT_BASE. A real list is returned (not a one-shot filter iterator) so
    the result can be iterated repeatedly, measured and truth-tested.
    '''
    return [mount for mount in host.mounts()
            if mount[0].startswith(MOUNT_BASE)]
def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Set up storage (or don't) according to the charm's volume configuration.

    Returns the mount point or "ephemeral". before_change and after_change
    are optional functions called around any mount or unmount that is made.
    Raises VolumeConfigurationError when the configuration cannot be read.
    '''
    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    mountpoint = config['mountpoint']

    if config['ephemeral']:
        # Ephemeral storage: make sure nothing stays mounted.
        if os.path.ismount(mountpoint):
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'

    # persistent storage
    if not os.path.ismount(mountpoint):
        before_change()
        mount_volume(config)
        after_change()
    elif dict(managed_mounts()).get(mountpoint) != config['device']:
        # A different device is mounted there: swap it for the configured one.
        before_change()
        unmount_volume(config)
        mount_volume(config)
        after_change()
    return mountpoint

View File

@ -14,8 +14,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import sys import sys
import uuid import uuid
import shutil
from copy import deepcopy from copy import deepcopy
@ -41,6 +43,7 @@ from charmhelpers.core.hookenv import (
log, log,
relation_set, relation_set,
relation_ids, relation_ids,
charm_dir,
) )
from charmhelpers.core.sysctl import create as create_sysctl from charmhelpers.core.sysctl import create as create_sysctl
@ -50,6 +53,8 @@ from charmhelpers.core.host import (
) )
from charmhelpers.core.unitdata import kv from charmhelpers.core.unitdata import kv
from charmhelpers.fetch import apt_install
from charmhelpers.contrib.charmsupport import nrpe
from neutron_ovs_utils import ( from neutron_ovs_utils import (
DHCP_PACKAGES, DHCP_PACKAGES,
@ -62,6 +67,7 @@ from neutron_ovs_utils import (
get_shared_secret, get_shared_secret,
register_configs, register_configs,
restart_map, restart_map,
services,
use_dvr, use_dvr,
use_l3ha, use_l3ha,
enable_nova_metadata, enable_nova_metadata,
@ -178,6 +184,8 @@ def config_changed(check_deferred_restarts=True):
relation_id=rid, relation_id=rid,
request_restart=request_nova_compute_restart) request_restart=request_nova_compute_restart)
update_nrpe_config()
@hooks.hook('neutron-plugin-api-relation-changed') @hooks.hook('neutron-plugin-api-relation-changed')
# NOTE(fnordahl): we need to act immediately to changes to OVS_DEFAULT in-line # NOTE(fnordahl): we need to act immediately to changes to OVS_DEFAULT in-line
@ -287,6 +295,47 @@ def post_series_upgrade():
resume_unit_helper, CONFIGS) resume_unit_helper, CONFIGS)
def install_nrpe_cron():
    """Install the ovs-vsctl caching script and its cron.d entry.

    Copies files/ovs_vsctl/cron_ovs_vsctl.sh from the charm into the local
    nagios plugins directory, makes it root-owned and executable, and writes
    a cron.d file that runs it as root.

    :returns: path of the installed script
    """
    script_src = os.path.join(
        charm_dir(), "files", "ovs_vsctl", "cron_ovs_vsctl.sh")
    installed = shutil.copy(script_src, "/usr/local/lib/nagios/plugins/")
    os.chmod(installed, 0o100755)
    os.chown(installed, uid=0, gid=0)

    # NOTE(review): "3 * * * *" fires once an hour, at minute 3 - confirm
    # this matches the freshness window the check plugin applies to the
    # cached output.
    cron_entry = "3 * * * * root {cmd}\n".format(cmd=installed)
    with open("/etc/cron.d/neutron_openvswitch_ovs_vsctl", "w") as crond_fd:
        crond_fd.write(cron_entry)
    return installed
def install_nrpe_plugin():
    """Install the ovs-vsctl check plugin.

    Copies files/ovs_vsctl/check_ovs_vsctl.py from the charm into the local
    nagios plugins directory and makes it root-owned and executable.

    :returns: path of the installed plugin
    """
    plugin_src = os.path.join(
        charm_dir(), "files", "ovs_vsctl", "check_ovs_vsctl.py")
    installed = shutil.copy(plugin_src, "/usr/local/lib/nagios/plugins")
    os.chmod(installed, 0o100755)
    os.chown(installed, uid=0, gid=0)
    return installed
@hooks.hook('nrpe-external-master-relation-joined',
            'nrpe-external-master-relation-changed')
def update_nrpe_config():
    """(Re)generate all NRPE checks for this unit.

    Adds the init-service checks for services(), installs the ovs-vsctl
    cron job and plugin, registers the ovs_vsctl check and writes the
    result out through the NRPE helper.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')

    nagios_hostname = nrpe.get_nagios_hostname()
    unit_label = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=nagios_hostname)

    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(nrpe_setup, services(), unit_label)

    install_nrpe_cron()
    ovs_check = {
        'shortname': "ovs_vsctl",
        'description': "Check ovs-vsctl list-br for predictable operation.",
        'check_cmd': install_nrpe_plugin(),
    }
    nrpe_setup.add_check(**ovs_check)
    nrpe_setup.write()
@hooks.hook('update-status') @hooks.hook('update-status')
def dummy_update_status(): def dummy_update_status():
"""Dummy function to silence missing hook log entry""" """Dummy function to silence missing hook log entry"""

View File

@ -0,0 +1 @@
neutron_ovs_hooks.py

View File

@ -0,0 +1 @@
neutron_ovs_hooks.py

View File

@ -22,6 +22,9 @@ series:
extra-bindings: extra-bindings:
data: data:
provides: provides:
nrpe-external-master:
interface: nrpe-external-master
scope: container
neutron-plugin: neutron-plugin:
interface: neutron-plugin interface: neutron-plugin
scope: container scope: container

View File

@ -53,6 +53,7 @@ TO_PATCH = [
'determine_purge_packages', 'determine_purge_packages',
'is_container', 'is_container',
'is_hook_allowed', 'is_hook_allowed',
'update_nrpe_config',
] ]
NEUTRON_CONF_DIR = "/etc/neutron" NEUTRON_CONF_DIR = "/etc/neutron"