diff --git a/charm-helpers-hooks.yaml b/charm-helpers-hooks.yaml index 781ff589..0e1627b0 100644 --- a/charm-helpers-hooks.yaml +++ b/charm-helpers-hooks.yaml @@ -14,3 +14,4 @@ include: - contrib.network.ip - contrib.python - contrib.openstack.policyd + - contrib.charmsupport diff --git a/config.yaml b/config.yaml index 764ddeed..4f0f3870 100644 --- a/config.yaml +++ b/config.yaml @@ -457,3 +457,19 @@ options: description: | Allow the charm and packages to restart services automatically when required. + nagios_context: + default: "juju" + type: string + description: | + Used by the nrpe subordinate charms. + A string that will be prepended to instance name to set the host name + in nagios. So for instance the hostname would be something like: + juju-myservice-0 + If you're running multiple environments with the same services in them + this allows you to differentiate between them. + nagios_servicegroups: + default: "" + type: string + description: | + A comma-separated list of nagios servicegroups. + If left empty, the nagios_context will be used as the servicegroup diff --git a/files/ovs_vsctl/check_ovs_vsctl.py b/files/ovs_vsctl/check_ovs_vsctl.py new file mode 100644 index 00000000..c3af1fa1 --- /dev/null +++ b/files/ovs_vsctl/check_ovs_vsctl.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# -*- coding: us-ascii -*- + +import os + +from nagios_plugin3 import ( + CriticalError, + UnknownError, + try_check, + check_file_freshness, +) + +INPUT_FILE = "/var/lib/nagios/ovs_vsctl.out" +INPUT_RC = "/var/lib/nagios/ovs_vsctl.rc" + + +def parse_output(): + """Parse the ovs-vsctl list-br output and raise alertable states.""" + + if not os.path.exists(INPUT_FILE): + raise UnknownError( + "UNKNOWN: {} does not exist (yet?)".format(INPUT_FILE)) + + if not os.path.exists(INPUT_RC): + raise UnknownError( + "UNKNOWN: {} does not exist (yet?)".format(INPUT_RC)) + + try_check(check_file_freshness, INPUT_FILE) + + with open(INPUT_RC) as rc_raw: + code = rc_raw.readline().strip() + if code != "0": + raise CriticalError("CRITICAL: ovs-vsctl list-br returns error") + + with open(INPUT_FILE) as brs_raw: + brs = brs_raw.readlines() + if len(brs) == 0: + raise CriticalError( + "CRITICAL: ovs-vsctl list-br returned no bridges") + + msg = ", ".join(br.strip() for br in brs) + print("OK: {}".format(msg)) + + +def main(): + """Define main subroutine.""" + try_check(parse_output) + + +if __name__ == "__main__": + main() diff --git a/files/ovs_vsctl/cron_ovs_vsctl.sh b/files/ovs_vsctl/cron_ovs_vsctl.sh new file mode 100644 index 00000000..71700f68 --- /dev/null +++ b/files/ovs_vsctl/cron_ovs_vsctl.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -eu + +PATH="/snap/bin:/usr/local/bin:$PATH" +FILE=/var/lib/nagios/ovs_vsctl.out +TMP_FILE="$(tempfile)" +RC_FILE=/var/lib/nagios/ovs_vsctl.rc +LOCK_FILE=/var/lib/nagios/ovs_vsctl.lock +CMD="ovs-vsctl list-br" +GROUP="nagios" + +if [ $# -gt 0 ]; then + echo "This program will cache the output of '${CMD}' as follows" + echo " stdout + stderr -> ${FILE}" + echo " return code -> ${RC_FILE}" + echo + echo "It does not accept any option or argument" + exit 0 +fi + +sleep $[$RANDOM % 60 + 10]s # sleep 10-70s +if [ -f "${LOCK_FILE}" ]; then + echo "Lock file (${LOCK_FILE}) in use, abandoning" >&2 + exit 1 +fi + +touch "${LOCK_FILE}" +$CMD 2>&1 > $TMP_FILE +RC=$? +echo $RC > $RC_FILE +mv $TMP_FILE $FILE +chown :$GROUP $FILE +chmod 644 $FILE +rm "${LOCK_FILE}" +exit 0 diff --git a/hooks/charmhelpers/contrib/charmsupport/__init__.py b/hooks/charmhelpers/contrib/charmsupport/__init__.py new file mode 100644 index 00000000..d7567b86 --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2014-2015 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/hooks/charmhelpers/contrib/charmsupport/nrpe.py b/hooks/charmhelpers/contrib/charmsupport/nrpe.py new file mode 100644 index 00000000..8d1753c3 --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/nrpe.py @@ -0,0 +1,522 @@ +# Copyright 2012-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compatibility with the nrpe-external-master charm""" +# +# Authors: +# Matthew Wedgwood + +import glob +import grp +import os +import pwd +import re +import shlex +import shutil +import subprocess +import yaml + +from charmhelpers.core.hookenv import ( + config, + hook_name, + local_unit, + log, + relation_get, + relation_ids, + relation_set, + relations_of_type, +) + +from charmhelpers.core.host import service +from charmhelpers.core import host + +# This module adds compatibility with the nrpe-external-master and plain nrpe +# subordinate charms. To use it in your charm: +# +# 1. Update metadata.yaml +# +# provides: +# (...) +# nrpe-external-master: +# interface: nrpe-external-master +# scope: container +# +# and/or +# +# provides: +# (...) +# local-monitors: +# interface: local-monitors +# scope: container + +# +# 2. Add the following to config.yaml +# +# nagios_context: +# default: "juju" +# type: string +# description: | +# Used by the nrpe subordinate charms. +# A string that will be prepended to instance name to set the host name +# in nagios. So for instance the hostname would be something like: +# juju-myservice-0 +# If you're running multiple environments with the same services in them +# this allows you to differentiate between them. +# nagios_servicegroups: +# default: "" +# type: string +# description: | +# A comma-separated list of nagios servicegroups. +# If left empty, the nagios_context will be used as the servicegroup +# +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master +# +# 4. Update your hooks.py with something like this: +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE() +# nrpe_compat.add_check( +# shortname = "myservice", +# description = "Check MyService", +# check_cmd = "check_http -w 2 -c 10 http://localhost" +# ) +# nrpe_compat.add_check( +# "myservice_other", +# "Check for widget failures", +# check_cmd = "/srv/myapp/scripts/widget_check" +# ) +# nrpe_compat.write() +# +# def config_changed(): +# (...) +# update_nrpe_config() +# +# def nrpe_external_master_relation_changed(): +# update_nrpe_config() +# +# def local_monitors_relation_changed(): +# update_nrpe_config() +# +# 4.a If your charm is a subordinate charm set primary=False +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE(primary=False) +# +# 5. ln -s hooks.py nrpe-external-master-relation-changed +# ln -s hooks.py local-monitors-relation-changed + + +class CheckException(Exception): + pass + + +class Check(object): + shortname_re = '[A-Za-z0-9-_.@]+$' + service_template = (""" +#--------------------------------------------------- +# This file is Juju managed +#--------------------------------------------------- +define service {{ + use active-service + host_name {nagios_hostname} + service_description {nagios_hostname}[{shortname}] """ + """{description} + check_command check_nrpe!{command} + servicegroups {nagios_servicegroup} +{service_config_overrides} +}} +""") + + def __init__(self, shortname, description, check_cmd, max_check_attempts=None): + super(Check, self).__init__() + # XXX: could be better to calculate this from the service name + if not re.match(self.shortname_re, shortname): + raise CheckException("shortname must match {}".format( + Check.shortname_re)) + self.shortname = shortname + self.command = "check_{}".format(shortname) + # Note: a set of invalid characters is defined by the + # Nagios server config + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= + self.description = description + self.check_cmd = self._locate_cmd(check_cmd) + self.max_check_attempts = max_check_attempts + + def _get_check_filename(self): + return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command)) + + def _get_service_filename(self, hostname): + return os.path.join(NRPE.nagios_exportdir, + 'service__{}_{}.cfg'.format(hostname, self.command)) + + def _locate_cmd(self, check_cmd): + search_path = ( + '/usr/lib/nagios/plugins', + '/usr/local/lib/nagios/plugins', + ) + parts = shlex.split(check_cmd) + for path in search_path: + if os.path.exists(os.path.join(path, parts[0])): + command = os.path.join(path, parts[0]) + if len(parts) > 1: + command += " " + " ".join(parts[1:]) + return command + log('Check command not found: {}'.format(parts[0])) + return '' + + def _remove_service_files(self): + if not os.path.exists(NRPE.nagios_exportdir): + return + for f in os.listdir(NRPE.nagios_exportdir): + if f.endswith('_{}.cfg'.format(self.command)): + os.remove(os.path.join(NRPE.nagios_exportdir, f)) + + def remove(self, hostname): + nrpe_check_file = self._get_check_filename() + if os.path.exists(nrpe_check_file): + os.remove(nrpe_check_file) + self._remove_service_files() + + def write(self, nagios_context, hostname, nagios_servicegroups): + nrpe_check_file = self._get_check_filename() + with open(nrpe_check_file, 'w') as nrpe_check_config: + nrpe_check_config.write("# check {}\n".format(self.shortname)) + if nagios_servicegroups: + nrpe_check_config.write( + "# The following header was added automatically by juju\n") + nrpe_check_config.write( + "# Modifying it will affect nagios monitoring and alerting\n") + nrpe_check_config.write( + "# servicegroups: {}\n".format(nagios_servicegroups)) + nrpe_check_config.write("command[{}]={}\n".format( + self.command, self.check_cmd)) + + if not os.path.exists(NRPE.nagios_exportdir): + log('Not writing service config as {} is not accessible'.format( + NRPE.nagios_exportdir)) + else: + self.write_service_config(nagios_context, hostname, + nagios_servicegroups) + + def write_service_config(self, nagios_context, hostname, + nagios_servicegroups): + self._remove_service_files() + + if self.max_check_attempts: + service_config_overrides = ' max_check_attempts {}'.format( + self.max_check_attempts + ) # Note indentation is here rather than in the template to avoid trailing spaces + else: + service_config_overrides = '' # empty string to avoid printing 'None' + templ_vars = { + 'nagios_hostname': hostname, + 'nagios_servicegroup': nagios_servicegroups, + 'description': self.description, + 'shortname': self.shortname, + 'command': self.command, + 'service_config_overrides': service_config_overrides, + } + nrpe_service_text = Check.service_template.format(**templ_vars) + nrpe_service_file = self._get_service_filename(hostname) + with open(nrpe_service_file, 'w') as nrpe_service_config: + nrpe_service_config.write(str(nrpe_service_text)) + + def run(self): + subprocess.call(self.check_cmd) + + +class NRPE(object): + nagios_logdir = '/var/log/nagios' + nagios_exportdir = '/var/lib/nagios/export' + nrpe_confdir = '/etc/nagios/nrpe.d' + homedir = '/var/lib/nagios' # home dir provided by nagios-nrpe-server + + def __init__(self, hostname=None, primary=True): + super(NRPE, self).__init__() + self.config = config() + self.primary = primary + self.nagios_context = self.config['nagios_context'] + if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']: + self.nagios_servicegroups = self.config['nagios_servicegroups'] + else: + self.nagios_servicegroups = self.nagios_context + self.unit_name = local_unit().replace('/', '-') + if hostname: + self.hostname = hostname + else: + nagios_hostname = get_nagios_hostname() + if nagios_hostname: + self.hostname = nagios_hostname + else: + self.hostname = "{}-{}".format(self.nagios_context, self.unit_name) + self.checks = [] + # Iff in an nrpe-external-master relation hook, set primary status + relation = relation_ids('nrpe-external-master') + if relation: + log("Setting charm primary status {}".format(primary)) + for rid in relation: + relation_set(relation_id=rid, relation_settings={'primary': self.primary}) + self.remove_check_queue = set() + + @classmethod + def does_nrpe_conf_dir_exist(cls): + """Return True if th nrpe_confdif directory exists.""" + return os.path.isdir(cls.nrpe_confdir) + + def add_check(self, *args, **kwargs): + shortname = None + if kwargs.get('shortname') is None: + if len(args) > 0: + shortname = args[0] + else: + shortname = kwargs['shortname'] + + self.checks.append(Check(*args, **kwargs)) + try: + self.remove_check_queue.remove(shortname) + except KeyError: + pass + + def remove_check(self, *args, **kwargs): + if kwargs.get('shortname') is None: + raise ValueError('shortname of check must be specified') + + # Use sensible defaults if they're not specified - these are not + # actually used during removal, but they're required for constructing + # the Check object; check_disk is chosen because it's part of the + # nagios-plugins-basic package. + if kwargs.get('check_cmd') is None: + kwargs['check_cmd'] = 'check_disk' + if kwargs.get('description') is None: + kwargs['description'] = '' + + check = Check(*args, **kwargs) + check.remove(self.hostname) + self.remove_check_queue.add(kwargs['shortname']) + + def write(self): + try: + nagios_uid = pwd.getpwnam('nagios').pw_uid + nagios_gid = grp.getgrnam('nagios').gr_gid + except Exception: + log("Nagios user not set up, nrpe checks not updated") + return + + if not os.path.exists(NRPE.nagios_logdir): + os.mkdir(NRPE.nagios_logdir) + os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid) + + nrpe_monitors = {} + monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}} + + # check that the charm can write to the conf dir. If not, then nagios + # probably isn't installed, and we can defer. + if not self.does_nrpe_conf_dir_exist(): + return + + for nrpecheck in self.checks: + nrpecheck.write(self.nagios_context, self.hostname, + self.nagios_servicegroups) + nrpe_monitors[nrpecheck.shortname] = { + "command": nrpecheck.command, + } + # If we were passed max_check_attempts, add that to the relation data + if nrpecheck.max_check_attempts is not None: + nrpe_monitors[nrpecheck.shortname]['max_check_attempts'] = nrpecheck.max_check_attempts + + # update-status hooks are configured to firing every 5 minutes by + # default. When nagios-nrpe-server is restarted, the nagios server + # reports checks failing causing unnecessary alerts. Let's not restart + # on update-status hooks. + if not hook_name() == 'update-status': + service('restart', 'nagios-nrpe-server') + + monitor_ids = relation_ids("local-monitors") + \ + relation_ids("nrpe-external-master") + for rid in monitor_ids: + reldata = relation_get(unit=local_unit(), rid=rid) + if 'monitors' in reldata: + # update the existing set of monitors with the new data + old_monitors = yaml.safe_load(reldata['monitors']) + old_nrpe_monitors = old_monitors['monitors']['remote']['nrpe'] + # remove keys that are in the remove_check_queue + old_nrpe_monitors = {k: v for k, v in old_nrpe_monitors.items() + if k not in self.remove_check_queue} + # update/add nrpe_monitors + old_nrpe_monitors.update(nrpe_monitors) + old_monitors['monitors']['remote']['nrpe'] = old_nrpe_monitors + # write back to the relation + relation_set(relation_id=rid, monitors=yaml.dump(old_monitors)) + else: + # write a brand new set of monitors, as no existing ones. + relation_set(relation_id=rid, monitors=yaml.dump(monitors)) + + self.remove_check_queue.clear() + + +def get_nagios_hostcontext(relation_name='nrpe-external-master'): + """ + Query relation with nrpe subordinate, return the nagios_host_context + + :param str relation_name: Name of relation nrpe sub joined to + """ + for rel in relations_of_type(relation_name): + if 'nagios_host_context' in rel: + return rel['nagios_host_context'] + + +def get_nagios_hostname(relation_name='nrpe-external-master'): + """ + Query relation with nrpe subordinate, return the nagios_hostname + + :param str relation_name: Name of relation nrpe sub joined to + """ + for rel in relations_of_type(relation_name): + if 'nagios_hostname' in rel: + return rel['nagios_hostname'] + + +def get_nagios_unit_name(relation_name='nrpe-external-master'): + """ + Return the nagios unit name prepended with host_context if needed + + :param str relation_name: Name of relation nrpe sub joined to + """ + host_context = get_nagios_hostcontext(relation_name) + if host_context: + unit = "%s:%s" % (host_context, local_unit()) + else: + unit = local_unit() + return unit + + +def add_init_service_checks(nrpe, services, unit_name, immediate_check=True): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param list services: List of services to check + :param str unit_name: Unit name to use in check description + :param bool immediate_check: For sysv init, run the service check immediately + """ + for svc in services: + # Don't add a check for these services from neutron-gateway + if svc in ['ext-port', 'os-charm-phy-nic-mtu']: + next + + upstart_init = '/etc/init/%s.conf' % svc + sysv_init = '/etc/init.d/%s' % svc + + if host.init_is_systemd(service_name=svc): + nrpe.add_check( + shortname=svc, + description='process check {%s}' % unit_name, + check_cmd='check_systemd.py %s' % svc + ) + elif os.path.exists(upstart_init): + nrpe.add_check( + shortname=svc, + description='process check {%s}' % unit_name, + check_cmd='check_upstart_job %s' % svc + ) + elif os.path.exists(sysv_init): + cronpath = '/etc/cron.d/nagios-service-check-%s' % svc + checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc) + croncmd = ( + '/usr/local/lib/nagios/plugins/check_exit_status.pl ' + '-e -s /etc/init.d/%s status' % svc + ) + cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath) + f = open(cronpath, 'w') + f.write(cron_file) + f.close() + nrpe.add_check( + shortname=svc, + description='service check {%s}' % unit_name, + check_cmd='check_status_file.py -f %s' % checkpath, + ) + # if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail + # (LP: #1670223). + if immediate_check and os.path.isdir(nrpe.homedir): + f = open(checkpath, 'w') + subprocess.call( + croncmd.split(), + stdout=f, + stderr=subprocess.STDOUT + ) + f.close() + os.chmod(checkpath, 0o644) + + +def copy_nrpe_checks(nrpe_files_dir=None): + """ + Copy the nrpe checks into place + + """ + NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins' + if nrpe_files_dir is None: + # determine if "charmhelpers" is in CHARMDIR or CHARMDIR/hooks + for segment in ['.', 'hooks']: + nrpe_files_dir = os.path.abspath(os.path.join( + os.getenv('CHARM_DIR'), + segment, + 'charmhelpers', + 'contrib', + 'openstack', + 'files')) + if os.path.isdir(nrpe_files_dir): + break + else: + raise RuntimeError("Couldn't find charmhelpers directory") + if not os.path.exists(NAGIOS_PLUGINS): + os.makedirs(NAGIOS_PLUGINS) + for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(NAGIOS_PLUGINS, os.path.basename(fname))) + + +def add_haproxy_checks(nrpe, unit_name): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param str unit_name: Unit name to use in check description + """ + nrpe.add_check( + shortname='haproxy_servers', + description='Check HAProxy {%s}' % unit_name, + check_cmd='check_haproxy.sh') + nrpe.add_check( + shortname='haproxy_queue', + description='Check HAProxy queue depth {%s}' % unit_name, + check_cmd='check_haproxy_queue_depth.sh') + + +def remove_deprecated_check(nrpe, deprecated_services): + """ + Remove checks for deprecated services in list + + :param nrpe: NRPE object to remove check from + :type nrpe: NRPE + :param deprecated_services: List of deprecated services that are removed + :type deprecated_services: list + """ + for dep_svc in deprecated_services: + log('Deprecated service: {}'.format(dep_svc)) + nrpe.remove_check(shortname=dep_svc) diff --git a/hooks/charmhelpers/contrib/charmsupport/volumes.py b/hooks/charmhelpers/contrib/charmsupport/volumes.py new file mode 100644 index 00000000..f7c6fbdc --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/volumes.py @@ -0,0 +1,173 @@ +# Copyright 2014-2021 Canonical Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +Functions for managing volumes in juju units. One volume is supported per unit. +Subordinates may have their own storage, provided it is on its own partition. + +Configuration stanzas:: + + volume-ephemeral: + type: boolean + default: true + description: > + If false, a volume is mounted as specified in "volume-map" + If true, ephemeral storage will be used, meaning that log data + will only exist as long as the machine. YOU HAVE BEEN WARNED. + volume-map: + type: string + default: {} + description: > + YAML map of units to device names, e.g: + "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }" + Service units will raise a configure-error if volume-ephemeral + is 'true' and no volume-map value is set. Use 'juju set' to set a + value and 'juju resolved' to complete configuration. + +Usage:: + + from charmsupport.volumes import configure_volume, VolumeConfigurationError + from charmsupport.hookenv import log, ERROR + def post_mount_hook(): + stop_service('myservice') + def post_mount_hook(): + start_service('myservice') + + if __name__ == '__main__': + try: + configure_volume(before_change=pre_mount_hook, + after_change=post_mount_hook) + except VolumeConfigurationError: + log('Storage could not be configured', ERROR) + +''' + +# XXX: Known limitations +# - fstab is neither consulted nor updated + +import os +from charmhelpers.core import hookenv +from charmhelpers.core import host +import yaml + + +MOUNT_BASE = '/srv/juju/volumes' + + +class VolumeConfigurationError(Exception): + '''Volume configuration data is missing or invalid''' + pass + + +def get_config(): + '''Gather and sanity-check volume configuration data''' + volume_config = {} + config = hookenv.config() + + errors = False + + if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'): + volume_config['ephemeral'] = True + else: + volume_config['ephemeral'] = False + + try: + volume_map = yaml.safe_load(config.get('volume-map', '{}')) + except yaml.YAMLError as e: + hookenv.log("Error parsing YAML volume-map: {}".format(e), + hookenv.ERROR) + errors = True + if volume_map is None: + # probably an empty string + volume_map = {} + elif not isinstance(volume_map, dict): + hookenv.log("Volume-map should be a dictionary, not {}".format( + type(volume_map))) + errors = True + + volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME']) + if volume_config['device'] and volume_config['ephemeral']: + # asked for ephemeral storage but also defined a volume ID + hookenv.log('A volume is defined for this unit, but ephemeral ' + 'storage was requested', hookenv.ERROR) + errors = True + elif not volume_config['device'] and not volume_config['ephemeral']: + # asked for permanent storage but did not define volume ID + hookenv.log('Ephemeral storage was requested, but there is no volume ' + 'defined for this unit.', hookenv.ERROR) + errors = True + + unit_mount_name = hookenv.local_unit().replace('/', '-') + volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name) + + if errors: + return None + return volume_config + + +def mount_volume(config): + if os.path.exists(config['mountpoint']): + if not os.path.isdir(config['mountpoint']): + hookenv.log('Not a directory: {}'.format(config['mountpoint'])) + raise VolumeConfigurationError() + else: + host.mkdir(config['mountpoint']) + if os.path.ismount(config['mountpoint']): + unmount_volume(config) + if not host.mount(config['device'], config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def unmount_volume(config): + if os.path.ismount(config['mountpoint']): + if not host.umount(config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def managed_mounts(): + '''List of all mounted managed volumes''' + return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts()) + + +def configure_volume(before_change=lambda: None, after_change=lambda: None): + '''Set up storage (or don't) according to the charm's volume configuration. + Returns the mount point or "ephemeral". before_change and after_change + are optional functions to be called if the volume configuration changes. + ''' + + config = get_config() + if not config: + hookenv.log('Failed to read volume configuration', hookenv.CRITICAL) + raise VolumeConfigurationError() + + if config['ephemeral']: + if os.path.ismount(config['mountpoint']): + before_change() + unmount_volume(config) + after_change() + return 'ephemeral' + else: + # persistent storage + if os.path.ismount(config['mountpoint']): + mounts = dict(managed_mounts()) + if mounts.get(config['mountpoint']) != config['device']: + before_change() + unmount_volume(config) + mount_volume(config) + after_change() + else: + before_change() + mount_volume(config) + after_change() + return config['mountpoint'] diff --git a/hooks/neutron_ovs_hooks.py b/hooks/neutron_ovs_hooks.py index 544e2603..14e43586 100755 --- a/hooks/neutron_ovs_hooks.py +++ b/hooks/neutron_ovs_hooks.py @@ -14,8 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import sys import uuid +import shutil from copy import deepcopy @@ -41,6 +43,7 @@ from charmhelpers.core.hookenv import ( log, relation_set, relation_ids, + charm_dir, ) from charmhelpers.core.sysctl import create as create_sysctl @@ -50,6 +53,8 @@ from charmhelpers.core.host import ( ) from charmhelpers.core.unitdata import kv +from charmhelpers.fetch import apt_install +from charmhelpers.contrib.charmsupport import nrpe from neutron_ovs_utils import ( DHCP_PACKAGES, @@ -62,6 +67,7 @@ from neutron_ovs_utils import ( get_shared_secret, register_configs, restart_map, + services, use_dvr, use_l3ha, enable_nova_metadata, @@ -178,6 +184,8 @@ def config_changed(check_deferred_restarts=True): relation_id=rid, request_restart=request_nova_compute_restart) + update_nrpe_config() + @hooks.hook('neutron-plugin-api-relation-changed') # NOTE(fnordahl): we need to act immediately to changes to OVS_DEFAULT in-line @@ -287,6 +295,47 @@ def post_series_upgrade(): resume_unit_helper, CONFIGS) +def install_nrpe_cron(): + src = os.path.join(charm_dir(), "files", "ovs_vsctl", "cron_ovs_vsctl.sh") + dst = shutil.copy(src, "/usr/local/lib/nagios/plugins/") + os.chmod(dst, 0o100755) + os.chown(dst, uid=0, gid=0) + + cronjob_line = "3 * * * * root {cmd}\n".format(cmd=dst) + crond_file = "/etc/cron.d/neutron_openvswitch_ovs_vsctl" + with open(crond_file, "w") as crond_fd: + crond_fd.write(cronjob_line) + return dst + + +def install_nrpe_plugin(): + src = os.path.join(charm_dir(), "files", "ovs_vsctl", "check_ovs_vsctl.py") + dst = shutil.copy(src, "/usr/local/lib/nagios/plugins") + os.chmod(dst, 0o100755) + os.chown(dst, uid=0, gid=0) + return dst + + +@hooks.hook('nrpe-external-master-relation-joined', + 'nrpe-external-master-relation-changed') +def update_nrpe_config(): + # python-dbus is used by check_upstart_job + apt_install('python-dbus') + hostname = nrpe.get_nagios_hostname() + current_unit = nrpe.get_nagios_unit_name() + nrpe_setup = nrpe.NRPE(hostname=hostname) + nrpe.copy_nrpe_checks() + nrpe.add_init_service_checks(nrpe_setup, services(), current_unit) + install_nrpe_cron() + cmd = install_nrpe_plugin() + nrpe_setup.add_check( + shortname="ovs_vsctl", + description="Check ovs-vsctl list-br for predictable operation.", + check_cmd=cmd + ) + nrpe_setup.write() + + @hooks.hook('update-status') def dummy_update_status(): """Dummy function to silence missing hook log entry""" diff --git a/hooks/nrpe-external-master-relation-changed b/hooks/nrpe-external-master-relation-changed new file mode 120000 index 00000000..55aa8e52 --- /dev/null +++ b/hooks/nrpe-external-master-relation-changed @@ -0,0 +1 @@ +neutron_ovs_hooks.py \ No newline at end of file diff --git a/hooks/nrpe-external-master-relation-joined b/hooks/nrpe-external-master-relation-joined new file mode 120000 index 00000000..55aa8e52 --- /dev/null +++ b/hooks/nrpe-external-master-relation-joined @@ -0,0 +1 @@ +neutron_ovs_hooks.py \ No newline at end of file diff --git a/metadata.yaml b/metadata.yaml index 1780aa68..16714a58 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -22,6 +22,9 @@ series: extra-bindings: data: provides: + nrpe-external-master: + interface: nrpe-external-master + scope: container neutron-plugin: interface: neutron-plugin scope: container diff --git a/unit_tests/test_neutron_ovs_hooks.py b/unit_tests/test_neutron_ovs_hooks.py index 1c1f88ca..99110bbf 100644 --- a/unit_tests/test_neutron_ovs_hooks.py +++ b/unit_tests/test_neutron_ovs_hooks.py @@ -53,6 +53,7 @@ TO_PATCH = [ 'determine_purge_packages', 'is_container', 'is_hook_allowed', + 'update_nrpe_config', ] NEUTRON_CONF_DIR = "/etc/neutron"