From 9c3ca6e7431a7e6d2bc88853956b9d746a553781 Mon Sep 17 00:00:00 2001 From: Brad Marshall Date: Thu, 12 Feb 2015 09:49:44 +1000 Subject: [PATCH] [bradm] Sync charmhelpers nrpe support, and add nrpe checks --- charm-helpers.yaml | 1 + files/nrpe/check_corosync_rings | 99 ++++ files/nrpe/check_crm | 201 ++++++++ files/nrpe/check_haproxy.sh | 32 ++ files/nrpe/check_haproxy_queue_depth.sh | 30 ++ .../contrib/charmsupport/__init__.py | 15 + .../charmhelpers/contrib/charmsupport/nrpe.py | 324 ++++++++++++ .../contrib/charmsupport/volumes.py | 175 +++++++ hooks/charmhelpers/core/host.py | 10 +- hooks/charmhelpers/core/sysctl.py | 16 +- hooks/charmhelpers/core/templating.py | 6 +- hooks/charmhelpers/core/unitdata.py | 477 ++++++++++++++++++ hooks/hooks.py | 70 ++- hooks/nrpe-external-master-relation-changed | 1 + hooks/nrpe-external-master-relation-joined | 1 + metadata.yaml | 3 + 16 files changed, 1447 insertions(+), 14 deletions(-) create mode 100755 files/nrpe/check_corosync_rings create mode 100755 files/nrpe/check_crm create mode 100755 files/nrpe/check_haproxy.sh create mode 100755 files/nrpe/check_haproxy_queue_depth.sh create mode 100644 hooks/charmhelpers/contrib/charmsupport/__init__.py create mode 100644 hooks/charmhelpers/contrib/charmsupport/nrpe.py create mode 100644 hooks/charmhelpers/contrib/charmsupport/volumes.py create mode 100644 hooks/charmhelpers/core/unitdata.py create mode 120000 hooks/nrpe-external-master-relation-changed create mode 120000 hooks/nrpe-external-master-relation-joined diff --git a/charm-helpers.yaml b/charm-helpers.yaml index 3f3e737..5913e9b 100644 --- a/charm-helpers.yaml +++ b/charm-helpers.yaml @@ -9,3 +9,4 @@ include: - contrib.network.ip - contrib.openstack.utils - contrib.python.packages + - contrib.charmsupport diff --git a/files/nrpe/check_corosync_rings b/files/nrpe/check_corosync_rings new file mode 100755 index 0000000..860153a --- /dev/null +++ b/files/nrpe/check_corosync_rings @@ -0,0 +1,99 @@ +#!/usr/bin/perl +# +# check_corosync_rings +# +# Copyright © 2011 Phil Garner, Sysnix Consultants Limited +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Authors: Phil Garner - phil@sysnix.com & Peter Mottram peter@sysnix.com +# +# v0.1 05/01/2011 +# v0.2 31/10/2011 - additional crit when closing the file handle and additional +# comments added +# +# NOTE:- Requires Perl 5.8 or higher & the Perl Module Nagios::Plugin +# Nagios user will need sudo acces - suggest adding line below to +# sudoers. +# nagios ALL=(ALL) NOPASSWD: /usr/sbin/corosync-cfgtool -s +# +# In sudoers if requiretty is on (off state is default) +# you will also need to add the line below +# Defaults:nagios !requiretty +# + +use warnings; +use strict; +use Nagios::Plugin; + +# Lines below may need changing if corosync-cfgtool or sudo installed in a +# diffrent location. + +my $sudo = '/usr/bin/sudo'; +my $cfgtool = '/usr/sbin/corosync-cfgtool -s'; + +# Now set up the plugin +my $np = Nagios::Plugin->new( + shortname => 'check_cororings', + version => '0.2', + usage => "Usage: %s \n\t\t--help for help\n", + license => "License - GPL v3 see code for more details", + url => "http://www.sysnix.com", + blurb => +"\tNagios plugin that checks the status of corosync rings, requires Perl \t5.8+ and CPAN modules Nagios::Plugin.", +); + +#Args +$np->add_arg( + spec => 'rings|r=s', + help => +'How many rings should be running (optinal) sends Crit if incorrect number of rings found.', + required => 0, +); + +$np->getopts; + +my $found = 0; +my $fh; +my $rings = $np->opts->rings; + +# Run cfgtools spin through output and get info needed + +open( $fh, "$sudo $cfgtool |" ) + or $np->nagios_exit( CRITICAL, "Running corosync-cfgtool failed" ); + +foreach my $line (<$fh>) { + if ( $line =~ m/status\s*=\s*(\S.+)/ ) { + my $status = $1; + if ( $status =~ m/^ring (\d+) active with no faults/ ) { + $np->add_message( OK, "ring $1 OK" ); + } + else { + $np->add_message( CRITICAL, $status ); + } + $found++; + } +} + +close($fh) or $np->nagios_exit( CRITICAL, "Running corosync-cfgtool failed" ); + +# Check we found some rings and apply -r arg if needed +if ( $found == 0 ) { + $np->nagios_exit( CRITICAL, "No Rings Found" ); +} +elsif ( defined $rings && $rings != $found ) { + $np->nagios_exit( CRITICAL, "Expected $rings rings but found $found" ); +} + +$np->nagios_exit( $np->check_messages() ); diff --git a/files/nrpe/check_crm b/files/nrpe/check_crm new file mode 100755 index 0000000..4f93462 --- /dev/null +++ b/files/nrpe/check_crm @@ -0,0 +1,201 @@ +#!/usr/bin/perl +# +# check_crm_v0_7 +# +# Copyright © 2013 Philip Garner, Sysnix Consultants Limited +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Authors: Phil Garner - phil@sysnix.com & Peter Mottram - peter@sysnix.com +# +# v0.1 09/01/2011 +# v0.2 11/01/2011 +# v0.3 22/08/2011 - bug fix and changes suggested by Vadym Chepkov +# v0.4 23/08/2011 - update for spelling and anchor regex capture (Vadym Chepkov) +# v0.5 29/09/2011 - Add standby warn/crit suggested by Sönke Martens & removal +# of 'our' to 'my' to completely avoid problems with ePN +# v0.6 14/03/2013 - Change from \w+ to \S+ in stopped check to cope with +# Servers that have non word charachters in. Suggested by +# Igal Baevsky. +# v0.7 01/09/2013 - In testing as still not fully tested. Adds optional +# constraints check (Boris Wesslowski). Adds fail count +# threshold ( Zoran Bosnjak & Marko Hrastovec ) +# +# NOTES: Requires Perl 5.8 or higher & the Perl Module Nagios::Plugin +# Nagios user will need sudo acces - suggest adding line below to +# sudoers +# nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm_mon -1 -r -f +# +# if you want to check for location constraints (-c) also add +# nagios ALL=(ALL) NOPASSWD: /usr/sbin/crm configure show +# +# In sudoers if requiretty is on (off state is default) +# you will also need to add the line below +# Defaults:nagios !requiretty +# + +use warnings; +use strict; +use Nagios::Plugin; + +# Lines below may need changing if crm_mon or sudo installed in a +# different location. + +my $sudo = '/usr/bin/sudo'; +my $crm_mon = '/usr/sbin/crm_mon -1 -r -f'; +my $crm_configure_show = '/usr/sbin/crm configure show'; + +my $np = Nagios::Plugin->new( + shortname => 'check_crm', + version => '0.7', + usage => "Usage: %s \n\t\t--help for help\n", +); + +$np->add_arg( + spec => 'warning|w', + help => +'If failed Nodes, stopped Resources detected or Standby Nodes sends Warning instead of Critical (default) as long as there are no other errors and there is Quorum', + required => 0, +); + +$np->add_arg( + spec => 'standbyignore|s', + help => 'Ignore any node(s) in standby, by default sends Critical', + required => 0, +); + +$np->add_arg( + spec => 'constraint|constraints|c', + help => 'Also check configuration for location constraints (caused by migrations) and warn if there are any. Requires additional privileges see notes', + required => 0, +); + +$np->add_arg( + spec => 'failcount|failcounts|f=i', + help => 'resource fail count to start warning on [default = 1].', + required => 0, + default => 1, +); + +$np->getopts; +my $ConstraintsFlag = $np->opts->constraint; + +my @standby; + +# Check for -w option set warn if this is case instead of crit +my $warn_or_crit = 'CRITICAL'; +$warn_or_crit = 'WARNING' if $np->opts->warning; + +my $fh; + +open( $fh, "$sudo $crm_mon |" ) + or $np->nagios_exit( CRITICAL, "Running $sudo $crm_mon has failed" ); + +foreach my $line (<$fh>) { + + if ( $line =~ m/Connection to cluster failed\:(.*)/i ) { + + # Check Cluster connected + $np->nagios_exit( CRITICAL, "Connection to cluster FAILED: $1" ); + } + elsif ( $line =~ m/Current DC:/ ) { + + # Check for Quorum + if ( $line =~ m/partition with quorum$/ ) { + + # Assume cluster is OK - we only add warn/crit after here + + $np->add_message( OK, "Cluster OK" ); + } + else { + $np->add_message( CRITICAL, "No Quorum" ); + } + } + elsif ( $line =~ m/^offline:\s*\[\s*(\S.*?)\s*\]/i ) { + + # Count offline nodes + my @offline = split( /\s+/, $1 ); + my $numoffline = scalar @offline; + $np->add_message( $warn_or_crit, ": $numoffline Nodes Offline" ); + } + elsif ( $line =~ m/^node\s+(\S.*):\s*standby/i ) { + + # Check for standby nodes (suggested by Sönke Martens) + # See later in code for message created from this + push @standby, $1; + } + + elsif ( $line =~ m/\s*(\S+)\s+\(\S+\)\:\s+Stopped/ ) { + + # Check Resources Stopped + $np->add_message( $warn_or_crit, ": $1 Stopped" ); + } + elsif ( $line =~ m/\s*stopped\:\s*\[(.*)\]/i ) { + + # Check Master/Slave stopped + $np->add_message( $warn_or_crit, ": $1 Stopped" ); + } + elsif ( $line =~ m/^Failed actions\:/ ) { + + # Check Failed Actions + $np->add_message( CRITICAL, + ": FAILED actions detected or not cleaned up" ); + } + elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+\w+\s+\w+\s+\(unmanaged\)\s+/i ) + { + + # Check Unmanaged + $np->add_message( CRITICAL, ": $1 unmanaged FAILED" ); + } + elsif ( $line =~ m/\s*(\S+?)\s+ \(.*\)\:\s+not installed/i ) { + + # Check for errors + $np->add_message( CRITICAL, ": $1 not installed" ); + } + elsif ( $line =~ m/\s*(\S+?):.*fail-count=(\d+)/i ) { + if ( $2 >= $np->opts->failcount ) { + + # Check for resource Fail count (suggested by Vadym Chepkov) + $np->add_message( WARNING, ": $1 failure detected, fail-count=$2" ); + } + } +} + +# If found any Nodes in standby & no -s option used send warn/crit +if ( scalar @standby > 0 && !$np->opts->standbyignore ) { + $np->add_message( $warn_or_crit, + ": " . join( ', ', @standby ) . " in Standby" ); +} + +close($fh) or $np->nagios_exit( CRITICAL, "Running $crm_mon FAILED" ); + +# if -c flag set check configuration for constraints +if ($ConstraintsFlag) { + + open( $fh, "$sudo $crm_configure_show|" ) + or $np->nagios_exit( CRITICAL, + "Running $sudo $crm_configure_show has failed" ); + + foreach my $line (<$fh>) { + if ( $line =~ m/location cli-(prefer|standby)-\S+\s+(\S+)/ ) { + $np->add_message( WARNING, + ": $2 blocking location constraint detected" ); + } + } + close($fh) + or $np->nagios_exit( CRITICAL, "Running $crm_configure_show FAILED" ); +} + +$np->nagios_exit( $np->check_messages() ); + diff --git a/files/nrpe/check_haproxy.sh b/files/nrpe/check_haproxy.sh new file mode 100755 index 0000000..eb8527f --- /dev/null +++ b/files/nrpe/check_haproxy.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#-------------------------------------------- +# This file is managed by Juju +#-------------------------------------------- +# +# Copyright 2009,2012 Canonical Ltd. +# Author: Tom Haddon + +CRITICAL=0 +NOTACTIVE='' +LOGFILE=/var/log/nagios/check_haproxy.log +AUTH=$(grep -r "stats auth" /etc/haproxy | head -1 | awk '{print $4}') + +for appserver in $(grep ' server' /etc/haproxy/haproxy.cfg | awk '{print $2'}); +do + output=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 --regex="class=\"(active|backup)(2|3).*${appserver}" -e ' 200 OK') + if [ $? != 0 ]; then + date >> $LOGFILE + echo $output >> $LOGFILE + /usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -v | grep $appserver >> $LOGFILE 2>&1 + CRITICAL=1 + NOTACTIVE="${NOTACTIVE} $appserver" + fi +done + +if [ $CRITICAL = 1 ]; then + echo "CRITICAL:${NOTACTIVE}" + exit 2 +fi + +echo "OK: All haproxy instances looking good" +exit 0 diff --git a/files/nrpe/check_haproxy_queue_depth.sh b/files/nrpe/check_haproxy_queue_depth.sh new file mode 100755 index 0000000..3ebb532 --- /dev/null +++ b/files/nrpe/check_haproxy_queue_depth.sh @@ -0,0 +1,30 @@ +#!/bin/bash +#-------------------------------------------- +# This file is managed by Juju +#-------------------------------------------- +# +# Copyright 2009,2012 Canonical Ltd. +# Author: Tom Haddon + +# These should be config options at some stage +CURRQthrsh=0 +MAXQthrsh=100 + +AUTH=$(grep -r "stats auth" /etc/haproxy | head -1 | awk '{print $4}') + +HAPROXYSTATS=$(/usr/lib/nagios/plugins/check_http -a ${AUTH} -I 127.0.0.1 -p 8888 -u '/;csv' -v) + +for BACKEND in $(echo $HAPROXYSTATS| xargs -n1 | grep BACKEND | awk -F , '{print $1}') +do + CURRQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 3) + MAXQ=$(echo "$HAPROXYSTATS" | grep $BACKEND | grep BACKEND | cut -d , -f 4) + + if [[ $CURRQ -gt $CURRQthrsh || $MAXQ -gt $MAXQthrsh ]] ; then + echo "CRITICAL: queue depth for $BACKEND - CURRENT:$CURRQ MAX:$MAXQ" + exit 2 + fi +done + +echo "OK: All haproxy queue depths looking good" +exit 0 + diff --git a/hooks/charmhelpers/contrib/charmsupport/__init__.py b/hooks/charmhelpers/contrib/charmsupport/__init__.py new file mode 100644 index 0000000..d1400a0 --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2014-2015 Canonical Limited. +# +# This file is part of charm-helpers. +# +# charm-helpers is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 as +# published by the Free Software Foundation. +# +# charm-helpers is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with charm-helpers. If not, see . diff --git a/hooks/charmhelpers/contrib/charmsupport/nrpe.py b/hooks/charmhelpers/contrib/charmsupport/nrpe.py new file mode 100644 index 0000000..0fd0a9d --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/nrpe.py @@ -0,0 +1,324 @@ +# Copyright 2014-2015 Canonical Limited. +# +# This file is part of charm-helpers. +# +# charm-helpers is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 as +# published by the Free Software Foundation. +# +# charm-helpers is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with charm-helpers. If not, see . + +"""Compatibility with the nrpe-external-master charm""" +# Copyright 2012 Canonical Ltd. +# +# Authors: +# Matthew Wedgwood + +import subprocess +import pwd +import grp +import os +import re +import shlex +import yaml + +from charmhelpers.core.hookenv import ( + config, + local_unit, + log, + relation_ids, + relation_set, + relations_of_type, +) + +from charmhelpers.core.host import service + +# This module adds compatibility with the nrpe-external-master and plain nrpe +# subordinate charms. To use it in your charm: +# +# 1. Update metadata.yaml +# +# provides: +# (...) +# nrpe-external-master: +# interface: nrpe-external-master +# scope: container +# +# and/or +# +# provides: +# (...) +# local-monitors: +# interface: local-monitors +# scope: container + +# +# 2. Add the following to config.yaml +# +# nagios_context: +# default: "juju" +# type: string +# description: | +# Used by the nrpe subordinate charms. +# A string that will be prepended to instance name to set the host name +# in nagios. So for instance the hostname would be something like: +# juju-myservice-0 +# If you're running multiple environments with the same services in them +# this allows you to differentiate between them. +# nagios_servicegroups: +# default: "" +# type: string +# description: | +# A comma-separated list of nagios servicegroups. +# If left empty, the nagios_context will be used as the servicegroup +# +# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master +# +# 4. Update your hooks.py with something like this: +# +# from charmsupport.nrpe import NRPE +# (...) +# def update_nrpe_config(): +# nrpe_compat = NRPE() +# nrpe_compat.add_check( +# shortname = "myservice", +# description = "Check MyService", +# check_cmd = "check_http -w 2 -c 10 http://localhost" +# ) +# nrpe_compat.add_check( +# "myservice_other", +# "Check for widget failures", +# check_cmd = "/srv/myapp/scripts/widget_check" +# ) +# nrpe_compat.write() +# +# def config_changed(): +# (...) +# update_nrpe_config() +# +# def nrpe_external_master_relation_changed(): +# update_nrpe_config() +# +# def local_monitors_relation_changed(): +# update_nrpe_config() +# +# 5. ln -s hooks.py nrpe-external-master-relation-changed +# ln -s hooks.py local-monitors-relation-changed + + +class CheckException(Exception): + pass + + +class Check(object): + shortname_re = '[A-Za-z0-9-_]+$' + service_template = (""" +#--------------------------------------------------- +# This file is Juju managed +#--------------------------------------------------- +define service {{ + use active-service + host_name {nagios_hostname} + service_description {nagios_hostname}[{shortname}] """ + """{description} + check_command check_nrpe!{command} + servicegroups {nagios_servicegroup} +}} +""") + + def __init__(self, shortname, description, check_cmd): + super(Check, self).__init__() + # XXX: could be better to calculate this from the service name + if not re.match(self.shortname_re, shortname): + raise CheckException("shortname must match {}".format( + Check.shortname_re)) + self.shortname = shortname + self.command = "check_{}".format(shortname) + # Note: a set of invalid characters is defined by the + # Nagios server config + # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()= + self.description = description + self.check_cmd = self._locate_cmd(check_cmd) + + def _locate_cmd(self, check_cmd): + search_path = ( + '/usr/lib/nagios/plugins', + '/usr/local/lib/nagios/plugins', + ) + parts = shlex.split(check_cmd) + for path in search_path: + if os.path.exists(os.path.join(path, parts[0])): + command = os.path.join(path, parts[0]) + if len(parts) > 1: + command += " " + " ".join(parts[1:]) + return command + log('Check command not found: {}'.format(parts[0])) + return '' + + def write(self, nagios_context, hostname, nagios_servicegroups=None): + nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format( + self.command) + with open(nrpe_check_file, 'w') as nrpe_check_config: + nrpe_check_config.write("# check {}\n".format(self.shortname)) + nrpe_check_config.write("command[{}]={}\n".format( + self.command, self.check_cmd)) + + if not os.path.exists(NRPE.nagios_exportdir): + log('Not writing service config as {} is not accessible'.format( + NRPE.nagios_exportdir)) + else: + self.write_service_config(nagios_context, hostname, + nagios_servicegroups) + + def write_service_config(self, nagios_context, hostname, + nagios_servicegroups=None): + for f in os.listdir(NRPE.nagios_exportdir): + if re.search('.*{}.cfg'.format(self.command), f): + os.remove(os.path.join(NRPE.nagios_exportdir, f)) + + if not nagios_servicegroups: + nagios_servicegroups = nagios_context + + templ_vars = { + 'nagios_hostname': hostname, + 'nagios_servicegroup': nagios_servicegroups, + 'description': self.description, + 'shortname': self.shortname, + 'command': self.command, + } + nrpe_service_text = Check.service_template.format(**templ_vars) + nrpe_service_file = '{}/service__{}_{}.cfg'.format( + NRPE.nagios_exportdir, hostname, self.command) + with open(nrpe_service_file, 'w') as nrpe_service_config: + nrpe_service_config.write(str(nrpe_service_text)) + + def run(self): + subprocess.call(self.check_cmd) + + +class NRPE(object): + nagios_logdir = '/var/log/nagios' + nagios_exportdir = '/var/lib/nagios/export' + nrpe_confdir = '/etc/nagios/nrpe.d' + + def __init__(self, hostname=None): + super(NRPE, self).__init__() + self.config = config() + self.nagios_context = self.config['nagios_context'] + if 'nagios_servicegroups' in self.config: + self.nagios_servicegroups = self.config['nagios_servicegroups'] + else: + self.nagios_servicegroups = 'juju' + self.unit_name = local_unit().replace('/', '-') + if hostname: + self.hostname = hostname + else: + self.hostname = "{}-{}".format(self.nagios_context, self.unit_name) + self.checks = [] + + def add_check(self, *args, **kwargs): + self.checks.append(Check(*args, **kwargs)) + + def write(self): + try: + nagios_uid = pwd.getpwnam('nagios').pw_uid + nagios_gid = grp.getgrnam('nagios').gr_gid + except: + log("Nagios user not set up, nrpe checks not updated") + return + + if not os.path.exists(NRPE.nagios_logdir): + os.mkdir(NRPE.nagios_logdir) + os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid) + + nrpe_monitors = {} + monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}} + for nrpecheck in self.checks: + nrpecheck.write(self.nagios_context, self.hostname, + self.nagios_servicegroups) + nrpe_monitors[nrpecheck.shortname] = { + "command": nrpecheck.command, + } + + service('restart', 'nagios-nrpe-server') + + for rid in relation_ids("local-monitors"): + relation_set(relation_id=rid, monitors=yaml.dump(monitors)) + + +def get_nagios_hostcontext(relation_name='nrpe-external-master'): + """ + Query relation with nrpe subordinate, return the nagios_host_context + + :param str relation_name: Name of relation nrpe sub joined to + """ + for rel in relations_of_type(relation_name): + if 'nagios_hostname' in rel: + return rel['nagios_host_context'] + + +def get_nagios_hostname(relation_name='nrpe-external-master'): + """ + Query relation with nrpe subordinate, return the nagios_hostname + + :param str relation_name: Name of relation nrpe sub joined to + """ + for rel in relations_of_type(relation_name): + if 'nagios_hostname' in rel: + return rel['nagios_hostname'] + + +def get_nagios_unit_name(relation_name='nrpe-external-master'): + """ + Return the nagios unit name prepended with host_context if needed + + :param str relation_name: Name of relation nrpe sub joined to + """ + host_context = get_nagios_hostcontext(relation_name) + if host_context: + unit = "%s:%s" % (host_context, local_unit()) + else: + unit = local_unit() + return unit + + +def add_init_service_checks(nrpe, services, unit_name): + """ + Add checks for each service in list + + :param NRPE nrpe: NRPE object to add check to + :param list services: List of services to check + :param str unit_name: Unit name to use in check description + """ + for svc in services: + upstart_init = '/etc/init/%s.conf' % svc + sysv_init = '/etc/init.d/%s' % svc + if os.path.exists(upstart_init): + nrpe.add_check( + shortname=svc, + description='process check {%s}' % unit_name, + check_cmd='check_upstart_job %s' % svc + ) + elif os.path.exists(sysv_init): + cronpath = '/etc/cron.d/nagios-service-check-%s' % svc + cron_file = ('*/5 * * * * root ' + '/usr/local/lib/nagios/plugins/check_exit_status.pl ' + '-s /etc/init.d/%s status > ' + '/var/lib/nagios/service-check-%s.txt\n' % (svc, + svc) + ) + f = open(cronpath, 'w') + f.write(cron_file) + f.close() + nrpe.add_check( + shortname=svc, + description='process check {%s}' % unit_name, + check_cmd='check_status_file.py -f ' + '/var/lib/nagios/service-check-%s.txt' % svc, + ) diff --git a/hooks/charmhelpers/contrib/charmsupport/volumes.py b/hooks/charmhelpers/contrib/charmsupport/volumes.py new file mode 100644 index 0000000..320961b --- /dev/null +++ b/hooks/charmhelpers/contrib/charmsupport/volumes.py @@ -0,0 +1,175 @@ +# Copyright 2014-2015 Canonical Limited. +# +# This file is part of charm-helpers. +# +# charm-helpers is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 as +# published by the Free Software Foundation. +# +# charm-helpers is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with charm-helpers. If not, see . + +''' +Functions for managing volumes in juju units. One volume is supported per unit. +Subordinates may have their own storage, provided it is on its own partition. + +Configuration stanzas:: + + volume-ephemeral: + type: boolean + default: true + description: > + If false, a volume is mounted as sepecified in "volume-map" + If true, ephemeral storage will be used, meaning that log data + will only exist as long as the machine. YOU HAVE BEEN WARNED. + volume-map: + type: string + default: {} + description: > + YAML map of units to device names, e.g: + "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }" + Service units will raise a configure-error if volume-ephemeral + is 'true' and no volume-map value is set. Use 'juju set' to set a + value and 'juju resolved' to complete configuration. + +Usage:: + + from charmsupport.volumes import configure_volume, VolumeConfigurationError + from charmsupport.hookenv import log, ERROR + def post_mount_hook(): + stop_service('myservice') + def post_mount_hook(): + start_service('myservice') + + if __name__ == '__main__': + try: + configure_volume(before_change=pre_mount_hook, + after_change=post_mount_hook) + except VolumeConfigurationError: + log('Storage could not be configured', ERROR) + +''' + +# XXX: Known limitations +# - fstab is neither consulted nor updated + +import os +from charmhelpers.core import hookenv +from charmhelpers.core import host +import yaml + + +MOUNT_BASE = '/srv/juju/volumes' + + +class VolumeConfigurationError(Exception): + '''Volume configuration data is missing or invalid''' + pass + + +def get_config(): + '''Gather and sanity-check volume configuration data''' + volume_config = {} + config = hookenv.config() + + errors = False + + if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'): + volume_config['ephemeral'] = True + else: + volume_config['ephemeral'] = False + + try: + volume_map = yaml.safe_load(config.get('volume-map', '{}')) + except yaml.YAMLError as e: + hookenv.log("Error parsing YAML volume-map: {}".format(e), + hookenv.ERROR) + errors = True + if volume_map is None: + # probably an empty string + volume_map = {} + elif not isinstance(volume_map, dict): + hookenv.log("Volume-map should be a dictionary, not {}".format( + type(volume_map))) + errors = True + + volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME']) + if volume_config['device'] and volume_config['ephemeral']: + # asked for ephemeral storage but also defined a volume ID + hookenv.log('A volume is defined for this unit, but ephemeral ' + 'storage was requested', hookenv.ERROR) + errors = True + elif not volume_config['device'] and not volume_config['ephemeral']: + # asked for permanent storage but did not define volume ID + hookenv.log('Ephemeral storage was requested, but there is no volume ' + 'defined for this unit.', hookenv.ERROR) + errors = True + + unit_mount_name = hookenv.local_unit().replace('/', '-') + volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name) + + if errors: + return None + return volume_config + + +def mount_volume(config): + if os.path.exists(config['mountpoint']): + if not os.path.isdir(config['mountpoint']): + hookenv.log('Not a directory: {}'.format(config['mountpoint'])) + raise VolumeConfigurationError() + else: + host.mkdir(config['mountpoint']) + if os.path.ismount(config['mountpoint']): + unmount_volume(config) + if not host.mount(config['device'], config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def unmount_volume(config): + if os.path.ismount(config['mountpoint']): + if not host.umount(config['mountpoint'], persist=True): + raise VolumeConfigurationError() + + +def managed_mounts(): + '''List of all mounted managed volumes''' + return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts()) + + +def configure_volume(before_change=lambda: None, after_change=lambda: None): + '''Set up storage (or don't) according to the charm's volume configuration. + Returns the mount point or "ephemeral". before_change and after_change + are optional functions to be called if the volume configuration changes. + ''' + + config = get_config() + if not config: + hookenv.log('Failed to read volume configuration', hookenv.CRITICAL) + raise VolumeConfigurationError() + + if config['ephemeral']: + if os.path.ismount(config['mountpoint']): + before_change() + unmount_volume(config) + after_change() + return 'ephemeral' + else: + # persistent storage + if os.path.ismount(config['mountpoint']): + mounts = dict(managed_mounts()) + if mounts.get(config['mountpoint']) != config['device']: + before_change() + unmount_volume(config) + mount_volume(config) + after_change() + else: + before_change() + mount_volume(config) + after_change() + return config['mountpoint'] diff --git a/hooks/charmhelpers/core/host.py b/hooks/charmhelpers/core/host.py index cf2cbe1..b771c61 100644 --- a/hooks/charmhelpers/core/host.py +++ b/hooks/charmhelpers/core/host.py @@ -191,11 +191,11 @@ def mkdir(path, owner='root', group='root', perms=0o555, force=False): def write_file(path, content, owner='root', group='root', perms=0o444): - """Create or overwrite a file with the contents of a string""" + """Create or overwrite a file with the contents of a byte string.""" log("Writing file {} {}:{} {:o}".format(path, owner, group, perms)) uid = pwd.getpwnam(owner).pw_uid gid = grp.getgrnam(group).gr_gid - with open(path, 'w') as target: + with open(path, 'wb') as target: os.fchown(target.fileno(), uid, gid) os.fchmod(target.fileno(), perms) target.write(content) @@ -305,11 +305,11 @@ def restart_on_change(restart_map, stopstart=False): ceph_client_changed function. """ def wrap(f): - def wrapped_f(*args): + def wrapped_f(*args, **kwargs): checksums = {} for path in restart_map: checksums[path] = file_hash(path) - f(*args) + f(*args, **kwargs) restarts = [] for path in restart_map: if checksums[path] != file_hash(path): @@ -361,7 +361,7 @@ def list_nics(nic_type): ip_output = (line for line in ip_output if line) for line in ip_output: if line.split()[1].startswith(int_type): - matched = re.search('.*: (bond[0-9]+\.[0-9]+)@.*', line) + matched = re.search('.*: (' + int_type + r'[0-9]+\.[0-9]+)@.*', line) if matched: interface = matched.groups()[0] else: diff --git a/hooks/charmhelpers/core/sysctl.py b/hooks/charmhelpers/core/sysctl.py index d642a37..8e1b9ee 100644 --- a/hooks/charmhelpers/core/sysctl.py +++ b/hooks/charmhelpers/core/sysctl.py @@ -26,25 +26,31 @@ from subprocess import check_call from charmhelpers.core.hookenv import ( log, DEBUG, + ERROR, ) def create(sysctl_dict, sysctl_file): """Creates a sysctl.conf file from a YAML associative array - :param sysctl_dict: a dict of sysctl options eg { 'kernel.max_pid': 1337 } - :type sysctl_dict: dict + :param sysctl_dict: a YAML-formatted string of sysctl options eg "{ 'kernel.max_pid': 1337 }" + :type sysctl_dict: str :param sysctl_file: path to the sysctl file to be saved :type sysctl_file: str or unicode :returns: None """ - sysctl_dict = yaml.load(sysctl_dict) + try: + sysctl_dict_parsed = yaml.safe_load(sysctl_dict) + except yaml.YAMLError: + log("Error parsing YAML sysctl_dict: {}".format(sysctl_dict), + level=ERROR) + return with open(sysctl_file, "w") as fd: - for key, value in sysctl_dict.items(): + for key, value in sysctl_dict_parsed.items(): fd.write("{}={}\n".format(key, value)) - log("Updating sysctl_file: %s values: %s" % (sysctl_file, sysctl_dict), + log("Updating sysctl_file: %s values: %s" % (sysctl_file, sysctl_dict_parsed), level=DEBUG) check_call(["sysctl", "-p", sysctl_file]) diff --git a/hooks/charmhelpers/core/templating.py b/hooks/charmhelpers/core/templating.py index 9766909..4531999 100644 --- a/hooks/charmhelpers/core/templating.py +++ b/hooks/charmhelpers/core/templating.py @@ -21,7 +21,7 @@ from charmhelpers.core import hookenv def render(source, target, context, owner='root', group='root', - perms=0o444, templates_dir=None): + perms=0o444, templates_dir=None, encoding='UTF-8'): """ Render a template. @@ -64,5 +64,5 @@ def render(source, target, context, owner='root', group='root', level=hookenv.ERROR) raise e content = template.render(context) - host.mkdir(os.path.dirname(target), owner, group) - host.write_file(target, content, owner, group, perms) + host.mkdir(os.path.dirname(target), owner, group, perms=0o755) + host.write_file(target, content.encode(encoding), owner, group, perms) diff --git a/hooks/charmhelpers/core/unitdata.py b/hooks/charmhelpers/core/unitdata.py new file mode 100644 index 0000000..01329ab --- /dev/null +++ b/hooks/charmhelpers/core/unitdata.py @@ -0,0 +1,477 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2014-2015 Canonical Limited. +# +# This file is part of charm-helpers. +# +# charm-helpers is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 as +# published by the Free Software Foundation. +# +# charm-helpers is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with charm-helpers. If not, see . +# +# +# Authors: +# Kapil Thangavelu +# +""" +Intro +----- + +A simple way to store state in units. This provides a key value +storage with support for versioned, transactional operation, +and can calculate deltas from previous values to simplify unit logic +when processing changes. + + +Hook Integration +---------------- + +There are several extant frameworks for hook execution, including + + - charmhelpers.core.hookenv.Hooks + - charmhelpers.core.services.ServiceManager + +The storage classes are framework agnostic, one simple integration is +via the HookData contextmanager. It will record the current hook +execution environment (including relation data, config data, etc.), +setup a transaction and allow easy access to the changes from +previously seen values. One consequence of the integration is the +reservation of particular keys ('rels', 'unit', 'env', 'config', +'charm_revisions') for their respective values. + +Here's a fully worked integration example using hookenv.Hooks:: + + from charmhelper.core import hookenv, unitdata + + hook_data = unitdata.HookData() + db = unitdata.kv() + hooks = hookenv.Hooks() + + @hooks.hook + def config_changed(): + # Print all changes to configuration from previously seen + # values. + for changed, (prev, cur) in hook_data.conf.items(): + print('config changed', changed, + 'previous value', prev, + 'current value', cur) + + # Get some unit specific bookeeping + if not db.get('pkg_key'): + key = urllib.urlopen('https://example.com/pkg_key').read() + db.set('pkg_key', key) + + # Directly access all charm config as a mapping. + conf = db.getrange('config', True) + + # Directly access all relation data as a mapping + rels = db.getrange('rels', True) + + if __name__ == '__main__': + with hook_data(): + hook.execute() + + +A more basic integration is via the hook_scope context manager which simply +manages transaction scope (and records hook name, and timestamp):: + + >>> from unitdata import kv + >>> db = kv() + >>> with db.hook_scope('install'): + ... # do work, in transactional scope. + ... db.set('x', 1) + >>> db.get('x') + 1 + + +Usage +----- + +Values are automatically json de/serialized to preserve basic typing +and complex data struct capabilities (dicts, lists, ints, booleans, etc). + +Individual values can be manipulated via get/set:: + + >>> kv.set('y', True) + >>> kv.get('y') + True + + # We can set complex values (dicts, lists) as a single key. + >>> kv.set('config', {'a': 1, 'b': True'}) + + # Also supports returning dictionaries as a record which + # provides attribute access. + >>> config = kv.get('config', record=True) + >>> config.b + True + + +Groups of keys can be manipulated with update/getrange:: + + >>> kv.update({'z': 1, 'y': 2}, prefix="gui.") + >>> kv.getrange('gui.', strip=True) + {'z': 1, 'y': 2} + +When updating values, its very helpful to understand which values +have actually changed and how have they changed. The storage +provides a delta method to provide for this:: + + >>> data = {'debug': True, 'option': 2} + >>> delta = kv.delta(data, 'config.') + >>> delta.debug.previous + None + >>> delta.debug.current + True + >>> delta + {'debug': (None, True), 'option': (None, 2)} + +Note the delta method does not persist the actual change, it needs to +be explicitly saved via 'update' method:: + + >>> kv.update(data, 'config.') + +Values modified in the context of a hook scope retain historical values +associated to the hookname. + + >>> with db.hook_scope('config-changed'): + ... db.set('x', 42) + >>> db.gethistory('x') + [(1, u'x', 1, u'install', u'2015-01-21T16:49:30.038372'), + (2, u'x', 42, u'config-changed', u'2015-01-21T16:49:30.038786')] + +""" + +import collections +import contextlib +import datetime +import json +import os +import pprint +import sqlite3 +import sys + +__author__ = 'Kapil Thangavelu ' + + +class Storage(object): + """Simple key value database for local unit state within charms. + + Modifications are automatically committed at hook exit. That's + currently regardless of exit code. + + To support dicts, lists, integer, floats, and booleans values + are automatically json encoded/decoded. + """ + def __init__(self, path=None): + self.db_path = path + if path is None: + self.db_path = os.path.join( + os.environ.get('CHARM_DIR', ''), '.unit-state.db') + self.conn = sqlite3.connect('%s' % self.db_path) + self.cursor = self.conn.cursor() + self.revision = None + self._closed = False + self._init() + + def close(self): + if self._closed: + return + self.flush(False) + self.cursor.close() + self.conn.close() + self._closed = True + + def _scoped_query(self, stmt, params=None): + if params is None: + params = [] + return stmt, params + + def get(self, key, default=None, record=False): + self.cursor.execute( + *self._scoped_query( + 'select data from kv where key=?', [key])) + result = self.cursor.fetchone() + if not result: + return default + if record: + return Record(json.loads(result[0])) + return json.loads(result[0]) + + def getrange(self, key_prefix, strip=False): + stmt = "select key, data from kv where key like '%s%%'" % key_prefix + self.cursor.execute(*self._scoped_query(stmt)) + result = self.cursor.fetchall() + + if not result: + return None + if not strip: + key_prefix = '' + return dict([ + (k[len(key_prefix):], json.loads(v)) for k, v in result]) + + def update(self, mapping, prefix=""): + for k, v in mapping.items(): + self.set("%s%s" % (prefix, k), v) + + def unset(self, key): + self.cursor.execute('delete from kv where key=?', [key]) + if self.revision and self.cursor.rowcount: + self.cursor.execute( + 'insert into kv_revisions values (?, ?, ?)', + [key, self.revision, json.dumps('DELETED')]) + + def set(self, key, value): + serialized = json.dumps(value) + + self.cursor.execute( + 'select data from kv where key=?', [key]) + exists = self.cursor.fetchone() + + # Skip mutations to the same value + if exists: + if exists[0] == serialized: + return value + + if not exists: + self.cursor.execute( + 'insert into kv (key, data) values (?, ?)', + (key, serialized)) + else: + self.cursor.execute(''' + update kv + set data = ? + where key = ?''', [serialized, key]) + + # Save + if not self.revision: + return value + + self.cursor.execute( + 'select 1 from kv_revisions where key=? and revision=?', + [key, self.revision]) + exists = self.cursor.fetchone() + + if not exists: + self.cursor.execute( + '''insert into kv_revisions ( + revision, key, data) values (?, ?, ?)''', + (self.revision, key, serialized)) + else: + self.cursor.execute( + ''' + update kv_revisions + set data = ? + where key = ? + and revision = ?''', + [serialized, key, self.revision]) + + return value + + def delta(self, mapping, prefix): + """ + return a delta containing values that have changed. + """ + previous = self.getrange(prefix, strip=True) + if not previous: + pk = set() + else: + pk = set(previous.keys()) + ck = set(mapping.keys()) + delta = DeltaSet() + + # added + for k in ck.difference(pk): + delta[k] = Delta(None, mapping[k]) + + # removed + for k in pk.difference(ck): + delta[k] = Delta(previous[k], None) + + # changed + for k in pk.intersection(ck): + c = mapping[k] + p = previous[k] + if c != p: + delta[k] = Delta(p, c) + + return delta + + @contextlib.contextmanager + def hook_scope(self, name=""): + """Scope all future interactions to the current hook execution + revision.""" + assert not self.revision + self.cursor.execute( + 'insert into hooks (hook, date) values (?, ?)', + (name or sys.argv[0], + datetime.datetime.utcnow().isoformat())) + self.revision = self.cursor.lastrowid + try: + yield self.revision + self.revision = None + except: + self.flush(False) + self.revision = None + raise + else: + self.flush() + + def flush(self, save=True): + if save: + self.conn.commit() + elif self._closed: + return + else: + self.conn.rollback() + + def _init(self): + self.cursor.execute(''' + create table if not exists kv ( + key text, + data text, + primary key (key) + )''') + self.cursor.execute(''' + create table if not exists kv_revisions ( + key text, + revision integer, + data text, + primary key (key, revision) + )''') + self.cursor.execute(''' + create table if not exists hooks ( + version integer primary key autoincrement, + hook text, + date text + )''') + self.conn.commit() + + def gethistory(self, key, deserialize=False): + self.cursor.execute( + ''' + select kv.revision, kv.key, kv.data, h.hook, h.date + from kv_revisions kv, + hooks h + where kv.key=? + and kv.revision = h.version + ''', [key]) + if deserialize is False: + return self.cursor.fetchall() + return map(_parse_history, self.cursor.fetchall()) + + def debug(self, fh=sys.stderr): + self.cursor.execute('select * from kv') + pprint.pprint(self.cursor.fetchall(), stream=fh) + self.cursor.execute('select * from kv_revisions') + pprint.pprint(self.cursor.fetchall(), stream=fh) + + +def _parse_history(d): + return (d[0], d[1], json.loads(d[2]), d[3], + datetime.datetime.strptime(d[-1], "%Y-%m-%dT%H:%M:%S.%f")) + + +class HookData(object): + """Simple integration for existing hook exec frameworks. + + Records all unit information, and stores deltas for processing + by the hook. + + Sample:: + + from charmhelper.core import hookenv, unitdata + + changes = unitdata.HookData() + db = unitdata.kv() + hooks = hookenv.Hooks() + + @hooks.hook + def config_changed(): + # View all changes to configuration + for changed, (prev, cur) in changes.conf.items(): + print('config changed', changed, + 'previous value', prev, + 'current value', cur) + + # Get some unit specific bookeeping + if not db.get('pkg_key'): + key = urllib.urlopen('https://example.com/pkg_key').read() + db.set('pkg_key', key) + + if __name__ == '__main__': + with changes(): + hook.execute() + + """ + def __init__(self): + self.kv = kv() + self.conf = None + self.rels = None + + @contextlib.contextmanager + def __call__(self): + from charmhelpers.core import hookenv + hook_name = hookenv.hook_name() + + with self.kv.hook_scope(hook_name): + self._record_charm_version(hookenv.charm_dir()) + delta_config, delta_relation = self._record_hook(hookenv) + yield self.kv, delta_config, delta_relation + + def _record_charm_version(self, charm_dir): + # Record revisions.. charm revisions are meaningless + # to charm authors as they don't control the revision. + # so logic dependnent on revision is not particularly + # useful, however it is useful for debugging analysis. + charm_rev = open( + os.path.join(charm_dir, 'revision')).read().strip() + charm_rev = charm_rev or '0' + revs = self.kv.get('charm_revisions', []) + if not charm_rev in revs: + revs.append(charm_rev.strip() or '0') + self.kv.set('charm_revisions', revs) + + def _record_hook(self, hookenv): + data = hookenv.execution_environment() + self.conf = conf_delta = self.kv.delta(data['conf'], 'config') + self.rels = rels_delta = self.kv.delta(data['rels'], 'rels') + self.kv.set('env', data['env']) + self.kv.set('unit', data['unit']) + self.kv.set('relid', data.get('relid')) + return conf_delta, rels_delta + + +class Record(dict): + + __slots__ = () + + def __getattr__(self, k): + if k in self: + return self[k] + raise AttributeError(k) + + +class DeltaSet(Record): + + __slots__ = () + + +Delta = collections.namedtuple('Delta', ['previous', 'current']) + + +_KV = None + + +def kv(): + global _KV + if _KV is None: + _KV = Storage() + return _KV diff --git a/hooks/hooks.py b/hooks/hooks.py index e812a49..d4b6ccc 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -11,6 +11,7 @@ import ast import shutil import sys import os +import glob from base64 import b64decode import maas as MAAS @@ -24,6 +25,7 @@ from charmhelpers.core.hookenv import ( related_units, relation_ids, relation_set, + relations_of_type, unit_get, config, Hooks, UnregisteredHookError, @@ -56,6 +58,8 @@ from charmhelpers.contrib.hahelpers.cluster import ( from charmhelpers.contrib.openstack.utils import get_host_ip +from charmhelpers.contrib.charmsupport.nrpe import NRPE + hooks = Hooks() COROSYNC_CONF = '/etc/corosync/corosync.conf' @@ -68,7 +72,8 @@ COROSYNC_CONF_FILES = [ COROSYNC_CONF ] -PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool'] +PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool', + 'libnagios-plugin-perl'] SUPPORTED_TRANSPORTS = ['udp', 'udpu', 'multicast', 'unicast'] @@ -207,10 +212,13 @@ def config_changed(): configure_monitor_host() configure_stonith() + update_nrpe_config() + @hooks.hook() def upgrade_charm(): install() + update_nrpe_config() def restart_corosync(): @@ -582,6 +590,66 @@ def assert_charm_supports_ipv6(): "versions less than Trusty 14.04") +@hooks.hook('nrpe-external-master-relation-joined', + 'nrpe-external-master-relation-changed') +def update_nrpe_config(): + scripts_src = os.path.join(os.environ["CHARM_DIR"], "files", + "nrpe") + scripts_dst = "/usr/local/lib/nagios/plugins" + if not os.path.exists(scripts_dst): + os.makedirs(scripts_dst) + for fname in glob.glob(os.path.join(scripts_src, "*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(scripts_dst, os.path.basename(fname))) + + sudoers_src = os.path.join(os.environ["CHARM_DIR"], "files", + "sudoers") + sudoers_dst = "/etc/sudoers.d" + for fname in glob.glob(os.path.join(sudoers_src, "*")): + if os.path.isfile(fname): + shutil.copy2(fname, + os.path.join(sudoers_dst, os.path.basename(fname))) + + # Find out if nrpe set nagios_hostname + hostname = None + host_context = None + for rel in relations_of_type('nrpe-external-master'): + if 'nagios_hostname' in rel: + hostname = rel['nagios_hostname'] + host_context = rel['nagios_host_context'] + break + nrpe = NRPE(hostname=hostname) + apt_install('python-dbus') + + if host_context: + current_unit = "%s:%s" % (host_context, local_unit()) + else: + current_unit = local_unit() + + # haproxy checks + nrpe.add_check( + shortname='haproxy_servers', + description='Check HAProxy {%s}' % current_unit, + check_cmd='check_haproxy.sh') + nrpe.add_check( + shortname='haproxy_queue', + description='Check HAProxy queue depth {%s}' % current_unit, + check_cmd='check_haproxy_queue_depth.sh') + + # corosync/crm checks + nrpe.add_check( + shortname='corosync_rings', + description='Check Corosync rings {%s}' % current_unit, + check_cmd='check_corosync_rings') + nrpe.add_check( + shortname='crm_status', + description='Check crm status {%s}' % current_unit, + check_cmd='check_crm') + + nrpe.write() + + if __name__ == '__main__': try: hooks.execute(sys.argv) diff --git a/hooks/nrpe-external-master-relation-changed b/hooks/nrpe-external-master-relation-changed new file mode 120000 index 0000000..9416ca6 --- /dev/null +++ b/hooks/nrpe-external-master-relation-changed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/hooks/nrpe-external-master-relation-joined b/hooks/nrpe-external-master-relation-joined new file mode 120000 index 0000000..9416ca6 --- /dev/null +++ b/hooks/nrpe-external-master-relation-joined @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/metadata.yaml b/metadata.yaml index 41d4bc9..8609915 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -14,6 +14,9 @@ provides: ha: interface: hacluster scope: container + nrpe-external-master: + interface: nrpe-external-master + scope: container peers: hanode: interface: hacluster