
Juju 2.0 provides support for display of the version of an application deployed by a charm in juju status. Insert the os_application_version_set function into the existing assess_status function - this gets called after all hook executions, and periodically after that, so any changes in package versions due to normal system updates will also be reflected in the status output. This review also includes a resync of charm-helpers to pick up hookenv and contrib.openstack support for this feature. Change-Id: I059d03fd0ae0c445b5822b3e48476e54b839689d
421 lines
14 KiB
Python
421 lines
14 KiB
Python
# Copyright 2014-2015 Canonical Limited.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Compatibility with the nrpe-external-master charm"""
|
|
# Copyright 2012 Canonical Ltd.
|
|
#
|
|
# Authors:
|
|
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
|
|
|
|
import subprocess
|
|
import pwd
|
|
import grp
|
|
import os
|
|
import glob
|
|
import shutil
|
|
import re
|
|
import shlex
|
|
import yaml
|
|
|
|
from charmhelpers.core.hookenv import (
|
|
config,
|
|
local_unit,
|
|
log,
|
|
relation_ids,
|
|
relation_set,
|
|
relations_of_type,
|
|
)
|
|
|
|
from charmhelpers.core.host import service
|
|
from charmhelpers.core import host
|
|
|
|
# This module adds compatibility with the nrpe-external-master and plain nrpe
|
|
# subordinate charms. To use it in your charm:
|
|
#
|
|
# 1. Update metadata.yaml
|
|
#
|
|
# provides:
|
|
# (...)
|
|
# nrpe-external-master:
|
|
# interface: nrpe-external-master
|
|
# scope: container
|
|
#
|
|
# and/or
|
|
#
|
|
# provides:
|
|
# (...)
|
|
# local-monitors:
|
|
# interface: local-monitors
|
|
# scope: container
|
|
|
|
#
|
|
# 2. Add the following to config.yaml
|
|
#
|
|
# nagios_context:
|
|
# default: "juju"
|
|
# type: string
|
|
# description: |
|
|
# Used by the nrpe subordinate charms.
|
|
# A string that will be prepended to instance name to set the host name
|
|
# in nagios. So for instance the hostname would be something like:
|
|
# juju-myservice-0
|
|
# If you're running multiple environments with the same services in them
|
|
# this allows you to differentiate between them.
|
|
# nagios_servicegroups:
|
|
# default: ""
|
|
# type: string
|
|
# description: |
|
|
# A comma-separated list of nagios servicegroups.
|
|
# If left empty, the nagios_context will be used as the servicegroup
|
|
#
|
|
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
|
|
#
|
|
# 4. Update your hooks.py with something like this:
|
|
#
|
|
# from charmsupport.nrpe import NRPE
|
|
# (...)
|
|
# def update_nrpe_config():
|
|
# nrpe_compat = NRPE()
|
|
# nrpe_compat.add_check(
|
|
# shortname = "myservice",
|
|
# description = "Check MyService",
|
|
# check_cmd = "check_http -w 2 -c 10 http://localhost"
|
|
# )
|
|
# nrpe_compat.add_check(
|
|
# "myservice_other",
|
|
# "Check for widget failures",
|
|
# check_cmd = "/srv/myapp/scripts/widget_check"
|
|
# )
|
|
# nrpe_compat.write()
|
|
#
|
|
# def config_changed():
|
|
# (...)
|
|
# update_nrpe_config()
|
|
#
|
|
# def nrpe_external_master_relation_changed():
|
|
# update_nrpe_config()
|
|
#
|
|
# def local_monitors_relation_changed():
|
|
# update_nrpe_config()
|
|
#
|
|
# 4.a If your charm is a subordinate charm set primary=False
|
|
#
|
|
# from charmsupport.nrpe import NRPE
|
|
# (...)
|
|
# def update_nrpe_config():
|
|
# nrpe_compat = NRPE(primary=False)
|
|
#
|
|
# 5. ln -s hooks.py nrpe-external-master-relation-changed
|
|
# ln -s hooks.py local-monitors-relation-changed
|
|
|
|
|
|
class CheckException(Exception):
    """Raised by Check when a check definition is invalid."""
|
|
|
|
|
|
class Check(object):
    """One NRPE check plus the Nagios service definition exported for it.

    A check is identified by ``shortname``; the NRPE command name is
    derived from it as ``check_<shortname>``. Two files may be written for
    a check: the NRPE command definition under ``NRPE.nrpe_confdir`` and,
    when ``NRPE.nagios_exportdir`` exists, a Nagios service definition
    rendered from ``service_template``.
    """

    # Pattern the shortname has to match (applied with re.match, so it is
    # anchored at the start by re.match and at the end by the trailing '$').
    shortname_re = '[A-Za-z0-9-_]+$'
    # str.format template for the exported service definition; literal
    # braces are doubled.
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use                             active-service
    host_name                       {nagios_hostname}
    service_description             {nagios_hostname}[{shortname}] """
                        """{description}
    check_command                   check_nrpe!{command}
    servicegroups                   {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        """Validate the shortname and resolve the check command.

        :param str shortname: Identifier of the check; must match
            ``shortname_re``
        :param str description: Text appended to the service description
        :param str check_cmd: Plugin name followed by its arguments
        :raises CheckException: if ``shortname`` does not match
            ``shortname_re``
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _get_check_filename(self):
        """Return the path of the NRPE command file for this check."""
        return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command))

    def _get_service_filename(self, hostname):
        """Return the path of the exported service file for ``hostname``."""
        return os.path.join(NRPE.nagios_exportdir,
                            'service__{}_{}.cfg'.format(hostname, self.command))

    def _locate_cmd(self, check_cmd):
        """Resolve the plugin named in ``check_cmd`` to an absolute path.

        The first word of ``check_cmd`` is looked up in the Nagios plugin
        directories; the remaining words are appended as arguments.

        :param str check_cmd: Plugin name followed by its arguments
        :returns: The command line with the plugin path made absolute, or
            an empty string (after logging) when the plugin is not found
            or ``check_cmd`` contains no words at all
        """
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        if not parts:
            # Nothing to look up; report it the same way as a plugin that
            # cannot be found instead of failing on parts[0].
            log('Check command not found: {}'.format(check_cmd))
            return ''
        for path in search_path:
            candidate = os.path.join(path, parts[0])
            if os.path.exists(candidate):
                command = candidate
                if len(parts) > 1:
                    command += " " + " ".join(parts[1:])
                return command
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def _remove_service_files(self):
        """Delete every exported service file that belongs to this check.

        Files are matched by their ``_<command>.cfg`` suffix, so
        definitions written for any hostname are removed. Does nothing
        when the export directory does not exist.
        """
        if not os.path.exists(NRPE.nagios_exportdir):
            return
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith('_{}.cfg'.format(self.command)):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

    def remove(self, hostname):
        """Remove the NRPE command file and exported service files.

        :param str hostname: Accepted for interface compatibility; it is
            not used, because service files are matched by command suffix
        """
        nrpe_check_file = self._get_check_filename()
        if os.path.exists(nrpe_check_file):
            os.remove(nrpe_check_file)
        self._remove_service_files()

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the NRPE command file and, if possible, the service file.

        The service definition is only written when
        ``NRPE.nagios_exportdir`` exists; otherwise that is logged.

        :param str nagios_context: Passed through to write_service_config
        :param str hostname: Nagios host name used in the service file
        :param str nagios_servicegroups: Service groups for the service
        """
        nrpe_check_file = self._get_check_filename()
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Render ``service_template`` and write it to the export directory.

        Previously exported service files for this check are removed
        first.

        :param str nagios_context: Accepted but not used by the template
        :param str hostname: Value for host_name / service_description
        :param str nagios_servicegroups: Value for servicegroups
        """
        self._remove_service_files()

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = self._get_service_filename(hostname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the resolved check command.

        ``check_cmd`` is a single string holding the plugin path and its
        arguments, so it is split into an argument list before being
        handed to subprocess; passing the string itself would make
        subprocess treat the whole line as the program name.
        """
        argv = shlex.split(self.check_cmd)
        # An empty command (plugin not found) is passed through unchanged
        # so that case keeps failing inside subprocess as it always did.
        subprocess.call(argv or self.check_cmd)
|
|
|
|
|
|
class NRPE(object):
    """Collects checks for a unit and writes them out for NRPE/Nagios.

    Typical use is to construct an instance, call add_check() once per
    check and finish with write().
    """

    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self, hostname=None, primary=True):
        """Read charm config and work out the Nagios host name.

        The host name is, in order of preference: the ``hostname``
        argument, the value returned by get_nagios_hostname(), or
        ``<nagios_context>-<unit name>``.

        :param str hostname: Explicit Nagios host name, if any
        :param bool primary: Published as 'primary' on every
            nrpe-external-master relation
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.primary = primary
        self.nagios_context = self.config['nagios_context']
        if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
            self.nagios_servicegroups = self.config['nagios_servicegroups']
        else:
            # No explicit service groups configured: fall back to the
            # context.
            self.nagios_servicegroups = self.nagios_context
        self.unit_name = local_unit().replace('/', '-')
        if hostname:
            self.hostname = hostname
        else:
            nagios_hostname = get_nagios_hostname()
            if nagios_hostname:
                self.hostname = nagios_hostname
            else:
                self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []
        # Iff in an nrpe-external-master relation hook, set primary status
        relation = relation_ids('nrpe-external-master')
        if relation:
            log("Setting charm primary status {}".format(primary))
            for rid in relation:
                relation_set(relation_id=rid, relation_settings={'primary': self.primary})

    def add_check(self, *args, **kwargs):
        """Create a Check from the given arguments and queue it.

        Arguments are passed to Check unchanged; nothing is written until
        write() is called.
        """
        self.checks.append(Check(*args, **kwargs))

    def remove_check(self, *args, **kwargs):
        """Remove the files of the check named by the ``shortname`` keyword.

        :raises ValueError: if ``shortname`` is not given as a keyword
            argument (or is None)
        """
        if kwargs.get('shortname') is None:
            raise ValueError('shortname of check must be specified')

        # Use sensible defaults if they're not specified - these are not
        # actually used during removal, but they're required for constructing
        # the Check object; check_disk is chosen because it's part of the
        # nagios-plugins-basic package.
        if kwargs.get('check_cmd') is None:
            kwargs['check_cmd'] = 'check_disk'
        if kwargs.get('description') is None:
            kwargs['description'] = ''

        check = Check(*args, **kwargs)
        check.remove(self.hostname)

    def write(self):
        """Write all queued checks, restart NRPE and publish the monitors.

        Returns early, after logging, when the 'nagios' user or group
        does not exist. Otherwise the log directory is created if missing,
        every queued check is written, the 'nagios-nrpe-server' service is
        restarted and the monitors are set as YAML on each local-monitors
        and nrpe-external-master relation.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except KeyError:
            # getpwnam/getgrnam raise KeyError for an unknown name; any
            # other error is unexpected and is left to propagate.
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        service('restart', 'nagios-nrpe-server')

        monitor_ids = relation_ids("local-monitors") + \
            relation_ids("nrpe-external-master")
        for rid in monitor_ids:
            relation_set(relation_id=rid, monitors=yaml.dump(monitors))
|
|
|
|
|
|
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Look up nagios_host_context on the relation with the nrpe subordinate

    Returns the value from the first relation entry that carries the
    'nagios_host_context' key, or None when no entry does.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_host_context'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
|
|
|
|
|
|
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up nagios_hostname on the relation with the nrpe subordinate

    Returns the value from the first relation entry that carries the
    'nagios_hostname' key, or None when no entry does.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_hostname'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
|
|
|
|
|
|
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Build the unit name used for nagios, prefixed with the host context

    When a host context is available the result is
    '<host_context>:<local unit>', otherwise just the local unit name.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    context = get_nagios_hostcontext(relation_name)
    if not context:
        return local_unit()
    return "%s:%s" % (context, local_unit())
|
|
|
|
|
|
def add_init_service_checks(nrpe, services, unit_name):
    """
    Add checks for each service in list

    The kind of check depends on how the service is managed: a systemd
    check when the host runs systemd, an upstart check when an upstart job
    file exists, and otherwise (when a SysV init script exists) a cron job
    that records the init script status to a file plus a check reading
    that file. Services matching none of these get no check.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    """
    for svc in services:
        # Don't add a check for these services from neutron-gateway
        if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
            continue

        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc

        if host.init_is_systemd():
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_systemd.py %s' % svc
            )
        elif os.path.exists(upstart_init):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_upstart_job %s' % svc
            )
        elif os.path.exists(sysv_init):
            # The init script status is written to a file by cron every
            # five minutes; the check itself only reads that file.
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            cron_file = ('*/5 * * * * root '
                         '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                         '-s /etc/init.d/%s status > '
                         '/var/lib/nagios/service-check-%s.txt\n' % (svc,
                                                                     svc)
                         )
            with open(cronpath, 'w') as f:
                f.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_status_file.py -f '
                          '/var/lib/nagios/service-check-%s.txt' % svc,
            )
|
|
|
|
|
|
def copy_nrpe_checks():
    """
    Install the bundled nrpe check scripts into the local plugin directory

    Every regular file named check_* under
    $CHARM_DIR/hooks/charmhelpers/contrib/openstack/files is copied to
    /usr/local/lib/nagios/plugins, which is created first if missing.
    """
    NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
    charm_dir = os.getenv('CHARM_DIR')
    source_dir = os.path.join(charm_dir, 'hooks', 'charmhelpers',
                              'contrib', 'openstack', 'files')

    if not os.path.exists(NAGIOS_PLUGINS):
        os.makedirs(NAGIOS_PLUGINS)

    pattern = os.path.join(source_dir, "check_*")
    for src in glob.glob(pattern):
        if not os.path.isfile(src):
            continue
        target = os.path.join(NAGIOS_PLUGINS, os.path.basename(src))
        shutil.copy2(src, target)
|
|
|
|
|
|
def add_haproxy_checks(nrpe, unit_name):
    """
    Add the two HAProxy checks (servers and queue depth)

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    haproxy_checks = (
        ('haproxy_servers',
         'Check HAProxy {%s}',
         'check_haproxy.sh'),
        ('haproxy_queue',
         'Check HAProxy queue depth {%s}',
         'check_haproxy_queue_depth.sh'),
    )
    for name, description_fmt, plugin in haproxy_checks:
        nrpe.add_check(
            shortname=name,
            description=description_fmt % unit_name,
            check_cmd=plugin)
|