Added swift and ntp checks and more automatic detection.

Added a bunch of swift diagnostic commands.

Added postfix checking back in but warned when root access is not
granted.

Ported over ntp check from the datadog agent and added detection.

Moved detection plugin list out of the main of monasca-setup just so it
looks tidy.

Change-Id: I9a76d0cc009545d30df60c17f65a1db6e8329b63
This commit is contained in:
Tim Kuhlman
2015-02-09 14:19:48 -07:00
parent 3ba44c29c3
commit 2a4e1e0080
10 changed files with 177 additions and 42 deletions

12
conf.d/ntp.yaml.example Normal file
View File

@@ -0,0 +1,12 @@
init_config:
instances:
# All params are optional
- host: pool.ntp.org
# Optional params:
#
# port: ntp
# version: 3
# timeout: 5
# dimensions:
# dim1: value1

View File

@@ -1,8 +1,8 @@
# The user running dd-agent must have passwordless sudo access for the find
# The user running monasca-agent must have passwordless sudo access for the find
# command to run the postfix check. Here's an example:
#
# example /etc/sudoers entry:
# dd-agent ALL=(ALL) NOPASSWD:/usr/bin/find
# monasca-agent ALL=(ALL) NOPASSWD:/usr/bin/find
#
init_config:

View File

@@ -0,0 +1,35 @@
import ntplib
from monasca_agent.collector.checks import AgentCheck
# Fallback values used by NtpCheck.check when an instance omits the matching key.
DEFAULT_NTP_VERSION = 3
DEFAULT_TIMEOUT = 1 # in seconds
DEFAULT_HOST = "pool.ntp.org"
# A service name rather than a port number; presumably resolved by ntplib - TODO confirm.
DEFAULT_PORT = "ntp"
class NtpCheck(AgentCheck):
    """Agent check that reports the ntp offset obtained through ntplib.
    """

    def check(self, instance):
        """Query the NTP server for one instance and emit the 'ntp.offset' gauge.

        Reads the optional 'host', 'port', 'version', 'timeout' and
        'dimensions' keys from instance, falling back to the module defaults
        for the first four and to an empty dict for the dimensions.

        An ntplib.NTPException raised by the request is logged and re-raised.
        """
        metric_dimensions = instance.get('dimensions', {})
        request_kwargs = dict(
            host=instance.get('host', DEFAULT_HOST),
            port=instance.get('port', DEFAULT_PORT),
            version=int(instance.get('version', DEFAULT_NTP_VERSION)),
            timeout=float(instance.get('timeout', DEFAULT_TIMEOUT)),
        )

        try:
            response = ntplib.NTPClient().request(**request_kwargs)
        except ntplib.NTPException:
            self.log.error("Could not connect to NTP Server")
            raise

        # The except branch always re-raises, so reaching this point means
        # the request succeeded.
        # The result is stamped with the time taken from the ntp response
        # rather than the local clock, in case the agent host's clock is
        # messed up.
        self.gauge('ntp.offset',
                   response.offset,
                   timestamp=response.recv_time,
                   dimensions=metric_dimensions)

View File

@@ -7,11 +7,11 @@ class PostfixCheck(AgentCheck):
"""This check provides metrics on the number of messages in a given postfix queue
WARNING: the user that dd-agent runs as must have sudo access for the 'find' command
sudo access is not required when running dd-agent as root (not recommended)
WARNING: the user that monasca-agent runs as must have sudo access for the 'find' command
sudo access is not required when running monasca-agent as root (not recommended)
example /etc/sudoers entry:
dd-agent ALL=(ALL) NOPASSWD:/usr/bin/find
monasca-agent ALL=(ALL) NOPASSWD:/usr/bin/find
YAML config options:
"directory" - the value of 'postconf -h queue_directory'
@@ -51,21 +51,17 @@ class PostfixCheck(AgentCheck):
count = 0
if os.geteuid() == 0:
# dd-agent is running as root (not recommended)
# agent is running as root (not recommended)
count = sum(len(files) for root, dirs, files in os.walk(queue_path))
else:
# can dd-agent user run sudo?
test_sudo = os.system('setsid sudo -l < /dev/null')
# can agent user run sudo?
test_sudo = os.system('setsid sudo -l > /dev/null')
if test_sudo == 0:
count = os.popen('sudo find %s -type f | wc -l' % queue_path)
count = count.readlines()[0].strip()
else:
raise Exception('The dd-agent user does not have sudo access')
raise Exception('The monasca-agent user does not have sudo access')
# emit an individually tagged metric
dimensions.update({'queue': queue, 'instance': os.path.basename(directory)})
self.gauge('postfix.queue.size', count, dimensions=dimensions)
# these can be retrieved in a single graph statement
# for example:
# sum:postfix.queue.size{instance:postfix-2,queue:incoming,host:hostname.domain.tld}
self.gauge('postfix.queue_size', count, dimensions=dimensions)

View File

@@ -0,0 +1,39 @@
# Enabled plugins
from apache import Apache
from ceilometer import Ceilometer
from cinder import Cinder
from glance import Glance
from kafka_consumer import Kafka
from keystone import Keystone
from libvirt import Libvirt
from mon import MonAPI, MonPersister, MonThresh
from mysql import MySQL
from network import Network
from neutron import Neutron
from nova import Nova
from ntp import Ntp
from postfix import Postfix
from rabbitmq import RabbitMQ
from swift import Swift
from zookeeper import Zookeeper
# The enabled detection plugin classes, one entry per line.
DETECTION_PLUGINS = [
    Apache,
    Ceilometer,
    Cinder,
    Glance,
    Kafka,
    Keystone,
    Libvirt,
    MonAPI,
    MonPersister,
    MonThresh,
    MySQL,
    Network,
    Neutron,
    Nova,
    Ntp,
    Postfix,
    RabbitMQ,
    Swift,
    Zookeeper,
]

View File

@@ -0,0 +1,39 @@
import logging
import os
import yaml
import monasca_setup.agent_config
import monasca_setup.detection
log = logging.getLogger(__name__)
class Ntp(monasca_setup.detection.Plugin):
    """Detect the NTP daemon and set up configuration to monitor it.
    """

    def _detect(self):
        """Run detection, set self.available True if the service is detected.
        """
        # Any running process whose command line search matches 'ntp' counts.
        if monasca_setup.detection.find_process_cmdline('ntp') is not None:
            self.available = True

    def build_config(self):
        """Build the config as a Plugins object and return.

        The ntp entry is the parsed content of conf.d/ntp.yaml.example found
        under self.template_dir.
        """
        config = monasca_setup.agent_config.Plugins()
        log.info("\tEnabling the ntp plugin")
        template_path = os.path.join(self.template_dir, 'conf.d/ntp.yaml.example')
        with open(template_path, 'r') as ntp_template:
            # safe_load is enough for a plain-data template; yaml.load
            # without an explicit Loader is deprecated in PyYAML 5.1+ and
            # rejected by PyYAML 6.
            ntp_config = yaml.safe_load(ntp_template.read())
        config['ntp'] = ntp_config
        return config

    def dependencies_installed(self):
        """Return True if ntplib can be imported, False otherwise.
        """
        try:
            import ntplib
        except ImportError:
            return False
        else:
            return True

View File

@@ -1,25 +1,29 @@
import logging
import os
import yaml
import monasca_setup.agent_config
import monasca_setup.detection
log = logging.getLogger(__name__)
class Postfix(monasca_setup.detection.Plugin):
"""If postfix is running install the default config.
"""
# todo this is disabled as postfix requires passwordless sudo for the
# monasca-agent user, a bad practice
def _detect(self):
"""Run detection, set self.available True if the service is detected.
"""
if monasca_setup.detection.find_process_name('postfix') is not None:
self.available = True
if monasca_setup.detection.find_process_cmdline('postfix') is not None:
# Test for sudo access
test_sudo = os.system('sudo -l -U monasca-agent find /var/spool/postfix/incoming -type f > /dev/null')
if test_sudo != 0:
log.info("Postfix found but the required sudo access is not configured.\n\t" +
"Refer to plugin documentation for more detail")
return False
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.

View File

@@ -1,3 +1,5 @@
import os
import monasca_setup.detection
@@ -24,3 +26,29 @@ class Swift(monasca_setup.detection.ServicePlugin):
}
super(Swift, self).__init__(service_params)
def build_config(self):
    """Build the service config, adding a Swift health check when the tools exist.

    Starts from the configuration returned by the parent class. When both
    /usr/local/bin/diagnostics and /usr/local/bin/swift-checker exist, the
    'nagios_wrapper' entry is set to a single 'Swift.health' instance that
    runs every diagnostic command in one shell invocation.

    Returns the resulting config object.
    """
    # super() already binds self; passing it a second time hands the parent
    # method an extra positional argument.
    # NOTE(review): assumes the parent build_config takes no arguments
    # besides self - confirm against ServicePlugin.
    config = super(Swift, self).build_config()

    diagnostics = '/usr/local/bin/diagnostics'
    swift_checker = '/usr/local/bin/swift-checker'
    if os.path.exists(diagnostics) and os.path.exists(swift_checker):
        # This is a bit of an abuse of the nagios_wrapper but the commands
        # will return failed error code properly
        commands = [
            diagnostics + ' --check_mounts',
            diagnostics + ' --disk_monitoring',
            diagnostics + ' --file_ownership',
            diagnostics + ' --network_interface',
            diagnostics + ' --ping_hosts',
            diagnostics + ' --swift_services',
            swift_checker + ' --diskusage',
            swift_checker + ' --healthcheck',
            swift_checker + ' --replication',
        ]
        # Chained with && so the first failing command fails the whole check.
        swift_health = "/bin/sh -c '" + " && ".join(commands) + "'"
        config['nagios_wrapper'] = {'init_config': None,
                                    'instances': [
                                        {'name': 'Swift.health',
                                         'check_command': swift_health,
                                         'check_interval': 60,
                                         'dimensions': {'service': 'swift'}}
                                    ]}
    return config

View File

@@ -24,6 +24,7 @@ except AttributeError:
raise CalledProcessError(retcode, cmd)
return output
def find_process_cmdline(search_string):
"""Simple function to search running process for one with cmdline containing.
"""

View File

@@ -13,31 +13,12 @@ import sys
import yaml
import agent_config
import detection.plugins.apache as apache
import detection.plugins.ceilometer as ceilometer
import detection.plugins.cinder as cinder
import detection.plugins.glance as glance
import detection.plugins.kafka_consumer as kafka_consumer
import detection.plugins.keystone as keystone
import detection.plugins.libvirt as libvirt
import detection.plugins.mon as mon
import detection.plugins.mysql as mysql
import detection.plugins.network as network
import detection.plugins.neutron as neutron
import detection.plugins.nova as nova
import detection.plugins.rabbitmq as rabbitmq
import detection.plugins.swift as swift
import detection.plugins.zookeeper as zookeeper
from detection.plugins import DETECTION_PLUGINS
import service.sysv as sysv
from detection.utils import check_output
# List of all detection plugins to run
DETECTION_PLUGINS = [apache.Apache, ceilometer.Ceilometer, cinder.Cinder,
glance.Glance, kafka_consumer.Kafka, keystone.Keystone,
libvirt.Libvirt, mon.MonAPI, mon.MonPersister, mon.MonThresh,
mysql.MySQL, network.Network, neutron.Neutron, nova.Nova,
rabbitmq.RabbitMQ, swift.Swift, zookeeper.Zookeeper]
# Map OS to service type
OS_SERVICE_MAP = {'Linux': sysv.SysV}