From c376ee8655c4ad2878bcad37ddef7ed0f94723ff Mon Sep 17 00:00:00 2001
From: Craig Bryant
Date: Thu, 6 Oct 2016 14:24:51 -0600
Subject: [PATCH] Add a target_hostname to Host Alive plugin

To handle the case where the checked system has multiple IP Addresses
and the network name to be used for liveness checking is not the same as
the usual name used to identify the server in Monasca,
an additional target_hostname parameter can be configured. It is the
network hostname or IP Address to check instead of hostname.

Add unit tests as well

Change-Id: I33721764e64ef5079b26f78df84c94ed7a1009e7
---
 docs/Plugins.md                               | 39 +++++++--
 .../collector/checks_d/host_alive.py          | 30 +++++--
 monasca_setup/detection/plugins/host_alive.py | 27 ++++--
 tests/checks_d/test_host_alive.py             | 70 ++++++++++++++++
 tests/detection/test_host_alive.py            | 84 +++++++++++++++++++
 5 files changed, 226 insertions(+), 24 deletions(-)
 create mode 100644 tests/checks_d/test_host_alive.py
 create mode 100644 tests/detection/test_host_alive.py

diff --git a/docs/Plugins.md b/docs/Plugins.md
index 6ce5f9f9..57a4b4c7 100644
--- a/docs/Plugins.md
+++ b/docs/Plugins.md
@@ -805,14 +805,6 @@ See [the example configuration](https://github.com/openstack/monasca-agent/blob/
 ## Host Alive
 An extension to the Agent can provide basic "aliveness" checks of other systems, verifying that the remote host (or device) is online. This check currently provides two methods of determining connectivity:
 
-    default dimensions:
-        observer_host: fqdn
-        hostname: fqdn | supplied
-        test_type: ping | ssh | Unrecognized alive_test
-
-    default value_meta
-        error: error_message
-
 * ping (ICMP)
 * SSH (banner test, port 22 by default)
 
@@ -862,14 +854,43 @@ The instances section contains the hostname/IP to check, and the type of check t
     alive_test: ssh
 ```
 
+To handle the case where the target system has multiple IP Addresses and the network name to be used for
+liveness checking is not the same as the usual name used to identify the server in Monasca,
+an additional target_hostname parameter can be configured. It is the network hostname or IP
+Address to check instead of host_name. The hostname dimension will always be set to the value of
+host_name even if target_hostname is specified. A dimension target_hostname will be added
+with the value of target_hostname if it is different from host_name.
+
+To simplify configuring multiple checks, when the host_alive detection plugin is configured, hostname can
+be a comma separated list. Instances will be created for each value. target_hostname can also
+be a comma separated list, however, empty values for an individual entry can be given if there is
+no target_hostname for a given hostname entry.
+
+Here is an example of configuring target_hostname :
+```
+  - name: ping somenode
+    host_name: somenode
+    target_hostname: somenode.mgmt.net
+    alive_test: ssh
+```
+
 The host alive checks return the following metrics
 
 | Metric Name | Dimensions | Semantics |
 | ----------- | ---------- | --------- |
-| host_alive_status | observer_host=fqdn, hostname=supplied hostname being checked, test_type=ping or ssh | Status of remote host(device) is online or not. (0=online, 1=offline)
+| host_alive_status | observer_host=fqdn of checking host, hostname=supplied hostname being checked, test_type=ping or ssh | Status of remote host(device) is online or not. (0=online, 1=offline)
 
 Also in the case of an error the value_meta contains an error message.
 
+The default dimensions are:
+    observer_host: fqdn
+    hostname: fqdn | supplied
+    target_hostname: Set to target_hostname only if that is different than host_name
+    test_type: ping | ssh | Unrecognized alive_test
+
+default value_meta
+    error: error_message
+
 ## HTTP (endpoint status)
 This section describes the http endpoint check that can be performed by the Agent. Http endpoint checks are checks that perform simple up/down checks on services, such as HTTP/REST APIs. An agent, given a list of URLs, can dispatch an http request and report to the API success/failure as a metric.
 
diff --git a/monasca_agent/collector/checks_d/host_alive.py b/monasca_agent/collector/checks_d/host_alive.py
index b00ec115..f4df1ddf 100644
--- a/monasca_agent/collector/checks_d/host_alive.py
+++ b/monasca_agent/collector/checks_d/host_alive.py
@@ -1,5 +1,5 @@
 #!/bin/env python
-# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development Company LP
+# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development LP
 """Monitoring Agent remote host aliveness checker.
 
 """
@@ -86,26 +86,38 @@ class HostAlive(services_checks.ServicesCheck):
         return True, None
 
     def _check(self, instance):
-        """Run the desired host-alive check againt this host.
+        """Run the desired host-alive check against this host.
 
         """
 
-        if not instance['host_name']:
-            raise ValueError('Target hostname not specified!')
+        host_name = instance.get('host_name', None)
+        if not host_name:
+            raise ValueError('host_name not specified!')
 
-        dimensions = self._set_dimensions({'hostname': instance['host_name'],
-                                           'observer_host': util.get_hostname()},
+        # Allow a different network name to be used for the check
+        # to handle multi-homed systems
+        if instance.get('target_hostname', None):
+            target_hostname = instance.get('target_hostname')
+        else:
+            target_hostname = host_name
+
+        host_dimensions = {'hostname': host_name, 'observer_host': util.get_hostname()}
+        # If the check is against a different network name than host_name, add it to
+        # the dimensions
+        if target_hostname != host_name:
+            host_dimensions['target_hostname'] = target_hostname
+
+        dimensions = self._set_dimensions(host_dimensions,
                                           instance)
 
         success = False
-
         test_type = instance['alive_test']
         if test_type == 'ssh':
-            success, error_message = self._test_ssh(instance['host_name'],
+            success, error_message = self._test_ssh(target_hostname,
                                                     self.init_config.get('ssh_port'),
                                                     self.init_config.get('ssh_timeout'))
         elif test_type == 'ping':
-            success, error_message = self._test_ping(instance['host_name'],
+            success, error_message = self._test_ping(target_hostname,
                                                      self.init_config.get('ping_timeout'))
         else:
             error_message = 'Unrecognized alive_test: {0}'.format(test_type)
diff --git a/monasca_setup/detection/plugins/host_alive.py b/monasca_setup/detection/plugins/host_alive.py
index 9354170e..2e8695cd 100644
--- a/monasca_setup/detection/plugins/host_alive.py
+++ b/monasca_setup/detection/plugins/host_alive.py
@@ -1,4 +1,4 @@
-# (C) Copyright 2015 Hewlett Packard Enterprise Development Company LP
+# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development LP
 
 import logging
 
@@ -18,9 +18,13 @@ class HostAlive(monasca_setup.detection.ArgsPlugin):
 
        monasca-setup -d hostalive -a "hostname=remotebox type=ping"
 
-       monasca-setup -d hostalive -a "hostname=remotebox,remotebox2 type=ssh"
+       monasca-setup -d hostalive -a "hostname=rb,rb2 target_hostname=,rb2-nic2 type=ssh"
     """
 
+    DEFAULT_PING_TIMEOUT = 1
+    DEFAULT_SSH_TIMEOUT = 2
+    DEFAULT_SSH_PORT = 22
+
     def _detect(self):
         """Run detection, set self.available True if the service is detected.
         """
@@ -43,10 +47,21 @@ class HostAlive(monasca_setup.detection.ArgsPlugin):
                              'host_name': hostname,
                              'alive_test': self.args['type']})
             instances.append(instance)
+        if 'target_hostname' in self.args:
+            index = 0
+            network_names_to_check = self.args['target_hostname'].split(',')
+            for target_hostname in network_names_to_check:
+                if target_hostname:
+                    if index >= len(instances):
+                        raise Exception('Too many target_hostname values')
+                    instance = instances[index]
+                    instance.update({'target_hostname': target_hostname})
+                index += 1
 
-        config['host_alive'] = {'init_config': {'ping_timeout': 1,
-                                                'ssh_timeout': 2,
-                                                'ssh_port': 22},
-                                'instances': instances}
+        config['host_alive'] = {
+            'init_config': {'ping_timeout': self.DEFAULT_PING_TIMEOUT,
+                            'ssh_timeout': self.DEFAULT_SSH_TIMEOUT,
+                            'ssh_port': self.DEFAULT_SSH_PORT},
+            'instances': instances}
 
         return config
diff --git a/tests/checks_d/test_host_alive.py b/tests/checks_d/test_host_alive.py
new file mode 100644
index 00000000..95e2e8ad
--- /dev/null
+++ b/tests/checks_d/test_host_alive.py
@@ -0,0 +1,70 @@
+# (C) Copyright 2016 Hewlett Packard Enterprise Development LP
+
+import mock
+import unittest
+
+import monasca_agent.common.util as util
+from monasca_agent.collector.checks_d.host_alive import HostAlive
+
+HOST_ALIVE_STATUS = 'host_alive_status'
+SUCCESS = 0
+FAILURE = 1
+
+
+class TestHostAlive(unittest.TestCase):
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+        init_config = {}
+        agent_config = {}
+        self._host_alive = HostAlive('TestHostAlive', init_config, agent_config)
+        self._gauge = mock.Mock()
+        self._host_alive.gauge = self._gauge
+        self._host_name = 'monasca'
+        self._instance = {'host_name': self._host_name,
+                          'alive_test': 'ping'}
+        self._base_dimensions = {
+            'test_type': 'ping',
+            'hostname': self._host_name,
+            'observer_host': util.get_hostname()
+        }
+
+    def _run_check(self, host_name, instance, ping_result):
+        mock_ping = mock.Mock(return_value=ping_result)
+        self._host_alive._test_ping = mock_ping
+        self._host_alive._check(instance)
+        mock_ping.assert_called_with(host_name, None)
+
+    def test_host_is_alive(self):
+        ping_result = (True, None)
+        self._run_check(self._host_name, self._instance, ping_result)
+        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
+                                       SUCCESS,
+                                       dimensions=self._base_dimensions)
+
+    def test_host_is_dead(self):
+        error_message = '''I'm not dead yet'''
+        self._run_check(self._host_name, self._instance,
+                        (False, error_message))
+
+        self._gauge.assert_called_with('host_alive_status',
+                                       FAILURE,
+                                       dimensions=self._base_dimensions,
+                                       value_meta={'error': error_message})
+
+    def test_host_is_alive_with_target_hostname(self):
+        check_name = 'otherMonasca'
+        self._instance['target_hostname'] = check_name
+        self._run_check(check_name, self._instance, (True, None))
+        self._base_dimensions['target_hostname'] = check_name
+        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
+                                       SUCCESS,
+                                       dimensions=self._base_dimensions)
+
+    def test_host_is_alive_with_dup_target_hostname(self):
+        host_name = 'monasca'
+        self._instance['target_hostname'] = host_name
+        self._run_check(host_name, self._instance, (True, None))
+        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
+                                       SUCCESS,
+                                       dimensions=self._base_dimensions)
\ No newline at end of file
diff --git a/tests/detection/test_host_alive.py b/tests/detection/test_host_alive.py
new file mode 100644
index 00000000..f15f2060
--- /dev/null
+++ b/tests/detection/test_host_alive.py
@@ -0,0 +1,84 @@
+# (C) Copyright 2016 Hewlett Packard Enterprise Development LP
+
+import mock
+import unittest
+
+from monasca_setup.detection.plugins.host_alive import HostAlive
+
+class TestHostAliveDetect(unittest.TestCase):
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+        self._host_alive = HostAlive('AAAA')
+        self._expected_config = {
+            'host_alive':
+                {
+                    'init_config':
+                        {
+                            'ssh_timeout': self._host_alive.DEFAULT_SSH_TIMEOUT,
+                            'ping_timeout': self._host_alive.DEFAULT_PING_TIMEOUT,
+                            'ssh_port': self._host_alive.DEFAULT_SSH_PORT
+                        }
+                }
+        }
+
+    def _create_instances(self, host_names, target_hostnames=None):
+        instances = []
+        index = 0
+        for name in host_names:
+            instance = {
+                'alive_test': 'ping',
+                'name': name + ' ping',
+                'host_name': name}
+            if (target_hostnames and
+                    index < len(target_hostnames)):
+                target_hostname = target_hostnames[index]
+                # It is possible that a target_hostname is not
+                # set for each hostname
+                if target_hostname:
+                    instance['target_hostname'] = target_hostname
+            index += 1
+            instances.append(instance)
+        self._expected_config['host_alive']['instances'] = instances
+
+    def _run_build_config(self, host_names, target_hostnames=None):
+        hostname = ','.join(host_names)
+        args = {
+            'type': 'ping',
+            'hostname': hostname,
+        }
+        if target_hostnames:
+            args['target_hostname'] = ','.join(target_hostnames)
+        self._host_alive.args = args
+        config = self._host_alive.build_config()
+        self._create_instances(host_names, target_hostnames)
+        self.assertEqual(config, self._expected_config)
+
+    def test_build_config_simple(self):
+        hostname = 'aaaa'
+        self._run_build_config([hostname])
+
+    def test_build_config_multiple(self):
+        host_names = ['aaaa', 'bbbb', 'cccc']
+        self._run_build_config(host_names)
+
+    def test_build_config_complex(self):
+        host_names = ['aaaa', 'bbbb', 'cccc']
+        target_hostnames = ['dddd', 'eeee', 'ffff']
+        self._run_build_config(host_names, target_hostnames)
+
+    def test_build_config_complex_sparse(self):
+        host_names = ['aaaa', 'bbbb', 'cccc']
+        target_hostnames = ['dddd', '', 'ffff']
+        self._run_build_config(host_names, target_hostnames)
+
+    def test_build_config_complex_not_matching(self):
+        host_names = ['aaaa', 'bbbb', 'cccc']
+        target_hostnames = ['dddd']
+        self._run_build_config(host_names, target_hostnames)
+
+    def test_build_config_complex_too_many(self):
+        host_names = ['aaaa', 'bbbb', 'cccc']
+        target_hostnames = ['dddd', 'eeee', 'ffff', 'gggg']
+        with self.assertRaises(Exception):
+            self._run_build_config(host_names, target_hostnames)
\ No newline at end of file