Adds metrics collection scenario

This test suite verifies that the instance metrics are properly published and collected and have non-zero values. The verification is done via the ceilometer API. Waiting for the ceilometer compute agent to poll the resources is crucial, otherwise the test suite will fail because no samples will have been published by the time the samples are checked. The ceilometer agent's polling interval should be lower than the test suite's polled_metrics_delay (by roughly 15-20 seconds). This can be done in two ways: a. Configure tempest's polled_metrics_delay, by adding the following line in tempest.conf, in the hyperv section: polled_metrics_delay = <desired value> b. Set the interval value in pipeline.yaml on the compute node to the desired value and restart the ceilometer compute agent. The second method is preferred, as the interval value defined in pipeline.yaml is typically 600 seconds, which would mean each test would last at least 10 minutes. Change-Id: I2ad696c221fba860a30621686b464b8486bea583
2017-08-30 08:50:52 -07:00 · 2017-08-30 08:50:52 -07:00 · 35d61df1ec
commit 35d61df1ec
parent ab50a17aff
8 changed files with 217 additions and 6 deletions
--- a/oswin_tempest_plugin/config.py
+++ b/oswin_tempest_plugin/config.py
@ -14,6 +14,7 @@
 #    under the License.
 from oslo_config import cfg
 from oslo_config import types
 from tempest import config
 CONF = config.CONF
@ -61,6 +62,25 @@ HyperVGroup = [
               help="The maximum number of NUMA cells the compute nodes "
                    "have. If it's less than 2, resize negative tests for "
                    "vNUMA will be skipped."),
    cfg.ListOpt('collected_metrics',
                item_type=types.String(
                    choices=('cpu', 'network.outgoing.bytes',
                             'disk.read.bytes')),
                default=[],
                help="The ceilometer metrics to check. If this config value "
                     "is empty, the telemetry tests are skipped. This config "
                     "option assumes that the compute nodes are configured "
                     "and capable of collecting ceilometer metrics. WARNING: "
                     "neutron-ovs-agent is not capable of enabling network "
                     "metrics collection."),
    cfg.IntOpt('polled_metrics_delay',
               default=620,
               help="The number of seconds to wait for the metrics to be "
                    "published by the compute node's ceilometer-polling "
                    "agent. The value must be greater by ~15-20 seconds "
                    "than the agent's publish interval, defined in its "
                    "pipeline.yaml file (typically, the intervals are 600 "
                    "seconds)."),
 ]
 hyperv_host_auth_group = cfg.OptGroup(name='hyperv_host_auth',
--- a/oswin_tempest_plugin/tests/_mixins/migrate.py
+++ b/oswin_tempest_plugin/tests/_mixins/migrate.py
@ -43,7 +43,7 @@ class _MigrateMixin(object):
    def test_migration(self):
        server_tuple = self._create_server()
        self._migrate_server(server_tuple)
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
 class _LiveMigrateMixin(object):
@ -104,4 +104,4 @@ class _LiveMigrateMixin(object):
    def test_live_migration(self):
        server_tuple = self._create_server()
        self._live_migrate_server(server_tuple)
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
--- a/oswin_tempest_plugin/tests/_mixins/optional_feature.py
+++ b/oswin_tempest_plugin/tests/_mixins/optional_feature.py
@ -71,7 +71,7 @@ class _OptionalFeatureMixin(resize._ResizeUtils):
    def test_feature(self):
        server_tuple = self._create_server()
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
    @testtools.skipUnless(CONF.compute_feature_enabled.resize,
                          'Resize is not available.')
--- a/oswin_tempest_plugin/tests/_mixins/resize.py
+++ b/oswin_tempest_plugin/tests/_mixins/resize.py
@ -77,7 +77,7 @@ class _ResizeUtils(object):
        # assert that the server is still reachable, even if the resize
        # failed.
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
 class _ResizeMixin(_ResizeUtils):
--- a/oswin_tempest_plugin/tests/scenario/test_cluster.py
+++ b/oswin_tempest_plugin/tests/scenario/test_cluster.py
@ -150,4 +150,4 @@ class HyperVClusterTest(migrate._MigrateMixin,
    def test_clustered_vm(self):
        server_tuple = self._create_server()
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
--- a/oswin_tempest_plugin/tests/scenario/test_disks.py
+++ b/oswin_tempest_plugin/tests/scenario/test_disks.py
@ -49,7 +49,7 @@ class _BaseDiskTestMixin(migrate._MigrateMixin,
    def test_disk(self):
        server_tuple = self._create_server()
-        self._check_server_connectivity(server_tuple)
+        self._check_scenario(server_tuple)
    @testtools.skipUnless(CONF.compute_feature_enabled.resize,
                          'Resize is not available.')
--- a/oswin_tempest_plugin/tests/scenario/test_metrics_collection.py
+++ b/oswin_tempest_plugin/tests/scenario/test_metrics_collection.py
@ -0,0 +1,184 @@
 # Copyright 2017 Cloudbase Solutions
 # All Rights Reserved.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
 #    a copy of the License at
 #
 #         http://www.apache.org/licenses/LICENSE-2.0
 #
 #    Unless required by applicable law or agreed to in writing, software
 #    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
 import time
 try:
    # NOTE(claudiub): ceilometer might not be installed, it is not mandatory.
    from ceilometer.tests.tempest.service import client as telemetry_client
 except Exception:
    telemetry_client = None
 from oslo_log import log as logging
 from tempest import clients
 from oswin_tempest_plugin import config
 from oswin_tempest_plugin.tests._mixins import migrate
 from oswin_tempest_plugin.tests import test_base
 CONF = config.CONF
 LOG = logging.getLogger(__name__)
 class ClientManager(clients.Manager):
    """tempest client manager which also carries a telemetry client."""

    def __init__(self, *args, **kwargs):
        """Build the regular tempest clients, then the telemetry client."""
        super(ClientManager, self).__init__(*args, **kwargs)
        self._set_telemetry_clients()

    def _set_telemetry_clients(self):
        """Create the telemetry client and store it as telemetry_client."""
        # The keyword arguments are the ones published by the ceilometer
        # tempest client module's own Manager.
        client_params = telemetry_client.Manager.telemetry_params
        self.telemetry_client = telemetry_client.TelemetryClient(
            self.auth_provider, **client_params)
 class MetricsCollectionTestCase(migrate._MigrateMixin,
                                migrate._LiveMigrateMixin,
                                test_base.TestBase):
    """Adds metrics collection scenario tests.

    This test suite verifies that the instance metrics are properly published
    and collected and have non-zero values. The verification is done via the
    ceilometer API.

    setup:
        1. spins a new instance.
        2. waits until the instance was created successfully (ACTIVE status).
        3. waits for an interval of time which represents the polling period
        of the ceilometer-polling agent.

    Waiting for the ceilometer-polling agent to poll the resources is crucial,
    otherwise the test suite will fail due to the fact that no samples
    would be found published before checking the samples.

    The test suite's polled_metrics_delay must have a greater value than the
    ceilometer agent's polling interval. This can be done in two ways:

        a. Configure tempest's polled_metrics_delay, by adding the
        following line in tempest.conf, in the hyperv section:
        polled_metrics_delay = <desired value>

        b. Set the interval value in pipeline.yaml on the compute node to
        the desired value and restart the ceilometer polling agent. The
        interval value is set either for the 'meter_source' or for each
        of the following: 'cpu_source', 'disk_source', 'network_source'.

    Note: If the polled_metrics_delay value is too low, the tests might not
    find any samples and fail because of this. As a recommendation,
    polled_metrics_delay's value should be:
        polled_metrics_delay = <pipeline.yaml interval value> + <15-20 seconds>

    tests:
        1. test_metrics - tests values for the following metrics:
            - cpu
            - network.outgoing.bytes
            - disk.read.bytes

    assumptions:
        1. Ceilometer agent on the compute node is running.
        2. Ceilometer agent on the compute node has the polling interval
        defined in pipeline.yaml lower than the polled_metrics_delay defined
        in this test suite.
        3. The compute nodes' nova-compute and neutron-hyperv-agent services
        have been configured to enable metrics collection.
    """

    client_manager = ClientManager

    @classmethod
    def skip_checks(cls):
        """Skip when telemetry support is missing or not configured."""
        super(MetricsCollectionTestCase, cls).skip_checks()

        # The ceilometer tempest client import at the top of this module is
        # optional and leaves telemetry_client set to None when it fails.
        # ClientManager cannot build a telemetry client in that case, so skip
        # the suite instead of failing later with an AttributeError.
        if telemetry_client is None:
            raise cls.skipException(
                "The ceilometer tempest client could not be imported.")

        if (not CONF.service_available.ceilometer or
                not CONF.telemetry.deprecated_api_enabled):
            raise cls.skipException("Ceilometer API support is required.")

        if not CONF.hyperv.collected_metrics:
            raise cls.skipException("Collected metrics not configured.")

    @classmethod
    def setup_clients(cls):
        """Expose the primary manager's telemetry client on the class."""
        super(MetricsCollectionTestCase, cls).setup_clients()

        # Telemetry client
        cls.telemetry_client = cls.os_primary.telemetry_client

    def _telemetry_check_samples(self, resource_id, meter_name):
        """Assert that the meter has a non-zero sample for the resource.

        :param resource_id: the ceilometer resource id the samples must
            belong to.
        :param meter_name: the name of the meter whose samples are listed.
        """
        # The mapping is handed to the logger so that the interpolation only
        # happens if the record is actually emitted.
        LOG.info("Checking %(meter_name)s for resource %(resource_id)s",
                 {'meter_name': meter_name, 'resource_id': resource_id})

        samples = self.telemetry_client.list_samples(meter_name)
        self.assertNotEmpty(samples,
                            'Telemetry client returned no samples.')

        # list_samples is only filtered by meter name, so narrow the samples
        # down to the ones belonging to the requested resource.
        expected_samples = [s for s in samples if
                            s['resource_id'] == resource_id]
        self.assertNotEmpty(
            expected_samples,
            'No meter %(meter_name)s samples for resource '
            '%(resource_id)s found.' % {'meter_name': meter_name,
                                        'resource_id': resource_id})

        non_zero_valued_samples = [s for s in expected_samples if
                                   s['counter_volume'] > 0]
        self.assertNotEmpty(
            non_zero_valued_samples,
            'All meter %(meter_name)s samples for resource '
            '%(resource_id)s are 0.' % {'meter_name': meter_name,
                                        'resource_id': resource_id})

    def _get_instance_cpu_resource_id(self, server):
        """Return the resource id used for the server's cpu samples."""
        return server['id']

    def _get_instance_disk_resource_id(self, server):
        """Return the resource id used for the server's disk samples."""
        return server['id']

    def _get_instance_port_resource_id(self, server):
        """Return the ceilometer resource id of the server's port.

        The id is looked up among the resources listed by the telemetry
        client; exactly one matching resource is expected.
        """
        # Note(claudiub): the format for the instance_port_resource_id is:
        # %(OS-EXT-SRV-ATTR:instance_name)s-%(instance_id)s-%(port_id)s
        # the instance returned by self.servers_client does not contain the
        # OS-EXT-SRV-ATTR:instance_name field. Which means that the resource_id
        # must be found in ceilometer's resources.
        start_res_id = server['id']
        resources = self.telemetry_client.list_resources()
        res_ids = [r['resource_id'] for r in resources
                   if r['resource_id'].startswith('instance-') and
                   start_res_id in r['resource_id']]

        self.assertEqual(
            1, len(res_ids),
            'Expected exactly one port resource for instance '
            '%(instance_id)s, found: %(res_ids)s' % {
                'instance_id': start_res_id, 'res_ids': res_ids})
        return res_ids[0]

    def _check_scenario(self, server_tuple):
        """Wait for the samples to be published, then check each metric.

        Only the metrics listed in the hyperv.collected_metrics config option
        are checked.

        :param server_tuple: the server tuple whose server's samples are
            verified.
        """
        server = server_tuple.server
        LOG.info("Waiting %s seconds for the ceilometer compute agents to "
                 "publish the samples.", CONF.hyperv.polled_metrics_delay)
        time.sleep(CONF.hyperv.polled_metrics_delay)

        # TODO(claudiub): Add more metrics.

        if 'cpu' in CONF.hyperv.collected_metrics:
            cpu_res_id = self._get_instance_cpu_resource_id(server)
            self._telemetry_check_samples(cpu_res_id, 'cpu')

        if 'network.outgoing.bytes' in CONF.hyperv.collected_metrics:
            port_res_id = self._get_instance_port_resource_id(server)
            self._telemetry_check_samples(port_res_id,
                                          'network.outgoing.bytes')

        if 'disk.read.bytes' in CONF.hyperv.collected_metrics:
            disk_resource_id = self._get_instance_disk_resource_id(server)
            self._telemetry_check_samples(disk_resource_id, 'disk.read.bytes')

    def test_metrics(self):
        """Create a server and verify its collected metrics."""
        server_tuple = self._create_server()
        self._check_scenario(server_tuple)
--- a/oswin_tempest_plugin/tests/test_base.py
+++ b/oswin_tempest_plugin/tests/test_base.py
@ -284,3 +284,10 @@ class TestBase(tempest.test.BaseTestCase):
    def _check_server_connectivity(self, server_tuple):
        """Check that the given server is reachable.

        :param server_tuple: the server tuple passed on to
            _get_server_client.
        """
        # if server connectivity works, an SSH client can be opened.
        self._get_server_client(server_tuple)
    def _check_scenario(self, server_tuple):
        # NOTE(claudiub): This method is to be used when verifying a
        # particular scenario. If a scenario has a different way of validating
        # a scenario (e.g.: metrics collection), it should overwrite this
        # method.
        self._check_server_connectivity(self, server_tuple)