Merge "Add test that checks http and logs alarms"
This commit is contained in:
		@@ -12,6 +12,7 @@
 | 
			
		||||
#    License for the specific language governing permissions and limitations
 | 
			
		||||
#    under the License.
 | 
			
		||||
 | 
			
		||||
import contextlib
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import signal
 | 
			
		||||
@@ -119,7 +120,7 @@ class PluginHelper(object):
 | 
			
		||||
        :param plugin: name of the plugin.
 | 
			
		||||
        :type plugin: str
 | 
			
		||||
        :param parameter: name of the parameter.
 | 
			
		||||
        :type name: str
 | 
			
		||||
        :type parameter: str
 | 
			
		||||
        :returns: parameter's value
 | 
			
		||||
        """
 | 
			
		||||
        asserts.assert_true(
 | 
			
		||||
@@ -202,7 +203,7 @@ class PluginHelper(object):
 | 
			
		||||
        """Get the virtual IP address.
 | 
			
		||||
 | 
			
		||||
        :param vip_name: name of the VIP.
 | 
			
		||||
        :type name: str
 | 
			
		||||
        :type vip_name: str
 | 
			
		||||
        :returns: the VIP address in dotted-decimal notation
 | 
			
		||||
        :rtype: str
 | 
			
		||||
        """
 | 
			
		||||
@@ -713,6 +714,34 @@ class PluginHelper(object):
 | 
			
		||||
        else:
 | 
			
		||||
            return result
 | 
			
		||||
 | 
			
		||||
    @contextlib.contextmanager
    def make_logical_db_unavailable(self, db_name, controller):
        """Temporarily rename every table of a logical database.

        On entry all tables of ``db_name`` are renamed to their upper-case
        names; on exit they are renamed to their lower-case names. The
        restore step runs even when the managed block raises, so a failed
        check inside the ``with`` body does not leave the database renamed.

        NOTE: the restore step lower-cases the table names rather than
        replaying the saved originals, so it only restores the initial state
        when the original table names were all lower case.

        :param db_name: logical database name
        :type db_name: str
        :param controller: controller with MySQL database
        :type controller: nailgun node
        :returns: None, works as context manager
        """
        # The inner query generates one "rename table" statement per table
        # and the result is piped into a second mysql client for execution.
        cmd = (
            "mysql -AN -e "
            "\"select concat("
            "'rename table {db_name}.', table_name, ' "
            "to {db_name}.' , {method}(table_name) , ';') "
            "from information_schema.tables "
            "where table_schema = '{db_name}';"
            "\" | mysql")

        def rename_tables(method):
            # A fresh SSH session is opened for every rename, so no
            # connection is kept open while the managed block runs.
            with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
                remote.check_call(cmd.format(db_name=db_name, method=method))

        rename_tables("upper")
        try:
            yield
        finally:
            # Always revert, otherwise an exception in the managed block
            # would leave the database unusable.
            rename_tables("lower")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _raise_TimeOut(sig, stack):
    """Raise ``TimeoutException`` unconditionally.

    The ``(sig, stack)`` signature matches what a signal handler receives;
    presumably this is installed via ``signal.signal`` elsewhere in the
    module -- confirm against the caller. Both arguments are ignored.
    """
    raise TimeoutException()
 | 
			
		||||
 
 | 
			
		||||
@@ -105,9 +105,9 @@ def clear_resource(remote, resource, wait=None):
 | 
			
		||||
        :param remote: SSH connection to the node.
 | 
			
		||||
        :type remote: SSHClient
 | 
			
		||||
        :param resource: resource name.
 | 
			
		||||
        :type name: str
 | 
			
		||||
        :type resource: str
 | 
			
		||||
        :param wait: number of seconds to wait for the operation to complete.
 | 
			
		||||
        :type operation: int
 | 
			
		||||
        :type wait: int
 | 
			
		||||
    """
 | 
			
		||||
    cmd = "pcs resource clear {}".format(resource)
 | 
			
		||||
    if wait is not None:
 | 
			
		||||
 
 | 
			
		||||
@@ -94,6 +94,25 @@ class TestToolchainAlarms(api.ToolchainApi):
 | 
			
		||||
            self.check_alarms("service", "rabbitmq", "memory",
 | 
			
		||||
                              controller["hostname"], OKAY_STATUS)
 | 
			
		||||
 | 
			
		||||
    def _verify_service_alarms(self, trigger_fn, trigger_count,
 | 
			
		||||
                               metrics, status):
 | 
			
		||||
        """Check services' alarm metrics.
 | 
			
		||||
 | 
			
		||||
        :param trigger_fn: function that affects an alarm of needed service
 | 
			
		||||
        :type trigger_fn: callable
 | 
			
		||||
        :param trigger_count: how many times call trigger function
 | 
			
		||||
        :type trigger_count: int
 | 
			
		||||
        :param metrics: mapping with needed metrics of alarms to check
 | 
			
		||||
        :type metrics: dict
 | 
			
		||||
        :param status: value of metric to check
 | 
			
		||||
        :type status: int (in most cases)
 | 
			
		||||
        :return: None
 | 
			
		||||
        """
 | 
			
		||||
        for _ in range(trigger_count):
 | 
			
		||||
            trigger_fn()
 | 
			
		||||
        for service, source in metrics.items():
 | 
			
		||||
            self.check_alarms("service", service, source, None, status)
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
 | 
			
		||||
          groups=["check_mysql_fs_alarms", "toolchain", "alarms"])
 | 
			
		||||
    @log_snapshot_after_test
 | 
			
		||||
@@ -301,3 +320,184 @@ class TestToolchainAlarms(api.ToolchainApi):
 | 
			
		||||
            self.helpers.cluster_id, ["compute"])[0]
 | 
			
		||||
        self._check_filesystem_alarms(compute, "/var/lib/nova", "nova-fs",
 | 
			
		||||
                                      "/var/lib/nova/bigfile", "compute")
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_nova_api_logs_errors_alarms",
                  "http_logs_errors_alarms", "toolchain", "alarms"])
    @log_snapshot_after_test
    def check_nova_api_logs_errors_alarms(self):
        """Check that nova-logs-error and nova-api-http-errors alarms work
        as expected.

        Scenario:
            1. Rename all nova tables to UPPERCASE.
            2. Run some nova list command repeatedly.
            3. Check the last value of the nova-logs-error alarm in InfluxDB.
            4. Check the last value of the nova-api-http-errors alarm
               in InfluxDB.
            5. Revert all nova tables names to lowercase.

        Duration 10m
        """
        expected_alarms = {"nova-logs": "error",
                           "nova-api": "http_errors"}

        def request_servers():
            # Best effort: any error from the request is ignored on purpose
            # (presumably the call fails while the nova tables are renamed).
            try:
                self.helpers.os_conn.get_servers()
            except Exception:
                pass

        self.env.revert_snapshot("deploy_toolchain")

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])
        controller = controllers[0]

        with self.helpers.make_logical_db_unavailable("nova", controller):
            self._verify_service_alarms(
                request_servers, 100, expected_alarms, WARNING_STATUS)
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_neutron_api_logs_errors_alarms",
                  "http_logs_errors_alarms", "toolchain", "alarms"])
    @log_snapshot_after_test
    def check_neutron_api_logs_errors_alarms(self):
        """Check that neutron-logs-error and neutron-api-http-errors
        alarms work as expected.

        Scenario:
            1. Rename all neutron tables to UPPERCASE.
            2. Run some neutron agents list command repeatedly.
            3. Check the last value of the neutron-logs-error alarm
               in InfluxDB.
            4. Check the last value of the neutron-api-http-errors alarm
               in InfluxDB.
            5. Revert all neutron tables names to lowercase.

        Duration 10m
        """
        expected_alarms = {"neutron-logs": "error",
                           "neutron-api": "http_errors"}

        def request_agents():
            # Best effort: any error from the request is ignored on purpose
            # (presumably the call fails while the neutron tables are
            # renamed).
            try:
                self.helpers.os_conn.list_agents()
            except Exception:
                pass

        self.env.revert_snapshot("deploy_toolchain")

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])
        controller = controllers[0]

        with self.helpers.make_logical_db_unavailable("neutron", controller):
            self._verify_service_alarms(
                request_agents, 100, expected_alarms, WARNING_STATUS)
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_glance_api_logs_errors_alarms",
                  "http_logs_errors_alarms", "toolchain", "alarms"])
    @log_snapshot_after_test
    def check_glance_api_logs_errors_alarms(self):
        """Check that glance-logs-error and glance-api-http-errors alarms
        work as expected.

        Scenario:
            1. Rename all glance tables to UPPERCASE.
            2. Run some glance image list command repeatedly.
            3. Check the last value of the glance-logs-error alarm
               in InfluxDB.
            4. Check the last value of the glance-api-http-errors alarm
               in InfluxDB.
            5. Revert all glance tables names to lowercase.

        Duration 10m
        """
        expected_alarms = {"glance-logs": "error",
                           "glance-api": "http_errors"}

        def request_images():
            # Best effort: any error from the request is ignored on purpose
            # (presumably the call fails while the glance tables are
            # renamed).
            try:
                # NOTE(rpromyshlennikov): the image list is lazy, so it has
                # to be materialized for the request to actually be issued.
                images = self.helpers.os_conn.get_image_list()
                return list(images)
            except Exception:
                pass

        self.env.revert_snapshot("deploy_toolchain")

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])
        controller = controllers[0]

        with self.helpers.make_logical_db_unavailable("glance", controller):
            self._verify_service_alarms(
                request_images, 100, expected_alarms, WARNING_STATUS)
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_heat_api_logs_errors_alarms",
                  "http_logs_errors_alarms", "toolchain", "alarms"])
    @log_snapshot_after_test
    def check_heat_api_logs_errors_alarms(self):
        """Check that heat-logs-error and heat-api-http-errors alarms work
        as expected.

        Scenario:
            1. Rename all heat tables to UPPERCASE.
            2. Run some heat stack list command repeatedly.
            3. Check the last value of the heat-logs-error alarm in InfluxDB.
            4. Check the last value of the heat-api-http-errors alarm
               in InfluxDB.
            5. Revert all heat tables names to lowercase.

        Duration 10m
        """
        expected_alarms = {"heat-logs": "error",
                           "heat-api": "http_errors"}

        def request_stacks():
            # ``controller`` is resolved from the enclosing scope when this
            # function is called, i.e. after it has been assigned below.
            # Best effort: any error is ignored on purpose (presumably the
            # command fails while the heat tables are renamed).
            try:
                ssh = self.fuel_web.get_ssh_for_nailgun_node(controller)
                with ssh as remote:
                    return remote.execute(
                        ". openrc && heat stack-list > /dev/null 2>&1")
            except Exception:
                pass

        self.env.revert_snapshot("deploy_toolchain")

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])
        controller = controllers[0]

        with self.helpers.make_logical_db_unavailable("heat", controller):
            self._verify_service_alarms(
                request_stacks, 100, expected_alarms, WARNING_STATUS)
 | 
			
		||||
 | 
			
		||||
    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_cinder_api_logs_errors_alarms",
                  "http_logs_errors_alarms", "toolchain", "alarms"])
    @log_snapshot_after_test
    def check_cinder_api_logs_errors_alarms(self):
        """Check that cinder-logs-error and cinder-api-http-errors alarms
        work as expected.

        Scenario:
            1. Rename all cinder tables to UPPERCASE.
            2. Run some cinder list command repeatedly.
            3. Check the last value of the cinder-logs-error alarm
               in InfluxDB.
            4. Check the last value of the cinder-api-http-errors alarm
               in InfluxDB.
            5. Revert all cinder tables names to lowercase.

        Duration 10m
        """
        expected_alarms = {"cinder-logs": "error",
                           "cinder-api": "http_errors"}

        def request_volumes():
            # Best effort: any error from the request is ignored on purpose
            # (presumably the call fails while the cinder tables are
            # renamed).
            try:
                self.helpers.os_conn.cinder.volumes.list()
            except Exception:
                pass

        self.env.revert_snapshot("deploy_toolchain")

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])
        controller = controllers[0]

        with self.helpers.make_logical_db_unavailable("cinder", controller):
            self._verify_service_alarms(
                request_volumes, 100, expected_alarms, WARNING_STATUS)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user