Test for pacemaker resources when disk is full

Add a test case covering the scenario where the root filesystem is filled on the primary controller.
 Add health_check_sanity_smoke_ha and health_check_ha to ostf_base_actions.py.
 Move health_check to ostf_base_actions.py.

 Closes-Bug: #1500446

Change-Id: I17055527e99c72b790544e7baef997eab20258e0
Artem Grechanichenko 2015-11-10 16:15:32 +02:00
parent 37c05e38a6
commit a88049c182
7 changed files with 448 additions and 19 deletions


@@ -39,6 +39,16 @@ Destroy controllers
.. automodule:: system_test.tests.strength.destroy_controllers
:members:
Fill root and check pacemaker
-----------------------------
.. automodule:: system_test.tests.strength.filling_root
:members:
OSTF base actions
-----------------
.. automodule:: system_test.tests.ostf_base_actions
:members:
Strength Base
-------------
.. automodule:: system_test.tests.strength.strength_base


@@ -11,7 +11,9 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from xml import etree
from xml.etree import ElementTree
from fuelweb_test.helpers.utils import run_on_remote_get_results
def get_pacemaker_nodes_attributes(cibadmin_status_xml):
@@ -50,7 +52,7 @@ def get_pacemaker_nodes_attributes(cibadmin_status_xml):
...
}
"""
root = etree.fromstring(cibadmin_status_xml)
root = ElementTree.fromstring(cibadmin_status_xml)
nodes = {}
for node_state in root.iter('node_state'):
node_name = node_state.get('uname')
@@ -88,9 +90,19 @@ def get_pcs_nodes(pcs_status_xml):
}
"""
root = etree.fromstring(pcs_status_xml)
root = ElementTree.fromstring(pcs_status_xml)
nodes = {}
for nodes_group in root.iter('nodes'):
for node in nodes_group:
nodes[node.get('name')] = node.attrib
return nodes
def parse_pcs_status_xml(remote):
"""Run 'pcs status xml' on the node and return its raw XML output
:param remote: SSHClient instance
:return: string with 'pcs status xml' output (feed it to get_pcs_nodes)
"""
pcs_status_xml = run_on_remote_get_results(
remote, 'pcs status xml')['stdout_str']
return pcs_status_xml


@@ -128,6 +128,7 @@ def import_tests():
from system_test.tests import test_create_deploy_ostf # noqa
from system_test.tests import test_deploy_check_rados # noqa
from system_test.tests.strength import destroy_controllers # noqa
from system_test.tests.strength import filling_root # noqa
from system_test.tests import test_fuel_migration # noqa


@@ -23,6 +23,7 @@ from fuelweb_test import settings as test_settings
from system_test import logger
from system_test.tests import base_actions_factory
from system_test.tests.ostf_base_actions import HealthCheckActions
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
from system_test.helpers.decorators import action
@@ -137,7 +138,7 @@ class PrepareBase(base_actions_factory.BaseActionsFactory):
]
class ActionsBase(PrepareBase):
class ActionsBase(PrepareBase, HealthCheckActions):
"""Basic actions for acceptance cases
For choosing action order use actions_order variable, set list of actions
@@ -148,7 +149,6 @@ class ActionsBase(PrepareBase):
add_nodes - add nodes to environment
deploy_cluster - deploy en environment
network_check - run network check
health_check - run all ostf tests
reset_cluster - reset an environment (NotImplemented)
delete_cluster - delete en environment (NotImplemented)
stop_deploy - stop deploying of environment (NotImplemented)
@@ -264,20 +264,6 @@ class ActionsBase(PrepareBase):
self.fuel_web.verify_network(self.cluster_id)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check(self):
"""Run health checker
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest()
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
should_fail=getattr(self, 'ostf_tests_should_failed', 0))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def save_load_environment(self):


@@ -0,0 +1,79 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE_2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from proboscis import SkipTest
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
from system_test.helpers.decorators import action
from system_test.tests import base_actions_factory
class HealthCheckActions(base_actions_factory.BaseActionsFactory):
"""Basic actions for OSTF tests
health_check - run sanity and smoke OSTF tests
health_check_sanity_smoke_ha - run sanity, smoke and ha OSTF tests
health_check_ha - run ha OSTF tests
"""
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check(self):
"""Run health checker
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check_sanity_smoke_ha(self):
"""Run health checker Sanity, Smoke and HA
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
test_sets=['sanity', 'smoke', 'ha'],
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check_ha(self):
"""Run health checker HA
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
test_sets=['ha'],
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
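All three actions read their OSTF expectations through getattr() with defaults, so a case class only defines ostf_tests_should_failed or failed_test_name when it expects failures. A standalone sketch of that lookup pattern (the class names below are hypothetical, not from fuel-qa):

class DefaultCase(object):
    """Case that expects all OSTF tests to pass."""
    pass

class FailureAwareCase(object):
    """Case that tolerates one known OSTF failure."""
    ostf_tests_should_failed = 1
    failed_test_name = ['some OSTF test name']  # placeholder, not a real test name

def collect_ostf_kwargs(case):
    # Mirrors the getattr() defaults used by the health_check* actions
    return {
        'should_fail': getattr(case, 'ostf_tests_should_failed', 0),
        'failed_test_name': getattr(case, 'failed_test_name', None),
    }

print(collect_ostf_kwargs(DefaultCase()))       # should_fail=0, failed_test_name=None
print(collect_ostf_kwargs(FailureAwareCase()))  # should_fail=1, one named failure allowed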


@@ -0,0 +1,86 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE_2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from proboscis import factory
from system_test.helpers.utils import case_factory
from system_test.tests.strength import strength_base
class FillRootPrimaryController(
strength_base.FillRootBaseActions
):
"""Fill root filesystem on primary controller and check pacemaker
Scenario:
1. Setup master node
2. Config default repositories for release
3. Bootstrap slaves and make snapshot ready
4. Revert snapshot ready
5. Create Environment
6. Add nodes to Environment
7. Run network checker
8. Deploy Environment
9. Run network checker
10. Run OSTF
11. Make or use existing snapshot of ready Environment
12. Get pcs initial state
13. Fill root filesystem on primary controller
above rabbit_disk_free_limit of 5 MB
14. Check for stopping pacemaker resources
15. Run OSTF Sanity and Smoke tests
16. Fill root filesystem on primary controller
below rabbit_disk_free_limit of 5 MB
17. Check for stopped pacemaker resources
18. Run OSTF Sanity and Smoke tests
19. Clean up space on root filesystem on
primary controller
20. Check for started pacemaker resources
21. Run OSTF Sanity, Smoke, HA
"""
base_group = ['system_test',
'system_test.failover',
'system_test.failover.filling_root'
]
actions_order = [
'setup_master',
'config_release',
'make_slaves',
'revert_slaves',
'create_env',
'add_nodes',
'network_check',
'deploy_cluster',
'network_check',
'health_check',
'save_load_environment',
'get_pcs_initial_state',
'fill_root_above_rabbit_disk_free_limit',
'check_stopping_resources',
'health_check',
'fill_root_below_rabbit_disk_free_limit',
'check_stopping_resources',
'health_check',
'clean_up_space_on_root',
'check_starting_resources',
'health_check_sanity_smoke_ha',
]
@factory
def cases():
return (case_factory(FillRootPrimaryController))
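The case above is driven entirely by actions_order: case_factory and BaseActionsFactory assemble that list into an ordered proboscis test case. A toy standalone sketch of the idea (it does not reproduce the real factory, only the "named actions executed in declared order" pattern):

class MiniCase(object):
    actions_order = ['create_env', 'deploy_cluster', 'health_check_ha']

    def create_env(self):
        print('create_env')

    def deploy_cluster(self):
        print('deploy_cluster')

    def health_check_ha(self):
        print('health_check_ha')

    def run(self):
        # Execute the actions strictly in the declared order
        for name in self.actions_order:
            getattr(self, name)()

MiniCase().run()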


@@ -12,10 +12,18 @@
# License for the specific language governing permissions and limitations
# under the License.
import time
from proboscis.asserts import assert_true
from devops.helpers.helpers import wait
from fuelweb_test.helpers.checkers import check_file_exists
from fuelweb_test.helpers.utils import run_on_remote_get_results
from fuelweb_test.helpers.pacemaker import get_pacemaker_nodes_attributes
from fuelweb_test.helpers.pacemaker import get_pcs_nodes
from fuelweb_test.helpers.pacemaker import parse_pcs_status_xml
from system_test.tests import actions_base
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
@@ -100,3 +108,250 @@ class StrengthBaseActions(actions_base.ActionsBase):
node.name,
online_d_ctrls,
self.destroyed_devops_nodes)
class FillRootBaseActions(actions_base.ActionsBase):
def __init__(self, config=None):
super(FillRootBaseActions, self).__init__(config)
self.ostf_tests_should_failed = 0
@deferred_decorator([make_snapshot_if_step_fail])
@action
def get_pcs_initial_state(self):
"""Get controllers initial status in pacemaker"""
self.primary_controller = self.fuel_web.get_nailgun_primary_node(
self.env.d_env.nodes().slaves[0])
self.primary_controller_fqdn = str(
self.fuel_web.fqdn(self.primary_controller))
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
pcs_status = parse_pcs_status_xml(remote)
root_free = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')['stdout_str']
self.primary_controller_space_on_root = get_pacemaker_nodes_attributes(
root_free)[self.primary_controller_fqdn]['root_free']
self.disk_monitor_limit = 512
self.rabbit_disk_free_limit = 5
self.pacemaker_restart_timeout = 600
self.pcs_check_timeout = 300
self.primary_controller_space_to_filled = str(
int(
self.primary_controller_space_on_root
) - self.disk_monitor_limit - 1)
self.pcs_status = get_pcs_nodes(pcs_status)
self.slave_nodes_fqdn = list(
set(self.pcs_status.keys()).difference(
set(self.primary_controller_fqdn.split())))
running_resources_slave_1 = int(
self.pcs_status[self.slave_nodes_fqdn[0]]['resources_running'])
running_resources_slave_2 = int(
self.pcs_status[self.slave_nodes_fqdn[1]]['resources_running'])
self.slave_node_running_resources = str(min(running_resources_slave_1,
running_resources_slave_2
)
)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_above_rabbit_disk_free_limit(self):
"""Filling root filesystem on primary controller"""
logger.info(
"Free space in root on primary controller - {}".format(
self.primary_controller_space_on_root
))
logger.info(
"Need to fill space on root - {}".format(
self.primary_controller_space_to_filled
))
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
run_on_remote_get_results(
remote, 'fallocate -l {}M /root/bigfile'.format(
self.primary_controller_space_to_filled))
check_file_exists(remote, '/root/bigfile')
@deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_below_rabbit_disk_free_limit(self):
"""Fill root more to below rabbit disk free limit"""
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
pacemaker_attributes = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')['stdout_str']
controller_space_on_root = get_pacemaker_nodes_attributes(
pacemaker_attributes)[self.primary_controller_fqdn][
'root_free']
logger.info(
"Free space in root on primary controller - {}".format(
controller_space_on_root
))
controller_space_to_filled = str(
int(
controller_space_on_root
) - self.rabbit_disk_free_limit - 1)
logger.info(
"Need to fill space on root - {}".format(
controller_space_to_filled
))
run_on_remote_get_results(
remote, 'fallocate -l {}M /root/bigfile2'.format(
controller_space_to_filled))
check_file_exists(remote, '/root/bigfile2')
@deferred_decorator([make_snapshot_if_step_fail])
@action
def check_stopping_resources(self):
"""Check stopping pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute():
logger.info("Checking for '#health_disk' attribute")
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' in pcs_attribs[
self.primary_controller_fqdn]
def checking_for_red_in_health_disk_attribute():
logger.info(
"Checking for '#health_disk' attribute have 'red' value")
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return pcs_attribs[self.primary_controller_fqdn][
'#health_disk'] == 'red'
def check_stopping_resources():
logger.info(
"Checking for 'running_resources "
"attribute have '0' value")
pcs_status = parse_pcs_status_xml(remote)
pcs_attribs = get_pcs_nodes(pcs_status)
return pcs_attribs[self.primary_controller_fqdn][
'resources_running'] == '0'
wait(checking_health_disk_attribute,
"Attribute #health_disk wasn't appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(checking_for_red_in_health_disk_attribute,
"Attribute #health_disk doesn't have 'red' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(check_stopping_resources,
"Attribute 'running_resources' doesn't have '0' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def clean_up_space_on_root(self):
"""Clean up space on root filesystem on primary controller"""
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
run_on_remote_get_results(
remote, 'rm /root/bigfile /root/bigfile2')
run_on_remote_get_results(
remote,
'crm node status-attr {} delete "#health_disk"'.format(
self.primary_controller_fqdn))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def check_starting_resources(self):
"""Check starting pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute_is_not_present():
logger.info(
"Checking for '#health_disk' attribute "
"is not present on node {}".format(
self.primary_controller_fqdn))
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' not in pcs_attribs[
self.primary_controller_fqdn]
def check_started_resources():
logger.info(
"Checking for 'running_resources' attribute "
"have {} value on node {}".format(
self.slave_node_running_resources,
self.primary_controller_fqdn))
pcs_status = parse_pcs_status_xml(remote)
pcs_attribs = get_pcs_nodes(pcs_status)
return pcs_attribs[self.primary_controller_fqdn][
'resources_running'] == self.slave_node_running_resources
wait(checking_health_disk_attribute_is_not_present,
"Attribute #health_disk was appeared in attributes "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(check_started_resources,
"Attribute 'running_resources' doesn't have {} value "
"on node {} in {} seconds".format(
self.slave_node_running_resources,
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
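For reference, the two fallocate sizes computed above follow this arithmetic (a back-of-the-envelope sketch with a hypothetical root_free value; all numbers are MB):

root_free = 10000              # hypothetical root_free attribute reported by pacemaker
disk_monitor_limit = 512       # threshold used by the test
rabbit_disk_free_limit = 5     # threshold used by the test

# fill_root_above_rabbit_disk_free_limit: leave just over disk_monitor_limit free
first_fill = root_free - disk_monitor_limit - 1              # 9487 MB written to /root/bigfile
free_after_first = root_free - first_fill                    # ~513 MB left on /

# fill_root_below_rabbit_disk_free_limit: re-read root_free, then leave only a few MB
second_fill = free_after_first - rabbit_disk_free_limit - 1  # 507 MB written to /root/bigfile2
free_after_second = free_after_first - second_fill           # ~6 MB left on /

print(first_fill, free_after_first, second_fill, free_after_second)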