Test for pacemaker resources when disk is full

Add a test case covering the scenario where the root filesystem is filled on the primary controller.
 Add health_check_sanity_smoke_ha and health_check_ha to ostf_base_actions.py.
 Move health_check to ostf_base_actions.py.

 Closes-Bug: #1500446

Change-Id: I17055527e99c72b790544e7baef997eab20258e0
Artem Grechanichenko 2015-11-10 16:15:32 +02:00
parent 37c05e38a6
commit a88049c182
7 changed files with 448 additions and 19 deletions


@@ -39,6 +39,16 @@ Destroy controllers
.. automodule:: system_test.tests.strength.destroy_controllers
:members:
Fill root and check pacemaker
-----------------------------
.. automodule:: system_test.tests.strength.filling_root
:members:
OSTF base actions
-----------------
.. automodule:: system_test.tests.ostf_base_actions
:members:
Strength Base
-------------
.. automodule:: system_test.tests.strength.strength_base


@@ -11,7 +11,9 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from xml import etree
from xml.etree import ElementTree
from fuelweb_test.helpers.utils import run_on_remote_get_results
def get_pacemaker_nodes_attributes(cibadmin_status_xml):
@@ -50,7 +52,7 @@ def get_pacemaker_nodes_attributes(cibadmin_status_xml):
...
}
"""
root = etree.fromstring(cibadmin_status_xml)
root = ElementTree.fromstring(cibadmin_status_xml)
nodes = {}
for node_state in root.iter('node_state'):
node_name = node_state.get('uname')
@@ -88,9 +90,19 @@ def get_pcs_nodes(pcs_status_xml):
}
"""
root = etree.fromstring(pcs_status_xml)
root = ElementTree.fromstring(pcs_status_xml)
nodes = {}
for nodes_group in root.iter('nodes'):
for node in nodes_group:
nodes[node.get('name')] = node.attrib
return nodes
def parse_pcs_status_xml(remote):
"""Run 'pcs status xml' on the node and return its raw XML output
:param remote: SSHClient instance
:return: string with 'pcs status xml' output (feed it to get_pcs_nodes)
"""
pcs_status_xml = run_on_remote_get_results(
remote, 'pcs status xml')['stdout_str']
return pcs_status_xml


@@ -128,6 +128,7 @@ def import_tests():
from system_test.tests import test_create_deploy_ostf # noqa
from system_test.tests import test_deploy_check_rados # noqa
from system_test.tests.strength import destroy_controllers # noqa
from system_test.tests.strength import filling_root # noqa
from system_test.tests import test_fuel_migration # noqa


@@ -23,6 +23,7 @@ from fuelweb_test import settings as test_settings
from system_test import logger
from system_test.tests import base_actions_factory
from system_test.tests.ostf_base_actions import HealthCheckActions
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
from system_test.helpers.decorators import action
@@ -137,7 +138,7 @@ class PrepareBase(base_actions_factory.BaseActionsFactory):
]
class ActionsBase(PrepareBase):
class ActionsBase(PrepareBase, HealthCheckActions):
"""Basic actions for acceptance cases
For choosing action order use actions_order variable, set list of actions
@@ -148,7 +149,6 @@ class ActionsBase(PrepareBase):
add_nodes - add nodes to environment
deploy_cluster - deploy en environment
network_check - run network check
health_check - run all ostf tests
reset_cluster - reset an environment (NotImplemented)
delete_cluster - delete en environment (NotImplemented)
stop_deploy - stop deploying of environment (NotImplemented)
@@ -264,20 +264,6 @@ class ActionsBase(PrepareBase):
self.fuel_web.verify_network(self.cluster_id)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check(self):
"""Run health checker
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest()
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
should_fail=getattr(self, 'ostf_tests_should_failed', 0))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def save_load_environment(self):


@@ -0,0 +1,79 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE_2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from proboscis import SkipTest
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
from system_test.helpers.decorators import action
from system_test.tests import base_actions_factory
class HealthCheckActions(base_actions_factory.BaseActionsFactory):
"""Basic actions for OSTF tests
health_check - run sanity and smoke OSTF tests
health_check_sanity_smoke_ha - run sanity, smoke and ha OSTF tests
health_check_ha - run ha OSTF tests
"""
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check(self):
"""Run health checker
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check_sanity_smoke_ha(self):
"""Run health checker Sanity, Smoke and HA
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
test_sets=['sanity', 'smoke', 'ha'],
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def health_check_ha(self):
"""Run health checker HA
Skip action if cluster doesn't exist
"""
if self.cluster_id is None:
raise SkipTest(
"The cluster_id is not specified, can not run ostf"
)
self.fuel_web.run_ostf(
cluster_id=self.cluster_id,
test_sets=['ha'],
should_fail=getattr(self, 'ostf_tests_should_failed', 0),
failed_test_name=getattr(self, 'failed_test_name', None))
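All three actions read their OSTF expectations through getattr() with defaults, so a case class only defines ostf_tests_should_failed or failed_test_name when it expects failures. A standalone sketch of that lookup pattern (the class names below are hypothetical, not from fuel-qa):

class DefaultCase(object):
    """Case that expects all OSTF tests to pass."""
    pass

class FailureAwareCase(object):
    """Case that tolerates one known OSTF failure."""
    ostf_tests_should_failed = 1
    failed_test_name = ['some OSTF test name']  # placeholder, not a real test name

def collect_ostf_kwargs(case):
    # Mirrors the getattr() defaults used by the health_check* actions
    return {
        'should_fail': getattr(case, 'ostf_tests_should_failed', 0),
        'failed_test_name': getattr(case, 'failed_test_name', None),
    }

print(collect_ostf_kwargs(DefaultCase()))       # should_fail=0, failed_test_name=None
print(collect_ostf_kwargs(FailureAwareCase()))  # should_fail=1, one named failure allowed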


@@ -0,0 +1,86 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE_2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from proboscis import factory
from system_test.helpers.utils import case_factory
from system_test.tests.strength import strength_base
class FillRootPrimaryController(
strength_base.FillRootBaseActions
):
"""Fill root filesystem on primary controller and check pacemaker
Scenario:
1. Setup master node
2. Config default repositories for release
3. Bootstrap slaves and make snapshot ready
4. Revert snapshot ready
5. Create Environment
6. Add nodes to Environment
7. Run network checker
8. Deploy Environment
9. Run network checker
10. Run OSTF
11. Make or use existing snapshot of ready Environment
12. Get pcs initial state
13. Fill root filesystem on primary controller
above rabbit_disk_free_limit of 5 MB
14. Check for stopping pacemaker resources
15. Run OSTF Sanity and Smoke tests
16. Fill root filesystem on primary controller
below rabbit_disk_free_limit of 5 MB
17. Check for stopped pacemaker resources
18. Run OSTF Sanity and Smoke tests
19. Clean up space on root filesystem on
primary controller
20. Check for started pacemaker resources
21. Run OSTF Sanity, Smoke, HA
"""
base_group = ['system_test',
'system_test.failover',
'system_test.failover.filling_root'
]
actions_order = [
'setup_master',
'config_release',
'make_slaves',
'revert_slaves',
'create_env',
'add_nodes',
'network_check',
'deploy_cluster',
'network_check',
'health_check',
'save_load_environment',
'get_pcs_initial_state',
'fill_root_above_rabbit_disk_free_limit',
'check_stopping_resources',
'health_check',
'fill_root_below_rabbit_disk_free_limit',
'check_stopping_resources',
'health_check',
'clean_up_space_on_root',
'check_starting_resources',
'health_check_sanity_smoke_ha',
]
@factory
def cases():
return (case_factory(FillRootPrimaryController))
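The case above is driven entirely by actions_order: case_factory and BaseActionsFactory assemble that list into an ordered proboscis test case. A toy standalone sketch of the idea (it does not reproduce the real factory, only the "named actions executed in declared order" pattern):

class MiniCase(object):
    actions_order = ['create_env', 'deploy_cluster', 'health_check_ha']

    def create_env(self):
        print('create_env')

    def deploy_cluster(self):
        print('deploy_cluster')

    def health_check_ha(self):
        print('health_check_ha')

    def run(self):
        # Execute the actions strictly in the declared order
        for name in self.actions_order:
            getattr(self, name)()

MiniCase().run()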


@@ -12,10 +12,18 @@
# License for the specific language governing permissions and limitations
# under the License.
import time
from proboscis.asserts import assert_true
from devops.helpers.helpers import wait
from fuelweb_test.helpers.checkers import check_file_exists
from fuelweb_test.helpers.utils import run_on_remote_get_results
from fuelweb_test.helpers.pacemaker import get_pacemaker_nodes_attributes
from fuelweb_test.helpers.pacemaker import get_pcs_nodes
from fuelweb_test.helpers.pacemaker import parse_pcs_status_xml
from system_test.tests import actions_base
from system_test.helpers.decorators import make_snapshot_if_step_fail
from system_test.helpers.decorators import deferred_decorator
@@ -100,3 +108,250 @@ class StrengthBaseActions(actions_base.ActionsBase):
node.name,
online_d_ctrls,
self.destroyed_devops_nodes)
class FillRootBaseActions(actions_base.ActionsBase):
def __init__(self, config=None):
super(FillRootBaseActions, self).__init__(config)
self.ostf_tests_should_failed = 0
@deferred_decorator([make_snapshot_if_step_fail])
@action
def get_pcs_initial_state(self):
"""Get controllers initial status in pacemaker"""
self.primary_controller = self.fuel_web.get_nailgun_primary_node(
self.env.d_env.nodes().slaves[0])
self.primary_controller_fqdn = str(
self.fuel_web.fqdn(self.primary_controller))
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
pcs_status = parse_pcs_status_xml(remote)
root_free = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')['stdout_str']
self.primary_controller_space_on_root = get_pacemaker_nodes_attributes(
root_free)[self.primary_controller_fqdn]['root_free']
self.disk_monitor_limit = 512
self.rabbit_disk_free_limit = 5
self.pacemaker_restart_timeout = 600
self.pcs_check_timeout = 300
self.primary_controller_space_to_filled = str(
int(
self.primary_controller_space_on_root
) - self.disk_monitor_limit - 1)
self.pcs_status = get_pcs_nodes(pcs_status)
self.slave_nodes_fqdn = list(
set(self.pcs_status.keys()).difference(
set(self.primary_controller_fqdn.split())))
running_resources_slave_1 = int(
self.pcs_status[self.slave_nodes_fqdn[0]]['resources_running'])
running_resources_slave_2 = int(
self.pcs_status[self.slave_nodes_fqdn[1]]['resources_running'])
self.slave_node_running_resources = str(min(running_resources_slave_1,
running_resources_slave_2
)
)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_above_rabbit_disk_free_limit(self):
"""Filling root filesystem on primary controller"""
logger.info(
"Free space in root on primary controller - {}".format(
self.primary_controller_space_on_root
))
logger.info(
"Need to fill space on root - {}".format(
self.primary_controller_space_to_filled
))
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
run_on_remote_get_results(
remote, 'fallocate -l {}M /root/bigfile'.format(
self.primary_controller_space_to_filled))
check_file_exists(remote, '/root/bigfile')
@deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_below_rabbit_disk_free_limit(self):
"""Fill root more to below rabbit disk free limit"""
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
pacemaker_attributes = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')['stdout_str']
controller_space_on_root = get_pacemaker_nodes_attributes(
pacemaker_attributes)[self.primary_controller_fqdn][
'root_free']
logger.info(
"Free space in root on primary controller - {}".format(
controller_space_on_root
))
controller_space_to_filled = str(
int(
controller_space_on_root
) - self.rabbit_disk_free_limit - 1)
logger.info(
"Need to fill space on root - {}".format(
controller_space_to_filled
))
run_on_remote_get_results(
remote, 'fallocate -l {}M /root/bigfile2'.format(
controller_space_to_filled))
check_file_exists(remote, '/root/bigfile2')
@deferred_decorator([make_snapshot_if_step_fail])
@action
def check_stopping_resources(self):
"""Check stopping pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute():
logger.info("Checking for '#health_disk' attribute")
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' in pcs_attribs[
self.primary_controller_fqdn]
def checking_for_red_in_health_disk_attribute():
logger.info(
"Checking for '#health_disk' attribute have 'red' value")
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return pcs_attribs[self.primary_controller_fqdn][
'#health_disk'] == 'red'
def check_stopping_resources():
logger.info(
"Checking for 'running_resources "
"attribute have '0' value")
pcs_status = parse_pcs_status_xml(remote)
pcs_attribs = get_pcs_nodes(pcs_status)
return pcs_attribs[self.primary_controller_fqdn][
'resources_running'] == '0'
wait(checking_health_disk_attribute,
"Attribute #health_disk wasn't appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(checking_for_red_in_health_disk_attribute,
"Attribute #health_disk doesn't have 'red' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(check_stopping_resources,
"Attribute 'running_resources' doesn't have '0' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
@deferred_decorator([make_snapshot_if_step_fail])
@action
def clean_up_space_on_root(self):
"""Clean up space on root filesystem on primary controller"""
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
run_on_remote_get_results(
remote, 'rm /root/bigfile /root/bigfile2')
run_on_remote_get_results(
remote,
'crm node status-attr {} delete "#health_disk"'.format(
self.primary_controller_fqdn))
@deferred_decorator([make_snapshot_if_step_fail])
@action
def check_starting_resources(self):
"""Check starting pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute_is_not_present():
logger.info(
"Checking for '#health_disk' attribute "
"is not present on node {}".format(
self.primary_controller_fqdn))
cibadmin_status_xml = run_on_remote_get_results(
remote, 'cibadmin --query --scope status')[
'stdout_str']
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' not in pcs_attribs[
self.primary_controller_fqdn]
def check_started_resources():
logger.info(
"Checking for 'running_resources' attribute "
"have {} value on node {}".format(
self.slave_node_running_resources,
self.primary_controller_fqdn))
pcs_status = parse_pcs_status_xml(remote)
pcs_attribs = get_pcs_nodes(pcs_status)
return pcs_attribs[self.primary_controller_fqdn][
'resources_running'] == self.slave_node_running_resources
wait(checking_health_disk_attribute_is_not_present,
"Attribute #health_disk was appeared in attributes "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
wait(check_started_resources,
"Attribute 'running_resources' doesn't have {} value "
"on node {} in {} seconds".format(
self.slave_node_running_resources,
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
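For reference, the two fallocate sizes computed above follow this arithmetic (a back-of-the-envelope sketch with a hypothetical root_free value; all numbers are MB):

root_free = 10000              # hypothetical root_free attribute reported by pacemaker
disk_monitor_limit = 512       # threshold used by the test
rabbit_disk_free_limit = 5     # threshold used by the test

# fill_root_above_rabbit_disk_free_limit: leave just over disk_monitor_limit free
first_fill = root_free - disk_monitor_limit - 1              # 9487 MB written to /root/bigfile
free_after_first = root_free - first_fill                    # ~513 MB left on /

# fill_root_below_rabbit_disk_free_limit: re-read root_free, then leave only a few MB
second_fill = free_after_first - rabbit_disk_free_limit - 1  # 507 MB written to /root/bigfile2
free_after_second = free_after_first - second_fill           # ~6 MB left on /

print(first_fill, free_after_first, second_fill, free_after_second)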