fuel-qa/system_test/actions/strength_actions.py
Artem Grechanichenko e7a08c1a21 Add check that calculated space to fill is not "negative"
To calculate the space to fill on the root partition in order to trigger the
RabbitMQ alarm, take the free space left on root and subtract an additional 6M.
If the calculated value is less than 0, skip this action.

Change-Id: I901833d4211789672b6cedd12db80ac8a92f8d5d
Closes-Bug: #1631928
(cherry picked from commit 3faf7d9798)
2016-12-22 12:41:51 +00:00
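
For example, with 100M free on the root partition the step computes
100 - 5 - 1 = 94M to fill; with only 6M free it computes 6 - 5 - 1 = 0, which
is less than 1, so the fill step is skipped (see
fill_root_below_rabbit_disk_free_limit below).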


# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE_2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals

import time

from devops.helpers.helpers import wait
from proboscis.asserts import assert_true

from fuelweb_test.helpers.pacemaker import get_pacemaker_nodes_attributes
from fuelweb_test.helpers.pacemaker import get_pcs_nodes
from fuelweb_test.helpers.pacemaker import parse_pcs_status_xml
from fuelweb_test.helpers.ssh_manager import SSHManager

from system_test import logger
from system_test import deferred_decorator
from system_test import action
from system_test.helpers.decorators import make_snapshot_if_step_fail

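# SSHManager is a singleton in fuel-qa, so this module-level instance shares
# its connections with the self.ssh_manager attribute used inside the action
# classes below.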
ssh_manager = SSHManager()


# pylint: disable=no-member
# noinspection PyUnresolvedReferences
class StrengthActions(object):
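    # Class-level bookkeeping: destroyed controllers are recorded so that the
    # expected number of failing OSTF tests and OpenStack services can be
    # raised by one per destroyed node (see _destroy_controller).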
destroyed_devops_nodes = []
ostf_tests_should_failed = 0
os_service_should_failed = 0

    def _destroy_controller(self, devops_node_name):
logger.info("Suspend {} node".format(devops_node_name))
d_node = self.env.d_env.get_node(name=devops_node_name)
d_node.suspend(False)
self.ostf_tests_should_failed += 1
self.os_service_should_failed += 1
if d_node not in self.destroyed_devops_nodes:
self.destroyed_devops_nodes.append(d_node)
else:
logger.warning("Try destroy already destroyed node")

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def wait_offline_nodes(self):
"""Wait offline status of destroyed nodes"""
assert_true(self.destroyed_devops_nodes,
"No destroyed nodes in Environment")
def wait_offline_nodes():
n_nodes = [
self.fuel_web.get_nailgun_node_by_devops_node(node) for node in
self.destroyed_devops_nodes]
n_nodes = [node['online'] for node in n_nodes]
return n_nodes.count(False) == 0

        wait(wait_offline_nodes, timeout=60 * 5,
timeout_msg='Nodes failed to become offline')

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def check_ha_service_ready(self):
"""Wait for HA services ready"""
self.fuel_web.assert_ha_services_ready(self.cluster_id)

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def check_os_services_ready(self):
"""Wait until OpenStack services are UP"""
self.fuel_web.assert_os_services_ready(
self.cluster_id,
should_fail=self.os_service_should_failed)

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def wait_galera_cluster(self):
"""Wait until MySQL Galera is UP on online controllers"""
n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
self.cluster_id,
['controller'])
d_ctrls = {self.fuel_web.get_devops_node_by_nailgun_node(node)
for node in n_ctrls}
self.fuel_web.wait_mysql_galera_is_up(
[n.name for n in d_ctrls - set(self.destroyed_devops_nodes)],
timeout=300)

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def check_pacemaker_status(self):
"""Check controllers status in pacemaker"""
n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
self.cluster_id,
['controller'])
d_ctrls = {self.fuel_web.get_devops_node_by_nailgun_node(node)
for node in n_ctrls}
online_d_ctrls = d_ctrls - set(self.destroyed_devops_nodes)
for node in online_d_ctrls:
logger.info("Check pacemaker status on {}".format(node.name))
self.fuel_web.assert_pacemaker(
node.name,
online_d_ctrls,
self.destroyed_devops_nodes)


# noinspection PyUnresolvedReferences
class FillRootActions(object):
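    # Limits below are in megabytes, matching the 'fallocate -l {}M' calls in
    # the fill steps. disk_monitor_limit mirrors the free-space threshold of
    # the pacemaker disk monitor that flips a node's '#health_disk' attribute
    # to 'red'; rabbit_disk_free_limit mirrors the RabbitMQ disk_free_limit
    # alarm. Both values are this test's assumptions about deployed defaults.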
ostf_tests_should_failed = 0
primary_controller = None
primary_controller_fqdn = None
primary_controller_space_on_root = 0
disk_monitor_limit = 512
rabbit_disk_free_limit = 5
pacemaker_restart_timeout = 600
pcs_check_timeout = 300
primary_controller_space_to_filled = 0
pcs_status = None
slave_nodes_fqdn = None
slave_node_running_resources = None

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def get_pcs_initial_state(self):
"""Get controllers initial status in pacemaker"""
self.primary_controller = self.fuel_web.get_nailgun_primary_node(
self.env.d_env.nodes().slaves[0])
self.primary_controller_fqdn = str(
self.fuel_web.fqdn(self.primary_controller))
nail_node = self.fuel_web.get_nailgun_node_by_devops_node(
self.primary_controller)
pcs_status = parse_pcs_status_xml(nail_node['ip'])
        with self.fuel_web.get_ssh_for_node(
                self.primary_controller.name) as remote:
            cibadmin_status_xml = remote.check_call(
                'cibadmin --query --scope status').stdout_str

        self.primary_controller_space_on_root = get_pacemaker_nodes_attributes(
            cibadmin_status_xml)[self.primary_controller_fqdn]['root_free']
self.disk_monitor_limit = 512
self.rabbit_disk_free_limit = 5
self.pacemaker_restart_timeout = 600
self.pcs_check_timeout = 300
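        # Size the fill so that only (disk_monitor_limit + 1) MB of free
        # space remains on the root partition after fallocate runs.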
self.primary_controller_space_to_filled = str(
int(
self.primary_controller_space_on_root
) - self.disk_monitor_limit - 1)
self.pcs_status = get_pcs_nodes(pcs_status)
self.slave_nodes_fqdn = list(
set(self.pcs_status.keys()).difference(
set(self.primary_controller_fqdn.split())))
running_resources_slave_1 = int(
self.pcs_status[self.slave_nodes_fqdn[0]]['resources_running'])
running_resources_slave_2 = int(
self.pcs_status[self.slave_nodes_fqdn[1]]['resources_running'])
        self.slave_node_running_resources = str(
            min(running_resources_slave_1, running_resources_slave_2))

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_above_rabbit_disk_free_limit(self):
"""Filling root filesystem on primary controller"""
logger.info(
"Free space in root on primary controller - {}".format(
self.primary_controller_space_on_root
))
        logger.info(
            "Space to fill on root - {}M".format(
                self.primary_controller_space_to_filled))
node = self.fuel_web.get_nailgun_node_by_name(
self.primary_controller.name)
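        # fallocate preallocates the whole file in one call (no slow dd
        # loop); sync then flushes the filesystem so the reduced free space
        # becomes visible to the monitors right away.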
self.ssh_manager.execute_on_remote(
ip=node['ip'],
cmd='fallocate -l {}M /root/bigfile && sync'.format(
self.primary_controller_space_to_filled)
)
self.ssh_manager.execute_on_remote(
ip=node['ip'],
cmd='ls /root/bigfile',
assert_ec_equal=[0])

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def fill_root_below_rabbit_disk_free_limit(self):
"""Fill root more to below rabbit disk free limit"""
node = self.fuel_web.get_nailgun_node_by_name(
self.primary_controller.name)
pacemaker_attributes = self.ssh_manager.execute_on_remote(
ip=node['ip'],
cmd='cibadmin --query --scope status'
)['stdout_str']
controller_space_on_root = get_pacemaker_nodes_attributes(
pacemaker_attributes)[self.primary_controller_fqdn]['root_free']
logger.info("Free space in root on primary controller - {}".format(
controller_space_on_root))
controller_space_to_filled = str(
int(controller_space_on_root) - self.rabbit_disk_free_limit - 1
)
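        # Guard for bug #1631928: if the first fill step already left less
        # than (rabbit_disk_free_limit + 1) MB free, the computed size would
        # be zero or negative and fallocate would fail, so skip this step.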
if int(controller_space_to_filled) < 1:
            logger.info(
                "Nothing to do: free space in the root partition is "
                "already close to the {}M limit".format(
                    self.rabbit_disk_free_limit))
return
logger.info("Need to fill space on root - {}".format(
controller_space_to_filled))
self.ssh_manager.execute_on_remote(
ip=node['ip'],
cmd='fallocate -l {}M /root/bigfile2 && sync'.format(
controller_space_to_filled)
)
self.ssh_manager.execute_on_remote(
ip=node['ip'],
cmd='ls /root/bigfile2',
assert_ec_equal=[0])

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def check_stopping_resources(self):
"""Check stopping pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
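
        # Pacemaker needs time to re-run its monitors and propagate updated
        # node attributes, so sleep for a full interval before polling.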
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute():
logger.info("Checking for '#health_disk' attribute")
cibadmin_status_xml = remote.check_call(
'cibadmin --query --scope status').stdout_str
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' in pcs_attribs[
self.primary_controller_fqdn]

            def checking_for_red_in_health_disk_attribute():
                logger.info(
                    "Checking that '#health_disk' attribute has 'red' value")
cibadmin_status_xml = remote.check_call(
'cibadmin --query --scope status').stdout_str
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return pcs_attribs[self.primary_controller_fqdn][
'#health_disk'] == 'red'

            def check_stopping_resources():
                logger.info(
                    "Checking that 'resources_running' "
                    "attribute has '0' value")
nail_node = self.fuel_web.get_nailgun_node_by_devops_node(
self.primary_controller)
pcs_status = parse_pcs_status_xml(nail_node['ip'])
pcs_attribs = get_pcs_nodes(pcs_status)
return pcs_attribs[self.primary_controller_fqdn][
'resources_running'] == '0'

            wait(checking_health_disk_attribute,
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk wasn't appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
wait(checking_for_red_in_health_disk_attribute,
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk doesn't have 'red' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
wait(check_stopping_resources,
timeout=self.pcs_check_timeout,
timeout_msg="Attribute 'running_resources' "
"doesn't have '0' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def clean_up_space_on_root(self):
"""Clean up space on root filesystem on primary controller"""
node = self.fuel_web.get_nailgun_node_by_name(
self.primary_controller.name)
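        # /root/bigfile2 exists only if the second fill step was not skipped
        # by its negative-size guard, so check for it before removing.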
bigfile_2 = ssh_manager.isfile_on_remote(
ip=node['ip'],
path='/root/bigfile2')
path = str("/root/bigfile {}".format(
"/root/bigfile2" if bigfile_2 else ""))
ssh_manager.rm_rf_on_remote(
ip=node['ip'],
path=path)
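        # Dropping the transient '#health_disk' status attribute lets
        # pacemaker re-evaluate the node and start resources again now that
        # the space has been freed.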
delete_attr = str(
'crm node status-attr {} delete "#health_disk"'.format(
self.primary_controller_fqdn))
ssh_manager.check_call(
ip=node['ip'],
command=delete_attr)

    @deferred_decorator([make_snapshot_if_step_fail])
@action
def check_starting_resources(self):
"""Check starting pacemaker resources"""
logger.info(
"Waiting {} seconds for changing pacemaker status of {}".format(
self.pacemaker_restart_timeout,
self.primary_controller_fqdn))
time.sleep(self.pacemaker_restart_timeout)
with self.fuel_web.get_ssh_for_node(
self.primary_controller.name) as remote:
def checking_health_disk_attribute_is_not_present():
                logger.info(
                    "Checking that '#health_disk' attribute "
                    "is not present on node {}".format(
                        self.primary_controller_fqdn))
cibadmin_status_xml = remote.check_call(
'cibadmin --query --scope status').stdout_str
pcs_attribs = get_pacemaker_nodes_attributes(
cibadmin_status_xml)
return '#health_disk' not in pcs_attribs[
self.primary_controller_fqdn]

            wait(checking_health_disk_attribute_is_not_present,
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk was appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
self.fuel_web.assert_ha_services_ready(self.cluster_id)