tobiko/tobiko/tests/faults/ha/test_cloud_recovery.py

89 lines
2.7 KiB
Python

from __future__ import absolute_import
import random
import testtools
from tobiko.shell import ping
from tobiko.shell import sh
from tobiko.tests.faults.ha import cloud_disruptions
from tobiko.tripleo import pacemaker
from tobiko.tripleo import processes
from tobiko.tripleo import containers
from tobiko.tripleo import neutron
from tobiko.tripleo import nova
from tobiko.openstack import stacks
import tobiko
def nodes_health_check():
    """Run the overcloud health checks, then create a new server stack.

    Calls, in order: the pacemaker resources check, the overcloud processes
    check, the nova services check, the neutron agents check and the
    TripleO containers check. Finally ``check_vm_create`` is called with a
    randomly numbered stack name.
    """
    # NOTE: this function is expected to change in a future commit.
    # NOTE(review): the values returned by the two local check helpers are
    # not inspected here; presumably the underlying checks fail on their
    # own when the cloud is unhealthy -- TODO confirm.
    check_pacemaker_resources_health()
    check_overcloud_processes_health()
    nova.check_nova_services_health()
    neutron.check_neutron_agents_health()
    containers.assert_all_tripleo_containers_running()

    # pick a random numeric suffix so that a new stack gets created
    stack_suffix = random.randint(0, 10000)
    check_vm_create(stack_name='stack{}'.format(stack_suffix))

    # TODO: test existing created servers
# check vm create with ssh and ping checks
def check_vm_create(stack_name):
    """Create a Cirros server stack and check SSH and ICMP connectivity.

    :param stack_name: unique stack name, so that a new VM is created on
        each call
    """
    # set up the server stack and wait for its creation to complete
    server_stack = stacks.CirrosServerStackFixture(stack_name=stack_name)
    tobiko.reset_fixture(server_stack)
    server_stack.wait_for_create_complete()

    # SSH connectivity to the floating IP address
    sh.get_hostname(ssh_client=server_stack.ssh_client)

    # ICMP connectivity to the floating IP address
    ping_result = ping.ping_until_received(server_stack.floating_ip_address)
    ping_result.assert_replied()
# check cluster failed statuses
def check_pacemaker_resources_health():
    """Return ``all_healthy`` of a new ``PacemakerResourcesStatus``."""
    resources_status = pacemaker.PacemakerResourcesStatus()
    return resources_status.all_healthy
def check_overcloud_processes_health():
    """Return the basic-processes status of the overcloud.

    :return: the ``basic_overcloud_processes_running`` attribute of a newly
        created ``processes.OvercloudProcessesStatus``
    """
    processes_status = processes.OvercloudProcessesStatus()
    return processes_status.basic_overcloud_processes_running
class RebootNodesTest(testtools.TestCase):
    """HA tests: health check -> disruptive action -> health check.

    The disruptive action is a function that runs some disruptive scenario
    on an overcloud.
    """

    def test_overcloud_health_check(self):
        # baseline: run the health checks only, without any disruption
        nodes_health_check()

    def test_reboot_controllers_recovery(self):
        # health check, reset all controller nodes, health check again
        nodes_health_check()
        cloud_disruptions.reset_all_controller_nodes()
        nodes_health_check()

    def test_reboot_computes_recovery(self):
        # record the compute containers before the disruption
        nodes_health_check()
        containers_before = containers.list_containers(group='compute')

        # hard reset all compute nodes, then re-check the cloud
        cloud_disruptions.reset_all_compute_nodes(hard_reset=True)
        nodes_health_check()

        # record the compute containers again, start all instances and
        # compare the container states from before and after the reset
        containers_after = containers.list_containers(group='compute')
        nova.start_all_instances()
        containers.assert_equal_containers_state(
            containers_before, containers_after)
# [..]
# More tests to follow, for example:
#   - run health checks
#   - fault: stop the rabbitmq service on one controller
#   - run health checks again