Make the reboot-computes tests more robust and their logging clearer

Change-Id: Ie9d6f2d2007443dacdb2ce30cedd1b8ded4b2361
This commit is contained in:
pinikomarov 2020-03-05 02:27:59 +02:00
parent d6c94d9c22
commit 8781f5281f
5 changed files with 31 additions and 49 deletions
roles/infrared/tasks/templates
tobiko

@ -1,35 +0,0 @@
cloud_management:
driver: universal
node_discover:
driver: node_list
args:
{% for host_name in groups.overcloud_nodes|sort %}
- fqdn: {{ host_name }}
ip: {{ hostvars[host_name]['ansible_host'] }}
auth:
username: heat-admin
private_key_file: /home/stack/.ssh/id_rsa
become: true
{% endfor %}
services:
openvswitch:
driver: system_service
args:
service_name: openvswitch
grep: openvswitch
containers:
neutron_ovs_agent:
driver: docker_container
args:
container_name: neutron_ovs_agent
neutron_metadata_agent:
driver: docker_container
args:
container_name: neutron_metadata_agent
neutron_api:
driver: docker_container
args:
container_name: neutron_api

@ -1,5 +1,5 @@
[DEFAULT] [DEFAULT]
debug = true #debug = true
log_file = "{{ test.dir }}/tobiko.log" log_file = "{{ test.dir }}/tobiko.log"
log_dir = . log_dir = .

@ -15,18 +15,16 @@ from tobiko.openstack import stacks
import tobiko import tobiko
def nodes_health_check(): def overcloud_health_checks(passive_checks_only=False):
# this method will be changed in a future commit # this method will be changed in a future commit
check_pacemaker_resources_health() check_pacemaker_resources_health()
check_overcloud_processes_health() check_overcloud_processes_health()
nova.check_nova_services_health() nova.check_nova_services_health()
neutron.check_neutron_agents_health() neutron.check_neutron_agents_health()
containers.assert_all_tripleo_containers_running() containers.assert_all_tripleo_containers_running()
# create a unique stack if not passive_checks_only:
check_vm_create(stack_name='stack{}'.format(random.randint(0, 10000))) # create a unique stack
check_vm_create(stack_name='stack{}'.format(random.randint(0, 10000)))
# TODO:
# Test existing created servers
# check vm create with ssh and ping checks # check vm create with ssh and ping checks
@ -62,25 +60,33 @@ class RebootNodesTest(testtools.TestCase):
disruptive_action: a function that runs some disruptive_action: a function that runs some
disruptive scenario on an overcloud""" disruptive scenario on an overcloud"""
def test_overcloud_health_check(self): def test_overcloud_health_check(self):
nodes_health_check() overcloud_health_checks()
def test_reboot_controllers_recovery(self): def test_reboot_controllers_recovery(self):
nodes_health_check() overcloud_health_checks()
cloud_disruptions.reset_all_controller_nodes() cloud_disruptions.reset_all_controller_nodes()
nodes_health_check() overcloud_health_checks()
def test_reboot_computes_recovery(self): def test_reboot_computes_recovery(self):
nodes_health_check()
overcloud_health_checks()
computes_containers_dict_before = \ computes_containers_dict_before = \
containers.list_containers(group='compute') containers.list_containers(group='compute')
cloud_disruptions.reset_all_compute_nodes(hard_reset=True) cloud_disruptions.reset_all_compute_nodes(hard_reset=True)
nodes_health_check()
overcloud_health_checks(passive_checks_only=True)
nova.start_all_instances()
computes_containers_dict_after = \ computes_containers_dict_after = \
containers.list_containers(group='compute') containers.list_containers(group='compute')
nova.start_all_instances()
containers.assert_equal_containers_state( containers.assert_equal_containers_state(
computes_containers_dict_before, computes_containers_dict_after) computes_containers_dict_before, computes_containers_dict_after)
# [..] # [..]
# more tests to follow # more tests to follow
# run health checks # run health checks

@ -229,6 +229,8 @@ def assert_equal_containers_state(expected_containers_list,
to_string(index=False))) to_string(index=False)))
LOG.info('container states mismatched:\n{}\n'.format(failures)) LOG.info('container states mismatched:\n{}\n'.format(failures))
time.sleep(interval) time.sleep(interval)
LOG.info('Retrying , timeout at: {}'
.format(timeout-(time.time() - start)))
actual_containers_list = list_containers(group='compute') actual_containers_list = list_containers(group='compute')
else: else:
LOG.info("assert_equal_containers_state :" LOG.info("assert_equal_containers_state :"

@ -44,4 +44,13 @@ def start_all_instances():
nova_client = nova.get_nova_client() nova_client = nova.get_nova_client()
servers = nova_client.servers.list() servers = nova_client.servers.list()
for instance in servers: for instance in servers:
nova.activate_server(instance) activated_instance = nova.activate_server(instance)
time.sleep(3)
instance_info = 'instance {nova_instance} is {state} on {host}'.format(
nova_instance=activated_instance.name,
state=activated_instance.status,
host=activated_instance._info[ # pylint: disable=W0212
'OS-EXT-SRV-ATTR:hypervisor_hostname'])
LOG.info(instance_info)
if activated_instance.status != 'ACTIVE':
tobiko.fail(instance_info)