Make the reboot-computes tests more robust and their logging clearer

Change-Id: Ie9d6f2d2007443dacdb2ce30cedd1b8ded4b2361
2020-03-05 02:27:59 +02:00 · 2020-03-05 02:27:59 +02:00 · 8781f5281f
commit 8781f5281f
parent d6c94d9c22
5 changed files with 31 additions and 49 deletions
--- a/roles/infrared/tasks/templates/os-faults.yaml.j2
+++ b/roles/infrared/tasks/templates/os-faults.yaml.j2
@ -1,35 +0,0 @@
 cloud_management:
  driver: universal
 node_discover:
  driver: node_list
  args:
 {% for host_name in groups.overcloud_nodes|sort %}
     - fqdn: {{ host_name }}
       ip: {{ hostvars[host_name]['ansible_host'] }}
       auth:
        username: heat-admin
        private_key_file: /home/stack/.ssh/id_rsa
        become: true
 {% endfor %}
 services:
  openvswitch:
    driver: system_service
    args:
      service_name: openvswitch
      grep: openvswitch
 containers:
  neutron_ovs_agent:
    driver: docker_container
    args:
      container_name: neutron_ovs_agent
  neutron_metadata_agent:
    driver: docker_container
    args:
      container_name: neutron_metadata_agent
  neutron_api:
    driver: docker_container
    args:
      container_name: neutron_api
--- a/roles/infrared/tasks/templates/tobiko.conf.j2
+++ b/roles/infrared/tasks/templates/tobiko.conf.j2
@ -1,5 +1,5 @@
 [DEFAULT]
-debug = true
+#debug = true
 log_file = "{{ test.dir }}/tobiko.log"
 log_dir = .
--- a/tobiko/tests/faults/ha/test_cloud_recovery.py
+++ b/tobiko/tests/faults/ha/test_cloud_recovery.py
@ -15,18 +15,16 @@ from tobiko.openstack import stacks
 import tobiko
-def nodes_health_check():
+def overcloud_health_checks(passive_checks_only=False):
    # this method will be changed in a future commit
    check_pacemaker_resources_health()
    check_overcloud_processes_health()
    nova.check_nova_services_health()
    neutron.check_neutron_agents_health()
    containers.assert_all_tripleo_containers_running()
-    # create a uniq stack
+    if not passive_checks_only:
-    check_vm_create(stack_name='stack{}'.format(random.randint(0, 10000)))
+        # create a unique stack
-
+        check_vm_create(stack_name='stack{}'.format(random.randint(0, 10000)))
    # TODO:
    # Test existing created servers
 # check vm create with ssh and ping checks
@ -62,25 +60,33 @@ class RebootNodesTest(testtools.TestCase):
    disruptive_action: a function that runs some
    disruptive scenario on an overcloud"""
    def test_overcloud_health_check(self):
-        nodes_health_check()
+        overcloud_health_checks()
    def test_reboot_controllers_recovery(self):
-        nodes_health_check()
+        overcloud_health_checks()
        cloud_disruptions.reset_all_controller_nodes()
-        nodes_health_check()
+        overcloud_health_checks()
    def test_reboot_computes_recovery(self):
-        nodes_health_check()
+
        overcloud_health_checks()
        computes_containers_dict_before = \
            containers.list_containers(group='compute')
        cloud_disruptions.reset_all_compute_nodes(hard_reset=True)
-        nodes_health_check()
+
        overcloud_health_checks(passive_checks_only=True)
        nova.start_all_instances()
        computes_containers_dict_after = \
            containers.list_containers(group='compute')
-        nova.start_all_instances()
+
        containers.assert_equal_containers_state(
            computes_containers_dict_before, computes_containers_dict_after)
 # [..]
 # more tests to follow
 # run health checks
--- a/tobiko/tripleo/containers.py
+++ b/tobiko/tripleo/containers.py
@ -229,6 +229,8 @@ def assert_equal_containers_state(expected_containers_list,
                                            to_string(index=False)))
            LOG.info('container states mismatched:\n{}\n'.format(failures))
            time.sleep(interval)
            LOG.info('Retrying , timeout at: {}'
                     .format(timeout-(time.time() - start)))
            actual_containers_list = list_containers(group='compute')
        else:
            LOG.info("assert_equal_containers_state :"
--- a/tobiko/tripleo/nova.py
+++ b/tobiko/tripleo/nova.py
@ -44,4 +44,13 @@ def start_all_instances():
    nova_client = nova.get_nova_client()
    servers = nova_client.servers.list()
    for instance in servers:
-        nova.activate_server(instance)
+        activated_instance = nova.activate_server(instance)
        time.sleep(3)
        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
            nova_instance=activated_instance.name,
            state=activated_instance.status,
            host=activated_instance._info[  # pylint: disable=W0212
                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
        LOG.info(instance_info)
        if activated_instance.status != 'ACTIVE':
            tobiko.fail(instance_info)