diff --git a/tobiko/podman/_client.py b/tobiko/podman/_client.py index 8fee43eba..38b905919 100644 --- a/tobiko/podman/_client.py +++ b/tobiko/podman/_client.py @@ -113,12 +113,16 @@ class PodmanClientFixture(tobiko.SharedFixture): try: podman_remote_socket = self.discover_podman_socket() - podman_remote_socket_uri = 'unix:/tmp/podman.sock' + username = self.ssh_client.connect_parameters['username'] + host = self.ssh_client.connect_parameters["hostname"] + socket = podman_remote_socket + podman_remote_socket_uri = \ + 'unix:/tmp/podman.sock_{}'.format(host) remote_uri = 'ssh://{username}@{host}{socket}'.format( - username=self.ssh_client.connect_parameters['username'], - host=self.ssh_client.connect_parameters["hostname"], - socket=podman_remote_socket) + username=username, + host=host, + socket=socket) client = podman.Client(uri=podman_remote_socket_uri, remote_uri=remote_uri, diff --git a/tobiko/tests/faults/ha/test_cloud_recovery.py b/tobiko/tests/faults/ha/test_cloud_recovery.py index 942660206..54f97e72c 100644 --- a/tobiko/tests/faults/ha/test_cloud_recovery.py +++ b/tobiko/tests/faults/ha/test_cloud_recovery.py @@ -10,6 +10,7 @@ from tobiko.tripleo import pacemaker from tobiko.tripleo import processes from tobiko.tripleo import containers from tobiko.tripleo import neutron +from tobiko.tripleo import nova from tobiko.openstack import stacks import tobiko @@ -18,14 +19,14 @@ def nodes_health_check(): # this method will be changed in future commit check_pacemaker_resources_health() check_overcloud_processes_health() + nova.check_nova_services_health() neutron.check_neutron_agents_health() + containers.assert_all_tripleo_containers_running() # create a uniq stack check_vm_create(stack_name='stack{}'.format(random.randint(0, 10000))) # TODO: - # Test existing created serverstest_controller_containers - # ServerStackResourcesTest().test_server_create() - # Add specific container checks + # Test existing created servers # check vm create with ssh and ping checks 
def assert_containers_running(group, excpected_containers):
    """Assert that the expected containers run on every node of a group.

    Each node returned by ``topology.list_openstack_nodes(group=group)`` is
    inspected: its containers are listed, tabulated and compared against
    ``excpected_containers``. Problems from all nodes are accumulated and
    reported together through a single ``tobiko.fail`` call.

    :param group: OpenStack node group to inspect (e.g. ``'controller'`` or
        ``'compute'``); passed unchanged to ``topology.list_openstack_nodes``.
    :param excpected_containers: iterable of container names that must be
        present and in ``running`` state on every node of the group. The
        misspelled name is kept so existing keyword callers keep working.
    """
    failures = []

    for node in topology.list_openstack_nodes(group=group):
        container_client = get_container_client(node.ssh_client)
        node_containers = list_node_containers(client=container_client)
        containers_list_df = pandas.DataFrame(
            get_container_states_list(node_containers),
            columns=['container_host', 'container_name', 'container_state'])
        LOG.info('node: {} containers list : {}'.format(
            node.name, containers_list_df.to_string(index=False)))

        for container in excpected_containers:
            LOG.info('checking container: {}'.format(container))
            # Select with a boolean mask instead of a string-built query so
            # that a container name holding quotes cannot break the lookup.
            container_states = containers_list_df.loc[
                containers_list_df.container_name == container,
                'container_state']

            if container_states.empty:
                failures.append(
                    'expected container {} not found on node {} ! : \n\n'.
                    format(container, node.name))
                continue

            # Several rows may carry the same container name; check each of
            # them rather than assuming exactly one match.
            for container_state in container_states:
                if container_state != 'running':
                    failures.append(
                        'expected container {} is not running on node {} , '
                        'its state is {}! : \n\n'.format(container,
                                                         node.name,
                                                         container_state))

    if failures:
        tobiko.fail('container states mismatched:\n{!s}', '\n'.join(failures))
    else:
        LOG.info('All tripleo common containers are in running state! ')


def assert_all_tripleo_containers_running():
    """Check that the common TripleO containers run on their node groups.

    Takes no arguments: the expected container names for the ``controller``
    and ``compute`` groups are listed below and each set is verified with
    ``assert_containers_running``.
    """
    common_controller_tripleo_containers = [
        'cinder_api', 'cinder_api_cron', 'cinder_scheduler', 'clustercheck',
        'glance_api', 'heat_api', 'heat_api_cfn', 'heat_api_cron',
        'heat_engine', 'horizon', 'iscsid', 'keystone', 'logrotate_crond',
        'memcached', 'neutron_api', 'nova_api', 'nova_api_cron',
        'nova_conductor', 'nova_metadata', 'nova_scheduler',
        'nova_vnc_proxy',
        'swift_account_auditor', 'swift_account_reaper',
        'swift_account_replicator', 'swift_account_server',
        'swift_container_auditor', 'swift_container_replicator',
        'swift_container_server', 'swift_container_updater',
        'swift_object_auditor', 'swift_object_expirer',
        'swift_object_replicator', 'swift_object_server',
        'swift_object_updater', 'swift_proxy', 'swift_rsync',
    ]

    common_compute_tripleo_containers = [
        'iscsid', 'logrotate_crond', 'nova_compute', 'nova_libvirt',
        'nova_migration_target', 'nova_virtlogd',
    ]

    expected_by_group = [
        ('controller', common_controller_tripleo_containers),
        ('compute', common_compute_tripleo_containers),
    ]
    for group, group_containers in expected_by_group:
        assert_containers_running(group, group_containers)
    # TODO: need to address OSP-version specific containers here.
def check_nova_services_health():
    """Fail when any Nova service is not reported in ``up`` state.

    The services are listed through the client returned by
    ``nova.get_nova_client()``. Every service whose ``state`` attribute
    differs from ``'up'`` is recorded with a dump of its attributes, and all
    recorded entries are reported together through ``tobiko.fail``. Nothing
    is reported when every service is up.
    """
    nova_client = nova.get_nova_client()
    failures = [
        'failed service: {}\n\n'.format(vars(service))
        for service in nova_client.services.list()
        if service.state != 'up']

    if failures:
        # The entries are Nova services, so the message says "services"
        # rather than the "agents" wording used by the Neutron check.
        tobiko.fail(
            'nova services are unhealthy:\n{!s}', '\n'.join(failures))