diff --git a/doc/source/roles/role-tripleo-container-manage.rst b/doc/source/roles/role-tripleo-container-manage.rst index 4792881cd..e7a542d2a 100644 --- a/doc/source/roles/role-tripleo-container-manage.rst +++ b/doc/source/roles/role-tripleo-container-manage.rst @@ -61,6 +61,11 @@ This Ansible role allows to do the following tasks: Note: `tripleo_container_manage_concurrency` parameter is set to 1 by default, and putting higher value than 2 can be expose issue with Podman locks. + If a container is meant to exit after running a script (defined in + EntryPoint), we can check its return code and fail if the code isn't + expected. This is done with `tripleo_container_manage_valid_exit_code`. + If it is set to a list of integers, the role waits for the containers to + exit and then checks their return codes. Here is an example of a playbook: @@ -106,6 +111,11 @@ Roles variables | tripleo_container_manage_config_overrides | {} | Allows to override any | | | | container configuration | +------------------------------------------------+-----------------------------+----------------------------+ +| tripleo_container_manage_valid_exit_code | [] | Allows checking that a | +| | | container returned one of | +| | | the listed exit codes. | +| | | Must be a list, e.g.
[0,3] |
++------------------------------------------------+-----------------------------+----------------------------+
 
 Debug
 ~~~~~
diff --git a/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml b/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
index 3126da642..83a862709 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
@@ -28,3 +28,4 @@ tripleo_container_manage_healthcheck_disabled: false
 tripleo_container_manage_log_path: '/var/log/containers/stdouts'
 tripleo_container_manage_systemd_order: false
 tripleo_container_manage_systemd_teardown: true
+tripleo_container_manage_valid_exit_code: []
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
index ce7e32056..74824ea34 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
@@ -103,3 +103,49 @@
   debug:
     var: containers_changed
   when: tripleo_container_manage_debug | bool
+
+- name: "Block for container exit codes"
+  # Check container exit codes. Runs only when a non-empty list of valid
+  # exit codes is configured, and never in check mode.
+  when:
+    - tripleo_container_manage_valid_exit_code|length != 0
+    - not ansible_check_mode|bool
+  block:
+    - name: "Wait for containers to be exited"
+      podman_container_info:
+        name: "{{ batched_container_data | haskey(attribute='action', reverse=True) | list_of_keys }}"
+      register: podman_containers_infos
+      until: ( podman_containers_infos.containers | selectattr('State.Running', 'equalto', True) |list|length ) == 0
+      # Retry 30 times every 10 seconds so we wait 5 min in total
+      retries: 30
+      delay: 10
+      # We need to ignore the failures since later we print some debug.
+      # We can't use "rescue" here because the debug tasks use
+      # "podman_containers_infos".
+      ignore_errors: true
+      no_log: true
+    # ignore_errors above may also hide a failure of the module itself; if the
+    # registered result then lacks "containers", report it explicitly.
+    - name: "Fail if container information could not be collected"
+      fail:
+        msg: "Unable to inspect containers: {{ podman_containers_infos.msg | default('podman_container_info returned no containers') }}"
+      when: podman_containers_infos.containers is not defined
+    - name: Create a list of containers which didn't exit
+      set_fact:
+        running_containers: >-
+          {{ podman_containers_infos.containers |
+          selectattr('State.Running', 'equalto', True) | map(attribute='Name') | list }}
+    - name: Create a list of containers with bad Exit Codes
+      set_fact:
+        broken_containers: >-
+          {{ podman_containers_infos.containers |
+          rejectattr('State.ExitCode', 'in', tripleo_container_manage_valid_exit_code) | map(attribute='Name') | list }}
+    # Point at the configured log directory instead of a hard-coded path.
+    - name: "Print running containers"
+      fail:
+        msg: "Container(s) which are still running after 5 min: {{ running_containers }}, check logs in {{ tripleo_container_manage_log_path }}"
+      when: running_containers|length != 0
+    - name: "Print failing containers"
+      fail:
+        msg: "Container(s) with bad ExitCode: {{ broken_containers }}, check logs in {{ tripleo_container_manage_log_path }}"
+      when: broken_containers|length != 0