diff --git a/tripleo_ansible/ansible_plugins/action/container_status.py b/tripleo_ansible/ansible_plugins/action/container_status.py
new file mode 100644
index 000000000..43e6d9ff6
--- /dev/null
+++ b/tripleo_ansible/ansible_plugins/action/container_status.py
@@ -0,0 +1,354 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2020 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
+
+import copy
+import tenacity
+import yaml
+
+from ansible.errors import AnsibleActionFail
+from ansible.plugins.action import ActionBase
+from ansible.utils.display import Display
+
+DISPLAY = Display()
+
+# Default delay (in seconds) and number of retries used to fetch containers
+# status and wait for them to be finished; TIMEOUT is the total, in seconds.
+DELAY = 10
+RETRIES = 30
+TIMEOUT = DELAY * RETRIES
+
+ANSIBLE_METADATA = {
+    'metadata_version': '1.1',
+    'status': ['preview'],
+    'supported_by': 'community'
+}
+
+DOCUMENTATION = """
+module: container_status
+author:
+  - "TripleO team"
+version_added: '2.9'
+short_description: Check and report containers status
+notes: []
+description:
+  - For each container that isn't an exec or a container supposed to be
+    controlled by systemd, we expect it to terminate with a return code.
+    This module will check that code and make sure it's correct. If not, it
+    will report the failure for easier debug.
+requirements:
+  - None
+options:
+  container_async_results:
+    description:
+      - Async results of a podman_container invocation.
+    type: list
+  container_data:
+    description:
+      - List of dictionaries which have the container configurations.
+    type: list
+  valid_exit_codes:
+    description:
+      - List of valid container exit codes.
+    default: []
+    type: list
+  debug:
+    description:
+      - Whether or not debug is enabled.
+    default: False
+    type: bool
+"""
+EXAMPLES = """
+- name: Check containers status
+  container_status:
+    container_async_results: "{{ create_async_poll_results.results }}"
+    container_data:
+      - keystone:
+          image: docker.io/keystone
+      - mysql_bootstrap:
+          image: docker.io/mysql
+    valid_exit_codes:
+      - 0
+      - 2
+"""
+RETURN = """
+changed_containers:
+  description: List of containers which changed.
+  returned: always
+  type: list
+  sample:
+    - keystone
+    - mysql
+commands:
+  description: List of container cli commands that would be run.
+  returned: always
+  type: list
+  sample:
+    - podman rm -f keystone
+    - podman run keystone
+"""
+
+
+class ActionModule(ActionBase):
+    """Action plugin for container status"""
+
+    _VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options']
+
+    def _get_args(self):
+        """Return the task arguments with the documented defaults applied.
+
+        Options documented without a default value are required.
+
+        :returns: Dictionary of arguments.
+        :raises AnsibleActionFail: If a required argument is missing.
+        """
+        missing = []
+        args = {}
+
+        for option, vals in self._VALID_ARGS.items():
+            if 'default' not in vals:
+                if self._task.args.get(option, None) is None:
+                    missing.append(option)
+                    continue
+                args[option] = self._task.args.get(option)
+            else:
+                args[option] = self._task.args.get(option, vals['default'])
+
+        if missing:
+            raise AnsibleActionFail('Missing required parameters: {}'.format(
+                ', '.join(missing)))
+        return args
+
+    def _get_containers_to_check(self, data):
+        """Return a list of containers that we need to check.
+
+        Given some container_data, figure out what containers terminate with
+        a return code so later we can check that code.
+
+        :param data: List of dictionaries of container data.
+        :returns: List of containers that need to be checked.
+        """
+        containers = []
+        # loop through container data to get specific container
+        for container in data:
+            # get container name and data
+            for name, values in container.items():
+                if 'action' in values or 'restart' in values:
+                    continue
+                if 'image' in values:
+                    # We assume that container configs that don't have a
+                    # restart policy nor action (used for podman exec) but have
+                    # an image set, will run something and then exit with a
+                    # return code.
+                    containers.append(name)
+        if self.debug and containers:
+            DISPLAY.display('These containers are supposed to terminate with '
+                            'a valid exit code and will be checked: '
+                            '{}'.format(containers))
+        return containers
+
+    def _get_create_commands(self, results):
+        """Return a list of commands that were executed by container tool.
+
+        :param results: Ansible task results.
+        :returns commands: List of commands.
+        """
+        commands = []
+        for item in results:
+            if item['changed']:
+                # Tolerate changed items which did not report any command.
+                commands.extend(item.get('podman_actions', []))
+        return commands
+
+    def _is_container_running(self, container):
+        """Return True if a container has Running State.
+
+        :params container: Dictionary for container infos.
+        :returns running: Boolean of container running status.
+        """
+        # 'State' can be missing or empty; treat it as not running.
+        state = container.get('State') or {}
+        return state.get('Running', False)
+
+    def _get_container_infos(self, containers, task_vars):
+        """Return container infos.
+
+        :params containers: List of containers.
+        :params task_vars: Dictionary of Ansible tasks variables.
+        :returns container_results: List of container infos.
+        """
+        tvars = copy.deepcopy(task_vars)
+        result = self._execute_module(
+            module_name='podman_container_info',
+            module_args=dict(name=containers),
+            task_vars=tvars
+        )
+        return list(result['containers'])
+
+    @tenacity.retry(
+        reraise=True,
+        stop=tenacity.stop_after_attempt(RETRIES),
+        wait=tenacity.wait_fixed(DELAY)
+    )
+    def _fetch_container_state(self, containers, task_vars):
+        """Return container states of finished containers with retries.
+
+        :params containers: List of containers.
+        :params task_vars: Dictionary of Ansible tasks variables.
+        :returns container_results: List of container infos.
+        :raises AnsibleActionFail: If a container is still running after the
+                                   last retry.
+        """
+        containers_results = self._get_container_infos(containers, task_vars)
+        for container in containers_results:
+            name = container.get('Name')
+            if self._is_container_running(container):
+                raise AnsibleActionFail('Container {} has not finished yet, '
+                                        'retrying...'.format(name))
+        return containers_results
+
+    def _check_container_state(self, containers, exit_codes, task_vars):
+        """Return a tuple of running and failed containers.
+
+        :params containers: List of containers to check.
+        :params exit_codes: List of valid exit codes.
+        :params task_vars: Dictionary of Ansible tasks variables.
+        :returns running, failed: Tuple of lists.
+        """
+        running = []
+        failed = []
+        try:
+            containers_results = self._fetch_container_state(containers,
+                                                             task_vars)
+        except AnsibleActionFail:
+            # We fail at the end with all the other infos
+            if self.debug:
+                DISPLAY.display('One or more containers did not finish on '
+                                'time, the failure will be reported later.')
+            # The retries gave up: get the current state of all containers to
+            # report which ones are still running.
+            containers_results = self._get_container_infos(containers,
+                                                           task_vars)
+        for container in containers_results:
+            container_name = container.get('Name')
+            if self._is_container_running(container):
+                running.append(container_name)
+                continue
+            # 'State' can be missing or empty; the container is then reported
+            # as failed since no valid exit code can be found.
+            container_state = container.get('State') or {}
+            if container_state.get('ExitCode') not in exit_codes:
+                failed.append(container_name)
+        return (running, failed)
+
+    def _check_errors_in_ansible_async_results(self, results):
+        """Get a tuple with changed and failed containers.
+
+        :param results: Ansible results from "Check podman create status"
+        :returns: Tuple of containers that changed or failed
+        """
+        changed = []
+        failed = []
+        # if Ansible is run in check mode, the async_results items will
+        # not contain failed or finished keys.
+        if self._play_context.check_mode:
+            return (changed, failed)
+        for item in results:
+            async_result_item = item['create_async_result_item']
+            if item['changed']:
+                changed.extend(async_result_item['container_data'])
+            if (item['failed'] or not item['finished']
+                    or async_result_item.get('stderr', '') != ''):
+                failed.extend(async_result_item['container_data'])
+        return (changed, failed)
+
+    def run(self, tmp=None, task_vars=None):
+        """Check the containers status and return what has changed.
+
+        :params tmp: Passed to the parent class, not used here.
+        :params task_vars: Dictionary of Ansible tasks variables.
+        :returns result: Dictionary with the changed_containers, commands and
+                         changed keys.
+        :raises AnsibleActionFail: If a container failed to be created, did
+                                   not finish on time or finished with an
+                                   invalid exit code.
+        """
+        self._supports_check_mode = True
+        self.changed = False
+        self.changed_containers = []
+        running = []
+        failed = []
+
+        if task_vars is None:
+            task_vars = dict()
+        result = super(ActionModule, self).run(tmp, task_vars)
+        del tmp
+        # parse args
+        args = self._get_args()
+
+        async_results = args['container_async_results']
+        container_data = args['container_data']
+        valid_exit_codes = args['valid_exit_codes']
+        self.debug = args['debug']
+
+        containers_to_check = self._get_containers_to_check(container_data)
+
+        # Check that the containers which are supposed to finish have
+        # actually finished and also terminated with the right exit code.
+        if valid_exit_codes and containers_to_check:
+            (running, failed) = self._check_container_state(
+                containers_to_check,
+                valid_exit_codes,
+                task_vars)
+
+        # Check the Ansible async results for containers which:
+        # - reported a changed resource (podman_container created or updated
+        #   a container) and return it as self.changed_containers.
+        # - reported a failed resource (podman_container failed to create
+        #   the container) and return it as async_failed.
+        # - didn't finish on time and return it as async_failed.
+        (self.changed_containers, async_failed) = (
+            self._check_errors_in_ansible_async_results(async_results))
+
+        if failed:
+            DISPLAY.error('Container(s) which finished with wrong return code'
+                          ': {}'.format(failed))
+        if async_failed:
+            DISPLAY.error('Container(s) which failed to be created by '
+                          'podman_container module: {}'.format(async_failed))
+        if running:
+            DISPLAY.error('Container(s) which did not finish after {} '
+                          'seconds: {}'.format(TIMEOUT, running))
+        total_errors = list(set(failed + async_failed + running))
+        if total_errors:
+            raise AnsibleActionFail('Failed container(s): {}, check logs in '
+                                    '/var/log/containers/'
+                                    'stdouts/'.format(total_errors))
+
+        container_commands = self._get_create_commands(async_results)
+        if container_commands:
+            self.changed = True
+            if self._play_context.check_mode or self.debug:
+                for cmd in container_commands:
+                    DISPLAY.display(cmd)
+
+        result['changed_containers'] = self.changed_containers
+        result['commands'] = container_commands
+        result['changed'] = self.changed
+        return result
diff --git a/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml b/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
index bb9bbb408..e6642f6f3 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
@@ -290,7 +290,7 @@
           {
             "image": "fedora:rawhide",
             "net": "host",
-            "command": "sleep 3600"
+            "command": "sleep 10"
           }
         dest: '/tmp/container-configs/fedora_bis.json'
     - include_role:
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/check_exit_code.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/check_exit_code.yml
deleted file mode 100644
index 83a30f889..000000000
--- a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/check_exit_code.yml
+++ /dev/null
@@ -1,51 +0,0 @@
----
-# Copyright 2020 Red Hat, Inc.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-- name: "Wait for containers to be exit"
-  podman_container_info:
-    name: "{{ containers_with_exit_code }}"
-  register: podman_containers_infos
-  until: ( podman_containers_infos.containers | selectattr('State.Running', 'equalto', True) |list|length ) == 0
-  # Retry 30 times every 10 seconds so we wait 5 min in total
-  retries: 30
-  delay: 10
-  # We need to ignore the failures since later we print some debug.
-  # We can't use "rescue" here because the debug tasks use
-  # "podman_containers_infos".
-  failed_when: false
-  no_log: "{{ not tripleo_container_manage_debug }}"
-
-- name: Create a list of containers which didn't exit
-  set_fact:
-    running_containers: >-
-      {{ podman_containers_infos.containers |
-         selectattr('State.Running', 'equalto', True) | map(attribute='Name') | list }}
-
-- name: Create a list of containers with bad Exit Codes
-  set_fact:
-    broken_containers: >-
-      {{ podman_containers_infos.containers |
-         rejectattr('State.ExitCode', 'in', tripleo_container_manage_valid_exit_code) | map(attribute='Name') | list }}
-
-- name: "Print running containers"
-  fail:
-    msg: "Container(s) which are still running after 5 min: {{ running_containers }}, check logs in /var/log/containers/stdouts/"
-  when: running_containers|length != 0
-
-- name: "Print failing containers"
-  fail:
-    msg: "Container(s) with bad ExitCode: {{ broken_containers }}, check logs in /var/log/containers/stdouts/"
-  when: broken_containers|length != 0
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
index 1cdc34b04..dcc1b85b7 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/create.yml
@@ -96,46 +96,21 @@
   when:
     - not ansible_check_mode|bool
 
+- name: Check containers status
+  container_status:
+    container_async_results: "{{ create_async_poll_results.results }}"
+    container_data: "{{ batched_container_data }}"
+    valid_exit_codes: "{{ tripleo_container_manage_valid_exit_code }}"
+    debug: "{{ tripleo_container_manage_debug | bool }}"
+  register: container_status_results
+
 - name: "Create fact for containers which changed"
   set_fact:
     # List of containers which have changed (created or updated)
-    containers_changed: "{{ create_async_poll_results.results | get_changed_containers | default([]) }}"
+    containers_changed: "{{ container_status_results.changed_containers | default([]) }}"
 
-- name: "Create fact for containers which failed"
+- name: "Append the list of all podman commands that are run for containers with changes"
   set_fact:
-    # List of containers which returned an error when creating or updating them
-    containers_failed: "{{ create_async_poll_results.results | get_failed_containers | default([]) }}"
-
-- name: "Create fact for containers which require rc check"
-  set_fact:
-    # List of containers which would terminate with a return code that needs to be valid.
-    # We assume that container configs that don't have a restart policy nor action
-    # (used for podman exec) will run something and then exit with a return code.
-    containers_to_check: >-
-      {{ batched_container_data | haskey(attribute='image', excluded_keys=['action', 'restart']) |
-         list_of_keys | default([]) | difference(containers_failed) }}
-
-- name: Print the containers that failed to start
-  fail:
-    msg: "{{ containers_failed }} failed to start, check logs in /var/log/containers/stdouts/"
-  when:
-    - containers_failed|length != 0
-
-- name: Block for container commands
-  include_tasks: podman/get_commands_create.yml
+    all_containers_commands: "{{ container_status_results.commands | default([]) + (all_containers_commands | default([]) | list) }}"
   when:
     - ansible_check_mode|bool
-
-- name: "Print the list of containers which changed"
-  debug:
-    var: containers_changed
-  when: tripleo_container_manage_debug | bool
-
-- name: "Block for container exit codes"
-  when:
-    - not ansible_check_mode|bool
-    - tripleo_container_manage_valid_exit_code|length != 0
-    - containers_to_check|length != 0
-  include_tasks: podman/check_exit_code.yml
-  vars:
-    containers_with_exit_code: "{{ containers_to_check }}"
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/get_commands_create.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/get_commands_create.yml
deleted file mode 100644
index 8673f6fe6..000000000
--- a/tripleo_ansible/roles/tripleo-container-manage/tasks/podman/get_commands_create.yml
+++ /dev/null
@@ -1,29 +0,0 @@
----
-# Copyright 2020 Red Hat, Inc.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-- name: "Create a list of podman commands that are run for containers with changes"
-  set_fact:
-    containers_commands: >-
-      {{ create_async_results.results | selectattr('changed', 'equalto', true) |
-         map(attribute='podman_actions') | default([]) | list }}
-
-- name: "Print the list of commands that are run for containers with changes"
-  debug:
-    var: containers_commands
-
-- name: "Append the list of all podman commands that are run for containers with changes"
-  set_fact:
-    all_containers_commands: "{{ containers_commands|default([], true) + (all_containers_commands | default([]) | list) }}"
diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml
index 614dae9ec..44fed781d 100644
--- a/zuul.d/molecule.yaml
+++ b/zuul.d/molecule.yaml
@@ -353,6 +353,7 @@
     files:
     - ^tripleo_ansible/roles/tripleo-container-manage/.*
     - ^tripleo_ansible/roles/tripleo-container-rm/.*
+    - ^tripleo_ansible/ansible_plugins/action/container_status.py$
     - ^tripleo_ansible/ansible_plugins/filter/helpers.py$
     - ^tripleo_ansible/ansible_plugins/modules/container_config_data.py$
     - ^tripleo_ansible/ansible_plugins/modules/container_puppet_config.py$