diff --git a/doc/source/roles/role-tripleo_container_manage.rst b/doc/source/roles/role-tripleo_container_manage.rst index 045a126a6..d53757320 100644 --- a/doc/source/roles/role-tripleo_container_manage.rst +++ b/doc/source/roles/role-tripleo_container_manage.rst @@ -61,11 +61,6 @@ This Ansible role allows to do the following tasks: Note: `tripleo_container_manage_concurrency` parameter is set to 1 by default, and putting higher value than 2 can be expose issue with Podman locks. - If a container is meant to exit after running a script (defined in - EntryPoint), we can check its return code and fail if the code isn't - expected. It can be done with `tripleo_container_manage_valid_exit_code`. - If defined to a list of integers, the role will wait for the container to be - exited and then checks the return code. Here is an example of a playbook: @@ -109,11 +104,6 @@ Roles variables +------------------------------------------------+-----------------------------+----------------------------+ | tripleo_container_manage_clean_orphans | true | Option to clean orphans | +------------------------------------------------+-----------------------------+----------------------------+ -| tripleo_container_manage_valid_exit_code | [] | Allow to check if a | -| | | container returned the | -| | | exit code in parameter. | -| | | Must be a list. e.g. [0,3] | -+------------------------------------------------+-----------------------------+----------------------------+ Healthchecks ~~~~~~~~~~~~ diff --git a/tripleo_ansible/ansible_plugins/action/container_status.py b/tripleo_ansible/ansible_plugins/action/container_status.py deleted file mode 100644 index fb3c2b12b..000000000 --- a/tripleo_ansible/ansible_plugins/action/container_status.py +++ /dev/null @@ -1,359 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Copyright 2020 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from __future__ import absolute_import, division, print_function -__metaclass__ = type - -import copy -import tenacity -import yaml - -from ansible.errors import AnsibleActionFail -from ansible.plugins.action import ActionBase -from ansible.utils.display import Display - -DISPLAY = Display() - -# Default delay/retries used to fetch containers status and wait for them to be -# finished. -DELAY = 10 -RETRIES = 30 -TIMEOUT = DELAY * RETRIES - -ANSIBLE_METADATA = { - 'metadata_version': '1.1', - 'status': ['preview'], - 'supported_by': 'community' -} - -DOCUMENTATION = """ -module: container_status -author: - - "TripleO team" -version_added: '2.9' -short_description: Check and report containers status -notes: [] -description: - - For each container that isn't an exec or a container supposed to be - controlled by systemd, we expect it to terminate with a return code. - This module will check that code and make sure it's correct. If not, it - will report the failure for easier debug. -requirements: - - None -options: - container_async_results: - description: - - Async results of a podman_container invocation. - type: list - container_data: - description: - - List of dictionaries which have the container configurations. - type: list - valid_exit_codes: - description: - - List of valid container exit codes. - default: [] - type: list - debug: - description: - - Whether or not debug is enabled. - default: False - type: boolean -""" -EXAMPLES = """ -- name: Check containers status - containers_status: - container_async_results: "{{ create_async_poll_results.results }}" - container_data: - - keystone: - image: docker.io/keystone - - mysql_bootstrap: - image: docker.io/mysql - valid_exit_codes: - - 0 - - 2 -""" -RETURN = """ -changed_containers: - description: List of containers which changed. - returned: always - type: list - sample: - - keystone - - mysql -commands: - description: List of container cli commands that would be run. - returned: always - type: list - sample: - - podman rm -f keystone - - podman run keystone -""" - - -class ActionModule(ActionBase): - """Action plugin for container status""" - - _VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options'] - - def _get_args(self): - missing = [] - args = {} - - for option, vals in self._VALID_ARGS.items(): - if 'default' not in vals: - if self._task.args.get(option, None) is None: - missing.append(option) - continue - args[option] = self._task.args.get(option) - else: - args[option] = self._task.args.get(option, vals['default']) - - if missing: - raise AnsibleActionFail('Missing required parameters: {}'.format( - ', '.join(missing))) - return args - - def _get_containers_to_check(self, data): - """Return a list of containers that we need to check. - - Given some container_data, figure out what containers terminate with - a return code so later we can check that code. - - :param data: Dictionary of container data. - :returns: List of containers that need to be checked. - """ - containers_exec = [] - containers_run = [] - # loop through container data to get specific container - for container in data: - # get container name and data - for name, values in container.items(): - if 'restart' in values: - continue - if 'action' in values: - containers_exec.append(name) - if 'image' in values: - # We assume that container configs that don't have a - # restart policy nor action (used for podman exec) but have - # an image set, will run something and then exit with a - # return code. - containers_run.append(name) - if self.debug and len(containers_run) > 0: - DISPLAY.display('These containers are supposed to terminate with ' - 'a valid exit code and will be checked: ' - '{}'.format(containers_run)) - if self.debug and len(containers_exec) > 0: - DISPLAY.display('These containers exec are supposed to terminate ' - 'with a valid exit code and will be checked: ' - '{}'.format(containers_exec)) - return containers_run - - def _get_commands(self, results): - """Return a list of commands that were executed by container tool. - - :param results: Ansible task results. - :returns commands: List of commands. - """ - commands = [] - for item in results: - try: - if item['changed']: - commands.extend(item['podman_actions']) - except KeyError: - if 'cmd' in item: - commands.append(' '.join(item['cmd'])) - else: - raise AnsibleActionFail('Wrong async result data, missing' - ' podman_actions or cmd:' - ' {}'.format(item)) - return commands - - def _is_container_running(self, container): - """Return True if a container has Running State. - - :params container: Dictionary for container infos. - :returns running: Boolean of container running status. - """ - state = container.get('State', {}) - running = state.get('Running', False) - return running - - def _get_container_infos(self, containers, task_vars): - """Return container infos. - - :params containers: List of containers. - :params task_vars: Dictionary of Ansible tasks variables. - :returns container_results: Dictionary of container infos. - """ - tvars = copy.deepcopy(task_vars) - result = self._execute_module( - module_name='containers.podman.podman_container_info', - module_args=dict(name=containers), - task_vars=tvars - ) - return [c for c in result["containers"] if "containers" in result] - - @tenacity.retry( - reraise=True, - stop=tenacity.stop_after_attempt(RETRIES), - wait=tenacity.wait_fixed(DELAY) - ) - def _fetch_container_state(self, containers, task_vars): - """Return container states of finished containers with retries. - - :params containers: List of containers. - :params task_vars: Dictionary of Ansible tasks variables. - :returns container_results: Dictionary of container infos. - """ - containers_results = self._get_container_infos(containers, task_vars) - for container in containers_results: - name = container.get('Name') - if self._is_container_running(container): - raise AnsibleActionFail('Container {} has not finished yet, ' - 'retrying...'.format(name)) - return containers_results - - def _check_container_state(self, containers, exit_codes, task_vars): - """Return a tuple of running and failed containers. - - :params containers: List of containers to check. - :params exit_codes: List of valid exit codes. - :params task_vars: Dictionary of Ansible tasks variables. - :returns running, failed: Tuple of lists. - """ - running = [] - failed = [] - try: - self._fetch_container_state(containers, task_vars) - except AnsibleActionFail: - # We fail at the end with all the other infos - if self.debug: - DISPLAY.display('One or more containers did not finish on ' - 'time, the failure will be reported later.') - pass - containers_results = self._get_container_infos(containers, task_vars) - for container in containers_results: - container_name = container.get('Name') - container_state = container.get('State') - if self._is_container_running(container): - running.append(container_name) - elif container_state.get('ExitCode') not in exit_codes: - failed.append(container_name) - return (running, failed) - - def _check_errors_in_ansible_async_results(self, results): - """Get a tuple with changed and failed containers. - - :param results: Ansible results from "Check podman create status" - :returns: Tuple of containers that changed or failed - """ - changed = [] - create_failed = [] - exec_failed = [] - for item in results: - # if Ansible is run in check mode, the async_results items will - # not contain failed or finished keys. - if self._play_context.check_mode: - break - if 'create_async_result_item' in item: - async_item = item['create_async_result_item'] - if item['changed']: - for name, c in async_item['container_data'].items(): - changed.append(name) - if (item['failed'] or not item['finished'] - or ('stderr' in async_item - and async_item['stderr'] != '')): - for name, c in async_item['container_data'].items(): - create_failed.append(name) - if 'exec_async_result_item' in item: - async_item = item['exec_async_result_item'] - if item['rc'] != 0: - for name, c in async_item['container_exec_data'].items(): - exec_failed.append(name) - return (changed, create_failed, exec_failed) - - def run(self, tmp=None, task_vars=None): - self._supports_check_mode = True - self.changed = False - self.changed_containers = [] - container_commands = [] - running = [] - failed = [] - - if task_vars is None: - task_vars = dict() - result = super(ActionModule, self).run(tmp, task_vars) - del tmp - # parse args - args = self._get_args() - - async_results = args['container_async_results'] - container_data = args['container_data'] - valid_exit_codes = args['valid_exit_codes'] - self.debug = args['debug'] - - containers_run_to_check = self._get_containers_to_check(container_data) - - # Check that the containers which are supposed to finish have - # actually finished and also terminated with the right exit code. - if len(valid_exit_codes) > 0 and len(containers_run_to_check) > 0: - (running, failed) = self._check_container_state( - containers_run_to_check, - valid_exit_codes, - task_vars) - - # Check the Ansible async results for containers which: - # - reported a changed resources (podman_container created or updated - # a container) and return it as self.changed_containers. - # - reported a failed resource (podman_container failed to create - # the container and return it as self.failed_containers. - # - didn't finish on time and return it as self.failed_containers. - (self.changed_containers, async_failed, exec_failed) = ( - self._check_errors_in_ansible_async_results(async_results)) - - if len(exec_failed) > 0: - DISPLAY.error('Container(s) exec commands which failed to execute' - ': {}'.format(failed)) - if len(failed) > 0: - DISPLAY.error('Container(s) which finished with wrong return code' - ': {}'.format(failed)) - if len(async_failed) > 0: - DISPLAY.error('Container(s) which failed to be created by ' - 'podman_container module: {}'.format(async_failed)) - if len(running) > 0: - DISPLAY.error('Container(s) which did not finish after {} ' - 'minutes: {}'.format(TIMEOUT, running)) - total_errors = list(set(failed + exec_failed + async_failed + running)) - if len(total_errors) > 0: - raise AnsibleActionFail('Failed container(s): {}, check logs in ' - '/var/log/containers/' - 'stdouts/'.format(total_errors)) - - container_commands = self._get_commands(async_results) - if len(container_commands) > 0 and \ - (self._play_context.check_mode or self.debug): - for cmd in container_commands: - DISPLAY.display(cmd) - - if len(container_commands) > 0: - self.changed = True - - result['changed_containers'] = self.changed_containers - result['commands'] = container_commands - result['changed'] = self.changed - return result diff --git a/tripleo_ansible/roles/tripleo_container_manage/defaults/main.yml b/tripleo_ansible/roles/tripleo_container_manage/defaults/main.yml index 295f0a9a6..b2a860d4a 100644 --- a/tripleo_ansible/roles/tripleo_container_manage/defaults/main.yml +++ b/tripleo_ansible/roles/tripleo_container_manage/defaults/main.yml @@ -37,4 +37,3 @@ tripleo_container_manage_exec_retries: 120 tripleo_container_manage_healthcheck_disabled: false tripleo_container_manage_log_path: '/var/log/containers/stdouts' tripleo_container_manage_systemd_teardown: true -tripleo_container_manage_valid_exit_code: [] diff --git a/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml b/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml index 91e2047e6..750ea734e 100644 --- a/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml +++ b/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml @@ -62,7 +62,6 @@ tripleo_container_manage_config: '/tmp/container-configs' tripleo_container_manage_debug: true tripleo_container_manage_config_patterns: '*.json' - tripleo_container_manage_valid_exit_code: [0] tasks: - include_role: name: tripleo_container_manage diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index c42f871f2..de8ec08b0 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -305,7 +305,6 @@ files: - ^tripleo_ansible/roles/tripleo_container_manage/.* - ^tripleo_ansible/roles/tripleo_container_rm/.* - - ^tripleo_ansible/ansible_plugins/action/container_status.py$ - ^tripleo_ansible/ansible_plugins/action/container_systemd.py$ - ^tripleo_ansible/ansible_plugins/filter/helpers.py$ - ^tripleo_ansible/ansible_plugins/modules/container_config_data.py$