tripleo-ansible/tripleo_ansible/ansible_plugins/action/container_status.py

360 lines
13 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
import tenacity
import yaml
from ansible.errors import AnsibleActionFail
from ansible.plugins.action import ActionBase
from ansible.utils.display import Display
DISPLAY = Display()
# Default delay/retries used to fetch containers status and wait for them to be
# finished.
DELAY = 10
RETRIES = 30
TIMEOUT = DELAY * RETRIES
ANSIBLE_METADATA = {
'metadata_version': '1.1',
'status': ['preview'],
'supported_by': 'community'
}
DOCUMENTATION = """
module: container_status
author:
- "TripleO team"
version_added: '2.9'
short_description: Check and report containers status
notes: []
description:
- For each container that isn't an exec or a container supposed to be
controlled by systemd, we expect it to terminate with a return code.
This module will check that code and make sure it's correct. If not, it
will report the failure for easier debug.
requirements:
- None
options:
container_async_results:
description:
- Async results of a podman_container invocation.
type: list
container_data:
description:
- List of dictionaries which have the container configurations.
type: list
valid_exit_codes:
description:
- List of valid container exit codes.
default: []
type: list
debug:
description:
- Whether or not debug is enabled.
default: False
type: boolean
"""
EXAMPLES = """
- name: Check containers status
containers_status:
container_async_results: "{{ create_async_poll_results.results }}"
container_data:
- keystone:
image: docker.io/keystone
- mysql_bootstrap:
image: docker.io/mysql
valid_exit_codes:
- 0
- 2
"""
RETURN = """
changed_containers:
description: List of containers which changed.
returned: always
type: list
sample:
- keystone
- mysql
commands:
description: List of container cli commands that would be run.
returned: always
type: list
sample:
- podman rm -f keystone
- podman run keystone
"""
class ActionModule(ActionBase):
"""Action plugin for container status"""
_VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options']
def _get_args(self):
missing = []
args = {}
for option, vals in self._VALID_ARGS.items():
if 'default' not in vals:
if self._task.args.get(option, None) is None:
missing.append(option)
continue
args[option] = self._task.args.get(option)
else:
args[option] = self._task.args.get(option, vals['default'])
if missing:
raise AnsibleActionFail('Missing required parameters: {}'.format(
', '.join(missing)))
return args
def _get_containers_to_check(self, data):
"""Return a list of containers that we need to check.
Given some container_data, figure out what containers terminate with
a return code so later we can check that code.
:param data: Dictionary of container data.
:returns: List of containers that need to be checked.
"""
containers_exec = []
containers_run = []
# loop through container data to get specific container
for container in data:
# get container name and data
for name, values in container.items():
if 'restart' in values:
continue
if 'action' in values:
containers_exec.append(name)
if 'image' in values:
# We assume that container configs that don't have a
# restart policy nor action (used for podman exec) but have
# an image set, will run something and then exit with a
# return code.
containers_run.append(name)
if self.debug and len(containers_run) > 0:
DISPLAY.display('These containers are supposed to terminate with '
'a valid exit code and will be checked: '
'{}'.format(containers_run))
if self.debug and len(containers_exec) > 0:
DISPLAY.display('These containers exec are supposed to terminate '
'with a valid exit code and will be checked: '
'{}'.format(containers_exec))
return containers_run
def _get_commands(self, results):
"""Return a list of commands that were executed by container tool.
:param results: Ansible task results.
:returns commands: List of commands.
"""
commands = []
for item in results:
try:
if item['changed']:
commands.extend(item['podman_actions'])
except KeyError:
if 'cmd' in item:
commands.append(' '.join(item['cmd']))
else:
raise AnsibleActionFail('Wrong async result data, missing'
' podman_actions or cmd:'
' {}'.format(item))
return commands
def _is_container_running(self, container):
"""Return True if a container has Running State.
:params container: Dictionary for container infos.
:returns running: Boolean of container running status.
"""
state = container.get('State', {})
running = state.get('Running', False)
return running
def _get_container_infos(self, containers, task_vars):
"""Return container infos.
:params containers: List of containers.
:params task_vars: Dictionary of Ansible tasks variables.
:returns container_results: Dictionary of container infos.
"""
tvars = copy.deepcopy(task_vars)
result = self._execute_module(
module_name='podman_container_info',
module_args=dict(name=containers),
task_vars=tvars
)
return [c for c in result["containers"] if "containers" in result]
@tenacity.retry(
reraise=True,
stop=tenacity.stop_after_attempt(RETRIES),
wait=tenacity.wait_fixed(DELAY)
)
def _fetch_container_state(self, containers, task_vars):
"""Return container states of finished containers with retries.
:params containers: List of containers.
:params task_vars: Dictionary of Ansible tasks variables.
:returns container_results: Dictionary of container infos.
"""
containers_results = self._get_container_infos(containers, task_vars)
for container in containers_results:
name = container.get('Name')
if self._is_container_running(container):
raise AnsibleActionFail('Container {} has not finished yet, '
'retrying...'.format(name))
return containers_results
def _check_container_state(self, containers, exit_codes, task_vars):
"""Return a tuple of running and failed containers.
:params containers: List of containers to check.
:params exit_codes: List of valid exit codes.
:params task_vars: Dictionary of Ansible tasks variables.
:returns running, failed: Tuple of lists.
"""
running = []
failed = []
try:
self._fetch_container_state(containers, task_vars)
except AnsibleActionFail:
# We fail at the end with all the other infos
if self.debug:
DISPLAY.display('One or more containers did not finish on '
'time, the failure will be reported later.')
pass
containers_results = self._get_container_infos(containers, task_vars)
for container in containers_results:
container_name = container.get('Name')
container_state = container.get('State')
if self._is_container_running(container):
running.append(container_name)
elif container_state.get('ExitCode') not in exit_codes:
failed.append(container_name)
return (running, failed)
def _check_errors_in_ansible_async_results(self, results):
"""Get a tuple with changed and failed containers.
:param results: Ansible results from "Check podman create status"
:returns: Tuple of containers that changed or failed
"""
changed = []
create_failed = []
exec_failed = []
for item in results:
# if Ansible is run in check mode, the async_results items will
# not contain failed or finished keys.
if self._play_context.check_mode:
break
if 'create_async_result_item' in item:
async_item = item['create_async_result_item']
if item['changed']:
for name, c in async_item['container_data'].items():
changed.append(name)
if (item['failed'] or not item['finished']
or ('stderr' in async_item
and async_item['stderr'] != '')):
for name, c in async_item['container_data'].items():
create_failed.append(name)
if 'exec_async_result_item' in item:
async_item = item['exec_async_result_item']
if item['rc'] != 0:
for name, c in async_item['container_exec_data'].items():
exec_failed.append(name)
return (changed, create_failed, exec_failed)
def run(self, tmp=None, task_vars=None):
self._supports_check_mode = True
self.changed = False
self.changed_containers = []
container_commands = []
running = []
failed = []
if task_vars is None:
task_vars = dict()
result = super(ActionModule, self).run(tmp, task_vars)
del tmp
# parse args
args = self._get_args()
async_results = args['container_async_results']
container_data = args['container_data']
valid_exit_codes = args['valid_exit_codes']
self.debug = args['debug']
containers_run_to_check = self._get_containers_to_check(container_data)
# Check that the containers which are supposed to finish have
# actually finished and also terminated with the right exit code.
if len(valid_exit_codes) > 0 and len(containers_run_to_check) > 0:
(running, failed) = self._check_container_state(
containers_run_to_check,
valid_exit_codes,
task_vars)
# Check the Ansible async results for containers which:
# - reported a changed resources (podman_container created or updated
# a container) and return it as self.changed_containers.
# - reported a failed resource (podman_container failed to create
# the container and return it as self.failed_containers.
# - didn't finish on time and return it as self.failed_containers.
(self.changed_containers, async_failed, exec_failed) = (
self._check_errors_in_ansible_async_results(async_results))
if len(exec_failed) > 0:
DISPLAY.error('Container(s) exec commands which failed to execute'
': {}'.format(failed))
if len(failed) > 0:
DISPLAY.error('Container(s) which finished with wrong return code'
': {}'.format(failed))
if len(async_failed) > 0:
DISPLAY.error('Container(s) which failed to be created by '
'podman_container module: {}'.format(async_failed))
if len(running) > 0:
DISPLAY.error('Container(s) which did not finish after {} '
'minutes: {}'.format(TIMEOUT, running))
total_errors = list(set(failed + exec_failed + async_failed + running))
if len(total_errors) > 0:
raise AnsibleActionFail('Failed container(s): {}, check logs in '
'/var/log/containers/'
'stdouts/'.format(total_errors))
container_commands = self._get_commands(async_results)
if len(container_commands) > 0 and \
(self._play_context.check_mode or self.debug):
for cmd in container_commands:
DISPLAY.display(cmd)
if len(container_commands) > 0:
self.changed = True
result['changed_containers'] = self.changed_containers
result['commands'] = container_commands
result['changed'] = self.changed
return result