New validation: detect failed containers

Failed containers are pretty bad, since they leave us with a degraded service.

Running this validation before an upgrade is a good thing, and
running it after a deploy/upgrade will ensure we're in the right state.

Co-Authored-by: Gaël Chamoulaud <gchamoul@redhat.com>
Change-Id: I242f1c7cff76e8304696ea10b32c1545fa5b8ea5
changes/85/656785/12
Cédric Jeanneret 3 years ago
parent 1d85e29f79
commit bd8cdf41a5
  1. 6
      doc/source/roles/role-container-status.rst
  2. 13
      playbooks/container-status.yaml
  3. 45
      roles/container-status/tasks/main.yaml

@ -0,0 +1,6 @@
================
container-status
================

.. ansibleautoplugin::
   :role: roles/container-status

@ -0,0 +1,13 @@
---
# Validation playbook: applies the container-status role to every host matched
# by the "undercloud, overcloud" pattern.
- hosts: undercloud, overcloud
  vars:
    # Descriptive metadata for this validation: a display name, a short
    # description, and the validation groups it is listed under.
    metadata:
      name: Ensure container status
      description: >
        Detect failed containers and raise an error.
      groups:
        - pre-upgrade
        - post-deployment
        - post-upgrade
  roles:
    - container-status

@ -0,0 +1,45 @@
---
# Pick the container CLI for Overcloud nodes.
# Runs only on hosts that are in the 'overcloud' group and do not already have
# oc_container_cli set. The host's own container_cli variable is used; when it
# is undefined or empty, 'podman' is used instead (default(..., true)).
- name: Set oc_container_cli fact for the Overcloud nodes
  when:
    - "'overcloud' in group_names"
    - oc_container_cli is not defined
  set_fact:
    oc_container_cli: "{{ hostvars[inventory_hostname].container_cli | default('podman', true) }}"
# Undercloud only: work out the container CLI from undercloud.conf and store it
# as uc_container_cli.
# NOTE(review): the group is spelled 'Undercloud' (capitalised) here while the
# Overcloud check uses lowercase 'overcloud'. Group names are case-sensitive,
# so confirm this spelling matches the inventory in use.
- when: "'Undercloud' in group_names"
  block:
    - name: Set container_cli fact from undercloud.conf
      block:
        # Presumably exposes tripleo_undercloud_conf_file as a variable, since
        # the next task reads it -- confirm against the hiera module.
        - name: Get the path of tripleo undercloud config file
          become: true
          hiera:
            name: "tripleo_undercloud_conf_file"

        # Reads DEFAULT/container_cli from that file; a missing file is
        # tolerated (ignore_missing_file) rather than treated as an error.
        - name: Get container client from undercloud.conf
          ini:
            path: "{{ tripleo_undercloud_conf_file }}"
            section: DEFAULT
            key: container_cli
            ignore_missing_file: true
          register: container_cli

        # Keeps any uc_container_cli that was already provided; otherwise uses
        # the value read above, or 'podman' when that value is undefined/empty.
        # NOTE(review): this `when` is attached to the set_fact task; the
        # flattened source does not show its nesting level -- confirm.
        - name: Set uc_container_cli for the Undercloud
          set_fact:
            uc_container_cli: "{{ container_cli.value|default('podman', true) }}"
          when: uc_container_cli is not defined
# List every exited container as "<name> <status>", one per line, and register
# the result as failed_containers.
# Despite the task name, the CLI that runs is whichever one was selected
# earlier: oc_container_cli when it is defined, otherwise uc_container_cli.
# If neither fact has been set, the command falls back to 'podman' (the same
# default the fact-setting tasks use) instead of aborting with an
# undefined-variable error before any container is inspected.
- name: Get failed containers for podman
  become: true
  # Read-only query: never report the host as changed.
  changed_when: false
  # The raw/endraw pair stops Jinja from templating {{ .Names }} / {{ .Status }},
  # so the literal braces reach the CLI's --format option.
  command: >
    {% if oc_container_cli is defined %}{{ oc_container_cli }}{% else %}{{ uc_container_cli | default('podman') }}{% endif %}
    {% raw %}
    ps -a --filter 'status=exited' --format '{{ .Names }} {{ .Status }}'
    {% endraw %}
  register: failed_containers
# Walk the "<name> <status>" lines collected from the container CLI and fail
# for each one whose text does not match "... Exited (0) ... ago", i.e. every
# exited container that did not report exit code 0.
- name: Fail if we detect failed containers
  loop: "{{ failed_containers.stdout_lines }}"
  when: item is not match(".* Exited \(0\) .* ago")
  fail:
    msg: "Failed container detected: {{ item }}."
Loading…
Cancel
Save