New validation: detect failed containers
Failed containers are pretty bad, since they mean we have a degraded service. Running this validation before an upgrade is a good thing, and running it after a deploy/upgrade will ensure we're in the right state. Co-Authored-by: Gaël Chamoulaud <gchamoul@redhat.com> Change-Id: I242f1c7cff76e8304696ea10b32c1545fa5b8ea5
This commit is contained in:
parent
1d85e29f79
commit
bd8cdf41a5
|
@ -0,0 +1,6 @@
|
||||||
|
================
container-status
================

.. ansibleautoplugin::
   :role: roles/container-status
|
|
@ -0,0 +1,13 @@
|
||||||
|
---
# Validation playbook: applies the container-status role to the undercloud
# and overcloud hosts.
- hosts: undercloud, overcloud
  vars:
    # Descriptive metadata for this validation (display name, description and
    # the groups it is listed under).
    metadata:
      name: 'Ensure container status'
      description: >
        Detect failed containers and raise an error.
      groups:
        - 'pre-upgrade'
        - 'post-deployment'
        - 'post-upgrade'
  roles:
    - 'container-status'
|
|
@ -0,0 +1,45 @@
|
||||||
|
---
|
||||||
|
# Overcloud hosts: take the container client from the host's own
# container_cli variable. default(..., true) falls back to 'podman' when that
# variable is undefined or evaluates to false (e.g. an empty string).
# Skipped when oc_container_cli has already been supplied.
- name: Set oc_container_cli fact for the Overcloud nodes
  when:
    - "'overcloud' in group_names"
    - oc_container_cli is not defined
  set_fact:
    oc_container_cli: >-
      {{ hostvars[inventory_hostname].container_cli
      | default('podman', true) }}
|
||||||
|
|
||||||
|
# Undercloud hosts: work out which container client to use, unless
# uc_container_cli has already been supplied.
# NOTE(review): both conditions are assumed to guard all three tasks below
# (group membership AND "uc_container_cli is not defined") -- confirm.
- name: Set container_cli fact from undercloud.conf
  when:
    - "'Undercloud' in group_names"
    - uc_container_cli is not defined
  block:
    # NOTE(review): the next task reads tripleo_undercloud_conf_file, so this
    # lookup presumably exposes the hiera key as a variable of the same
    # name -- confirm against the hiera module.
    - name: Get the path of tripleo undercloud config file
      hiera:
        name: "tripleo_undercloud_conf_file"
      become: true

    # Reads DEFAULT/container_cli from that file; presumably a missing file is
    # tolerated (ignore_missing_file) -- confirm against the ini module.
    - name: Get container client from undercloud.conf
      register: container_cli
      ini:
        path: "{{ tripleo_undercloud_conf_file }}"
        section: DEFAULT
        key: container_cli
        ignore_missing_file: true

    # default(..., true) also replaces an empty/false value with 'podman'.
    - name: Set uc_container_cli for the Undercloud
      set_fact:
        uc_container_cli: "{{ container_cli.value|default('podman', true) }}"
|
||||||
|
|
||||||
|
# List every container in the "exited" state, one "<names> <status>" line per
# container. Despite the task name, the client is whichever one was resolved
# earlier: oc_container_cli when defined, otherwise uc_container_cli.
# The raw/endraw markers keep Ansible from templating the client's own
# Go-template placeholders ({{ .Names }}, {{ .Status }}).
- name: Get failed containers for podman
  # Read-only query: never report this task as "changed".
  changed_when: false
  become: true
  command: >
    {% if oc_container_cli is defined %}{{ oc_container_cli }}{% else %}{{ uc_container_cli }}{% endif %}
    {% raw %}
    ps -a --filter 'status=exited' --format '{{ .Names }} {{ .Status }}'
    {% endraw %}
  register: failed_containers
|
||||||
|
|
||||||
|
# Walk the "<names> <status>" lines collected in failed_containers and fail
# for every line that is not a clean "Exited (0) ... ago" status, i.e. any
# exited container whose status does not report exit code 0.
- name: Fail if we detect failed containers
  loop: "{{ failed_containers.stdout_lines }}"
  when: item is not match(".* Exited \(0\) .* ago")
  fail:
    msg: "Failed container detected: {{ item }}."
|
Loading…
Reference in New Issue