From bd8cdf41a5d60c49265522302545e19974b15de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Jeanneret?= Date: Wed, 1 May 2019 21:27:25 +0200 Subject: [PATCH] New validation: detect failed containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Failed containers are pretty bad, since they leave us with a degraded service. Running this validation before an upgrade is a good thing, and running it after a deploy/upgrade will ensure we're in the right state. Co-Authored-by: Gaël Chamoulaud Change-Id: I242f1c7cff76e8304696ea10b32c1545fa5b8ea5 --- doc/source/roles/role-container-status.rst | 6 +++ playbooks/container-status.yaml | 13 +++++++ roles/container-status/tasks/main.yaml | 45 ++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 doc/source/roles/role-container-status.rst create mode 100644 playbooks/container-status.yaml create mode 100644 roles/container-status/tasks/main.yaml diff --git a/doc/source/roles/role-container-status.rst b/doc/source/roles/role-container-status.rst new file mode 100644 index 000000000..475c0ee55 --- /dev/null +++ b/doc/source/roles/role-container-status.rst @@ -0,0 +1,6 @@ +================ +container-status +================ + +.. ansibleautoplugin:: + :role: roles/container-status diff --git a/playbooks/container-status.yaml b/playbooks/container-status.yaml new file mode 100644 index 000000000..d97bdea3c --- /dev/null +++ b/playbooks/container-status.yaml @@ -0,0 +1,13 @@ +--- +- hosts: undercloud, overcloud + vars: + metadata: + name: Ensure container status + description: > + Detect failed containers and raise an error. 
+      groups:
+        - pre-upgrade
+        - post-deployment
+        - post-upgrade
+  roles:
+    - container-status
diff --git a/roles/container-status/tasks/main.yaml b/roles/container-status/tasks/main.yaml
new file mode 100644
index 000000000..1a54e0f46
--- /dev/null
+++ b/roles/container-status/tasks/main.yaml
@@ -0,0 +1,61 @@
+---
+# Report containers that exited with a non-zero status on the target host.
+#
+# The container CLI is resolved per host: overcloud nodes take it from their
+# inventory variables, the undercloud from undercloud.conf. Each lookup falls
+# back to podman when the value is missing or empty.
+- name: Set oc_container_cli fact for the Overcloud nodes
+  set_fact:
+    oc_container_cli: "{{ hostvars[inventory_hostname].container_cli | default('podman', true) }}"
+  when:
+    - "'overcloud' in group_names"
+    - oc_container_cli is not defined
+
+# NOTE(review): this group test is case-sensitive ('Undercloud'), while the
+# playbook host pattern is spelled 'undercloud' - confirm the inventory
+# really names the group with a capital U.
+- when: "'Undercloud' in group_names"
+  block:
+    - name: Set container_cli fact from undercloud.conf
+      block:
+        # Presumably publishes tripleo_undercloud_conf_file as a fact, since
+        # the next task reads it - confirm against the hiera module.
+        - name: Get the path of tripleo undercloud config file
+          become: true
+          hiera:
+            name: "tripleo_undercloud_conf_file"
+
+        - name: Get container client from undercloud.conf
+          ini:
+            path: "{{ tripleo_undercloud_conf_file }}"
+            section: DEFAULT
+            key: container_cli
+            ignore_missing_file: true
+          register: container_cli
+
+        # Only set the fact when it is not already defined.
+        - name: Set uc_container_cli for the Undercloud
+          set_fact:
+            uc_container_cli: "{{ container_cli.value|default('podman', true) }}"
+          when: uc_container_cli is not defined
+
+# Runs whichever CLI was resolved above (not only podman, despite the task
+# name). If neither fact was set, fall back to podman like the lookups above
+# instead of aborting on an undefined variable. The raw section stops Ansible
+# from rendering the Go template handed to --format.
+- name: Get failed containers for podman
+  changed_when: false
+  become: true
+  command: >
+    {% if oc_container_cli is defined %}{{ oc_container_cli }}{% else %}{{ uc_container_cli | default('podman') }}{% endif %}
+    {% raw %}
+    ps -a --filter 'status=exited' --format '{{ .Names }} {{ .Status }}'
+    {% endraw %}
+  register: failed_containers
+
+# Every listed container has exited; only a zero exit status is acceptable.
+- name: Fail if we detect failed containers
+  fail:
+    msg: "Failed container detected: {{ item }}."
+  when: item is not match(".* Exited \(0\) .* ago")
+  loop: "{{ failed_containers.stdout_lines }}"