From 7064cd8e900ce4420055c6caf06a3aba1fbee094 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Wed, 4 Sep 2019 14:07:45 -0400 Subject: [PATCH] nova: use systemd to check container healthchecks Instead of running "podman exec" to test the container healthchecks, we should rather rely on the status of systemd timers which reflect the real state of the healthchecks, since they run under a specific user and pid. Also, we should only test the healthchecks if ContainerHealthcheckDisabled is set to False. Change-Id: I2c044e3d2af7f747acde5ad3bf256386b8c550a3 Closes-Bug: #1842687 --- deployment/nova/nova-api-container-puppet.yaml | 14 ++++++++++++-- .../nova/nova-compute-container-puppet.yaml | 18 ++++++++++++------ .../nova/nova-conductor-container-puppet.yaml | 18 ++++++++++++------ .../nova/nova-libvirt-container-puppet.yaml | 14 ++++++++++++-- .../nova/nova-metadata-container-puppet.yaml | 14 ++++++++++++-- ...nova-migration-target-container-puppet.yaml | 14 ++++++++++++-- .../nova/nova-scheduler-container-puppet.yaml | 18 ++++++++++++------ .../nova/nova-vnc-proxy-container-puppet.yaml | 14 ++++++++++++-- 8 files changed, 96 insertions(+), 28 deletions(-) diff --git a/deployment/nova/nova-api-container-puppet.yaml b/deployment/nova/nova-api-container-puppet.yaml index 7abe9a025c..7185a1ab66 100644 --- a/deployment/nova/nova-api-container-puppet.yaml +++ b/deployment/nova/nova-api-container-puppet.yaml @@ -430,11 +430,21 @@ outputs: get_attr: [ApacheServiceBase, role_data, metadata_settings] deploy_steps_tasks: - name: validate nova api container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: "{{ container_cli }} exec nova_api /openstack/healthcheck" + block: + - name: Get nova-api healthcheck status + register: nova_api_healthcheck_state + systemd: + name: tripleo_nova_api_healthcheck + - name: Fail if nova-api healthcheck report failed status + fail: + msg: 
nova-api isn't working (healthcheck failed) + when: nova_api_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: {get_attr: [NovaApiLogging, host_prep_tasks]} external_upgrade_tasks: - when: step|int == 1 diff --git a/deployment/nova/nova-compute-container-puppet.yaml b/deployment/nova/nova-compute-container-puppet.yaml index 738e1444d3..6cb9109bdd 100644 --- a/deployment/nova/nova-compute-container-puppet.yaml +++ b/deployment/nova/nova-compute-container-puppet.yaml @@ -942,15 +942,21 @@ outputs: list_concat: - {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} - - name: validate nova compute container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: - str_replace: - template: "{{ container_cli }} exec nova_compute HEALTHCHECK" - params: - HEALTHCHECK: {get_attr: [ContainersCommon, healthcheck_rpc_port, test]} + block: + - name: Get nova-compute healthcheck status + register: nova_compute_healthcheck_state + systemd: + name: tripleo_nova_compute_healthcheck + - name: Fail if nova-compute healthcheck report failed status + fail: + msg: nova-compute isn't working (healthcheck failed) + when: nova_compute_healthcheck_state.status.ExecMainStatus != '0' upgrade_tasks: - name: Remove openstack-nova-compute and python-nova package during upgrade package: diff --git a/deployment/nova/nova-conductor-container-puppet.yaml b/deployment/nova/nova-conductor-container-puppet.yaml index dc0f2dda25..499a480af7 100644 --- a/deployment/nova/nova-conductor-container-puppet.yaml +++ b/deployment/nova/nova-conductor-container-puppet.yaml @@ -191,15 +191,21 @@ outputs: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS deploy_steps_tasks: - name: validate nova conductor container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: - str_replace: - template: 
"{{ container_cli }} exec nova_conductor HEALTHCHECK" - params: - HEALTHCHECK: {get_attr: [ContainersCommon, healthcheck_rpc_port, test]} + block: + - name: Get nova-conductor healthcheck status + register: nova_conductor_healthcheck_state + systemd: + name: tripleo_nova_conductor_healthcheck + - name: Fail if nova-conductor healthcheck report failed status + fail: + msg: nova-conductor isn't working (healthcheck failed) + when: nova_conductor_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]} diff --git a/deployment/nova/nova-libvirt-container-puppet.yaml b/deployment/nova/nova-libvirt-container-puppet.yaml index f56272539c..b4c00fbbd8 100644 --- a/deployment/nova/nova-libvirt-container-puppet.yaml +++ b/deployment/nova/nova-libvirt-container-puppet.yaml @@ -801,11 +801,21 @@ outputs: - {} deploy_steps_tasks: - name: validate nova-libvirt container state - when: step|int == 4 + when: + - not container_healthcheck_disabled + - step|int == 4 tags: - opendev-validation - opendev-validation-nova - command: "{{ container_cli }} exec nova_libvirt /openstack/healthcheck" + block: + - name: Get nova-libvirt healthcheck status + register: nova_libvirt_healthcheck_state + systemd: + name: tripleo_nova_libvirt_healthcheck + - name: Fail if nova-libvirt healthcheck report failed status + fail: + msg: nova-libvirt isn't working (healthcheck failed) + when: nova_libvirt_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: - name: create libvirt persistent data directories file: diff --git a/deployment/nova/nova-metadata-container-puppet.yaml b/deployment/nova/nova-metadata-container-puppet.yaml index 153cae8193..1b4b60e9fc 100644 --- a/deployment/nova/nova-metadata-container-puppet.yaml +++ b/deployment/nova/nova-metadata-container-puppet.yaml @@ -248,11 +248,21 @@ outputs: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS deploy_steps_tasks: - name: validate nova metadata container state - when: step|int == 5 +
when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: "{{ container_cli }} exec nova_metadata /openstack/healthcheck" + block: + - name: Get nova-metadata healthcheck status + register: nova_metadata_healthcheck_state + systemd: + name: tripleo_nova_metadata_healthcheck + - name: Fail if nova-metadata healthcheck report failed status + fail: + msg: nova-metadata isn't working (healthcheck failed) + when: nova_metadata_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: {get_attr: [NovaMetadataLogging, host_prep_tasks]} external_upgrade_tasks: - when: diff --git a/deployment/nova/nova-migration-target-container-puppet.yaml b/deployment/nova/nova-migration-target-container-puppet.yaml index e0dffd7f6d..b512c8db56 100644 --- a/deployment/nova/nova-migration-target-container-puppet.yaml +++ b/deployment/nova/nova-migration-target-container-puppet.yaml @@ -173,11 +173,21 @@ outputs: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS deploy_steps_tasks: - name: validate nova migration target container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: "{{ container_cli }} exec nova_migration_target /openstack/healthcheck" + block: + - name: Get nova-migration-target healthcheck status + register: nova_migration_target_healthcheck_state + systemd: + name: tripleo_nova_migration_target_healthcheck + - name: Fail if nova-migration-target healthcheck report failed status + fail: + msg: nova-migration-target isn't working (healthcheck failed) + when: nova_migration_target_healthcheck_state.status.ExecMainStatus != '0' post_upgrade_tasks: - when: step|int == 1 import_role: diff --git a/deployment/nova/nova-scheduler-container-puppet.yaml b/deployment/nova/nova-scheduler-container-puppet.yaml index 7f2809b5b0..4686600e96 100644 --- a/deployment/nova/nova-scheduler-container-puppet.yaml +++
b/deployment/nova/nova-scheduler-container-puppet.yaml @@ -188,15 +188,21 @@ outputs: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS deploy_steps_tasks: - name: validate nova scheduler container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: - str_replace: - template: "{{ container_cli }} exec nova_scheduler HEALTHCHECK" - params: - HEALTHCHECK: {get_attr: [ContainersCommon, healthcheck_rpc_port, test]} + block: + - name: Get nova-scheduler healthcheck status + register: nova_scheduler_healthcheck_state + systemd: + name: tripleo_nova_scheduler_healthcheck + - name: Fail if nova-scheduler healthcheck report failed status + fail: + msg: nova-scheduler isn't working (healthcheck failed) + when: nova_scheduler_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]} diff --git a/deployment/nova/nova-vnc-proxy-container-puppet.yaml b/deployment/nova/nova-vnc-proxy-container-puppet.yaml index 033752bbdc..15e02bd201 100644 --- a/deployment/nova/nova-vnc-proxy-container-puppet.yaml +++ b/deployment/nova/nova-vnc-proxy-container-puppet.yaml @@ -284,11 +284,21 @@ outputs: - null deploy_steps_tasks: - name: validate nova-vnc-proxy container state - when: step|int == 5 + when: + - not container_healthcheck_disabled + - step|int == 5 tags: - opendev-validation - opendev-validation-nova - command: "{{ container_cli }} exec nova_vnc_proxy /openstack/healthcheck" + block: + - name: Get nova-vnc-proxy healthcheck status + register: nova_vnc_proxy_healthcheck_state + systemd: + name: tripleo_nova_vnc_proxy_healthcheck + - name: Fail if nova-vnc-proxy healthcheck report failed status + fail: + msg: nova-vnc-proxy isn't working (healthcheck failed) + when: nova_vnc_proxy_healthcheck_state.status.ExecMainStatus != '0' host_prep_tasks: {get_attr: [NovaLogging, host_prep_tasks]} fast_forward_upgrade_tasks: - when: