From 6464efdc4ee89d73f523fef3e919826407a9caf5 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Tue, 14 Apr 2020 18:06:04 -0400 Subject: [PATCH] Migrate inflight validations to native podman healthchecks The systemd healthchecks are going away, so we can use the native podman healthchecks interface. See I37508cd8243999389f9e17d5ea354529bb042279 for the whole context. This patch does the following: - Migrate the healthcheck checks to use podman inspect instead of systemd service status. - Force the tasks to not run (by adding a `false` entry to their `when` conditions), because we first need https://review.opendev.org/#/c/720061 to merge. Once https://review.opendev.org/#/c/720061 is merged, we'll remove the condition workaround, unify the way containers are checked, and use the role in tripleo-validations. Depends-On: https://review.opendev.org/720283 Change-Id: I7172d81d305ac8939bee5e7f64960b0a9fea8627 --- .../keystone/keystone-container-puppet.yaml | 25 ++++++++------- .../nova/nova-api-container-puppet.yaml | 32 ++++++++----------- .../nova/nova-compute-container-puppet.yaml | 32 ++++++++----------- .../nova/nova-conductor-container-puppet.yaml | 32 ++++++++----------- .../nova/nova-libvirt-container-puppet.yaml | 30 +++++++---------- .../nova/nova-metadata-container-puppet.yaml | 32 ++++++++----------- ...ova-migration-target-container-puppet.yaml | 32 ++++++++----------- .../nova/nova-scheduler-container-puppet.yaml | 32 ++++++++----------- .../nova/nova-vnc-proxy-container-puppet.yaml | 30 +++++++---------- 9 files changed, 115 insertions(+), 162 deletions(-) diff --git a/deployment/keystone/keystone-container-puppet.yaml b/deployment/keystone/keystone-container-puppet.yaml index 7a8aa257ad..2e78ae2c4e 100644 --- a/deployment/keystone/keystone-container-puppet.yaml +++ b/deployment/keystone/keystone-container-puppet.yaml @@ -754,21 +754,22 @@ outputs: tripleo_keystone_resources_cloud_name: {get_param: RootStackName} batched_tripleo_keystone_resources_domains: "{{ tripleo_keystone_ldap_domains 
| list }}" deploy_steps_tasks: - - name: validate keystone service state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 4 + - name: validate keystone container state + podman_container_info: + name: keystone + register: keystone_infos + failed_when: + - "'healthy' not in keystone_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-keystone - block: - - name: Get keystone service healthcheck status - import_role: - name: healthcheck_service_status - vars: - inflight_healthcheck_services: - - tripleo_keystone_healthcheck + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 4 container_puppet_tasks: # Keystone endpoint creation occurs only on single node step_3: diff --git a/deployment/nova/nova-api-container-puppet.yaml b/deployment/nova/nova-api-container-puppet.yaml index 5f5bbe6a53..4c810afbad 100644 --- a/deployment/nova/nova-api-container-puppet.yaml +++ b/deployment/nova/nova-api-container-puppet.yaml @@ -467,28 +467,22 @@ outputs: metadata_settings: get_attr: [ApacheServiceBase, role_data, metadata_settings] deploy_steps_tasks: - - name: validate nova api container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + - name: validate nova-api container state + podman_container_info: + name: nova_api + register: nova_api_infos + failed_when: + - "'healthy' not in nova_api_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-api healthcheck status - register: nova_api_healthcheck_state - systemd: - name: tripleo_nova_api_healthcheck - retries: 10 - delay: 30 - until: nova_api_healthcheck_state.status.ExecMainPID != '0' and - nova_api_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-api healthcheck report failed 
status - fail: - msg: nova-api isn't working (healthcheck failed) - when: nova_api_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 4 host_prep_tasks: {get_attr: [NovaApiLogging, host_prep_tasks]} external_upgrade_tasks: - when: step|int == 1 diff --git a/deployment/nova/nova-compute-container-puppet.yaml b/deployment/nova/nova-compute-container-puppet.yaml index 9acc0b3589..f8b2b22742 100644 --- a/deployment/nova/nova-compute-container-puppet.yaml +++ b/deployment/nova/nova-compute-container-puppet.yaml @@ -1053,28 +1053,22 @@ outputs: - ksm.service - ksmtuned.service deploy_steps_tasks: - - name: validate nova compute container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 6 #FIXME: there is no step6 + - name: validate nova-compute container state + podman_container_info: + name: nova_compute + register: nova_compute_infos + failed_when: + - "'healthy' not in nova_compute_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-compute healthcheck status - register: nova_compute_healthcheck_state - systemd: - name: tripleo_nova_compute_healthcheck - retries: 10 - delay: 30 - until: nova_compute_healthcheck_state.status.ExecMainPID != '0' and - nova_compute_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-compute healthcheck report failed status - fail: - msg: nova-compute isn't working (healthcheck failed) - when: nova_compute_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 6 #FIXME: there is no step6 external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]} upgrade_tasks: - name: Remove openstack-nova-compute and python-nova 
package during upgrade diff --git a/deployment/nova/nova-conductor-container-puppet.yaml b/deployment/nova/nova-conductor-container-puppet.yaml index 7b7fdee6a9..d85540b225 100644 --- a/deployment/nova/nova-conductor-container-puppet.yaml +++ b/deployment/nova/nova-conductor-container-puppet.yaml @@ -178,28 +178,22 @@ outputs: environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS deploy_steps_tasks: - - name: validate nova conductor container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + - name: validate nova-conductor container state + podman_container_info: + name: nova_conductor + register: nova_conductor_infos + failed_when: + - "'healthy' not in nova_conductor_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-conductor healthcheck status - register: nova_conductor_healthcheck_state - systemd: - name: tripleo_nova_conductor_healthcheck - retries: 10 - delay: 30 - until: nova_conductor_healthcheck_state.status.ExecMainPID != '0' and - nova_conductor_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-conductor healthcheck report failed status - fail: - msg: nova-conductor isn't working (healthcheck failed) - when: nova_conductor_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 5 host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]} diff --git a/deployment/nova/nova-libvirt-container-puppet.yaml b/deployment/nova/nova-libvirt-container-puppet.yaml index 8acb8691c3..c4f15f6ba1 100644 --- a/deployment/nova/nova-libvirt-container-puppet.yaml +++ b/deployment/nova/nova-libvirt-container-puppet.yaml @@ -794,27 +794,21 @@ outputs: - {} deploy_steps_tasks: - name: validate nova-libvirt container state - when: - - container_cli == 'podman' - - not 
container_healthcheck_disabled - - step|int == 4 + podman_container_info: + name: nova_libvirt + register: nova_libvirt_infos + failed_when: + - "'healthy' not in nova_libvirt_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-libvirt healthcheck status - register: nova_libvirt_healthcheck_state - systemd: - name: tripleo_nova_libvirt_healthcheck - retries: 10 - delay: 30 - until: nova_libvirt_healthcheck_state.status.ExecMainPID != '0' and - nova_libvirt_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-libvirt healthcheck report failed status - fail: - msg: nova-libvirt isn't working (healtcheck failed) - when: nova_libvirt_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 4 host_prep_tasks: list_concat: - {get_attr: [NovaLibvirtLogging, host_prep_tasks]} diff --git a/deployment/nova/nova-metadata-container-puppet.yaml b/deployment/nova/nova-metadata-container-puppet.yaml index 69ea4424a3..d48390d45c 100644 --- a/deployment/nova/nova-metadata-container-puppet.yaml +++ b/deployment/nova/nova-metadata-container-puppet.yaml @@ -237,28 +237,22 @@ outputs: environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS deploy_steps_tasks: - - name: validate nova metadata container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + - name: validate nova-metadata container state + podman_container_info: + name: nova_metadata + register: nova_metadata_infos + failed_when: + - "'healthy' not in nova_metadata_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-metadata healthcheck status - register: nova_metadata_healthcheck_state - systemd: - name: tripleo_nova_metadata_healthcheck - retries: 10 - 
delay: 30 - until: nova_metadata_healthcheck_state.status.ExecMainPID != '0' and - nova_metadata_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-metadata healthcheck report failed status - fail: - msg: nova-metadata isn't working (healthcheck failed) - when: nova_metadata_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 5 host_prep_tasks: {get_attr: [NovaMetadataLogging, host_prep_tasks]} external_upgrade_tasks: - when: diff --git a/deployment/nova/nova-migration-target-container-puppet.yaml b/deployment/nova/nova-migration-target-container-puppet.yaml index ee76adc338..5a166fc6ea 100644 --- a/deployment/nova/nova-migration-target-container-puppet.yaml +++ b/deployment/nova/nova-migration-target-container-puppet.yaml @@ -181,25 +181,19 @@ outputs: environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS deploy_steps_tasks: - - name: validate nova migration target container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + - name: validate nova-migration-target container state + podman_container_info: + name: nova_migration_target + register: nova_migration_target_infos + failed_when: + - "'healthy' not in nova_migration_target_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-migration-target healthcheck status - register: nova_migration_target_healthcheck_state - systemd: - name: tripleo_nova_migration_target_healthcheck - retries: 10 - delay: 30 - until: nova_migration_target_healthcheck_state.status.ExecMainPID != '0' and - nova_migration_target_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-migration-target healthcheck report failed status - fail: - msg: nova-migration-target isn't working (healtcheck 
failed) - when: nova_migration_target_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 5 diff --git a/deployment/nova/nova-scheduler-container-puppet.yaml b/deployment/nova/nova-scheduler-container-puppet.yaml index a49ca676ce..e217479421 100644 --- a/deployment/nova/nova-scheduler-container-puppet.yaml +++ b/deployment/nova/nova-scheduler-container-puppet.yaml @@ -208,28 +208,22 @@ outputs: environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS deploy_steps_tasks: - - name: validate nova scheduler container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + - name: validate nova-scheduler container state + podman_container_info: + name: nova_scheduler + register: nova_scheduler_infos + failed_when: + - "'healthy' not in nova_scheduler_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-scheduler healthcheck status - register: nova_scheduler_healthcheck_state - systemd: - name: tripleo_nova_scheduler_healthcheck - retries: 10 - delay: 30 - until: nova_scheduler_healthcheck_state.status.ExecMainPID != '0' and - nova_scheduler_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-scheduler healthcheck report failed status - fail: - msg: nova-scheduler isn't working (healthcheck failed) - when: nova_scheduler_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 5 host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]} diff --git a/deployment/nova/nova-vnc-proxy-container-puppet.yaml b/deployment/nova/nova-vnc-proxy-container-puppet.yaml index ec9ce1796e..0c0d038477 100644 --- a/deployment/nova/nova-vnc-proxy-container-puppet.yaml +++ 
b/deployment/nova/nova-vnc-proxy-container-puppet.yaml @@ -301,27 +301,21 @@ outputs: - null deploy_steps_tasks: - name: validate nova-vnc-proxy container state - when: - - container_cli == 'podman' - - not container_healthcheck_disabled - - step|int == 5 + podman_container_info: + name: nova_vnc_proxy + register: nova_vnc_proxy_infos + failed_when: + - "'healthy' not in nova_vnc_proxy_infos.containers.0.Healthcheck.Status" + retries: 10 + delay: 30 tags: - opendev-validation - opendev-validation-nova - block: - - name: Get nova-vnc-proxy healthcheck status - register: nova_vnc_proxy_healthcheck_state - systemd: - name: tripleo_nova_vnc_proxy_healthcheck - retries: 10 - delay: 30 - until: nova_vnc_proxy_healthcheck_state.status.ExecMainPID != '0' and - nova_vnc_proxy_healthcheck_state.status.ActiveState in ['inactive', 'failed'] - failed_when: false - - name: Fail if nova-vnc-proxy healtcheck report failed status - fail: - msg: nova-vnc-proxy isn't working (healthcheck failed) - when: nova_vnc_proxy_healthcheck_state.status.ExecMainStatus != '0' + when: + - false + - container_cli == 'podman' + - not container_healthcheck_disabled + - step|int == 5 host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]}