diff --git a/container_config_scripts/pacemaker_restart_bundle.sh b/container_config_scripts/pacemaker_restart_bundle.sh
new file mode 100644
index 0000000000..f813ff6b7f
--- /dev/null
+++ b/container_config_scripts/pacemaker_restart_bundle.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Restart a pacemaker bundle so that it picks up updated configuration.
+#
+# Usage: pacemaker_restart_bundle.sh RESOURCE TRIPLEO_SERVICE
+#   e.g. pacemaker_restart_bundle.sh galera-bundle galera
+#
+# TRIPLEO_MINOR_UPDATE (environment, optional): when "true" (any case) the
+# resource is only restarted on the local node, see below.
+
+set -u
+
+RESOURCE=${1:?"usage: $0 RESOURCE TRIPLEO_SERVICE"}
+TRIPLEO_SERVICE=${2:?"usage: $0 RESOURCE TRIPLEO_SERVICE"}
+
+PCS=/usr/sbin/pcs
+# Restart timeout in seconds; the placeholder is substituted by Heat's
+# str_replace when the script is rendered.
+RESTART_TIMEOUT=__PCMKTIMEOUT__
+# Default to empty: with "set -u" an unset variable would abort the script.
+MINOR_UPDATE=${TRIPLEO_MINOR_UPDATE:-}
+
+# try to restart only if resource has been created already
+if "$PCS" resource show "$RESOURCE"; then
+    if [ "${MINOR_UPDATE,,}" != "true" ]; then
+        # During a stack update, this script is called in parallel on
+        # every node the resource runs on, after the service's configs
+        # have been updated on all nodes. So we need to run pcs only
+        # once (e.g. on the service's bootstrap node).
+        echo "$(date -u): Restarting ${RESOURCE} globally"
+        /usr/bin/bootstrap_host_exec "$TRIPLEO_SERVICE" "$PCS" resource restart --wait="$RESTART_TIMEOUT" "$RESOURCE"
+    else
+        # During a minor update workflow however, a host gets fully
+        # updated before updating the next one. So unlike stack
+        # update, at the time this script is called, the service's
+        # configs aren't updated on all nodes yet. So only restart the
+        # resource locally, where it's guaranteed that the config is
+        # up to date.
+        HOST=$(facter hostname)
+        if [ -z "$HOST" ]; then
+            # An empty host name would make the XPath below match nothing
+            # and silently skip the restart.
+            echo "$(date -u): Unable to determine the local hostname, cannot restart ${RESOURCE}" >&2
+            exit 1
+        fi
+        # XPath rationale: as long as there is a bundle running
+        # locally and it is managed by pacemaker, no matter the state
+        # of any inner pcmk_remote or ocf resource, we should restart
+        # it to give it a chance to read the new config.
+        # XPath rationale 2: if the resource is being stopped, the
+        # attribute "target_role" will be present in the output of
+        # crm_mon. Do not restart the resource if that is the case.
+        if crm_mon -r --as-xml | xmllint --format --xpath "//bundle[@id='${RESOURCE}']/replica/resource[@managed='true' and (not(boolean(@target_role)) or (boolean(@target_role) and @target_role!='Stopped'))]/node[@name='${HOST}']/../.." - &>/dev/null; then
+            echo "$(date -u): Restarting ${RESOURCE} locally on '${HOST}'"
+            "$PCS" resource restart --wait="$RESTART_TIMEOUT" "$RESOURCE" "${HOST}"
+        else
+            echo "$(date -u): Resource ${RESOURCE} currently not running on '${HOST}', no restart needed"
+        fi
+    fi
+fi
diff --git a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
index 184930a769..e3a88c1526 100644
--- a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
+++ b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
@@ -52,11 +52,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -187,24 +182,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'cinder_backup'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-cinder-backup; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-backup; echo "openstack-cinder-backup restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh openstack-cinder-backup cinder_backup
             image: {get_param: DockerCinderBackupImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
           cinder_backup_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/cinder/cinder-volume-pacemaker-puppet.yaml b/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
index ab3e4961fe..cc40e8b165 100644
--- a/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
+++ b/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
@@ -40,11 +40,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -172,24 +167,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'cinder_volume'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-cinder-volume; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-volume; echo "openstack-cinder-volume restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh openstack-cinder-volume cinder_volume
             image: {get_param: DockerCinderVolumeImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
           cinder_volume_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/containers-common.yaml b/deployment/containers-common.yaml
index 98d91eb321..495493c960 100644
--- a/deployment/containers-common.yaml
+++ b/deployment/containers-common.yaml
@@ -47,6 +47,12 @@ parameters:
     description: The network port for messaging backend
     type: number
 
+  PcmkConfigRestartTimeout:
+    default: 600
+    description: Time in seconds to wait for a pacemaker resource to restart when
+                 a config change is detected and the resource is being restarted
+    type: number
+
 conditions:
 
   internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
@@ -90,6 +96,13 @@ outputs:
       pyshim.sh:
         mode: "0755"
         content: { get_file: ../container_config_scripts/pyshim.sh }
+      pacemaker_restart_bundle.sh:
+        mode: "0755"
+        content:
+          str_replace:
+            template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
+            params:
+              __PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
 
   volumes_base:
     description: Base volume list
@@ -123,6 +136,17 @@ outputs:
           # required for bootstrap_host_exec
           - /etc/puppet:/etc/puppet:ro
 
+  pacemaker_restart_volumes:
+    description: Common volumes for the pacemaker restart containers.
+    value:
+      list_concat:
+        - *volumes_base
+        - - /var/lib/container-config-scripts/pacemaker_restart_bundle.sh:/pacemaker_restart_bundle.sh:ro
+          - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
+          - /dev/shm:/dev/shm:rw
+          # required for bootstrap_host_exec, facter
+          - /etc/puppet:/etc/puppet:ro
+
   container_puppet_apply_volumes:
     description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
     value:
diff --git a/deployment/database/mysql-pacemaker-puppet.yaml b/deployment/database/mysql-pacemaker-puppet.yaml
index 204ebbd82b..6e1bcde9b5 100644
--- a/deployment/database/mysql-pacemaker-puppet.yaml
+++ b/deployment/database/mysql-pacemaker-puppet.yaml
@@ -67,11 +67,6 @@ parameters:
     description: >
       Setting this to a unique value will re-run any deployment tasks which
       perform configuration on a Heat stack-update.
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -271,24 +266,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'mysql'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show galera-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT galera-bundle; echo "galera-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh galera-bundle mysql
             image: {get_param: DockerMysqlImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/mysql/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/mysql/:/var/lib/kolla/config_files/src:ro
           mysql_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/database/redis-pacemaker-puppet.yaml b/deployment/database/redis-pacemaker-puppet.yaml
index 7eb5473797..213b02168d 100644
--- a/deployment/database/redis-pacemaker-puppet.yaml
+++ b/deployment/database/redis-pacemaker-puppet.yaml
@@ -47,11 +47,6 @@ parameters:
     default: false
     description: Enable IPv6 in Redis
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -219,24 +214,12 @@ outputs:
                 user: root
                 environment:
                   - TRIPLEO_MINOR_UPDATE
-                command:
-                  - '/usr/bin/bootstrap_host_exec'
-                  - 'redis'
-                  - str_replace:
-                      template:
-                        'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show redis-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT redis-bundle; echo "redis-bundle restart invoked"; fi'
-                      params:
-                        PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+                command: /pacemaker_restart_bundle.sh redis-bundle redis
                 image: {get_param: DockerRedisConfigImage}
                 volumes:
                   list_concat:
-                    - {get_attr: [ContainersCommon, volumes]}
-                    -
-                      - /var/lib/config-data/puppet-generated/redis/:/var/lib/kolla/config_files/src:ro
-                    - if:
-                      - docker_enabled
-                      - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                      - null
+                    - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                    - - /var/lib/config-data/puppet-generated/redis/:/var/lib/kolla/config_files/src:ro
             - redis_init_bundle:
                 start_order: 2
                 detach: false
diff --git a/deployment/haproxy/haproxy-pacemaker-puppet.yaml b/deployment/haproxy/haproxy-pacemaker-puppet.yaml
index 2dfde04538..82daaa610e 100644
--- a/deployment/haproxy/haproxy-pacemaker-puppet.yaml
+++ b/deployment/haproxy/haproxy-pacemaker-puppet.yaml
@@ -82,11 +82,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -238,24 +233,12 @@ outputs:
             config_volume: haproxy
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'haproxy'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show haproxy-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT haproxy-bundle; echo "haproxy-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh haproxy-bundle haproxy
             image: {get_param: DockerHAProxyImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/haproxy/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/haproxy/:/var/lib/kolla/config_files/src:ro
           haproxy_init_bundle:
             start_order: 3
             detach: false
diff --git a/deployment/manila/manila-share-pacemaker-puppet.yaml b/deployment/manila/manila-share-pacemaker-puppet.yaml
index fc2ded84f3..d485d05ce3 100644
--- a/deployment/manila/manila-share-pacemaker-puppet.yaml
+++ b/deployment/manila/manila-share-pacemaker-puppet.yaml
@@ -40,11 +40,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -153,24 +148,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'manila_share'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-manila-share; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-manila-share; echo "openstack-manila-share restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh openstack-manila-share manila_share
             image: {get_param: DockerManilaShareImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/manila/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/manila/:/var/lib/kolla/config_files/src:ro
           manila_share_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
index 957c7d0aff..c03ad3197b 100644
--- a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
+++ b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
@@ -48,11 +48,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -146,22 +141,9 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'ovn_dbs'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show ovn-dbs-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT ovn-dbs-bundle; echo "ovn-dbs-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh ovn-dbs-bundle ovn_dbs
             image: {get_param: DockerOvnDbsConfigImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+            volumes: {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
           ovn_dbs_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
index 5340eb1e1d..bdb8328fa3 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
@@ -44,11 +44,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -194,24 +189,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'oslo_messaging_notify'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_notify
             image: {get_param: DockerRabbitmqImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
index d0b16a3752..b8955c7633 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
@@ -44,11 +44,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -194,24 +189,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'rabbitmq'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh rabbitmq-bundle rabbitmq
             image: {get_param: DockerRabbitmqImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
             start_order: 1
             detach: false
diff --git a/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
index 86a79ca045..b6e3f42047 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
@@ -44,11 +44,6 @@ parameters:
     default: false
     description: Whether to run config management (e.g. Puppet) in debug mode.
     type: boolean
-  PcmkConfigRestartTimeout:
-    default: 600
-    description: Time in seconds to wait for a pcmk resource to restart when
-                 a config change is detected and the resource is being restarted
-    type: number
   ContainerCli:
     type: string
     default: 'podman'
@@ -194,24 +189,12 @@ outputs:
             user: root
             environment:
               - TRIPLEO_MINOR_UPDATE
-            command:
-              - '/usr/bin/bootstrap_host_exec'
-              - 'oslo_messaging_rpc'
-              - str_replace:
-                  template:
-                    'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
-                  params:
-                    PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
+            command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_rpc
             image: {get_param: DockerRabbitmqImage}
             volumes:
               list_concat:
-                - {get_attr: [ContainersCommon, volumes]}
-                -
-                  - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
-                - if:
-                  - docker_enabled
-                  - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
-                  - null
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
             start_order: 1
             detach: false