HA: fix <service>_restart_bundle with minor update workflow

For each HA service we have a paunch container <service>_restart_bundle
which is started by paunch whenever config files change during stack
deploy/update. This container runs a pcs command on a single node to
restart all the service's containers (e.g. all galera on all controllers).
By design, when it is run, configs have already been regenerated by the
deploy tasks on all nodes.

For minor updates, the workflow runs differently: all the steps of the
deploy tasks are run one node after the other, so when
<service>_restart_bundle is called, there is no guarantee that the
service's configs have been regenerated on all the nodes yet.

To fix the wrong restart behaviour, only restart local containers when
running during a minor update, and do so once per node. When the minor
update workflow calls <service>_restart_bundle, we still have the
guarantee that the config files are already regenerated locally.

Co-Authored-By: Michele Baldessari <michele@acksyn.org>
Co-Authored-By: Luca Miccini <lmiccini@redhat.com>

Change-Id: I92d4ddf2feeac06ce14468ae928c283f3fd04f45
Closes-Bug: #1841629
This commit is contained in:
Damien Ciabrini 2019-08-28 18:25:43 +02:00
parent 759df5a6be
commit 7f785e8757
12 changed files with 93 additions and 200 deletions

View File

@ -0,0 +1,40 @@
#!/bin/bash
#
# Restart a pacemaker-managed resource (bundle) so that it re-reads its
# regenerated configuration.
#
# Usage:
#   ./pacemaker_restart_bundle.sh <resource> <tripleo_service>
#   e.g. ./pacemaker_restart_bundle.sh galera-bundle galera
#
# Arguments:
#   $1  name of the pacemaker resource to restart
#   $2  TripleO service name, handed to bootstrap_host_exec
#
# Environment:
#   TRIPLEO_MINOR_UPDATE  "true" (any letter case) selects the node-local
#                         restart path; any other value, or leaving it
#                         unset, selects the cluster-wide restart path.
#
# Note: the value given to --wait= below is a template token, not shell
# syntax; it is expected to be substituted with the restart timeout (in
# seconds) before this script is installed.
#
# Exit status: that of the last command run; 0 when the resource does not
# exist yet or is not running locally, non-zero when a restart fails or
# when the local hostname cannot be determined.

set -u

usage="usage: $0 <resource> <tripleo_service>"
RESOURCE=${1:?$usage}
TRIPLEO_SERVICE=${2:?$usage}
# The variable may be absent from the environment; without a default,
# 'set -u' would abort the script on the case-folding expansion below
# instead of performing the regular (global) restart.
TRIPLEO_MINOR_UPDATE=${TRIPLEO_MINOR_UPDATE:-false}

# Try to restart only if the resource has been created already.
if /usr/sbin/pcs resource show "$RESOURCE"; then
  if [[ "${TRIPLEO_MINOR_UPDATE,,}" != "true" ]]; then
    # During a stack update, this script is called in parallel on
    # every node the resource runs on, after the service's configs
    # have been updated on all nodes. So we need to run pcs only
    # once (e.g. on the service's bootstrap node).
    echo "$(date -u): Restarting ${RESOURCE} globally"
    /usr/bin/bootstrap_host_exec "$TRIPLEO_SERVICE" /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ "$RESOURCE"
  else
    # During a minor update workflow however, a host gets fully
    # updated before updating the next one. So unlike stack
    # update, at the time this script is called, the service's
    # configs aren't updated on all nodes yet. So only restart the
    # resource locally, where it's guaranteed that the config is
    # up to date.
    if ! HOST=$(facter hostname) || [[ -z "$HOST" ]]; then
      # An empty host name can never match a node in the cluster status,
      # which would silently skip a restart that is actually required.
      echo "$(date -u): Unable to determine the local hostname, cannot restart ${RESOURCE} locally" >&2
      exit 1
    fi
    # XPath rationale: as long as there is a bundle running
    # locally and it is managed by pacemaker, no matter the state
    # of any inner pcmk_remote or ocf resource, we should restart
    # it to give it a chance to read the new config.
    # XPath rationale 2: if the resource is being stopped, the
    # attribute "target_role" will be present in the output of
    # crm_mon. Do not restart the resource if that is the case.
    local_bundle_xpath="//bundle[@id='${RESOURCE}']/replica/resource[@managed='true' and (not(boolean(@target_role)) or (boolean(@target_role) and @target_role!='Stopped'))]/node[@name='${HOST}']/../.."
    # xmllint's exit status tells whether the XPath matched anything;
    # its output is irrelevant here and is discarded.
    if crm_mon -r --as-xml | xmllint --format --xpath "$local_bundle_xpath" - &>/dev/null; then
      echo "$(date -u): Restarting ${RESOURCE} locally on '${HOST}'"
      /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ "$RESOURCE" "${HOST}"
    else
      echo "$(date -u): Resource ${RESOURCE} currently not running on '${HOST}', no restart needed"
    fi
  fi
fi

View File

@ -52,11 +52,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -187,24 +182,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh openstack-cinder-backup cinder_backup
- '/usr/bin/bootstrap_host_exec'
- 'cinder_backup'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-cinder-backup; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-backup; echo "openstack-cinder-backup restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerCinderBackupImage} image: {get_param: ContainerCinderBackupImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
cinder_backup_init_bundle: cinder_backup_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -40,11 +40,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -172,24 +167,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh openstack-cinder-volume cinder_volume
- '/usr/bin/bootstrap_host_exec'
- 'cinder_volume'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-cinder-volume; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-volume; echo "openstack-cinder-volume restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerCinderVolumeImage} image: {get_param: ContainerCinderVolumeImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
cinder_volume_init_bundle: cinder_volume_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -47,6 +47,12 @@ parameters:
description: The network port for messaging backend description: The network port for messaging backend
type: number type: number
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pacemaker resource to restart when
a config change is detected and the resource is being restarted
type: number
conditions: conditions:
internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]} internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
@ -90,6 +96,13 @@ outputs:
pyshim.sh: pyshim.sh:
mode: "0755" mode: "0755"
content: { get_file: ../container_config_scripts/pyshim.sh } content: { get_file: ../container_config_scripts/pyshim.sh }
pacemaker_restart_bundle.sh:
mode: "0755"
content:
str_replace:
template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
params:
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
volumes_base: volumes_base:
description: Base volume list description: Base volume list
@ -123,6 +136,17 @@ outputs:
# required for bootstrap_host_exec # required for bootstrap_host_exec
- /etc/puppet:/etc/puppet:ro - /etc/puppet:/etc/puppet:ro
pacemaker_restart_volumes:
description: Common volumes for the pacemaker restart containers.
value:
list_concat:
- *volumes_base
- - /var/lib/container-config-scripts/pacemaker_restart_bundle.sh:/pacemaker_restart_bundle.sh:ro
- /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- /dev/shm:/dev/shm:rw
# required for bootstrap_host_exec, facter
- /etc/puppet:/etc/puppet:ro
container_puppet_apply_volumes: container_puppet_apply_volumes:
description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
value: value:

View File

@ -67,11 +67,6 @@ parameters:
description: > description: >
Setting this to a unique value will re-run any deployment tasks which Setting this to a unique value will re-run any deployment tasks which
perform configuration on a Heat stack-update. perform configuration on a Heat stack-update.
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -271,24 +266,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh galera-bundle mysql
- '/usr/bin/bootstrap_host_exec'
- 'mysql'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show galera-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT galera-bundle; echo "galera-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerMysqlImage} image: {get_param: ContainerMysqlImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/mysql/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/mysql/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
mysql_init_bundle: mysql_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -47,11 +47,6 @@ parameters:
default: false default: false
description: Enable IPv6 in Redis description: Enable IPv6 in Redis
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -219,24 +214,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh redis-bundle redis
- '/usr/bin/bootstrap_host_exec'
- 'redis'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show redis-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT redis-bundle; echo "redis-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerRedisConfigImage} image: {get_param: ContainerRedisConfigImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/redis/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/redis/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
- redis_init_bundle: - redis_init_bundle:
start_order: 2 start_order: 2
detach: false detach: false

View File

@ -82,11 +82,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -238,24 +233,12 @@ outputs:
config_volume: haproxy config_volume: haproxy
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh haproxy-bundle haproxy
- '/usr/bin/bootstrap_host_exec'
- 'haproxy'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show haproxy-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT haproxy-bundle; echo "haproxy-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerHAProxyImage} image: {get_param: ContainerHAProxyImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/haproxy/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/haproxy/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
haproxy_init_bundle: haproxy_init_bundle:
start_order: 3 start_order: 3
detach: false detach: false

View File

@ -40,11 +40,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -153,24 +148,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh openstack-manila-share manila_share
- '/usr/bin/bootstrap_host_exec'
- 'manila_share'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show openstack-manila-share; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-manila-share; echo "openstack-manila-share restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerManilaShareImage} image: {get_param: ContainerManilaShareImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/manila/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/manila/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
manila_share_init_bundle: manila_share_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -48,11 +48,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -146,22 +141,9 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh ovn-dbs-bundle ovn_dbs
- '/usr/bin/bootstrap_host_exec'
- 'ovn_dbs'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show ovn-dbs-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT ovn-dbs-bundle; echo "ovn-dbs-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerOvnDbsConfigImage} image: {get_param: ContainerOvnDbsConfigImage}
volumes: volumes: {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
list_concat:
- {get_attr: [ContainersCommon, volumes]}
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
ovn_dbs_init_bundle: ovn_dbs_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -44,11 +44,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -194,24 +189,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_notify
- '/usr/bin/bootstrap_host_exec'
- 'oslo_messaging_notify'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerRabbitmqImage} image: {get_param: ContainerRabbitmqImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
rabbitmq_init_bundle: rabbitmq_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -44,11 +44,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -194,24 +189,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh rabbitmq-bundle rabbitmq
- '/usr/bin/bootstrap_host_exec'
- 'rabbitmq'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerRabbitmqImage} image: {get_param: ContainerRabbitmqImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
rabbitmq_init_bundle: rabbitmq_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false

View File

@ -44,11 +44,6 @@ parameters:
default: false default: false
description: Whether to run config management (e.g. Puppet) in debug mode. description: Whether to run config management (e.g. Puppet) in debug mode.
type: boolean type: boolean
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pcmk resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli: ContainerCli:
type: string type: string
default: 'podman' default: 'podman'
@ -194,24 +189,12 @@ outputs:
user: root user: root
environment: environment:
- TRIPLEO_MINOR_UPDATE - TRIPLEO_MINOR_UPDATE
command: command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_rpc
- '/usr/bin/bootstrap_host_exec'
- 'oslo_messaging_rpc'
- str_replace:
template:
'if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ] && /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi'
params:
PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout}
image: {get_param: ContainerRabbitmqImage} image: {get_param: ContainerRabbitmqImage}
volumes: volumes:
list_concat: list_concat:
- {get_attr: [ContainersCommon, volumes]} - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
- - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
rabbitmq_init_bundle: rabbitmq_init_bundle:
start_order: 1 start_order: 1
detach: false detach: false