diff --git a/container_config_scripts/pacemaker_restart_bundle.sh b/container_config_scripts/pacemaker_restart_bundle.sh
old mode 100644
new mode 100755
index 7a2bd5916f..195bf0c937
--- a/container_config_scripts/pacemaker_restart_bundle.sh
+++ b/container_config_scripts/pacemaker_restart_bundle.sh
@@ -2,40 +2,69 @@
 set -u
 
-# ./pacemaker_restart_bundle.sh galera-bundle galera
-RESOURCE=$1
-TRIPLEO_SERVICE=$2
-: ${TRIPLEO_MINOR_UPDATE=false}
+# ./pacemaker_restart_bundle.sh mysql galera galera-bundle Master _
+# ./pacemaker_restart_bundle.sh redis redis redis-bundle Slave Master
+# ./pacemaker_restart_bundle.sh ovn_dbs ovndb_servers ovn-dbs-bundle Slave Master
+RESTART_SCRIPTS_DIR=$(dirname $0)
+TRIPLEO_SERVICE=$1
+RESOURCE_NAME=$2
+BUNDLE_NAME=$3
+WAIT_TARGET_LOCAL=$4
+WAIT_TARGET_ANYWHERE=${5:-_}
+TRIPLEO_MINOR_UPDATE="${TRIPLEO_MINOR_UPDATE:-false}"
 
-# try to restart only if resource has been created already
-if /usr/sbin/pcs resource show $RESOURCE; then
-    if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ]; then
+
+if [ x"${TRIPLEO_MINOR_UPDATE,,}" != x"true" ]; then
+    if hiera -c /etc/puppet/hiera.yaml stack_action | grep -q -x CREATE; then
+        # Do not restart during the initial deployment, as the resource
+        # has just been created.
+        exit 0
+    else
         # During a stack update, this script is called in parallel on
         # every node the resource runs on, after the service's configs
         # have been updated on all nodes. So we need to run pcs only
         # once (e.g. on the service's bootstrap node).
-        echo "$(date -u): Restarting ${RESOURCE} globally"
-        /usr/bin/bootstrap_host_exec $TRIPLEO_SERVICE /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ $RESOURCE
+        echo "$(date -u): Restarting ${BUNDLE_NAME} globally"
+        /usr/bin/bootstrap_host_exec $TRIPLEO_SERVICE /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ $BUNDLE_NAME
+    fi
+else
+    # During a minor update workflow however, a host gets fully
+    # updated before the next one is updated. So unlike a stack
+    # update, at the time this script is called, the service's
+    # configs aren't updated on all nodes yet. So only restart the
+    # resource locally, where it's guaranteed that the config is
+    # up to date.
+    HOST=$(facter hostname)
+
+    # As long as the resource bundle is managed by pacemaker and is
+    # not meant to stay stopped, no matter the state of any inner
+    # pcmk_remote or ocf resource, we should restart it to give it a
+    # chance to read the new config.
+    if [ "$(crm_resource --meta -r ${BUNDLE_NAME} -g is-managed 2>/dev/null)" != "false" ] && \
+       [ "$(crm_resource --meta -r ${BUNDLE_NAME} -g target-role 2>/dev/null)" != "Stopped" ]; then
+        # if the resource is running locally, restart it
+        if crm_resource -r $BUNDLE_NAME --locate 2>&1 | grep -w -q "${HOST}"; then
+            echo "$(date -u): Restarting ${BUNDLE_NAME} locally on '${HOST}'"
+            /sbin/pcs resource restart $BUNDLE_NAME "${HOST}"
+
+        else
+            # At this point, if no resource is running locally, it's
+            # either because a) it has failed previously, or b) it's
+            # an A/P resource currently running elsewhere.
+            # By cleaning up the resource, we ensure that a) it will
+            # try to restart, or b) it won't do anything if the
+            # resource is already running elsewhere.
+ echo "$(date -u): ${BUNDLE_NAME} is currently not running on '${HOST}'," \ + "cleaning up its state to restart it if necessary" + /sbin/pcs resource cleanup $BUNDLE_NAME --node "${HOST}" + fi + + # Wait until the resource is in the expected target state + $RESTART_SCRIPTS_DIR/pacemaker_wait_bundle.sh \ + $RESOURCE_NAME $BUNDLE_NAME \ + "$WAIT_TARGET_LOCAL" "$WAIT_TARGET_ANYWHERE" \ + "${HOST}" __PCMKTIMEOUT__ else - # During a minor update workflow however, a host gets fully - # updated before updating the next one. So unlike stack - # update, at the time this script is called, the service's - # configs aren't updated on all nodes yet. So only restart the - # resource locally, where it's guaranteed that the config is - # up to date. - HOST=$(facter hostname) - # XPath rationale: as long as there is a bundle running - # locally and it is managed by pacemaker, no matter the state - # of any inner pcmk_remote or ocf resource, we should restart - # it to give it a chance to read the new config. - # XPath rationale 2: if the resource is being stopped, the - # attribute "target_role" will be present in the output of - # crm_mon. Do not restart the resource if that is the case. - if crm_mon -r --as-xml | xmllint --format --xpath "//bundle[@id='${RESOURCE}']/replica/resource[@managed='true' and (not(boolean(@target_role)) or (boolean(@target_role) and @target_role!='Stopped'))]/node[@name='${HOST}']/../.." - &>/dev/null; then - echo "$(date -u): Restarting ${RESOURCE} locally on '${HOST}'" - /sbin/pcs resource restart --wait=__PCMKTIMEOUT__ $RESOURCE "${HOST}" - else - echo "$(date -u): Resource ${RESOURCE} currently not running on '${HOST}', no restart needed" - fi + echo "$(date -u): No restart needed for ${BUNDLE_NAME}." fi fi diff --git a/container_config_scripts/pacemaker_wait_bundle.sh b/container_config_scripts/pacemaker_wait_bundle.sh new file mode 100755 index 0000000000..20701fdfc2 --- /dev/null +++ b/container_config_scripts/pacemaker_wait_bundle.sh @@ -0,0 +1,320 @@ +#!/bin/bash + +# ---- +# Wait for an OCF resource or a bundle to be restarted +# ---- +# e.g.: +# M/S OCF: $0 galera galera-bundle Master +# clone OCF: $0 rabbitmq rabbitmq-bundle Started +# A/P M/S OCF: $0 redis redis-bundle Slave Master +# A/P bundle: $0 openstack-cinder-volume openstack-cinder-volume _ Started +# clone bundle: $0 haproxy-bundle haproxy-bundle Started + +# design note 1: +# - this script is called during a minor update; it is called +# once per node that hosts a service replica. +# - the purpose of this script is to ensure that restarting the +# service replica locally won't disrupt the service availability +# for the end user. To reach that goal, the script waits until the +# service is restarted locally or globallu and reaches a given +# target state (i.e. Started, Slave or Master). +# design note 2: +# - we don't want to track restart error: our only job is to ensure +# service restart synchronization, not service health. +# - In particular, we don't want to error out in case the resource +# cannot be restarted locally, because that would make the minor +# update fail, even if potentially other replicas still provide +# the service. +# design note 3: +# - we can bail out early if we determine that the resource can't +# be restarted automatically by pacemaker (e.g. its "blocked", +# unmanaged or disabled). 
+
+log() {
+    local msg=$1
+    echo "$(date -u): ${msg}"
+}
+
+usage() {
+    echo >&2 "Usage: $0 NAME BUNDLE_NAME ROLE_LOCAL [ROLE_ANYWHERE] [HOST] [TIMEOUT]"
+    exit 1
+}
+
+
+#
+# Utility functions to detect stuck resources
+#
+
+bundle_failures_locally() {
+    local engine=$BUNDLE_CONTAINER_ENGINE
+    local replicas=$BUNDLE_REPLICAS
+    local last=$(($replicas - 1))
+    local replica_name
+    for i in $(seq 0 $last); do
+        replica_name=${BUNDLE_NAME}-${engine}-${i}
+        crm_failcount -q -G -r $replica_name -N $HOST
+    done
+}
+
+bundle_failures_globally() {
+    local engine=$BUNDLE_CONTAINER_ENGINE
+    local replicas=$BUNDLE_REPLICAS
+    local last=$(($replicas - 1))
+    for i in $(seq 0 $last); do
+        crm_failcount -q -G -r ${BUNDLE_NAME}-${engine}-${i}
+    done
+}
+
+bundle_running_globally() {
+    local engine=$BUNDLE_CONTAINER_ENGINE
+    # return the number of running bundle replicas, i.e. the number of
+    # docker/podman resource replicas currently running in the cluster
+    crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::heartbeat:${engine}']/node)" -
+}
+
+ocf_failures_globally() {
+    local replicas=$BUNDLE_REPLICAS
+    local last=$(($replicas - 1))
+    local bundle_node
+    for i in $(seq 0 $last); do
+        bundle_node=${BUNDLE_NAME}-${i}
+        crm_failcount -q -G -r $NAME -N $bundle_node
+    done
+}
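To make the helpers above concrete: with -q -G, crm_failcount prints only the counter value, and the callers below just count occurrences of the literal INFINITY, pacemaker's marker for a fatal failure. A sketch, assuming a podman container engine and a 3-replica bundle (resource and host names illustrative):

    for i in 0 1 2; do
        # prints one value per replica, e.g. "0", "2" or "INFINITY"
        crm_failcount -q -G -r galera-bundle-podman-$i -N controller-0
    done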
+did_resource_failed_locally() {
+    local failures
+    local running
+    local remotehost
+    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
+        # if we're dealing with an ocf resource, it is running on a
+        # pacemaker_remote rather than on the real host, and the
+        # failcounts are thus associated with the pcmk remote. Replace
+        # the host's name with the pcmk remote's name.
+        remotehost=$(crm_mon --as-xml | xmllint --xpath "string(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource/node[@name='${HOST}']/../../resource[@resource_agent='ocf::pacemaker:remote']/@id)" -)
+        if [ -n "${remotehost}" ]; then
+            crm_failcount -q -G -r $NAME -N $remotehost | grep -q -w INFINITY
+            return $?
+        fi
+        # If no pcmk remote is currently running, the failcount of the
+        # ocf resource is useless; fall back to the bundle's failcount
+        # (computed below).
+    fi
+
+    # for bundles, pacemaker can run any bundle replica locally
+    # (e.g. galera-bundle-docker-{0,1,2}), and a failure happens when
+    # there are no more replicas to try.
+    # That is, when _at least_ one replica failed locally, and all the
+    # others either failed or are currently running elsewhere.
+    failures=$(bundle_failures_locally $HOST | grep -c -w INFINITY)
+    running=$(bundle_running_globally)
+    test $failures -gt 0 && \
+        test $(( $failures + $running )) -ge $BUNDLE_REPLICAS
+}
+
+did_resource_failed_globally() {
+    local remotecount
+    local failures
+    if [ "${NAME}" != "${BUNDLE_NAME}" ]; then
+        # we check the state of an ocf resource only if the
+        # pcmkremotes are started
+        remotecount=$(crm_mon --as-xml | xmllint --xpath "count(//resources/bundle[@id='${BUNDLE_NAME}']/replica/resource[@resource_agent='ocf::pacemaker:remote']/node)" -)
+        if [ "${remotecount}" = "0" ]; then
+            # no pcmkremote is running, so check the bundle state
+            # instead of checking the ocf resource:
+            # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
+            failures=$(bundle_failures_globally | grep -c -w INFINITY)
+            test $failures -eq $BUNDLE_REPLICAS
+        else
+            # the ocf resource failed if it failed to start on
+            # all $BUNDLE_REPLICAS bundle nodes
+            failures=$(ocf_failures_globally | grep -c -w INFINITY)
+            test $failures -eq $BUNDLE_REPLICAS
+        fi
+    else
+        # the bundle failed if all ${BUNDLE_REPLICAS} replicas failed
+        failures=$(bundle_failures_globally | grep -c -w INFINITY)
+        test $failures -eq $BUNDLE_REPLICAS
+    fi
+}
+
+
+# Input validation
+#
+
+NAME=$1
+if [ -z "${NAME}" ]; then
+    echo >&2 "Error: argument NAME must not be empty"
+    exit 1
+fi
+
+BUNDLE_NAME=$2
+if [ -z "${BUNDLE_NAME}" ]; then
+    echo >&2 "Error: argument BUNDLE_NAME must not be empty"
+    exit 1
+fi
+
+ROLE_LOCAL=$3
+if [ "${ROLE_LOCAL}" = "_" ]; then
+    ROLE_LOCAL=""
+fi
+
+ROLE_ANYWHERE=$4
+if [ "${ROLE_ANYWHERE}" = "_" ]; then
+    ROLE_ANYWHERE=""
+fi
+
+if [ -z "${ROLE_LOCAL}" ]; then
+    if [ -z "${ROLE_ANYWHERE}" ]; then
+        echo >&2 "Error: either ROLE_LOCAL or ROLE_ANYWHERE must be non-empty"
+        exit 1
+    fi
+else
+    if ! (echo "${ROLE_LOCAL}" | grep -q -x -E "(Started|Slave|Master)"); then
+        echo >&2 "Error: argument ROLE_LOCAL must be either 'Started', 'Slave' or 'Master'"
+        exit 1
+    fi
+fi
+
+if [ -n "${ROLE_ANYWHERE}" ] && ! (echo "${ROLE_ANYWHERE}" | grep -q -x -E "(Started|Slave|Master)"); then
+    echo >&2 "Error: argument ROLE_ANYWHERE must be either 'Started', 'Slave' or 'Master'"
+    exit 1
+fi
+
+HOST=${5:-$(facter hostname)}
+TIMEOUT=${6:-__PCMKTIMEOUT__}
+
+
+# Configure the search
+# ----
+# Note: we can't use crm_resource in all searches because we can't
+# easily extract the host the OCF resources run on (crm_resource
+# returns the pcmk-remote nodes rather than the hosts).
+# So instead, we implement the various searches with XPath directly.
+
+if [ "${BUNDLE_NAME}" != "${NAME}" ]; then
+    # ocf resource
+    local_resource_xpath="//bundle/replica/resource[@resource_agent='ocf::pacemaker:remote']/node[@name='${HOST}']/../../resource[@id='${NAME}']"
+    any_resource_xpath="//bundle//resource[@id='${NAME}']"
+    replicas_xpath="//bundle/primitive[@id='${BUNDLE_NAME}']/../*[boolean(@image) and boolean(@replicas)]"
+else
+    # bundle resource
+    local_resource_xpath="//bundle[@id='${NAME}']/replica/resource/node[@name='${HOST}']/../../resource"
+    any_resource_xpath="//bundle[@id='${NAME}']//resource"
+    replicas_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
+fi
+
+bundle_def_xpath="//bundle[@id='${BUNDLE_NAME}']/*[boolean(@image) and boolean(@replicas)]"
+BUNDLE_CONTAINER_ENGINE=$(cibadmin -Q | xmllint --xpath "name(${bundle_def_xpath})" -)
+BUNDLE_REPLICAS=$(cibadmin -Q | xmllint --xpath "string(${bundle_def_xpath}/@replicas)" -)
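The XPath patterns can be probed by hand against a live cluster. Instantiating local_resource_xpath for a hypothetical galera/controller-0 pair returns the <resource> element whose role attribute the stage-1 loop below extracts:

    crm_mon -r --as-xml | xmllint --xpath \
        "//bundle/replica/resource[@resource_agent='ocf::pacemaker:remote']/node[@name='controller-0']/../../resource[@id='galera']" -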
+
+
+# The wait algorithm follows a two-stage approach:
+# 1. Depending on how the script is called, we first check whether
+#    the resource is restarted locally. An A/P resource may be
+#    restarted elsewhere in the cluster.
+# 2. If needed, check whether the A/P resource has restarted
+#    elsewhere. For A/P M/S resources, in case the resource is
+#    restarted as Slave locally, ensure a Master is available.
+
+success=1
+bailout=1
+timeout=$TIMEOUT
+role=""
+
+# Stage 1: local check
+if [ -n "$ROLE_LOCAL" ]; then
+    log "Waiting until ${NAME} has restarted on ${HOST} and is in state ${ROLE_LOCAL}"
+    log "Will probe resource state with the following XPath pattern: ${local_resource_xpath}"
+
+    while [ $timeout -gt 0 ] && [ $bailout -ne 0 ] && [ $success -ne 0 ]; do
+        resource=$(crm_mon -r --as-xml | xmllint --xpath "${local_resource_xpath}" - 2>/dev/null)
+        role=$(echo "${resource}" | sed -ne 's/.*\Wrole="\([^"]*\)".*/\1/p')
+
+        if [ "$(crm_resource --meta -r ${NAME} -g is-managed 2>/dev/null)" = "false" ]; then
+            log "${NAME} is unmanaged, will never reach target role. Bailing out"
+            bailout=0
+            continue
+        elif [ "$(crm_resource --meta -r ${NAME} -g target-role 2>/dev/null)" = "Stopped" ]; then
+            log "${NAME} is disabled, will never reach target role. Bailing out"
+            bailout=0
+            continue
+        elif echo "${resource}" | grep -q -w "\Wblocked=\"true\""; then
+            log "${NAME} is blocked, will never reach target role. Bailing out"
+            bailout=0
+            continue
+        elif did_resource_failed_locally; then
+            log "${NAME} is in failed state, will never reach target role. Bailing out"
+            bailout=0
+            continue
+        elif [ "$role" = "$ROLE_LOCAL" ]; then
+            success=0
+            continue
+        elif [ -n "$ROLE_ANYWHERE" ] && [ "$role" = "$ROLE_ANYWHERE" ]; then
+            # A/P: we are restarted in the expected state
+            success=0
+            continue
+        else
+            log "Waiting for ${NAME} to transition to role ${ROLE_LOCAL} on ${HOST}"
+        fi
+
+        if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then
+            sleep 4
+            timeout=$((timeout-4))
+        fi
+    done
+fi
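The role extraction in the loop above is a plain sed capture; fed a hand-written element trimmed down to the one attribute that matters, it behaves like this:

    echo '<resource id="galera" role="Master" active="true"/>' \
        | sed -ne 's/.*\Wrole="\([^"]*\)".*/\1/p'    # prints: Master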
Bailing out" + bailout=0 + continue + elif echo "${resources}" | grep -q -w "\Wrole=\"${ROLE_ANYWHERE}\""; then + success=0 + continue + else + log "Waiting for ${NAME} to transition to role ${ROLE_ANYWHERE} anywhere in the cluster" + fi + + if [ $bailout -ne 0 ] && [ $success -ne 0 ]; then + sleep 4 + timeout=$((timeout-4)) + fi + done +fi + +if [ $timeout -le 0 ]; then + log "Timeout reached after ${TIMEOUT}s while waiting for ${NAME} to be restarted" +elif [ $bailout -le 0 ]; then + log "Restart monitoring for ${NAME} cancelled" +fi + +if [ $success -eq 0 ]; then + log "${NAME} successfully restarted" +else + log "${NAME} was not restarted properly" +fi + +# Don't block minor update or stack update if the wait was unsuccessful +exit 0 diff --git a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml index 22e1cc25af..21e05ecb5e 100644 --- a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml +++ b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml @@ -183,23 +183,8 @@ outputs: - /var/log/containers/cinder:/var/log/cinder:z command: ['/bin/bash', '-c', 'chown -R cinder:cinder /var/log/cinder'] step_5: - cinder_backup_restart_bundle: - start_order: 0 - config_volume: cinder - detach: false - net: host - ipc: host - user: root - environment: - TRIPLEO_MINOR_UPDATE: '' - command: /pacemaker_restart_bundle.sh openstack-cinder-backup cinder_backup - image: {get_param: ContainerCinderBackupImage} - volumes: - list_concat: - - {get_attr: [ContainersCommon, pacemaker_restart_volumes]} - - - /var/lib/config-data/puppet-generated/cinder:/var/lib/kolla/config_files/src:ro cinder_backup_init_bundle: - start_order: 1 + start_order: 0 detach: false net: host ipc: host @@ -226,6 +211,21 @@ outputs: # NOTE: this should force this container to re-run on each # update (scale-out, etc.) 
diff --git a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
index 22e1cc25af..21e05ecb5e 100644
--- a/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
+++ b/deployment/cinder/cinder-backup-pacemaker-puppet.yaml
@@ -183,23 +183,8 @@ outputs:
             - /var/log/containers/cinder:/var/log/cinder:z
           command: ['/bin/bash', '-c', 'chown -R cinder:cinder /var/log/cinder']
         step_5:
-          cinder_backup_restart_bundle:
-            start_order: 0
-            config_volume: cinder
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh openstack-cinder-backup cinder_backup
-            image: {get_param: ContainerCinderBackupImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/cinder:/var/lib/kolla/config_files/src:ro
           cinder_backup_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -226,6 +211,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          cinder_backup_restart_bundle:
+            start_order: 1
+            config_volume: cinder
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh cinder_backup openstack-cinder-backup openstack-cinder-backup _ Started
+            image: {get_param: ContainerCinderBackupImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/cinder:/var/lib/kolla/config_files/src:ro
       host_prep_tasks: {get_attr: [CinderCommon, cinder_backup_host_prep_tasks]}
       deploy_steps_tasks:
         - name: Cinder Backup tag container image for pacemaker
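For A/P bundles like the two cinder services, the "_" placeholder empties ROLE_LOCAL, so pacemaker_wait_bundle.sh skips its stage-1 loop entirely and goes straight to stage 2, polling until some replica reports role="Started". A hypothetical manual run (the host argument is illustrative):

    ./pacemaker_wait_bundle.sh openstack-cinder-backup openstack-cinder-backup _ Started controller-0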
diff --git a/deployment/cinder/cinder-volume-pacemaker-puppet.yaml b/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
index d2a1347f6b..431b42dd1e 100644
--- a/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
+++ b/deployment/cinder/cinder-volume-pacemaker-puppet.yaml
@@ -168,23 +168,8 @@ outputs:
             - /var/log/containers/cinder:/var/log/cinder
           command: ['/bin/bash', '-c', 'chown -R cinder:cinder /var/log/cinder']
         step_5:
-          cinder_volume_restart_bundle:
-            start_order: 0
-            config_volume: cinder
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh openstack-cinder-volume cinder_volume
-            image: {get_param: ContainerCinderVolumeImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/cinder:/var/lib/kolla/config_files/src:ro
           cinder_volume_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -211,6 +196,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          cinder_volume_restart_bundle:
+            start_order: 1
+            config_volume: cinder
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh cinder_volume openstack-cinder-volume openstack-cinder-volume _ Started
+            image: {get_param: ContainerCinderVolumeImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/cinder:/var/lib/kolla/config_files/src:ro
       host_prep_tasks: {get_attr: [CinderCommon, cinder_volume_host_prep_tasks]}
       deploy_steps_tasks:
         - name: Cinder Volume tag container image for pacemaker
diff --git a/deployment/containers-common.yaml b/deployment/containers-common.yaml
index d773b79c30..40efc13932 100644
--- a/deployment/containers-common.yaml
+++ b/deployment/containers-common.yaml
@@ -111,6 +111,13 @@ outputs:
           template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
           params:
             __PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
+      pacemaker_wait_bundle.sh:
+        mode: "0755"
+        content:
+          str_replace:
+            template: { get_file: ../container_config_scripts/pacemaker_wait_bundle.sh }
+            params:
+              __PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
 
   volumes_base:
     description: Base volume list
@@ -150,6 +157,7 @@ outputs:
       list_concat:
        - *volumes_base
        - - /var/lib/container-config-scripts/pacemaker_restart_bundle.sh:/pacemaker_restart_bundle.sh:ro
+         - /var/lib/container-config-scripts/pacemaker_wait_bundle.sh:/pacemaker_wait_bundle.sh:ro
          - /dev/shm:/dev/shm:rw  # required for bootstrap_host_exec, facter
          - /etc/puppet:/etc/puppet:ro
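The str_replace wiring above substitutes every literal __PCMKTIMEOUT__ in the two scripts at deploy time. Assuming PcmkConfigRestartTimeout were set to 600, the effect on the wait script's default timeout can be mimicked with:

    sed -e 's/__PCMKTIMEOUT__/600/g' container_config_scripts/pacemaker_wait_bundle.sh | grep 'TIMEOUT='
    # -> TIMEOUT=${6:-600}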
diff --git a/deployment/database/mysql-pacemaker-puppet.yaml b/deployment/database/mysql-pacemaker-puppet.yaml
index f0c3a7b9a9..be5c5d2035 100644
--- a/deployment/database/mysql-pacemaker-puppet.yaml
+++ b/deployment/database/mysql-pacemaker-puppet.yaml
@@ -261,23 +261,8 @@ outputs:
             - {get_param: MysqlRootPassword}
             - {get_param: [DefaultPasswords, mysql_root_password]}
         step_2:
-          mysql_restart_bundle:
-            start_order: 0
-            config_volume: mysql
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh galera-bundle mysql
-            image: {get_param: ContainerMysqlImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/mysql:/var/lib/kolla/config_files/src:ro
           mysql_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -306,6 +291,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          mysql_restart_bundle:
+            start_order: 1
+            config_volume: mysql
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh mysql galera galera-bundle Master
+            image: {get_param: ContainerMysqlImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/mysql:/var/lib/kolla/config_files/src:ro
       host_prep_tasks:
         - name: create persistent directories
           file:
diff --git a/deployment/database/redis-pacemaker-puppet.yaml b/deployment/database/redis-pacemaker-puppet.yaml
index 177c5574dd..bdad79cf10 100644
--- a/deployment/database/redis-pacemaker-puppet.yaml
+++ b/deployment/database/redis-pacemaker-puppet.yaml
@@ -217,23 +217,8 @@ outputs:
       docker_config:
         step_2:
           map_merge:
-            - redis_restart_bundle:
-                start_order: 1
-                config_volume: redis
-                detach: false
-                net: host
-                ipc: host
-                user: root
-                environment:
-                  TRIPLEO_MINOR_UPDATE: ''
-                command: /pacemaker_restart_bundle.sh redis-bundle redis
-                image: {get_param: ContainerRedisConfigImage}
-                volumes:
-                  list_concat:
-                    - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                    - - /var/lib/config-data/puppet-generated/redis:/var/lib/kolla/config_files/src:ro
             - redis_init_bundle:
-                start_order: 2
+                start_order: 1
                 detach: false
                 net: host
                 ipc: host
@@ -261,6 +246,21 @@ outputs:
                   # NOTE: this should force this container to re-run on each
                   # update (scale-out, etc.)
                   TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+            - redis_restart_bundle:
+                start_order: 2
+                config_volume: redis
+                detach: false
+                net: host
+                ipc: host
+                user: root
+                environment:
+                  TRIPLEO_MINOR_UPDATE: ''
+                command: /pacemaker_restart_bundle.sh redis redis redis-bundle Slave Master
+                image: {get_param: ContainerRedisConfigImage}
+                volumes:
+                  list_concat:
+                    - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                    - - /var/lib/config-data/puppet-generated/redis:/var/lib/kolla/config_files/src:ro
             - if:
                 - internal_tls_enabled
                 - redis_tls_proxy:
diff --git a/deployment/haproxy/haproxy-pacemaker-puppet.yaml b/deployment/haproxy/haproxy-pacemaker-puppet.yaml
index eb8d1b59c2..133229a478 100644
--- a/deployment/haproxy/haproxy-pacemaker-puppet.yaml
+++ b/deployment/haproxy/haproxy-pacemaker-puppet.yaml
@@ -250,23 +250,8 @@ outputs:
       container_config_scripts: {get_attr: [ContainersCommon, container_config_scripts]}
       docker_config:
         step_2:
-          haproxy_restart_bundle:
-            start_order: 2
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            config_volume: haproxy
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh haproxy-bundle haproxy
-            image: {get_param: ContainerHAProxyImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/haproxy:/var/lib/kolla/config_files/src:ro
           haproxy_init_bundle:
-            start_order: 3
+            start_order: 2
             detach: false
             net: host
             ipc: host
@@ -297,6 +282,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          haproxy_restart_bundle:
+            start_order: 3
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            config_volume: haproxy
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh haproxy haproxy-bundle haproxy-bundle Started
+            image: {get_param: ContainerHAProxyImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/haproxy:/var/lib/kolla/config_files/src:ro
       host_prep_tasks:
         - {get_attr: [HAProxyBase, role_data, host_prep_tasks]}
         - name: create persistent directories
diff --git a/deployment/manila/manila-share-pacemaker-puppet.yaml b/deployment/manila/manila-share-pacemaker-puppet.yaml
index 3cf7df1ca9..dccae2c5dd 100644
--- a/deployment/manila/manila-share-pacemaker-puppet.yaml
+++ b/deployment/manila/manila-share-pacemaker-puppet.yaml
@@ -151,23 +151,8 @@ outputs:
             - /var/log/containers/manila:/var/log/manila:z
           command: ['/bin/bash', '-c', 'chown -R manila:manila /var/log/manila']
         step_5:
-          manila_share_restart_bundle:
-            start_order: 0
-            config_volume: manila
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh openstack-manila-share manila_share
-            image: {get_param: ContainerManilaShareImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/manila:/var/lib/kolla/config_files/src:ro
           manila_share_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -194,6 +179,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          manila_share_restart_bundle:
+            start_order: 1
+            config_volume: manila
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh manila_share openstack-manila-share openstack-manila-share _ Started
+            image: {get_param: ContainerManilaShareImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/manila:/var/lib/kolla/config_files/src:ro
       host_prep_tasks:
         - name: create persistent directories
          file:
diff --git a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
index 63b950e8b0..0b8208328f 100644
--- a/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
+++ b/deployment/ovn/ovn-dbs-pacemaker-puppet.yaml
@@ -190,28 +190,8 @@ outputs:
                 - null
       docker_config:
         step_3:
-          ovn_dbs_restart_bundle:
-            start_order: 0
-            config_volume: ovn_dbs
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh ovn-dbs-bundle ovn_dbs
-            image: {get_param: ContainerOvnDbsConfigImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - if:
-                    - internal_tls_enabled
-                    -
-                      - /etc/pki/tls/certs/ovn_dbs.crt:/etc/pki/tls/certs/ovn_dbs.crt:ro
-                      - /etc/pki/tls/private/ovn_dbs.key:/etc/pki/tls/private/ovn_dbs.key:ro
-                    - null
           ovn_dbs_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -245,6 +225,26 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          ovn_dbs_restart_bundle:
+            start_order: 1
+            config_volume: ovn_dbs
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh ovn_dbs ovndb_servers ovn-dbs-bundle Slave Master
+            image: {get_param: ContainerOvnDbsConfigImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - if:
+                    - internal_tls_enabled
+                    -
+                      - /etc/pki/tls/certs/ovn_dbs.crt:/etc/pki/tls/certs/ovn_dbs.crt:ro
+                      - /etc/pki/tls/private/ovn_dbs.key:/etc/pki/tls/private/ovn_dbs.key:ro
+                    - null
       host_prep_tasks:
         - name: create persistent directories
           file:
diff --git a/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
index 32bc2d8994..e354367f4e 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-notify-pacemaker-puppet.yaml
@@ -191,23 +191,8 @@ outputs:
             - {get_param: RabbitCookie}
             - {get_param: [DefaultPasswords, rabbit_cookie]}
         step_2:
-          rabbitmq_restart_bundle:
-            start_order: 0
-            config_volume: rabbitmq
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_notify
-            image: {get_param: ContainerRabbitmqImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -238,6 +223,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          rabbitmq_restart_bundle:
+            start_order: 1
+            config_volume: rabbitmq
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh oslo_messaging_notify rabbitmq rabbitmq-bundle Started
+            image: {get_param: ContainerRabbitmqImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
       host_prep_tasks:
         - name: create persistent directories
           file:
diff --git a/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
index 822c0eefda..453cf195b9 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
@@ -191,23 +191,8 @@ outputs:
             - {get_param: RabbitCookie}
             - {get_param: [DefaultPasswords, rabbit_cookie]}
         step_2:
-          rabbitmq_restart_bundle:
-            start_order: 0
-            config_volume: rabbitmq
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh rabbitmq-bundle rabbitmq
-            image: {get_param: ContainerRabbitmqImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -238,6 +223,21 @@ outputs:
               # NOTE: this should force this container to re-run on each
               # update (scale-out, etc.)
               TRIPLEO_DEPLOY_IDENTIFIER: {get_param: DeployIdentifier}
+          rabbitmq_restart_bundle:
+            start_order: 1
+            config_volume: rabbitmq
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh rabbitmq rabbitmq rabbitmq-bundle Started
+            image: {get_param: ContainerRabbitmqImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
       host_prep_tasks:
         - name: create persistent directories
           file:
diff --git a/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml b/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
index 083b8e0974..3558a83f60 100644
--- a/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
+++ b/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml
@@ -191,23 +191,8 @@ outputs:
             - {get_param: RabbitCookie}
             - {get_param: [DefaultPasswords, rabbit_cookie]}
         step_2:
-          rabbitmq_restart_bundle:
-            start_order: 0
-            config_volume: rabbitmq
-            detach: false
-            net: host
-            ipc: host
-            user: root
-            environment:
-              TRIPLEO_MINOR_UPDATE: ''
-            command: /pacemaker_restart_bundle.sh rabbitmq-bundle oslo_messaging_rpc
-            image: {get_param: ContainerRabbitmqImage}
-            volumes:
-              list_concat:
-                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
-                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
           rabbitmq_init_bundle:
-            start_order: 1
+            start_order: 0
             detach: false
             net: host
             ipc: host
@@ -244,6 +229,21 @@ outputs:
               passwords:
                 - {get_param: RabbitCookie}
                 - {get_param: [DefaultPasswords, rabbit_cookie]}
+          rabbitmq_restart_bundle:
+            start_order: 1
+            config_volume: rabbitmq
+            detach: false
+            net: host
+            ipc: host
+            user: root
+            environment:
+              TRIPLEO_MINOR_UPDATE: ''
+            command: /pacemaker_restart_bundle.sh oslo_messaging_rpc rabbitmq rabbitmq-bundle Started
+            image: {get_param: ContainerRabbitmqImage}
+            volumes:
+              list_concat:
+                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
+                - - /var/lib/config-data/puppet-generated/rabbitmq:/var/lib/kolla/config_files/src:ro
       metadata_settings:
         get_attr: [RabbitmqBase, role_data, metadata_settings]
       host_prep_tasks:
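Taken together, the per-service invocations introduced above map onto the wait targets as follows (summary compiled from the hunks in this change):

    # service                      resource        bundle                   local     anywhere
    # mysql                        galera          galera-bundle            Master    -
    # redis                        redis           redis-bundle             Slave     Master
    # ovn_dbs                      ovndb_servers   ovn-dbs-bundle           Slave     Master
    # rabbitmq / oslo_messaging_*  rabbitmq        rabbitmq-bundle          Started   -
    # haproxy                      haproxy-bundle  haproxy-bundle           Started   -
    # cinder_backup                (bundle itself) openstack-cinder-backup  -         Started
    # cinder_volume                (bundle itself) openstack-cinder-volume  -         Started
    # manila_share                 (bundle itself) openstack-manila-share   -         Started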