tripleo-heat-templates/deployment/containers-common.yaml
Damien Ciabrini 8e9798caf6 Serialize shutdown of pacemaker nodes
When running minor update in a composable HA, different
roles could run ansible tasks concurrently. However,
there is currently a race when pacemaker nodes are
stopped in parallel [1,2], that could cause nodes to
incorrectly stop themselves once they reconnect to the
cluster.

To prevent concurrent shutdown, use a cluster-wide lock
to signals that one node is about to shutdown, and block
the others until the node disconnects from the cluster.

Tested the minor update in a composable HA environment:
  . when run with "openstack update run", every role
    is updated sequentially, and the shutdown lock
    doesn't interfere.
  . when running multiple ansible tasks in parallel
    "openstack update run --limit role<X>", pacemaker
    nodes are correctly stopped sequentially thanks
    to the shutdown lock.
  . when updating an existing overcloud, the new
    locking script used in the review is correctly
    injected on the overcloud, thanks to [3].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1791841
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1872404
[3] I2ac6bb98e1d4183327e888240fc8d5a70e0d6fcb

Closes-Bug: #1904193
Change-Id: I0e041c6a95a7f53019967f9263df2326b1408c6f
(cherry picked from commit cb55cc8ce5)
2021-01-21 13:15:12 +00:00

198 lines
6.5 KiB
YAML

heat_template_version: rocky
description: >
Contains a static list of common things necessary for containers
parameters:
# Required parameters
EndpointMap:
default: {}
description: Mapping of service endpoint -> protocol. Typically set
via parameter_defaults in the resource registry.
type: json
ServiceData:
default: {}
description: Dictionary packing service data
type: json
ServiceNetMap:
default: {}
description: Mapping of service_name -> network name. Typically set
via parameter_defaults in the resource registry. This
mapping overrides those in ServiceNetMapDefaults.
type: json
DefaultPasswords:
default: {}
type: json
RoleName:
default: ''
description: Role name on which the service is applied
type: string
RoleParameters:
default: {}
description: Parameters specific to the role
type: json
EnableInternalTLS:
type: boolean
default: false
InternalTLSCAFile:
default: '/etc/ipa/ca.crt'
type: string
description: Specifies the default CA cert to use if TLS is used for
services in the internal network.
RpcPort:
default: 5672
description: The network port for messaging backend
type: number
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pacemaker resource to restart when
a config change is detected and the resource is being restarted
type: number
ContainerCli:
type: string
default: 'podman'
description: CLI tool used to manage containers.
constraints:
- allowed_values: ['docker', 'podman']
conditions:
internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
docker_enabled: {equals: [{get_param: ContainerCli}, 'docker']}
outputs:
container_config_scripts:
description: Shared container config scripts
value:
container_puppet_apply.sh:
mode: "0700"
content: |
#!/bin/bash
set -eux
STEP=$1
TAGS=$2
CONFIG=$3
EXTRA_ARGS=${4:-''}
if [ -d /tmp/puppet-etc ]; then
# ignore copy failures as these may be the same file depending on docker mounts
cp -a /tmp/puppet-etc/* /etc/puppet || true
fi
echo "{\"step\": ${STEP}}" > /etc/puppet/hieradata/docker_puppet.json
# $::deployment_type in puppet-tripleo
export FACTER_deployment_type=containers
set +e
puppet apply $EXTRA_ARGS \
--verbose \
--detailed-exitcodes \
--summarize \
--color=false \
--modulepath /etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules \
--tags $TAGS \
-e "noop_resource('package'); ${CONFIG}"
rc=$?
set -e
set +ux
if [ $rc -eq 2 -o $rc -eq 0 ]; then
exit 0
fi
exit $rc
pyshim.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pyshim.sh }
pacemaker_restart_bundle.sh:
mode: "0755"
content:
str_replace:
template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
params:
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
pacemaker_wait_bundle.sh:
mode: "0755"
content:
str_replace:
template: { get_file: ../container_config_scripts/pacemaker_wait_bundle.sh }
params:
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
wait-port-and-run.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/wait-port-and-run.sh }
pacemaker_resource_lock.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pacemaker_resource_lock.sh }
pacemaker_mutex_restart_bundle.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
pacemaker_mutex_shutdown.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pacemaker_mutex_shutdown.sh }
volumes_base:
description: Base volume list
value: &volumes_base
list_concat:
- - /etc/hosts:/etc/hosts:ro
- /etc/localtime:/etc/localtime:ro
# OpenSSL trusted CAs
- /etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro
- /etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro
- /etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro
- /etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro
- /etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro
# Syslog socket
- /dev/log:/dev/log
- if:
- internal_tls_enabled
- - list_join:
- ':'
- - {get_param: InternalTLSCAFile}
- {get_param: InternalTLSCAFile}
- 'ro'
- null
volumes:
description: Common volumes for the containers.
value:
list_concat:
- *volumes_base
# required for bootstrap_host_exec
- - /etc/puppet:/etc/puppet:ro
pacemaker_restart_volumes:
description: Common volumes for the pacemaker restart containers.
value:
list_concat:
- *volumes_base
- - /var/lib/container-config-scripts:/var/lib/container-config-scripts:ro
- /dev/shm:/dev/shm:rw
# required for bootstrap_host_exec, facter
- /etc/puppet:/etc/puppet:ro
- if:
- docker_enabled
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- null
container_puppet_apply_volumes:
description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
value:
list_concat:
- *volumes_base
- - /var/lib/container-config-scripts/container_puppet_apply.sh:/container_puppet_apply.sh:ro
# container_puppet_apply.sh will copy this to /etc/puppet in the container
- /etc/puppet:/tmp/puppet-etc:ro
- /usr/share/openstack-puppet/modules:/usr/share/openstack-puppet/modules:ro
healthcheck_rpc_port:
description: healthcheck command that probes the RpcPort
value:
test:
str_replace:
template:
'/openstack/healthcheck RPCPORT'
params:
RPCPORT: {get_param: RpcPort}