8e9798caf6
When running minor update in a composable HA, different
roles could run ansible tasks concurrently. However,
there is currently a race when pacemaker nodes are
stopped in parallel [1,2], that could cause nodes to
incorrectly stop themselves once they reconnect to the
cluster.
To prevent concurrent shutdown, use a cluster-wide lock
to signals that one node is about to shutdown, and block
the others until the node disconnects from the cluster.
Tested the minor update in a composable HA environment:
. when run with "openstack update run", every role
is updated sequentially, and the shutdown lock
doesn't interfere.
. when running multiple ansible tasks in parallel
"openstack update run --limit role<X>", pacemaker
nodes are correctly stopped sequentially thanks
to the shutdown lock.
. when updating an existing overcloud, the new
locking script used in the review is correctly
injected on the overcloud, thanks to [3].
[1] https://bugzilla.redhat.com/show_bug.cgi?id=1791841
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1872404
[3] I2ac6bb98e1d4183327e888240fc8d5a70e0d6fcb
Closes-Bug: #1904193
Change-Id: I0e041c6a95a7f53019967f9263df2326b1408c6f
(cherry picked from commit cb55cc8ce5
)
198 lines
6.5 KiB
YAML
198 lines
6.5 KiB
YAML
heat_template_version: rocky
|
|
|
|
description: >
|
|
Contains a static list of common things necessary for containers
|
|
|
|
parameters:
|
|
|
|
# Required parameters
|
|
EndpointMap:
|
|
default: {}
|
|
description: Mapping of service endpoint -> protocol. Typically set
|
|
via parameter_defaults in the resource registry.
|
|
type: json
|
|
ServiceData:
|
|
default: {}
|
|
description: Dictionary packing service data
|
|
type: json
|
|
ServiceNetMap:
|
|
default: {}
|
|
description: Mapping of service_name -> network name. Typically set
|
|
via parameter_defaults in the resource registry. This
|
|
mapping overrides those in ServiceNetMapDefaults.
|
|
type: json
|
|
DefaultPasswords:
|
|
default: {}
|
|
type: json
|
|
RoleName:
|
|
default: ''
|
|
description: Role name on which the service is applied
|
|
type: string
|
|
RoleParameters:
|
|
default: {}
|
|
description: Parameters specific to the role
|
|
type: json
|
|
|
|
|
|
EnableInternalTLS:
|
|
type: boolean
|
|
default: false
|
|
InternalTLSCAFile:
|
|
default: '/etc/ipa/ca.crt'
|
|
type: string
|
|
description: Specifies the default CA cert to use if TLS is used for
|
|
services in the internal network.
|
|
RpcPort:
|
|
default: 5672
|
|
description: The network port for messaging backend
|
|
type: number
|
|
|
|
PcmkConfigRestartTimeout:
|
|
default: 600
|
|
description: Time in seconds to wait for a pacemaker resource to restart when
|
|
a config change is detected and the resource is being restarted
|
|
type: number
|
|
|
|
ContainerCli:
|
|
type: string
|
|
default: 'podman'
|
|
description: CLI tool used to manage containers.
|
|
constraints:
|
|
- allowed_values: ['docker', 'podman']
|
|
|
|
conditions:
|
|
|
|
internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
|
|
docker_enabled: {equals: [{get_param: ContainerCli}, 'docker']}
|
|
|
|
outputs:
|
|
container_config_scripts:
|
|
description: Shared container config scripts
|
|
value:
|
|
container_puppet_apply.sh:
|
|
mode: "0700"
|
|
content: |
|
|
#!/bin/bash
|
|
set -eux
|
|
STEP=$1
|
|
TAGS=$2
|
|
CONFIG=$3
|
|
EXTRA_ARGS=${4:-''}
|
|
if [ -d /tmp/puppet-etc ]; then
|
|
# ignore copy failures as these may be the same file depending on docker mounts
|
|
cp -a /tmp/puppet-etc/* /etc/puppet || true
|
|
fi
|
|
echo "{\"step\": ${STEP}}" > /etc/puppet/hieradata/docker_puppet.json
|
|
# $::deployment_type in puppet-tripleo
|
|
export FACTER_deployment_type=containers
|
|
set +e
|
|
puppet apply $EXTRA_ARGS \
|
|
--verbose \
|
|
--detailed-exitcodes \
|
|
--summarize \
|
|
--color=false \
|
|
--modulepath /etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules \
|
|
--tags $TAGS \
|
|
-e "noop_resource('package'); ${CONFIG}"
|
|
rc=$?
|
|
set -e
|
|
set +ux
|
|
if [ $rc -eq 2 -o $rc -eq 0 ]; then
|
|
exit 0
|
|
fi
|
|
exit $rc
|
|
pyshim.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pyshim.sh }
|
|
pacemaker_restart_bundle.sh:
|
|
mode: "0755"
|
|
content:
|
|
str_replace:
|
|
template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
|
|
params:
|
|
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
|
|
pacemaker_wait_bundle.sh:
|
|
mode: "0755"
|
|
content:
|
|
str_replace:
|
|
template: { get_file: ../container_config_scripts/pacemaker_wait_bundle.sh }
|
|
params:
|
|
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
|
|
wait-port-and-run.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/wait-port-and-run.sh }
|
|
pacemaker_resource_lock.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pacemaker_resource_lock.sh }
|
|
pacemaker_mutex_restart_bundle.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
|
|
pacemaker_mutex_shutdown.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pacemaker_mutex_shutdown.sh }
|
|
|
|
volumes_base:
|
|
description: Base volume list
|
|
value: &volumes_base
|
|
list_concat:
|
|
- - /etc/hosts:/etc/hosts:ro
|
|
- /etc/localtime:/etc/localtime:ro
|
|
# OpenSSL trusted CAs
|
|
- /etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro
|
|
- /etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro
|
|
- /etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro
|
|
- /etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro
|
|
- /etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro
|
|
# Syslog socket
|
|
- /dev/log:/dev/log
|
|
- if:
|
|
- internal_tls_enabled
|
|
- - list_join:
|
|
- ':'
|
|
- - {get_param: InternalTLSCAFile}
|
|
- {get_param: InternalTLSCAFile}
|
|
- 'ro'
|
|
- null
|
|
|
|
volumes:
|
|
description: Common volumes for the containers.
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
# required for bootstrap_host_exec
|
|
- - /etc/puppet:/etc/puppet:ro
|
|
|
|
pacemaker_restart_volumes:
|
|
description: Common volumes for the pacemaker restart containers.
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
- - /var/lib/container-config-scripts:/var/lib/container-config-scripts:ro
|
|
- /dev/shm:/dev/shm:rw
|
|
# required for bootstrap_host_exec, facter
|
|
- /etc/puppet:/etc/puppet:ro
|
|
- if:
|
|
- docker_enabled
|
|
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
|
|
- null
|
|
|
|
container_puppet_apply_volumes:
|
|
description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
- - /var/lib/container-config-scripts/container_puppet_apply.sh:/container_puppet_apply.sh:ro
|
|
# container_puppet_apply.sh will copy this to /etc/puppet in the container
|
|
- /etc/puppet:/tmp/puppet-etc:ro
|
|
- /usr/share/openstack-puppet/modules:/usr/share/openstack-puppet/modules:ro
|
|
|
|
healthcheck_rpc_port:
|
|
description: healthcheck command that probes the RpcPort
|
|
value:
|
|
test:
|
|
str_replace:
|
|
template:
|
|
'/openstack/healthcheck RPCPORT'
|
|
params:
|
|
RPCPORT: {get_param: RpcPort}
|