0f54889408
There are certain HA clustered services (e.g. galera) that don't have the ability natively to reload their TLS certificate without being restarted. If too many replicas are restarted concurrently this might result in full service disruption. To ensure service availability, provide a means to ensure that only one service replica is restarted at a time in the cluster. This works by using pacemaker's CIB to implement a cluster-wide restart lock for a service. The lock has a TTL so it's guaranteed to be eventually released without requiring complex contingency cleanup in case of failures. Tested locally by running the following: 1. force recreate certificate on all nodes at once for galera (ipa-cert resubmit -i mysql), and verify that the resources restart one after the other 2. create a lock manually in pacemaker, recreate certificate for galera on all nodes, and verify that no resource is restarted before the manually created lock expires. 3. create a lock manually, let it expires, recreate a certificate, and verify that the resource is restarted appropriately and the lock gets cleaned up from pacemaker once the restart finished. Closes-Bug: #1885113 Change-Id: Ib2b62e33b34cf72edfdae6299cf432259bf960a2
195 lines
6.3 KiB
YAML
195 lines
6.3 KiB
YAML
heat_template_version: rocky
|
|
|
|
description: >
|
|
Contains a static list of common things necessary for containers
|
|
|
|
parameters:
|
|
|
|
# Required parameters
|
|
EndpointMap:
|
|
default: {}
|
|
description: Mapping of service endpoint -> protocol. Typically set
|
|
via parameter_defaults in the resource registry.
|
|
type: json
|
|
ServiceData:
|
|
default: {}
|
|
description: Dictionary packing service data
|
|
type: json
|
|
ServiceNetMap:
|
|
default: {}
|
|
description: Mapping of service_name -> network name. Typically set
|
|
via parameter_defaults in the resource registry. This
|
|
mapping overrides those in ServiceNetMapDefaults.
|
|
type: json
|
|
DefaultPasswords:
|
|
default: {}
|
|
type: json
|
|
RoleName:
|
|
default: ''
|
|
description: Role name on which the service is applied
|
|
type: string
|
|
RoleParameters:
|
|
default: {}
|
|
description: Parameters specific to the role
|
|
type: json
|
|
|
|
|
|
EnableInternalTLS:
|
|
type: boolean
|
|
default: false
|
|
InternalTLSCAFile:
|
|
default: '/etc/ipa/ca.crt'
|
|
type: string
|
|
description: Specifies the default CA cert to use if TLS is used for
|
|
services in the internal network.
|
|
RpcPort:
|
|
default: 5672
|
|
description: The network port for messaging backend
|
|
type: number
|
|
|
|
PcmkConfigRestartTimeout:
|
|
default: 600
|
|
description: Time in seconds to wait for a pacemaker resource to restart when
|
|
a config change is detected and the resource is being restarted
|
|
type: number
|
|
|
|
ContainerCli:
|
|
type: string
|
|
default: 'podman'
|
|
description: CLI tool used to manage containers.
|
|
constraints:
|
|
- allowed_values: ['docker', 'podman']
|
|
|
|
conditions:
|
|
|
|
internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
|
|
docker_enabled: {equals: [{get_param: ContainerCli}, 'docker']}
|
|
|
|
outputs:
|
|
container_config_scripts:
|
|
description: Shared container config scripts
|
|
value:
|
|
container_puppet_apply.sh:
|
|
mode: "0700"
|
|
content: |
|
|
#!/bin/bash
|
|
set -eux
|
|
STEP=$1
|
|
TAGS=$2
|
|
CONFIG=$3
|
|
EXTRA_ARGS=${4:-''}
|
|
if [ -d /tmp/puppet-etc ]; then
|
|
# ignore copy failures as these may be the same file depending on docker mounts
|
|
cp -a /tmp/puppet-etc/* /etc/puppet || true
|
|
fi
|
|
echo "{\"step\": ${STEP}}" > /etc/puppet/hieradata/docker_puppet.json
|
|
# $::deployment_type in puppet-tripleo
|
|
export FACTER_deployment_type=containers
|
|
set +e
|
|
puppet apply $EXTRA_ARGS \
|
|
--verbose \
|
|
--detailed-exitcodes \
|
|
--summarize \
|
|
--color=false \
|
|
--modulepath /etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules \
|
|
--tags $TAGS \
|
|
-e "noop_resource('package'); ${CONFIG}"
|
|
rc=$?
|
|
set -e
|
|
set +ux
|
|
if [ $rc -eq 2 -o $rc -eq 0 ]; then
|
|
exit 0
|
|
fi
|
|
exit $rc
|
|
pyshim.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pyshim.sh }
|
|
pacemaker_restart_bundle.sh:
|
|
mode: "0755"
|
|
content:
|
|
str_replace:
|
|
template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
|
|
params:
|
|
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
|
|
pacemaker_wait_bundle.sh:
|
|
mode: "0755"
|
|
content:
|
|
str_replace:
|
|
template: { get_file: ../container_config_scripts/pacemaker_wait_bundle.sh }
|
|
params:
|
|
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
|
|
wait-port-and-run.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/wait-port-and-run.sh }
|
|
pacemaker_resource_lock.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pacemaker_resource_lock.sh }
|
|
pacemaker_mutex_restart_bundle.sh:
|
|
mode: "0755"
|
|
content: { get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh }
|
|
|
|
volumes_base:
|
|
description: Base volume list
|
|
value: &volumes_base
|
|
list_concat:
|
|
- - /etc/hosts:/etc/hosts:ro
|
|
- /etc/localtime:/etc/localtime:ro
|
|
# OpenSSL trusted CAs
|
|
- /etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro
|
|
- /etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro
|
|
- /etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro
|
|
- /etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro
|
|
- /etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro
|
|
# Syslog socket
|
|
- /dev/log:/dev/log
|
|
- if:
|
|
- internal_tls_enabled
|
|
- - list_join:
|
|
- ':'
|
|
- - {get_param: InternalTLSCAFile}
|
|
- {get_param: InternalTLSCAFile}
|
|
- 'ro'
|
|
- null
|
|
|
|
volumes:
|
|
description: Common volumes for the containers.
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
# required for bootstrap_host_exec
|
|
- - /etc/puppet:/etc/puppet:ro
|
|
|
|
pacemaker_restart_volumes:
|
|
description: Common volumes for the pacemaker restart containers.
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
- - /var/lib/container-config-scripts:/var/lib/container-config-scripts:ro
|
|
- /dev/shm:/dev/shm:rw
|
|
# required for bootstrap_host_exec, facter
|
|
- /etc/puppet:/etc/puppet:ro
|
|
- if:
|
|
- docker_enabled
|
|
- - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
|
|
- null
|
|
|
|
container_puppet_apply_volumes:
|
|
description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
|
|
value:
|
|
list_concat:
|
|
- *volumes_base
|
|
- - /var/lib/container-config-scripts/container_puppet_apply.sh:/container_puppet_apply.sh:ro
|
|
# container_puppet_apply.sh will copy this to /etc/puppet in the container
|
|
- /etc/puppet:/tmp/puppet-etc:ro
|
|
- /usr/share/openstack-puppet/modules:/usr/share/openstack-puppet/modules:ro
|
|
|
|
healthcheck_rpc_port:
|
|
description: healthcheck command that probes the RpcPort
|
|
value:
|
|
test:
|
|
str_replace:
|
|
template:
|
|
'/openstack/healthcheck RPCPORT'
|
|
params:
|
|
RPCPORT: {get_param: RpcPort}
|