tripleo-heat-templates/deployment/containers-common.yaml
Damien Ciabrini 0f54889408 Rolling certificate update for HA services
There are certain HA clustered services (e.g. galera) that cannot
natively reload their TLS certificate without being restarted.
If too many replicas are restarted concurrently, this might
result in full service disruption.

To ensure service availability, provide a means to ensure that
only one service replica is restarted at a time in the cluster.
This works by using pacemaker's CIB to implement a cluster-wide
restart lock for a service. The lock has a TTL so it's guaranteed
to be eventually released without requiring complex contingency
cleanup in case of failures.

Tested locally by running the following:
1. force recreate certificate on all nodes at once for galera
   (ipa-cert resubmit -i mysql), and verify that the resources
   restart one after the other

2. create a lock manually in pacemaker, recreate certificate for
   galera on all nodes, and verify that no resource is restarted
   before the manually created lock expires.

3. create a lock manually, let it expire, recreate a certificate,
   and verify that the resource is restarted appropriately and the
   lock gets cleaned up from pacemaker once the restart finished.

Closes-Bug: #1885113
Change-Id: Ib2b62e33b34cf72edfdae6299cf432259bf960a2
2020-07-30 16:51:48 +02:00

195 lines
6.3 KiB
YAML

# Heat Orchestration Template header: template format version followed by a
# one-line summary of what this template provides.
heat_template_version: rocky
description: >
  Contains a static list of common things necessary for containers
# Input parameters of this template. Every parameter has a default, so the
# template can be instantiated without passing any value explicitly.
parameters:
  # Required parameters
  EndpointMap:
    type: json
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
  ServiceData:
    type: json
    default: {}
    description: Dictionary packing service data
  ServiceNetMap:
    type: json
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
  DefaultPasswords:
    type: json
    default: {}
    description: Dictionary of default passwords. Not referenced by this template.
    # hidden keeps the values out of stack parameter listings, since this
    # mapping is expected to contain passwords.
    hidden: true
  RoleName:
    type: string
    default: ''
    description: Role name on which the service is applied
  RoleParameters:
    type: json
    default: {}
    description: Parameters specific to the role

  # TLS settings: drive the internal_tls_enabled condition and the extra CA
  # file mount in the volumes_base output.
  EnableInternalTLS:
    type: boolean
    default: false
    description: >-
      Whether TLS is enabled on the internal network. When true, the file
      named by InternalTLSCAFile is added read-only to the base volume list.
  InternalTLSCAFile:
    type: string
    default: '/etc/ipa/ca.crt'
    description: >-
      Specifies the default CA cert to use if TLS is used for
      services in the internal network.

  # Substituted into the healthcheck_rpc_port output command.
  RpcPort:
    type: number
    default: 5672
    description: The network port for messaging backend

  # Substituted for __PCMKTIMEOUT__ in the pacemaker restart/wait scripts.
  PcmkConfigRestartTimeout:
    type: number
    default: 600
    description: >-
      Time in seconds to wait for a pacemaker resource to restart when
      a config change is detected and the resource is being restarted

  # Drives the docker_enabled condition.
  ContainerCli:
    type: string
    default: 'podman'
    description: CLI tool used to manage containers.
    constraints:
      - allowed_values: ['docker', 'podman']
# Named conditions consumed by the `if` expressions in the outputs section.
conditions:
  # True when the EnableInternalTLS parameter is true.
  internal_tls_enabled:
    equals:
      - get_param: EnableInternalTLS
      - true
  # True when the ContainerCli parameter is set to 'docker'.
  docker_enabled:
    equals:
      - get_param: ContainerCli
      - 'docker'
# Values exported by this template: shared helper scripts, reusable volume
# lists and a healthcheck command.
outputs:
  # Map of script file name -> {mode, content}.
  container_config_scripts:
    description: Shared container config scripts
    value:
      # Inline script: runs `puppet apply` with the step, tags and manifest
      # passed as arguments, and treats puppet exit codes 0 and 2 as success.
      container_puppet_apply.sh:
        mode: '0700'
        content: |
          #!/bin/bash
          set -eux
          STEP=$1
          TAGS=$2
          CONFIG=$3
          EXTRA_ARGS=${4:-''}
          if [ -d /tmp/puppet-etc ]; then
          # ignore copy failures as these may be the same file depending on docker mounts
          cp -a /tmp/puppet-etc/* /etc/puppet || true
          fi
          echo "{\"step\": ${STEP}}" > /etc/puppet/hieradata/docker_puppet.json
          # $::deployment_type in puppet-tripleo
          export FACTER_deployment_type=containers
          set +e
          puppet apply $EXTRA_ARGS \
          --verbose \
          --detailed-exitcodes \
          --summarize \
          --color=false \
          --modulepath /etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules \
          --tags $TAGS \
          -e "noop_resource('package'); ${CONFIG}"
          rc=$?
          set -e
          set +ux
          if [ $rc -eq 2 -o $rc -eq 0 ]; then
          exit 0
          fi
          exit $rc
      # The remaining scripts are loaded from files next to this template.
      pyshim.sh:
        mode: '0755'
        content:
          get_file: ../container_config_scripts/pyshim.sh
      # __PCMKTIMEOUT__ in the script text is replaced by the value of
      # PcmkConfigRestartTimeout.
      pacemaker_restart_bundle.sh:
        mode: '0755'
        content:
          str_replace:
            template:
              get_file: ../container_config_scripts/pacemaker_restart_bundle.sh
            params:
              __PCMKTIMEOUT__:
                get_param: PcmkConfigRestartTimeout
      # Same __PCMKTIMEOUT__ substitution as the restart script.
      pacemaker_wait_bundle.sh:
        mode: '0755'
        content:
          str_replace:
            template:
              get_file: ../container_config_scripts/pacemaker_wait_bundle.sh
            params:
              __PCMKTIMEOUT__:
                get_param: PcmkConfigRestartTimeout
      wait-port-and-run.sh:
        mode: '0755'
        content:
          get_file: ../container_config_scripts/wait-port-and-run.sh
      pacemaker_resource_lock.sh:
        mode: '0755'
        content:
          get_file: ../container_config_scripts/pacemaker_resource_lock.sh
      pacemaker_mutex_restart_bundle.sh:
        mode: '0755'
        content:
          get_file: ../container_config_scripts/pacemaker_mutex_restart_bundle.sh

  # The value is anchored so the volume outputs below can embed the same
  # list_concat expression through the *volumes_base alias.
  volumes_base:
    description: Base volume list
    value: &volumes_base
      list_concat:
        # Host files mounted into every container.
        - - /etc/hosts:/etc/hosts:ro
          - /etc/localtime:/etc/localtime:ro
          # OpenSSL trusted CAs
          - /etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro
          - /etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro
          - /etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro
          - /etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro
          - /etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro
          # Syslog socket
          - /dev/log:/dev/log
        # With internal TLS enabled, add "<CA file>:<CA file>:ro"; otherwise
        # this branch evaluates to null.
        - if:
            - internal_tls_enabled
            - - list_join:
                  - ':'
                  - - get_param: InternalTLSCAFile
                    - get_param: InternalTLSCAFile
                    - 'ro'
            - null

  volumes:
    description: Common volumes for the containers.
    value:
      list_concat:
        - *volumes_base
        # required for bootstrap_host_exec
        - - /etc/puppet:/etc/puppet:ro

  pacemaker_restart_volumes:
    description: Common volumes for the pacemaker restart containers.
    value:
      list_concat:
        - *volumes_base
        - - /var/lib/container-config-scripts:/var/lib/container-config-scripts:ro
          - /dev/shm:/dev/shm:rw
          # required for bootstrap_host_exec, facter
          - /etc/puppet:/etc/puppet:ro
        # corosync.conf is mounted only when the container CLI is docker;
        # otherwise this branch evaluates to null.
        - if:
            - docker_enabled
            - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
            - null

  container_puppet_apply_volumes:
    description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
    value:
      list_concat:
        - *volumes_base
        - - /var/lib/container-config-scripts/container_puppet_apply.sh:/container_puppet_apply.sh:ro
          # container_puppet_apply.sh will copy this to /etc/puppet in the container
          - /etc/puppet:/tmp/puppet-etc:ro
          - /usr/share/openstack-puppet/modules:/usr/share/openstack-puppet/modules:ro

  # RPCPORT in the command template is replaced by the RpcPort parameter.
  healthcheck_rpc_port:
    description: healthcheck command that probes the RpcPort
    value:
      test:
        str_replace:
          template: '/openstack/healthcheck RPCPORT'
          params:
            RPCPORT:
              get_param: RpcPort