Damien Ciabrini 7f785e8757 HA: fix <service>_restart_bundle with minor update workflow
For each HA service we have a paunch container <service>_restart_bundle
which is started by paunch whenever config files changes during stack
deploy/update. This container runs a pcs command on a single node to
restart all the service's containers (e.g. all galera on all controllers).
By design, when it is run, configs have already been regenerated by the
deploy tasks on all nodes.

For minor updates, the workflow runs differently: all the steps of the
deploy tasks are run one node after the other, so when
<service>_restart_bundle is called, there is no guarantee that the
service's configs have been regenerated on all the nodes yet.

To fix the wrong restart behaviour, only restart local containers when
running during a minor update. And run once per node. When the minor
update workflow calls <service>_restart_container, we still have the
guarantee that the config files are already regenerated locally.

Co-Authored-By: Michele Baldessari <michele@acksyn.org>
Co-Authored-By: Luca Miccini <lmiccini@redhat.com>

Change-Id: I92d4ddf2feeac06ce14468ae928c283f3fd04f45
Closes-Bug: #1841629
2019-08-30 18:46:31 +02:00

169 lines
5.4 KiB
YAML

heat_template_version: rocky
description: >
Contains a static list of common things necessary for containers
parameters:
# Required parameters
EndpointMap:
default: {}
description: Mapping of service endpoint -> protocol. Typically set
via parameter_defaults in the resource registry.
type: json
ServiceData:
default: {}
description: Dictionary packing service data
type: json
ServiceNetMap:
default: {}
description: Mapping of service_name -> network name. Typically set
via parameter_defaults in the resource registry. This
mapping overrides those in ServiceNetMapDefaults.
type: json
DefaultPasswords:
default: {}
type: json
RoleName:
default: ''
description: Role name on which the service is applied
type: string
RoleParameters:
default: {}
description: Parameters specific to the role
type: json
EnableInternalTLS:
type: boolean
default: false
InternalTLSCAFile:
default: '/etc/ipa/ca.crt'
type: string
description: Specifies the default CA cert to use if TLS is used for
services in the internal network.
RpcPort:
default: 5672
description: The network port for messaging backend
type: number
PcmkConfigRestartTimeout:
default: 600
description: Time in seconds to wait for a pacemaker resource to restart when
a config change is detected and the resource is being restarted
type: number
conditions:
internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]}
outputs:
container_config_scripts:
description: Shared container config scripts
value:
container_puppet_apply.sh:
mode: "0700"
content: |
#!/bin/bash
set -eux
STEP=$1
TAGS=$2
CONFIG=$3
EXTRA_ARGS=${4:-''}
if [ -d /tmp/puppet-etc ]; then
# ignore copy failures as these may be the same file depending on docker mounts
cp -a /tmp/puppet-etc/* /etc/puppet || true
fi
echo "{\"step\": ${STEP}}" > /etc/puppet/hieradata/docker_puppet.json
# $::deployment_type in puppet-tripleo
export FACTER_deployment_type=containers
set +e
puppet apply $EXTRA_ARGS \
--verbose \
--detailed-exitcodes \
--summarize \
--color=false \
--modulepath /etc/puppet/modules:/opt/stack/puppet-modules:/usr/share/openstack-puppet/modules \
--tags $TAGS \
-e "noop_resource('package'); ${CONFIG}"
rc=$?
set -e
set +ux
if [ $rc -eq 2 -o $rc -eq 0 ]; then
exit 0
fi
exit $rc
pyshim.sh:
mode: "0755"
content: { get_file: ../container_config_scripts/pyshim.sh }
pacemaker_restart_bundle.sh:
mode: "0755"
content:
str_replace:
template: { get_file: ../container_config_scripts/pacemaker_restart_bundle.sh }
params:
__PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout}
volumes_base:
description: Base volume list
value: &volumes_base
list_concat:
- - /etc/hosts:/etc/hosts:ro
- /etc/localtime:/etc/localtime:ro
# OpenSSL trusted CAs
- /etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro
- /etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro
- /etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro
- /etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro
- /etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro
# Syslog socket
- /dev/log:/dev/log
- if:
- internal_tls_enabled
- - list_join:
- ':'
- - {get_param: InternalTLSCAFile}
- {get_param: InternalTLSCAFile}
- 'ro'
- null
volumes:
description: Common volumes for the containers.
value:
list_concat:
- *volumes_base
- - /etc/ssh/ssh_known_hosts:/etc/ssh/ssh_known_hosts:ro
# required for bootstrap_host_exec
- /etc/puppet:/etc/puppet:ro
pacemaker_restart_volumes:
description: Common volumes for the pacemaker restart containers.
value:
list_concat:
- *volumes_base
- - /var/lib/container-config-scripts/pacemaker_restart_bundle.sh:/pacemaker_restart_bundle.sh:ro
- /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
- /dev/shm:/dev/shm:rw
# required for bootstrap_host_exec, facter
- /etc/puppet:/etc/puppet:ro
container_puppet_apply_volumes:
description: Common volumes needed to run the container_puppet_apply.sh from container_config_scripts
value:
list_concat:
- *volumes_base
- - /var/lib/container-config-scripts/container_puppet_apply.sh:/container_puppet_apply.sh:ro
# container_puppet_apply.sh will copy this to /etc/puppet in the container
- /etc/puppet:/tmp/puppet-etc:ro
- /usr/share/openstack-puppet/modules:/usr/share/openstack-puppet/modules:ro
healthcheck_rpc_port:
description: healthcheck command that probes the RpcPort
value:
test:
str_replace:
template:
'/openstack/healthcheck RPCPORT'
params:
RPCPORT: {get_param: RpcPort}