tripleo-heat-templates/deployment/rabbitmq/rabbitmq-messaging-pacemaker-puppet.yaml
Damien Ciabrini 7f785e8757 HA: fix <service>_restart_bundle with minor update workflow
For each HA service we have a paunch container <service>_restart_bundle
which is started by paunch whenever config files change during stack
deploy/update. This container runs a pcs command on a single node to
restart all the service's containers (e.g. all galera on all controllers).
By design, when it is run, configs have already been regenerated by the
deploy tasks on all nodes.

For minor updates, the workflow runs differently: all the steps of the
deploy tasks are run one node after the other, so when
<service>_restart_bundle is called, there is no guarantee that the
service's configs have been regenerated on all the nodes yet.

To fix the wrong restart behaviour, only restart local containers when
running during a minor update. And run once per node. When the minor
update workflow calls <service>_restart_container, we still have the
guarantee that the config files are already regenerated locally.

Co-Authored-By: Michele Baldessari <michele@acksyn.org>
Co-Authored-By: Luca Miccini <lmiccini@redhat.com>

Change-Id: I92d4ddf2feeac06ce14468ae928c283f3fd04f45
Closes-Bug: #1841629
2019-08-30 18:46:31 +02:00

458 lines
19 KiB
YAML

# HOT template describing the pacemaker-managed, containerized RabbitMQ service.
heat_template_version: rocky

description: >
  OpenStack containerized Rabbitmq service
# Inputs of the template. Each entry lists its type first, then its
# description, then its default and any extra attributes.
parameters:
  # Container images used for the service and for its config volume.
  ContainerRabbitmqImage:
    type: string
    description: image
  ContainerRabbitmqConfigImage:
    type: string
    description: The container image to use for the rabbitmq config_volume

  # Generic service inputs; most are handed through unchanged to the
  # RabbitMQServiceBase resource.
  EndpointMap:
    type: json
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
    default: {}
  ServiceData:
    type: json
    description: Dictionary packing service data
    default: {}
  ServiceNetMap:
    type: json
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
    default: {}
  DefaultPasswords:
    type: json
    default: {}

  # Erlang cluster cookie; when left empty the value from
  # DefaultPasswords.rabbit_cookie is used by the bootstrap container.
  RabbitCookie:
    type: string
    default: ''
    hidden: true

  RoleName:
    type: string
    description: Role name on which the service is applied
    default: ''
  RoleParameters:
    type: json
    description: Parameters specific to the role
    default: {}

  # Drives the puppet_debug_enabled condition.
  ConfigDebug:
    type: boolean
    description: Whether to run config management (e.g. Puppet) in debug mode.
    default: false

  # Drives the docker_enabled condition and the pacemaker bundle backend.
  ContainerCli:
    type: string
    description: CLI tool used to manage containers.
    default: 'podman'
    constraints:
      - allowed_values:
          - 'docker'
          - 'podman'

  DeployIdentifier:
    type: string
    description: >
      Setting this to a unique value will re-run any deployment tasks which
      perform configuration on a Heat stack-update.
    default: ''
# Named conditions referenced by the `if` functions in the outputs section.
conditions:
  # True when config management should run with --debug.
  puppet_debug_enabled:
    get_param: ConfigDebug
  # True when containers are managed with docker rather than podman.
  docker_enabled:
    equals:
      - get_param: ContainerCli
      - 'docker'
# Nested templates whose attributes are consumed by the outputs section.
resources:
  # Source of container_config_scripts and the shared volume lists.
  ContainersCommon:
    type: ../containers-common.yaml

  # Base (non-pacemaker) RabbitMQ service; this template layers the
  # pacemaker-specific settings on top of its role_data.
  RabbitMQServiceBase:
    type: ./rabbitmq-container-puppet.yaml
    properties:
      ServiceData:
        get_param: ServiceData
      ServiceNetMap:
        get_param: ServiceNetMap
      DefaultPasswords:
        get_param: DefaultPasswords
      EndpointMap:
        get_param: EndpointMap
      RoleName:
        get_param: RoleName
      RoleParameters:
        get_param: RoleParameters
# role_data is the single output of this template. It carries the hiera
# settings, the puppet/kolla/container definitions and the Ansible task lists
# (host prep, deploy, update, upgrade, fast-forward upgrade) for RabbitMQ
# running as a pacemaker bundle.
outputs:
  role_data:
    description: Role data for the Rabbitmq API role.
    value:
      service_name: rabbitmq
      monitoring_subscription: {get_attr: [RabbitMQServiceBase, role_data, monitoring_subscription]}
      # Hiera data: the base service settings plus the pacemaker overrides.
      config_settings:
        map_merge:
          - get_attr: [RabbitMQServiceBase, role_data, config_settings]
          - rabbitmq::service_manage: false
            # "<image name>:pcmklatest" -- the tag of ContainerRabbitmqImage is
            # replaced by 'pcmklatest'. Anchored because the task lists below
            # reuse the same value.
            tripleo::profile::pacemaker::rabbitmq_bundle::rabbitmq_docker_image: &rabbitmq_image_pcmklatest
              list_join:
                - ':'
                - - yaql:
                      data: {get_param: ContainerRabbitmqImage}
                      expression: $.data.rightSplit(separator => ":", maxSplits => 1)[0]
                  - 'pcmklatest'
            tripleo::profile::pacemaker::rabbitmq_bundle::control_port: 3122
            tripleo::profile::pacemaker::rabbitmq_bundle::container_backend: {get_param: ContainerCli}
            tripleo::rabbitmq::firewall_rules:
              '109 rabbitmq-bundle':
                dport:
                  - 3122
                  - 4369
                  - 5672
                  - 25672
      # The base resource declared in this template is RabbitMQServiceBase;
      # every get_attr below must reference that name.
      service_config_settings: {get_attr: [RabbitMQServiceBase, role_data, service_config_settings]}
      # BEGIN DOCKER SETTINGS
      puppet_config:
        config_volume: rabbitmq
        puppet_tags: 'file,file_line'
        step_config:
          list_join:
            - "\n"
            - - "['Rabbitmq_policy', 'Rabbitmq_user'].each |String $val| { noop_resource($val) }"
              - "include ::tripleo::profile::pacemaker::rabbitmq_bundle"
        config_image: {get_param: ContainerRabbitmqConfigImage}
      kolla_config:
        /var/lib/kolla/config_files/rabbitmq.json:
          command: /usr/sbin/pacemaker_remoted
          config_files:
            - dest: /etc/libqb/force-filesystem-sockets
              source: /dev/null
              owner: root
              perm: '0644'
            - dest: /var/log/btmp
              source: /dev/null
              owner: root:utmp
              perm: '0600'
            - source: "/var/lib/kolla/config_files/src/*"
              dest: "/"
              merge: true
              preserve_properties: true
            - source: "/var/lib/kolla/config_files/src-tls/*"
              dest: "/"
              merge: true
              optional: true
              preserve_properties: true
          permissions:
            - path: /var/lib/rabbitmq
              owner: rabbitmq:rabbitmq
              recurse: true
            - path: /var/log/rabbitmq
              owner: rabbitmq:rabbitmq
              recurse: true
            - path: /etc/pki/tls/certs/rabbitmq.crt
              owner: rabbitmq:rabbitmq
              perm: '0600'
              optional: true
            - path: /etc/pki/tls/private/rabbitmq.key
              owner: rabbitmq:rabbitmq
              perm: '0600'
              optional: true
      # When using pacemaker we don't launch the container, instead that is done by pacemaker
      # itself.
      container_config_scripts: {get_attr: [ContainersCommon, container_config_scripts]}
      docker_config:
        step_1:
          rabbitmq_bootstrap:
            start_order: 0
            image: {get_param: ContainerRabbitmqImage}
            net: host
            privileged: false
            volumes:
              - /var/lib/kolla/config_files/rabbitmq.json:/var/lib/kolla/config_files/config.json:ro
              - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
              - /etc/hosts:/etc/hosts:ro
              - /etc/localtime:/etc/localtime:ro
              - /var/lib/rabbitmq:/var/lib/rabbitmq:z
            environment:
              - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
              - KOLLA_BOOTSTRAP=True
              # RABBITMQ_CLUSTER_COOKIE=<first non-empty of RabbitCookie,
              # DefaultPasswords.rabbit_cookie>
              - list_join:
                  - '='
                  - - 'RABBITMQ_CLUSTER_COOKIE'
                    - yaql:
                        expression: $.data.passwords.where($ != '').first()
                        data:
                          passwords:
                            - {get_param: RabbitCookie}
                            - {get_param: [DefaultPasswords, rabbit_cookie]}
        step_2:
          rabbitmq_restart_bundle:
            start_order: 0
            config_volume: rabbitmq
            detach: false
            net: host
            ipc: host
            user: root
            environment:
              - TRIPLEO_MINOR_UPDATE
            command: /pacemaker_restart_bundle.sh rabbitmq-bundle rabbitmq
            image: {get_param: ContainerRabbitmqImage}
            volumes:
              list_concat:
                - {get_attr: [ContainersCommon, pacemaker_restart_volumes]}
                - - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro
          rabbitmq_init_bundle:
            start_order: 1
            detach: false
            net: host
            ipc: host
            user: root
            command: # '/container_puppet_apply.sh "STEP" "TAGS" "CONFIG" "DEBUG"'
              list_concat:
                - - '/container_puppet_apply.sh'
                  - '2'
                  - 'file,file_line,concat,augeas,pacemaker::resource::bundle,pacemaker::property,pacemaker::resource::ocf,pacemaker::constraint::order,pacemaker::constraint::colocation,rabbitmq_policy,rabbitmq_user,rabbitmq_ready'
                  - 'include ::tripleo::profile::base::pacemaker;include ::tripleo::profile::pacemaker::rabbitmq_bundle'
                - if:
                    - puppet_debug_enabled
                    - - '--debug'
                    - - ''
            image: {get_param: ContainerRabbitmqImage}
            volumes:
              list_concat:
                - {get_attr: [ContainersCommon, container_puppet_apply_volumes]}
                - - /bin/true:/bin/epmd
                # corosync.conf is only bind-mounted when docker is the backend.
                - if:
                    - docker_enabled
                    - - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro
                    - null
            environment:
              # https://launchpad.net/bugs/1822673 (lang/lc_all to utf-8 are an elixir requirement)
              - 'LANG=en_US.UTF-8'
              - 'LC_ALL=en_US.UTF-8'
              # NOTE: this should force this container to re-run on each
              # update (scale-out, etc.)
              - list_join:
                  - ''
                  - - 'TRIPLEO_DEPLOY_IDENTIFIER='
                    - {get_param: DeployIdentifier}
      host_prep_tasks:
        - name: create persistent directories
          file:
            path: "{{ item.path }}"
            state: directory
            setype: "{{ item.setype }}"
          with_items:
            - { 'path': /var/lib/rabbitmq, 'setype': svirt_sandbox_file_t }
            - { 'path': /var/log/containers/rabbitmq, 'setype': svirt_sandbox_file_t }
            - { 'path': /var/log/rabbitmq, 'setype': svirt_sandbox_file_t }
        - name: rabbitmq logs readme
          copy:
            dest: /var/log/rabbitmq/readme.txt
            content: |
              Log files from rabbitmq containers can be found under
              /var/log/containers/rabbitmq.
          ignore_errors: true
        - name: stop the Erlang port mapper on the host and make sure it cannot bind to the port used by container
          shell: |
            echo 'export ERL_EPMD_ADDRESS=127.0.0.1' > /etc/rabbitmq/rabbitmq-env.conf
            echo 'export ERL_EPMD_PORT=4370' >> /etc/rabbitmq/rabbitmq-env.conf
            for pid in $(pgrep epmd --ns 1 --nslist pid); do kill $pid; done
      metadata_settings:
        get_attr: [RabbitMQServiceBase, role_data, metadata_settings]
      deploy_steps_tasks:
        - name: RabbitMQ tag container image for pacemaker
          when: step|int == 1
          import_role:
            name: tripleo-container-tag
          vars:
            container_image: {get_param: ContainerRabbitmqImage}
            container_image_latest: *rabbitmq_image_pcmklatest
      update_tasks:
        - name: Rabbit fetch and retag container image for pacemaker
          when: step|int == 2
          # Anchored: the same task list is replayed at upgrade step 3.
          block: &rabbitmq_fetch_retag_container_tasks
            - name: Get container rabbitmq image
              set_fact:
                rabbitmq_image: {get_param: ContainerRabbitmqImage}
                rabbitmq_image_latest: *rabbitmq_image_pcmklatest
            - name: Pull latest rabbitmq images
              command: "{{container_cli}} pull {{rabbitmq_image}}"
            - name: Get previous rabbitmq image id
              shell: "{{container_cli}} inspect --format '{{'{{'}}.Id{{'}}'}}' {{rabbitmq_image_latest}}"
              register: old_rabbitmq_image_id
              # The pcmklatest tag may not exist yet; an empty stdout is
              # handled by the conditions below.
              failed_when: false
            - name: Get new rabbitmq image id
              shell: "{{container_cli}} inspect --format '{{'{{'}}.Id{{'}}'}}' {{rabbitmq_image}}"
              register: new_rabbitmq_image_id
            - name: Retag pcmklatest to latest rabbitmq image
              include_role:
                name: tripleo-container-tag
              vars:
                container_image: "{{rabbitmq_image}}"
                container_image_latest: "{{rabbitmq_image_latest}}"
              when:
                - old_rabbitmq_image_id.stdout != new_rabbitmq_image_id.stdout
            - block:
                - name: Get a list of container using rabbitmq image
                  shell: "{{container_cli}} ps -a -q -f 'ancestor={{old_rabbitmq_image_id.stdout}}'"
                  register: rabbitmq_containers_to_destroy
                # It will be recreated with the deploy step.
                - name: Remove any container using the same rabbitmq image
                  shell: "{{container_cli}} rm -fv {{item}}"
                  with_items: "{{ rabbitmq_containers_to_destroy.stdout_lines }}"
                - name: Remove previous rabbitmq images
                  shell: "{{container_cli}} rmi -f {{old_rabbitmq_image_id.stdout}}"
              when:
                - old_rabbitmq_image_id.stdout != ''
                - old_rabbitmq_image_id.stdout != new_rabbitmq_image_id.stdout
      upgrade_tasks:
        - name: Prepare switch of rabbitmq image name
          when:
            - step|int == 0
          block:
            - name: Get rabbitmq image id currently used by pacemaker
              shell: "pcs resource config rabbitmq-bundle | grep -Eo 'image=[^ ]+' | awk -F= '{print $2;}'"
              register: rabbitmq_image_current_res
              failed_when: false
            - name: Image facts for rabbitmq
              set_fact:
                rabbitmq_image_latest: *rabbitmq_image_pcmklatest
                rabbitmq_image_current: "{{rabbitmq_image_current_res.stdout}}"
            - name: Prepare the switch to new rabbitmq container image name in pacemaker
              block:
                - name: Temporarily tag the current rabbitmq image id with the upgraded image name
                  import_role:
                    name: tripleo-container-tag
                  vars:
                    container_image: "{{rabbitmq_image_current}}"
                    container_image_latest: "{{rabbitmq_image_latest}}"
                    pull_image: false
                  when:
                    - rabbitmq_image_current != ''
                    - rabbitmq_image_current != rabbitmq_image_latest
            - name: Check rabbitmq cluster resource status
              shell: pcs resource config rabbitmq-bundle
              failed_when: false
              register: rabbitmq_pcs_res_result
            - name: Set fact rabbitmq_pcs_res
              set_fact:
                rabbitmq_pcs_res: "{{rabbitmq_pcs_res_result.rc == 0}}"
            - name: set is_rabbitmq_bootstrap_node fact
              tags: common
              set_fact: is_rabbitmq_bootstrap_node={{rabbitmq_short_bootstrap_node_name|lower == ansible_hostname|lower}}
        - name: Update rabbitmq-bundle pcs resource bundle for new container image
          when:
            - step|int == 1
            - is_rabbitmq_bootstrap_node|bool
            - rabbitmq_pcs_res|bool
            - rabbitmq_image_current != rabbitmq_image_latest
          block:
            - name: Disable the rabbitmq cluster resource before container upgrade
              pacemaker_resource:
                resource: rabbitmq-bundle
                state: disable
                wait_for_resource: true
              register: output
              retries: 5
              until: output.rc == 0
            - name: Move rabbitmq logging to /var/log/containers
              block:
                - name: Check rabbitmq logging configuration in pacemaker
                  command: cibadmin --query --xpath "//storage-mapping[@id='rabbitmq-log']"
                  ignore_errors: true
                  register: rabbitmq_logs_moved
                - name: Add a bind mount for logging in the rabbitmq bundle
                  # rc == 6 means the configuration doesn't exist in the CIB
                  when: rabbitmq_logs_moved.rc == 6
                  command: pcs resource bundle update rabbitmq-bundle storage-map add id=rabbitmq-log source-dir=/var/log/containers/rabbitmq target-dir=/var/log/rabbitmq options=rw
            - name: Update the rabbitmq bundle to use the new container image name
              command: "pcs resource bundle update rabbitmq-bundle container image={{rabbitmq_image_latest}}"
            - name: Enable the rabbitmq cluster resource
              pacemaker_resource:
                resource: rabbitmq-bundle
                state: enable
                wait_for_resource: true
              register: output
              retries: 5
              until: output.rc == 0
        - name: Create hiera data to upgrade rabbitmq in a stepwise manner.
          when:
            - step|int == 1
          block:
            - name: set rabbitmq upgrade node facts in a single-node environment
              set_fact:
                rabbitmq_short_node_names_upgraded: "{{ rabbitmq_short_node_names }}"
                cacheable: false
              when: groups['rabbitmq'] | length <= 1
            - name: set rabbitmq upgrade node facts from the limit option
              set_fact:
                rabbitmq_short_node_names_upgraded: "{{ rabbitmq_short_node_names_upgraded|default([]) + [item.split('.')[0]] }}"
                cacheable: false
              when:
                - groups['rabbitmq'] | length > 1
                - item.split('.')[0] in ansible_limit.split(',')
              loop: "{{ rabbitmq_short_node_names }}"
            # This guard must run before any task that interpolates
            # rabbitmq_short_node_names_upgraded: when no node matched the
            # limit the fact is undefined, and templating it would abort the
            # play with an undefined-variable error instead of this message.
            - fail:
                msg: >
                  You can't upgrade rabbitmq without staged
                  upgrade. You need to use the limit option in order
                  to do so.
              when: >-
                rabbitmq_short_node_names_upgraded is not defined or
                rabbitmq_short_node_names_upgraded | length == 0
            - debug:
                msg: "Prepare rabbitmq upgrade for {{ rabbitmq_short_node_names_upgraded }}"
            # Uses the configured container CLI, like the retag tasks above,
            # so the removal also works when ContainerCli is 'docker'.
            - name: remove rabbitmq init container on upgrade-scaleup to force re-init
              shell: |
                if {{container_cli}} inspect rabbitmq_init_bundle &> /dev/null; then
                  {{container_cli}} rm rabbitmq_init_bundle
                fi
              when: rabbitmq_short_node_names_upgraded | length > 1
            - name: add the rabbitmq short name to hiera data for the upgrade.
              include_role:
                name: tripleo-upgrade-hiera
                tasks_from: set.yml
              vars:
                tripleo_upgrade_key: rabbitmq_short_node_names_override
                tripleo_upgrade_value: "{{rabbitmq_short_node_names_upgraded}}"
            # Once every node is listed as upgraded the override is dropped.
            - name: remove the extra hiera data needed for the upgrade.
              include_role:
                name: tripleo-upgrade-hiera
                tasks_from: remove.yml
              vars:
                tripleo_upgrade_key: rabbitmq_short_node_names_override
              when: rabbitmq_short_node_names_upgraded | length == rabbitmq_short_node_names | length
        - name: Retag the pacemaker image if containerized
          when:
            - step|int == 3
          block: *rabbitmq_fetch_retag_container_tasks
      fast_forward_upgrade_tasks:
        - when:
            - step|int == 0
            - release == 'ocata'
            - is_bootstrap_node|bool
          block:
            - name: Check cluster resource status of rabbitmq
              pacemaker_resource:
                resource: {get_attr: [RabbitMQServiceBase, role_data, service_name]}
                state: show
                check_mode: false
              ignore_errors: true
              register: rabbitmq_res_result
            - name: Set fact rabbitmq_res
              set_fact:
                rabbitmq_res: "{{ rabbitmq_res_result.rc == 0 }}"
        - name: Disable the rabbitmq cluster resource
          pacemaker_resource:
            resource: {get_attr: [RabbitMQServiceBase, role_data, service_name]}
            state: disable
            wait_for_resource: true
          register: rabbitmq_output
          retries: 5
          until: rabbitmq_output.rc == 0
          when:
            - step|int == 2
            - release == 'ocata'
            - is_bootstrap_node|bool
            - rabbitmq_res|bool