Files
openstack-ansible-rabbitmq_…/tasks/rabbitmq_upgrade_check.yml
Dmitriy Rabotyagov 8ea306455e Fix quorum/stream queues if they're below minimal size
If it is unsafe to shut down the current node because queues would drop
below their minimal quorum size, we attempt to fix this by growing the
affected queues to the rest of the cluster.

This step is vital during OS upgrades, when the operating system on a
controller is re-installed, because quorum/stream queues must be
manually stretched after the re-install.

Change-Id: I87e2ae7b43dbcf77c9f166656686d25dbd8937fd
2025-04-10 13:15:15 +00:00

212 lines
8.1 KiB
YAML

---
# Copyright 2015, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Ask dpkg for the version of the rabbitmq-server package on apt-based hosts.
# A non-zero exit code is tolerated ('failed_when: false'); the registered
# result is evaluated by later tasks instead.
# ansible-lint objects to the direct use of 'dpkg', so this task is excluded
# from the lint run through the 'skip_ansible_lint' tag.
- name: Get version of installed RabbitMQ package (deb)
  ansible.builtin.command:
    cmd: "dpkg-query -f='${Version}' -W rabbitmq-server"
  register: installed_rabbitmq_deb
  when: ansible_facts['pkg_mgr'] == 'apt'
  # Read-only query: never reported as changed and executed even in check mode.
  changed_when: false
  check_mode: false
  failed_when: false
  tags:
    - rabbitmq-package-deb
    - rabbitmq-apt-packages
    - skip_ansible_lint
# Ask rpm for the version of the rabbitmq-server package on dnf-based hosts.
# A non-zero exit code is tolerated ('failed_when: false'); the registered
# result is evaluated by later tasks instead.
# ansible-lint objects to the direct use of 'rpm', so this task is excluded
# from the lint run through the 'skip_ansible_lint' tag.
- name: Get version of installed RabbitMQ package (rpm)
  ansible.builtin.command:
    cmd: "rpm --queryformat '%{VERSION}' -q rabbitmq-server"
  register: installed_rabbitmq_rpm
  when: ansible_facts['pkg_mgr'] == 'dnf'
  # Read-only query: never reported as changed and executed even in check mode.
  changed_when: false
  check_mode: false
  failed_when: false
  tags:
    - rabbitmq-package-rpm
    - rabbitmq-dnf-packages
    - skip_ansible_lint
# Merge the package-manager specific query results into the single fact
# 'installed_rabbitmq'. Results of skipped query tasks are ignored.
- name: Register a fact for the installed RabbitMQ version
  ansible.builtin.set_fact:
    installed_rabbitmq: "{{ item }}"
  loop:
    - "{{ installed_rabbitmq_deb }}"
    - "{{ installed_rabbitmq_rpm }}"
  when: item is not skipped
# Stop the play when no upgrade was requested but the installed version differs
# from the configured one. Only the part of each version string before the
# first '-' is compared. Hosts where the package query failed (rc != 0) and
# 'distro' installs are not checked.
- name: Ensure installed RabbitMQ version is same as expected
  any_errors_fatal: true
  when:
    - not rabbitmq_upgrade | bool
    - installed_rabbitmq.rc == 0
    - >-
      (installed_rabbitmq.stdout | split('-') | first)
      is not version(rabbitmq_package_version | split('-') | first, '==')
    - rabbitmq_install_method != 'distro'
  tags:
    - rabbitmq-package-deb
    - rabbitmq-package-rpm
    - rabbitmq-apt-packages
  ansible.builtin.fail:
    msg: "To install a new major/minor version of RabbitMQ set '-e rabbitmq_upgrade=true'."
# When an upgrade was requested, refuse to continue if the installed version is
# older than the minimum required for the target version. The requirement is
# taken from the first entry of '_rabbitmq_upgrade_minimal_requirement_mapping'
# whose 'target' is contained in 'rabbitmq_package_version'.
- name: Ensure that RabbitMQ is running minimal required version for upgrade
  vars:
    _minimal_required_version: >-
      {{
        (
          _rabbitmq_upgrade_minimal_requirement_mapping
          | selectattr('target', 'in', rabbitmq_package_version)
          | first
        )['requirement']
      }}
  when:
    - rabbitmq_upgrade | bool
    - installed_rabbitmq.rc == 0
    - installed_rabbitmq.stdout is version(_minimal_required_version, '<')
    - rabbitmq_install_method != 'distro'
  tags:
    - rabbitmq-package-deb
    - rabbitmq-package-rpm
    - rabbitmq-apt-packages
  any_errors_fatal: true
  ansible.builtin.fail:
    msg: |
      Minimal required version for proceeding with RabbitMQ upgrade to {{ rabbitmq_package_version }} is {{ _minimal_required_version }}.
      Please, upgrade to the version {{ _minimal_required_version }} before proceeding with further upgrade.
      Current detected version is {{ installed_rabbitmq.stdout }}.
# Pull in the cluster state tasks when an upgrade was requested.
# The 'apply' tags are attached to the included tasks, while the task-level
# tag selects the include statement itself.
- name: Including rabbitmq_cluster_state tasks
  ansible.builtin.include_tasks:
    file: rabbitmq_cluster_state.yml
    apply:
      tags:
        - rabbitmq-upgrade
  when: rabbitmq_upgrade | bool
  tags:
    - rabbitmq-upgrade
# Abort the upgrade when the gathered cluster state reveals a problem.
# Every entry of '_rabbitmq_cluster_health_issues' pairs a message with the
# condition that triggers it; all triggered messages are reported together in
# a single failure.
- name: Fail if cluster state is not healthy for upgrade
  vars:
    _rabbitmq_cluster_health_issues:
      - msg: |
          The number of running RabbitMQ nodes is not equal to number of configured nodes in the {{ rabbitmq_host_group }} group.
          Please ensure that all of the nodes are running and healthy before performing the upgrade.
        condition: "{{ ((_cluster_state.get('running_nodes', []) | length) != (groups[rabbitmq_host_group] | length)) }}"
      - msg: |
          There are active alarms for the rabbitmq cluster. Please check the cluster status with `rabbitmq-diagnostics cluster_status` and
          resolve any alarms before proceeding with the upgrade.
        condition: "{{ ((_cluster_state.get('alarms', []) | length) != 0) }}"
      - msg: |
          A network partition has been detected within your cluster. This might indicate a split-brain or network connectivity problem.
          Please check the cluster status with `rabbitmq-diagnostics cluster_status` and resolve any partitioning before proceeding with the upgrade.
        condition: "{{ ((_cluster_state.get('partitions', []) | length) != 0) }}"
    # Materialise the filter result so that '| length' below always sees a list.
    _rabbitmq_cluster_upgrade_blockers: "{{ _rabbitmq_cluster_health_issues | selectattr('condition', 'true') | list }}"
  ansible.builtin.fail:
    msg: "{{ _rabbitmq_cluster_upgrade_blockers | map(attribute='msg') | join('\n') }}"
  any_errors_fatal: true
  when:
    - rabbitmq_upgrade | bool
    # Conditionals must evaluate to a boolean on recent ansible-core releases,
    # so the mapping is tested for truthiness explicitly instead of being used
    # as a bare condition.
    - _cluster_state is truthy
    - _rabbitmq_cluster_upgrade_blockers | length > 0
  tags:
    - rabbitmq-upgrade
# Check whether this node is quorum critical before it gets upgraded.
# When the check task fails, the rescue section adds the other running cluster
# members to every reported quorum queue and stream, and then waits for the
# queues to regain their online quorum.
- name: Verify if queues/streams are healthy
  vars:
    # Running cluster members other than the local node.
    _rabbitmq_other_cluster_members: >-
      {{
        _cluster_state.get('running_nodes', [])
        | reject('eq', 'rabbit@' ~ ansible_facts['hostname'].split('.')[0])
        | list
      }}
    # Queues reported by the check, parsed as JSON from its stderr.
    # Only evaluated inside the rescue section, after the check has run.
    _rabbitmq_quorum_critical_queues: "{{ (_rabbitmq_is_quorum_critical.stderr | from_json)['queues'] }}"
  when:
    - rabbitmq_upgrade | bool
    # Conditionals must evaluate to a boolean on recent ansible-core releases,
    # so the mapping is tested for truthiness explicitly instead of being used
    # as a bare condition.
    - _cluster_state is truthy
  tags:
    - rabbitmq-upgrade
  block:
    - name: Check if all quorum queues and streams are above the minimum online quorum
      ansible.builtin.command: rabbitmq-diagnostics check_if_node_is_quorum_critical --formatter json
      changed_when: false
      register: _rabbitmq_is_quorum_critical
  rescue:
    # NOTE(noonedeadpunk): In case this proves to be too slow, alternatively we can use smth like
    #       rabbitmq-queues grow "{{ item.0 }}" "all" --vhost-pattern ".*" --queue-pattern ".*"
    #       But: a) it is available only for quorum, not streams b) it may cause load on rabbit at scale
    # Each loop item is a pair: item.0 is the target node, item.1 the queue.
    - name: Grow affected quorum queues to all cluster nodes
      ansible.builtin.command: 'rabbitmq-queues add_member --vhost "{{ item.1.virtual_host }}" "{{ item.1.name }}" "{{ item.0 }}"'
      changed_when: false
      vars:
        loop_label:
          node: "{{ item.0 }}"
          vhost: "{{ item.1.virtual_host }}"
          queue: "{{ item.1.name }}"
      loop: >-
        {{
          _rabbitmq_other_cluster_members
          | product(_rabbitmq_quorum_critical_queues | selectattr('type', 'eq', 'rabbit_quorum_queue') | list)
          | list
        }}
      loop_control:
        label: "{{ loop_label | to_json }}"
    # Each loop item is a pair: item.0 is the target node, item.1 the stream.
    - name: Grow affected stream queues to all cluster nodes
      ansible.builtin.command: 'rabbitmq-streams add_replica --vhost "{{ item.1.virtual_host }}" "{{ item.1.name }}" "{{ item.0 }}"'
      changed_when: false
      vars:
        loop_label:
          node: "{{ item.0 }}"
          vhost: "{{ item.1.virtual_host }}"
          queue: "{{ item.1.name }}"
      loop: >-
        {{
          _rabbitmq_other_cluster_members
          | product(_rabbitmq_quorum_critical_queues | selectattr('type', 'eq', 'rabbit_stream_queue') | list)
          | list
        }}
      loop_control:
        label: "{{ loop_label | to_json }}"
    - name: Wait for queues to go above minimum online quorum (120s)
      ansible.builtin.command: rabbitmq-upgrade await_online_quorum_plus_one
      changed_when: false
# Include the pre-upgrade task files, but only when an upgrade was requested and
# the gathered cluster state reports as many running nodes as there are hosts in
# the 'rabbitmq_host_group' inventory group, no alarms and no partitions.
- name: Prepare node for upgrade
when:
- rabbitmq_upgrade | bool
- _cluster_state
- (_cluster_state.get('running_nodes', []) | length) == (groups[rabbitmq_host_group] | length)
- (_cluster_state.get('alarms', []) | length) == 0
- (_cluster_state.get('partitions', []) | length) == 0
block:
# The feature flag tasks are included only where '_rabbitmq_is_first_play_host' is true.
- name: Including rabbitmq_feature_flags tasks
ansible.builtin.include_tasks: rabbitmq_feature_flags.yml
args:
apply:
tags:
- rabbitmq-upgrade
when:
- _rabbitmq_is_first_play_host
tags:
- rabbitmq-upgrade
# The upgrade preparation tasks are included without an extra host condition.
- name: Including rabbitmq_upgrade_prep tasks
ansible.builtin.include_tasks: rabbitmq_upgrade_prep.yml
args:
apply:
tags:
- rabbitmq-upgrade
# NOTE(review): indentation is not visible here, so it cannot be told whether this
# 'tags' key belongs to the include task above or to the enclosing block - confirm.
tags:
- always