Make execution of the backup idempotent

Currently, the playbook stops the services and pacemaker in order to perform the backup. If the backup fails for whatever reason, the playbook cannot be run again: it fails because the services are down. This patch addresses the issue in two ways. First, by using Ansible's block/always feature, we ensure that whenever an error happens, the playbook restarts pacemaker and the services before exiting.

The second way concerns the execution itself. By controlling certain actions, we ensure that the backup will be successful no matter what state the services and pacemaker are in when the playbook is run. This ensures success even when an operator stops the Ansible execution manually (which does not trigger the 'always' section).

BZ: #1954818
Change-Id: Id2aff61f219b0c4992f6f0045f1aba2c7d129758
(cherry picked from commit 9a865d1769)
This commit is contained in:
Juan Larriba 2021-05-14 14:31:46 +02:00
parent f48431709f
commit 72b53f1d5b
6 changed files with 92 additions and 30 deletions

View File

@ -32,6 +32,12 @@
tags:
- always
# Best-effort unpause of this host's ceph-mon container so that later tasks
# find it running regardless of the state a previous run left it in.
- name: Unpause ceph mon container if paused
  command: "{{ tripleo_container_cli }} unpause ceph-mon-{{ ansible_facts['hostname'] }}"
  register: ceph_mon_unpause
  # Report "changed" only when the unpause really happened; without this the
  # command module reports a change on every run, even when it exits non-zero.
  changed_when: (ceph_mon_unpause.rc | default(1)) == 0
  # A non-zero exit (presumably: container not paused or not present) is
  # deliberately ignored so the play can continue.
  failed_when: false
  tags:
    - bar_create_recover_image
- name: Export ceph authentication
shell: |
set -o pipefail

View File

@ -48,6 +48,31 @@
tags:
- bar_create_recover_image
# Start the pacemaker cluster on every node (`--all`). The command is issued
# a single time per play (run_once) and only for Galera deployments where the
# service manager is enabled and snapshots are not in use.
- name: Enable pacemaker if it is stopped
  run_once: true
  command: pcs cluster start --all
  tags:
    - bar_create_recover_image
  when:
    - enabled_galera
    - tripleo_backup_and_restore_service_manager|bool
    - not tripleo_backup_and_restore_enable_snapshots|bool
# Poll the listening sockets until something matching "mysqld" shows up on
# port 3306. Gives up after 300 attempts spaced 5 seconds apart.
- name: Wait until pacemaker has Galera up&running
  # sed drops everything up to the last '/' on each matching line.
  shell: |
    set -o pipefail
    ss -tunlp | grep ":3306 " | sed -e 's/.*\///'
  register: mysql_result
  until: mysql_result is search('mysqld')
  retries: 300
  delay: 5
  tags:
    - bar_create_recover_image
  when:
    - enabled_galera
    - tripleo_backup_and_restore_service_manager|bool
    - not tripleo_backup_and_restore_enable_snapshots|bool
- name: Get the mysql container id when galera is enabled
shell: |
set -o pipefail
@ -65,6 +90,17 @@
tags:
- bar_create_recover_image
# Best-effort unpause of the standalone (non-Galera) mysql container before
# the database is dumped, in case it is still paused.
- name: Unpause mysql for backup if it is paused
  command: "{{ tripleo_container_cli }} unpause {{ tripleo_backup_and_restore_mysql_container }}"
  register: mysql_unpause
  # Report "changed" only when the unpause really happened; without this the
  # command module reports a change on every run, even when it exits non-zero.
  changed_when: (mysql_unpause.rc | default(1)) == 0
  # A non-zero exit (presumably: container not paused) is deliberately
  # ignored so the backup can proceed.
  failed_when: false
  when:
    - mysql_password.stderr is defined
    - tripleo_backup_and_restore_mysql_container == "mysql"
    - not enabled_galera
    - tripleo_backup_and_restore_service_manager|bool
  tags:
    - bar_create_recover_image
- name: MySQL Grants backup
shell: |
set -o pipefail
@ -98,7 +134,8 @@
- tripleo_backup_and_restore_mysql_container == "mysql"
- not enabled_galera
- tripleo_backup_and_restore_service_manager|bool
- not tripleo_backup_and_restore_enable_snapshots
- not tripleo_backup_and_restore_enable_snapshots|bool
failed_when: false
tags:
- bar_create_recover_image
@ -107,7 +144,7 @@
when:
- enabled_galera
- tripleo_backup_and_restore_service_manager|bool
- not tripleo_backup_and_restore_enable_snapshots
- not tripleo_backup_and_restore_enable_snapshots|bool
run_once: true
tags:
- bar_create_recover_image

View File

@ -42,7 +42,7 @@
import_tasks: service_manager_pause.yml
when:
- tripleo_backup_and_restore_service_manager|bool
- not tripleo_backup_and_restore_enable_snapshots
- not tripleo_backup_and_restore_enable_snapshots|bool
- name: Backup the database
import_tasks: db_backup.yml
@ -50,11 +50,19 @@
- name: Backup pacemaker configuration
import_tasks: pacemaker_backup.yml
- name: Create recovery images with ReaR
import_tasks: run_backup.yml
- name: Perform backup
block:
- name: Create recovery images with ReaR
import_tasks: run_backup.yml
always:
- name: Service management
import_tasks: service_manager_unpause.yml
when:
- tripleo_backup_and_restore_service_manager|bool
- not tripleo_backup_and_restore_enable_snapshots|bool
- name: Service management
import_tasks: service_manager_unpause.yml
when:
- tripleo_backup_and_restore_service_manager|bool
- not tripleo_backup_and_restore_enable_snapshots
- name: Pacemaker management
import_tasks: pacemaker_unstandby.yml
when:
- pacemaker_enabled
- tripleo_backup_and_restore_enable_snapshots|bool

View File

@ -0,0 +1,30 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Bring the node back into the pacemaker cluster
# Take the node out of standby mode; with no node argument pcs acts on the
# node the command runs on.
- name: Add the node to the pacemaker cluster
  tags:
    - bar_create_recover_image
  command: pcs node unstandby
# Poll the listening sockets until something matching "mysqld" shows up on
# port 3306. Gives up after 300 attempts spaced 5 seconds apart.
- name: Wait until pacemaker has Galera up&running
  # pipefail makes a failing `ss` or `grep` visible in the task result
  # instead of being masked by sed's exit status; failed attempts are still
  # retried by the until loop.
  shell: |
    set -o pipefail
    ss -tunlp | grep ":3306 " | sed -e 's/.*\///'
  register: mysql_result
  retries: 300
  until: mysql_result is search('mysqld')
  delay: 5
  tags:
    - bar_create_recover_image

View File

@ -54,26 +54,6 @@
tags:
- bar_create_recover_image
- name: Add the node to the pacemaker cluster
command: pcs node unstandby
when:
- pacemaker_enabled
- tripleo_backup_and_restore_enable_snapshots|bool
tags:
- bar_create_recover_image
- name: Wait until pacemaker has Galera up&running
shell: netstat -tunlp | grep ":3306 " | sed -e 's/.*\///'
register: mysql_result
retries: 10
until: mysql_result is search('mysqld')
delay: 5
when:
- pacemaker_enabled
- tripleo_backup_and_restore_enable_snapshots|bool
tags:
- bar_create_recover_image
- name: Clean old backups
shell: |
set -o pipefail

View File

@ -39,6 +39,7 @@
/usr/bin/{{ tripleo_container_cli }} ps --format '{{ '{{' }}.Names {{ '}}' }} ' | /usr/bin/egrep -v 'galera|mysql|bundle'
register: container_services
changed_when: container_services.stdout is undefined
failed_when: false
tags:
- bar_create_recover_image