From 2b56b7f933adda7bd26c8ccf12bee179beb1dffb Mon Sep 17 00:00:00 2001
From: Juan Larriba
Date: Fri, 7 May 2021 12:02:30 +0200
Subject: [PATCH] Move the snapshots feature to optional/beta

Previously a commit[1] introduced a new way of doing backups, using
"snapshots", that does not require bringing down the whole controller
to take the backup. In that commit, the old way was removed, which
does not follow the deprecation procedure.

This commit reintroduces the old way of performing backups by stopping
the controllers and moves the snapshots feature to an optional,
user-selected feature.

[1] https://review.opendev.org/c/openstack/tripleo-ansible/+/786380

(cherry picked from commit 00a17ffb4a2262bffe750330bee333a8d3948247)
Change-Id: I49944d585825b32052278cd865a74c46dcb5eaa4
---
 .../backup_and_restore/defaults/main.yml      |  4 ++
 .../molecule/default/converge.yml             |  1 -
 .../backup_and_restore/tasks/db_backup.yml    | 21 ++++++++
 .../roles/backup_and_restore/tasks/main.yml   | 28 ++++++----
 .../backup_and_restore/tasks/run_backup.yml   | 35 ++++---------
 .../tasks/service_manager_pause.yml           | 50 ++++++++++++++++++
 .../tasks/service_manager_unpause.yml         | 51 +++++++++++++++++++
 7 files changed, 153 insertions(+), 37 deletions(-)
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/service_manager_pause.yml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/service_manager_unpause.yml

diff --git a/tripleo_ansible/roles/backup_and_restore/defaults/main.yml b/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
index ac209ced5..5b61223fd 100644
--- a/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
+++ b/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
@@ -24,6 +24,10 @@ tripleo_container_cli: "{{ container_cli | default('podman') }}"
 # Stop and start all running services before backup is ran.
 tripleo_backup_and_restore_service_manager: true
 
+# If this is false, backup of the overcloud is taken by stopping it completely. Enable it to do a
+# backup stopping only one node at a time, maintaining the controller active during the backup duration.
+tripleo_backup_and_restore_enable_snapshots: true
+
 # Set the name of the mysql container
 tripleo_backup_and_restore_mysql_container: mysql
 
diff --git a/tripleo_ansible/roles/backup_and_restore/molecule/default/converge.yml b/tripleo_ansible/roles/backup_and_restore/molecule/default/converge.yml
index 58a7a882b..92db74f8e 100644
--- a/tripleo_ansible/roles/backup_and_restore/molecule/default/converge.yml
+++ b/tripleo_ansible/roles/backup_and_restore/molecule/default/converge.yml
@@ -22,5 +22,4 @@
     - role: backup_and_restore
       tripleo_backup_and_restore_nfs_server: undercloud
       tripleo_backup_and_restore_rear_simulate: true
-      tripleo_backup_and_restore_service_manager: false
       tripleo_backup_and_restore_hiera_config_file: "{{ ansible_user_dir }}/hiera.yaml"
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/db_backup.yml b/tripleo_ansible/roles/backup_and_restore/tasks/db_backup.yml
index a893992f7..b9d36e662 100644
--- a/tripleo_ansible/roles/backup_and_restore/tasks/db_backup.yml
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/db_backup.yml
@@ -90,3 +90,24 @@
   tags:
     - bar_create_recover_image
   no_log: "{{ tripleo_backup_and_restore_hide_sensitive_logs | bool }}"
+
+- name: Pause mysql.
+  command: "{{ tripleo_container_cli }} pause {{ tripleo_backup_and_restore_mysql_container }}"
+  when:
+    - mysql_password.stderr is defined
+    - tripleo_backup_and_restore_mysql_container == "mysql"
+    - not enabled_galera
+    - tripleo_backup_and_restore_service_manager|bool
+    - not tripleo_backup_and_restore_enable_snapshots|bool
+  tags:
+    - bar_create_recover_image
+
+- name: Stop pacemaker
+  command: pcs cluster stop --all
+  when:
+    - enabled_galera
+    - tripleo_backup_and_restore_service_manager|bool
+    - not tripleo_backup_and_restore_enable_snapshots|bool
+  run_once: true
+  tags:
+    - bar_create_recover_image
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/main.yml b/tripleo_ansible/roles/backup_and_restore/tasks/main.yml
index 5ac7ea6d0..534875ca3 100644
--- a/tripleo_ansible/roles/backup_and_restore/tasks/main.yml
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/main.yml
@@ -38,15 +38,23 @@
 - name: Setup ReaR
   import_tasks: setup_rear.yml
 
-- name: Do Backup
-  block:
-    - name: Backup the database
-      import_tasks: db_backup.yml
+- name: Service management
+  import_tasks: service_manager_pause.yml
+  when:
+    - tripleo_backup_and_restore_service_manager|bool
+    - not tripleo_backup_and_restore_enable_snapshots|bool
 
-    - name: Backup pacemaker configuration
-      import_tasks: pacemaker_backup.yml
+- name: Backup the database
+  import_tasks: db_backup.yml
 
-    - name: Create recovery images with ReaR
-      import_tasks: run_backup.yml
-  tags:
-    - bar_create_recover_image
+- name: Backup pacemaker configuration
+  import_tasks: pacemaker_backup.yml
+
+- name: Create recovery images with ReaR
+  import_tasks: run_backup.yml
+
+- name: Service management
+  import_tasks: service_manager_unpause.yml
+  when:
+    - tripleo_backup_and_restore_service_manager|bool
+    - not tripleo_backup_and_restore_enable_snapshots|bool
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/run_backup.yml b/tripleo_ansible/roles/backup_and_restore/tasks/run_backup.yml
index 2476ce210..976f74df4 100644
--- a/tripleo_ansible/roles/backup_and_restore/tasks/run_backup.yml
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/run_backup.yml
@@ -32,32 +32,11 @@
   tags:
     - always
 
-- name: Move virtual IPs to another node before stopping pacemaker
-  when: pacemaker_enabled
-  shell: |
-    CLUSTER_NODE=$(crm_node -n)
-    echo "Retrieving all the VIPs which are hosted on this node"
-    VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
-    for v in ${VIPS_TO_MOVE}; do
-        echo "Moving VIP $v on another node"
-        pcs resource ban $v ${CLUSTER_NODE} --wait=300
-    done
-    echo "Removing the location constraints that were created to move the VIPs"
-    for v in ${VIPS_TO_MOVE}; do
-        echo "Removing location ban for VIP $v"
-        ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
-        if [ -n "$ban_id" ]; then
-            pcs constraint remove ${ban_id}
-        else
-            echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
-        fi
-    done
-  tags:
-    - bar_create_recover_image
-
 - name: Take this node out of pacemaker
   command: pcs node standby
-  when: pacemaker_enabled
+  when:
+    - pacemaker_enabled
+    - tripleo_backup_and_restore_enable_snapshots|bool
   tags:
     - bar_create_recover_image
 
@@ -77,7 +56,9 @@
 
 - name: Add the node to the pacemaker cluster
   command: pcs node unstandby
-  when: pacemaker_enabled
+  when:
+    - pacemaker_enabled
+    - tripleo_backup_and_restore_enable_snapshots|bool
   tags:
     - bar_create_recover_image
 
@@ -87,6 +68,8 @@
   retries: 10
   until: mysql_result is search('mysqld')
   delay: 5
-  when: pacemaker_enabled
+  when:
+    - pacemaker_enabled
+    - tripleo_backup_and_restore_enable_snapshots|bool
   tags:
     - bar_create_recover_image
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_pause.yml b/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_pause.yml
new file mode 100644
index 000000000..0f27ad476
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_pause.yml
@@ -0,0 +1,50 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+# Resolve the container cli from hiera, list the running containers whose
+# name does not match galera, mysql or bundle, and pause each of them ahead
+# of the database backup.
+
+- name: Get Container cli
+  command: hiera -c /etc/puppet/hiera.yaml container_cli
+  register: tripleo_backup_and_restore_container_cli
+  changed_when: tripleo_backup_and_restore_container_cli.stdout is undefined
+  tags:
+    - bar_create_recover_image
+
+- name: set tripleo_container_cli
+  set_fact:
+    tripleo_container_cli: "{{ tripleo_backup_and_restore_container_cli.stdout }}"
+  when:
+    - tripleo_backup_and_restore_container_cli.stdout != 'nil'
+  tags:
+    - bar_create_recover_image
+
+- name: Gather Container Service Name
+  shell: |
+    set -o pipefail
+    /usr/bin/{{ tripleo_container_cli }} ps --format '{{ '{{' }}.Names {{ '}}' }} ' | /usr/bin/egrep -v 'galera|mysql|bundle'
+  register: container_services
+  changed_when: container_services.stdout is undefined
+  tags:
+    - bar_create_recover_image
+
+- name: Pause containers for database backup.
+  command: "{{ tripleo_container_cli }} pause {{ item }}"
+  with_items: "{{ container_services.stdout_lines }}"
+  when: container_services is defined
+  tags:
+    - bar_create_recover_image
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_unpause.yml b/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_unpause.yml
new file mode 100644
index 000000000..f3852dcfb
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/service_manager_unpause.yml
@@ -0,0 +1,51 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+# Start the pacemaker cluster when galera is enabled, otherwise unpause the
+# database container. Then list every container that is still in the paused
+# state and unpause it.
+
+- name: Enable pacemaker
+  command: pcs cluster start --all
+  when: enabled_galera
+  run_once: true
+  tags:
+    - bar_create_recover_image
+
+- name: unPause database container
+  command: "{{ tripleo_container_cli }} unpause {{ tripleo_backup_and_restore_mysql_container }}"
+  when:
+    - tripleo_container_cli is defined
+    - not enabled_galera
+    - tripleo_backup_and_restore_mysql_container is defined
+  tags:
+    - bar_create_recover_image
+
+- name: Gather Container Service Name
+  shell: |
+    set -o pipefail
+    /usr/bin/{{ tripleo_container_cli }} ps -a --filter='status=paused' --format '{{ '{{' }}.Names {{ '}}' }} '
+  register: container_services
+  changed_when: false
+  tags:
+    - bar_create_recover_image
+
+- name: unPause containers
+  command: "{{ tripleo_container_cli }} unpause {{ item }}"
+  with_items: "{{ container_services.stdout_lines }}"
+  when: tripleo_container_cli is defined
+  tags:
+    - bar_create_recover_image