From a930407217c7478a4b0141d28b9377973d840f97 Mon Sep 17 00:00:00 2001
From: Juan Badia Payno
Date: Thu, 5 Mar 2020 11:15:25 +0100
Subject: [PATCH] BaR - Ceph Backup

Backup and Restore:
This task aims to take a backup of the /var/lib/ceph directory and
copy it to the shared directory.

To be able to do it, we copy the way that the ceph-ansible project [1]
deactivates the mds [2], so we can stop all the ceph services on the
controllers.

[1]-https://github.com/ceph/ceph-ansible/
[2]-https://github.com/ceph/ceph-ansible/blob/v4.0.14/infrastructure-playbooks/rolling_update.yml#L550-L623

I left the comment below to describe and give some background.
** Just splitting the patch into patches.
All of Randy's comments are implemented.
The other patch is https://review.opendev.org/#/c/732399/

Change-Id: I55681d69cacf2e7bee52d9a61f12d8577fe15a2d
---
 tripleo_ansible/playbooks/ceph-backup.yaml    | 107 ++++++++++++++++++
 .../playbooks/ceph_deactivate_mds.yaml        |  96 ++++++++++++++++
 .../backup_and_restore/defaults/main.yml      |   6 +
 .../backup_and_restore/tasks/ceph_backup.yml  |  91 +++++++++++++++
 .../tasks/ceph_mgr_stop.yml                   |  41 +++++++
 .../tasks/ceph_mon_stop.yml                   |  41 +++++++
 .../tasks/ceph_nfs_stop.yml                   |  60 ++++++++++
 .../tasks/ceph_rgw_stop.yml                   |  62 ++++++++++
 8 files changed, 504 insertions(+)
 create mode 100644 tripleo_ansible/playbooks/ceph-backup.yaml
 create mode 100644 tripleo_ansible/playbooks/ceph_deactivate_mds.yaml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/ceph_backup.yml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/ceph_mgr_stop.yml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/ceph_mon_stop.yml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/ceph_nfs_stop.yml
 create mode 100644 tripleo_ansible/roles/backup_and_restore/tasks/ceph_rgw_stop.yml

diff --git a/tripleo_ansible/playbooks/ceph-backup.yaml b/tripleo_ansible/playbooks/ceph-backup.yaml
new file mode 100644
index 000000000..4600eef23
--- /dev/null
+++ b/tripleo_ansible/playbooks/ceph-backup.yaml
@@ -0,0 +1,107 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Ceph MON
+  become: true
+  hosts: overcloud_ceph_mon
+  tasks:
+    - name: Stop monitor services
+      import_role:
+        name: backup_and_restore
+        tasks_from: ceph_mon_stop
+      tags: backup
+
+- name: Ceph MGR
+  become: true
+  hosts: overcloud_ceph_mgr
+  tasks:
+    - name: Stop manager services
+      import_role:
+        name: backup_and_restore
+        tasks_from: ceph_mgr_stop
+      tags: backup
+
+- name: Ceph NFS
+  become: true
+  hosts: overcloud_ceph_nfs
+  tasks:
+    - name: Stop ceph_nfs services
+      import_role:
+        name: backup_and_restore
+        tasks_from: ceph_nfs_stop
+      tags: backup
+
+- name: Ceph MDS
+  hosts: undercloud  # the nested ansible-playbook run below is launched from here
+  tasks:
+    # Tagged like its consumer below so "--tags backup" never runs one without the other.
+    - name: Set variables
+      set_fact:
+        ceph_ansible_path: "{{ ceph_ansible_path_dir | default('/usr/share/ceph-ansible') }}"
+        ansible_log: "{{ ceph_deactivate_log | default('/home/stack/ceph_deactivate_mds.log') }}"
+        # ">-" folds without a trailing newline, which would split the shell command.
+        ceph_ansible_inventory: >-
+          {{ ansible_inventory | default('/home/stack/config-download/overcloud/ceph-ansible/inventory.yml') }}
+        ceph_deactivate_mds_file_path: >-
+          {{ ceph_deactivate_mds_file | default('/usr/share/ansible/tripleo_playbooks/ceph_deactivate_mds.yaml') }}
+        ceph_extra_vars_file_path: >-
+          {{ ceph_extra_vars_file | default('@/home/stack/config-download/overcloud/ceph-ansible/extra_vars.yml') }}
+      tags: backup
+      when: groups['overcloud_ceph_mds'] | default([]) | length > 0
+
+    # Points Ansible at the plugins, roles and library under ceph_ansible_path.
+    - name: Execute the deactivate_mds ansible playbook
+      shell: |
+        set -o pipefail
+        ANSIBLE_ACTION_PLUGINS={{ ceph_ansible_path }}/plugins/actions/ \
+        ANSIBLE_CALLBACK_PLUGINS={{ ceph_ansible_path }}/plugins/callback/ \
+        ANSIBLE_FILTER_PLUGINS={{ ceph_ansible_path }}/plugins/filter/ \
+        ANSIBLE_ROLES_PATH={{ ceph_ansible_path }}/roles/ \
+        ANSIBLE_LIBRARY={{ ceph_ansible_path }}/library/ \
+        ANSIBLE_CONFIG={{ ceph_ansible_path }}/ansible.cfg \
+        ANSIBLE_REMOTE_TEMP=/tmp/ceph_ansible_tmp \
+        ANSIBLE_FORKS=25 \
+        ANSIBLE_GATHER_TIMEOUT=60 \
+        ANSIBLE_CALLBACK_WHITELIST=profile_tasks \
+        ANSIBLE_STDOUT_CALLBACK=default \
+        ANSIBLE_LOG_PATH={{ ansible_log }} \
+        ansible-playbook --skip-tags package-install,with_pkg \
+          -e ansible_python_interpreter=/usr/libexec/platform-python \
+          --extra-vars {{ ceph_extra_vars_file_path | quote }} \
+          -i {{ ceph_ansible_inventory | quote }} \
+          {{ ceph_deactivate_mds_file_path | quote }}
+      tags: backup
+      when: groups['overcloud_ceph_mds'] | default([]) | length > 0
+
+- name: Ceph RGW
+  become: true
+  hosts: overcloud_ceph_rgw
+  tasks:
+    - name: Stop ceph_rgw services
+      import_role:
+        name: backup_and_restore
+        tasks_from: ceph_rgw_stop
+      tags: backup
+
+- name: Backup Ceph directory
+  become: true
+  hosts: overcloud_ceph_mon
+  tasks:
+    - name: Backup ceph directory and storage on the shared directory
+      import_role:
+        name: backup_and_restore
+        tasks_from: ceph_backup
+      tags: backup
diff --git a/tripleo_ansible/playbooks/ceph_deactivate_mds.yaml b/tripleo_ansible/playbooks/ceph_deactivate_mds.yaml
new file mode 100644
index 000000000..9bbdc3d5c
--- /dev/null
+++ b/tripleo_ansible/playbooks/ceph_deactivate_mds.yaml
@@ -0,0 +1,96 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: gather facts  # no tasks listed; the play relies on Ansible's implicit fact gathering
+  hosts: all
+
+- name: upgrade ceph mdss cluster, deactivate all rank > 0
+  hosts: "{{ groups[mon_group_name|default('mons')][0] }}"  # first host of the monitor group only
+  become: true
+  tasks:
+    - name: deactivate all mds rank > 0
+      when: groups.get(mds_group_name, []) | length > 0  # skipped when the mds group is empty or missing
+      block:
+        - import_role:
+            name: ceph-defaults
+        - import_role:
+            name: ceph-facts
+
+        - name: deactivate all mds rank > 0 if any
+          when: groups.get(mds_group_name, []) | length > 1  # needs at least two mds hosts
+          block:
+            - name: set max_mds 1 on ceph fs
+              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds 1"
+              changed_when: false
+
+            - name: wait until only rank 0 is up
+              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs get {{ cephfs }} -f json"
+              changed_when: false
+              register: wait_rank_zero
+              retries: 720  # together with delay: up to 720 polls, 5 seconds apart
+              delay: 5
+              until: (wait_rank_zero.stdout | from_json).mdsmap.in | length == 1 and (wait_rank_zero.stdout | from_json).mdsmap.in[0] == 0
+
+            - name: get name of remaining active mds
+              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
+              changed_when: false
+              register: _mds_active_name
+
+            - name: set_fact mds_active_name  # loops over the mdsmap info entries; the last iteration sets the final value
+              set_fact:
+                mds_active_name: "{{ (_mds_active_name.stdout | from_json)['filesystems'][0]['mdsmap']['info'][item.key]['name'] }}"
+              with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).filesystems[0]['mdsmap']['info'] | default({}) }}"
+
+            - name: set_fact mds_active_host  # one-element list, as needed by difference() below
+              set_fact:
+                mds_active_host: "{{ [hostvars[item]['inventory_hostname']] }}"
+              with_items: "{{ groups[mds_group_name] }}"
+              when: hostvars[item]['ansible_hostname'] == mds_active_name  # only the host whose hostname matches the active mds
+
+            - name: create standby_mdss group
+              add_host:
+                name: "{{ item }}"
+                groups: standby_mdss
+                ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}"
+                ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}"
+              with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}"  # every mds host except the active one
+
+            - name: stop standby ceph mds
+              systemd:
+                name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
+                state: stopped
+                enabled: false
+              delegate_to: "{{ item }}"  # executed on each standby host in turn
+              with_items: "{{ groups['standby_mdss'] }}"
+              when: groups['standby_mdss'] | default([]) | length > 0
+
+            # Masking is done in a dedicated task:
+            # somehow, having a single task doesn't work in containerized context
+            - name: mask systemd units for standby ceph mds
+              systemd:
+                name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
+                masked: true
+              delegate_to: "{{ item }}"
+              with_items: "{{ groups['standby_mdss'] }}"
+              when: groups['standby_mdss'] | default([]) | length > 0
+
+            - name: wait until all standbys mds are stopped
+              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
+              changed_when: false
+              register: wait_standbys_down
+              retries: 300  # together with delay: up to 300 polls, 5 seconds apart
+              delay: 5
+              until: (wait_standbys_down.stdout | from_json).standbys | length == 0
diff --git a/tripleo_ansible/roles/backup_and_restore/defaults/main.yml b/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
index da05a54a9..61c941aef 100644
--- a/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
+++ b/tripleo_ansible/roles/backup_and_restore/defaults/main.yml
@@ -63,3 +63,9 @@ tripleo_backup_and_restore_backup_url: "nfs://{{ tripleo_backup_and_restore_nfs_
 
 # Ceph authentication backup file
 tripleo_backup_and_restore_ceph_auth_file: "ceph_auth_export.bak"
+
+# Path of the compressed tar archive created from the Ceph directory
+tripleo_backup_and_restore_ceph_backup_file: "/var/lib/ceph.tar.gz"
+
+# Ceph directory to back up
+tripleo_backup_and_restore_ceph_path: "/var/lib/ceph"
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/ceph_backup.yml b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_backup.yml
new file mode 100644
index 000000000..08690bc52
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_backup.yml
@@ -0,0 +1,91 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+# "backup_and_restore" will search for and load any operating system variable
+# file found within the "vars/" path. If no OS files are found the task will
+# skip.
+- name: Gather variables for each operating system
+  include_vars: "{{ item }}"
+  with_first_found:
+    - skip: true
+      files:
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
+        - "{{ ansible_os_family | lower }}.yml"
+  tags:
+    - always
+
+# The shell module is used because the archive module cannot pass the extra
+# tar flags (xattrs, SELinux labels, ACLs) used below.
+- name: Tar and Compress the /var/lib/ceph directory
+  shell: |-
+    tar -zcv --xattrs-include='*.*' \
+      --xattrs \
+      --xattrs-include=security.capability \
+      --xattrs-include=security.selinux \
+      --acls \
+      -f {{ tripleo_backup_and_restore_ceph_backup_file }} \
+      {{ tripleo_backup_and_restore_ceph_path }}
+  become: true
+  tags:
+    - bar_create_recover_image
+
+# Despite the task name, this creates a directory; it serves as the mount point.
+- name: Create temporary file
+  tempfile:
+    state: directory
+    suffix: bar
+  register: tripleo_backup_and_restore_temporary_dir
+  tags:
+    - bar_create_recover_image
+
+# The NFS mount is temporary: the "always" section unmounts it and removes its
+# /etc/fstab entry even when mounting or copying fails.
+- name: Copy the backup through a temporary nfs mount
+  tags:
+    - bar_create_recover_image
+  block:
+    - name: Mount nfs shared directory
+      mount:
+        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
+        src: "{{ tripleo_backup_and_restore_nfs_server }}:{{ tripleo_backup_and_restore_nfs_storage_folder }}"
+        fstype: nfs
+        opts: rw,noatime
+        state: mounted
+
+    - name: Copy the backup to the shared directory
+      copy:
+        src: "{{ tripleo_backup_and_restore_ceph_backup_file }}"
+        dest: "{{ tripleo_backup_and_restore_temporary_dir.path }}/{{ ansible_hostname }}"
+        remote_src: true
+  always:
+    - name: Umount nfs shared directory
+      mount:
+        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
+        state: unmounted
+
+    - name: Delete the mount point from /etc/fstab
+      mount:
+        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
+        src: "{{ tripleo_backup_and_restore_nfs_server }}:{{ tripleo_backup_and_restore_nfs_storage_folder }}"
+        fstype: nfs
+        opts: rw,noatime
+        state: absent
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mgr_stop.yml b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mgr_stop.yml
new file mode 100644
index 000000000..531f8bcb1
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mgr_stop.yml
@@ -0,0 +1,41 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+# "backup_and_restore" will search for and load any operating system variable
+# file found within the "vars/" path. If no OS files are found the task will
+# skip.
+- name: Gather variables for each operating system
+  tags: always
+  include_vars: "{{ item }}"
+  with_first_found:
+    - files:
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
+        - "{{ ansible_os_family | lower }}.yml"
+      skip: true
+
+# Stops the ceph-mgr systemd instance named after this host's ansible_hostname.
+# The unit is only stopped; its enabled state is not touched.
+- name: Stop ceph management
+  become: true
+  tags: bar_create_recover_image
+  systemd:
+    name: "ceph-mgr@{{ ansible_hostname }}"
+    state: stopped
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mon_stop.yml b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mon_stop.yml
new file mode 100644
index 000000000..5d5e68376
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_mon_stop.yml
@@ -0,0 +1,41 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+# "backup_and_restore" will search for and load any operating system variable
+# file found within the "vars/" path. If no OS files are found the task will
+# skip.
+- name: Gather variables for each operating system
+  tags: always
+  include_vars: "{{ item }}"
+  with_first_found:
+    - files:
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
+        - "{{ ansible_os_family | lower }}.yml"
+      skip: true
+
+# Stops the ceph-mon systemd instance named after this host's ansible_hostname.
+# The unit is only stopped; its enabled state is not touched.
+- name: Stop ceph monitor
+  become: true
+  tags: bar_create_recover_image
+  systemd:
+    name: "ceph-mon@{{ ansible_hostname }}"
+    state: stopped
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/ceph_nfs_stop.yml b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_nfs_stop.yml
new file mode 100644
index 000000000..4bb6fed56
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_nfs_stop.yml
@@ -0,0 +1,60 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+# "backup_and_restore" will search for and load any operating system variable
+# file found within the "vars/" path. If no OS files are found the task will
+# skip.
+- name: Gather variables for each operating system
+  include_vars: "{{ item }}"
+  with_first_found:
+    - skip: true
+      files:
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
+        - "{{ ansible_os_family | lower }}.yml"
+  tags: always
+
+# "command -v" is a shell builtin, hence the shell module. A missing pcs
+# binary is an expected outcome, so the failure is ignored.
+- name: Check pacemaker presence
+  shell: command -v pcs
+  register: bar_pcs_exits
+  changed_when: false
+  ignore_errors: true
+  tags: bar_create_recover_image
+
+# stdout holds the number of "pcs status" lines that mention ceph-nfs.
+- name: Check ceph-nfs presence
+  shell: |
+    set -o pipefail
+    pcs status | grep ceph-nfs | wc -l
+  register: bar_ceph_nfs_exits
+  changed_when: false
+  ignore_errors: true
+  when: bar_pcs_exits is succeeded
+  tags: bar_create_recover_image
+
+- name: Stop ceph-nfs with pacemaker
+  command: pcs resource disable ceph-nfs
+  when:
+    - bar_pcs_exits is succeeded
+    - bar_ceph_nfs_exits.stdout != "0"
+  run_once: true
+  tags: bar_create_recover_image
diff --git a/tripleo_ansible/roles/backup_and_restore/tasks/ceph_rgw_stop.yml b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_rgw_stop.yml
new file mode 100644
index 000000000..3ee39480b
--- /dev/null
+++ b/tripleo_ansible/roles/backup_and_restore/tasks/ceph_rgw_stop.yml
@@ -0,0 +1,62 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+# "backup_and_restore" will search for and load any operating system variable
+# file found within the "vars/" path. If no OS files are found the task will
+# skip.
+- name: Gather variables for each operating system
+  include_vars: "{{ item }}"
+  with_first_found:
+    - skip: true
+      files:
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
+        - "{{ ansible_distribution | lower }}.yml"
+        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
+        - "{{ ansible_os_family | lower }}.yml"
+  tags: always
+
+- name: Check ceph rgw presence
+  shell: |
+    set -o pipefail
+    {{ tripleo_container_cli }} ps -a | grep ceph-rgw | wc -l
+  register: bar_ceph_rgw_exits
+  changed_when: false
+  ignore_errors: true
+  tags: bar_create_recover_image
+
+# Uses the configured container CLI, like the presence check above.
+# NOTE(review): the column offset and field number assume a fixed "ps" layout.
+- name: Get the rgw name
+  shell: |
+    set -o pipefail
+    {{ tripleo_container_cli }} ps -a | grep ceph-r | cut -c150- | awk -F- '{print $6}'
+  register: bar_ceph_rgw_name
+  changed_when: false
+  when: bar_ceph_rgw_exits.stdout != "0"
+  tags: bar_create_recover_image
+
+- name: Stop ceph rgw
+  systemd:
+    state: stopped
+    name: "ceph-radosgw@rgw.{{ ansible_hostname }}.{{ bar_ceph_rgw_name.stdout }}"
+  become: true
+  when:
+    - bar_ceph_rgw_exits.stdout != "0"
+    - bar_ceph_rgw_name.stdout != ""
+  tags: bar_create_recover_image