BaR - Ceph Backup

Backup and Restore:
  This task takes a backup of the /var/lib/ceph directory and
copies it to the shared directory.
  To be able to do it, we copy the way that the ceph-ansible project [1]
deactivates the mds [2], so we can stop all the ceph services on the
controllers.

[1]-https://github.com/ceph/ceph-ansible/
[2]-https://github.com/ceph/ceph-ansible/blob/v4.0.14/infrastructure-playbooks/rolling_update.yml#L550-L623

I left the comment below to describe and give some background.
** Just splitting the patch into patches.
All of Randy's comments are implemented.
The other patch is https://review.opendev.org/#/c/732399/

Change-Id: I55681d69cacf2e7bee52d9a61f12d8577fe15a2d
This commit is contained in:
Juan Badia Payno 2020-03-05 11:15:25 +01:00
parent 08e87e54db
commit a930407217
8 changed files with 504 additions and 0 deletions

@ -0,0 +1,107 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Runs the ceph_mon_stop task file of the backup_and_restore role on the
# hosts of the overcloud_ceph_mon group, with privilege escalation.
- name: Ceph MON
  hosts: overcloud_ceph_mon
  become: true
  tasks:
    - name: Stop monitor services
      tags:
        - backup
      import_role:
        name: backup_and_restore
        tasks_from: ceph_mon_stop
# Runs the ceph_mgr_stop task file of the backup_and_restore role on the
# hosts of the overcloud_ceph_mgr group, with privilege escalation.
- name: Ceph MGR
  hosts: overcloud_ceph_mgr
  become: true
  tasks:
    - name: Stop manager services
      tags:
        - backup
      import_role:
        name: backup_and_restore
        tasks_from: ceph_mgr_stop
# Runs the ceph_nfs_stop task file of the backup_and_restore role on the
# hosts of the overcloud_ceph_nfs group, with privilege escalation.
- name: Ceph NFS
  hosts: overcloud_ceph_nfs
  become: true
  tasks:
    - name: Stop ceph_nfs services
      tags:
        - backup
      import_role:
        name: backup_and_restore
        tasks_from: ceph_nfs_stop
# Deactivates the Ceph MDS daemons by launching the ceph_deactivate_mds
# playbook from the undercloud, using ceph-ansible's plugins, roles, library
# and ansible.cfg. Both tasks are skipped when the overcloud_ceph_mds group
# is missing or empty.
- name: Ceph MDS
  hosts: undercloud
  tasks:
    # Each fact falls back to a stock path unless the variable named inside
    # default() is defined. The values are plain strings: no embedded quote
    # characters and no trailing newline, because they are spliced into a
    # backslash-continued command line in the next task.
    - name: Set variables
      set_fact:
        ceph_ansible_path: "{{ ceph_ansible_path_dir | default('/usr/share/ceph-ansible') }}"
        ansible_log: "{{ ceph_deactivate_log | default('/home/stack/ceph_deactivate_mds.log') }}"
        ceph_ansible_inventory: >-
          {{ ansible_inventory
          | default('/home/stack/config-download/overcloud/ceph-ansible/inventory.yml') }}
        ceph_deactivate_mds_file_path: >-
          {{ ceph_deactivate_mds_file
          | default('/usr/share/ansible/tripleo_playbooks/ceph_deactivate_mds.yaml') }}
        # The leading "@" makes --extra-vars load the variables from the file.
        ceph_extra_vars_file_path: >-
          {{ ceph_extra_vars_file
          | default('@/home/stack/config-download/overcloud/ceph-ansible/extra_vars.yml') }}
      # Carries the same tag as the task consuming these facts; without it a
      # run limited to "--tags backup" would leave the facts undefined.
      tags: backup
      when:
        - groups["overcloud_ceph_mds"] is defined
        - groups["overcloud_ceph_mds"] | length > 0

    # The ANSIBLE_* assignments prefix the ansible-playbook command, so they
    # only apply to that nested run.
    - name: Execute the deactivate_mds ansible playbook
      shell: |
        ANSIBLE_ACTION_PLUGINS={{ ceph_ansible_path }}/plugins/actions/ \
        ANSIBLE_CALLBACK_PLUGINS={{ ceph_ansible_path }}/plugins/callback/ \
        ANSIBLE_FILTER_PLUGINS={{ ceph_ansible_path }}/plugins/filter/ \
        ANSIBLE_ROLES_PATH={{ ceph_ansible_path }}/roles/ \
        ANSIBLE_LIBRARY={{ ceph_ansible_path }}/library/ \
        ANSIBLE_CONFIG={{ ceph_ansible_path }}/ansible.cfg \
        ANSIBLE_REMOTE_TEMP=/tmp/ceph_ansible_tmp \
        ANSIBLE_FORKS=25 \
        ANSIBLE_GATHER_TIMEOUT=60 \
        ANSIBLE_CALLBACK_WHITELIST=profile_tasks \
        ANSIBLE_STDOUT_CALLBACK=default \
        ANSIBLE_LOG_PATH={{ ansible_log }} \
        ansible-playbook --skip-tags package-install,with_pkg \
          -e ansible_python_interpreter=/usr/libexec/platform-python \
          --extra-vars {{ ceph_extra_vars_file_path | quote }} \
          -i {{ ceph_ansible_inventory | quote }} \
          {{ ceph_deactivate_mds_file_path | quote }}
      tags: backup
      when:
        - groups["overcloud_ceph_mds"] is defined
        - groups["overcloud_ceph_mds"] | length > 0
# Runs the ceph_rgw_stop task file of the backup_and_restore role on the
# hosts of the overcloud_ceph_rgw group, with privilege escalation.
- name: Ceph RGW
  hosts: overcloud_ceph_rgw
  become: true
  tasks:
    - name: Stop ceph_rgw services
      tags:
        - backup
      import_role:
        name: backup_and_restore
        tasks_from: ceph_rgw_stop
# Runs the ceph_backup task file of the backup_and_restore role on the
# hosts of the overcloud_ceph_mon group, with privilege escalation.
- name: Backup Ceph directory
  hosts: overcloud_ceph_mon
  become: true
  tasks:
    - name: Backup ceph directory and storage on the shared directory
      tags:
        - backup
      import_role:
        name: backup_and_restore
        tasks_from: ceph_backup

@ -0,0 +1,96 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Task-less play targeting every host; per its name it exists only so that
# facts are collected before the play that follows.
- hosts: all
  name: gather facts
# Targets the first host of the monitor group (group name read from
# mon_group_name, falling back to 'mons').
# NOTE(review): the block nesting below was reconstructed from a listing
# that had lost its indentation - confirm against the original file.
- name: upgrade ceph mdss cluster, deactivate all rank > 0
  become: true
  hosts: "{{ groups[mon_group_name|default('mons')][0] }}"
  tasks:
    # Outer guard: do nothing unless the MDS group has at least one host.
    - name: deactivate all mds rank > 0
      when: groups.get(mds_group_name, []) | length > 0
      block:
        - import_role:
            name: ceph-defaults

        - import_role:
            name: ceph-facts

        # Inner guard: the remaining steps only apply with several MDS hosts.
        - name: deactivate all mds rank > 0 if any
          when: groups.get(mds_group_name, []) | length > 1
          block:
            - name: set max_mds 1 on ceph fs
              changed_when: false
              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds 1"

            # Polls every 5 seconds, up to 720 times, until the mdsmap "in"
            # list holds exactly one entry and that entry is rank 0.
            - name: wait until only rank 0 is up
              register: wait_rank_zero
              changed_when: false
              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs get {{ cephfs }} -f json"
              until: >-
                (wait_rank_zero.stdout | from_json).mdsmap.in | length == 1
                and (wait_rank_zero.stdout | from_json).mdsmap.in[0] == 0
              delay: 5
              retries: 720

            - name: get name of remaining active mds
              register: _mds_active_name
              changed_when: false
              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"

            # Iterates over the "info" map of the first filesystem in the
            # dump and stores the "name" field of each entry in turn.
            - name: set_fact mds_active_name
              with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).filesystems[0]['mdsmap']['info'] | default({}) }}"
              set_fact:
                mds_active_name: "{{ (_mds_active_name.stdout | from_json)['filesystems'][0]['mdsmap']['info'][item.key]['name'] }}"

            # Maps the active MDS name back to its inventory hostname by
            # comparing it with each MDS host's ansible_hostname fact.
            - name: set_fact mds_active_host
              with_items: "{{ groups[mds_group_name] }}"
              when: hostvars[item]['ansible_hostname'] == mds_active_name
              set_fact:
                mds_active_host: "{{ [hostvars[item]['inventory_hostname']] }}"

            # Every MDS host except the active one joins standby_mdss.
            - name: create standby_mdss group
              with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}"
              add_host:
                name: "{{ item }}"
                groups: standby_mdss
                ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}"
                ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}"

            - name: stop standby ceph mds
              with_items: "{{ groups['standby_mdss'] }}"
              when: groups['standby_mdss'] | default([]) | length > 0
              delegate_to: "{{ item }}"
              systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
                state: stopped
                enabled: false

            # Masking is kept in its own task: combining it with the stop
            # task does not work in a containerized context.
            - name: mask systemd units for standby ceph mds
              with_items: "{{ groups['standby_mdss'] }}"
              when: groups['standby_mdss'] | default([]) | length > 0
              delegate_to: "{{ item }}"
              systemd:
                name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
                masked: true

            # Polls every 5 seconds, up to 300 times, until the dump lists
            # no standbys.
            - name: wait until all standbys mds are stopped
              register: wait_standbys_down
              changed_when: false
              command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
              until: (wait_standbys_down.stdout | from_json).standbys | length == 0
              delay: 5
              retries: 300

@ -63,3 +63,9 @@ tripleo_backup_and_restore_backup_url: "nfs://{{ tripleo_backup_and_restore_nfs_
# Ceph authentication backup file
tripleo_backup_and_restore_ceph_auth_file: "ceph_auth_export.bak"

# Archive file that the Ceph directory is packed into; it is the file later
# copied to the shared directory.
tripleo_backup_and_restore_ceph_backup_file: "/var/lib/ceph.tar.gz"

# Ceph directory to back up
tripleo_backup_and_restore_ceph_path: "/var/lib/ceph"

@ -0,0 +1,91 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# "backup_and_restore" will search for and load any operating system variable file
# found within the "vars/" path. If no OS files are found the task will skip.
- name: Gather variables for each operating system
  tags: [always]
  include_vars: "{{ item }}"
  with_first_found:
    # Candidate file names go from the most specific (distribution plus
    # full version) to the most generic (OS family). "skip: true" lets the
    # task be skipped when none of them exists.
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
# The shell command is used because the archive ansible module cannot pass
# the extra flags needed here (extended attributes and ACLs).
# The '*.*' pattern is quoted so the shell hands it to tar untouched instead
# of attempting pathname expansion; both templated paths are shell-quoted.
- name: Tar and Compress the /var/lib/ceph directory
  shell: |-
    tar -zcv --xattrs-include='*.*' \
        --xattrs \
        --xattrs-include=security.capability \
        --xattrs-include=security.selinux \
        --acls \
        -f {{ tripleo_backup_and_restore_ceph_backup_file | quote }} \
        {{ tripleo_backup_and_restore_ceph_path | quote }}
  become: true
  tags:
    - bar_create_recover_image
# Despite the task name, "state: directory" makes tempfile create a
# directory. Its path is registered for the tasks that follow.
- name: Create temporary file
  tags: [bar_create_recover_image]
  register: tripleo_backup_and_restore_temporary_dir
  tempfile:
    suffix: bar
    state: directory
# Mounts the NFS share on the temporary directory, copies the archive onto
# it and then removes the mount. The two clean-up tasks sit in "always" so
# that a failed mount or copy does not leave the share mounted or its entry
# behind in /etc/fstab; the original failure is still reported afterwards.
- name: Store the Ceph backup on the nfs shared directory
  tags:
    - bar_create_recover_image
  block:
    - name: Mount nfs shared directory
      mount:
        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
        src: "{{ tripleo_backup_and_restore_nfs_server }}:{{ tripleo_backup_and_restore_nfs_storage_folder }}"
        fstype: nfs
        opts: rw,noatime
        state: mounted

    # remote_src: the archive already lives on the managed host.
    - name: Copy the backup to the shared directory
      copy:
        src: "{{ tripleo_backup_and_restore_ceph_backup_file }}"
        dest: "{{ tripleo_backup_and_restore_temporary_dir.path }}/{{ ansible_hostname }}"
        remote_src: true
  always:
    - name: Umount nfs shared directory
      mount:
        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
        state: unmounted

    - name: Delete the mount point from /etc/fstab
      mount:
        path: "{{ tripleo_backup_and_restore_temporary_dir.path }}"
        src: "{{ tripleo_backup_and_restore_nfs_server }}:{{ tripleo_backup_and_restore_nfs_storage_folder }}"
        fstype: nfs
        opts: rw,noatime
        state: absent

@ -0,0 +1,41 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# "backup_and_restore" will search for and load any operating system variable file
# found within the "vars/" path. If no OS files are found the task will skip.
- name: Gather variables for each operating system
  tags: [always]
  include_vars: "{{ item }}"
  with_first_found:
    # Candidate file names go from the most specific (distribution plus
    # full version) to the most generic (OS family). "skip: true" lets the
    # task be skipped when none of them exists.
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
# Stops the ceph-mgr@<hostname> systemd unit of the current host.
- name: Stop ceph management
  become: true
  tags: [bar_create_recover_image]
  systemd:
    name: "ceph-mgr@{{ ansible_hostname }}"
    state: stopped

@ -0,0 +1,41 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# "backup_and_restore" will search for and load any operating system variable file
# found within the "vars/" path. If no OS files are found the task will skip.
- name: Gather variables for each operating system
  tags: [always]
  include_vars: "{{ item }}"
  with_first_found:
    # Candidate file names go from the most specific (distribution plus
    # full version) to the most generic (OS family). "skip: true" lets the
    # task be skipped when none of them exists.
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
# Stops the ceph-mon@<hostname> systemd unit of the current host.
- name: Stop ceph monitor
  become: true
  tags: [bar_create_recover_image]
  systemd:
    name: "ceph-mon@{{ ansible_hostname }}"
    state: stopped

@ -0,0 +1,60 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# "backup_and_restore" will search for and load any operating system variable file
# found within the "vars/" path. If no OS files are found the task will skip.
- name: Gather variables for each operating system
  tags: [always]
  include_vars: "{{ item }}"
  with_first_found:
    # Candidate file names go from the most specific (distribution plus
    # full version) to the most generic (OS family). "skip: true" lets the
    # task be skipped when none of them exists.
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
# Probes for the pcs executable. "command -v" is a POSIX shell builtin, so
# it is run through the shell module rather than relying on a standalone
# "type" executable being installed. A missing pcs makes the task fail; the
# failure is ignored on purpose because later tasks test
# "bar_pcs_exits is succeeded".
- name: Check pacemaker presence
  shell: command -v pcs
  register: bar_pcs_exits
  # Read-only probe: never report a change.
  changed_when: false
  ignore_errors: true
  tags:
    - bar_create_recover_image
# Counts the "pcs status" lines mentioning ceph-nfs; the count is read by
# later tasks from bar_ceph_nfs_exits.stdout. With pipefail a zero-match
# grep fails the pipeline, hence ignore_errors: wc still prints "0".
- name: Check ceph-nfs presence
  shell: |
    set -o pipefail
    pcs status | grep ceph-nfs | wc -l
  register: bar_ceph_nfs_exits
  # Read-only probe: never report a change.
  changed_when: false
  ignore_errors: true
  when: bar_pcs_exits is succeeded
  tags:
    - bar_create_recover_image
# Disables the ceph-nfs pacemaker resource. It is a single plain command
# with no shell features, so the command module is used. run_once: the
# task is executed for one host of the batch only.
- name: Stop ceph-nfs with pacemaker
  command: pcs resource disable ceph-nfs
  when:
    - bar_pcs_exits is succeeded
    - bar_ceph_nfs_exits.stdout != "0"
  run_once: true
  tags:
    - bar_create_recover_image

@ -0,0 +1,62 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# "backup_and_restore" will search for and load any operating system variable file
# found within the "vars/" path. If no OS files are found the task will skip.
- name: Gather variables for each operating system
  tags: [always]
  include_vars: "{{ item }}"
  with_first_found:
    # Candidate file names go from the most specific (distribution plus
    # full version) to the most generic (OS family). "skip: true" lets the
    # task be skipped when none of them exists.
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_major_version | lower }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}-{{ ansible_distribution_version.split('.')[0] }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
# Counts the containers (running or not) whose listing line mentions
# ceph-rgw; later tasks read the count from bar_ceph_rgw_exits.stdout.
# With pipefail a zero-match grep fails the pipeline, hence ignore_errors:
# wc still prints "0".
- name: Check ceph rgw presence
  shell: |
    set -o pipefail
    {{ tripleo_container_cli }} ps -a | grep ceph-rgw | wc -l
  register: bar_ceph_rgw_exits
  # Read-only probe: never report a change.
  changed_when: false
  ignore_errors: true
  tags:
    - bar_create_recover_image
# Extracts the rgw instance name from the container listing: the text from
# column 150 onwards is split on "-" and the sixth field is kept.
# NOTE(review): this relies on the listing's column layout and on the
# container naming scheme - confirm both on the target release.
# Uses the configured container CLI and the same "ceph-rgw" pattern as the
# presence check, so both tasks look at the same containers.
- name: Get the rgw name
  shell: |
    set -o pipefail
    {{ tripleo_container_cli }} ps -a | grep ceph-rgw | cut -c150- | awk -F- '{print $6}'
  register: bar_ceph_rgw_name
  # Read-only probe: never report a change.
  changed_when: false
  when: bar_ceph_rgw_exits.stdout != "0"
  tags:
    - bar_create_recover_image
# Stops the ceph-radosgw unit built from the host name and the detected
# rgw instance name. Runs only when an rgw container was found and a
# non-empty name was extracted.
- name: Stop ceph rgw
  become: true
  tags: [bar_create_recover_image]
  when:
    - bar_ceph_rgw_exits.stdout!="0"
    - bar_ceph_rgw_name.stdout!=""
  systemd:
    name: "ceph-radosgw@rgw.{{ ansible_hostname }}.{{ bar_ceph_rgw_name.stdout }}"
    state: stopped