tripleo-ansible/tripleo_ansible/playbooks/ceph_deactivate_mds.yaml
Juan Badia Payno a930407217 BaR - Ceph Backup
Backup and Restore:
  This task aims to take a backup of the /var/lib/ceph directory and
copied to the shared directory.
  To be able to do it, we copy the way that the ceph-ansible project [1]
deactivates the mds [2], so we can stop all the ceph services on the
controllers.

[1]-https://github.com/ceph/ceph-ansible/
[2]-https://github.com/ceph/ceph-ansible/blob/v4.0.14/infrastructure-playbooks/rolling_update.yml#L550-L623

I left the comment below to describe and give some backgroud.
** Just splitting the patch into patches.
All the Randy's comments are implemented.
The other patch is https://review.opendev.org/#/c/732399/

Change-Id: I55681d69cacf2e7bee52d9a61f12d8577fe15a2d
2020-08-28 12:31:23 +02:00

97 lines
4.2 KiB
YAML

---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
- name: gather facts
hosts: all
- name: upgrade ceph mdss cluster, deactivate all rank > 0
hosts: "{{ groups[mon_group_name|default('mons')][0] }}"
become: true
tasks:
- name: deactivate all mds rank > 0
when: groups.get(mds_group_name, []) | length > 0
block:
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
- name: deactivate all mds rank > 0 if any
when: groups.get(mds_group_name, []) | length > 1
block:
- name: set max_mds 1 on ceph fs
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs set {{ cephfs }} max_mds 1"
changed_when: false
- name: wait until only rank 0 is up
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs get {{ cephfs }} -f json"
changed_when: false
register: wait_rank_zero
retries: 720
delay: 5
until: (wait_rank_zero.stdout | from_json).mdsmap.in | length == 1 and (wait_rank_zero.stdout | from_json).mdsmap.in[0] == 0
- name: get name of remaining active mds
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
changed_when: false
register: _mds_active_name
- name: set_fact mds_active_name
set_fact:
mds_active_name: "{{ (_mds_active_name.stdout | from_json)['filesystems'][0]['mdsmap']['info'][item.key]['name'] }}"
with_dict: "{{ (_mds_active_name.stdout | default('{}') | from_json).filesystems[0]['mdsmap']['info'] | default({}) }}"
- name: set_fact mds_active_host
set_fact:
mds_active_host: "{{ [hostvars[item]['inventory_hostname']] }}"
with_items: "{{ groups[mds_group_name] }}"
when: hostvars[item]['ansible_hostname'] == mds_active_name
- name: create standby_mdss group
add_host:
name: "{{ item }}"
groups: standby_mdss
ansible_host: "{{ hostvars[item]['ansible_host'] | default(omit) }}"
ansible_port: "{{ hostvars[item]['ansible_port'] | default(omit) }}"
with_items: "{{ groups[mds_group_name] | difference(mds_active_host) }}"
- name: stop standby ceph mds
systemd:
name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
state: stopped
enabled: false
delegate_to: "{{ item }}"
with_items: "{{ groups['standby_mdss'] }}"
when: groups['standby_mdss'] | default([]) | length > 0
# dedicated task for masking systemd unit
# somehow, having a single task doesn't work in containerized context
- name: mask systemd units for standby ceph mds
systemd:
name: "ceph-mds@{{ hostvars[item]['ansible_hostname'] }}"
masked: true
delegate_to: "{{ item }}"
with_items: "{{ groups['standby_mdss'] }}"
when: groups['standby_mdss'] | default([]) | length > 0
- name: wait until all standbys mds are stopped
command: "{{ container_exec_cmd | default('') }} ceph --cluster {{ cluster }} fs dump -f json"
changed_when: false
register: wait_standbys_down
retries: 300
delay: 5
until: (wait_standbys_down.stdout | from_json).standbys | length == 0