From ec40ed3cbb4ab0bb2b1dd229b6de5d206bbca5b1 Mon Sep 17 00:00:00 2001
From: Paul Bourke
Date: Tue, 5 Jul 2016 11:05:26 +0100
Subject: [PATCH] Add a check before wiping Ceph disks in bootstrap

An unwitting user may apply the KOLLA_CEPH_OSD[_CACHE]_BOOTSTRAP label to
a partition assuming Kolla will only use that partition for Ceph, and end
up wiping out the entire disk.

This change adds a layer of checking to help avoid that disaster
scenario.

Closes-Bug: 1599103
DocImpact
Change-Id: Ibb9fb42f87a76bc02165ec0b93b60234bad8747a
---
 ansible/group_vars/all.yml                    |  4 +++
 ansible/roles/ceph/tasks/bootstrap_osds.yml   | 30 +++++++++++++++++++
 ...disk-init-protection-5b38ce8f1502ff69.yaml | 20 +++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 releasenotes/notes/add-ceph-disk-init-protection-5b38ce8f1502ff69.yaml

diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml
index d6e25ad773..f621cd5825 100644
--- a/ansible/group_vars/all.yml
+++ b/ansible/group_vars/all.yml
@@ -322,6 +322,10 @@ ceph_cache_mode: "writeback"
 # Valid options are [ ext4, btrfs, xfs ]
 ceph_osd_filesystem: "xfs"
 
+# Set to 'yes-i-really-really-mean-it' to force wipe disks with existing partitions for OSDs. Only
+# set if you understand the consequences!
+ceph_osd_wipe_disk: ""
+
 # These are /etc/fstab options. Comma separated, no spaces (see fstab(8))
 ceph_osd_mount_options: "defaults,noatime"
 
diff --git a/ansible/roles/ceph/tasks/bootstrap_osds.yml b/ansible/roles/ceph/tasks/bootstrap_osds.yml
index 014d2dcd47..e5d0ab8b08 100644
--- a/ansible/roles/ceph/tasks/bootstrap_osds.yml
+++ b/ansible/roles/ceph/tasks/bootstrap_osds.yml
@@ -23,6 +23,21 @@
   set_fact:
     osds_cache_bootstrap: "{{ (osd_cache_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).disks|from_json }}"
 
+- pause:
+    prompt: |
+      WARNING: It seems {{ item.device }} is marked to be wiped and partitioned for Ceph data and
+      a co-located journal, but appears to contain other existing partitions (>1).
+
+      If you are sure you want this disk to be *wiped* for use with Ceph, press enter.
+
+      Otherwise, press Ctrl-C, then 'A'. (You can disable this check by setting
+      ceph_osd_wipe_disk: 'yes-i-really-really-mean-it' within globals.yml)
+  with_items: "{{ osds_bootstrap|default([]) }}"
+  when:
+    - item.external_journal | bool == False
+    - ansible_devices[item.device.split('/')[2]].partitions|count > 1
+    - ceph_osd_wipe_disk != "yes-i-really-really-mean-it"
+
 - name: Bootstrapping Ceph OSDs
   kolla_docker:
     action: "start_container"
@@ -54,6 +69,21 @@
       - "kolla_logs:/var/log/kolla/"
   with_indexed_items: "{{ osds_bootstrap|default([]) }}"
 
+- pause:
+    prompt: |
+      WARNING: It seems {{ item.device }} is marked to be wiped and partitioned for Ceph data and
+      a co-located journal, but appears to contain other existing partitions (>1).
+
+      If you are sure you want this disk to be *wiped* for use with Ceph, press enter.
+
+      Otherwise, press Ctrl-C, then 'A'. (You can disable this check by setting
+      ceph_osd_wipe_disk: 'yes-i-really-really-mean-it' within globals.yml)
+  with_items: "{{ osds_cache_bootstrap|default([]) }}"
+  when:
+    - item.external_journal | bool == False
+    - ansible_devices[item.device.split('/')[2]].partitions|count > 1
+    - ceph_osd_wipe_disk != "yes-i-really-really-mean-it"
+
 - name: Bootstrapping Ceph Cache OSDs
   kolla_docker:
     action: "start_container"
diff --git a/releasenotes/notes/add-ceph-disk-init-protection-5b38ce8f1502ff69.yaml b/releasenotes/notes/add-ceph-disk-init-protection-5b38ce8f1502ff69.yaml
new file mode 100644
index 0000000000..20febed2d6
--- /dev/null
+++ b/releasenotes/notes/add-ceph-disk-init-protection-5b38ce8f1502ff69.yaml
@@ -0,0 +1,20 @@
+---
+prelude: >
+  There are multiple options available to an operator
+  with regards to how they wish to provision devices
+  for use with Ceph OSDs. One of those is to co-locate
+  the data and journal on a disk, which is done by
+  marking a partition on that disk with
+  `KOLLA_CEPH_OSD[_CACHE]_BOOTSTRAP`.
+  If an unwitting user happened to confuse this with
+  the partition-based approach, they could wipe out
+  their entire disk, resulting in data loss.
+features:
+  - The Ceph bootstrap will now pause and request
+    confirmation from the operator if it detects a
+    bootstrap label on a disk with more than one
+    partition.
+    An operator may override this behaviour by
+    setting the Ansible variable
+    'ceph_osd_wipe_disk' to
+    'yes-i-really-really-mean-it'.
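---

Usage note: to suppress the interactive check ahead of a non-interactive
deploy, an operator sets the override in their globals.yml, as the prompt
text above describes. A minimal sketch; the value must be the exact
sentinel string the tasks compare against:

    # globals.yml
    # Opt out of the wipe confirmation. Only set this if you accept that
    # any existing partitions on disks labelled with
    # KOLLA_CEPH_OSD[_CACHE]_BOOTSTRAP will be destroyed during bootstrap.
    ceph_osd_wipe_disk: "yes-i-really-really-mean-it"

For reference, the guard only fires for co-located journals
(item.external_journal is false): for a device path such as /dev/sdb,
item.device.split('/')[2] yields "sdb", and ansible_devices["sdb"].partitions
is keyed by partition name, so the "|count > 1" condition detects partitions
beyond the one carrying the bootstrap label.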