From 5250a00781a214911fec78718ef6dfb91154b0de Mon Sep 17 00:00:00 2001 From: SamYaple Date: Fri, 18 Mar 2016 13:52:32 +0000 Subject: [PATCH] Allow external ceph journals and fix bootstrap This allows us to specify external journals for OSDs, which can greatly improve performance when the external journals are on solid-state drives. The new lookup and startup methods fix the previous races that prevented OSDs from being created properly. This retains the same functionality as before and is completely compatible with the previous method and labels; however, it does set new labels for all new bootstrap OSDs. This is due to a limitation in the length of the name of a GPT partition. Closes-Bug: #1558853 DocImpact Partially-Implements: blueprint ceph-improvements Change-Id: I61fd10cb35c67dabc53bd82270f26909ef51fc38 --- ansible/roles/ceph/tasks/bootstrap_osds.yml | 16 +++- ansible/roles/ceph/tasks/start_osds.yml | 4 +- docker/ceph/ceph-osd/extend_start.sh | 37 +++++---- docker/kolla-toolbox/find_disks.py | 84 +++++++++++++++------ 4 files changed, 93 insertions(+), 48 deletions(-) diff --git a/ansible/roles/ceph/tasks/bootstrap_osds.yml b/ansible/roles/ceph/tasks/bootstrap_osds.yml index 16599170e6..471d173c90 100644 --- a/ansible/roles/ceph/tasks/bootstrap_osds.yml +++ b/ansible/roles/ceph/tasks/bootstrap_osds.yml @@ -2,7 +2,7 @@ - name: Looking up disks to bootstrap for Ceph command: docker exec -t kolla_toolbox /usr/bin/ansible localhost -m find_disks - -a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP'" + -a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP' match_mode='prefix'" register: osd_lookup changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}" failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS' @@ -14,7 +14,7 @@ - name: Looking up disks to bootstrap for Ceph command: docker exec -t kolla_toolbox /usr/bin/ansible localhost -m find_disks - -a 
"partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP'" + -a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP' match_mode='prefix'" register: osd_cache_lookup changed_when: "{{ osd_cache_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_cache_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}" failed_when: osd_cache_lookup.stdout.split()[2] != 'SUCCESS' @@ -32,6 +32,12 @@ KOLLA_BOOTSTRAP: KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" OSD_DEV: "{{ item.1.device }}" + OSD_PARTITION: "{{ item.1.partition }}" + OSD_PARTITION_NUM: "{{ item.1.partition_num }}" + JOURNAL_DEV: "{{ item.1.journal_device }}" + JOURNAL_PARTITION: "{{ item.1.journal }}" + JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}" + USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}" OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}" OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}" HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}" @@ -56,6 +62,12 @@ KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" CEPH_CACHE: OSD_DEV: "{{ item.1.device }}" + OSD_PARTITION: "{{ item.1.partition }}" + OSD_PARTITION_NUM: "{{ item.1.partition_num }}" + JOURNAL_DEV: "{{ item.1.journal_device }}" + JOURNAL_PARTITION: "{{ item.1.journal }}" + JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}" + USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}" OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}" OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}" HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}" diff --git a/ansible/roles/ceph/tasks/start_osds.yml b/ansible/roles/ceph/tasks/start_osds.yml index 3f2bdd4cdc..63787dd120 100644 --- a/ansible/roles/ceph/tasks/start_osds.yml +++ b/ansible/roles/ceph/tasks/start_osds.yml @@ -2,7 +2,7 @@ - name: Looking up OSDs for Ceph command: docker exec -t kolla_toolbox /usr/bin/ansible localhost -m find_disks - -a "partition_name='KOLLA_CEPH_DATA'" + -a 
"partition_name='KOLLA_CEPH_DATA' match_mode='prefix'" register: osd_lookup changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}" failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS' @@ -34,7 +34,7 @@ environment: KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" OSD_ID: "{{ item.0.stdout }}" - OSD_DEV: "{{ item.1['device'] }}" + JOURNAL_PARTITION: "{{ item.1.journal }}" image: "{{ ceph_osd_image_full }}" name: "ceph_osd_{{ item.0.stdout }}" pid_mode: "host" diff --git a/docker/ceph/ceph-osd/extend_start.sh b/docker/ceph/ceph-osd/extend_start.sh index 5db6f61c93..97119578a7 100644 --- a/docker/ceph/ceph-osd/extend_start.sh +++ b/docker/ceph/ceph-osd/extend_start.sh @@ -3,28 +3,22 @@ # Bootstrap and exit if KOLLA_BOOTSTRAP variable is set. This catches all cases # of the KOLLA_BOOTSTRAP variable being set, including empty. if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then + # NOTE(SamYaple): Static gpt partcodes + CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106" + CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D" + # Wait for ceph quorum before proceeding ceph quorum_status - # Formatting disk for ceph - sgdisk --zap-all -- "${OSD_DEV}" - sgdisk --new=2:1M:5G --change-name=2:KOLLA_CEPH_JOURNAL --typecode=2:45B0969E-9B03-4F30-B4C6-B4B80CEFF106 --mbrtogpt -- "${OSD_DEV}" - sgdisk --largest-new=1 --change-name=1:KOLLA_CEPH_DATA --typecode=1:4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D -- "${OSD_DEV}" - # This command may throw errors that we can safely ignore - partprobe || true + if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then + # Formatting disk for ceph + sgdisk --zap-all -- "${OSD_DEV}" + sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}" + sgdisk --largest-new=1 -- "${OSD_DEV}" + # NOTE(SamYaple): This command may throw errors that we can safely ignore + partprobe || true - count=0 - while [[ "${OSD_PARTITION}x" == "x" ]]; do - if [[ "${count}" -gt 5 ]]; then - echo 
"Could not find OSD Partition" - exit 1 - fi - sleep 1 - # We look up the appropriate device path with partition. - OSD_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?1") - count=$(( count + 1 )) - done - JOURNAL_PARTITION="${OSD_PARTITION%?}2" + fi OSD_ID=$(ceph osd create) OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}" @@ -56,10 +50,13 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then # Adding osd to crush map ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}" + + # Setting partition name based on ${OSD_ID} + sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}" + sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}" + exit 0 fi -# We look up the appropriate journal since we cannot rely on symlinks -JOURNAL_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?2") OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}" ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring" diff --git a/docker/kolla-toolbox/find_disks.py b/docker/kolla-toolbox/find_disks.py index 6ebf2b6a27..e72dcc6fba 100644 --- a/docker/kolla-toolbox/find_disks.py +++ b/docker/kolla-toolbox/find_disks.py @@ -66,6 +66,61 @@ EXAMPLES = ''' import json import pyudev +import re + + +def is_dev_matched_by_name(dev, name, mode): + if dev.get('DEVTYPE', '') == 'partition': + dev_name = dev.get('ID_PART_ENTRY_NAME', '') + else: + dev_name = dev.get('ID_FS_LABEL', '') + + if mode == 'strict': + return dev_name == name + elif mode == 'prefix': + return dev_name.startswith(name) + else: + return False + + +def find_disk(ct, name, match_mode): + for dev in ct.list_devices(subsystem='block'): + if is_dev_matched_by_name(dev, name, match_mode): + yield dev + + +def extract_disk_info(ct, dev, name): + if not dev: + return + kwargs = dict() + kwargs['fs_uuid'] = 
dev.get('ID_FS_UUID', '') + kwargs['fs_label'] = dev.get('ID_FS_LABEL', '') + if dev.get('DEVTYPE', '') == 'partition': + kwargs['device'] = dev.find_parent('block').device_node + kwargs['partition'] = dev.device_node + kwargs['partition_num'] = \ + re.sub(r'.*[^\d$]', '', dev.device_node) + if is_dev_matched_by_name(dev, name, 'strict'): + kwargs['external_journal'] = False + kwargs['journal'] = dev.device_node[:-1] + '2' + kwargs['journal_device'] = kwargs['device'] + kwargs['journal_num'] = 2 + else: + kwargs['external_journal'] = True + journal_name = dev.get('ID_PART_ENTRY_NAME', '') + '_J' + for journal in find_disk(ct, journal_name, 'strict'): + kwargs['journal'] = journal.device_node + kwargs['journal_device'] = \ + journal.find_parent('block').device_node + kwargs['journal_num'] = \ + re.sub(r'.*[^\d$]', '', journal.device_node) + break + if 'journal' not in kwargs: + # NOTE(SamYaple): Journal not found, not returning info + return + else: + kwargs['device'] = dev.device_node + yield kwargs def main(): @@ -78,33 +133,14 @@ def main(): match_mode = module.params.get('match_mode') name = module.params.get('name') - def is_dev_matched_by_name(dev, name): - if dev.get('DEVTYPE', '') == 'partition': - dev_name = dev.get('ID_PART_ENTRY_NAME', '') - else: - dev_name = dev.get('ID_FS_LABEL', '') - - if match_mode == 'strict': - return dev_name == name - elif match_mode == 'prefix': - return dev_name.startswith(name) - else: - return False - try: ret = list() ct = pyudev.Context() - for dev in ct.list_devices(subsystem='block'): - if is_dev_matched_by_name(dev, name): - fs_uuid = dev.get('ID_FS_UUID', '') - fs_label = dev.get('ID_FS_LABEL', '') - if dev.get('DEVTYPE', '') == 'partition': - device_node = dev.find_parent('block').device_node - else: - device_node = dev.device_node - ret.append({'device': device_node, - 'fs_uuid': fs_uuid, - 'fs_label': fs_label}) + for dev in find_disk(ct, name, match_mode): + for info in extract_disk_info(ct, dev, name): + if info: 
+ ret.append(info) + module.exit_json(disks=json.dumps(ret)) except Exception as e: module.exit_json(failed=True, msg=repr(e))