From 7a93e1451a83468deeb78f0337b7a3810e20e703 Mon Sep 17 00:00:00 2001 From: "tone.zhang" Date: Thu, 14 Jun 2018 14:23:51 +0800 Subject: [PATCH] Allow Kolla Ceph to deploy bluestore OSD with 4 partitions Enhance the deployment of Kolla-Ceph bluestore OSD. Deploy bluestore OSD including up to 4 partitions: * one partition is for bluestore OSD information * one partition is for bluestore block * one partition is for bluestore block.wal * one partition is for bluestore block.db Deploy bluestore OSD deployment with LOOP devices. Partially-Implements: blueprint kolla-ceph-bluestore Change-Id: I00eaa600a5e9ad4c1ebca2eeb523bca3d7a25128 Signed-off-by: tone.zhang --- docker/ceph/ceph-osd/extend_start.sh | 75 +++++++++++++++++------- docker/kolla-toolbox/find_disks.py | 86 ++++++++++++++++++---------- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/docker/ceph/ceph-osd/extend_start.sh b/docker/ceph/ceph-osd/extend_start.sh index 57ec84f4e5..1f42a1d92e 100644 --- a/docker/ceph/ceph-osd/extend_start.sh +++ b/docker/ceph/ceph-osd/extend_start.sh @@ -22,19 +22,39 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then # Formatting disk for ceph if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then - if [ -n "${OSD_BS_BLOCK_DEV}" ]; then - sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}" - sgdisk --new=1:0:+100M --mbrtogpt -- "${OSD_BS_BLOCK_DEV}" - sgdisk --largest-new=2 --mbrtogpt -- "${OSD_BS_BLOCK_DEV}" - sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"2 + if [[ "${OSD_BS_DEV}" =~ "/dev/loop" ]]; then + sgdisk --zap-all -- "${OSD_BS_DEV}""p${OSD_BS_PARTNUM}" + else + sgdisk --zap-all -- "${OSD_BS_DEV}""${OSD_BS_PARTNUM}" fi - if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then - sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""${OSD_BS_WAL_PARTNUM}" + if [ -n "${OSD_BS_BLK_DEV}" ] && [ "${OSD_BS_DEV}" != "${OSD_BS_BLK_DEV}" ] && [ -n "${OSD_BS_BLK_PARTNUM}" ]; then + if [[ "${OSD_BS_BLK_DEV}" =~ "/dev/loop" ]]; then + sgdisk --zap-all -- "${OSD_BS_BLK_DEV}""p${OSD_BS_BLK_PARTNUM}" + else + sgdisk --zap-all -- "${OSD_BS_BLK_DEV}""${OSD_BS_BLK_PARTNUM}" + fi + else + sgdisk --zap-all -- "${OSD_BS_DEV}" + sgdisk --new=1:0:+100M --mbrtogpt -- "${OSD_BS_DEV}" + sgdisk --largest-new=2 --mbrtogpt -- "${OSD_BS_DEV}" + sgdisk --zap-all -- "${OSD_BS_DEV}"2 fi - if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then - sgdisk --zap-all -- "${OSD_BS_DB_DEV}""${OSD_BS_DB_PARTNUM}" + if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then + if [[ "${OSD_BS_WAL_DEV}" =~ "/dev/loop" ]]; then + sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""p${OSD_BS_WAL_PARTNUM}" + else + sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""${OSD_BS_WAL_PARTNUM}" + fi + fi + + if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then + if [[ "${OSD_BS_DB_DEV}" =~ "/dev/loop" ]]; then + sgdisk --zap-all -- "${OSD_BS_DB_DEV}""p${OSD_BS_DB_PARTNUM}" + else + sgdisk --zap-all -- "${OSD_BS_DB_DEV}""${OSD_BS_DB_PARTNUM}" + fi fi else sgdisk --zap-all -- "${OSD_DEV}" @@ -52,38 +72,51 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}" mkdir -p "${OSD_DIR}" - mkfs.xfs -f "${OSD_BS_BLOCK_DEV}"1 - mount "${OSD_BS_BLOCK_DEV}"1 "${OSD_DIR}" + if [[ "${OSD_BS_DEV}" =~ "/dev/loop" ]]; then + mkfs.xfs -f "${OSD_BS_DEV}""p${OSD_BS_PARTNUM}" + mount "${OSD_BS_DEV}""p${OSD_BS_PARTNUM}" "${OSD_DIR}" + else + mkfs.xfs -f "${OSD_BS_DEV}""${OSD_BS_PARTNUM}" + mount "${OSD_BS_DEV}""${OSD_BS_PARTNUM}" "${OSD_DIR}" + fi # This will through an error about no key existing. That is normal. It then # creates the key in the next step. ceph-osd -i "${OSD_ID}" --mkkey echo "bluestore" > "${OSD_DIR}"/type - sgdisk "--change-name=2:KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode=2:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_BLOCK_DEV}" - - if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then - sgdisk "--change-name="${OSD_BS_WAL_PARTNUM}":KOLLA_CEPH_DATA_BS_W_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_WAL_TYPE_CODE}" -- "${OSD_BS_WAL_DEV}" + if [ -n "${OSD_BS_BLK_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_DEV}" ] && [ -n "${OSD_BS_BLK_PARTNUM}" ]; then + sgdisk "--change-name="${OSD_BS_BLK_PARTNUM}":KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode="${OSD_BS_BLK_PARTNUM}":${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_BLK_DEV}" + else + sgdisk "--change-name=2:KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode=2:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_DEV}" fi - if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then - sgdisk "--change-name="${OSD_BS_DB_PARTNUM}":KOLLA_CEPH_DATA_BS_D_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_DB_TYPE_CODE}" -- "${OSD_BS_DB_DEV}" + if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then + sgdisk "--change-name="${OSD_BS_WAL_PARTNUM}":KOLLA_CEPH_DATA_BS_W_${OSD_ID}" "--typecode="${OSD_BS_WAL_PARTNUM}":${CEPH_OSD_BS_WAL_TYPE_CODE}" -- "${OSD_BS_WAL_DEV}" + fi + + if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then + sgdisk "--change-name="${OSD_BS_DB_PARTNUM}":KOLLA_CEPH_DATA_BS_D_${OSD_ID}" "--typecode="${OSD_BS_DB_PARTNUM}":${CEPH_OSD_BS_DB_TYPE_CODE}" -- "${OSD_BS_DB_DEV}" fi partprobe || true ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_B_"${OSD_ID}" "${OSD_DIR}"/block - if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then + if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_W_"${OSD_ID}" "${OSD_DIR}"/block.wal fi - if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ]i && [ -n "${OSD_BS_DB_PARTNUM}" ]; then + if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_D_"${OSD_ID}" "${OSD_DIR}"/block.db fi ceph-osd -i "${OSD_ID}" --mkfs -k "${OSD_DIR}"/keyring --osd-uuid "${OSD_UUID}" ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring" - umount "${OSD_BS_BLOCK_DEV}"1 + if [[ "${OSD_BS_DEV}" =~ "/dev/loop" ]]; then + umount "${OSD_BS_DEV}""p${OSD_BS_PARTNUM}" + else + umount "${OSD_BS_DEV}""${OSD_BS_PARTNUM}" + fi else OSD_ID=$(ceph osd create) OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}" @@ -123,7 +156,7 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then # Setting partition name based on ${OSD_ID} if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then - sgdisk "--change-name=1:KOLLA_CEPH_BSDATA_${OSD_ID}" -- "${OSD_BS_BLOCK_DEV}" + sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_BSDATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_DEV}" else sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}" sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}" diff --git a/docker/kolla-toolbox/find_disks.py b/docker/kolla-toolbox/find_disks.py index b7047d58aa..51cd7a0784 100644 --- a/docker/kolla-toolbox/find_disks.py +++ b/docker/kolla-toolbox/find_disks.py @@ -191,81 +191,97 @@ def extract_disk_info(ct, dev, name, use_udev): def extract_disk_info_bs(ct, dev, name, use_udev): if not dev: return - kwargs = dict(bs_db_partition='', bs_db_label='', bs_db_device='', - bs_wal_partition='', bs_wal_label='', bs_wal_device='', + kwargs = dict(bs_blk_label='', bs_blk_device='', bs_db_label='', + bs_db_device='', bs_wal_label='', bs_wal_device='', bs_wal_partition_num='', bs_db_partition_num='', - partition='', partition_label='', partition_num='', - device='', partition_usage='') + bs_blk_partition_num='', partition='', partition_label='', + partition_num='', device='', partition_usage='') kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev) kwargs['fs_label'] = dev.get('ID_FS_LABEL', '') if dev.get('DEVTYPE', '') == 'partition': actual_name = get_id_part_entry_name(dev, use_udev) - if (('BOOTSTRAP_BS' in name and name in actual_name) - or ('BSDATA' in name and name in actual_name)): + if (('BOOTSTRAP_BS' in name or 'BSDATA' in name) + and name in actual_name): + if '_BS_B' in actual_name: + kwargs['partition_usage'] = 'block' + kwargs['bs_blk_partition_num'] = \ + re.sub(r'.*[^\d]', '', dev.device_node) + kwargs['bs_blk_device'] = dev.find_parent('block').device_node + kwargs['bs_blk_label'] = actual_name + return kwargs if '_BS_D' in actual_name: kwargs['partition_usage'] = 'block.db' kwargs['bs_db_partition_num'] = \ re.sub(r'.*[^\d]', '', dev.device_node) - kwargs['bs_db_device'] = dev.device_node[:-1] + kwargs['bs_db_device'] = dev.find_parent('block').device_node kwargs['bs_db_label'] = actual_name return kwargs if '_BS_W' in actual_name: kwargs['partition_usage'] = 'block.wal' kwargs['bs_wal_partition_num'] = \ re.sub(r'.*[^\d]', '', dev.device_node) - kwargs['bs_wal_device'] = dev.device_node[:-1] + kwargs['bs_wal_device'] = dev.find_parent('block').device_node kwargs['bs_wal_label'] = actual_name return kwargs if '_BS' in actual_name: - kwargs['partition_usage'] = 'block' - kwargs['partition'] = dev.device_node[:-1] + kwargs['partition_usage'] = 'osd' + kwargs['partition'] = dev.find_parent('block').device_node kwargs['partition_label'] = actual_name kwargs['partition_num'] = \ re.sub(r'.*[^\d]', '', dev.device_node) - kwargs['device'] = dev.device_node[:-1] + kwargs['device'] = dev.find_parent('block').device_node return kwargs return 0 -def nb_of_block_device(disks): - block_info = dict() - block_info['block_label'] = list() - nb_of_blocks = 0 +def nb_of_osd(disks): + osd_info = dict() + osd_info['block_label'] = list() + nb_of_osds = 0 for item in disks: - if item['partition_usage'] == 'block': - block_info['block_label'].append(item['partition_label']) - nb_of_blocks += 1 - block_info['nb_of_block'] = nb_of_blocks - return block_info + if item['partition_usage'] == 'osd': + osd_info['block_label'].append(item['partition_label']) + nb_of_osds += 1 + osd_info['nb_of_osd'] = nb_of_osds + return osd_info def combine_info(disks): info = list() - blocks = nb_of_block_device(disks) - block_id = 0 - while block_id < blocks['nb_of_block']: + osds = nb_of_osd(disks) + osd_id = 0 + while osd_id < osds['nb_of_osd']: final = dict() idx = 0 - idx_osd = idx_wal = idx_db = -1 + idx_osd = idx_blk = idx_wal = idx_db = -1 for item in disks: - if (item['partition_usage'] == 'block' and - item['partition_label'] == - blocks['block_label'][block_id]): + if (item['partition_usage'] == 'osd' and + item['partition_label'] == osds['block_label'][osd_id]): idx_osd = idx + elif (item['partition_usage'] == 'block' and + item['bs_blk_label'] == + osds['block_label'][osd_id].replace('_BS', '_BS_B')): + idx_blk = idx elif (item['partition_usage'] == 'block.wal' and item['bs_wal_label'] == - blocks['block_label'][block_id].replace('_BS', '_BS_W')): + osds['block_label'][osd_id].replace('_BS', '_BS_W')): idx_wal = idx elif (item['partition_usage'] == 'block.db' and item['bs_db_label'] == - blocks['block_label'][block_id].replace('_BS', '_BS_D')): + osds['block_label'][osd_id].replace('_BS', '_BS_D')): idx_db = idx idx = idx + 1 # write the information of block.db and block.wal to block item # if block.db and block.wal are found + if idx_blk != -1: + disks[idx_osd]['bs_blk_device'] = disks[idx_blk]['bs_blk_device'] + disks[idx_osd]['bs_blk_label'] = disks[idx_blk]['bs_blk_label'] + disks[idx_osd]['bs_blk_partition_num'] = \ + disks[idx_blk]['bs_blk_partition_num'] + disks[idx_blk]['partition_usage'] = '' if idx_wal != -1: disks[idx_osd]['bs_wal_device'] = disks[idx_wal]['bs_wal_device'] disks[idx_osd]['bs_wal_partition_num'] = \ @@ -281,6 +297,9 @@ def combine_info(disks): final['fs_uuid'] = disks[idx_osd]['fs_uuid'] final['fs_label'] = disks[idx_osd]['fs_label'] + final['bs_blk_device'] = disks[idx_osd]['bs_blk_device'] + final['bs_blk_label'] = disks[idx_osd]['bs_blk_label'] + final['bs_blk_partition_num'] = disks[idx_osd]['bs_blk_partition_num'] final['bs_db_device'] = disks[idx_osd]['bs_db_device'] final['bs_db_partition_num'] = disks[idx_osd]['bs_db_partition_num'] final['bs_db_label'] = disks[idx_osd]['bs_db_label'] @@ -298,7 +317,7 @@ def combine_info(disks): info.append(final) disks[idx_osd]['partition_usage'] = '' - block_id += 1 + osd_id += 1 return info @@ -329,6 +348,13 @@ def main(): ret.append(info) info = extract_disk_info_bs(ct, dev, + name.replace('_BS', '_BS_B'), + use_udev) + if info: + ret.append(info) + + info = extract_disk_info_bs(ct, dev, + name.replace('_BS', '_BS_W'), use_udev) if info: