From 291ba252a5d07f02a058c5cee4370d74121f563f Mon Sep 17 00:00:00 2001
From: Tone Zhang
Date: Tue, 8 May 2018 08:12:28 +0000
Subject: [PATCH] Allow Kolla Ceph to deploy bluestore OSDs in Kolla

Support Kolla Ceph to deploy bluestore OSDs.

With the patch, Kolla Ceph can deploy bluestore OSDs on ONE, TWO or THREE
storage devices. Before deploying a bluestore OSD, please prepare the
devices. Please refer to [1] for the details of device initialization.

extend_start.sh: initialize and start bluestore OSDs
find_disks.py: search the devices for bluestore OSDs

[1]: specs/kolla-ceph-bluestore.rst

Partially-Implements: blueprint kolla-ceph-bluestore

Change-Id: I832f490de63e1aeb68814697cda610a51b622c1f
Signed-off-by: Tone Zhang
---
 docker/ceph/ceph-osd/extend_start.sh                          | 110 +++++++++++---
 docker/kolla-toolbox/find_disks.py                            | 142 +++++++++++++++++-
 ...kolla-ceph-bluestore-a30ce85948d28427.yaml                 |   4 +
 3 files changed, 234 insertions(+), 22 deletions(-)
 create mode 100644 releasenotes/notes/kolla-ceph-bluestore-a30ce85948d28427.yaml

diff --git a/docker/ceph/ceph-osd/extend_start.sh b/docker/ceph/ceph-osd/extend_start.sh
index ee745d0af0..57ec84f4e5 100644
--- a/docker/ceph/ceph-osd/extend_start.sh
+++ b/docker/ceph/ceph-osd/extend_start.sh
@@ -13,38 +13,97 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
     # NOTE(SamYaple): Static gpt partcodes
     CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
     CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
+    CEPH_OSD_BS_WAL_TYPE_CODE="0FC63DAF-8483-4772-8E79-3D69D8477DE4"
+    CEPH_OSD_BS_DB_TYPE_CODE="CE8DF73C-B89D-45B0-AD98-D45332906d90"

     # Wait for ceph quorum before proceeding
     ceph quorum_status

     if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
         # Formatting disk for ceph
-        sgdisk --zap-all -- "${OSD_DEV}"
-        sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
-        sgdisk --largest-new=1 -- "${OSD_DEV}"
+        if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
+            if [ -n "${OSD_BS_BLOCK_DEV}" ]; then
+                sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"
+                sgdisk --new=1:0:+100M --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
+                sgdisk --largest-new=2 --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
+                sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"2
+            fi
+
+            if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
+                sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""${OSD_BS_WAL_PARTNUM}"
+            fi
+
+            if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
+                sgdisk --zap-all -- "${OSD_BS_DB_DEV}""${OSD_BS_DB_PARTNUM}"
+            fi
+        else
+            sgdisk --zap-all -- "${OSD_DEV}"
+            sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
+            sgdisk --largest-new=1 -- "${OSD_DEV}"
+        fi
         # NOTE(SamYaple): This command may throw errors that we can safely ignore
         partprobe || true
     fi

-    OSD_ID=$(ceph osd create)
-    OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
-    mkdir -p "${OSD_DIR}"
+    if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
+        OSD_UUID=$(uuidgen)
+        OSD_ID=$(ceph osd new "${OSD_UUID}")
+        OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
+        mkdir -p "${OSD_DIR}"

-    if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
-        mkfs.btrfs -f "${OSD_PARTITION}"
-    elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
-        mkfs.ext4 "${OSD_PARTITION}"
+        mkfs.xfs -f "${OSD_BS_BLOCK_DEV}"1
+        mount "${OSD_BS_BLOCK_DEV}"1 "${OSD_DIR}"
+
+        # This will throw an error about no key existing. That is normal. It then
+        # creates the key in the next step.
+        ceph-osd -i "${OSD_ID}" --mkkey
+        echo "bluestore" > "${OSD_DIR}"/type
+        sgdisk "--change-name=2:KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode=2:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_BLOCK_DEV}"
+
+        if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
+            sgdisk "--change-name=${OSD_BS_WAL_PARTNUM}:KOLLA_CEPH_DATA_BS_W_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_WAL_TYPE_CODE}" -- "${OSD_BS_WAL_DEV}"
+        fi
+
+        if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
+            sgdisk "--change-name=${OSD_BS_DB_PARTNUM}:KOLLA_CEPH_DATA_BS_D_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_DB_TYPE_CODE}" -- "${OSD_BS_DB_DEV}"
+        fi
+
+        partprobe || true
+
+        ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_B_"${OSD_ID}" "${OSD_DIR}"/block
+
+        if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
+            ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_W_"${OSD_ID}" "${OSD_DIR}"/block.wal
+        fi
+
+        if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
+            ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_D_"${OSD_ID}" "${OSD_DIR}"/block.db
+        fi
+
+        ceph-osd -i "${OSD_ID}" --mkfs -k "${OSD_DIR}"/keyring --osd-uuid "${OSD_UUID}"
+        ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
+        umount "${OSD_BS_BLOCK_DEV}"1
     else
-        mkfs.xfs -f "${OSD_PARTITION}"
-    fi
-    mount "${OSD_PARTITION}" "${OSD_DIR}"
-
-    # This will through an error about no key existing. That is normal. It then
-    # creates the key in the next step.
-    ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey
-    ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
-    umount "${OSD_PARTITION}"
+        OSD_ID=$(ceph osd create)
+        OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
+        mkdir -p "${OSD_DIR}"
+
+        if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
+            mkfs.btrfs -f "${OSD_PARTITION}"
+        elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
+            mkfs.ext4 "${OSD_PARTITION}"
+        else
+            mkfs.xfs -f "${OSD_PARTITION}"
+        fi
+        mount "${OSD_PARTITION}" "${OSD_DIR}"
+
+        # This will throw an error about no key existing. That is normal. It then
+        # creates the key in the next step.
+ ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey + ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring" + umount "${OSD_PARTITION}" + fi if [[ "${!CEPH_CACHE[@]}" ]]; then CEPH_ROOT_NAME=cache @@ -63,11 +122,20 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}" # Setting partition name based on ${OSD_ID} - sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}" - sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}" + if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then + sgdisk "--change-name=1:KOLLA_CEPH_BSDATA_${OSD_ID}" -- "${OSD_BS_BLOCK_DEV}" + else + sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}" + sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}" + fi + partprobe || true exit 0 fi OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}" -ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring" +if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then + ARGS="-i ${OSD_ID}" +else + ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring" +fi diff --git a/docker/kolla-toolbox/find_disks.py b/docker/kolla-toolbox/find_disks.py index 63f5262af9..b7047d58aa 100644 --- a/docker/kolla-toolbox/find_disks.py +++ b/docker/kolla-toolbox/find_disks.py @@ -157,6 +157,7 @@ def extract_disk_info(ct, dev, name, use_udev): kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev) kwargs['fs_label'] = dev.get('ID_FS_LABEL', '') if dev.get('DEVTYPE', '') == 'partition': + kwargs['partition_label'] = name kwargs['device'] = dev.find_parent('block').device_node kwargs['partition'] = dev.device_node kwargs['partition_num'] = \ @@ -187,6 +188,121 @@ def extract_disk_info(ct, dev, name, use_udev): yield kwargs +def extract_disk_info_bs(ct, dev, name, use_udev): + if not dev: + return + kwargs = dict(bs_db_partition='', bs_db_label='', bs_db_device='', + bs_wal_partition='', bs_wal_label='', bs_wal_device='', + bs_wal_partition_num='', bs_db_partition_num='', + partition='', partition_label='', partition_num='', + device='', partition_usage='') + kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev) + kwargs['fs_label'] = dev.get('ID_FS_LABEL', '') + + if dev.get('DEVTYPE', '') == 'partition': + actual_name = get_id_part_entry_name(dev, use_udev) + + if (('BOOTSTRAP_BS' in name and name in actual_name) + or ('BSDATA' in name and name in actual_name)): + if '_BS_D' in actual_name: + kwargs['partition_usage'] = 'block.db' + kwargs['bs_db_partition_num'] = \ + re.sub(r'.*[^\d]', '', dev.device_node) + kwargs['bs_db_device'] = dev.device_node[:-1] + kwargs['bs_db_label'] = actual_name + return kwargs + if '_BS_W' in actual_name: + kwargs['partition_usage'] = 'block.wal' + kwargs['bs_wal_partition_num'] = \ + re.sub(r'.*[^\d]', '', dev.device_node) + kwargs['bs_wal_device'] = dev.device_node[:-1] + kwargs['bs_wal_label'] = actual_name + return kwargs + if '_BS' in actual_name: + kwargs['partition_usage'] = 'block' + kwargs['partition'] = dev.device_node[:-1] + kwargs['partition_label'] = actual_name + kwargs['partition_num'] = \ + re.sub(r'.*[^\d]', '', 
dev.device_node) + kwargs['device'] = dev.device_node[:-1] + return kwargs + return 0 + + +def nb_of_block_device(disks): + block_info = dict() + block_info['block_label'] = list() + nb_of_blocks = 0 + for item in disks: + if item['partition_usage'] == 'block': + block_info['block_label'].append(item['partition_label']) + nb_of_blocks += 1 + block_info['nb_of_block'] = nb_of_blocks + return block_info + + +def combine_info(disks): + info = list() + blocks = nb_of_block_device(disks) + block_id = 0 + while block_id < blocks['nb_of_block']: + final = dict() + idx = 0 + idx_osd = idx_wal = idx_db = -1 + for item in disks: + if (item['partition_usage'] == 'block' and + item['partition_label'] == + blocks['block_label'][block_id]): + idx_osd = idx + elif (item['partition_usage'] == 'block.wal' and + item['bs_wal_label'] == + blocks['block_label'][block_id].replace('_BS', '_BS_W')): + idx_wal = idx + elif (item['partition_usage'] == 'block.db' and + item['bs_db_label'] == + blocks['block_label'][block_id].replace('_BS', '_BS_D')): + idx_db = idx + idx = idx + 1 + + # write the information of block.db and block.wal to block item + # if block.db and block.wal are found + if idx_wal != -1: + disks[idx_osd]['bs_wal_device'] = disks[idx_wal]['bs_wal_device'] + disks[idx_osd]['bs_wal_partition_num'] = \ + disks[idx_wal]['bs_wal_partition_num'] + disks[idx_osd]['bs_wal_label'] = disks[idx_wal]['bs_wal_label'] + disks[idx_wal]['partition_usage'] = '' + if idx_db != -1: + disks[idx_osd]['bs_db_device'] = disks[idx_db]['bs_db_device'] + disks[idx_osd]['bs_db_partition_num'] = \ + disks[idx_db]['bs_db_partition_num'] + disks[idx_osd]['bs_db_label'] = disks[idx_db]['bs_db_label'] + disks[idx_db]['partition_usage'] = '' + + final['fs_uuid'] = disks[idx_osd]['fs_uuid'] + final['fs_label'] = disks[idx_osd]['fs_label'] + final['bs_db_device'] = disks[idx_osd]['bs_db_device'] + final['bs_db_partition_num'] = disks[idx_osd]['bs_db_partition_num'] + final['bs_db_label'] = disks[idx_osd]['bs_db_label'] + final['bs_wal_device'] = disks[idx_osd]['bs_wal_device'] + final['bs_wal_partition_num'] = disks[idx_osd]['bs_wal_partition_num'] + final['bs_wal_label'] = disks[idx_osd]['bs_wal_label'] + final['device'] = disks[idx_osd]['device'] + final['partition'] = disks[idx_osd]['partition'] + final['partition_label'] = disks[idx_osd]['partition_label'] + final['partition_num'] = disks[idx_osd]['partition_num'] + final['external_journal'] = False + final['journal'] = '' + final['journal_device'] = '' + final['journal_num'] = 0 + + info.append(final) + disks[idx_osd]['partition_usage'] = '' + block_id += 1 + + return info + + def main(): argument_spec = dict( match_mode=dict(required=False, choices=['strict', 'prefix'], @@ -203,9 +319,33 @@ def main(): ret = list() ct = pyudev.Context() for dev in find_disk(ct, name, match_mode, use_udev): - for info in extract_disk_info(ct, dev, name, use_udev): + if '_BSDATA' in name: + info = extract_disk_info_bs(ct, dev, name, use_udev) if info: ret.append(info) + elif '_BS' in name: + info = extract_disk_info_bs(ct, dev, name, use_udev) + if info: + ret.append(info) + + info = extract_disk_info_bs(ct, dev, + name.replace('_BS', '_BS_W'), + use_udev) + if info: + ret.append(info) + + info = extract_disk_info_bs(ct, dev, + name.replace('_BS', '_BS_D'), + use_udev) + if info: + ret.append(info) + else: + for info in extract_disk_info(ct, dev, name, use_udev): + if info: + ret.append(info) + + if '_BS' in name and len(ret) > 0: + ret = combine_info(ret) 
         module.exit_json(disks=json.dumps(ret))
     except Exception as e:
diff --git a/releasenotes/notes/kolla-ceph-bluestore-a30ce85948d28427.yaml b/releasenotes/notes/kolla-ceph-bluestore-a30ce85948d28427.yaml
new file mode 100644
index 0000000000..bd377f8e4f
--- /dev/null
+++ b/releasenotes/notes/kolla-ceph-bluestore-a30ce85948d28427.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Support for Kolla Ceph to deploy bluestore OSDs in the Rocky release.
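Note for anyone trying the change out: the bootstrap path above expects the bluestore
devices to be partitioned and labelled before deployment, as described in [1]. A minimal
preparation sketch for a three-device layout follows; the device paths and the
KOLLA_CEPH_OSD_BOOTSTRAP_BS label prefix are illustrative assumptions (the authoritative
naming comes from the spec in [1], not from this patch):

    # block (data) device: find_disks.py matches partition labels containing "_BS"
    parted /dev/sdb -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS 1 -1

    # optional WAL device: a label with the "_BS_W" suffix is reported as block.wal
    parted /dev/sdc -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS_W 1 -1

    # optional DB device: a label with the "_BS_D" suffix is reported as block.db
    parted /dev/sdd -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS_D 1 -1

With labels like these, find_disks.py returns one combined entry per block partition
(combine_info merges the matching WAL/DB partitions into it), and extend_start.sh renames
the partitions to the KOLLA_CEPH_DATA_BS_* labels once the OSD id has been allocated.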