From 459d0440407d97c3586d91f5736ed7b948b3f492 Mon Sep 17 00:00:00 2001
From: "Kabanov, Dmitrii"
Date: Mon, 4 May 2020 14:19:22 -0700
Subject: [PATCH] [Ceph OSD] Add OSD device class

The PS adds the possibility to override the device class through a key
in values.yaml.

Motivation: in some cases the device driver provides incorrect
information about the type of device, so the automatic detection sets
an incorrect device class.

Change-Id: I29eb2d5100f020a20f65686ef85c0975f909b39d
---
 .../templates/bin/osd/ceph-disk/_common.sh.tpl | 17 +++++++++++++++++
 .../osd/ceph-disk/_init-with-ceph-disk.sh.tpl  |  6 ++++++
 .../bin/osd/ceph-volume/_common.sh.tpl         | 18 ++++++++++++++++++
 .../ceph-volume/_init-with-ceph-volume.sh.tpl  |  5 +++++
 ceph-osd/values.yaml                           |  4 ++++
 5 files changed, 50 insertions(+)

diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl
index 72a2de74b..6aa44d5a5 100644
--- a/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl
@@ -27,6 +27,7 @@ set -ex
 eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
 eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
 eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
+eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))')
 
 if [[ $(ceph -v | egrep -q "nautilus|mimic|luminous"; echo $?) -ne 0 ]]; then
   echo "ERROR- need Luminous/Mimic/Nautilus release"
@@ -95,6 +96,7 @@ function crush_add_and_move {
 }
 
 function crush_location {
+  set_device_class
   if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
     if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
       crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
@@ -237,3 +239,18 @@ function udev_settle {
   done
 }
 
+function set_device_class {
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush rm-device-class "osd.${OSD_ID}"
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}"
+    fi
+  fi
+}
+
+function get_device_class {
+  echo $(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush get-device-class "osd.${OSD_ID}")
+}
diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl
index c6787eae8..ea94e82a1 100644
--- a/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl
@@ -177,6 +177,12 @@ function osd_disk_prepare {
 
   udev_settle
   ceph-disk -v prepare ${CLI_OPTS}
+
+  if [ ! -z "$DEVICE_CLASS" ]; then
-z "$DEVICE_CLASS" ]; then + local osd_id=$(cat "/var/lib/ceph/osd/*/whoami") + ceph osd crush rm-device-class osd."${osd_id}" + ceph osd crush set-device-class "${DEVICE_CLASS}" osd."${osd_id}" + fi } function osd_journal_create { diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl index 0d01b15c7..a1f61c50e 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl @@ -27,6 +27,7 @@ set -ex eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))') eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))') +eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))') if [[ $(ceph -v | egrep -q "nautilus|mimic|luminous"; echo $?) -ne 0 ]]; then echo "ERROR- need Luminous/Mimic/Nautilus release" @@ -95,6 +96,7 @@ function crush_add_and_move { } function crush_location { + set_device_class if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}" @@ -321,3 +323,19 @@ function get_osd_wal_device_from_device { # Use get_lvm_tag_from_device to get the OSD WAL device from the device get_lvm_tag_from_device ${device} ceph.wal_device } + +function set_device_class { + if [ ! -z "$DEVICE_CLASS" ]; then + if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then + ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ + osd crush rm-device-class "osd.${OSD_ID}" + ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ + osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}" + fi + fi +} + +function get_device_class { + echo $(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ + osd crush get-device-class "osd.${OSD_ID}") +} diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl index 050eedf00..19a8912ea 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl @@ -384,6 +384,11 @@ function osd_disk_prepare { CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}" udev_settle fi + + if [ ! -z "$DEVICE_CLASS" ]; then + CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}" + fi + if [[ ${CEPH_DISK_USED} -eq 1 ]]; then CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}" ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml index 09e1bcd25..a9545da8b 100644 --- a/ceph-osd/values.yaml +++ b/ceph-osd/values.yaml @@ -208,6 +208,10 @@ conf: failure_domain_by_hostname: "false" failure_domain_name: "false" + # Note: You can override the device class by adding the value (e.g., hdd, ssd or nvme). 
+  # Leave it empty if you don't need to modify the device class.
+  device_class: ""
+
   # NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
   # define OSD pods that will be deployed across the cluster.
   # when specifing whole disk (/dev/sdf) for journals, ceph-osd chart will create
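
For deployers, a minimal values override using the new key could look like the
sketch below. This is illustrative only: it assumes the `device_class` key ends
up under `conf.storage` next to the `failure_domain` keys touched in the hunk
above, and `nvme` is just an example value taken from the comment in
values.yaml; leaving the key empty keeps whatever class Ceph auto-detects.

  conf:
    storage:
      # Place every OSD deployed by this chart into the "nvme" CRUSH device
      # class instead of the class detected automatically for the backing disk.
      device_class: "nvme"

After the OSD pods are redeployed, the effective class can be checked with
`ceph osd crush class ls` and `ceph osd crush get-device-class osd.<id>`.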