Browse Source

Allow Kolla Ceph to deploy bluestore OSDs in Kolla

Support Kolla Ceph to deploy bluestore OSDs. With the patch, Kolla
Ceph can deploy bluestore OSDs on ONE, TWO or THREE storage
devices.

Before deploying bluestore OSDs, please prepare the devices.
Please refer to [1] for the details of device initialization.

extend_start.sh: initialize and start bluestore OSD

find_disk.py: search the devices for bluestore OSD

[1]: specs/kolla-ceph-bluestore.rst

Partially-Implements: blueprint kolla-ceph-bluestore

Change-Id: I832f490de63e1aeb68814697cda610a51b622c1f
Signed-off-by: Tone Zhang <tone.zhang@arm.com>
tags/7.0.0.0b3
Tone Zhang 1 year ago
parent
commit
291ba252a5

+ 89
- 21
docker/ceph/ceph-osd/extend_start.sh View File

@@ -13,38 +13,97 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
13 13
     # NOTE(SamYaple): Static gpt partcodes
14 14
     CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
15 15
     CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
16
+    CEPH_OSD_BS_WAL_TYPE_CODE="0FC63DAF-8483-4772-8E79-3D69D8477DE4"
17
+    CEPH_OSD_BS_DB_TYPE_CODE="CE8DF73C-B89D-45B0-AD98-D45332906d90"
16 18
 
17 19
     # Wait for ceph quorum before proceeding
18 20
     ceph quorum_status
19 21
 
20 22
     if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
21 23
         # Formatting disk for ceph
22
-        sgdisk --zap-all -- "${OSD_DEV}"
23
-        sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
24
-        sgdisk --largest-new=1 -- "${OSD_DEV}"
24
+        if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
25
+            if [ -n "${OSD_BS_BLOCK_DEV}" ]; then
26
+                sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"
27
+                sgdisk --new=1:0:+100M --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
28
+                sgdisk --largest-new=2 --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
29
+                sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"2
30
+            fi
31
+
32
+            if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
33
+                sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""${OSD_BS_WAL_PARTNUM}"
34
+            fi
35
+
36
+            if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
37
+                sgdisk --zap-all -- "${OSD_BS_DB_DEV}""${OSD_BS_DB_PARTNUM}"
38
+            fi
39
+        else
40
+            sgdisk --zap-all -- "${OSD_DEV}"
41
+            sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
42
+            sgdisk --largest-new=1 -- "${OSD_DEV}"
43
+        fi
25 44
         # NOTE(SamYaple): This command may throw errors that we can safely ignore
26 45
         partprobe || true
27 46
 
28 47
     fi
29 48
 
30
-    OSD_ID=$(ceph osd create)
31
-    OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
32
-    mkdir -p "${OSD_DIR}"
49
+    if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
50
+        OSD_UUID=$(uuidgen)
51
+        OSD_ID=$(ceph osd new "${OSD_UUID}")
52
+        OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
53
+        mkdir -p "${OSD_DIR}"
54
+
55
+        mkfs.xfs -f "${OSD_BS_BLOCK_DEV}"1
56
+        mount "${OSD_BS_BLOCK_DEV}"1 "${OSD_DIR}"
57
+
58
+        # This will throw an error about no key existing. That is normal. It then
59
+        # creates the key in the next step.
60
+        ceph-osd -i "${OSD_ID}" --mkkey
61
+        echo "bluestore" > "${OSD_DIR}"/type
62
+        sgdisk "--change-name=2:KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode=2:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_BLOCK_DEV}"
63
+
64
+        if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
65
+            sgdisk "--change-name="${OSD_BS_WAL_PARTNUM}":KOLLA_CEPH_DATA_BS_W_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_WAL_TYPE_CODE}" -- "${OSD_BS_WAL_DEV}"
66
+        fi
67
+
68
+        if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
69
+            sgdisk "--change-name="${OSD_BS_DB_PARTNUM}":KOLLA_CEPH_DATA_BS_D_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_DB_TYPE_CODE}" -- "${OSD_BS_DB_DEV}"
70
+        fi
71
+
72
+        partprobe || true
73
+
74
+        ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_B_"${OSD_ID}" "${OSD_DIR}"/block
33 75
 
34
-    if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
35
-        mkfs.btrfs -f "${OSD_PARTITION}"
36
-    elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
37
-        mkfs.ext4 "${OSD_PARTITION}"
76
+        if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
77
+            ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_W_"${OSD_ID}" "${OSD_DIR}"/block.wal
78
+        fi
79
+
80
+        if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
81
+            ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_D_"${OSD_ID}" "${OSD_DIR}"/block.db
82
+        fi
83
+
84
+        ceph-osd -i "${OSD_ID}" --mkfs -k "${OSD_DIR}"/keyring --osd-uuid "${OSD_UUID}"
85
+        ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
86
+        umount "${OSD_BS_BLOCK_DEV}"1
38 87
     else
39
-        mkfs.xfs -f "${OSD_PARTITION}"
40
-    fi
41
-    mount "${OSD_PARTITION}" "${OSD_DIR}"
88
+        OSD_ID=$(ceph osd create)
89
+        OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
90
+        mkdir -p "${OSD_DIR}"
91
+
92
+        if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
93
+            mkfs.btrfs -f "${OSD_PARTITION}"
94
+        elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
95
+            mkfs.ext4 "${OSD_PARTITION}"
96
+        else
97
+            mkfs.xfs -f "${OSD_PARTITION}"
98
+        fi
99
+        mount "${OSD_PARTITION}" "${OSD_DIR}"
42 100
 
43
-    # This will through an error about no key existing. That is normal. It then
44
-    # creates the key in the next step.
45
-    ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey
46
-    ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
47
-    umount "${OSD_PARTITION}"
101
+        # This will throw an error about no key existing. That is normal. It then
102
+        # creates the key in the next step.
103
+        ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey
104
+        ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
105
+        umount "${OSD_PARTITION}"
106
+    fi
48 107
 
49 108
     if [[ "${!CEPH_CACHE[@]}" ]]; then
50 109
         CEPH_ROOT_NAME=cache
@@ -63,11 +122,20 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
63 122
     ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}"
64 123
 
65 124
     # Setting partition name based on ${OSD_ID}
66
-    sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
67
-    sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
125
+    if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
126
+        sgdisk "--change-name=1:KOLLA_CEPH_BSDATA_${OSD_ID}" -- "${OSD_BS_BLOCK_DEV}"
127
+    else
128
+        sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
129
+        sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
130
+    fi
131
+    partprobe || true
68 132
 
69 133
     exit 0
70 134
 fi
71 135
 
72 136
 OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
73
-ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
137
+if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
138
+    ARGS="-i ${OSD_ID}"
139
+else
140
+    ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
141
+fi

+ 141
- 1
docker/kolla-toolbox/find_disks.py View File

@@ -157,6 +157,7 @@ def extract_disk_info(ct, dev, name, use_udev):
157 157
     kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev)
158 158
     kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
159 159
     if dev.get('DEVTYPE', '') == 'partition':
160
+        kwargs['partition_label'] = name
160 161
         kwargs['device'] = dev.find_parent('block').device_node
161 162
         kwargs['partition'] = dev.device_node
162 163
         kwargs['partition_num'] = \
@@ -187,6 +188,121 @@ def extract_disk_info(ct, dev, name, use_udev):
187 188
     yield kwargs
188 189
 
189 190
 
191
+def extract_disk_info_bs(ct, dev, name, use_udev):
192
+    if not dev:
193
+        return
194
+    kwargs = dict(bs_db_partition='', bs_db_label='', bs_db_device='',
195
+                  bs_wal_partition='', bs_wal_label='', bs_wal_device='',
196
+                  bs_wal_partition_num='', bs_db_partition_num='',
197
+                  partition='', partition_label='', partition_num='',
198
+                  device='', partition_usage='')
199
+    kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev)
200
+    kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
201
+
202
+    if dev.get('DEVTYPE', '') == 'partition':
203
+        actual_name = get_id_part_entry_name(dev, use_udev)
204
+
205
+        if (('BOOTSTRAP_BS' in name and name in actual_name)
206
+                or ('BSDATA' in name and name in actual_name)):
207
+            if '_BS_D' in actual_name:
208
+                kwargs['partition_usage'] = 'block.db'
209
+                kwargs['bs_db_partition_num'] = \
210
+                    re.sub(r'.*[^\d]', '', dev.device_node)
211
+                kwargs['bs_db_device'] = dev.device_node[:-1]
212
+                kwargs['bs_db_label'] = actual_name
213
+                return kwargs
214
+            if '_BS_W' in actual_name:
215
+                kwargs['partition_usage'] = 'block.wal'
216
+                kwargs['bs_wal_partition_num'] = \
217
+                    re.sub(r'.*[^\d]', '', dev.device_node)
218
+                kwargs['bs_wal_device'] = dev.device_node[:-1]
219
+                kwargs['bs_wal_label'] = actual_name
220
+                return kwargs
221
+            if '_BS' in actual_name:
222
+                kwargs['partition_usage'] = 'block'
223
+                kwargs['partition'] = dev.device_node[:-1]
224
+                kwargs['partition_label'] = actual_name
225
+                kwargs['partition_num'] = \
226
+                    re.sub(r'.*[^\d]', '', dev.device_node)
227
+                kwargs['device'] = dev.device_node[:-1]
228
+                return kwargs
229
+    return 0
230
+
231
+
232
+def nb_of_block_device(disks):
233
+    block_info = dict()
234
+    block_info['block_label'] = list()
235
+    nb_of_blocks = 0
236
+    for item in disks:
237
+        if item['partition_usage'] == 'block':
238
+            block_info['block_label'].append(item['partition_label'])
239
+            nb_of_blocks += 1
240
+    block_info['nb_of_block'] = nb_of_blocks
241
+    return block_info
242
+
243
+
244
+def combine_info(disks):
245
+    info = list()
246
+    blocks = nb_of_block_device(disks)
247
+    block_id = 0
248
+    while block_id < blocks['nb_of_block']:
249
+        final = dict()
250
+        idx = 0
251
+        idx_osd = idx_wal = idx_db = -1
252
+        for item in disks:
253
+            if (item['partition_usage'] == 'block' and
254
+                item['partition_label'] ==
255
+                    blocks['block_label'][block_id]):
256
+                idx_osd = idx
257
+            elif (item['partition_usage'] == 'block.wal' and
258
+                    item['bs_wal_label'] ==
259
+                    blocks['block_label'][block_id].replace('_BS', '_BS_W')):
260
+                idx_wal = idx
261
+            elif (item['partition_usage'] == 'block.db' and
262
+                    item['bs_db_label'] ==
263
+                    blocks['block_label'][block_id].replace('_BS', '_BS_D')):
264
+                idx_db = idx
265
+            idx = idx + 1
266
+
267
+        # write the information of block.db and block.wal to block item
268
+        # if block.db and block.wal are found
269
+        if idx_wal != -1:
270
+            disks[idx_osd]['bs_wal_device'] = disks[idx_wal]['bs_wal_device']
271
+            disks[idx_osd]['bs_wal_partition_num'] = \
272
+                disks[idx_wal]['bs_wal_partition_num']
273
+            disks[idx_osd]['bs_wal_label'] = disks[idx_wal]['bs_wal_label']
274
+            disks[idx_wal]['partition_usage'] = ''
275
+        if idx_db != -1:
276
+            disks[idx_osd]['bs_db_device'] = disks[idx_db]['bs_db_device']
277
+            disks[idx_osd]['bs_db_partition_num'] = \
278
+                disks[idx_db]['bs_db_partition_num']
279
+            disks[idx_osd]['bs_db_label'] = disks[idx_db]['bs_db_label']
280
+            disks[idx_db]['partition_usage'] = ''
281
+
282
+        final['fs_uuid'] = disks[idx_osd]['fs_uuid']
283
+        final['fs_label'] = disks[idx_osd]['fs_label']
284
+        final['bs_db_device'] = disks[idx_osd]['bs_db_device']
285
+        final['bs_db_partition_num'] = disks[idx_osd]['bs_db_partition_num']
286
+        final['bs_db_label'] = disks[idx_osd]['bs_db_label']
287
+        final['bs_wal_device'] = disks[idx_osd]['bs_wal_device']
288
+        final['bs_wal_partition_num'] = disks[idx_osd]['bs_wal_partition_num']
289
+        final['bs_wal_label'] = disks[idx_osd]['bs_wal_label']
290
+        final['device'] = disks[idx_osd]['device']
291
+        final['partition'] = disks[idx_osd]['partition']
292
+        final['partition_label'] = disks[idx_osd]['partition_label']
293
+        final['partition_num'] = disks[idx_osd]['partition_num']
294
+        final['external_journal'] = False
295
+        final['journal'] = ''
296
+        final['journal_device'] = ''
297
+        final['journal_num'] = 0
298
+
299
+        info.append(final)
300
+        disks[idx_osd]['partition_usage'] = ''
301
+        block_id += 1
302
+
303
+    return info
304
+
305
+
190 306
 def main():
191 307
     argument_spec = dict(
192 308
         match_mode=dict(required=False, choices=['strict', 'prefix'],
@@ -203,9 +319,33 @@ def main():
203 319
         ret = list()
204 320
         ct = pyudev.Context()
205 321
         for dev in find_disk(ct, name, match_mode, use_udev):
206
-            for info in extract_disk_info(ct, dev, name, use_udev):
322
+            if '_BSDATA' in name:
323
+                info = extract_disk_info_bs(ct, dev, name, use_udev)
207 324
                 if info:
208 325
                     ret.append(info)
326
+            elif '_BS' in name:
327
+                info = extract_disk_info_bs(ct, dev, name, use_udev)
328
+                if info:
329
+                    ret.append(info)
330
+
331
+                info = extract_disk_info_bs(ct, dev,
332
+                                            name.replace('_BS', '_BS_W'),
333
+                                            use_udev)
334
+                if info:
335
+                    ret.append(info)
336
+
337
+                info = extract_disk_info_bs(ct, dev,
338
+                                            name.replace('_BS', '_BS_D'),
339
+                                            use_udev)
340
+                if info:
341
+                    ret.append(info)
342
+            else:
343
+                for info in extract_disk_info(ct, dev, name, use_udev):
344
+                    if info:
345
+                        ret.append(info)
346
+
347
+        if '_BS' in name and len(ret) > 0:
348
+            ret = combine_info(ret)
209 349
 
210 350
         module.exit_json(disks=json.dumps(ret))
211 351
     except Exception as e:

+ 4
- 0
releasenotes/notes/kolla-ceph-bluestore-a30ce85948d28427.yaml View File

@@ -0,0 +1,4 @@
1
+---
2
+features:
3
+  - |
4
+    Support Kolla Ceph to deploy bluestore OSDs in Rocky release.

Loading…
Cancel
Save