46d78550bd
ceph-disk is used by stx puppet manifest but it's replaced by ceph-volume in ceph 14.1+, and there might be other incompatible issues, so add version 13.2.2 to align with stx 3.0, the version 14.1 is kept so it may be used by future stx release. This includes the following changes: - Add recipe and patches for version 13.2.2 - Set preferred version to 13.2.2 - Rename ceph_%.bbappend to ceph_14.1.0.bbappend - Update the conf file and scripts from stx 2.0 to stx 3.0 - Remove the useless files in stx-integ fix #483 fix #497 Signed-off-by: Jackie Huang <jackie.huang@windriver.com> Signed-off-by: Babak Sarashki <Babak.SarAshki@windriver.com>
335 lines
11 KiB
Python
335 lines
11 KiB
Python
#!/usr/bin/python
|
|
#
|
|
# Copyright (c) 2019 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
import ast
|
|
import os
|
|
import os.path
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
# Substring looked for in a disk node name to detect NVMe devices, whose
# partition nodes are built as "<disk>p<N>" instead of "<disk><N>".
DEVICE_NAME_NVME = "nvme"
|
|
|
|
#########
|
|
# Utils #
|
|
#########
|
|
|
|
|
|
def command(arguments, **kwargs):
    """Execute a command and capture stdout, stderr & return code.

    :param arguments: the command to run, passed as first argument to
        subprocess.Popen (a list of strings everywhere in this script).
    :param kwargs: extra keyword arguments forwarded to subprocess.Popen.
    :returns: a tuple (stdout, stderr, returncode).
    """
    # The callers in this script search the captured output with text
    # patterns and build device names out of it, so ask for text instead of
    # bytes unless the caller explicitly chose an output mode itself.
    if "text" not in kwargs:
        kwargs.setdefault("universal_newlines", True)
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()
    return out, err, process.returncode
|
|
|
|
|
|
def get_input(arg, valid_keys):
    """Convert the input to a dict and perform basic validation.

    :param arg: string holding a Python dict literal. Escaped newlines (a
        backslash followed by "n") are turned into real newlines before the
        literal is parsed.
    :param valid_keys: keys that must all be present in the parsed dict.
    :returns: the parsed dict, or None when the input cannot be parsed, is
        not a dict or lacks one of the required keys.
    """
    literal = arg.replace("\\n", "\n")
    try:
        parsed = ast.literal_eval(literal)
        # A list or a string can also "contain" every key; callers index the
        # result by key, so anything that is not a real dict is invalid.
        if not isinstance(parsed, dict):
            return None
        if not all(key in parsed for key in valid_keys):
            return None
    except Exception:
        # literal_eval raises several unrelated exception types on malformed
        # input; every one of them simply means "invalid input" here.
        return None

    return parsed
|
|
|
|
|
|
def get_partition_uuid(dev):
    """Return the PARTUUID value printed by "blkid <dev>".

    None is returned when the blkid output holds no PARTUUID entry.
    """
    blkid_output = command(['blkid', dev])[0]
    found = re.search('PARTUUID=\"(.+?)\"', blkid_output)
    if found is None:
        return None
    return found.group(1)
|
|
|
|
|
|
def device_path_to_device_node(device_path):
    """Resolve a device path to its device node.

    Runs "udevadm settle -E <device_path>" and then returns the output of
    "readlink -f <device_path>" with trailing whitespace removed. None is
    returned when running either command raises an exception.
    """
    try:
        command(["udevadm", "settle", "-E", device_path])
        resolved = command(["readlink", "-f", device_path])[0]
        return resolved.rstrip()
    except Exception:
        return None
|
|
|
|
|
|
###########################################
|
|
# Manage Journal Disk Partitioning Scheme #
|
|
###########################################
|
|
|
|
# Directory of the by-partuuid symlinks. Stale links found here are removed
# when a disk is repartitioned, and the OSD 'journal' symlink is pointed at
# DISK_BY_PARTUUID + <partition uuid>.
DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
|
|
# Passed to "sgdisk --typecode" for every journal partition that is created.
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'  # Type of a journal partition
|
|
|
|
|
|
def is_partitioning_correct(disk_path, partition_sizes):
    """Validate the existence and size of journal partitions.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: expected partition sizes in MiB; entry N
        (counting from 1) describes partition N of the disk.
    :returns: True when the disk carries a GPT partition table and every
        listed partition reports the expected size, False otherwise.
    """

    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Check that partition table format is GPT
    command(["udevadm", "settle", "-E", disk_node])
    output, _, _ = command(["parted", "-s", disk_node, "print"])
    if not re.search('Partition Table: gpt', output):
        print("Format of disk node %s is not GPT, zapping disk" % disk_node)
        return False

    # NVMe partition nodes are named <disk>p<N>, all others <disk><N>
    separator = 'p' if DEVICE_NAME_NVME in disk_node else ''

    # Check each partition size
    for partition_index, size in enumerate(partition_sizes, 1):
        # Check that each partition size matches the one in input
        partition_node = '{}{}{}'.format(disk_node, separator,
                                         partition_index)

        command(["udevadm", "settle", "-E", partition_node])
        cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
        output, _, _ = command(cmd)

        # Only zeros that follow a decimal point are insignificant: "1024"
        # must match "1024MiB" and "1024.00MiB" but never "10240MiB".
        size_text = str(size)
        padding = "0*" if "." in size_text else "(?:\\.0*)?"
        regex = ("^Disk " + re.escape(partition_node) + ":\\s*" +
                 re.escape(size_text) + padding + "MiB")
        if not re.search(regex, output, re.MULTILINE):
            print("Journal partition %(node)s size is not %(size)s, "
                  "zapping disk" % {"node": partition_node, "size": size})
            return False

    command(["udevadm", "settle", "-t", "10"])
    return True
|
|
|
|
|
|
def create_partitions(disk_path, partition_sizes):
    """Recreate partitions.

    Writes a new GPT partition table to the disk, removes the by-partuuid
    symlinks that resolved to it, then creates one partition per entry of
    partition_sizes, back to back, starting 1 MiB into the disk. Each new
    partition is named "ceph journal" and typed with JOURNAL_UUID.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: partition sizes in MiB, in creation order.

    The process exits with status 1 when parted fails to create the
    partition table or a partition; an sgdisk failure is only reported.
    """

    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Issue: After creating a new partition table on a device, Udev does not
    # always remove old symlinks (i.e. to previous partitions on that device).
    # Also, even if links are erased before zapping the disk, some of them will
    # be recreated even though there is no partition to back them!
    # Therefore, we have to remove the links AFTER we erase the partition table
    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
    # GPT partitions on the storage node so nothing to remove in this case
    links = []
    if os.path.isdir(DISK_BY_PARTUUID):
        candidates = [os.path.join(DISK_BY_PARTUUID, name)
                      for name in os.listdir(DISK_BY_PARTUUID)]
        links = [path for path in candidates if os.path.islink(path)]

    # Erase all partitions on current node by creating a new GPT table
    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
    if ret:
        print("Error erasing partition table of %(node)s\n"
              "Return code: %(ret)s reason: %(reason)s" %
              {"node": disk_node, "ret": ret, "reason": err})
        sys.exit(1)

    # Erase old symlinks
    for link in links:
        if disk_node in os.path.realpath(link):
            os.remove(link)

    # Create partitions in order
    used_space_mib = 1  # leave 1 MB at the beginning of the disk
    for num, size in enumerate(partition_sizes, 1):
        end_mib = used_space_mib + size
        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
               'mkpart', 'primary',
               str(used_space_mib), str(end_mib)]
        _, err, ret = command(cmd)
        parms = {"disk_node": disk_node,
                 "start": used_space_mib,
                 "end": end_mib,
                 "reason": err}
        if ret:
            print("Failed to create partition with "
                  "start=%(start)s, end=%(end)s "
                  "on %(disk_node)s reason: %(reason)s" % parms)
            sys.exit(1)
        # Report success only once parted is known to have succeeded
        print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
              " on %(disk_node)s" % parms)

        # Set partition type to ceph journal
        # noncritical operation, it makes 'ceph-disk list' output correct info
        cmd = ['sgdisk',
               '--change-name={num}:ceph journal'.format(num=num),
               '--typecode={num}:{uuid}'.format(
                   num=num,
                   uuid=JOURNAL_UUID,
               ),
               disk_node]
        _, err, ret = command(cmd)
        if ret:
            print("WARNING: Failed to set partition name and typecode")
        used_space_mib = end_mib
|
|
|
|
|
|
###########################
|
|
# Manage Journal Location #
|
|
###########################
|
|
|
|
# Base directory of the OSD data mount points: the data partition of OSD <id>
# is mounted at OSD_PATH + "ceph-<id>".
OSD_PATH = "/var/lib/ceph/osd/"
|
|
|
|
|
|
def mount_data_partition(data_path, osdid):
    """Mount an OSD data partition and return the mounted path.

    The partition is mounted as xfs on OSD_PATH + "ceph-<osdid>" unless the
    output of "mount" already lists it there.

    :param data_path: device path of the OSD data partition.
    :param osdid: id of the OSD, used to build the mount point.
    :returns: the mount point path.

    The process exits with status 1 when the mount command fails.
    """

    # Obtain the device node from the device path.
    data_node = device_path_to_device_node(data_path)

    mount_path = OSD_PATH + "ceph-" + str(osdid)
    output, _, _ = command(['mount'])
    # Match node and mount point as whole, literal fields so that e.g.
    # /dev/sda1 on .../ceph-1 is not confused with /dev/sda10 on .../ceph-10
    regex = ("^" + re.escape(data_node) + "[ \\t].*[ \\t]" +
             re.escape(mount_path) + "(?:[ \\t]|$)")
    if not re.search(regex, output, re.MULTILINE):
        cmd = ['mount', '-t', 'xfs', data_node, mount_path]
        _, _, ret = command(cmd)
        params = {"node": data_node, "path": mount_path}
        if ret:
            print("Failed to mount %(node)s to %(path)s, aborting" % params)
            sys.exit(1)
        else:
            print("Mounted %(node)s to %(path)s" % params)
    return mount_path
|
|
|
|
|
|
def is_location_correct(path, journal_path, osdid):
    """Check if location points to the correct device.

    Returns True when "<path>/journal" resolves to the device node of
    journal_path and False otherwise. osdid is accepted but not used.
    """

    # Obtain the device node from the device path.
    expected_node = device_path_to_device_node(journal_path)

    return os.path.realpath(path + "/journal") == expected_node
|
|
|
|
|
|
def fix_location(mount_point, journal_path, osdid):
    """Move the journal to the new partition.

    Steps, in order:
      1. point the "<mount_point>/journal" symlink at the by-partuuid link
         of the journal partition,
      2. write the partition uuid to "<mount_point>/journal_uuid"
         (a failure is only reported as a warning),
      3. zero the first 100 MiB of the journal partition,
      4. run "ceph-osd --mkjournal" for the OSD and report the outcome.

    :param mount_point: path where the OSD data partition is mounted.
    :param journal_path: device path of the new journal partition.
    :param osdid: id of the OSD owning the journal.

    The process exits with status 1 when the partition uuid cannot be
    determined or the symlink cannot be created.
    """

    # Obtain the device node from the device path.
    journal_node = device_path_to_device_node(journal_path)

    # Fix symlink
    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
    journal_uuid = get_partition_uuid(journal_node)
    if journal_uuid is None:
        # Without a uuid there is no valid symlink target to point to
        print("Failed to get partition uuid of %s, aborting" % journal_node)
        sys.exit(1)
    new_target = DISK_BY_PARTUUID + journal_uuid
    params = {"path": path, "target": new_target}
    try:
        if os.path.lexists(path):
            os.unlink(path)  # delete the old symlink
        os.symlink(new_target, path)
        print("Symlink created: %(path)s -> %(target)s" % params)
    except Exception:
        print("Failed to create symlink: %(path)s -> %(target)s" % params)
        sys.exit(1)

    # Fix journal_uuid
    path = mount_point + "/journal_uuid"
    try:
        with open(path, 'w') as f:
            f.write(journal_uuid)
    except Exception:
        # The operation is noncritical, it only makes 'ceph-disk list'
        # display complete output. We log and continue.
        params = {"path": path, "uuid": journal_uuid}
        print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)

    # Clean the journal partition
    # even if erasing the partition table, if another journal was present here
    # it's going to be reused. Journals are always bigger than 100MB.
    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
             'bs=1M', 'count=100'])

    # Format the journal
    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
           '-c', '/etc/ceph/ceph.conf',
           '--cluster', 'ceph',
           '--mkjournal']
    _, err, ret = command(cmd)
    params = {"journal_node": journal_node,
              "osdid": osdid,
              "ret": ret,
              "reason": err}
    if not ret:
        print("Prepared new journal partition: %(journal_node)s "
              "for osd id: %(osdid)s" % params)
    else:
        print("Error initializing journal node: "
              "%(journal_node)s for osd id: %(osdid)s "
              "ceph-osd return code: %(ret)s reason: %(reason)s" % params)
|
|
|
|
|
|
########
|
|
# Main #
|
|
########
|
|
|
|
def main(argv):
    """Validate the command line and run the requested journal operation.

    :param argv: exactly two items, the operation name and a dict literal:
        "partitions" with keys 'disk_path' and 'journals' (a list of
        positive partition sizes), or "location" with keys 'data_path',
        'journal_path' and 'osdid' (an int).

    Any other input prints a notice and exits the process with status -1.
    """
    # parse and validate arguments
    err = False
    partitions = None
    location = None
    if len(argv) != 2:
        err = True
    elif argv[0] == "partitions":
        valid_keys = ['disk_path', 'journals']
        partitions = get_input(argv[1], valid_keys)
        if not partitions:
            err = True
        elif not isinstance(partitions['journals'], list):
            err = True
        elif not all(isinstance(size, (int, float)) and
                     not isinstance(size, bool) and size > 0
                     for size in partitions['journals']):
            # Reject bad sizes up front: they would otherwise only fail
            # after the partition table has already been erased.
            err = True
    elif argv[0] == "location":
        valid_keys = ['data_path', 'journal_path', 'osdid']
        location = get_input(argv[1], valid_keys)
        if not location:
            err = True
        elif not isinstance(location['osdid'], int):
            err = True
    else:
        err = True
    if err:
        print("Command intended for internal use only")
        sys.exit(-1)

    if partitions:
        # Recreate partitions only if the existing ones don't match input
        if not is_partitioning_correct(partitions['disk_path'],
                                       partitions['journals']):
            create_partitions(partitions['disk_path'], partitions['journals'])
        else:
            print("Partition table for %s is correct, "
                  "no need to repartition" %
                  device_path_to_device_node(partitions['disk_path']))
    elif location:
        # we need to have the data partition mounted & we can let it mounted
        mount_point = mount_data_partition(location['data_path'],
                                           location['osdid'])
        # Update journal location only if link point to another partition
        if not is_location_correct(mount_point,
                                   location['journal_path'],
                                   location['osdid']):
            print("Fixing journal location for "
                  "OSD id: %(id)s" % {"id": location['osdid']})
            fix_location(mount_point,
                         location['journal_path'],
                         location['osdid'])
        else:
            print("Journal location for %s is correct, "
                  "no need to change it" % location['data_path'])
|
|
|
|
|
|
# Run only when executed as a script, so that importing the module does not
# trigger any disk operation.
if __name__ == "__main__":
    main(sys.argv[1:])
|