meta-starlingx/meta-stx-virt/recipes-extended/ceph/files/ceph-manage-journal.py
Jackie Huang 46d78550bd ceph: add version 13.2.2 and align with stx 3.0
ceph-disk is used by the stx puppet manifest, but it is replaced by
ceph-volume in ceph 14.1+, and there might be other incompatibility
issues, so add version 13.2.2 to align with stx 3.0. Version
14.1 is kept so it may be used by a future stx release.

This includes the following changes:
- Add recipe and patches for version 13.2.2
- Set preferred version to 13.2.2
- Rename ceph_%.bbappend to ceph_14.1.0.bbappend
- Update the conf file and scripts from stx 2.0 to stx 3.0
- Remove the useless files in stx-integ

fix #483
fix #497

Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
Signed-off-by: Babak Sarashki <Babak.SarAshki@windriver.com>
2020-06-17 08:55:01 -07:00

335 lines
11 KiB
Python

#!/usr/bin/python
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import ast
import os
import os.path
import re
import subprocess
import sys
# Substring used to recognise NVMe device nodes; their partition nodes are
# built as <disk>p<N> instead of <disk><N>.
DEVICE_NAME_NVME = "nvme"
#########
# Utils #
#########
def command(arguments, **kwargs):
    """Execute a command and capture stdout, stderr & return code.

    :param arguments: argument list (program first) handed to
                      subprocess.Popen.
    :param kwargs: extra keyword arguments forwarded to subprocess.Popen.
                   stdout and stderr are always piped and must not be
                   passed here.
    :returns: tuple (stdout, stderr, returncode); both streams are text.
    """
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = process.communicate()

    def _as_text(stream):
        # On Python 3 the pipes yield bytes, but the callers in this script
        # match the output against str regex patterns. On Python 2 bytes is
        # str, so the value is handed back untouched.
        if isinstance(stream, bytes) and not isinstance(stream, str):
            return stream.decode('utf-8', 'replace')
        return stream

    return _as_text(out), _as_text(err), process.returncode
def get_input(arg, valid_keys):
    """Convert the input to a dict and perform basic validation.

    :param arg: string holding a Python dict literal; two-character "\\n"
                sequences are turned into real newlines before parsing.
    :param valid_keys: keys that must all be present in the parsed dict.
    :returns: the parsed dict, or None when the text cannot be parsed, is
              not a dict, or lacks one of the required keys.
    """
    literal_text = arg.replace("\\n", "\n")
    try:
        # literal_eval only accepts Python literals; it never executes code.
        parsed = ast.literal_eval(literal_text)
    except Exception:
        return None
    # A list or string literal containing the key names would also satisfy
    # the membership checks below, yet cannot be indexed by key afterwards.
    if not isinstance(parsed, dict):
        return None
    if not all(key in parsed for key in valid_keys):
        return None
    return parsed
def get_partition_uuid(dev):
    """Return the PARTUUID value that blkid prints for dev, or None when
    the blkid output carries no PARTUUID field."""
    blkid_out = command(['blkid', dev])[0]
    found = re.search('PARTUUID=\"(.+?)\"', blkid_out)
    if found is None:
        return None
    return found.group(1)
def device_path_to_device_node(device_path):
    """Resolve a device path to its device node.

    Runs "udevadm settle -E <path>" and then "readlink -f <path>".
    Returns the readlink output without trailing whitespace, or None when
    running either command raised an exception.
    """
    try:
        # Only the side effect of settle matters, its output is not used.
        command(["udevadm", "settle", "-E", device_path])
        resolved = command(["readlink", "-f", device_path])[0]
        node = resolved.rstrip()
    except Exception:
        return None
    return node
###########################################
# Manage Journal Disk Partitioning Scheme #
###########################################
# Directory of per-partition symlinks named after the partition UUID; the
# journal symlink of an OSD is pointed at an entry of this directory.
DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
# Value passed to "sgdisk --typecode" for every journal partition created.
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106' # Type of a journal partition
def is_partitioning_correct(disk_path, partition_sizes):
    """Validate the existence and size of journal partitions.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: expected partition sizes in MiB, in partition
                            order (the first entry describes partition 1).
    :returns: True when the disk carries a GPT table and parted reports
              exactly the requested size for every expected partition,
              False otherwise.
    """
    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Check that partition table format is GPT
    command(["udevadm", "settle", "-E", disk_node])
    output, _, _ = command(["parted", "-s", disk_node, "print"])
    if not re.search('Partition Table: gpt', output):
        print("Format of disk node %s is not GPT, zapping disk" % disk_node)
        return False

    # NVMe partition nodes are named <disk>p<N>, other disks use <disk><N>.
    separator = 'p' if DEVICE_NAME_NVME in disk_node else ''

    # Check that each partition size matches the one in input
    for partition_index, size in enumerate(partition_sizes, 1):
        partition_node = '{}{}{}'.format(disk_node, separator,
                                         partition_index)
        command(["udevadm", "settle", "-E", partition_node])
        output, _, _ = command(
            ["parted", "-s", partition_node, "unit", "MiB", "print"])

        # Accept "<size>MiB" or "<size>.00MiB" only. A character class of
        # dots and zeros would also accept extra trailing digits, so that
        # an expected size of 10 would match a 100 MiB partition.
        regex = ("^Disk " + re.escape(partition_node) + ":\\s*" +
                 re.escape(str(size)) + "(?:\\.0*)?MiB")
        if not re.search(regex, output, re.MULTILINE):
            print("Journal partition %(node)s size is not %(size)s, "
                  "zapping disk" % {"node": partition_node, "size": size})
            return False

    command(["udevadm", "settle", "-t", "10"])
    return True
def create_partitions(disk_path, partition_sizes):
    """Recreate partitions.

    Writes a fresh GPT table to the disk, removes the by-partuuid symlinks
    that resolved into that disk, then creates one partition per entry of
    partition_sizes and tags each one as a ceph journal.

    :param disk_path: device path of the journal disk.
    :param partition_sizes: partition sizes in MiB, in creation order.
    :raises SystemExit: exit status 1 when the partition table cannot be
                        erased or a partition cannot be created.
    """
    # Obtain the device node from the device path.
    disk_node = device_path_to_device_node(disk_path)

    # Issue: After creating a new partition table on a device, Udev does not
    # always remove old symlinks (i.e. to previous partitions on that device).
    # Also, even if links are erased before zapping the disk, some of them will
    # be recreated even though there is no partition to back them!
    # Therefore, we have to remove the links AFTER we erase the partition table
    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
    # GPT partitions on the storage node so nothing to remove in this case
    links = []
    if os.path.isdir(DISK_BY_PARTUUID):
        candidates = [os.path.join(DISK_BY_PARTUUID, name)
                      for name in os.listdir(DISK_BY_PARTUUID)]
        links = [link for link in candidates if os.path.islink(link)]

    # Erase all partitions on current node by creating a new GPT table
    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
    if ret:
        print("Error erasing partition table of %(node)s\n"
              "Return code: %(ret)s reason: %(reason)s" %
              {"node": disk_node, "ret": ret, "reason": err})
        sys.exit(1)

    # Erase old symlinks
    # NOTE(review): this is a substring match, so a node such as /dev/sda
    # also matches links resolving to /dev/sdaa1 - confirm whether disks
    # with such names can coexist on the target systems.
    for link in links:
        if disk_node in os.path.realpath(link):
            os.remove(link)

    # Create partitions in order
    used_space_mib = 1  # leave 1 MB at the beginning of the disk
    for num, size in enumerate(partition_sizes, 1):
        start = used_space_mib
        end = start + size
        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
               'mkpart', 'primary',
               str(start), str(end)]
        _, err, ret = command(cmd)
        parms = {"disk_node": disk_node,
                 "start": start,
                 "end": end,
                 "reason": err}
        # Check the return code first so that the success message is only
        # printed for partitions that were actually created.
        if ret:
            print("Failed to create partition with "
                  "start=%(start)s, end=%(end)s "
                  "on %(disk_node)s reason: %(reason)s" % parms)
            sys.exit(1)
        print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
              " on %(disk_node)s" % parms)

        # Set partition type to ceph journal
        # noncritical operation, it makes 'ceph-disk list' output correct info
        cmd = ['sgdisk',
               '--change-name={num}:ceph journal'.format(num=num),
               '--typecode={num}:{uuid}'.format(
                   num=num,
                   uuid=JOURNAL_UUID,
               ),
               disk_node]
        _, err, ret = command(cmd)
        if ret:
            print("WARNING: Failed to set partition name and typecode")

        used_space_mib = end
###########################
# Manage Journal Location #
###########################
# Parent directory of the OSD mount points; an OSD data partition is
# mounted at OSD_PATH + "ceph-<osdid>".
OSD_PATH = "/var/lib/ceph/osd/"
def mount_data_partition(data_path, osdid):
    """Mount an OSD data partition and return the mounted path.

    The partition is mounted as xfs unless the output of 'mount' already
    lists it on the expected mount point.

    :param data_path: device path of the OSD data partition.
    :param osdid: OSD id; the mount point is OSD_PATH + "ceph-<osdid>".
    :returns: the mount point path.
    :raises SystemExit: exit status 1 when the mount command fails.
    """
    # Obtain the device node from the device path.
    data_node = device_path_to_device_node(data_path)

    mount_path = OSD_PATH + "ceph-" + str(osdid)
    output, _, _ = command(['mount'])

    # 'mount' prints "<node> on <path> type ...". Both names are escaped
    # and delimited so that /dev/sdb1 does not match /dev/sdb10 and
    # ceph-1 does not match ceph-10.
    regex = ("^" + re.escape(data_node) + " on " +
             re.escape(mount_path) + "(?:\\s|$)")
    if not re.search(regex, output, re.MULTILINE):
        params = {"node": data_node, "path": mount_path}
        _, _, ret = command(['mount', '-t', 'xfs', data_node, mount_path])
        if ret:
            print("Failed to mount %(node)s to %(path)s, aborting" % params)
            sys.exit(1)
        print("Mounted %(node)s to %(path)s" % params)
    return mount_path
def is_location_correct(path, journal_path, osdid):
    """Tell whether <path>/journal already resolves to the device node of
    journal_path. The osdid argument is accepted but not used."""
    # Obtain the device node from the device path.
    expected_node = device_path_to_device_node(journal_path)
    linked_node = os.path.realpath(path + "/journal")
    return linked_node == expected_node
def fix_location(mount_point, journal_path, osdid):
    """Move the journal to the new partition.

    Points the OSD's 'journal' symlink at the by-partuuid entry of the new
    journal partition, writes the partition UUID to 'journal_uuid', zeroes
    the first 100 MiB of the partition and runs 'ceph-osd --mkjournal'.
    A failing --mkjournal is reported but does not abort the script.

    :param mount_point: directory where the OSD data partition is mounted.
    :param journal_path: device path of the new journal partition.
    :param osdid: id of the OSD that owns the journal.
    :raises SystemExit: exit status 1 when the partition UUID cannot be
                        read or the symlink cannot be created.
    """
    # Obtain the device node from the device path.
    journal_node = device_path_to_device_node(journal_path)

    # Fix symlink
    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
    journal_uuid = get_partition_uuid(journal_node)
    if journal_uuid is None:
        # Without a UUID there is no by-partuuid target to link to;
        # concatenating None below would fail with a TypeError.
        print("Failed to read the partition UUID of %s, aborting" %
              journal_node)
        sys.exit(1)
    new_target = DISK_BY_PARTUUID + journal_uuid
    params = {"path": path, "target": new_target}
    try:
        if os.path.lexists(path):
            os.unlink(path)  # delete the old symlink
        os.symlink(new_target, path)
    except Exception:
        print("Failed to create symlink: %(path)s -> %(target)s" % params)
        sys.exit(1)
    else:
        print("Symlink created: %(path)s -> %(target)s" % params)

    # Fix journal_uuid
    path = mount_point + "/journal_uuid"
    try:
        with open(path, 'w') as f:
            f.write(journal_uuid)
    except Exception:
        # The operation is noncritical, it only makes 'ceph-disk list'
        # display complete output. We log and continue.
        params = {"path": path, "uuid": journal_uuid}
        print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)

    # Clean the journal partition
    # even if erasing the partition table, if another journal was present here
    # it's going to be reused. Journals are always bigger than 100MB.
    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
             'bs=1M', 'count=100'])

    # Format the journal
    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
           '-c', '/etc/ceph/ceph.conf',
           '--cluster', 'ceph',
           '--mkjournal']
    _, err, ret = command(cmd)
    params = {"journal_node": journal_node,
              "osdid": osdid,
              "ret": ret,
              "reason": err}
    if not ret:
        print("Prepared new journal partition: %(journal_node)s "
              "for osd id: %(osdid)s" % params)
    else:
        print("Error initializing journal node: "
              "%(journal_node)s for osd id: %(osdid)s "
              "ceph-osd return code: %(ret)s reason: %(reason)s" % params)
########
# Main #
########
def main(argv):
    """Parse the arguments and run the requested journal operation.

    :param argv: arguments without the program name, either
                 ["partitions", "<dict literal>"] where the dict has the
                 keys 'disk_path' and 'journals' (a list), or
                 ["location", "<dict literal>"] where the dict has the keys
                 'data_path', 'journal_path' and 'osdid' (an int).
    :raises SystemExit: exit status -1 when the arguments are invalid.
    """
    # parse and validate arguments
    partitions = None
    location = None
    valid = False
    if len(argv) == 2:
        mode, payload = argv
        if mode == "partitions":
            partitions = get_input(payload, ['disk_path', 'journals'])
            valid = (bool(partitions) and
                     isinstance(partitions, dict) and
                     isinstance(partitions['journals'], list))
        elif mode == "location":
            location = get_input(payload,
                                 ['data_path', 'journal_path', 'osdid'])
            valid = (bool(location) and
                     isinstance(location, dict) and
                     isinstance(location['osdid'], int))
    if not valid:
        print("Command intended for internal use only")
        sys.exit(-1)

    if partitions:
        # Recreate partitions only if the existing ones don't match input
        if not is_partitioning_correct(partitions['disk_path'],
                                       partitions['journals']):
            create_partitions(partitions['disk_path'], partitions['journals'])
        else:
            print("Partition table for %s is correct, "
                  "no need to repartition" %
                  device_path_to_device_node(partitions['disk_path']))
    elif location:
        # The data partition has to be mounted; it is left mounted afterwards
        mount_point = mount_data_partition(location['data_path'],
                                           location['osdid'])
        # Update journal location only if link point to another partition
        if not is_location_correct(mount_point,
                                   location['journal_path'],
                                   location['osdid']):
            print("Fixing journal location for "
                  "OSD id: %(id)s" % {"node": location['data_path'],
                                      "id": location['osdid']})
            fix_location(mount_point,
                         location['journal_path'],
                         location['osdid'])
        else:
            print("Journal location for %s is correct, "
                  "no need to change it" % location['data_path'])
# Run only when executed as a script, so that importing the module (for
# example to reuse its helpers) does not start repartitioning disks.
if __name__ == "__main__":
    main(sys.argv[1:])