Implement the 'remove-disk' action
This new action allows users to either purge an OSD, or remove it, opening up the possibility of recycling the previous OSD id. In addition, this action will clean up any bcache devices that were created in previous steps. Change-Id: If3566031ba3f02dac0bc86938dcf9e85a66a66f0 Depends-On: Ib959e81833eb2094d02c7bdd507b1c8b7fbcd3db func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/683
This commit is contained in:
36
actions.yaml
36
actions.yaml
@ -149,3 +149,39 @@ get-availability-zone:
|
||||
show-all:
|
||||
type: boolean
|
||||
description: Option to view information for all units. Default is 'false'.
|
||||
remove-disk:
|
||||
description: |
|
||||
Remove disks from Ceph, then produce a report telling the user how to
|
||||
re-add them with settings as close as possible to the original ones.
|
||||
params:
|
||||
osd-devices:
|
||||
type: string
|
||||
description: A space-separated list of devices to remove
|
||||
osd-ids:
|
||||
type: string
|
||||
description: |
|
||||
A space separated list of OSD ids to remove. This parameter is mutually
|
||||
exclusive with the parameter 'osd-devices'.
|
||||
purge:
|
||||
type: boolean
|
||||
description: |
|
||||
Whether to fully purge the OSD or let the id be available for reuse.
|
||||
default: false
|
||||
timeout:
|
||||
type: integer
|
||||
description: |
|
||||
The time in minutes to wait for the OSD to be safe to remove.
|
||||
default: 5
|
||||
force:
|
||||
type: boolean
|
||||
description: |
|
||||
Whether to forcefully remove the OSD even if it's determined to be
|
||||
unsafe to destroy it.
|
||||
default: false
|
||||
format:
|
||||
type: string
|
||||
enum:
|
||||
- text
|
||||
- json
|
||||
default: text
|
||||
description: The output format returned for the command.
|
||||
|
@ -61,6 +61,9 @@ def add_device(request, device_path, bucket=None,
|
||||
else:
|
||||
effective_dev = device_path
|
||||
|
||||
if osd_id is not None and osd_id.startswith('osd.'):
|
||||
osd_id = osd_id[4:]
|
||||
|
||||
charms_ceph.utils.osdize(effective_dev, hookenv.config('osd-format'),
|
||||
ceph_hooks.get_journal_devices(),
|
||||
hookenv.config('ignore-device-errors'),
|
||||
@ -91,6 +94,14 @@ def add_device(request, device_path, bucket=None,
|
||||
}
|
||||
)
|
||||
|
||||
if part_iter is not None:
|
||||
# Update the alias map so we can refer to an OSD via the original
|
||||
# device instead of the newly created cache name.
|
||||
aliases = db.get('osd-aliases', {})
|
||||
aliases[device_path] = effective_dev
|
||||
db.set('osd-aliases', aliases)
|
||||
db.flush()
|
||||
|
||||
return request
|
||||
|
||||
|
||||
@ -183,5 +194,5 @@ if __name__ == "__main__":
|
||||
for error in errors:
|
||||
part_iter.cleanup(error)
|
||||
|
||||
function_fail('Failed to add devices: {}', ','.join(errors))
|
||||
function_fail('Failed to add devices: {}'.format(','.join(errors)))
|
||||
sys.exit(1)
|
||||
|
1
actions/remove-disk
Symbolic link
1
actions/remove-disk
Symbolic link
@ -0,0 +1 @@
|
||||
./remove_disk.py
|
358
actions/remove_disk.py
Executable file
358
actions/remove_disk.py
Executable file
@ -0,0 +1,358 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2021 Canonical Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import datetime
|
||||
import errno
|
||||
import json
|
||||
from math import ceil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append('lib')
|
||||
sys.path.append('hooks')
|
||||
|
||||
import charmhelpers.core.hookenv as hookenv
|
||||
from charmhelpers.core.hookenv import function_fail
|
||||
|
||||
import charms_ceph.utils
|
||||
from charmhelpers.core.unitdata import kv
|
||||
from utils import (get_bcache_names, bcache_remove, device_size,
|
||||
get_parent_device, remove_lvm, wipefs_safely)
|
||||
|
||||
|
||||
def normalize_osd_id(osd_id):
    """Return an OSD identifier in its canonical 'osd.<number>' form.

    :param osd_id: Either the bare id (an int or a string) or a string
                   that already carries the 'osd.' prefix.
    :type osd_id: Option[int, str]

    :returns: The id as a string prefixed with 'osd.'.
    :rtype: str
    """
    has_prefix = isinstance(osd_id, str) and osd_id.startswith('osd.')
    if has_prefix:
        return osd_id
    return 'osd.' + str(osd_id)
|
||||
|
||||
|
||||
def get_device_map():
    """Build the list of (OSD id, device path) pairs for the local OSDs.

    The data comes from the JSON output of 'ceph-volume lvm list', which
    is keyed by OSD id; every device listed under an id yields one entry.

    :returns: One dict per device, holding the normalized OSD id under
              'id' and the device path under 'path'.
    :rtype: list[dict['id', 'path']]
    :raises: subprocess.CalledProcessError if ceph-volume fails.
    """
    raw = subprocess.check_output(
        ['ceph-volume', 'lvm', 'list', '--format=json'])
    listing = json.loads(raw.decode('utf8'))

    entries = []
    for key, volumes in listing.items():
        name = normalize_osd_id(key)
        entries.extend({'id': name, 'path': path}
                       for volume in volumes
                       for path in volume['devices'])
    return entries
|
||||
|
||||
|
||||
def map_device_to_id(dev_map, device):
    """Look up which OSD id is using a device (or bcache name).

    :param dev_map: A map with the same form as that returned by
                    the function 'get_device_map'.
    :type dev_map: list[dict['id', 'path']]

    :param device: The path to the device.
    :type device: str

    :returns: The id of the first entry whose path equals the device,
              or None when no entry matches.
    :rtype: Option[None, str]
    """
    matches = (entry['id'] for entry in dev_map if device == entry['path'])
    return next(matches, None)
|
||||
|
||||
|
||||
def map_id_to_device(dev_map, osd_id):
    """Look up the device path used by an OSD id.

    :param dev_map: A map with the same form as that returned by
                    the function 'get_device_map'.
    :type dev_map: list[dict['id', 'path']]

    :param osd_id: The OSD id to check against.
    :type osd_id: str

    :returns: The path of the first entry whose id equals the OSD id,
              or None when no entry matches.
    :rtype: Option[None, str]
    """
    matches = (entry['path'] for entry in dev_map if entry['id'] == osd_id)
    return next(matches, None)
|
||||
|
||||
|
||||
def safe_to_destroy(osd_id):
    """Ask the Ceph cluster whether an OSD id can be destroyed safely.

    Runs 'ceph osd safe-to-destroy' using the 'osd-removal' client id.

    :param osd_id: The OSD id, in the form 'osd.<number>'.
    :type osd_id: str

    :returns: True when the command exits with status 0.
    :rtype: bool
    """
    cmd = ['ceph', '--id', 'osd-removal', 'osd', 'safe-to-destroy', osd_id]
    status = subprocess.call(cmd)
    return status == 0
|
||||
|
||||
|
||||
def safe_to_stop(osd_id):
    """Ask the Ceph cluster whether an OSD can be stopped safely.

    Runs 'ceph osd ok-to-stop' using the 'osd-removal' client id.

    :param osd_id: The OSD id, in the form 'osd.<number>'.
    :type osd_id: str

    :returns: True when the command exits with status 0.
    :rtype: bool
    """
    cmd = ['ceph', '--id', 'osd-removal', 'osd', 'ok-to-stop', osd_id]
    status = subprocess.call(cmd)
    return status == 0
|
||||
|
||||
|
||||
def reweight_osd(osd_id):
    """Set the CRUSH weight of the OSD id to zero.

    :param osd_id: The OSD id, in the form 'osd.<number>'.
    :type osd_id: str
    :raises: subprocess.CalledProcessError if the ceph command fails.
    """
    base_cmd = ['ceph', '--id', 'osd-removal', 'osd', 'crush', 'reweight']
    subprocess.check_call(base_cmd + [osd_id, '0'])
|
||||
|
||||
|
||||
def destroy(osd_id, purge=False):
    """Destroy or purge an OSD id.

    The command is retried up to 10 times (0.1s apart) while it fails with
    EBUSY, since we might get here before the OSD is marked as down.

    :param osd_id: The OSD id, in the form 'osd.<number>'.
    :type osd_id: str

    :param purge: Run 'osd purge' instead of 'osd destroy'.
    :type purge: bool

    :raises: subprocess.CalledProcessError if the command fails with any
             error code other than EBUSY.
    :raises: RemoveException if the OSD is still busy after all retries.
    """
    cmd = ['ceph', '--id', 'osd-removal', 'osd',
           'purge' if purge else 'destroy',
           osd_id, '--yes-i-really-mean-it']

    for _ in range(10):
        try:
            subprocess.check_call(cmd)
            return
        except subprocess.CalledProcessError as e:
            if e.returncode != errno.EBUSY:
                raise
        time.sleep(0.1)

    # Do not fall through silently: the caller would go on to wipe the
    # device and report success while the OSD is still in the cluster.
    raise RemoveException(
        'OSD {} is still busy; could not {} it'.format(
            osd_id, 'purge' if purge else 'destroy'))
|
||||
|
||||
|
||||
class RemoveException(Exception):
    """Error raised by this action when an OSD cannot be removed."""
|
||||
|
||||
|
||||
class ActionOSD:

    """Class used to encapsulate all the needed information to
    perform OSD removal."""

    def __init__(self, dev_map, dev=None, osd_id=None, aliases=None):
        """Construct an action-OSD.

        Exactly one of 'dev' and 'osd_id' is expected; the other value is
        derived from 'dev_map'.

        :param dev_map: A map with the same form as that returned by
                        the function 'get_device_map'.
        :type dev_map: list[dict['id', 'path']]

        :param dev: The device being used by an OSD.
        :type dev: Option[None, str]

        :param osd_id: The OSD id.
        :type osd_id: Option[None, int, str]

        :param aliases: Maps a device path given by the user to the device
                        actually used by the OSD. Defaults to no aliases.
        :type aliases: Option[None, dict[str, str]]

        :raises: RemoveException if both 'dev' and 'osd_id' are given, if
                 the device is not used by any OSD, or if the OSD id is
                 not present in 'dev_map'.
        """
        # Avoid a shared mutable default argument.
        if aliases is None:
            aliases = {}

        if dev is not None:
            if osd_id is not None:
                raise RemoveException(
                    'osd-ids and osd-devices are mutually exclusive')
            elif dev in aliases:
                self.alias = dev
                self.device = aliases.get(dev)
            else:
                self.device, self.alias = dev, None

            self.osd_id = map_device_to_id(dev_map, self.device)
            if self.osd_id is None:
                raise RemoveException('Device {} is not being used'
                                      .format(self.device))
        else:
            self.alias = None
            self.osd_id = normalize_osd_id(osd_id)
            self.device = map_id_to_device(dev_map, self.osd_id)
            if self.device is None:
                raise RemoveException('Invalid osd ID: {}'.format(self.osd_id))

        # Only inspect bcache once the device is known to be valid.
        self.bcache_backing, self.bcache_caching = \
            get_bcache_names(self.device)

        self.report = {}   # maps device -> actions.

    @property
    def osd_device(self):
        """The bcache backing device if there is one, else the device."""
        return self.bcache_backing or self.device

    def remove(self, purge, timeout, force):
        """Remove the OSD from the cluster.

        The OSD is reweighted to 0, then polled until the cluster deems it
        safe to stop and destroy; afterwards the service is stopped, the
        OSD is destroyed (or purged) and the device is cleaned up. An
        entry describing the removed device is stored in 'self.report'.

        :param purge: Whether to purge or just destroy the OSD.
        :type purge: bool

        :param timeout: The number of minutes to wait for until the OSD
                        is safe to destroy.
        :type timeout: int

        :param force: Whether to proceed with OSD removal, even when
                      it's not safe to do so.
        :type force: bool

        :raises: RemoveException if the wait times out and 'force' is not
                 set.
        """
        # Set the CRUSH weight to 0.
        hookenv.log('Reweighting OSD', hookenv.DEBUG)
        reweight_osd(self.osd_id)

        # Ensure that the OSD is safe to stop and destroy.
        end = (datetime.datetime.now() +
               datetime.timedelta(seconds=timeout * 60))
        safe_stop, safe_destroy = False, False

        while True:
            # Each check is skipped once it has succeeded.
            if not safe_stop and safe_to_stop(self.osd_id):
                safe_stop = True
            if not safe_destroy and safe_to_destroy(self.osd_id):
                safe_destroy = True

            if safe_stop and safe_destroy:
                break

            curr = datetime.datetime.now()
            if curr >= end:
                if force:
                    hookenv.log(
                        'OSD not safe to destroy, but "force" was specified',
                        hookenv.DEBUG)
                    break

                raise RemoveException(
                    'timed out waiting for an OSD to be safe to destroy')
            # Poll at most once per second without overshooting the
            # deadline.
            time.sleep(min(1, (end - curr).total_seconds()))

        # Stop the OSD service; stop_osd is given the id without the
        # 'osd.' prefix.
        hookenv.log('Stopping the OSD service', hookenv.DEBUG)
        charms_ceph.utils.stop_osd(self.osd_id[4:])

        # Remove the OSD from the cluster.
        hookenv.log('Destroying the OSD', hookenv.DEBUG)
        destroy(self.osd_id, purge)
        report = self.report.setdefault(self.osd_device,
                                        {'osd-ids': self.osd_id})

        if self.bcache_backing:
            # Remove anything related to bcache.
            size = int(ceil(device_size(self.bcache_caching)))
            caching = get_parent_device(self.bcache_caching)
            report.update({'cache-devices': caching, 'partition-size': size})
            bcache_remove(self.device, self.bcache_backing,
                          self.bcache_caching)
        else:
            remove_lvm(self.device)
            wipefs_safely(self.device)
|
||||
|
||||
|
||||
def make_same_length(l1, l2):
    """Pad the shorter of two lists, in place, with None's until both
    lists have the same length."""
    target = max(len(l1), len(l2))
    for seq in (l1, l2):
        missing = target - len(seq)
        seq.extend([None] * missing)
|
||||
|
||||
|
||||
def write_report(report, ftype):
    """Generate a report on how to re-establish the removed disks
    as part of the cluster again, then set the 'message' action result to
    either a JSON object or a textual representation.

    In text form, the report lists one 'juju run-action ... add-disk'
    command per removed device, each on its own line.

    :param report: The initial, raw report from the 'ActionOSD' objects.
    :type report: dict

    :param ftype: Either 'text' or 'json'; specifies the type of report.
                  Any value other than 'text' produces JSON.
    :type ftype: Enum['text', 'json']
    """
    if ftype == 'text':
        msg = '{} disks have been removed\n'.format(len(report))
        msg += 'To replace them, run:\n'
        commands = []
        for device, action_args in report.items():
            # Render the arguments as space-separated 'key=value' pairs by
            # abusing the JSON separators and stripping the JSON syntax.
            args = json.dumps(action_args, separators=(' ', '='))
            args = args.replace('{', '').replace('}', '').replace('"', '')
            commands.append('juju run-action {} add-disk {} {}'.format(
                hookenv.local_unit(), 'osd-devices=' + device, args))
        # One command per line; previously commands for several devices
        # were concatenated on a single line.
        msg += '\n'.join(commands)
    else:
        msg = json.dumps(report)

    hookenv.action_set({'message': msg})
|
||||
|
||||
|
||||
def get_list(key):
    """Fetch an action argument and split it on whitespace into a list.

    :param key: The name of the action parameter.
    :type key: str

    :returns: The whitespace-separated items, or an empty list when the
              parameter is unset or empty.
    :rtype: list[str]
    """
    value = hookenv.action_get(key)
    if not value:
        return []
    return value.split()
|
||||
|
||||
|
||||
def advertise_osd_count(count):
    """Publish the updated OSD number on every 'mon' relation.

    :param count: The value to set for the 'bootstrapped-osds' key.
    :type count: int
    """
    for rel_id in hookenv.relation_ids('mon'):
        settings = {'bootstrapped-osds': count}
        hookenv.relation_set(relation_id=rel_id,
                             relation_settings=settings)
|
||||
|
||||
|
||||
def main():
    """Entry point for the 'remove-disk' action.

    Reads the action parameters, removes every requested OSD, persists the
    updated 'osd-devices' and 'osd-aliases' in the unit's key-value store,
    advertises the new OSD count and writes the report. The action is
    failed (exit status 1) on invalid parameters or if any removal
    reported an error.
    """
    osd_ids = get_list('osd-ids')
    osd_devs = get_list('osd-devices')
    purge = hookenv.action_get('purge')
    force = hookenv.action_get('force')
    timeout = hookenv.action_get('timeout')

    if timeout <= 0:
        function_fail('timeout must be > 0')
        sys.exit(1)
    elif not osd_ids and not osd_devs:
        function_fail('One of osd-ids or osd-devices must be provided')
        sys.exit(1)
    elif osd_ids and osd_devs:
        # Reject the request up-front. Otherwise, with lists of different
        # lengths, the unpaired entries would still be removed even though
        # the parameters are invalid.
        function_fail('osd-ids and osd-devices are mutually exclusive')
        sys.exit(1)

    # Pad the empty list with None's so both can be zipped together.
    make_same_length(osd_ids, osd_devs)
    errors = []
    report = {}
    dev_map = get_device_map()
    charm_devices = kv().get('osd-devices', [])
    aliases = kv().get('osd-aliases', {})

    for dev, osd_id in zip(osd_devs, osd_ids):
        try:
            action_osd = ActionOSD(dev_map, dev=dev, osd_id=osd_id,
                                   aliases=aliases)
            if action_osd.device not in charm_devices:
                errors.append('Device {} not being used by Ceph'
                              .format(action_osd.device))
                continue
            action_osd.remove(purge, timeout, force)
            charm_devices.remove(action_osd.device)
            if action_osd.alias:
                aliases.pop(action_osd.alias)
            report.update(action_osd.report)
        except RemoveException as e:
            errors.append(str(e))

    kv().set('osd-devices', charm_devices)
    kv().set('osd-aliases', aliases)
    kv().flush()
    advertise_osd_count(len(charm_devices))
    write_report(report, hookenv.action_get('format'))

    if errors:
        function_fail('Failed to remove devices: {}'.format(','.join(errors)))
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -79,6 +79,7 @@ from utils import (
|
||||
is_osd_bootstrap_ready,
|
||||
import_osd_bootstrap_key,
|
||||
import_osd_upgrade_key,
|
||||
import_osd_removal_key,
|
||||
get_host_ip,
|
||||
get_networks,
|
||||
assert_charm_supports_ipv6,
|
||||
@ -662,11 +663,14 @@ def get_bdev_enable_discard():
|
||||
def mon_relation():
|
||||
bootstrap_key = relation_get('osd_bootstrap_key')
|
||||
upgrade_key = relation_get('osd_upgrade_key')
|
||||
removal_key = relation_get('osd_disk_removal_key')
|
||||
if get_fsid() and get_auth() and bootstrap_key:
|
||||
log('mon has provided conf- scanning disks')
|
||||
emit_cephconf()
|
||||
import_osd_bootstrap_key(bootstrap_key)
|
||||
import_osd_upgrade_key(upgrade_key)
|
||||
if removal_key:
|
||||
import_osd_removal_key(removal_key)
|
||||
prepare_disks_and_activate()
|
||||
_, settings, _ = (ch_ceph.CephOSDConfContext()
|
||||
.filter_osd_from_mon_settings())
|
||||
|
197
hooks/utils.py
197
hooks/utils.py
@ -72,6 +72,7 @@ except ImportError:
|
||||
|
||||
_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
|
||||
_upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring"
|
||||
_removal_keyring = "/var/lib/ceph/osd/ceph.client.osd-removal.keyring"
|
||||
|
||||
|
||||
def is_osd_bootstrap_ready():
|
||||
@ -83,6 +84,21 @@ def is_osd_bootstrap_ready():
|
||||
return os.path.exists(_bootstrap_keyring)
|
||||
|
||||
|
||||
def _import_key(key, path, name):
    """Create a keyring file holding a cephx key, unless it already exists.

    :param key: The cephx key to add to the keyring.
    :type key: str

    :param path: The path of the keyring file to create.
    :type path: str

    :param name: The entity name the key is stored under.
    :type name: str
    :raises: subprocess.CalledProcessError
    """
    if os.path.exists(path):
        return

    # ceph-authtool is run through sudo as the user given by ceph_user().
    subprocess.check_call([
        'sudo', '-u', ceph.ceph_user(),
        'ceph-authtool', path,
        '--create-keyring',
        '--name={}'.format(name),
        '--add-key={}'.format(key),
    ])
|
||||
|
||||
|
||||
def import_osd_bootstrap_key(key):
|
||||
"""
|
||||
Ensure that the osd-bootstrap keyring is setup.
|
||||
@ -90,18 +106,7 @@ def import_osd_bootstrap_key(key):
|
||||
:param key: The cephx key to add to the bootstrap keyring
|
||||
:type key: str
|
||||
:raises: subprocess.CalledProcessError"""
|
||||
if not os.path.exists(_bootstrap_keyring):
|
||||
cmd = [
|
||||
"sudo",
|
||||
"-u",
|
||||
ceph.ceph_user(),
|
||||
'ceph-authtool',
|
||||
_bootstrap_keyring,
|
||||
'--create-keyring',
|
||||
'--name=client.bootstrap-osd',
|
||||
'--add-key={}'.format(key)
|
||||
]
|
||||
subprocess.check_call(cmd)
|
||||
_import_key(key, _bootstrap_keyring, 'client.bootstrap-osd')
|
||||
|
||||
|
||||
def import_osd_upgrade_key(key):
|
||||
@ -111,18 +116,17 @@ def import_osd_upgrade_key(key):
|
||||
:param key: The cephx key to add to the upgrade keyring
|
||||
:type key: str
|
||||
:raises: subprocess.CalledProcessError"""
|
||||
if not os.path.exists(_upgrade_keyring):
|
||||
cmd = [
|
||||
"sudo",
|
||||
"-u",
|
||||
ceph.ceph_user(),
|
||||
'ceph-authtool',
|
||||
_upgrade_keyring,
|
||||
'--create-keyring',
|
||||
'--name=client.osd-upgrade',
|
||||
'--add-key={}'.format(key)
|
||||
]
|
||||
subprocess.check_call(cmd)
|
||||
_import_key(key, _upgrade_keyring, 'client.osd-upgrade')
|
||||
|
||||
|
||||
def import_osd_removal_key(key):
    """
    Ensure that the osd-removal keyring is setup.

    The key is stored under the 'client.osd-removal' name.

    :param key: The cephx key to add to the removal keyring
    :type key: str
    :raises: subprocess.CalledProcessError"""
    _import_key(key, _removal_keyring, 'client.osd-removal')
|
||||
|
||||
|
||||
def render_template(template_name, context, template_dir=TEMPLATES_DIR):
|
||||
@ -348,16 +352,16 @@ class DeviceError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _check_output(args):
|
||||
def _check_output(args, **kwargs):
|
||||
try:
|
||||
return subprocess.check_output(args).decode('UTF-8')
|
||||
return subprocess.check_output(args, **kwargs).decode('UTF-8')
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise DeviceError(str(e))
|
||||
|
||||
|
||||
def _check_call(args):
|
||||
def _check_call(args, **kwargs):
|
||||
try:
|
||||
return subprocess.check_call(args)
|
||||
return subprocess.check_call(args, **kwargs)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise DeviceError(str(e))
|
||||
|
||||
@ -458,16 +462,37 @@ def device_size(dev):
|
||||
return ret / (1024 * 1024 * 1024) # Return size in GB.
|
||||
|
||||
|
||||
def bcache_remove(bcache, cache_dev):
|
||||
def remove_lvm(device):
    """Remove any physical and logical volumes associated to a device.

    The volume groups are taken from the 'VG Name' lines of the
    'pvdisplay' output; they are removed first, then the physical volume
    itself. Nothing is done if 'pvdisplay' fails for the device.

    :param device: The path to the device.
    :type device: str
    :raises: DeviceError if removing the volume groups or the physical
             volume fails.
    """
    try:
        rv = _check_output(['sudo', 'pvdisplay', device])
    except DeviceError:
        # Assume no physical volumes.
        return

    vgs = []
    for line in rv.splitlines():
        line = line.strip()
        if not line.startswith('VG Name'):
            continue
        fields = line.split()
        # A physical volume that is not part of any volume group prints
        # 'VG Name' with no value; skip it instead of raising IndexError.
        if len(fields) > 2:
            vgs.append(fields[2])

    if vgs:
        _check_call(['sudo', 'vgremove', '-y'] + vgs)
    _check_call(['sudo', 'pvremove', '-y', device])
|
||||
|
||||
|
||||
def bcache_remove(bcache, backing, caching):
|
||||
"""Remove a bcache kernel device, given its caching.
|
||||
|
||||
:param bcache: The path of the bcache device.
|
||||
:type bcache: str
|
||||
|
||||
:param cache_dev: The caching device used for the bcache name.
|
||||
:type cache_dev: str
|
||||
:param backing: The backing device for bcache
|
||||
:type backing: str
|
||||
|
||||
:param caching: The caching device for bcache
|
||||
:type caching: str
|
||||
"""
|
||||
rv = _check_output(['sudo', 'bcache-super-show', cache_dev])
|
||||
rv = _check_output(['sudo', 'bcache-super-show', backing])
|
||||
uuid = None
|
||||
# Fetch the UUID for the caching device.
|
||||
for line in rv.split('\n'):
|
||||
@ -478,15 +503,47 @@ def bcache_remove(bcache, cache_dev):
|
||||
else:
|
||||
return
|
||||
bcache_name = bcache[bcache.rfind('/') + 1:]
|
||||
with open('/sys/block/{}/bcache/stop'.format(bcache_name), 'wb') as f:
|
||||
f.write(b'1')
|
||||
with open('/sys/fs/bcache/{}/stop'.format(uuid), 'wb') as f:
|
||||
f.write(b'1')
|
||||
|
||||
def write_one(path):
|
||||
os.system('echo 1 | sudo tee {}'.format(path))
|
||||
|
||||
# The command ceph-volume typically creates PV's and VG's for the
|
||||
# OSD device. Remove them now before deleting the bcache.
|
||||
remove_lvm(bcache)
|
||||
|
||||
# NOTE: We *must* do the following steps in this order. For
|
||||
# kernels 4.x and prior, not doing so will cause the bcache device
|
||||
# to be undeletable.
|
||||
# In addition, we have to use 'sudo tee' as done above, since it
|
||||
# can cause permission issues in some implementations.
|
||||
write_one('/sys/block/{}/bcache/detach'.format(bcache_name))
|
||||
write_one('/sys/block/{}/bcache/stop'.format(bcache_name))
|
||||
write_one('/sys/fs/bcache/{}/stop'.format(uuid))
|
||||
|
||||
# We wipe the bcache signatures here because the bcache tools will not
|
||||
# create the devices otherwise. There is a 'force' option, but it's not
|
||||
# always available, so we do the portable thing here.
|
||||
wipefs_safely(backing)
|
||||
wipefs_safely(caching)
|
||||
|
||||
|
||||
def wipe_disk(dev):
|
||||
def wipe_disk(dev, timeout=None):
|
||||
"""Destroy all data in a specific device, including partition tables."""
|
||||
_check_call(['sudo', 'wipefs', '-a', dev])
|
||||
_check_call(['sudo', 'wipefs', '-a', dev], timeout=timeout)
|
||||
|
||||
|
||||
def wipefs_safely(dev):
    """Wipe a device, retrying on failure.

    Up to 10 attempts are made, each with a 1 second timeout and a 0.3s
    pause after a failed one. A timed out attempt ends the procedure
    without an error.

    :param dev: The path to the device to wipe.
    :type dev: str
    :raises: DeviceError if every attempt failed.
    """
    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1
        try:
            wipe_disk(dev, 1)
        except DeviceError:
            time.sleep(0.3)
        except subprocess.TimeoutExpired:
            # If this command times out, then it's likely because
            # the disk is dead, so give up.
            return
        else:
            return
    raise DeviceError('Failed to wipe bcache device: {}'.format(dev))
|
||||
|
||||
|
||||
class PartitionIter:
|
||||
@ -556,11 +613,71 @@ class PartitionIter:
|
||||
return ret
|
||||
|
||||
def cleanup(self, device):
|
||||
"""Destroy any created partitions and bcache names for a device."""
|
||||
args = self.created.get(device)
|
||||
if not args:
|
||||
return
|
||||
|
||||
bcache, caching = args
|
||||
try:
|
||||
bcache_remove(*args)
|
||||
bcache_remove(bcache, device, caching)
|
||||
except DeviceError:
|
||||
log('Failed to cleanup bcache device: {}'.format(args[0]))
|
||||
log('Failed to cleanup bcache device: {}'.format(bcache))
|
||||
|
||||
|
||||
def _device_suffix(dev):
|
||||
ix = dev.rfind('/')
|
||||
if ix >= 0:
|
||||
dev = dev[ix + 1:]
|
||||
return dev
|
||||
|
||||
|
||||
def get_bcache_names(dev):
    """Return the backing and caching devices for a bcache device,
    in that specific order.

    The two devices are read from the first two entries of the device's
    'slaves' directory in sysfs; 'bcache-super-show' is then used to tell
    which one is the backing device.

    :param dev: The path to the bcache device, i.e: /dev/bcache0
    :type dev: str

    :returns: A tuple with the backing and caching devices, or a pair of
              None's if the device is None or not a bcache device with at
              least two slaves.
    :rtype: tuple[Option[None, str], Option[None, str]]
    """
    if dev is None:
        return None, None

    name = _device_suffix(dev)
    bcache_dir = '/sys/block/{}/bcache'.format(name)
    slaves_dir = '/sys/block/{}/slaves'.format(name)
    if not (os.path.exists(bcache_dir) and os.path.exists(slaves_dir)):
        return None, None

    members = os.listdir(slaves_dir)
    if len(members) < 2:
        return None, None

    first = '/dev/' + members[0]
    second = '/dev/' + members[1]
    # The listing order is not meaningful, so check which role the first
    # entry has and swap the pair when it is not the backing device.
    show = _check_output(['sudo', 'bcache-super-show', first])
    if 'backing device' in show:
        return first, second
    return second, first
|
||||
|
||||
|
||||
def get_parent_device(dev):
    """Return the parent of a block device, as reported by 'lsblk'.

    The parent is the first entry without 'children' of its own among the
    'children' of the first device in the 'lsblk -as <dev> -J' output.

    :param dev: The path to the device.
    :type dev: str

    :returns: The path of the parent device, or 'dev' itself when lsblk
              fails or reports no such entry.
    :rtype: str
    """
    try:
        raw = subprocess.check_output(['lsblk', '-as', dev, '-J'])
        tree = json.loads(raw.decode('UTF-8'))
    except subprocess.CalledProcessError:
        return dev

    nodes = tree.get('blockdevices', [])
    if not nodes:
        return dev

    candidates = (node for node in nodes[0].get('children', [])
                  if 'children' not in node)
    parent = next(candidates, None)
    if parent is None:
        return dev
    return '/dev/' + parent['name']
|
||||
|
@ -1162,6 +1162,10 @@ osd_upgrade_caps = collections.OrderedDict([
|
||||
'allow command "osd in"',
|
||||
'allow command "osd rm"',
|
||||
'allow command "auth del"',
|
||||
'allow command "osd safe-to-destroy"',
|
||||
'allow command "osd crush reweight"',
|
||||
'allow command "osd purge"',
|
||||
'allow command "osd destroy"',
|
||||
])
|
||||
])
|
||||
|
||||
|
@ -51,6 +51,9 @@ enable experimental unrecoverable data corrupting features = bluestore rocksdb
|
||||
[client.osd-upgrade]
|
||||
keyring = /var/lib/ceph/osd/ceph.client.osd-upgrade.keyring
|
||||
|
||||
[client.osd-removal]
|
||||
keyring = /var/lib/ceph/osd/ceph.client.osd-removal.keyring
|
||||
|
||||
[mon]
|
||||
keyring = /var/lib/ceph/mon/$cluster-$id/keyring
|
||||
|
||||
|
@ -44,7 +44,7 @@ class AddDiskActionTests(CharmTestCase):
|
||||
|
||||
db = mock.MagicMock()
|
||||
self.kv.return_value = db
|
||||
db.get.return_value = ['/dev/myosddev']
|
||||
db.get.side_effect = {'osd-devices': ['/dev/myosddev']}.get
|
||||
|
||||
request = {'ops': []}
|
||||
add_disk.add_device(request, '/dev/myosddev')
|
||||
@ -57,11 +57,13 @@ class AddDiskActionTests(CharmTestCase):
|
||||
True, None)])
|
||||
|
||||
piter = add_disk.PartitionIter(['/dev/cache'], 100, ['/dev/myosddev'])
|
||||
mock_create_bcache = mock.MagicMock(side_effect=lambda b: b)
|
||||
mock_create_bcache = mock.MagicMock(side_effect=lambda b: '/dev/cache')
|
||||
with mock.patch.object(add_disk.PartitionIter, 'create_bcache',
|
||||
mock_create_bcache) as mock_call:
|
||||
add_disk.add_device(request, '/dev/myosddev', part_iter=piter)
|
||||
mock_call.assert_called()
|
||||
db.set.assert_called_with('osd-aliases',
|
||||
{'/dev/myosddev': '/dev/cache'})
|
||||
|
||||
mock_create_bcache.side_effect = lambda b: None
|
||||
with mock.patch.object(add_disk.PartitionIter, 'create_bcache',
|
||||
|
136
unit_tests/test_actions_remove_disk.py
Normal file
136
unit_tests/test_actions_remove_disk.py
Normal file
@ -0,0 +1,136 @@
|
||||
# Copyright 2021 Canonical Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from unittest import mock
|
||||
|
||||
from actions import remove_disk
|
||||
|
||||
from test_utils import CharmTestCase
|
||||
|
||||
|
||||
class RemoveDiskActionTests(CharmTestCase):
|
||||
|
||||
@mock.patch.object(remove_disk.subprocess, 'check_output')
|
||||
def test_get_device_map(self, check_output):
|
||||
check_output.return_value = b'''
|
||||
{
|
||||
"1": [{"devices": ["/dev/sdx1"]}],
|
||||
"2": [{"devices": ["/dev/sdc2", "/dev/sdc3"]}]
|
||||
}
|
||||
'''
|
||||
rv = remove_disk.get_device_map()
|
||||
self.assertEqual(rv[0]['path'], '/dev/sdx1')
|
||||
self.assertEqual(rv[1]['id'], rv[2]['id'])
|
||||
|
||||
def test_normalize_osd_id(self):
|
||||
self.assertEqual('osd.1', remove_disk.normalize_osd_id(1))
|
||||
self.assertEqual('osd.2', remove_disk.normalize_osd_id('osd.2'))
|
||||
self.assertEqual('osd.3', remove_disk.normalize_osd_id('3'))
|
||||
|
||||
def test_map_device_id(self):
|
||||
dev_map = [
|
||||
{'id': 'osd.1', 'path': '/dev/sdc1'},
|
||||
{'id': 'osd.2', 'path': '/dev/sdd2'},
|
||||
{'id': 'osd.2', 'path': '/dev/sdx3'}
|
||||
]
|
||||
self.assertEqual(
|
||||
'osd.1',
|
||||
remove_disk.map_device_to_id(dev_map, '/dev/sdc1'))
|
||||
self.assertIsNone(
|
||||
remove_disk.map_device_to_id(dev_map, '/dev/sdx4'))
|
||||
|
||||
self.assertEqual(
|
||||
'/dev/sdd2',
|
||||
remove_disk.map_id_to_device(dev_map, 'osd.2'))
|
||||
self.assertIsNone(
|
||||
remove_disk.map_id_to_device(dev_map, 'osd.3'))
|
||||
|
||||
@mock.patch.object(remove_disk, 'get_bcache_names')
|
||||
def test_action_osd_constructor(self, bcache_names):
|
||||
bcache_names.return_value = ('bcache0', '/dev/bcache0')
|
||||
dev_map = [
|
||||
{'path': '/dev/sdx1', 'id': 'osd.1'}
|
||||
]
|
||||
with self.assertRaises(remove_disk.RemoveException):
|
||||
remove_disk.ActionOSD(dev_map, dev='/dev/sdx1', osd_id='osd.1')
|
||||
obj = remove_disk.ActionOSD(dev_map, dev='/dev/sdx1')
|
||||
self.assertEqual(obj.osd_id, 'osd.1')
|
||||
obj = remove_disk.ActionOSD(dev_map, osd_id='1')
|
||||
self.assertEqual(obj.device, '/dev/sdx1')
|
||||
|
||||
@mock.patch.object(remove_disk, 'device_size')
|
||||
@mock.patch.object(remove_disk.charms_ceph.utils, 'stop_osd')
|
||||
@mock.patch.object(remove_disk, 'bcache_remove')
|
||||
@mock.patch.object(remove_disk.subprocess, 'call')
|
||||
@mock.patch.object(remove_disk.subprocess, 'check_call')
|
||||
@mock.patch.object(remove_disk, 'get_bcache_names')
|
||||
def test_action_osd_remove(self, get_bcache_names, check_call,
|
||||
call, bcache_remove, stop_osd, device_size):
|
||||
call.return_value = 0
|
||||
get_bcache_names.return_value = ('/dev/backing', '/dev/caching')
|
||||
device_size.side_effect = lambda x: 1 if x == '/dev/caching' else 0
|
||||
dev_map = [
|
||||
{'path': '/dev/bcache0', 'id': 'osd.1'}
|
||||
]
|
||||
prefix_args = ['ceph', '--id', 'osd-removal']
|
||||
obj = remove_disk.ActionOSD(dev_map, osd_id='1')
|
||||
|
||||
obj.remove(True, 1, True)
|
||||
call.assert_any_call(prefix_args + ['osd', 'safe-to-destroy', 'osd.1'])
|
||||
check_call.assert_any_call(prefix_args + ['osd', 'purge', 'osd.1',
|
||||
'--yes-i-really-mean-it'])
|
||||
check_call.assert_any_call(prefix_args + ['osd', 'crush', 'reweight',
|
||||
'osd.1', '0'])
|
||||
bcache_remove.assert_called_with(
|
||||
'/dev/bcache0', '/dev/backing', '/dev/caching')
|
||||
report = obj.report
|
||||
self.assertIn('/dev/backing', report)
|
||||
report = report['/dev/backing']
|
||||
self.assertIn('osd-ids', report)
|
||||
self.assertIn('osd.1', report['osd-ids'])
|
||||
self.assertIn('cache-devices', report)
|
||||
self.assertIn('partition-size', report)
|
||||
self.assertEqual('/dev/caching', report['cache-devices'])
|
||||
self.assertEqual(1, report['partition-size'])
|
||||
|
||||
# Test the timeout check.
|
||||
with self.assertRaises(remove_disk.RemoveException):
|
||||
call.return_value = 1
|
||||
obj.remove(False, 0, False)
|
||||
|
||||
@mock.patch.object(remove_disk.hookenv, 'local_unit')
@mock.patch.object(remove_disk.hookenv, 'action_set')
def test_write_report(self, action_set, local_unit):
    """Check the text report rendered by write_report.

    Whatever is handed to action_set is captured in a dict, and the
    resulting 'message' must contain the add-disk command for the local
    unit together with every parameter taken from the report entry.
    """
    captured = {}

    def record(values):
        # Collect whatever the action would have published.
        return captured.update(values)

    action_set.side_effect = record
    local_unit.return_value = 'ceph-osd/0'

    entry = {'osd-ids': 'osd.1', 'cache-devices': 'cache@',
             'partition-size': 5}
    remove_disk.write_report({'dev@': entry}, 'text')

    self.assertIn('message', captured)
    text = captured['message']
    expected_fragments = (
        'juju run-action ceph-osd/0 add-disk',
        'osd-devices=dev@',
        'osd-ids=osd.1',
        'cache-devices=cache@',
        'partition-size=5',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, text)
|
||||
|
||||
def test_make_same_length(self):
    """make_same_length pads the shorter list and is stable when re-run."""
    longer = [1]
    shorter = []

    # First call: the empty list must be padded with None.
    remove_disk.make_same_length(longer, shorter)
    self.assertEqual(len(longer), len(shorter))
    self.assertIsNone(shorter[0])

    # Second call on equally sized lists must not grow them.
    length_before = len(longer)
    remove_disk.make_same_length(longer, shorter)
    self.assertEqual(len(longer), length_before)
|
@ -15,7 +15,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from unittest.mock import patch, mock_open
|
||||
from unittest.mock import patch
|
||||
|
||||
with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
|
||||
mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
|
||||
@ -204,7 +204,10 @@ class CephUtilsTestCase(unittest.TestCase):
|
||||
self.assertEqual(745, int(utils.device_size('')))
|
||||
|
||||
@patch('subprocess.check_output')
|
||||
def test_bcache_remove(self, check_output):
|
||||
@patch.object(utils, 'remove_lvm')
|
||||
@patch.object(utils, 'wipe_disk')
|
||||
@patch('os.system')
|
||||
def test_bcache_remove(self, system, wipe_disk, remove_lvm, check_output):
|
||||
check_output.return_value = b'''
|
||||
sb.magic ok
|
||||
sb.first_sector 8 [match]
|
||||
@ -223,15 +226,93 @@ class CephUtilsTestCase(unittest.TestCase):
|
||||
dev.cache.replacement 0 [lru]
|
||||
cset.uuid 424242
|
||||
'''
|
||||
mo = mock_open()
|
||||
with patch('builtins.open', mo):
|
||||
utils.bcache_remove('/dev/bcache0', '/dev/nvme0n1p1')
|
||||
mo.assert_any_call('/sys/block/bcache0/bcache/stop', 'wb')
|
||||
mo.assert_any_call('/sys/fs/bcache/424242/stop', 'wb')
|
||||
utils.bcache_remove('/dev/bcache0', 'backing', 'caching')
|
||||
system.assert_any_call(
|
||||
'echo 1 | sudo tee /sys/block/bcache0/bcache/detach')
|
||||
system.assert_any_call(
|
||||
'echo 1 | sudo tee /sys/block/bcache0/bcache/stop')
|
||||
system.assert_any_call(
|
||||
'echo 1 | sudo tee /sys/fs/bcache/424242/stop')
|
||||
wipe_disk.assert_any_call('backing', 1)
|
||||
wipe_disk.assert_any_call('caching', 1)
|
||||
|
||||
# Test for utils.get_bcache_names with os.listdir, os.path.exists and
# subprocess.check_output mocked out. Two superblock dumps are fed through
# check_output: one flagged "[backing device]" and one flagged
# "[cache device]"; the first element of the result is checked for each.
# NOTE(review): the fixtures look like `bcache-super-show` output - confirm.
@patch('os.listdir')
|
||||
@patch('os.path.exists')
|
||||
@patch('subprocess.check_output')
|
||||
def test_get_bcache_names(self, check_output, exists, listdir):
|
||||
exists.return_value = True
|
||||
# Fixture 1: superblock whose sb.version line reads "[backing device]".
check_output.return_value = b'''
|
||||
sb.magic ok
|
||||
sb.first_sector 8 [match]
|
||||
sb.csum A71D96D4364343BF [match]
|
||||
sb.version 1 [backing device]
|
||||
|
||||
dev.label (empty)
|
||||
dev.uuid cca84a86-3f68-4ffb-8be1-4449c9fb29a8
|
||||
dev.sectors_per_block 1
|
||||
dev.sectors_per_bucket 1024
|
||||
dev.data.first_sector 16
|
||||
dev.data.cache_mode 1 [writeback]
|
||||
dev.data.cache_state 1 [clean]
|
||||
|
||||
cset.uuid 57add9da-e5de-47c6-8f39-3e16aafb8d31
|
||||
'''
|
||||
# The mocked directory listing yields the two device names under test.
listdir.return_value = ['backing', 'caching']
|
||||
values = utils.get_bcache_names('/dev/bcache0')
|
||||
# A pair is expected, with '/dev/backing' first for this fixture.
self.assertEqual(2, len(values))
|
||||
self.assertEqual(values[0], '/dev/backing')
|
||||
# Fixture 2: superblock whose sb.version line reads "[cache device]".
check_output.return_value = b'''
|
||||
sb.magic ok
|
||||
sb.first_sector 8 [match]
|
||||
sb.csum 6802E76075FF7B77 [match]
|
||||
sb.version 3 [cache device]
|
||||
|
||||
dev.label (empty)
|
||||
dev.uuid fb6e9d06-12e2-46ca-b8fd-797ecec1a126
|
||||
dev.sectors_per_block 1
|
||||
dev.sectors_per_bucket 1024
|
||||
dev.cache.first_sector 1024
|
||||
dev.cache.cache_sectors 10238976
|
||||
dev.cache.total_sectors 10240000
|
||||
dev.cache.ordered yes
|
||||
dev.cache.discard no
|
||||
dev.cache.pos 0
|
||||
dev.cache.replacement 0 [lru]
|
||||
|
||||
cset.uuid 57add9da-e5de-47c6-8f39-3e16aafb8d31
|
||||
'''
|
||||
values = utils.get_bcache_names('/dev/bcache0')
|
||||
# With the cache-device fixture the first element is '/dev/caching'.
self.assertEqual(values[0], '/dev/caching')
|
||||
|
||||
# Test for utils.remove_lvm with subprocess.check_output and
# subprocess.check_call mocked out. Covers a physical volume that lists two
# volume groups, and the case where querying the device raises DeviceError.
# NOTE(review): the fixture looks like `pvdisplay` output - confirm.
@patch('subprocess.check_output')
|
||||
@patch('subprocess.check_call')
|
||||
def test_remove_lvm(self, check_call, check_output):
|
||||
# Fixture: one PV (/dev/bcache0) carrying two "VG Name" entries.
check_output.return_value = b'''
|
||||
--- Physical volume ---
|
||||
PV Name /dev/bcache0
|
||||
VG Name ceph-1
|
||||
VG Name ceph-2
|
||||
'''
|
||||
utils.remove_lvm('/dev/bcache0')
|
||||
# Both volume groups are expected in a single vgremove call, and the
# physical volume is expected to be removed with pvremove.
check_call.assert_any_call(
|
||||
['sudo', 'vgremove', '-y', 'ceph-1', 'ceph-2'])
|
||||
check_call.assert_any_call(['sudo', 'pvremove', '-y', '/dev/bcache0'])
|
||||
|
||||
# Forget the calls recorded so far before checking the failure path.
check_call.reset_mock()
|
||||
|
||||
def just_raise(*args):
|
||||
raise utils.DeviceError()
|
||||
|
||||
# When check_output raises DeviceError, no removal command may be run.
check_output.side_effect = just_raise
|
||||
utils.remove_lvm('')
|
||||
check_call.assert_not_called()
|
||||
|
||||
@patch.object(utils, 'wipe_disk')
|
||||
@patch.object(utils, 'bcache_remove')
|
||||
@patch.object(utils, 'create_partition')
|
||||
@patch.object(utils, 'setup_bcache')
|
||||
def test_partition_iter(self, setup_bcache, create_partition):
|
||||
def test_partition_iter(self, setup_bcache, create_partition,
|
||||
bcache_remove, wipe_disk):
|
||||
create_partition.side_effect = \
|
||||
lambda c, s, n: c + '|' + str(s) + '|' + str(n)
|
||||
setup_bcache.side_effect = lambda *args: args
|
||||
@ -239,6 +320,8 @@ class CephUtilsTestCase(unittest.TestCase):
|
||||
200, ['dev1', 'dev2', 'dev3'])
|
||||
piter.create_bcache('dev1')
|
||||
setup_bcache.assert_called_with('dev1', '/dev/nvm0n1|200|0')
|
||||
piter.cleanup('dev1')
|
||||
bcache_remove.assert_called()
|
||||
setup_bcache.mock_reset()
|
||||
piter.create_bcache('dev2')
|
||||
setup_bcache.assert_called_with('dev2', '/dev/nvm0n2|200|0')
|
||||
@ -258,3 +341,14 @@ class CephUtilsTestCase(unittest.TestCase):
|
||||
# 300GB across 3 devices, i.e: 100 for each.
|
||||
self.assertEqual(100, next(piter))
|
||||
self.assertEqual(100, next(piter))
|
||||
|
||||
# Test for utils.get_parent_device with utils.subprocess.check_output mocked
# to return a JSON document: a "blockdevices" list whose single entry
# "loop1p1" has one child named "loop1".
# NOTE(review): the fixture looks like `lsblk` JSON output - confirm.
@patch.object(utils.subprocess, 'check_output')
|
||||
def test_parent_device(self, check_output):
|
||||
check_output.return_value = b'''
|
||||
{"blockdevices": [
|
||||
{"name": "loop1p1",
|
||||
"children": [
|
||||
{"name": "loop1"}]
|
||||
}]
|
||||
}'''
|
||||
# The name nested under "children" is expected back as a /dev path.
self.assertEqual(utils.get_parent_device('/dev/loop1p1'), '/dev/loop1')
|
||||
|
Reference in New Issue
Block a user