Add pre-flight check for device pristinity
Add `non-pristine` key to `list-disks` action. No longer attempt to do initializtion of `osd-journal` devices. Make py27 test noop Flip pep8 test to py3 Partial-Bug: #1698154 Change-Id: I0ca574fa7f0683b4e8a693b9f62fbf6b39689789 Depends-On: I90a866aa138d18e4242783c42d4c7c587f696d7d
This commit is contained in:
parent
bebf8601c9
commit
352d699387
13
actions.yaml
13
actions.yaml
@ -25,7 +25,18 @@ resume:
|
||||
Set the local osd units in the charm to 'in'. Note that the pause option
|
||||
does NOT stop the osd processes.
|
||||
list-disks:
|
||||
description: List the unmounted disk on the specified unit
|
||||
description: |
|
||||
List disks
|
||||
.
|
||||
The 'disks' key is populated with block devices that are known by udev,
|
||||
are not mounted and not mentioned in 'osd-journal' configuration option.
|
||||
.
|
||||
The 'blacklist' key is populated with osd-devices in the blacklist stored
|
||||
in the local kv store of this specific unit.
|
||||
.
|
||||
The 'non-pristine' key is populated with block devices that are known by
|
||||
udev, are not mounted, not mentioned in 'osd-journal' configuration option
|
||||
and are currently not eligible for use because of presence of foreign data.
|
||||
add-disk:
|
||||
description: Add disk(s) to Ceph
|
||||
params:
|
||||
|
@ -15,10 +15,17 @@
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
List unmounted devices.
|
||||
List disks
|
||||
|
||||
This script will get all block devices known by udev and check if they
|
||||
are mounted so that we can give unmounted devices to the administrator.
|
||||
The 'disks' key is populated with block devices that are known by udev,
|
||||
are not mounted and not mentioned in 'osd-journal' configuration option.
|
||||
|
||||
The 'blacklist' key is populated with osd-devices in the blacklist stored
|
||||
in the local kv store of this specific unit.
|
||||
|
||||
The 'non-pristine' key is populated with block devices that are known by
|
||||
udev, are not mounted, not mentioned in 'osd-journal' configuration option
|
||||
and are currently not eligible for use because of presence of foreign data.
|
||||
"""
|
||||
|
||||
import sys
|
||||
@ -32,7 +39,15 @@ import ceph.utils
|
||||
import utils
|
||||
|
||||
if __name__ == '__main__':
|
||||
non_pristine = []
|
||||
osd_journal = utils.get_journal_devices()
|
||||
for dev in list(set(ceph.utils.unmounted_disks()) - set(osd_journal)):
|
||||
if (not ceph.utils.is_active_bluestore_device(dev) and
|
||||
not ceph.utils.is_pristine_disk(dev)):
|
||||
non_pristine.append(dev)
|
||||
|
||||
hookenv.action_set({
|
||||
'disks': ceph.utils.unmounted_disks(),
|
||||
'disks': list(set(ceph.utils.unmounted_disks()) - set(osd_journal)),
|
||||
'blacklist': utils.get_blacklist(),
|
||||
'non-pristine': non_pristine,
|
||||
})
|
||||
|
@ -20,7 +20,6 @@ import glob
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import socket
|
||||
import subprocess
|
||||
import netifaces
|
||||
@ -41,6 +40,7 @@ from charmhelpers.core.hookenv import (
|
||||
Hooks,
|
||||
UnregisteredHookError,
|
||||
service_name,
|
||||
status_get,
|
||||
status_set,
|
||||
storage_get,
|
||||
storage_list,
|
||||
@ -76,6 +76,7 @@ from utils import (
|
||||
get_public_addr,
|
||||
get_cluster_addr,
|
||||
get_blacklist,
|
||||
get_journal_devices,
|
||||
)
|
||||
from charmhelpers.contrib.openstack.alternatives import install_alternative
|
||||
from charmhelpers.contrib.network.ip import (
|
||||
@ -85,6 +86,9 @@ from charmhelpers.contrib.network.ip import (
|
||||
)
|
||||
from charmhelpers.contrib.storage.linux.ceph import (
|
||||
CephConfContext)
|
||||
from charmhelpers.contrib.storage.linux.utils import (
|
||||
is_device_mounted,
|
||||
)
|
||||
from charmhelpers.contrib.charmsupport import nrpe
|
||||
from charmhelpers.contrib.hardening.harden import harden
|
||||
|
||||
@ -357,38 +361,6 @@ def emit_cephconf(upgrading=False):
|
||||
charm_ceph_conf, 90)
|
||||
|
||||
|
||||
JOURNAL_ZAPPED = '/var/lib/ceph/journal_zapped'
|
||||
|
||||
|
||||
def read_zapped_journals():
|
||||
if os.path.exists(JOURNAL_ZAPPED):
|
||||
with open(JOURNAL_ZAPPED, 'rt', encoding='UTF-8') as zapfile:
|
||||
zapped = set(
|
||||
filter(None,
|
||||
[l.strip() for l in zapfile.readlines()]))
|
||||
log("read zapped: {}".format(zapped), level=DEBUG)
|
||||
return zapped
|
||||
return set()
|
||||
|
||||
|
||||
def write_zapped_journals(journal_devs):
|
||||
tmpfh, tmpfile = tempfile.mkstemp()
|
||||
with os.fdopen(tmpfh, 'wb') as zapfile:
|
||||
log("write zapped: {}".format(journal_devs),
|
||||
level=DEBUG)
|
||||
zapfile.write('\n'.join(sorted(list(journal_devs))).encode('UTF-8'))
|
||||
shutil.move(tmpfile, JOURNAL_ZAPPED)
|
||||
|
||||
|
||||
def check_overlap(journaldevs, datadevs):
|
||||
if not journaldevs.isdisjoint(datadevs):
|
||||
msg = ("Journal/data devices mustn't"
|
||||
" overlap; journal: {0}, data: {1}".format(journaldevs,
|
||||
datadevs))
|
||||
log(msg, level=ERROR)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
@hooks.hook('config-changed')
|
||||
@harden()
|
||||
def config_changed():
|
||||
@ -438,13 +410,28 @@ def prepare_disks_and_activate():
|
||||
vaultlocker.write_vaultlocker_conf(context)
|
||||
|
||||
osd_journal = get_journal_devices()
|
||||
check_overlap(osd_journal, set(get_devices()))
|
||||
if not osd_journal.isdisjoint(set(get_devices())):
|
||||
raise ValueError('`osd-journal` and `osd-devices` options must not'
|
||||
'overlap.')
|
||||
log("got journal devs: {}".format(osd_journal), level=DEBUG)
|
||||
already_zapped = read_zapped_journals()
|
||||
non_zapped = osd_journal - already_zapped
|
||||
for journ in non_zapped:
|
||||
ceph.maybe_zap_journal(journ)
|
||||
write_zapped_journals(osd_journal)
|
||||
|
||||
# pre-flight check of eligible device pristinity
|
||||
devices = get_devices()
|
||||
# filter osd-devices that are file system paths
|
||||
devices = [dev for dev in devices if dev.startswith('/dev')]
|
||||
# filter osd-devices that does not exist on this unit
|
||||
devices = [dev for dev in devices if os.path.exists(dev)]
|
||||
# filter osd-devices that are already mounted
|
||||
devices = [dev for dev in devices if not is_device_mounted(dev)]
|
||||
# filter osd-devices that are active bluestore devices
|
||||
devices = [dev for dev in devices
|
||||
if not ceph.is_active_bluestore_device(dev)]
|
||||
log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG)
|
||||
if not all(ceph.is_pristine_disk(dev) for dev in devices):
|
||||
status_set('blocked',
|
||||
'Non-pristine devices detected, consult '
|
||||
'`list-disks`, `zap-disk` and `blacklist-*` actions.')
|
||||
return
|
||||
|
||||
if ceph.is_bootstrapped():
|
||||
log('ceph bootstrapped, rescanning disks')
|
||||
@ -521,20 +508,6 @@ def get_devices():
|
||||
return [device for device in devices if device not in _blacklist]
|
||||
|
||||
|
||||
def get_journal_devices():
|
||||
if config('osd-journal'):
|
||||
devices = [l.strip() for l in config('osd-journal').split(' ')]
|
||||
else:
|
||||
devices = []
|
||||
storage_ids = storage_list('osd-journals')
|
||||
devices.extend((storage_get('location', s) for s in storage_ids))
|
||||
|
||||
# Filter out any devices in the action managed unit-local device blacklist
|
||||
_blacklist = get_blacklist()
|
||||
return set(device for device in devices
|
||||
if device not in _blacklist and os.path.exists(device))
|
||||
|
||||
|
||||
@hooks.hook('mon-relation-changed',
|
||||
'mon-relation-departed')
|
||||
def mon_relation():
|
||||
@ -631,7 +604,9 @@ def assess_status():
|
||||
|
||||
# Check for OSD device creation parity i.e. at least some devices
|
||||
# must have been presented and used for this charm to be operational
|
||||
(prev_status, prev_message) = status_get()
|
||||
running_osds = ceph.get_running_osds()
|
||||
if prev_status != 'blocked':
|
||||
if not running_osds:
|
||||
status_set('blocked',
|
||||
'No block devices detected using current configuration')
|
||||
|
@ -12,8 +12,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import socket
|
||||
import re
|
||||
import os
|
||||
import socket
|
||||
|
||||
from charmhelpers.core.hookenv import (
|
||||
unit_get,
|
||||
cached,
|
||||
@ -22,6 +24,8 @@ from charmhelpers.core.hookenv import (
|
||||
log,
|
||||
DEBUG,
|
||||
status_set,
|
||||
storage_get,
|
||||
storage_list,
|
||||
)
|
||||
from charmhelpers.core import unitdata
|
||||
from charmhelpers.fetch import (
|
||||
@ -39,6 +43,7 @@ from charmhelpers.contrib.network.ip import (
|
||||
get_ipv6_addr
|
||||
)
|
||||
|
||||
|
||||
TEMPLATES_DIR = 'templates'
|
||||
|
||||
try:
|
||||
@ -213,3 +218,17 @@ def get_blacklist():
|
||||
"""Get blacklist stored in the local kv() store"""
|
||||
db = unitdata.kv()
|
||||
return db.get('osd-blacklist', [])
|
||||
|
||||
|
||||
def get_journal_devices():
|
||||
if config('osd-journal'):
|
||||
devices = [l.strip() for l in config('osd-journal').split(' ')]
|
||||
else:
|
||||
devices = []
|
||||
storage_ids = storage_list('osd-journals')
|
||||
devices.extend((storage_get('location', s) for s in storage_ids))
|
||||
|
||||
# Filter out any devices in the action managed unit-local device blacklist
|
||||
_blacklist = get_blacklist()
|
||||
return set(device for device in devices
|
||||
if device not in _blacklist and os.path.exists(device))
|
||||
|
@ -66,7 +66,6 @@ from charmhelpers.contrib.storage.linux.ceph import (
|
||||
from charmhelpers.contrib.storage.linux.utils import (
|
||||
is_block_device,
|
||||
is_device_mounted,
|
||||
zap_disk,
|
||||
)
|
||||
from charmhelpers.contrib.openstack.utils import (
|
||||
get_os_codename_install_source,
|
||||
@ -870,7 +869,42 @@ def get_partition_list(dev):
|
||||
raise
|
||||
|
||||
|
||||
def is_pristine_disk(dev):
|
||||
"""
|
||||
Read first 2048 bytes (LBA 0 - 3) of block device to determine whether it
|
||||
is actually all zeros and safe for us to use.
|
||||
|
||||
Existing partitioning tools does not discern between a failure to read from
|
||||
block device, failure to understand a partition table and the fact that a
|
||||
block device has no partition table. Since we need to be positive about
|
||||
which is which we need to read the device directly and confirm ourselves.
|
||||
|
||||
:param dev: Path to block device
|
||||
:type dev: str
|
||||
:returns: True all 2048 bytes == 0x0, False if not
|
||||
:rtype: bool
|
||||
"""
|
||||
want_bytes = 2048
|
||||
|
||||
f = open(dev, 'rb')
|
||||
data = f.read(want_bytes)
|
||||
read_bytes = len(data)
|
||||
if read_bytes != want_bytes:
|
||||
log('{}: short read, got {} bytes expected {}.'
|
||||
.format(dev, read_bytes, want_bytes), level=WARNING)
|
||||
return False
|
||||
|
||||
return all(byte == 0x0 for byte in data)
|
||||
|
||||
|
||||
def is_osd_disk(dev):
|
||||
db = kv()
|
||||
osd_devices = db.get('osd-devices', [])
|
||||
if dev in osd_devices:
|
||||
log('Device {} already processed by charm,'
|
||||
' skipping'.format(dev))
|
||||
return True
|
||||
|
||||
partitions = get_partition_list(dev)
|
||||
for partition in partitions:
|
||||
try:
|
||||
@ -1296,15 +1330,6 @@ def update_monfs():
|
||||
pass
|
||||
|
||||
|
||||
def maybe_zap_journal(journal_dev):
|
||||
if is_osd_disk(journal_dev):
|
||||
log('Looks like {} is already an OSD data'
|
||||
' or journal, skipping.'.format(journal_dev))
|
||||
return
|
||||
zap_disk(journal_dev)
|
||||
log("Zapped journal device {}".format(journal_dev))
|
||||
|
||||
|
||||
def get_partitions(dev):
|
||||
cmd = ['partx', '--raw', '--noheadings', dev]
|
||||
try:
|
||||
|
@ -15,6 +15,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import amulet
|
||||
import re
|
||||
import time
|
||||
|
||||
import keystoneclient
|
||||
@ -712,6 +713,69 @@ class CephOsdBasicDeployment(OpenStackAmuletDeployment):
|
||||
mtime, unit_name))
|
||||
amulet.raise_status('Folder mtime is older than provided mtime')
|
||||
|
||||
def test_901_blocked_when_non_pristine_disk_appears(self):
|
||||
"""
|
||||
Validate that charm goes into blocked state when it is presented with
|
||||
new block devices that have foreign data on them.
|
||||
|
||||
Instances used in UOSCI has a flavour with ephemeral storage in
|
||||
addition to the bootable instance storage. The ephemeral storage
|
||||
device is partitioned, formatted and mounted early in the boot process
|
||||
by cloud-init.
|
||||
|
||||
As long as the device is mounted the charm will not attempt to use it.
|
||||
|
||||
If we unmount it and trigger the config-changed hook the block device
|
||||
will appear as a new and previously untouched device for the charm.
|
||||
|
||||
One of the first steps of device eligibility checks should be to make
|
||||
sure we are seeing a pristine and empty device before doing any
|
||||
further processing.
|
||||
|
||||
As the ephemeral device will have data on it we can use it to validate
|
||||
that these checks work as intended.
|
||||
"""
|
||||
u.log.debug('Checking behaviour when non-pristine disks appear...')
|
||||
u.log.debug('Configuring ephemeral-unmount...')
|
||||
self.d.configure('ceph-osd', {'ephemeral-unmount': '/mnt',
|
||||
'osd-devices': '/dev/vdb'})
|
||||
self._auto_wait_for_status(message=re.compile('Non-pristine.*'),
|
||||
include_only=['ceph-osd'])
|
||||
u.log.debug('Units now in blocked state, running zap-disk action...')
|
||||
action_ids = []
|
||||
self.ceph_osd_sentry = self.d.sentry['ceph-osd'][0]
|
||||
for unit in range(0, 3):
|
||||
zap_disk_params = {
|
||||
'devices': '/dev/vdb',
|
||||
'i-really-mean-it': True,
|
||||
}
|
||||
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
|
||||
'zap-disk', params=zap_disk_params)
|
||||
action_ids.append(action_id)
|
||||
for unit in range(0, 3):
|
||||
assert u.wait_on_action(action_ids[unit]), (
|
||||
'zap-disk action failed.')
|
||||
|
||||
u.log.debug('Running add-disk action...')
|
||||
action_ids = []
|
||||
for unit in range(0, 3):
|
||||
add_disk_params = {
|
||||
'osd-devices': '/dev/vdb',
|
||||
}
|
||||
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
|
||||
'add-disk', params=add_disk_params)
|
||||
action_ids.append(action_id)
|
||||
|
||||
# NOTE(fnordahl): LP: #1774694
|
||||
# for unit in range(0, 3):
|
||||
# assert u.wait_on_action(action_ids[unit]), (
|
||||
# 'add-disk action failed.')
|
||||
|
||||
u.log.debug('Wait for idle/ready status...')
|
||||
self._auto_wait_for_status(include_only=['ceph-osd'])
|
||||
|
||||
u.log.debug('OK')
|
||||
|
||||
def test_910_pause_and_resume(self):
|
||||
"""The services can be paused and resumed. """
|
||||
u.log.debug('Checking pause and resume actions...')
|
||||
|
10
tox.ini
10
tox.ini
@ -18,11 +18,11 @@ whitelist_externals = juju
|
||||
passenv = HOME TERM AMULET_* CS_API_*
|
||||
|
||||
[testenv:py27]
|
||||
basepython = python2.7
|
||||
deps = -r{toxinidir}/requirements.txt
|
||||
-r{toxinidir}/test-requirements.txt
|
||||
# temporarily disable py27
|
||||
commands = /bin/true
|
||||
# ceph charms are Python3-only, but py27 unit test target
|
||||
# is required by OpenStack Governance. Remove this shim as soon as
|
||||
# permitted. http://governance.openstack.org/reference/cti/python_cti.html
|
||||
whitelist_externals = true
|
||||
commands = true
|
||||
|
||||
[testenv:py35]
|
||||
basepython = python3.5
|
||||
|
@ -405,21 +405,6 @@ class CephHooksTestCase(unittest.TestCase):
|
||||
devices = ceph_hooks.get_devices()
|
||||
self.assertEqual(devices, ['/dev/vda', '/dev/vdb'])
|
||||
|
||||
@patch('os.path.exists')
|
||||
@patch.object(ceph_hooks, 'storage_list')
|
||||
@patch.object(ceph_hooks, 'config')
|
||||
def test_get_journal_devices(self, mock_config, mock_storage_list,
|
||||
mock_os_path_exists):
|
||||
'''Devices returned as expected'''
|
||||
config = {'osd-journal': '/dev/vda /dev/vdb'}
|
||||
mock_config.side_effect = lambda key: config[key]
|
||||
mock_storage_list.return_value = []
|
||||
mock_os_path_exists.return_value = True
|
||||
devices = ceph_hooks.get_journal_devices()
|
||||
mock_storage_list.assert_called()
|
||||
mock_os_path_exists.assert_called()
|
||||
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
|
||||
|
||||
@patch.object(ceph_hooks, 'get_blacklist')
|
||||
@patch.object(ceph_hooks, 'storage_list')
|
||||
@patch.object(ceph_hooks, 'config')
|
||||
@ -435,26 +420,6 @@ class CephHooksTestCase(unittest.TestCase):
|
||||
mock_get_blacklist.assert_called()
|
||||
self.assertEqual(devices, ['/dev/vdb'])
|
||||
|
||||
@patch('os.path.exists')
|
||||
@patch.object(ceph_hooks, 'get_blacklist')
|
||||
@patch.object(ceph_hooks, 'storage_list')
|
||||
@patch.object(ceph_hooks, 'config')
|
||||
def test_get_journal_devices_blacklist(self, mock_config,
|
||||
mock_storage_list,
|
||||
mock_get_blacklist,
|
||||
mock_os_path_exists):
|
||||
'''Devices returned as expected when blacklist in effect'''
|
||||
config = {'osd-journal': '/dev/vda /dev/vdb'}
|
||||
mock_config.side_effect = lambda key: config[key]
|
||||
mock_storage_list.return_value = []
|
||||
mock_get_blacklist.return_value = ['/dev/vda']
|
||||
mock_os_path_exists.return_value = True
|
||||
devices = ceph_hooks.get_journal_devices()
|
||||
mock_storage_list.assert_called()
|
||||
mock_os_path_exists.assert_called()
|
||||
mock_get_blacklist.assert_called()
|
||||
self.assertEqual(devices, set(['/dev/vdb']))
|
||||
|
||||
@patch.object(ceph_hooks, 'log')
|
||||
@patch.object(ceph_hooks, 'config')
|
||||
@patch('os.environ')
|
||||
|
63
unit_tests/test_ceph_utils.py
Normal file
63
unit_tests/test_ceph_utils.py
Normal file
@ -0,0 +1,63 @@
|
||||
# Copyright 2016 Canonical Ltd
|
||||
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
from mock import patch
|
||||
|
||||
with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
|
||||
mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
|
||||
lambda *args, **kwargs: f(*args, **kwargs))
|
||||
import utils
|
||||
|
||||
|
||||
class CephUtilsTestCase(unittest.TestCase):
|
||||
def setUp(self):
|
||||
super(CephUtilsTestCase, self).setUp()
|
||||
|
||||
@patch('os.path.exists')
|
||||
@patch.object(utils, 'storage_list')
|
||||
@patch.object(utils, 'config')
|
||||
def test_get_journal_devices(self, mock_config, mock_storage_list,
|
||||
mock_os_path_exists):
|
||||
'''Devices returned as expected'''
|
||||
config = {'osd-journal': '/dev/vda /dev/vdb'}
|
||||
mock_config.side_effect = lambda key: config[key]
|
||||
mock_storage_list.return_value = []
|
||||
mock_os_path_exists.return_value = True
|
||||
devices = utils.get_journal_devices()
|
||||
mock_storage_list.assert_called()
|
||||
mock_os_path_exists.assert_called()
|
||||
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
|
||||
|
||||
@patch('os.path.exists')
|
||||
@patch.object(utils, 'get_blacklist')
|
||||
@patch.object(utils, 'storage_list')
|
||||
@patch.object(utils, 'config')
|
||||
def test_get_journal_devices_blacklist(self, mock_config,
|
||||
mock_storage_list,
|
||||
mock_get_blacklist,
|
||||
mock_os_path_exists):
|
||||
'''Devices returned as expected when blacklist in effect'''
|
||||
config = {'osd-journal': '/dev/vda /dev/vdb'}
|
||||
mock_config.side_effect = lambda key: config[key]
|
||||
mock_storage_list.return_value = []
|
||||
mock_get_blacklist.return_value = ['/dev/vda']
|
||||
mock_os_path_exists.return_value = True
|
||||
devices = utils.get_journal_devices()
|
||||
mock_storage_list.assert_called()
|
||||
mock_os_path_exists.assert_called()
|
||||
mock_get_blacklist.assert_called()
|
||||
self.assertEqual(devices, set(['/dev/vdb']))
|
Loading…
Reference in New Issue
Block a user