Add pre-flight check for device pristinity

Add `non-pristine` key to `list-disks` action.

No longer attempt to do initializtion of `osd-journal` devices.

Make py27 test noop

Flip pep8 test to py3

Partial-Bug: #1698154
Change-Id: I0ca574fa7f0683b4e8a693b9f62fbf6b39689789
Depends-On: I90a866aa138d18e4242783c42d4c7c587f696d7d
This commit is contained in:
Frode Nordahl 2018-06-01 12:15:01 +02:00
parent bebf8601c9
commit 352d699387
No known key found for this signature in database
GPG Key ID: 6A5D59A3BA48373F
9 changed files with 252 additions and 115 deletions

View File

@ -25,7 +25,18 @@ resume:
Set the local osd units in the charm to 'in'. Note that the pause option Set the local osd units in the charm to 'in'. Note that the pause option
does NOT stop the osd processes. does NOT stop the osd processes.
list-disks: list-disks:
description: List the unmounted disk on the specified unit description: |
List disks
.
The 'disks' key is populated with block devices that are known by udev,
are not mounted and not mentioned in 'osd-journal' configuration option.
.
The 'blacklist' key is populated with osd-devices in the blacklist stored
in the local kv store of this specific unit.
.
The 'non-pristine' key is populated with block devices that are known by
udev, are not mounted, not mentioned in 'osd-journal' configuration option
and are currently not eligible for use because of presence of foreign data.
add-disk: add-disk:
description: Add disk(s) to Ceph description: Add disk(s) to Ceph
params: params:

View File

@ -15,10 +15,17 @@
# limitations under the License. # limitations under the License.
""" """
List unmounted devices. List disks
This script will get all block devices known by udev and check if they The 'disks' key is populated with block devices that are known by udev,
are mounted so that we can give unmounted devices to the administrator. are not mounted and not mentioned in 'osd-journal' configuration option.
The 'blacklist' key is populated with osd-devices in the blacklist stored
in the local kv store of this specific unit.
The 'non-pristine' key is populated with block devices that are known by
udev, are not mounted, not mentioned in 'osd-journal' configuration option
and are currently not eligible for use because of presence of foreign data.
""" """
import sys import sys
@ -32,7 +39,15 @@ import ceph.utils
import utils import utils
if __name__ == '__main__': if __name__ == '__main__':
non_pristine = []
osd_journal = utils.get_journal_devices()
for dev in list(set(ceph.utils.unmounted_disks()) - set(osd_journal)):
if (not ceph.utils.is_active_bluestore_device(dev) and
not ceph.utils.is_pristine_disk(dev)):
non_pristine.append(dev)
hookenv.action_set({ hookenv.action_set({
'disks': ceph.utils.unmounted_disks(), 'disks': list(set(ceph.utils.unmounted_disks()) - set(osd_journal)),
'blacklist': utils.get_blacklist(), 'blacklist': utils.get_blacklist(),
'non-pristine': non_pristine,
}) })

View File

@ -20,7 +20,6 @@ import glob
import os import os
import shutil import shutil
import sys import sys
import tempfile
import socket import socket
import subprocess import subprocess
import netifaces import netifaces
@ -41,6 +40,7 @@ from charmhelpers.core.hookenv import (
Hooks, Hooks,
UnregisteredHookError, UnregisteredHookError,
service_name, service_name,
status_get,
status_set, status_set,
storage_get, storage_get,
storage_list, storage_list,
@ -76,6 +76,7 @@ from utils import (
get_public_addr, get_public_addr,
get_cluster_addr, get_cluster_addr,
get_blacklist, get_blacklist,
get_journal_devices,
) )
from charmhelpers.contrib.openstack.alternatives import install_alternative from charmhelpers.contrib.openstack.alternatives import install_alternative
from charmhelpers.contrib.network.ip import ( from charmhelpers.contrib.network.ip import (
@ -85,6 +86,9 @@ from charmhelpers.contrib.network.ip import (
) )
from charmhelpers.contrib.storage.linux.ceph import ( from charmhelpers.contrib.storage.linux.ceph import (
CephConfContext) CephConfContext)
from charmhelpers.contrib.storage.linux.utils import (
is_device_mounted,
)
from charmhelpers.contrib.charmsupport import nrpe from charmhelpers.contrib.charmsupport import nrpe
from charmhelpers.contrib.hardening.harden import harden from charmhelpers.contrib.hardening.harden import harden
@ -357,38 +361,6 @@ def emit_cephconf(upgrading=False):
charm_ceph_conf, 90) charm_ceph_conf, 90)
JOURNAL_ZAPPED = '/var/lib/ceph/journal_zapped'
def read_zapped_journals():
if os.path.exists(JOURNAL_ZAPPED):
with open(JOURNAL_ZAPPED, 'rt', encoding='UTF-8') as zapfile:
zapped = set(
filter(None,
[l.strip() for l in zapfile.readlines()]))
log("read zapped: {}".format(zapped), level=DEBUG)
return zapped
return set()
def write_zapped_journals(journal_devs):
tmpfh, tmpfile = tempfile.mkstemp()
with os.fdopen(tmpfh, 'wb') as zapfile:
log("write zapped: {}".format(journal_devs),
level=DEBUG)
zapfile.write('\n'.join(sorted(list(journal_devs))).encode('UTF-8'))
shutil.move(tmpfile, JOURNAL_ZAPPED)
def check_overlap(journaldevs, datadevs):
if not journaldevs.isdisjoint(datadevs):
msg = ("Journal/data devices mustn't"
" overlap; journal: {0}, data: {1}".format(journaldevs,
datadevs))
log(msg, level=ERROR)
raise ValueError(msg)
@hooks.hook('config-changed') @hooks.hook('config-changed')
@harden() @harden()
def config_changed(): def config_changed():
@ -438,13 +410,28 @@ def prepare_disks_and_activate():
vaultlocker.write_vaultlocker_conf(context) vaultlocker.write_vaultlocker_conf(context)
osd_journal = get_journal_devices() osd_journal = get_journal_devices()
check_overlap(osd_journal, set(get_devices())) if not osd_journal.isdisjoint(set(get_devices())):
raise ValueError('`osd-journal` and `osd-devices` options must not'
'overlap.')
log("got journal devs: {}".format(osd_journal), level=DEBUG) log("got journal devs: {}".format(osd_journal), level=DEBUG)
already_zapped = read_zapped_journals()
non_zapped = osd_journal - already_zapped # pre-flight check of eligible device pristinity
for journ in non_zapped: devices = get_devices()
ceph.maybe_zap_journal(journ) # filter osd-devices that are file system paths
write_zapped_journals(osd_journal) devices = [dev for dev in devices if dev.startswith('/dev')]
# filter osd-devices that does not exist on this unit
devices = [dev for dev in devices if os.path.exists(dev)]
# filter osd-devices that are already mounted
devices = [dev for dev in devices if not is_device_mounted(dev)]
# filter osd-devices that are active bluestore devices
devices = [dev for dev in devices
if not ceph.is_active_bluestore_device(dev)]
log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG)
if not all(ceph.is_pristine_disk(dev) for dev in devices):
status_set('blocked',
'Non-pristine devices detected, consult '
'`list-disks`, `zap-disk` and `blacklist-*` actions.')
return
if ceph.is_bootstrapped(): if ceph.is_bootstrapped():
log('ceph bootstrapped, rescanning disks') log('ceph bootstrapped, rescanning disks')
@ -521,20 +508,6 @@ def get_devices():
return [device for device in devices if device not in _blacklist] return [device for device in devices if device not in _blacklist]
def get_journal_devices():
if config('osd-journal'):
devices = [l.strip() for l in config('osd-journal').split(' ')]
else:
devices = []
storage_ids = storage_list('osd-journals')
devices.extend((storage_get('location', s) for s in storage_ids))
# Filter out any devices in the action managed unit-local device blacklist
_blacklist = get_blacklist()
return set(device for device in devices
if device not in _blacklist and os.path.exists(device))
@hooks.hook('mon-relation-changed', @hooks.hook('mon-relation-changed',
'mon-relation-departed') 'mon-relation-departed')
def mon_relation(): def mon_relation():
@ -631,13 +604,15 @@ def assess_status():
# Check for OSD device creation parity i.e. at least some devices # Check for OSD device creation parity i.e. at least some devices
# must have been presented and used for this charm to be operational # must have been presented and used for this charm to be operational
(prev_status, prev_message) = status_get()
running_osds = ceph.get_running_osds() running_osds = ceph.get_running_osds()
if not running_osds: if prev_status != 'blocked':
status_set('blocked', if not running_osds:
'No block devices detected using current configuration') status_set('blocked',
else: 'No block devices detected using current configuration')
status_set('active', else:
'Unit is ready ({} OSD)'.format(len(running_osds))) status_set('active',
'Unit is ready ({} OSD)'.format(len(running_osds)))
@hooks.hook('update-status') @hooks.hook('update-status')

View File

@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import socket
import re import re
import os
import socket
from charmhelpers.core.hookenv import ( from charmhelpers.core.hookenv import (
unit_get, unit_get,
cached, cached,
@ -22,6 +24,8 @@ from charmhelpers.core.hookenv import (
log, log,
DEBUG, DEBUG,
status_set, status_set,
storage_get,
storage_list,
) )
from charmhelpers.core import unitdata from charmhelpers.core import unitdata
from charmhelpers.fetch import ( from charmhelpers.fetch import (
@ -39,6 +43,7 @@ from charmhelpers.contrib.network.ip import (
get_ipv6_addr get_ipv6_addr
) )
TEMPLATES_DIR = 'templates' TEMPLATES_DIR = 'templates'
try: try:
@ -213,3 +218,17 @@ def get_blacklist():
"""Get blacklist stored in the local kv() store""" """Get blacklist stored in the local kv() store"""
db = unitdata.kv() db = unitdata.kv()
return db.get('osd-blacklist', []) return db.get('osd-blacklist', [])
def get_journal_devices():
if config('osd-journal'):
devices = [l.strip() for l in config('osd-journal').split(' ')]
else:
devices = []
storage_ids = storage_list('osd-journals')
devices.extend((storage_get('location', s) for s in storage_ids))
# Filter out any devices in the action managed unit-local device blacklist
_blacklist = get_blacklist()
return set(device for device in devices
if device not in _blacklist and os.path.exists(device))

View File

@ -66,7 +66,6 @@ from charmhelpers.contrib.storage.linux.ceph import (
from charmhelpers.contrib.storage.linux.utils import ( from charmhelpers.contrib.storage.linux.utils import (
is_block_device, is_block_device,
is_device_mounted, is_device_mounted,
zap_disk,
) )
from charmhelpers.contrib.openstack.utils import ( from charmhelpers.contrib.openstack.utils import (
get_os_codename_install_source, get_os_codename_install_source,
@ -870,7 +869,42 @@ def get_partition_list(dev):
raise raise
def is_pristine_disk(dev):
"""
Read first 2048 bytes (LBA 0 - 3) of block device to determine whether it
is actually all zeros and safe for us to use.
Existing partitioning tools does not discern between a failure to read from
block device, failure to understand a partition table and the fact that a
block device has no partition table. Since we need to be positive about
which is which we need to read the device directly and confirm ourselves.
:param dev: Path to block device
:type dev: str
:returns: True all 2048 bytes == 0x0, False if not
:rtype: bool
"""
want_bytes = 2048
f = open(dev, 'rb')
data = f.read(want_bytes)
read_bytes = len(data)
if read_bytes != want_bytes:
log('{}: short read, got {} bytes expected {}.'
.format(dev, read_bytes, want_bytes), level=WARNING)
return False
return all(byte == 0x0 for byte in data)
def is_osd_disk(dev): def is_osd_disk(dev):
db = kv()
osd_devices = db.get('osd-devices', [])
if dev in osd_devices:
log('Device {} already processed by charm,'
' skipping'.format(dev))
return True
partitions = get_partition_list(dev) partitions = get_partition_list(dev)
for partition in partitions: for partition in partitions:
try: try:
@ -1296,15 +1330,6 @@ def update_monfs():
pass pass
def maybe_zap_journal(journal_dev):
if is_osd_disk(journal_dev):
log('Looks like {} is already an OSD data'
' or journal, skipping.'.format(journal_dev))
return
zap_disk(journal_dev)
log("Zapped journal device {}".format(journal_dev))
def get_partitions(dev): def get_partitions(dev):
cmd = ['partx', '--raw', '--noheadings', dev] cmd = ['partx', '--raw', '--noheadings', dev]
try: try:

View File

@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
import amulet import amulet
import re
import time import time
import keystoneclient import keystoneclient
@ -712,6 +713,69 @@ class CephOsdBasicDeployment(OpenStackAmuletDeployment):
mtime, unit_name)) mtime, unit_name))
amulet.raise_status('Folder mtime is older than provided mtime') amulet.raise_status('Folder mtime is older than provided mtime')
def test_901_blocked_when_non_pristine_disk_appears(self):
"""
Validate that charm goes into blocked state when it is presented with
new block devices that have foreign data on them.
Instances used in UOSCI has a flavour with ephemeral storage in
addition to the bootable instance storage. The ephemeral storage
device is partitioned, formatted and mounted early in the boot process
by cloud-init.
As long as the device is mounted the charm will not attempt to use it.
If we unmount it and trigger the config-changed hook the block device
will appear as a new and previously untouched device for the charm.
One of the first steps of device eligibility checks should be to make
sure we are seeing a pristine and empty device before doing any
further processing.
As the ephemeral device will have data on it we can use it to validate
that these checks work as intended.
"""
u.log.debug('Checking behaviour when non-pristine disks appear...')
u.log.debug('Configuring ephemeral-unmount...')
self.d.configure('ceph-osd', {'ephemeral-unmount': '/mnt',
'osd-devices': '/dev/vdb'})
self._auto_wait_for_status(message=re.compile('Non-pristine.*'),
include_only=['ceph-osd'])
u.log.debug('Units now in blocked state, running zap-disk action...')
action_ids = []
self.ceph_osd_sentry = self.d.sentry['ceph-osd'][0]
for unit in range(0, 3):
zap_disk_params = {
'devices': '/dev/vdb',
'i-really-mean-it': True,
}
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
'zap-disk', params=zap_disk_params)
action_ids.append(action_id)
for unit in range(0, 3):
assert u.wait_on_action(action_ids[unit]), (
'zap-disk action failed.')
u.log.debug('Running add-disk action...')
action_ids = []
for unit in range(0, 3):
add_disk_params = {
'osd-devices': '/dev/vdb',
}
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
'add-disk', params=add_disk_params)
action_ids.append(action_id)
# NOTE(fnordahl): LP: #1774694
# for unit in range(0, 3):
# assert u.wait_on_action(action_ids[unit]), (
# 'add-disk action failed.')
u.log.debug('Wait for idle/ready status...')
self._auto_wait_for_status(include_only=['ceph-osd'])
u.log.debug('OK')
def test_910_pause_and_resume(self): def test_910_pause_and_resume(self):
"""The services can be paused and resumed. """ """The services can be paused and resumed. """
u.log.debug('Checking pause and resume actions...') u.log.debug('Checking pause and resume actions...')

10
tox.ini
View File

@ -18,11 +18,11 @@ whitelist_externals = juju
passenv = HOME TERM AMULET_* CS_API_* passenv = HOME TERM AMULET_* CS_API_*
[testenv:py27] [testenv:py27]
basepython = python2.7 # ceph charms are Python3-only, but py27 unit test target
deps = -r{toxinidir}/requirements.txt # is required by OpenStack Governance. Remove this shim as soon as
-r{toxinidir}/test-requirements.txt # permitted. http://governance.openstack.org/reference/cti/python_cti.html
# temporarily disable py27 whitelist_externals = true
commands = /bin/true commands = true
[testenv:py35] [testenv:py35]
basepython = python3.5 basepython = python3.5

View File

@ -405,21 +405,6 @@ class CephHooksTestCase(unittest.TestCase):
devices = ceph_hooks.get_devices() devices = ceph_hooks.get_devices()
self.assertEqual(devices, ['/dev/vda', '/dev/vdb']) self.assertEqual(devices, ['/dev/vda', '/dev/vdb'])
@patch('os.path.exists')
@patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config')
def test_get_journal_devices(self, mock_config, mock_storage_list,
mock_os_path_exists):
'''Devices returned as expected'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_os_path_exists.return_value = True
devices = ceph_hooks.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
@patch.object(ceph_hooks, 'get_blacklist') @patch.object(ceph_hooks, 'get_blacklist')
@patch.object(ceph_hooks, 'storage_list') @patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config') @patch.object(ceph_hooks, 'config')
@ -435,26 +420,6 @@ class CephHooksTestCase(unittest.TestCase):
mock_get_blacklist.assert_called() mock_get_blacklist.assert_called()
self.assertEqual(devices, ['/dev/vdb']) self.assertEqual(devices, ['/dev/vdb'])
@patch('os.path.exists')
@patch.object(ceph_hooks, 'get_blacklist')
@patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config')
def test_get_journal_devices_blacklist(self, mock_config,
mock_storage_list,
mock_get_blacklist,
mock_os_path_exists):
'''Devices returned as expected when blacklist in effect'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_get_blacklist.return_value = ['/dev/vda']
mock_os_path_exists.return_value = True
devices = ceph_hooks.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
mock_get_blacklist.assert_called()
self.assertEqual(devices, set(['/dev/vdb']))
@patch.object(ceph_hooks, 'log') @patch.object(ceph_hooks, 'log')
@patch.object(ceph_hooks, 'config') @patch.object(ceph_hooks, 'config')
@patch('os.environ') @patch('os.environ')

View File

@ -0,0 +1,63 @@
# Copyright 2016 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from mock import patch
with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
lambda *args, **kwargs: f(*args, **kwargs))
import utils
class CephUtilsTestCase(unittest.TestCase):
def setUp(self):
super(CephUtilsTestCase, self).setUp()
@patch('os.path.exists')
@patch.object(utils, 'storage_list')
@patch.object(utils, 'config')
def test_get_journal_devices(self, mock_config, mock_storage_list,
mock_os_path_exists):
'''Devices returned as expected'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_os_path_exists.return_value = True
devices = utils.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
@patch('os.path.exists')
@patch.object(utils, 'get_blacklist')
@patch.object(utils, 'storage_list')
@patch.object(utils, 'config')
def test_get_journal_devices_blacklist(self, mock_config,
mock_storage_list,
mock_get_blacklist,
mock_os_path_exists):
'''Devices returned as expected when blacklist in effect'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_get_blacklist.return_value = ['/dev/vda']
mock_os_path_exists.return_value = True
devices = utils.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
mock_get_blacklist.assert_called()
self.assertEqual(devices, set(['/dev/vdb']))