Updates to support Ceph Nautilus release

For Nautilus the Ceph monitors automatically create the admin and
bootstrap keyrings as part of cluster formation so the charm just
needs to retrieve the admin keyring after the monitors have been
bootstrapped.

As the bootstrap process consists of two quite distinct steps, this
change refactors the bootstrap_monitor_cluster function into two
discrete functions - one to create and start the ceph monitor, and a
second to create the ceph keyrings on the local filesystem.

As the bootstrap-osd keyring is no longer created on disk from
Nautilus onwards, the 'is_bootstrapped' function now looks for the
client.admin keyring across all Ceph releases - this should be
backwards compatible.

This change also adds the standing data for upgrade paths to
Nautilus; however, this is as yet untested.

Change-Id: I2dfab7404b72e314625ea554ee64595c5e26f3c6
This commit is contained in:
James Page 2019-05-31 10:08:36 +01:00
parent 2c8d26d125
commit f29aba7672
3 changed files with 251 additions and 83 deletions

View File

@ -40,6 +40,7 @@ from charmhelpers.core.host import (
service_start,
service_stop,
CompareHostReleases,
write_file,
)
from charmhelpers.core.hookenv import (
cached,
@ -950,13 +951,13 @@ def rescan_osd_devices():
udevadm_settle()
_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
_upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring"
def is_bootstrapped():
return os.path.exists(_bootstrap_keyring)
return os.path.exists(_client_admin_keyring)
def wait_for_bootstrap():
@ -1270,6 +1271,12 @@ def use_bluestore():
def bootstrap_monitor_cluster(secret):
"""Bootstrap local ceph mon into the ceph cluster
:param secret: cephx secret to use for monitor authentication
:type secret: str
:raises: Exception if ceph mon cannot be bootstrapped
"""
hostname = socket.gethostname()
path = '/var/lib/ceph/mon/ceph-{}'.format(hostname)
done = '{}/done'.format(path)
@ -1290,21 +1297,35 @@ def bootstrap_monitor_cluster(secret):
perms=0o755)
# end changes for Ceph >= 0.61.3
try:
add_keyring_to_ceph(keyring,
secret,
hostname,
path,
done,
init_marker)
_create_monitor(keyring,
secret,
hostname,
path,
done,
init_marker)
_create_keyrings()
except:
raise
finally:
os.unlink(keyring)
@retry_on_exception(3, base_delay=5)
def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
def _create_monitor(keyring, secret, hostname, path, done, init_marker):
"""Create monitor filesystem and enable and start ceph-mon process
:param keyring: path to temporary keyring on disk
:type keyring: str
:param secret: cephx secret to use for monitor authentication
:type: secret: str
:param hostname: hostname of the local unit
:type hostname: str
:param path: full path to ceph mon directory
:type path: str
:param done: full path to 'done' marker for ceph mon
:type done: str
:param init_marker: full path to 'init' marker for ceph mon
:type init_marker: str
"""
subprocess.check_call(['ceph-authtool', keyring,
'--create-keyring', '--name=mon.',
'--add-key={}'.format(secret),
@ -1320,39 +1341,72 @@ def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
pass
if systemd():
subprocess.check_call(['systemctl', 'enable', 'ceph-mon'])
service_restart('ceph-mon')
if cmp_pkgrevno('ceph', '14.0.0') >= 0:
systemd_unit = 'ceph-mon@{}'.format(socket.gethostname())
else:
systemd_unit = 'ceph-mon'
subprocess.check_call(['systemctl', 'enable', systemd_unit])
service_restart(systemd_unit)
else:
service_restart('ceph-mon-all')
# NOTE(jamespage): Later ceph releases require explicit
# call to ceph-create-keys to setup the
# admin keys for the cluster; this command
# will wait for quorum in the cluster before
# returning.
# NOTE(fnordahl): Explicitly run `ceph-create-keys` for older
# ceph releases too. This improves bootstrap
# resilience as the charm will wait for
# presence of peer units before attempting
# to bootstrap. Note that charms deploying
# ceph-mon service should disable running of
# `ceph-create-keys` service in init system.
cmd = ['ceph-create-keys', '--id', hostname]
if cmp_pkgrevno('ceph', '12.0.0') >= 0:
# NOTE(fnordahl): The default timeout in ceph-create-keys of 600
# seconds is not adequate. Increase timeout when
# timeout parameter available. For older releases
# we rely on retry_on_exception decorator.
# LP#1719436
cmd.extend(['--timeout', '1800'])
subprocess.check_call(cmd)
_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
osstat = os.stat(_client_admin_keyring)
if not osstat.st_size:
# NOTE(fnordahl): Retry will fail as long as this file exists.
# LP#1719436
os.remove(_client_admin_keyring)
raise Exception
@retry_on_exception(3, base_delay=5)
def _create_keyrings():
    """Create keyrings for operation of ceph-mon units

    For Ceph >= 14.0.0 the client.admin key is read back from the
    cluster with ``ceph auth get`` (authenticating as ``mon.`` with the
    local monitor keyring) and written to the client admin keyring
    file.  For older releases ``ceph-create-keys`` is run and the
    resulting client admin keyring is checked for being non-empty.

    :raises: Exception if keyrings cannot be created
    """
    hostname = socket.gethostname()
    if cmp_pkgrevno('ceph', '14.0.0') >= 0:
        # NOTE(jamespage): At Nautilus, keys are created by the
        #                  monitors automatically and just need
        #                  exporting.
        user = ceph_user()
        output = str(subprocess.check_output(
            [
                'sudo',
                '-u', user,
                'ceph',
                '--name', 'mon.',
                '--keyring',
                '/var/lib/ceph/mon/ceph-{}/keyring'.format(hostname),
                'auth', 'get', 'client.admin',
            ]).decode('UTF-8')).strip()
        if not output:
            # NOTE: key not yet created, raise exception and retry
            raise Exception(
                'client.admin key not yet available from ceph-mon')
        write_file(_client_admin_keyring, output,
                   owner=user, group=user,
                   perms=0o400)
    else:
        # NOTE(jamespage): Later ceph releases require explicit
        #                  call to ceph-create-keys to setup the
        #                  admin keys for the cluster; this command
        #                  will wait for quorum in the cluster before
        #                  returning.
        # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older
        #                 ceph releases too.  This improves bootstrap
        #                 resilience as the charm will wait for
        #                 presence of peer units before attempting
        #                 to bootstrap.  Note that charms deploying
        #                 ceph-mon service should disable running of
        #                 `ceph-create-keys` service in init system.
        cmd = ['ceph-create-keys', '--id', hostname]
        if cmp_pkgrevno('ceph', '12.0.0') >= 0:
            # NOTE(fnordahl): The default timeout in ceph-create-keys of 600
            #                 seconds is not adequate.  Increase timeout when
            #                 timeout parameter available.  For older releases
            #                 we rely on retry_on_exception decorator.
            #                 LP#1719436
            cmd.extend(['--timeout', '1800'])
        subprocess.check_call(cmd)
        osstat = os.stat(_client_admin_keyring)
        if not osstat.st_size:
            # NOTE(fnordahl): Retry will fail as long as this file exists.
            #                 LP#1719436
            os.remove(_client_admin_keyring)
            raise Exception(
                'ceph-create-keys produced an empty {}'.format(
                    _client_admin_keyring))
def update_monfs():
@ -2733,6 +2787,7 @@ UPGRADE_PATHS = collections.OrderedDict([
('hammer', 'jewel'),
('jewel', 'luminous'),
('luminous', 'mimic'),
('mimic', 'nautilus'),
])
# Map UCA codenames to ceph codenames
@ -2748,6 +2803,7 @@ UCA_CODENAME_MAP = {
'queens': 'luminous',
'rocky': 'mimic',
'stein': 'mimic',
'train': 'nautilus',
}

View File

@ -1,5 +1,5 @@
[tox]
envlist = pep8,py27,py36
envlist = pep8,py27,py36,py37
skipsdist = True
skip_missing_interpreters = True
@ -23,6 +23,11 @@ basepython = python3.6
deps = -r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
[testenv:py37]
basepython = python3.7
deps = -r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
[testenv:pep8]
basepython = python3
deps = -r{toxinidir}/requirements.txt

View File

@ -632,29 +632,20 @@ class CephTestCase(unittest.TestCase):
self.assertEqual(utils.PACKAGES,
utils.determine_packages())
@patch.object(utils, 'chownr')
@patch.object(utils, 'cmp_pkgrevno')
@patch.object(utils, '_create_monitor')
@patch.object(utils, '_create_keyrings')
@patch.object(utils, 'ceph_user')
@patch.object(utils, 'os')
@patch.object(utils, 'systemd')
@patch.object(utils, 'log')
@patch.object(utils, 'mkdir')
@patch.object(utils.subprocess, 'check_output')
@patch.object(utils.subprocess, 'check_call')
@patch.object(utils, 'service_restart')
@patch.object(utils.socket, 'gethostname', lambda: 'TestHost')
def _test_bootstrap_monitor_cluster(self,
mock_service_restart,
mock_check_call,
mock_check_output,
mock_mkdir,
mock_log,
mock_systemd,
mock_os,
mock_ceph_user,
mock_cmp_pkgrevno,
mock_chownr,
luminous=False):
def test_bootstrap_monitor_cluster(self,
mock_mkdir,
mock_systemd,
mock_os,
mock_ceph_user,
mock_create_keyrings,
mock_create_monitor):
test_hostname = utils.socket.gethostname()
test_secret = 'mysecret'
test_keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(test_hostname)
@ -664,9 +655,57 @@ class CephTestCase(unittest.TestCase):
mock_os.path.exists.return_value = False
mock_systemd.return_value = True
mock_cmp_pkgrevno.return_value = 1 if luminous else -1
mock_ceph_user.return_value = 'ceph'
utils.bootstrap_monitor_cluster(test_secret)
mock_mkdir.assert_has_calls([
call('/var/run/ceph', owner='ceph',
group='ceph', perms=0o755),
call(test_path, owner='ceph', group='ceph',
perms=0o755),
])
mock_create_monitor.assert_called_once_with(
test_keyring,
test_secret,
test_hostname,
test_path,
test_done,
test_init_marker,
)
mock_create_keyrings.assert_called_once_with()
mock_os.unlink.assert_called_with(test_keyring)
@patch.object(utils, 'systemd')
@patch.object(utils, 'chownr')
@patch.object(utils, 'cmp_pkgrevno')
@patch.object(utils, 'ceph_user')
@patch.object(utils.subprocess, 'check_call')
@patch.object(utils, 'service_restart')
@patch.object(utils.socket, 'gethostname', lambda: 'TestHost')
def _test_create_monitor(self,
mock_service_restart,
mock_check_call,
mock_ceph_user,
mock_cmp_pkgrevno,
mock_chownr,
mock_systemd,
nautilus=False):
test_hostname = utils.socket.gethostname()
test_secret = 'mysecret'
test_keyring = '/var/lib/ceph/tmp/{}.mon.keyring'.format(test_hostname)
test_path = '/var/lib/ceph/mon/ceph-{}'.format(test_hostname)
test_done = '{}/done'.format(test_path)
test_init_marker = '{}/systemd'.format(test_path)
mock_systemd.return_value = True
mock_cmp_pkgrevno.return_value = 1 if nautilus else -1
mock_ceph_user.return_value = 'ceph'
test_systemd_unit = (
'ceph-mon@{}'.format(test_hostname) if nautilus else 'ceph-mon'
)
test_calls = [
call(
['ceph-authtool', test_keyring,
@ -679,36 +718,103 @@ class CephTestCase(unittest.TestCase):
'-i', test_hostname,
'--keyring', test_keyring]
),
call(['systemctl', 'enable', 'ceph-mon']),
call(['systemctl', 'enable', test_systemd_unit])
]
if luminous:
test_calls.append(
call(['ceph-create-keys', '--id', test_hostname, '--timeout',
'1800'])
)
fake_open = mock_open()
with patch('ceph.utils.open', fake_open, create=True):
utils.bootstrap_monitor_cluster(test_secret)
utils._create_monitor(
test_keyring,
test_secret,
test_hostname,
test_path,
test_done,
test_init_marker
)
mock_check_call.assert_has_calls(test_calls)
mock_service_restart.assert_called_with('ceph-mon')
mock_mkdir.assert_has_calls([
call('/var/run/ceph', owner='ceph',
group='ceph', perms=0o755),
call(test_path, owner='ceph', group='ceph',
perms=0o755),
])
fake_open.assert_has_calls([call(test_done, 'w'),
call(test_init_marker, 'w')],
any_order=True)
mock_os.unlink.assert_called_with(test_keyring)
mock_service_restart.assert_called_with(test_systemd_unit)
def test_bootstrap_monitor_cluster(self):
self._test_bootstrap_monitor_cluster(luminous=False)
def test_create_monitor(self):
self._test_create_monitor(nautilus=False)
def test_bootstrap_monitor_cluster_luminous(self):
self._test_bootstrap_monitor_cluster(luminous=True)
def test_create_monitor_nautilus(self):
self._test_create_monitor(nautilus=True)
@patch.object(utils, 'write_file')
@patch.object(utils, 'cmp_pkgrevno')
@patch.object(utils, 'ceph_user')
@patch.object(utils, 'os')
@patch.object(utils.subprocess, 'check_output')
@patch.object(utils.subprocess, 'check_call')
@patch.object(utils.socket, 'gethostname', lambda: 'TestHost')
def _test_create_keyrings(self,
                          mock_check_call,
                          mock_check_output,
                          mock_os,
                          mock_ceph_user,
                          mock_cmp_pkgrevno,
                          mock_write_file,
                          ceph_version='10.0.0'):
    """Shared driver for the _create_keyrings tests.

    Runs utils._create_keyrings() with the collaborators patched out
    and checks the commands issued for the given ceph_version
    ('10.0.0', '12.0.0' or '14.0.0').
    """
    def _fake_cmp(_, version):
        # NOTE: this is fairly brittle as it just
        #       does direct string comparison for
        #       version checking
        return 1 if ceph_version == version else -1

    hostname = utils.socket.gethostname()
    mock_os.path.exists.return_value = False
    mock_cmp_pkgrevno.side_effect = _fake_cmp
    mock_ceph_user.return_value = 'ceph'
    mock_check_output.return_value = b'testkey'

    # Expected check_call invocations keyed by the version under test;
    # any other version expects no particular check_call.
    expected_calls = {
        '12.0.0': [
            call(['ceph-create-keys', '--id', hostname,
                  '--timeout', '1800']),
        ],
        '10.0.0': [
            call(['ceph-create-keys', '--id', hostname]),
        ],
    }.get(ceph_version, [])

    utils._create_keyrings()

    mock_check_call.assert_has_calls(expected_calls)

    if ceph_version != '14.0.0':
        mock_check_output.assert_not_called()
        mock_write_file.assert_not_called()
        return

    mon_keyring = '/var/lib/ceph/mon/ceph-{}/keyring'.format(hostname)
    mock_check_output.assert_called_once_with([
        'sudo',
        '-u', 'ceph',
        'ceph',
        '--name', 'mon.',
        '--keyring',
        mon_keyring,
        'auth', 'get', 'client.admin',
    ])
    mock_write_file.assert_called_with(
        '/etc/ceph/ceph.client.admin.keyring',
        'testkey', group='ceph', owner='ceph',
        perms=0o400
    )
def test_create_keyrings(self):
    """Run the keyring test with the helper's default ceph_version."""
    self._test_create_keyrings()
def test_create_keyrings_luminous(self):
    """Run the keyring test with ceph_version '12.0.0'."""
    self._test_create_keyrings(ceph_version='12.0.0')
def test_create_keyrings_nautilus(self):
    """Run the keyring test with ceph_version '14.0.0'."""
    self._test_create_keyrings(ceph_version='14.0.0')
@patch.object(utils, 'chownr')
@patch.object(utils, 'cmp_pkgrevno')
@ -786,6 +892,7 @@ class CephTestCase(unittest.TestCase):
'hammer -> jewel',
'jewel -> luminous',
'luminous -> mimic',
'mimic -> nautilus',
])
self.assertEqual(utils.pretty_print_upgrade_paths(), expected)