From 872c670ed6367ff575262f9284273ff60bffd6b7 Mon Sep 17 00:00:00 2001 From: James Page Date: Thu, 30 May 2019 16:48:17 +0100 Subject: [PATCH] Add support for Nautilus release Resync charms.ceph to pick up changes to support new approach to admin and bootstrap key generation within the ceph-mon cluster. Update get_mon_hosts to avoid specifying the port number for the MON daemons; we use the default so this is not required, and at Nautilus the MON daemons run both v1 and v2 messenger ports. Specifying the port in the ceph.conf file disables the v2 messenger port which is not the desired behaviour on upgrade or new installation. Drop start hook; this has been present in the charm since its inception as 'ceph' but is really not needed - re-deploying the charm to units which previously hosted ceph-mon is dangerous and the ceph-* daemons should still be running anyway. Depends-On: I2dfab7404b72e314625ea554ee64595c5e26f3c6 Change-Id: I340dbf427551e6f9f8cd4ca89128627e497d6097 --- hooks/ceph_hooks.py | 19 +--- lib/ceph/utils.py | 166 +++++++++++++++++++++++++--------- unit_tests/test_ceph_hooks.py | 4 +- 3 files changed, 126 insertions(+), 63 deletions(-) diff --git a/hooks/ceph_hooks.py b/hooks/ceph_hooks.py index ccdc3ce3..b889fee2 100755 --- a/hooks/ceph_hooks.py +++ b/hooks/ceph_hooks.py @@ -18,7 +18,6 @@ import ast import json import os import subprocess -import socket import sys import uuid @@ -51,7 +50,6 @@ from charmhelpers.core.hookenv import ( application_version_set) from charmhelpers.core.host import ( service_pause, - service_restart, mkdir, write_file, rsync, @@ -298,7 +296,7 @@ def config_changed(): def get_mon_hosts(): hosts = [] addr = get_public_addr() - hosts.append('{}:6789'.format(format_ipv6_addr(addr) or addr)) + hosts.append(format_ipv6_addr(addr) or addr) rel_ids = relation_ids('mon') if config('no-bootstrap'): @@ -308,8 +306,7 @@ def get_mon_hosts(): for unit in related_units(relid): addr = relation_get('ceph-public-address', unit, relid) if addr is 
not None: - hosts.append('{}:6789'.format( - format_ipv6_addr(addr) or addr)) + hosts.append(format_ipv6_addr(addr) or addr) return sorted(hosts) @@ -818,18 +815,6 @@ def upgrade_charm(): notify_rbd_mirrors() -@hooks.hook('start') -def start(): - # In case we're being redeployed to the same machines, try - # to make sure everything is running as soon as possible. - if ceph.systemd(): - service_restart('ceph-mon') - else: - service_restart('ceph-mon-all') - if cmp_pkgrevno('ceph', '12.0.0') >= 0: - service_restart('ceph-mgr@{}'.format(socket.gethostname())) - - @hooks.hook('nrpe-external-master-relation-joined') @hooks.hook('nrpe-external-master-relation-changed') def update_nrpe_config(): diff --git a/lib/ceph/utils.py b/lib/ceph/utils.py index b4f87907..970b15fe 100644 --- a/lib/ceph/utils.py +++ b/lib/ceph/utils.py @@ -40,6 +40,7 @@ from charmhelpers.core.host import ( service_start, service_stop, CompareHostReleases, + write_file, ) from charmhelpers.core.hookenv import ( cached, @@ -82,7 +83,7 @@ QUORUM = [LEADER, PEON] PACKAGES = ['ceph', 'gdisk', 'btrfs-tools', 'radosgw', 'xfsprogs', - 'lvm2', 'parted'] + 'lvm2', 'parted', 'smartmontools'] CEPH_KEY_MANAGER = 'ceph' VAULT_KEY_MANAGER = 'vault' @@ -950,13 +951,13 @@ def rescan_osd_devices(): udevadm_settle() - +_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring' _bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring" _upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring" def is_bootstrapped(): - return os.path.exists(_bootstrap_keyring) + return os.path.exists(_client_admin_keyring) def wait_for_bootstrap(): @@ -1259,7 +1260,23 @@ def systemd(): return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid' +def use_bluestore(): + """Determine whether bluestore should be used for OSD's + + :returns: whether bluestore disk format should be used + :rtype: bool""" + if cmp_pkgrevno('ceph', '12.2.0') < 0: + return False + return config('bluestore') + + def 
bootstrap_monitor_cluster(secret): + """Bootstrap local ceph mon into the ceph cluster + + :param secret: cephx secret to use for monitor authentication + :type secret: str + :raises: Exception if ceph mon cannot be bootstrapped + """ hostname = socket.gethostname() path = '/var/lib/ceph/mon/ceph-{}'.format(hostname) done = '{}/done'.format(path) @@ -1280,21 +1297,35 @@ def bootstrap_monitor_cluster(secret): perms=0o755) # end changes for Ceph >= 0.61.3 try: - add_keyring_to_ceph(keyring, - secret, - hostname, - path, - done, - init_marker) - + _create_monitor(keyring, + secret, + hostname, + path, + done, + init_marker) + _create_keyrings() except: raise finally: os.unlink(keyring) -@retry_on_exception(3, base_delay=5) -def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker): +def _create_monitor(keyring, secret, hostname, path, done, init_marker): + """Create monitor filesystem and enable and start ceph-mon process + + :param keyring: path to temporary keyring on disk + :type keyring: str + :param secret: cephx secret to use for monitor authentication + :type: secret: str + :param hostname: hostname of the local unit + :type hostname: str + :param path: full path to ceph mon directory + :type path: str + :param done: full path to 'done' marker for ceph mon + :type done: str + :param init_marker: full path to 'init' marker for ceph mon + :type init_marker: str + """ subprocess.check_call(['ceph-authtool', keyring, '--create-keyring', '--name=mon.', '--add-key={}'.format(secret), @@ -1310,39 +1341,72 @@ def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker): pass if systemd(): - subprocess.check_call(['systemctl', 'enable', 'ceph-mon']) - service_restart('ceph-mon') + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + systemd_unit = 'ceph-mon@{}'.format(socket.gethostname()) + else: + systemd_unit = 'ceph-mon' + subprocess.check_call(['systemctl', 'enable', systemd_unit]) + service_restart(systemd_unit) else: 
service_restart('ceph-mon-all') - # NOTE(jamespage): Later ceph releases require explicit - # call to ceph-create-keys to setup the - # admin keys for the cluster; this command - # will wait for quorum in the cluster before - # returning. - # NOTE(fnordahl): Explicitly run `ceph-crate-keys` for older - # ceph releases too. This improves bootstrap - # resilience as the charm will wait for - # presence of peer units before attempting - # to bootstrap. Note that charms deploying - # ceph-mon service should disable running of - # `ceph-create-keys` service in init system. - cmd = ['ceph-create-keys', '--id', hostname] - if cmp_pkgrevno('ceph', '12.0.0') >= 0: - # NOTE(fnordahl): The default timeout in ceph-create-keys of 600 - # seconds is not adequate. Increase timeout when - # timeout parameter available. For older releases - # we rely on retry_on_exception decorator. - # LP#1719436 - cmd.extend(['--timeout', '1800']) - subprocess.check_call(cmd) - _client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring' - osstat = os.stat(_client_admin_keyring) - if not osstat.st_size: - # NOTE(fnordahl): Retry will fail as long as this file exists. - # LP#1719436 - os.remove(_client_admin_keyring) - raise Exception + +@retry_on_exception(3, base_delay=5) +def _create_keyrings(): + """Create keyrings for operation of ceph-mon units + + :raises: Exception if keyrings cannot be created + """ + if cmp_pkgrevno('ceph', '14.0.0') >= 0: + # NOTE(jamespage): At Nautilus, keys are created by the + # monitors automatically and just need + # exporting. 
+ output = str(subprocess.check_output( + [ + 'sudo', + '-u', ceph_user(), + 'ceph', + '--name', 'mon.', + '--keyring', + '/var/lib/ceph/mon/ceph-{}/keyring'.format( + socket.gethostname() + ), + 'auth', 'get', 'client.admin', + ]).decode('UTF-8')).strip() + if not output: + # NOTE: key not yet created, raise exception and retry + raise Exception + write_file(_client_admin_keyring, output, + owner=ceph_user(), group=ceph_user(), + perms=0o400) + else: + # NOTE(jamespage): Later ceph releases require explicit + # call to ceph-create-keys to setup the + # admin keys for the cluster; this command + # will wait for quorum in the cluster before + # returning. + # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older + # ceph releases too. This improves bootstrap + # resilience as the charm will wait for + # presence of peer units before attempting + # to bootstrap. Note that charms deploying + # ceph-mon service should disable running of + # `ceph-create-keys` service in init system. + cmd = ['ceph-create-keys', '--id', socket.gethostname()] + if cmp_pkgrevno('ceph', '12.0.0') >= 0: + # NOTE(fnordahl): The default timeout in ceph-create-keys of 600 + # seconds is not adequate. Increase timeout when + # timeout parameter available. For older releases + # we rely on retry_on_exception decorator. + # LP#1719436 + cmd.extend(['--timeout', '1800']) + subprocess.check_call(cmd) + osstat = os.stat(_client_admin_keyring) + if not osstat.st_size: + # NOTE(fnordahl): Retry will fail as long as this file exists. 
+ # LP#1719436 + os.remove(_client_admin_keyring) + raise Exception def update_monfs(): @@ -1555,7 +1619,7 @@ def _ceph_disk(dev, osd_format, osd_journal, encrypt=False, bluestore=False): cmd.append(osd_format) # NOTE(jamespage): enable experimental bluestore support - if cmp_pkgrevno('ceph', '10.2.0') >= 0 and bluestore: + if use_bluestore(): cmd.append('--bluestore') wal = get_devices('bluestore-wal') if wal: @@ -1692,7 +1756,10 @@ def is_active_bluestore_device(dev): return False vg_name = lvm.list_lvm_volume_group(dev) - lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0] + try: + lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0] + except IndexError: + return False block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block') for block_candidate in block_symlinks: @@ -2700,6 +2767,14 @@ def dirs_need_ownership_update(service): if (curr_owner == expected_owner) and (curr_group == expected_group): continue + # NOTE(lathiat): when config_changed runs on reboot, the OSD might not + # yet be mounted or started, and the underlying directory the OSD is + # mounted to is expected to be owned by root. So skip the check. This + # may also happen for OSD directories for OSDs that were removed. 
+ if (service == 'osd' and + not os.path.exists(os.path.join(child, 'magic'))): + continue + log('Directory "%s" needs its ownership updated' % child, DEBUG) return True @@ -2712,6 +2787,8 @@ UPGRADE_PATHS = collections.OrderedDict([ ('hammer', 'jewel'), ('jewel', 'luminous'), ('luminous', 'mimic'), + ('luminous', 'nautilus'), + ('mimic', 'nautilus'), ]) # Map UCA codenames to ceph codenames @@ -2727,6 +2804,7 @@ UCA_CODENAME_MAP = { 'queens': 'luminous', 'rocky': 'mimic', 'stein': 'mimic', + 'train': 'nautilus', } diff --git a/unit_tests/test_ceph_hooks.py b/unit_tests/test_ceph_hooks.py index a0ed8d11..f3a149bf 100644 --- a/unit_tests/test_ceph_hooks.py +++ b/unit_tests/test_ceph_hooks.py @@ -619,8 +619,8 @@ class BootstrapSourceTestCase(test_utils.CharmTestCase): self.relation_get.side_effect = rel_get_side_effect hosts = ceph_hooks.get_mon_hosts() self.assertEqual(hosts, [ - '172.16.0.2:6789', '172.16.0.3:6789', '172.16.0.4:6789', - '172.16.10.2:6789', '172.16.10.3:6789', '172.16.10.4:6789', + '172.16.0.2', '172.16.0.3', '172.16.0.4', + '172.16.10.2', '172.16.10.3', '172.16.10.4', ])