diff --git a/hooks/ceph_hooks.py b/hooks/ceph_hooks.py
index ccdc3ce3..b889fee2 100755
--- a/hooks/ceph_hooks.py
+++ b/hooks/ceph_hooks.py
@@ -18,7 +18,6 @@
 import ast
 import json
 import os
 import subprocess
-import socket
 import sys
 import uuid
@@ -51,7 +50,6 @@ from charmhelpers.core.hookenv import (
     application_version_set)
 from charmhelpers.core.host import (
     service_pause,
-    service_restart,
     mkdir,
     write_file,
     rsync,
@@ -298,7 +296,7 @@ def config_changed():
 def get_mon_hosts():
     hosts = []
     addr = get_public_addr()
-    hosts.append('{}:6789'.format(format_ipv6_addr(addr) or addr))
+    hosts.append(format_ipv6_addr(addr) or addr)
 
     rel_ids = relation_ids('mon')
     if config('no-bootstrap'):
@@ -308,8 +306,7 @@ def get_mon_hosts():
         for unit in related_units(relid):
             addr = relation_get('ceph-public-address', unit, relid)
             if addr is not None:
-                hosts.append('{}:6789'.format(
-                    format_ipv6_addr(addr) or addr))
+                hosts.append(format_ipv6_addr(addr) or addr)
 
     return sorted(hosts)
 
@@ -818,18 +815,6 @@ def upgrade_charm():
     notify_rbd_mirrors()
 
 
-@hooks.hook('start')
-def start():
-    # In case we're being redeployed to the same machines, try
-    # to make sure everything is running as soon as possible.
-    if ceph.systemd():
-        service_restart('ceph-mon')
-    else:
-        service_restart('ceph-mon-all')
-    if cmp_pkgrevno('ceph', '12.0.0') >= 0:
-        service_restart('ceph-mgr@{}'.format(socket.gethostname()))
-
-
 @hooks.hook('nrpe-external-master-relation-joined')
 @hooks.hook('nrpe-external-master-relation-changed')
 def update_nrpe_config():
diff --git a/lib/ceph/utils.py b/lib/ceph/utils.py
index b4f87907..970b15fe 100644
--- a/lib/ceph/utils.py
+++ b/lib/ceph/utils.py
@@ -40,6 +40,7 @@ from charmhelpers.core.host import (
     service_start,
     service_stop,
     CompareHostReleases,
+    write_file,
 )
 from charmhelpers.core.hookenv import (
     cached,
@@ -82,7 +83,7 @@ QUORUM = [LEADER, PEON]
 
 PACKAGES = ['ceph', 'gdisk', 'btrfs-tools',
             'radosgw', 'xfsprogs',
-            'lvm2', 'parted']
+            'lvm2', 'parted', 'smartmontools']
 
 CEPH_KEY_MANAGER = 'ceph'
 VAULT_KEY_MANAGER = 'vault'
@@ -950,13 +951,13 @@ def rescan_osd_devices():
 
     udevadm_settle()
 
-
+_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
 _bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
 _upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring"
 
 
 def is_bootstrapped():
-    return os.path.exists(_bootstrap_keyring)
+    return os.path.exists(_client_admin_keyring)
 
 
 def wait_for_bootstrap():
@@ -1259,7 +1260,23 @@ def systemd():
     return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid'
 
 
+def use_bluestore():
+    """Determine whether bluestore should be used for OSD's
+
+    :returns: whether bluestore disk format should be used
+    :rtype: bool"""
+    if cmp_pkgrevno('ceph', '12.2.0') < 0:
+        return False
+    return config('bluestore')
+
+
 def bootstrap_monitor_cluster(secret):
+    """Bootstrap local ceph mon into the ceph cluster
+
+    :param secret: cephx secret to use for monitor authentication
+    :type secret: str
+    :raises: Exception if ceph mon cannot be bootstrapped
+    """
     hostname = socket.gethostname()
     path = '/var/lib/ceph/mon/ceph-{}'.format(hostname)
     done = '{}/done'.format(path)
@@ -1280,21 +1297,35 @@ def bootstrap_monitor_cluster(secret):
               perms=0o755)
         # end changes for Ceph >= 0.61.3
         try:
-            add_keyring_to_ceph(keyring,
-                                secret,
-                                hostname,
-                                path,
-                                done,
-                                init_marker)
-
+            _create_monitor(keyring,
+                            secret,
+                            hostname,
+                            path,
+                            done,
+                            init_marker)
+            _create_keyrings()
         except:
             raise
         finally:
             os.unlink(keyring)
 
 
-@retry_on_exception(3, base_delay=5)
-def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
+def _create_monitor(keyring, secret, hostname, path, done, init_marker):
+    """Create monitor filesystem and enable and start ceph-mon process
+
+    :param keyring: path to temporary keyring on disk
+    :type keyring: str
+    :param secret: cephx secret to use for monitor authentication
+    :type: secret: str
+    :param hostname: hostname of the local unit
+    :type hostname: str
+    :param path: full path to ceph mon directory
+    :type path: str
+    :param done: full path to 'done' marker for ceph mon
+    :type done: str
+    :param init_marker: full path to 'init' marker for ceph mon
+    :type init_marker: str
+    """
     subprocess.check_call(['ceph-authtool', keyring,
                            '--create-keyring', '--name=mon.',
                            '--add-key={}'.format(secret),
@@ -1310,39 +1341,72 @@ def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
         pass
 
     if systemd():
-        subprocess.check_call(['systemctl', 'enable', 'ceph-mon'])
-        service_restart('ceph-mon')
+        if cmp_pkgrevno('ceph', '14.0.0') >= 0:
+            systemd_unit = 'ceph-mon@{}'.format(socket.gethostname())
+        else:
+            systemd_unit = 'ceph-mon'
+        subprocess.check_call(['systemctl', 'enable', systemd_unit])
+        service_restart(systemd_unit)
     else:
         service_restart('ceph-mon-all')
 
-    # NOTE(jamespage): Later ceph releases require explicit
-    #                  call to ceph-create-keys to setup the
-    #                  admin keys for the cluster; this command
-    #                  will wait for quorum in the cluster before
-    #                  returning.
-    # NOTE(fnordahl): Explicitly run `ceph-crate-keys` for older
-    #                 ceph releases too. This improves bootstrap
-    #                 resilience as the charm will wait for
-    #                 presence of peer units before attempting
-    #                 to bootstrap. Note that charms deploying
-    #                 ceph-mon service should disable running of
-    #                 `ceph-create-keys` service in init system.
-    cmd = ['ceph-create-keys', '--id', hostname]
-    if cmp_pkgrevno('ceph', '12.0.0') >= 0:
-        # NOTE(fnordahl): The default timeout in ceph-create-keys of 600
-        #                 seconds is not adequate. Increase timeout when
-        #                 timeout parameter available. For older releases
-        #                 we rely on retry_on_exception decorator.
-        #                 LP#1719436
-        cmd.extend(['--timeout', '1800'])
-    subprocess.check_call(cmd)
-    _client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
-    osstat = os.stat(_client_admin_keyring)
-    if not osstat.st_size:
-        # NOTE(fnordahl): Retry will fail as long as this file exists.
-        #                 LP#1719436
-        os.remove(_client_admin_keyring)
-        raise Exception
+
+@retry_on_exception(3, base_delay=5)
+def _create_keyrings():
+    """Create keyrings for operation of ceph-mon units
+
+    :raises: Exception if keyrings cannot be created
+    """
+    if cmp_pkgrevno('ceph', '14.0.0') >= 0:
+        # NOTE(jamespage): At Nautilus, keys are created by the
+        #                  monitors automatically and just need
+        #                  exporting.
+        output = str(subprocess.check_output(
+            [
+                'sudo',
+                '-u', ceph_user(),
+                'ceph',
+                '--name', 'mon.',
+                '--keyring',
+                '/var/lib/ceph/mon/ceph-{}/keyring'.format(
+                    socket.gethostname()
+                ),
+                'auth', 'get', 'client.admin',
+            ]).decode('UTF-8')).strip()
+        if not output:
+            # NOTE: key not yet created, raise exception and retry
+            raise Exception
+        write_file(_client_admin_keyring, output,
+                   owner=ceph_user(), group=ceph_user(),
+                   perms=0o400)
+    else:
+        # NOTE(jamespage): Later ceph releases require explicit
+        #                  call to ceph-create-keys to setup the
+        #                  admin keys for the cluster; this command
+        #                  will wait for quorum in the cluster before
+        #                  returning.
+        # NOTE(fnordahl): Explicitly run `ceph-create-keys` for older
+        #                 ceph releases too. This improves bootstrap
+        #                 resilience as the charm will wait for
+        #                 presence of peer units before attempting
+        #                 to bootstrap. Note that charms deploying
+        #                 ceph-mon service should disable running of
+        #                 `ceph-create-keys` service in init system.
+        cmd = ['ceph-create-keys', '--id', socket.gethostname()]
+        if cmp_pkgrevno('ceph', '12.0.0') >= 0:
+            # NOTE(fnordahl): The default timeout in ceph-create-keys of 600
+            #                 seconds is not adequate. Increase timeout when
+            #                 timeout parameter available. For older releases
+            #                 we rely on retry_on_exception decorator.
+            #                 LP#1719436
+            cmd.extend(['--timeout', '1800'])
+        subprocess.check_call(cmd)
+        osstat = os.stat(_client_admin_keyring)
+        if not osstat.st_size:
+            # NOTE(fnordahl): Retry will fail as long as this file exists.
+            #                 LP#1719436
+            os.remove(_client_admin_keyring)
+            raise Exception
 
 
 def update_monfs():
@@ -1555,7 +1619,7 @@ def _ceph_disk(dev, osd_format, osd_journal, encrypt=False, bluestore=False):
         cmd.append(osd_format)
 
     # NOTE(jamespage): enable experimental bluestore support
-    if cmp_pkgrevno('ceph', '10.2.0') >= 0 and bluestore:
+    if use_bluestore():
         cmd.append('--bluestore')
         wal = get_devices('bluestore-wal')
         if wal:
@@ -1692,7 +1756,10 @@ def is_active_bluestore_device(dev):
         return False
 
     vg_name = lvm.list_lvm_volume_group(dev)
-    lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0]
+    try:
+        lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0]
+    except IndexError:
+        return False
 
     block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block')
     for block_candidate in block_symlinks:
@@ -2700,6 +2767,14 @@ def dirs_need_ownership_update(service):
         if (curr_owner == expected_owner) and (curr_group == expected_group):
             continue
 
+        # NOTE(lathiat): when config_changed runs on reboot, the OSD might not
+        # yet be mounted or started, and the underlying directory the OSD is
+        # mounted to is expected to be owned by root. So skip the check. This
+        # may also happen for OSD directories for OSDs that were removed.
+        if (service == 'osd' and
+                not os.path.exists(os.path.join(child, 'magic'))):
+            continue
+
         log('Directory "%s" needs its ownership updated' % child, DEBUG)
         return True
 
@@ -2712,6 +2787,8 @@ UPGRADE_PATHS = collections.OrderedDict([
     ('hammer', 'jewel'),
     ('jewel', 'luminous'),
     ('luminous', 'mimic'),
+    ('luminous', 'nautilus'),
+    ('mimic', 'nautilus'),
 ])
 
 # Map UCA codenames to ceph codenames
@@ -2727,6 +2804,7 @@ UCA_CODENAME_MAP = {
     'queens': 'luminous',
     'rocky': 'mimic',
     'stein': 'mimic',
+    'train': 'nautilus',
 }
 
 
diff --git a/unit_tests/test_ceph_hooks.py b/unit_tests/test_ceph_hooks.py
index a0ed8d11..f3a149bf 100644
--- a/unit_tests/test_ceph_hooks.py
+++ b/unit_tests/test_ceph_hooks.py
@@ -619,8 +619,8 @@ class BootstrapSourceTestCase(test_utils.CharmTestCase):
         self.relation_get.side_effect = rel_get_side_effect
         hosts = ceph_hooks.get_mon_hosts()
         self.assertEqual(hosts, [
-            '172.16.0.2:6789', '172.16.0.3:6789', '172.16.0.4:6789',
-            '172.16.10.2:6789', '172.16.10.3:6789', '172.16.10.4:6789',
+            '172.16.0.2', '172.16.0.3', '172.16.0.4',
+            '172.16.10.2', '172.16.10.3', '172.16.10.4',
         ])