Add support for Nautilus release

Resync charms.ceph to pick up changes supporting the new approach to
admin and bootstrap key generation within the ceph-mon cluster.
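
As an illustration of the new approach, a minimal sketch of the admin
key export performed by the resynced _create_keyrings helper in the
diff below (the keyring path is taken from that code; the 'ceph' user
is assumed here rather than resolved via ceph_user()):

    import socket
    import subprocess

    # Nautilus mons create the client.admin key themselves; export it
    # by authenticating with the local mon's own keyring.
    keyring = '/var/lib/ceph/mon/ceph-{}/keyring'.format(socket.gethostname())
    admin_key = subprocess.check_output(
        ['sudo', '-u', 'ceph', 'ceph', '--name', 'mon.',
         '--keyring', keyring,
         'auth', 'get', 'client.admin']).decode('UTF-8').strip()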

Update get_mon_hosts to stop specifying the port number for the MON
daemons; the default port is used so it is not required, and from
Nautilus the MON daemons listen on both the v1 and v2 messenger ports.

Specifying the port in ceph.conf disables the v2 messenger port,
which is not the desired behaviour on upgrade or for new
installations.
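
For example (addresses hypothetical), the rendered monitor host list
now carries bare addresses, letting clients negotiate the v2 messenger
on port 3300 and fall back to v1 on 6789:

    # Previously: the explicit port pins clients to the v1 messenger.
    mon_hosts = ['10.5.0.10:6789', '10.5.0.11:6789', '10.5.0.12:6789']

    # Now: bare addresses, so both messenger ports remain usable.
    mon_hosts = ['10.5.0.10', '10.5.0.11', '10.5.0.12']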

Drop the start hook; it has been present in the charm since its
inception as 'ceph' but is not really needed: re-deploying the charm
to units which previously hosted ceph-mon is dangerous, and the
ceph-* daemons should still be running anyway.

Depends-On: I2dfab7404b72e314625ea554ee64595c5e26f3c6
Change-Id: I340dbf427551e6f9f8cd4ca89128627e497d6097
James Page 2019-05-30 16:48:17 +01:00
parent 63b38bf5ce
commit 872c670ed6
3 changed files with 126 additions and 63 deletions

@@ -18,7 +18,6 @@ import ast
import json
import os
import subprocess
import socket
import sys
import uuid
@@ -51,7 +50,6 @@ from charmhelpers.core.hookenv import (
application_version_set)
from charmhelpers.core.host import (
service_pause,
service_restart,
mkdir,
write_file,
rsync,
@@ -298,7 +296,7 @@ def config_changed():
def get_mon_hosts():
hosts = []
addr = get_public_addr()
hosts.append('{}:6789'.format(format_ipv6_addr(addr) or addr))
hosts.append(format_ipv6_addr(addr) or addr)
rel_ids = relation_ids('mon')
if config('no-bootstrap'):
@@ -308,8 +306,7 @@ def get_mon_hosts():
for unit in related_units(relid):
addr = relation_get('ceph-public-address', unit, relid)
if addr is not None:
hosts.append('{}:6789'.format(
format_ipv6_addr(addr) or addr))
hosts.append(format_ipv6_addr(addr) or addr)
return sorted(hosts)
@@ -818,18 +815,6 @@ def upgrade_charm():
notify_rbd_mirrors()
@hooks.hook('start')
def start():
# In case we're being redeployed to the same machines, try
# to make sure everything is running as soon as possible.
if ceph.systemd():
service_restart('ceph-mon')
else:
service_restart('ceph-mon-all')
if cmp_pkgrevno('ceph', '12.0.0') >= 0:
service_restart('ceph-mgr@{}'.format(socket.gethostname()))
@hooks.hook('nrpe-external-master-relation-joined')
@hooks.hook('nrpe-external-master-relation-changed')
def update_nrpe_config():

@@ -40,6 +40,7 @@ from charmhelpers.core.host import (
service_start,
service_stop,
CompareHostReleases,
write_file,
)
from charmhelpers.core.hookenv import (
cached,
@@ -82,7 +83,7 @@ QUORUM = [LEADER, PEON]
PACKAGES = ['ceph', 'gdisk', 'btrfs-tools',
'radosgw', 'xfsprogs',
'lvm2', 'parted']
'lvm2', 'parted', 'smartmontools']
CEPH_KEY_MANAGER = 'ceph'
VAULT_KEY_MANAGER = 'vault'
@@ -950,13 +951,13 @@ def rescan_osd_devices():
udevadm_settle()
_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
_upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring"
def is_bootstrapped():
return os.path.exists(_bootstrap_keyring)
return os.path.exists(_client_admin_keyring)
def wait_for_bootstrap():
@@ -1259,7 +1260,23 @@ def systemd():
return CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'vivid'
def use_bluestore():
"""Determine whether bluestore should be used for OSD's
:returns: whether bluestore disk format should be used
:rtype: bool"""
if cmp_pkgrevno('ceph', '12.2.0') < 0:
return False
return config('bluestore')
def bootstrap_monitor_cluster(secret):
"""Bootstrap local ceph mon into the ceph cluster
:param secret: cephx secret to use for monitor authentication
:type secret: str
:raises: Exception if ceph mon cannot be bootstrapped
"""
hostname = socket.gethostname()
path = '/var/lib/ceph/mon/ceph-{}'.format(hostname)
done = '{}/done'.format(path)
@@ -1280,21 +1297,35 @@ def bootstrap_monitor_cluster(secret):
perms=0o755)
# end changes for Ceph >= 0.61.3
try:
add_keyring_to_ceph(keyring,
secret,
hostname,
path,
done,
init_marker)
_create_monitor(keyring,
secret,
hostname,
path,
done,
init_marker)
_create_keyrings()
except:
raise
finally:
os.unlink(keyring)
@retry_on_exception(3, base_delay=5)
def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
def _create_monitor(keyring, secret, hostname, path, done, init_marker):
"""Create monitor filesystem and enable and start ceph-mon process
:param keyring: path to temporary keyring on disk
:type keyring: str
:param secret: cephx secret to use for monitor authentication
:type secret: str
:param hostname: hostname of the local unit
:type hostname: str
:param path: full path to ceph mon directory
:type path: str
:param done: full path to 'done' marker for ceph mon
:type done: str
:param init_marker: full path to 'init' marker for ceph mon
:type init_marker: str
"""
subprocess.check_call(['ceph-authtool', keyring,
'--create-keyring', '--name=mon.',
'--add-key={}'.format(secret),
@@ -1310,39 +1341,72 @@ def add_keyring_to_ceph(keyring, secret, hostname, path, done, init_marker):
pass
if systemd():
subprocess.check_call(['systemctl', 'enable', 'ceph-mon'])
service_restart('ceph-mon')
if cmp_pkgrevno('ceph', '14.0.0') >= 0:
systemd_unit = 'ceph-mon@{}'.format(socket.gethostname())
else:
systemd_unit = 'ceph-mon'
subprocess.check_call(['systemctl', 'enable', systemd_unit])
service_restart(systemd_unit)
else:
service_restart('ceph-mon-all')
# NOTE(jamespage): Later ceph releases require explicit
# call to ceph-create-keys to setup the
# admin keys for the cluster; this command
# will wait for quorum in the cluster before
# returning.
# NOTE(fnordahl): Explicitly run `ceph-create-keys` for older
# ceph releases too. This improves bootstrap
# resilience as the charm will wait for
# presence of peer units before attempting
# to bootstrap. Note that charms deploying
# ceph-mon service should disable running of
# `ceph-create-keys` service in init system.
cmd = ['ceph-create-keys', '--id', hostname]
if cmp_pkgrevno('ceph', '12.0.0') >= 0:
# NOTE(fnordahl): The default timeout in ceph-create-keys of 600
# seconds is not adequate. Increase timeout when
# timeout parameter available. For older releases
# we rely on retry_on_exception decorator.
# LP#1719436
cmd.extend(['--timeout', '1800'])
subprocess.check_call(cmd)
_client_admin_keyring = '/etc/ceph/ceph.client.admin.keyring'
osstat = os.stat(_client_admin_keyring)
if not osstat.st_size:
# NOTE(fnordahl): Retry will fail as long as this file exists.
# LP#1719436
os.remove(_client_admin_keyring)
raise Exception
@retry_on_exception(3, base_delay=5)
def _create_keyrings():
"""Create keyrings for operation of ceph-mon units
:raises: Exception if keyrings cannot be created
"""
if cmp_pkgrevno('ceph', '14.0.0') >= 0:
# NOTE(jamespage): At Nautilus, keys are created by the
# monitors automatically and just need
# exporting.
output = str(subprocess.check_output(
[
'sudo',
'-u', ceph_user(),
'ceph',
'--name', 'mon.',
'--keyring',
'/var/lib/ceph/mon/ceph-{}/keyring'.format(
socket.gethostname()
),
'auth', 'get', 'client.admin',
]).decode('UTF-8')).strip()
if not output:
# NOTE: key not yet created, raise exception and retry
raise Exception
write_file(_client_admin_keyring, output,
owner=ceph_user(), group=ceph_user(),
perms=0o400)
else:
# NOTE(jamespage): Later ceph releases require explicit
# call to ceph-create-keys to setup the
# admin keys for the cluster; this command
# will wait for quorum in the cluster before
# returning.
# NOTE(fnordahl): Explicitly run `ceph-create-keys` for older
# ceph releases too. This improves bootstrap
# resilience as the charm will wait for
# presence of peer units before attempting
# to bootstrap. Note that charms deploying
# ceph-mon service should disable running of
# `ceph-create-keys` service in init system.
cmd = ['ceph-create-keys', '--id', socket.gethostname()]
if cmp_pkgrevno('ceph', '12.0.0') >= 0:
# NOTE(fnordahl): The default timeout in ceph-create-keys of 600
# seconds is not adequate. Increase timeout when
# timeout parameter available. For older releases
# we rely on retry_on_exception decorator.
# LP#1719436
cmd.extend(['--timeout', '1800'])
subprocess.check_call(cmd)
osstat = os.stat(_client_admin_keyring)
if not osstat.st_size:
# NOTE(fnordahl): Retry will fail as long as this file exists.
# LP#1719436
os.remove(_client_admin_keyring)
raise Exception
def update_monfs():
@@ -1555,7 +1619,7 @@ def _ceph_disk(dev, osd_format, osd_journal, encrypt=False, bluestore=False):
cmd.append(osd_format)
# NOTE(jamespage): enable experimental bluestore support
if cmp_pkgrevno('ceph', '10.2.0') >= 0 and bluestore:
if use_bluestore():
cmd.append('--bluestore')
wal = get_devices('bluestore-wal')
if wal:
@@ -1692,7 +1756,10 @@ def is_active_bluestore_device(dev):
return False
vg_name = lvm.list_lvm_volume_group(dev)
lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0]
try:
lv_name = lvm.list_logical_volumes('vg_name={}'.format(vg_name))[0]
except IndexError:
return False
block_symlinks = glob.glob('/var/lib/ceph/osd/ceph-*/block')
for block_candidate in block_symlinks:
@@ -2700,6 +2767,14 @@ def dirs_need_ownership_update(service):
if (curr_owner == expected_owner) and (curr_group == expected_group):
continue
# NOTE(lathiat): when config_changed runs on reboot, the OSD might not
# yet be mounted or started, and the underlying directory the OSD is
# mounted to is expected to be owned by root. So skip the check. This
# may also happen for OSD directories for OSDs that were removed.
if (service == 'osd' and
not os.path.exists(os.path.join(child, 'magic'))):
continue
log('Directory "%s" needs its ownership updated' % child, DEBUG)
return True
@@ -2712,6 +2787,8 @@ UPGRADE_PATHS = collections.OrderedDict([
('hammer', 'jewel'),
('jewel', 'luminous'),
('luminous', 'mimic'),
('luminous', 'nautilus'),
('mimic', 'nautilus'),
])
# Map UCA codenames to ceph codenames
@@ -2727,6 +2804,7 @@ UCA_CODENAME_MAP = {
'queens': 'luminous',
'rocky': 'mimic',
'stein': 'mimic',
'train': 'nautilus',
}

@@ -619,8 +619,8 @@ class BootstrapSourceTestCase(test_utils.CharmTestCase):
self.relation_get.side_effect = rel_get_side_effect
hosts = ceph_hooks.get_mon_hosts()
self.assertEqual(hosts, [
'172.16.0.2:6789', '172.16.0.3:6789', '172.16.0.4:6789',
'172.16.10.2:6789', '172.16.10.3:6789', '172.16.10.4:6789',
'172.16.0.2', '172.16.0.3', '172.16.0.4',
'172.16.10.2', '172.16.10.3', '172.16.10.4',
])