[v2] Fix migration across nova-compute apps using ceph

This change reworks previous changes [1] and [2] that had
been respectively reverted and abandoned.

When using the config libvirt-image-backend=rbd, VMs
created from image have their disk data stored in ceph
instead of the compute node itself.

When performing live-migrations, both nodes need to
access the same ceph credentials to access the VM's
disk in ceph, but this is currently not possible
if the nodes involved pertain to different
nova-compute charm apps.

This patch changes app name sent to ceph to
'nova-compute-ceph-auth-c91ce26f', a unique name common to
all nova-compute apps, allowing all nova-compute apps to
use the same ceph auth.

This change also ensures newly deployed nodes install
the old credentials first on ceph-joined hook,
and then supercedes it with the new credentials
on ceph-changed hook, therefore also retaining
the old credentials.

[1] https://review.opendev.org/889642
[2] https://review.opendev.org/896155

Closes-bug: #2028559
Related-bug: #2037003

Func-Test-Pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/1183

Change-Id: I1ae12d787a1f8e7761ca06b5a80049c1c62e9e90
(cherry picked from commit 650f3a5d51)
(cherry picked from commit 28233f9a94)
This commit is contained in:
Rodrigo Barbieri 2023-10-06 10:24:48 -03:00 committed by Rodrigo Barbieri
parent 03adbb6702
commit ecaf8010fc
4 changed files with 195 additions and 19 deletions

View File

@ -38,6 +38,7 @@ from charmhelpers.core.strutils import (
from charmhelpers.fetch import apt_install, filter_installed_packages from charmhelpers.fetch import apt_install, filter_installed_packages
from charmhelpers.core.hookenv import ( from charmhelpers.core.hookenv import (
config, config,
local_unit,
log, log,
relation_get, relation_get,
relation_ids, relation_ids,
@ -62,8 +63,14 @@ from charmhelpers.contrib.network.ip import (
) )
# This is just a label and it must be consistent across # This is just a label and it must be consistent across
# nova-compute nodes to support live migration. # nova-compute nodes to support live migration. It needs to
CEPH_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472' # change whenever CEPH_SECRET_UUID also changes.
CEPH_AUTH_CRED_NAME = 'nova-compute-ceph-auth-c91ce26f'
CEPH_SECRET_UUID = 'c91ce26f-403d-4058-9c38-6b56e1c428e0'
# We keep the old secret to retain old credentials and support
# live-migrations between existing instances to newly deployed
# nodes. For more info see LP#2037003.
CEPH_OLD_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472'
OVS_BRIDGE = 'br-int' OVS_BRIDGE = 'br-int'
@ -115,6 +122,16 @@ def get_availability_zone():
else config('default-availability-zone')) else config('default-availability-zone'))
def sent_ceph_application_name():
app_name = None
for rid in relation_ids('ceph'):
app_name = relation_get(
'application-name', rid=rid, unit=local_unit())
# default to the old name, so it goes through the old path during
# ceph relation set up
return app_name or service_name()
def _neutron_security_groups(): def _neutron_security_groups():
''' '''
Inspects current cloud-compute relation and determine if nova-c-c has Inspects current cloud-compute relation and determine if nova-c-c has
@ -435,13 +452,18 @@ class NovaComputeCephContext(context.CephContext):
ctxt = super(NovaComputeCephContext, self).__call__() ctxt = super(NovaComputeCephContext, self).__call__()
if not ctxt: if not ctxt:
return {} return {}
svc = service_name() svc = sent_ceph_application_name()
if svc == CEPH_AUTH_CRED_NAME:
secret_uuid = CEPH_SECRET_UUID
else:
secret_uuid = CEPH_OLD_SECRET_UUID
# secret.xml # secret.xml
ctxt['ceph_secret_uuid'] = CEPH_SECRET_UUID ctxt['ceph_secret_uuid'] = secret_uuid
# nova.conf # nova.conf
ctxt['service_name'] = svc ctxt['service_name'] = svc
ctxt['rbd_user'] = svc ctxt['rbd_user'] = svc
ctxt['rbd_secret_uuid'] = CEPH_SECRET_UUID ctxt['rbd_secret_uuid'] = secret_uuid
if config('pool-type') == 'erasure-coded': if config('pool-type') == 'erasure-coded':
ctxt['rbd_pool'] = ( ctxt['rbd_pool'] = (

View File

@ -34,6 +34,7 @@ from charmhelpers.core.hookenv import (
log, log,
DEBUG, DEBUG,
INFO, INFO,
WARNING,
ERROR, ERROR,
relation_ids, relation_ids,
remote_service_name, remote_service_name,
@ -106,7 +107,8 @@ from nova_compute_utils import (
restart_failed_subordinate_services, restart_failed_subordinate_services,
register_configs, register_configs,
NOVA_CONF, NOVA_CONF,
ceph_config_file, CEPH_SECRET, ceph_config_file,
CEPH_SECRET,
CEPH_BACKEND_SECRET, CEPH_BACKEND_SECRET,
enable_shell, disable_shell, enable_shell, disable_shell,
configure_lxd, configure_lxd,
@ -136,6 +138,9 @@ from charmhelpers.core.unitdata import kv
from nova_compute_context import ( from nova_compute_context import (
nova_metadata_requirement, nova_metadata_requirement,
sent_ceph_application_name,
CEPH_OLD_SECRET_UUID,
CEPH_AUTH_CRED_NAME,
CEPH_SECRET_UUID, CEPH_SECRET_UUID,
assert_libvirt_rbd_imagebackend_allowed, assert_libvirt_rbd_imagebackend_allowed,
get_availability_zone, get_availability_zone,
@ -454,6 +459,7 @@ def ceph_joined():
# Bug 1427660 # Bug 1427660
if not is_unit_paused_set() and config('virt-type') in LIBVIRT_TYPES: if not is_unit_paused_set() and config('virt-type') in LIBVIRT_TYPES:
service_restart(libvirt_daemon()) service_restart(libvirt_daemon())
# install old credentials first for backwards compatibility
send_application_name() send_application_name()
@ -566,9 +572,16 @@ def ceph_changed(rid=None, unit=None):
log('ceph relation incomplete. Peer not ready?') log('ceph relation incomplete. Peer not ready?')
return return
if not ensure_ceph_keyring(service=service_name(), user='nova', sent_app_name = sent_ceph_application_name()
if sent_app_name == CEPH_AUTH_CRED_NAME:
secret_uuid = CEPH_SECRET_UUID
else:
secret_uuid = CEPH_OLD_SECRET_UUID
if not ensure_ceph_keyring(service=sent_app_name, user='nova',
group='nova'): group='nova'):
log('Could not create ceph keyring: peer not ready?') log('Could not create ceph keyring '
'for {}: peer not ready?'.format(sent_app_name))
return return
CONFIGS.write(ceph_config_file()) CONFIGS.write(ceph_config_file())
@ -580,7 +593,12 @@ def ceph_changed(rid=None, unit=None):
key = relation_get(attribute='key', rid=rid, unit=unit) key = relation_get(attribute='key', rid=rid, unit=unit)
if config('virt-type') in ['kvm', 'qemu', 'lxc'] and key: if config('virt-type') in ['kvm', 'qemu', 'lxc'] and key:
create_libvirt_secret(secret_file=CEPH_SECRET, create_libvirt_secret(secret_file=CEPH_SECRET,
secret_uuid=CEPH_SECRET_UUID, key=key) secret_uuid=secret_uuid, key=key)
if sent_app_name != CEPH_AUTH_CRED_NAME:
log('Sending application name for new ceph credentials after '
'setting up the old credentials')
send_application_name(relid=rid, app_name=CEPH_AUTH_CRED_NAME)
try: try:
_handle_ceph_request() _handle_ceph_request()
@ -591,7 +609,7 @@ def ceph_changed(rid=None, unit=None):
# the hook execution. # the hook execution.
log('Caught ValueError, invalid value provided for ' log('Caught ValueError, invalid value provided for '
'configuration?: "{}"'.format(str(e)), 'configuration?: "{}"'.format(str(e)),
level=DEBUG) level=WARNING)
# TODO: Refactor this method moving part of this logic to charmhelpers, # TODO: Refactor this method moving part of this logic to charmhelpers,
@ -691,8 +709,11 @@ def _get_broker_rid_unit_for_previous_request():
@hooks.hook('ceph-relation-broken') @hooks.hook('ceph-relation-broken')
def ceph_broken(): def ceph_broken():
service = service_name() delete_keyring(service=CEPH_AUTH_CRED_NAME)
delete_keyring(service=service) # cleanup old entries based on application name
svc_name = service_name()
if svc_name != CEPH_AUTH_CRED_NAME:
delete_keyring(service=svc_name)
update_all_configs() update_all_configs()
@ -718,6 +739,10 @@ def upgrade_charm():
for r_id in relation_ids('amqp'): for r_id in relation_ids('amqp'):
amqp_joined(relation_id=r_id) amqp_joined(relation_id=r_id)
# Trigger upgrade-path from application_name to 'nova-compute'
if sent_ceph_application_name() != CEPH_AUTH_CRED_NAME:
for r_id in relation_ids('ceph'):
send_application_name(relid=r_id, app_name=CEPH_AUTH_CRED_NAME)
if is_relation_made('nrpe-external-master'): if is_relation_made('nrpe-external-master'):
update_nrpe_config() update_nrpe_config()

View File

@ -26,6 +26,7 @@ TO_PATCH = [
'relation_ids', 'relation_ids',
'relation_get', 'relation_get',
'related_units', 'related_units',
'service_name',
'config', 'config',
'log', 'log',
'_save_flag_file', '_save_flag_file',
@ -1024,6 +1025,25 @@ class NovaComputeContextTests(CharmTestCase):
self.assertEqual(context.nova_metadata_requirement(), self.assertEqual(context.nova_metadata_requirement(),
(True, None)) (True, None))
@patch('nova_compute_context.local_unit')
def test_sent_ceph_application_name_empty_rel_data_default(
self, local_unit):
local_unit.return_value = 'nova-compute-kvm/0'
self.relation_ids.return_value = ['ceph:0']
self.test_relation.set({})
self.service_name.return_value = 'nova-compute-kvm'
self.assertEqual(
context.sent_ceph_application_name(), 'nova-compute-kvm')
@patch('nova_compute_context.local_unit')
def test_sent_ceph_application_name(self, local_unit):
local_unit.return_value = 'nova-compute-kvm/0'
self.relation_ids.return_value = ['ceph:0']
self.test_relation.set({'application-name': 'ceph-nova-compute'})
self.service_name.return_value = 'nova-compute-kvm'
self.assertEqual(
context.sent_ceph_application_name(), 'ceph-nova-compute')
def test_nova_compute_extra_flags(self): def test_nova_compute_extra_flags(self):
self.test_config.set('cpu-model-extra-flags', 'pcid vmx pdpe1gb') self.test_config.set('cpu-model-extra-flags', 'pcid vmx pdpe1gb')
self.lsb_release.return_value = {'DISTRIB_CODENAME': 'bionic'} self.lsb_release.return_value = {'DISTRIB_CODENAME': 'bionic'}
@ -1387,6 +1407,43 @@ class NovaComputeCephContextTest(CharmTestCase):
self.config.side_effect = self.test_config.get self.config.side_effect = self.test_config.get
self.os_release.return_value = 'queens' self.os_release.return_value = 'queens'
@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_old(self, mock_call, sent_name):
sent_name.return_value = 'nova-compute-kvm'
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual('nova-compute-kvm', ctxt['service_name'])
self.assertEqual('nova-compute-kvm', ctxt['rbd_user'])
@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_old_2(self, mock_call, sent_name):
sent_name.return_value = 'nova-compute'
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual('nova-compute', ctxt['service_name'])
self.assertEqual('nova-compute', ctxt['rbd_user'])
@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_new(self, mock_call, sent_name):
sent_name.return_value = context.CEPH_AUTH_CRED_NAME
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(context.CEPH_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(context.CEPH_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['service_name'])
self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['rbd_user'])
@patch.dict(context.os.environ, {'JUJU_UNIT_NAME': 'nova_compute'}, @patch.dict(context.os.environ, {'JUJU_UNIT_NAME': 'nova_compute'},
clear=True) clear=True)
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__') @patch('charmhelpers.contrib.openstack.context.CephContext.__call__')

View File

@ -69,6 +69,7 @@ TO_PATCH = [
'get_relation_ip', 'get_relation_ip',
# nova_compute_context # nova_compute_context
'nova_metadata_requirement', 'nova_metadata_requirement',
'sent_ceph_application_name',
# nova_compute_utils # nova_compute_utils
# 'PACKAGES', # 'PACKAGES',
'create_libvirt_secret', 'create_libvirt_secret',
@ -614,10 +615,13 @@ class NovaComputeRelationsTests(CharmTestCase):
configs.complete_contexts = MagicMock() configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph'] configs.complete_contexts.return_value = ['ceph']
self.ensure_ceph_keyring.return_value = False self.ensure_ceph_keyring.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
hooks.ceph_changed() hooks.ceph_changed()
self.log.assert_called_with( self.log.assert_called_with(
'Could not create ceph keyring: peer not ready?' 'Could not create ceph keyring for nova-compute: peer not ready?'
) )
self.ensure_ceph_keyring.assert_called_once_with(
service='nova-compute', user='nova', group='nova')
@patch.object(hooks, '_handle_ceph_request') @patch.object(hooks, '_handle_ceph_request')
@patch.object(hooks, 'create_libvirt_secret') @patch.object(hooks, 'create_libvirt_secret')
@ -629,15 +633,56 @@ class NovaComputeRelationsTests(CharmTestCase):
_handle_ceph_request): _handle_ceph_request):
configs.complete_contexts = MagicMock() configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph'] configs.complete_contexts.return_value = ['ceph']
self.sent_ceph_application_name.return_value = 'nova-compute-kvm'
service_name.return_value = 'nova-compute-kvm'
self.ensure_ceph_keyring.return_value = True self.ensure_ceph_keyring.return_value = True
configs.write = MagicMock() configs.write = MagicMock()
service_name.return_value = 'nova-compute'
key = {'data': 'key'} key = {'data': 'key'}
self.relation_get.return_value = key self.relation_get.return_value = key
hooks.ceph_changed() hooks.ceph_changed()
self.send_application_name.assert_called_once_with(
relid=None, app_name=hooks.CEPH_AUTH_CRED_NAME)
ex = [ ex = [
call('/var/lib/charm/nova-compute/ceph.conf'), call('/var/lib/charm/nova-compute-kvm/ceph.conf'),
call('/etc/ceph/secret.xml'),
call('/etc/nova/nova.conf'),
]
self.assertEqual(ex, configs.write.call_args_list)
create_libvirt_secret.assert_called_once_with(
secret_file='/etc/ceph/secret.xml', key=key,
secret_uuid=hooks.CEPH_OLD_SECRET_UUID)
# confirm exception is caught
_handle_ceph_request.side_effect = ValueError
hooks.ceph_changed()
self.log.assert_has_calls([
call('Sending application name for new ceph credentials after'
' setting up the old credentials'),
call('Caught ValueError, invalid value provided'
' for configuration?: ""', level='WARNING')
])
@patch.object(hooks, '_handle_ceph_request')
@patch.object(hooks, 'create_libvirt_secret')
@patch('nova_compute_context.service_name')
@patch.object(hooks, 'CONFIGS')
def test_ceph_changed_with_key_and_relation_data_new_ceph_creds(
self, configs, service_name, create_libvirt_secret,
_handle_ceph_request):
configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph']
self.sent_ceph_application_name.return_value = (
hooks.CEPH_AUTH_CRED_NAME)
service_name.return_value = 'nova-compute-kvm'
self.ensure_ceph_keyring.return_value = True
configs.write = MagicMock()
key = {'data': 'key'}
self.relation_get.return_value = key
hooks.ceph_changed()
self.send_application_name.assert_not_called()
ex = [
call('/var/lib/charm/nova-compute-kvm/ceph.conf'),
call('/etc/ceph/secret.xml'), call('/etc/ceph/secret.xml'),
call('/etc/nova/nova.conf'), call('/etc/nova/nova.conf'),
] ]
@ -645,10 +690,7 @@ class NovaComputeRelationsTests(CharmTestCase):
create_libvirt_secret.assert_called_once_with( create_libvirt_secret.assert_called_once_with(
secret_file='/etc/ceph/secret.xml', key=key, secret_file='/etc/ceph/secret.xml', key=key,
secret_uuid=hooks.CEPH_SECRET_UUID) secret_uuid=hooks.CEPH_SECRET_UUID)
# confirm exception is caught self.log.assert_not_called()
_handle_ceph_request.side_effect = ValueError
hooks.ceph_changed()
self.log.assert_called_once()
@patch.object(hooks, 'get_ceph_request') @patch.object(hooks, 'get_ceph_request')
@patch.object(hooks, 'get_request_states') @patch.object(hooks, 'get_request_states')
@ -1280,7 +1322,37 @@ class NovaComputeRelationsTests(CharmTestCase):
grp_mock.gr_gid = None grp_mock.gr_gid = None
getgrnam.return_value = grp_mock getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
hooks.upgrade_charm() hooks.upgrade_charm()
self.send_application_name.assert_not_called()
self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called)
@patch.object(hooks.grp, 'getgrnam')
def test_upgrade_charm_send_app_name(self, getgrnam):
grp_mock = MagicMock()
grp_mock.gr_gid = None
getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute-kvm'
self.relation_ids.return_value = ['ceph:0']
hooks.upgrade_charm()
self.send_application_name.assert_called_once_with(
relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME)
self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called)
@patch.object(hooks.grp, 'getgrnam')
def test_upgrade_charm_send_app_name_2(self, getgrnam):
grp_mock = MagicMock()
grp_mock.gr_gid = None
getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
self.relation_ids.return_value = ['ceph:0']
hooks.upgrade_charm()
self.send_application_name.assert_called_once_with(
relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME)
self.remove_old_packages.assert_called_once_with() self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called) self.assertFalse(self.service_restart.called)