Fix resume action failure

Services have interdependencies and the order in which
we attempt to resume them is important, otherwise the
resume action may fail.

Uncovered and validated by running the
openstack-upgrade tests. [1]

[1]: https://github.com/openstack-charmers/charmed-openstack-tester

Change-Id: I12218b47dc56b502ecc8578c6ab13acbd321bf26
Related-Bug: #1927277
Related-Bug: #1952882
This commit is contained in:
Aurelien Lourot 2021-12-01 11:09:57 +01:00
parent 9724059fb9
commit 0a4acd6d03
2 changed files with 38 additions and 14 deletions

View File

@ -442,9 +442,14 @@ def services():
'''
Returns a list of services associated with this charm and its subordinates.
'''
return list(set(chain(*restart_map().values()))
| get_subordinate_services())
# NOTE(lourot): the order is important when resuming the services. For
# example the ceilometer-agent-compute service, coming from the
# ceilometer-agent subordinate charm, has a dependency on the nova-compute
# service. Attempting to start the ceilometer-agent-compute service first
# will then fail. Thus we return the services here in a resume-friendly
# order, i.e. the principal services first, then the subordinate ones.
return (list(set(chain(*restart_map().values()))) +
list(get_subordinate_services()))
def register_configs():
@ -1041,7 +1046,10 @@ def services_to_pause_or_resume():
if "post-series-upgrade" in hook_name():
return services()
else:
return list(set(services()) - {libvirt_daemon()})
# WARNING(lourot): the list ordering is important. See services() for
# more details.
return [service for service in services()
if service != libvirt_daemon()]
def _pause_resume_helper(f, configs):

View File

@ -923,7 +923,7 @@ class NovaComputeUtilsTests(CharmTestCase):
asf.return_value = callee
utils.assess_status('test-config')
asf.assert_called_once_with('test-config',
['nova-compute', 'qemu-kvm'])
['qemu-kvm', 'nova-compute'])
callee.assert_called_once_with()
self.os_application_version_set.assert_called_with(
utils.VERSION_PACKAGE
@ -1281,21 +1281,37 @@ class NovaComputeUtilsTests(CharmTestCase):
@patch.object(utils, "libvirt_daemon")
@patch.object(utils, "hook_name")
@patch.object(utils, "services")
@patch.object(utils, "get_subordinate_services")
@patch.object(utils, 'nova_metadata_requirement')
def test_services_to_pause_or_resume(
self, _services, _hook_name, _libvirt_daemon):
_no_libvirt = ["nova-compute"]
_full = _no_libvirt + ["libvirtd"]
_services.return_value = _full
self, _en_meta, _subordinate_services, _hook_name,
_libvirt_daemon):
_en_meta.return_value = (False, None)
_subordinate_services.return_value = set(["ceilometer-agent-compute"])
_libvirt_daemon.return_value = "libvirtd"
self.os_release.return_value = 'victoria'
self.relation_ids.return_value = []
# WARNING(lourot): In the following test expectations, the order of
# the services is important. Principal services have to come before
# the subordinate services. See nova_compute_utils.services() for more
# details.
expected_last_service = "ceilometer-agent-compute"
_hook_name.return_value = "config-changed"
self.assertEqual(_no_libvirt,
utils.services_to_pause_or_resume())
expected_service_set = set(["qemu-kvm", "nova-compute",
"ceilometer-agent-compute"])
actual_service_list = utils.services_to_pause_or_resume()
self.assertEqual(expected_service_set, set(actual_service_list))
self.assertEqual(expected_last_service, actual_service_list[-1])
_hook_name.return_value = "post-series-upgrade"
self.assertEqual(_full,
utils.services_to_pause_or_resume())
expected_service_set = set(["qemu-kvm", "nova-compute", "libvirtd",
"ceilometer-agent-compute"])
actual_service_list = utils.services_to_pause_or_resume()
self.assertEqual(expected_service_set, set(actual_service_list))
self.assertEqual(expected_last_service, actual_service_list[-1])
@patch.object(utils, 'kv')
def test_use_fqdn_hint(self, _kv):