Restart failed subordinate services

Change-Id: Id34e9c6f85886dbf880df0b7002110a40ef41ad6
Closes-Bug: #1947585
This commit is contained in:
Aurelien Lourot 2022-03-15 18:09:57 +01:00 committed by Billy Olsen
parent 71567080b3
commit 140be9d0a9
4 changed files with 69 additions and 0 deletions

View File

@ -99,6 +99,7 @@ from nova_compute_utils import (
public_ssh_key,
restart_map,
services,
restart_failed_subordinate_services,
register_configs,
NOVA_CONF,
ceph_config_file, CEPH_SECRET,
@ -689,6 +690,7 @@ def nova_ceilometer_joined(relid=None, remote_restart=False):
@restart_on_change(restart_map())
def nova_ceilometer_relation_changed():
update_all_configs()
restart_failed_subordinate_services()
@hooks.hook('nova-vgpu-relation-joined')
@ -703,6 +705,7 @@ def nova_vgpu_joined(relid=None, remote_restart=False):
@restart_on_change(restart_map())
def nova_vgpu_relation_changed():
update_all_configs()
restart_failed_subordinate_services()
@hooks.hook('nrpe-external-master-relation-joined',
@ -747,6 +750,7 @@ def neutron_plugin_changed():
apt_purge('nova-api-metadata', fatal=True)
service_restart_handler(default_service='nova-compute')
CONFIGS.write(NOVA_CONF)
restart_failed_subordinate_services()
# TODO(jamespage): Move this into charmhelpers for general reuse.

View File

@ -43,6 +43,7 @@ from charmhelpers.core.fstab import Fstab
from charmhelpers.core.host import (
mkdir,
service_restart,
service_running,
lsb_release,
rsync,
CompareHostReleases,
@ -459,6 +460,27 @@ def services():
list(get_subordinate_services()))
def restart_failed_subordinate_services():
    '''
    Restart every subordinate service that is not currently running.

    Nothing is done while the unit is paused.

    The candidate services are those returned by
    get_subordinate_services(), i.e. the services subordinate charms
    advertise to this principal charm so that it can manage them when
    pausing, resuming and upgrading. Each one is probed with
    service_running() and restarted only when it is reported as not
    running.

    Calling this once a relation with a subordinate charm has been
    established mitigates the race condition described in lp:1947585, in
    which subordinate services may have failed to come up because they
    were started before the principal services.
    '''
    if is_unit_paused_set():
        # Paused unit: leave every service untouched.
        return

    for name in get_subordinate_services():
        if service_running(name):
            continue
        service_restart(name)
def register_configs():
'''
Returns an OSTemplateRenderer object with all required configs registered.

View File

@ -102,6 +102,7 @@ TO_PATCH = [
'render',
'remove_old_packages',
'services',
'restart_failed_subordinate_services',
'send_application_name',
]
@ -486,10 +487,12 @@ class NovaComputeRelationsTests(CharmTestCase):
def test_nova_ceilometer_relation_changed(self):
    # The ceilometer relation-changed hook must refresh the configs and
    # then give failed subordinate services a chance to be restarted.
    hooks.nova_ceilometer_relation_changed()
    for patched in (self.update_all_configs,
                    self.restart_failed_subordinate_services):
        patched.assert_called()
def test_nova_vgpu_relation_changed(self):
    # Exercise the vgpu relation-changed hook itself; calling the
    # ceilometer hook here would leave nova_vgpu_relation_changed
    # completely uncovered.
    hooks.nova_vgpu_relation_changed()
    # The hook must refresh the configs and then give failed subordinate
    # services a chance to be restarted.
    self.update_all_configs.assert_called()
    self.restart_failed_subordinate_services.assert_called()
def test_ceph_joined(self):
self.libvirt_daemon.return_value = 'libvirt-bin'
@ -879,6 +882,7 @@ class NovaComputeRelationsTests(CharmTestCase):
configs.write.assert_called_with('/etc/nova/nova.conf')
service_restart_handler.assert_called_with(
default_service='nova-compute')
self.restart_failed_subordinate_services.assert_called()
@patch.object(hooks, 'service_restart_handler')
@patch.object(hooks, 'CONFIGS')
@ -891,6 +895,7 @@ class NovaComputeRelationsTests(CharmTestCase):
configs.write.assert_called_with('/etc/nova/nova.conf')
service_restart_handler.assert_called_with(
default_service='nova-compute')
self.restart_failed_subordinate_services.assert_called()
@patch.object(hooks, 'service_restart_handler')
@patch.object(hooks, 'CONFIGS')
@ -903,6 +908,7 @@ class NovaComputeRelationsTests(CharmTestCase):
configs.write.assert_called_with('/etc/nova/nova.conf')
service_restart_handler.assert_called_with(
default_service='nova-compute')
self.restart_failed_subordinate_services.assert_called()
@patch.object(hooks, 'get_hugepage_number')
def test_neutron_plugin_joined_relid(self, get_hugepage_number):

View File

@ -1313,6 +1313,43 @@ class NovaComputeUtilsTests(CharmTestCase):
self.assertEqual(expected_service_set, set(actual_service_list))
self.assertEqual(expected_last_service, actual_service_list[-1])
@patch.object(utils, 'service_restart')
@patch.object(utils, 'service_running')
@patch.object(utils, 'get_subordinate_services')
@patch.object(utils, 'is_unit_paused_set')
def test_restart_failed_subordinate_services(self, _is_unit_paused_set,
                                             _get_subordinate_services,
                                             _service_running,
                                             _service_restart):
    # Validate that nothing is restarted while the unit is paused. A
    # stopped subordinate service is configured up front so that this
    # check would fail if the pause guard were ever dropped.
    _is_unit_paused_set.return_value = True
    _get_subordinate_services.return_value = ['ceilometer-agent']
    _service_running.return_value = False
    utils.restart_failed_subordinate_services()
    _service_restart.assert_not_called()

    # Validate that when a single service is not running, it is restarted
    _service_restart.reset_mock()
    _is_unit_paused_set.return_value = False
    _get_subordinate_services.return_value = ['ceilometer-agent']
    _service_running.return_value = False
    utils.restart_failed_subordinate_services()
    _service_restart.assert_called_once_with('ceilometer-agent')

    # Validate that when multiple subordinate services exist, only ones
    # that are stopped are restarted.
    _service_restart.reset_mock()

    def is_running(service):
        # Only 'ceilometer-agent' is reported as running.
        return service == 'ceilometer-agent'

    _service_running.side_effect = is_running
    _get_subordinate_services.return_value = ['ceilometer-agent', 'foo']
    utils.restart_failed_subordinate_services()
    # assert_has_calls() tolerates additional calls, so it would not
    # notice the running service being restarted as well; require exactly
    # one restart, of the stopped service.
    _service_restart.assert_called_once_with('foo')
@patch.object(utils, 'kv')
def test_use_fqdn_hint(self, _kv):
_kv().get.return_value = False