Add unit status reboot hint

Change-Id: I00343e72cb5be25e1dbd19338a0bead04076ac35
This commit is contained in:
Aurelien Lourot 2022-03-01 15:24:06 +01:00
parent dad6883dcf
commit 46eb5aedd6
4 changed files with 49 additions and 12 deletions

View File

@ -82,6 +82,10 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
self.config, self.services()) self.config, self.services())
def services(self): def services(self):
"""Determine the list of services that should be running.
:rtype: List[str]
"""
# If no NVIDIA software is expected to be installed on this particular # If no NVIDIA software is expected to be installed on this particular
# unit, then no service should be expected to run by # unit, then no service should be expected to run by
# OSBaseCharm.update_status(). Otherwise the services from the # OSBaseCharm.update_status(). Otherwise the services from the
@ -95,7 +99,7 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
:rtype: ops.model.StatusBase :rtype: ops.model.StatusBase
""" """
return check_status(self.config) return check_status(self.config, self.services())
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -23,6 +23,7 @@ from ruamel.yaml import YAML
from charmhelpers.contrib.openstack.utils import ( from charmhelpers.contrib.openstack.utils import (
CompareOpenStackReleases, CompareOpenStackReleases,
get_os_codename_package, get_os_codename_package,
ows_check_services_running,
) )
from charmhelpers.core.hookenv import cached from charmhelpers.core.hookenv import cached
from charmhelpers.core.host import file_hash from charmhelpers.core.host import file_hash
@ -95,25 +96,40 @@ def install_nvidia_software_if_needed(stored, config, resources):
stored.last_installed_resource_hash = nvidia_software_hash stored.last_installed_resource_hash = nvidia_software_hash
def check_status(config, services):
    """Work out which unit status should be reported.

    :param config: Juju application config.
    :type config: ops.model.ConfigData
    :param services: List of services expected to be running.
    :type services: List[str]
    :returns: BlockedStatus when NVIDIA software is to be installed but is
        not, or when it is installed but ``ows_check_services_running``
        reports a problem; ActiveStatus otherwise.
    :rtype: ops.model.StatusBase
    """
    # The message is assembled from fragments joined with '; '.
    gpu_prefix = '' if nvidia_utils.has_nvidia_gpu_hardware() else 'no '
    fragments = [gpu_prefix + 'NVIDIA GPU found']

    versions = nvidia_utils.installed_nvidia_software_versions()
    installed = len(versions) > 0

    # Only the message half of the result matters here: it is None when
    # nothing is reported as not running.
    _, not_running_msg = ows_check_services_running(services, ports=[])
    running = not_running_msg is None

    if not installed:
        fragments.append('no NVIDIA software installed')
    else:
        fragments.append('installed NVIDIA software: ' + ', '.join(versions))
        if not running:
            # NOTE(lourot): the principal's blocked status message already
            # lists exactly which services should be running but are not, so
            # the subordinate only hints at the likely remedy.
            fragments.append('reboot required?')

    unit_status_msg = '; '.join(fragments)

    install_pending = (is_nvidia_software_to_be_installed(config) and
                       not installed)
    services_down = installed and not running
    if install_pending or services_down:
        return BlockedStatus(unit_status_msg)

    return ActiveStatus('Unit is ready: ' + unit_status_msg)

View File

@ -81,5 +81,5 @@ class TestNovaComputeNvidiaVgpuCharm(CharmTestCase):
self.harness.add_relation_unit(relation_id, 'nova-compute/0') self.harness.add_relation_unit(relation_id, 'nova-compute/0')
# Verify that nova-compute-vgpu-charm sets relation data to its # Verify that nova-compute-vgpu-charm sets relation data to its
# principle nova-compute. # principal nova-compute.
self.assertTrue(self.set_principal_unit_relation_data.called) self.assertTrue(self.set_principal_unit_relation_data.called)

View File

@ -76,16 +76,19 @@ class TestCharmUtils(unittest.TestCase):
apt_install_mock.assert_called_once_with(['path-to-software'], apt_install_mock.assert_called_once_with(['path-to-software'],
fatal=True) fatal=True)
@patch('charm_utils.ows_check_services_running')
@patch('charm_utils.is_nvidia_software_to_be_installed') @patch('charm_utils.is_nvidia_software_to_be_installed')
@patch('nvidia_utils.installed_nvidia_software_versions') @patch('nvidia_utils.installed_nvidia_software_versions')
@patch('nvidia_utils.has_nvidia_gpu_hardware') @patch('nvidia_utils.has_nvidia_gpu_hardware')
def test_check_status(self, has_hw_mock, installed_sw_mock, def test_check_status(
is_sw_to_be_installed_mock): self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock,
check_services_running_mock):
has_hw_mock.return_value = True has_hw_mock.return_value = True
installed_sw_mock.return_value = ['42', '43'] installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready: '
'NVIDIA GPU found; installed NVIDIA software: 42, 43')) 'NVIDIA GPU found; installed NVIDIA software: 42, 43'))
@ -93,8 +96,9 @@ class TestCharmUtils(unittest.TestCase):
has_hw_mock.return_value = False has_hw_mock.return_value = False
installed_sw_mock.return_value = ['42', '43'] installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready: '
'no NVIDIA GPU found; installed NVIDIA software: 42, 43')) 'no NVIDIA GPU found; installed NVIDIA software: 42, 43'))
@ -102,20 +106,33 @@ class TestCharmUtils(unittest.TestCase):
has_hw_mock.return_value = True has_hw_mock.return_value = True
installed_sw_mock.return_value = [] installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None), charm_utils.check_status(None, None),
BlockedStatus( BlockedStatus(
'NVIDIA GPU found; no NVIDIA software installed')) 'NVIDIA GPU found; no NVIDIA software installed'))
has_hw_mock.return_value = True has_hw_mock.return_value = True
installed_sw_mock.return_value = [] installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = False is_sw_to_be_installed_mock.return_value = False
check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready: '
'NVIDIA GPU found; no NVIDIA software installed')) 'NVIDIA GPU found; no NVIDIA software installed'))
has_hw_mock.return_value = True
installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (
None, 'Services not running that should be: nvidia-vgpu-mgr')
self.assertEqual(
charm_utils.check_status(None, None),
BlockedStatus(
'NVIDIA GPU found; installed NVIDIA software: 42, 43; '
'reboot required?'))
@patch('nvidia_utils._installed_nvidia_software_packages') @patch('nvidia_utils._installed_nvidia_software_packages')
@patch('charm_utils.get_os_codename_package') @patch('charm_utils.get_os_codename_package')
def test_set_principal_unit_relation_data(self, release_codename_mock, def test_set_principal_unit_relation_data(self, release_codename_mock,