Refactor unit status output

The unit status output for this charm was verbose compared to
others;  refactor to display minimal information when active
and pertinent information when in a blocked state.

Note that the NVIDIA driver version should really be in the
application version; this will require further refactoring
to expose this information to the operator framework.

Change-Id: I26663ca8da99083a409e085ea239e43245ef6265
This commit is contained in:
James Page 2022-05-12 09:30:40 +01:00
parent c699c7d135
commit 37e0fe5982
6 changed files with 47 additions and 48 deletions

1
.gitignore vendored
View File

@ -6,5 +6,4 @@ interfaces
.stestr
*__pycache__*
*.pyc
/nova-compute-nvidia-vgpu.charm
*.charm

View File

@ -58,7 +58,8 @@ def is_nvidia_software_to_be_installed(charm_config):
def is_nvidia_software_to_be_installed_notcached(charm_config):
return (nvidia_utils.has_nvidia_gpu_hardware() or
nvidia_gpu_hardware, _ = nvidia_utils.has_nvidia_gpu_hardware()
return (nvidia_gpu_hardware or
charm_config.get('force-install-nvidia-vgpu'))
@ -110,34 +111,24 @@ def check_status(config, services):
:type services: List[str]
:rtype: ops.model.StatusBase
"""
unit_status_msg = ('no ' if not nvidia_utils.has_nvidia_gpu_hardware()
else '') + 'NVIDIA GPU found; '
installed_versions = nvidia_utils.installed_nvidia_software_versions()
software_is_installed = len(installed_versions) > 0
if (is_nvidia_software_to_be_installed(config) and
not software_is_installed):
return BlockedStatus("NVIDIA GPU detected, drivers not installed")
_, services_not_running_msg = ows_check_services_running(services,
ports=[])
software_is_running = services_not_running_msg is None
if software_is_installed:
unit_status_msg += 'installed NVIDIA software: '
unit_status_msg += ', '.join(installed_versions)
if not software_is_running:
# NOTE(lourot): the exact list of services not running that should
# be will be displayed in the principal's blocked status message
# already, so no need to repeat it here on the subordinate.
unit_status_msg += '; reboot required?'
else:
unit_status_msg += 'no NVIDIA software installed'
if software_is_installed and not software_is_running:
return BlockedStatus("manual reboot required")
if ((is_nvidia_software_to_be_installed(config) and
not software_is_installed) or
(software_is_installed and
not software_is_running)):
return BlockedStatus(unit_status_msg)
nvidia_gpu_hardware, num_gpus = nvidia_utils.has_nvidia_gpu_hardware()
unit_status_msg = "{} GPU".format(num_gpus)
return ActiveStatus('Unit is ready: ' + unit_status_msg)
return ActiveStatus('Unit is ready ({})'.format(unit_status_msg))
def set_principal_unit_relation_data(relation_data_to_be_set, config,

View File

@ -102,15 +102,15 @@ def list_vgpu_types():
def has_nvidia_gpu_hardware():
"""Search for NVIDIA GPU hardware.
:returns: True if some NVIDIA GPU hardware is found on the current
unit.
:rtype: bool
:returns: a tuple of (bool, int) indicating if NVIDIA GPU hardware
is found and how many GPU's where detected.
:rtype: (bool, int)
"""
return _has_nvidia_gpu_hardware_notcached()
def _has_nvidia_gpu_hardware_notcached():
nvidia_gpu_hardware_found = False
num_nvidia_devices = 0
for device in SimpleParser().run():
device_class = device.cls.name
device_vendor = device.vendor.name
@ -124,12 +124,12 @@ def _has_nvidia_gpu_hardware_notcached():
logging.debug('NVIDIA GPU found: {}'.format(device))
# NOTE(lourot): we could `break` out here but it's interesting
# for debugging purposes to print them all.
nvidia_gpu_hardware_found = True
num_nvidia_devices += 1
if not nvidia_gpu_hardware_found:
if num_nvidia_devices == 0:
logging.debug('No NVIDIA GPU found.')
return nvidia_gpu_hardware_found
return num_nvidia_devices > 0, num_nvidia_devices
def _installed_nvidia_software_packages():

View File

@ -1,7 +1,6 @@
# This file is managed centrally. If you find the need to modify this as a
# one-off, please don't. Intead, consult #openstack-charms and ask about
# requirements management in charms via bot-control. Thank you.
charm-tools>=2.4.4
coverage>=3.6
mock>=1.2
flake8>=4.0.1; python_version >= '3.6'

View File

@ -32,12 +32,12 @@ class TestCharmUtils(unittest.TestCase):
@patch('nvidia_utils.has_nvidia_gpu_hardware')
def test_is_nvidia_software_to_be_installed(self,
has_nvidia_gpu_hardware_mock):
has_nvidia_gpu_hardware_mock.return_value = True
has_nvidia_gpu_hardware_mock.return_value = True, 1
self.assertTrue(
charm_utils.is_nvidia_software_to_be_installed_notcached({
'force-install-nvidia-vgpu': False}))
has_nvidia_gpu_hardware_mock.return_value = False
has_nvidia_gpu_hardware_mock.return_value = False, 0
self.assertTrue(
charm_utils.is_nvidia_software_to_be_installed_notcached({
'force-install-nvidia-vgpu': True}))
@ -88,55 +88,59 @@ class TestCharmUtils(unittest.TestCase):
def test_check_status(
self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock,
check_services_running_mock):
has_hw_mock.return_value = True
has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual(
charm_utils.check_status(None, None),
ActiveStatus(
'Unit is ready: '
'NVIDIA GPU found; installed NVIDIA software: 42, 43'))
'Unit is ready (1 GPU)'
)
)
has_hw_mock.return_value = False
has_hw_mock.return_value = False, 0
installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual(
charm_utils.check_status(None, None),
ActiveStatus(
'Unit is ready: '
'no NVIDIA GPU found; installed NVIDIA software: 42, 43'))
'Unit is ready (0 GPU)'
)
)
has_hw_mock.return_value = True
has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None)
self.assertEqual(
charm_utils.check_status(None, None),
BlockedStatus(
'NVIDIA GPU found; no NVIDIA software installed'))
'NVIDIA GPU detected, drivers not installed'
)
)
has_hw_mock.return_value = True
has_hw_mock.return_value = True, 2
installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = False
check_services_running_mock.return_value = (None, None)
self.assertEqual(
charm_utils.check_status(None, None),
ActiveStatus(
'Unit is ready: '
'NVIDIA GPU found; no NVIDIA software installed'))
'Unit is ready (2 GPU)'
)
)
has_hw_mock.return_value = True
has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (
None, 'Services not running that should be: nvidia-vgpu-mgr')
self.assertEqual(
charm_utils.check_status(None, None),
BlockedStatus(
'NVIDIA GPU found; installed NVIDIA software: 42, 43; '
'reboot required?'))
BlockedStatus('manual reboot required')
)
@patch('nvidia_utils._installed_nvidia_software_packages')
@patch('charm_utils.get_os_codename_package')

View File

@ -54,13 +54,19 @@ class TestNvidiaUtils(unittest.TestCase):
def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock):
lspci_parser_mock.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
self.assertTrue(nvidia_utils._has_nvidia_gpu_hardware_notcached())
self.assertEqual(
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
(True, 1)
)
@patch('nvidia_utils.SimpleParser')
def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock):
lspci_parser_mock.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITHOUT_GPU)
self.assertFalse(nvidia_utils._has_nvidia_gpu_hardware_notcached())
self.assertEqual(
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
(False, 0)
)
@patch('nvidia_utils.Path')
@patch('nvidia_utils.os.listdir')