Refactor unit status output
The unit status output for this charm was verbose compared to others; refactor to display minimal information when active and pertinent information when in a blocked state. Note that the NVIDIA driver version should really be in the application version; this will require further refactoring to expose this information to the operator framework. Change-Id: I26663ca8da99083a409e085ea239e43245ef6265
This commit is contained in:
parent
c699c7d135
commit
37e0fe5982
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,5 +6,4 @@ interfaces
|
|||||||
.stestr
|
.stestr
|
||||||
*__pycache__*
|
*__pycache__*
|
||||||
*.pyc
|
*.pyc
|
||||||
/nova-compute-nvidia-vgpu.charm
|
|
||||||
*.charm
|
*.charm
|
||||||
|
@ -58,7 +58,8 @@ def is_nvidia_software_to_be_installed(charm_config):
|
|||||||
|
|
||||||
|
|
||||||
def is_nvidia_software_to_be_installed_notcached(charm_config):
|
def is_nvidia_software_to_be_installed_notcached(charm_config):
|
||||||
return (nvidia_utils.has_nvidia_gpu_hardware() or
|
nvidia_gpu_hardware, _ = nvidia_utils.has_nvidia_gpu_hardware()
|
||||||
|
return (nvidia_gpu_hardware or
|
||||||
charm_config.get('force-install-nvidia-vgpu'))
|
charm_config.get('force-install-nvidia-vgpu'))
|
||||||
|
|
||||||
|
|
||||||
@ -110,34 +111,24 @@ def check_status(config, services):
|
|||||||
:type services: List[str]
|
:type services: List[str]
|
||||||
:rtype: ops.model.StatusBase
|
:rtype: ops.model.StatusBase
|
||||||
"""
|
"""
|
||||||
unit_status_msg = ('no ' if not nvidia_utils.has_nvidia_gpu_hardware()
|
|
||||||
else '') + 'NVIDIA GPU found; '
|
|
||||||
|
|
||||||
installed_versions = nvidia_utils.installed_nvidia_software_versions()
|
installed_versions = nvidia_utils.installed_nvidia_software_versions()
|
||||||
software_is_installed = len(installed_versions) > 0
|
software_is_installed = len(installed_versions) > 0
|
||||||
|
|
||||||
|
if (is_nvidia_software_to_be_installed(config) and
|
||||||
|
not software_is_installed):
|
||||||
|
return BlockedStatus("NVIDIA GPU detected, drivers not installed")
|
||||||
|
|
||||||
_, services_not_running_msg = ows_check_services_running(services,
|
_, services_not_running_msg = ows_check_services_running(services,
|
||||||
ports=[])
|
ports=[])
|
||||||
software_is_running = services_not_running_msg is None
|
software_is_running = services_not_running_msg is None
|
||||||
|
|
||||||
if software_is_installed:
|
if software_is_installed and not software_is_running:
|
||||||
unit_status_msg += 'installed NVIDIA software: '
|
return BlockedStatus("manual reboot required")
|
||||||
unit_status_msg += ', '.join(installed_versions)
|
|
||||||
if not software_is_running:
|
|
||||||
# NOTE(lourot): the exact list of services not running that should
|
|
||||||
# be will be displayed in the principal's blocked status message
|
|
||||||
# already, so no need to repeat it here on the subordinate.
|
|
||||||
unit_status_msg += '; reboot required?'
|
|
||||||
else:
|
|
||||||
unit_status_msg += 'no NVIDIA software installed'
|
|
||||||
|
|
||||||
if ((is_nvidia_software_to_be_installed(config) and
|
nvidia_gpu_hardware, num_gpus = nvidia_utils.has_nvidia_gpu_hardware()
|
||||||
not software_is_installed) or
|
unit_status_msg = "{} GPU".format(num_gpus)
|
||||||
(software_is_installed and
|
|
||||||
not software_is_running)):
|
|
||||||
return BlockedStatus(unit_status_msg)
|
|
||||||
|
|
||||||
return ActiveStatus('Unit is ready: ' + unit_status_msg)
|
return ActiveStatus('Unit is ready ({})'.format(unit_status_msg))
|
||||||
|
|
||||||
|
|
||||||
def set_principal_unit_relation_data(relation_data_to_be_set, config,
|
def set_principal_unit_relation_data(relation_data_to_be_set, config,
|
||||||
|
@ -102,15 +102,15 @@ def list_vgpu_types():
|
|||||||
def has_nvidia_gpu_hardware():
|
def has_nvidia_gpu_hardware():
|
||||||
"""Search for NVIDIA GPU hardware.
|
"""Search for NVIDIA GPU hardware.
|
||||||
|
|
||||||
:returns: True if some NVIDIA GPU hardware is found on the current
|
:returns: a tuple of (bool, int) indicating if NVIDIA GPU hardware
|
||||||
unit.
|
is found and how many GPU's where detected.
|
||||||
:rtype: bool
|
:rtype: (bool, int)
|
||||||
"""
|
"""
|
||||||
return _has_nvidia_gpu_hardware_notcached()
|
return _has_nvidia_gpu_hardware_notcached()
|
||||||
|
|
||||||
|
|
||||||
def _has_nvidia_gpu_hardware_notcached():
|
def _has_nvidia_gpu_hardware_notcached():
|
||||||
nvidia_gpu_hardware_found = False
|
num_nvidia_devices = 0
|
||||||
for device in SimpleParser().run():
|
for device in SimpleParser().run():
|
||||||
device_class = device.cls.name
|
device_class = device.cls.name
|
||||||
device_vendor = device.vendor.name
|
device_vendor = device.vendor.name
|
||||||
@ -124,12 +124,12 @@ def _has_nvidia_gpu_hardware_notcached():
|
|||||||
logging.debug('NVIDIA GPU found: {}'.format(device))
|
logging.debug('NVIDIA GPU found: {}'.format(device))
|
||||||
# NOTE(lourot): we could `break` out here but it's interesting
|
# NOTE(lourot): we could `break` out here but it's interesting
|
||||||
# for debugging purposes to print them all.
|
# for debugging purposes to print them all.
|
||||||
nvidia_gpu_hardware_found = True
|
num_nvidia_devices += 1
|
||||||
|
|
||||||
if not nvidia_gpu_hardware_found:
|
if num_nvidia_devices == 0:
|
||||||
logging.debug('No NVIDIA GPU found.')
|
logging.debug('No NVIDIA GPU found.')
|
||||||
|
|
||||||
return nvidia_gpu_hardware_found
|
return num_nvidia_devices > 0, num_nvidia_devices
|
||||||
|
|
||||||
|
|
||||||
def _installed_nvidia_software_packages():
|
def _installed_nvidia_software_packages():
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# This file is managed centrally. If you find the need to modify this as a
|
# This file is managed centrally. If you find the need to modify this as a
|
||||||
# one-off, please don't. Intead, consult #openstack-charms and ask about
|
# one-off, please don't. Intead, consult #openstack-charms and ask about
|
||||||
# requirements management in charms via bot-control. Thank you.
|
# requirements management in charms via bot-control. Thank you.
|
||||||
charm-tools>=2.4.4
|
|
||||||
coverage>=3.6
|
coverage>=3.6
|
||||||
mock>=1.2
|
mock>=1.2
|
||||||
flake8>=4.0.1; python_version >= '3.6'
|
flake8>=4.0.1; python_version >= '3.6'
|
||||||
|
@ -32,12 +32,12 @@ class TestCharmUtils(unittest.TestCase):
|
|||||||
@patch('nvidia_utils.has_nvidia_gpu_hardware')
|
@patch('nvidia_utils.has_nvidia_gpu_hardware')
|
||||||
def test_is_nvidia_software_to_be_installed(self,
|
def test_is_nvidia_software_to_be_installed(self,
|
||||||
has_nvidia_gpu_hardware_mock):
|
has_nvidia_gpu_hardware_mock):
|
||||||
has_nvidia_gpu_hardware_mock.return_value = True
|
has_nvidia_gpu_hardware_mock.return_value = True, 1
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
charm_utils.is_nvidia_software_to_be_installed_notcached({
|
charm_utils.is_nvidia_software_to_be_installed_notcached({
|
||||||
'force-install-nvidia-vgpu': False}))
|
'force-install-nvidia-vgpu': False}))
|
||||||
|
|
||||||
has_nvidia_gpu_hardware_mock.return_value = False
|
has_nvidia_gpu_hardware_mock.return_value = False, 0
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
charm_utils.is_nvidia_software_to_be_installed_notcached({
|
charm_utils.is_nvidia_software_to_be_installed_notcached({
|
||||||
'force-install-nvidia-vgpu': True}))
|
'force-install-nvidia-vgpu': True}))
|
||||||
@ -88,55 +88,59 @@ class TestCharmUtils(unittest.TestCase):
|
|||||||
def test_check_status(
|
def test_check_status(
|
||||||
self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock,
|
self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock,
|
||||||
check_services_running_mock):
|
check_services_running_mock):
|
||||||
has_hw_mock.return_value = True
|
has_hw_mock.return_value = True, 1
|
||||||
installed_sw_mock.return_value = ['42', '43']
|
installed_sw_mock.return_value = ['42', '43']
|
||||||
is_sw_to_be_installed_mock.return_value = True
|
is_sw_to_be_installed_mock.return_value = True
|
||||||
check_services_running_mock.return_value = (None, None)
|
check_services_running_mock.return_value = (None, None)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
charm_utils.check_status(None, None),
|
charm_utils.check_status(None, None),
|
||||||
ActiveStatus(
|
ActiveStatus(
|
||||||
'Unit is ready: '
|
'Unit is ready (1 GPU)'
|
||||||
'NVIDIA GPU found; installed NVIDIA software: 42, 43'))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
has_hw_mock.return_value = False
|
has_hw_mock.return_value = False, 0
|
||||||
installed_sw_mock.return_value = ['42', '43']
|
installed_sw_mock.return_value = ['42', '43']
|
||||||
is_sw_to_be_installed_mock.return_value = True
|
is_sw_to_be_installed_mock.return_value = True
|
||||||
check_services_running_mock.return_value = (None, None)
|
check_services_running_mock.return_value = (None, None)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
charm_utils.check_status(None, None),
|
charm_utils.check_status(None, None),
|
||||||
ActiveStatus(
|
ActiveStatus(
|
||||||
'Unit is ready: '
|
'Unit is ready (0 GPU)'
|
||||||
'no NVIDIA GPU found; installed NVIDIA software: 42, 43'))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
has_hw_mock.return_value = True
|
has_hw_mock.return_value = True, 1
|
||||||
installed_sw_mock.return_value = []
|
installed_sw_mock.return_value = []
|
||||||
is_sw_to_be_installed_mock.return_value = True
|
is_sw_to_be_installed_mock.return_value = True
|
||||||
check_services_running_mock.return_value = (None, None)
|
check_services_running_mock.return_value = (None, None)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
charm_utils.check_status(None, None),
|
charm_utils.check_status(None, None),
|
||||||
BlockedStatus(
|
BlockedStatus(
|
||||||
'NVIDIA GPU found; no NVIDIA software installed'))
|
'NVIDIA GPU detected, drivers not installed'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
has_hw_mock.return_value = True
|
has_hw_mock.return_value = True, 2
|
||||||
installed_sw_mock.return_value = []
|
installed_sw_mock.return_value = []
|
||||||
is_sw_to_be_installed_mock.return_value = False
|
is_sw_to_be_installed_mock.return_value = False
|
||||||
check_services_running_mock.return_value = (None, None)
|
check_services_running_mock.return_value = (None, None)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
charm_utils.check_status(None, None),
|
charm_utils.check_status(None, None),
|
||||||
ActiveStatus(
|
ActiveStatus(
|
||||||
'Unit is ready: '
|
'Unit is ready (2 GPU)'
|
||||||
'NVIDIA GPU found; no NVIDIA software installed'))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
has_hw_mock.return_value = True
|
has_hw_mock.return_value = True, 1
|
||||||
installed_sw_mock.return_value = ['42', '43']
|
installed_sw_mock.return_value = ['42', '43']
|
||||||
is_sw_to_be_installed_mock.return_value = True
|
is_sw_to_be_installed_mock.return_value = True
|
||||||
check_services_running_mock.return_value = (
|
check_services_running_mock.return_value = (
|
||||||
None, 'Services not running that should be: nvidia-vgpu-mgr')
|
None, 'Services not running that should be: nvidia-vgpu-mgr')
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
charm_utils.check_status(None, None),
|
charm_utils.check_status(None, None),
|
||||||
BlockedStatus(
|
BlockedStatus('manual reboot required')
|
||||||
'NVIDIA GPU found; installed NVIDIA software: 42, 43; '
|
)
|
||||||
'reboot required?'))
|
|
||||||
|
|
||||||
@patch('nvidia_utils._installed_nvidia_software_packages')
|
@patch('nvidia_utils._installed_nvidia_software_packages')
|
||||||
@patch('charm_utils.get_os_codename_package')
|
@patch('charm_utils.get_os_codename_package')
|
||||||
|
@ -54,13 +54,19 @@ class TestNvidiaUtils(unittest.TestCase):
|
|||||||
def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock):
|
def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock):
|
||||||
lspci_parser_mock.return_value.run.return_value = (
|
lspci_parser_mock.return_value.run.return_value = (
|
||||||
self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
|
self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
|
||||||
self.assertTrue(nvidia_utils._has_nvidia_gpu_hardware_notcached())
|
self.assertEqual(
|
||||||
|
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
|
||||||
|
(True, 1)
|
||||||
|
)
|
||||||
|
|
||||||
@patch('nvidia_utils.SimpleParser')
|
@patch('nvidia_utils.SimpleParser')
|
||||||
def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock):
|
def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock):
|
||||||
lspci_parser_mock.return_value.run.return_value = (
|
lspci_parser_mock.return_value.run.return_value = (
|
||||||
self._PCI_DEVICES_LIST_WITHOUT_GPU)
|
self._PCI_DEVICES_LIST_WITHOUT_GPU)
|
||||||
self.assertFalse(nvidia_utils._has_nvidia_gpu_hardware_notcached())
|
self.assertEqual(
|
||||||
|
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
|
||||||
|
(False, 0)
|
||||||
|
)
|
||||||
|
|
||||||
@patch('nvidia_utils.Path')
|
@patch('nvidia_utils.Path')
|
||||||
@patch('nvidia_utils.os.listdir')
|
@patch('nvidia_utils.os.listdir')
|
||||||
|
Loading…
Reference in New Issue
Block a user