Handle internal server errors while configuring secure boot

At least on some Dell machines, the Redfish SecureBoot resource is
unavailable during configuration: GET requests return HTTP 503.
Sushy does retry these, but not for long enough (the error message
suggests at least 30 seconds, which would be too much to just integrate
in Sushy). This change treats internal errors the same way as a
mismatching "enabled" value, i.e. it just waits.

Change-Id: I676f48de6b6195a69ea76b4e8b45a034220db2fa
This commit is contained in:
Dmitry Tantsur 2023-12-01 17:36:31 +01:00
parent 6c9de5324b
commit a6e3a7f50c
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
3 changed files with 20 additions and 3 deletions

View File

@ -1225,8 +1225,16 @@ class RedfishManagement(base.ManagementInterface):
def _wait_for_secure_boot(self, task, sb, state): def _wait_for_secure_boot(self, task, sb, state):
# NOTE(dtantsur): at least Dell machines change secure boot status via # NOTE(dtantsur): at least Dell machines change secure boot status via
# a BIOS configuration job. A reboot is needed to apply it. # a BIOS configuration job. A reboot is needed to apply it.
sb.refresh(force=True)
if sb.enabled == state: def _try_refresh():
try:
sb.refresh(force=True)
except sushy.exceptions.ServerSideError:
return False # sushy already does logging, just return
else:
return True
if _try_refresh() and sb.enabled == state:
return return
LOG.info('Rebooting node %(node)s to change secure boot state to ' LOG.info('Rebooting node %(node)s to change secure boot state to '
@ -1244,7 +1252,7 @@ class RedfishManagement(base.ManagementInterface):
{'node': task.node.uuid, 'value': state, {'node': task.node.uuid, 'value': state,
'current': sb.enabled}) 'current': sb.enabled})
time.sleep(BOOT_MODE_CONFIG_INTERVAL) time.sleep(BOOT_MODE_CONFIG_INTERVAL)
sb.refresh(force=True) _try_refresh()
if sb.enabled != state: if sb.enabled != state:
msg = (_('Timeout reached while waiting for secure boot state ' msg = (_('Timeout reached while waiting for secure boot state '

View File

@ -1745,6 +1745,9 @@ class RedfishManagementTestCase(db_base.DbTestCase):
def side_effect(force): def side_effect(force):
nonlocal attempts nonlocal attempts
attempts -= 1 attempts -= 1
if attempts >= 2:
raise sushy.exceptions.ServerSideError(
"POST", 'img-url', mock.MagicMock())
if attempts <= 0: if attempts <= 0:
fake_sb.enabled = True fake_sb.enabled = True

View File

@ -0,0 +1,6 @@
---
fixes:
- |
When configuring secure boot via Redfish, internal server errors are now
retried for a longer period than the default, to account for the SecureBoot
resource being unavailable during configuration on some hardware.