Merge "Fix idrac-wsman set_power_state to wait on HW" into stable/train

This commit is contained in:
Zuul 2021-09-17 02:12:30 +00:00 committed by Gerrit Code Review
commit f809f8687b
3 changed files with 44 additions and 25 deletions

View File

@ -24,6 +24,7 @@ from oslo_utils import importutils
from ironic.common import exception
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as cond_utils
from ironic.drivers import base
from ironic.drivers.modules.drac import common as drac_common
from ironic.drivers.modules.drac import management as drac_management
@ -87,15 +88,17 @@ def _commit_boot_list_change(node):
node.save()
def _set_power_state(node, power_state):
def _set_power_state(task, power_state, timeout=None):
"""Turns the server power on/off or do a reboot.
:param node: an ironic node object.
:param task: a TaskManager instance containing the node to act on.
:param power_state: a power state from :mod:`ironic.common.states`.
:param timeout: Time to wait for the node to reach the requested state.
When requested state is reboot, not used as not waiting then.
:raises: InvalidParameterValue if required DRAC credentials are missing.
:raises: DracOperationError on an error from python-dracclient
"""
node = task.node
# NOTE(ifarkas): DRAC interface doesn't allow changing the boot device
# multiple times in a row without a reboot. This is
# because a change need to be committed via a
@ -131,6 +134,20 @@ def _set_power_state(node, power_state):
try:
client.set_power_state(target_power_state)
if calc_power_state == states.REBOOT:
# TODO(rloo): Support timeouts!
if timeout is not None:
LOG.warning("The 'idrac-wsman' Power Interface does not "
"support 'timeout' parameter when setting "
"power state to reboot. Ignoring "
"timeout=%(timeout)s",
{'timeout': timeout})
else:
# Skipped for reboot as can't match reboot with on/off.
# Reboot so far has been part of workflow that is not followed
# by another power state change that could break the flow.
cond_utils.node_wait_for_power_state(
task, calc_power_state, timeout)
break
except drac_exceptions.BaseClientException as exc:
if (power_state == states.REBOOT
@ -214,20 +231,13 @@ class DracWSManPower(base.PowerInterface):
:param task: a TaskManager instance containing the node to act on.
:param power_state: a power state from :mod:`ironic.common.states`.
:param timeout: timeout (in seconds). Unsupported by this interface.
:param timeout: Time to wait for the node to reach the requested state.
When requested state is reboot, not used as not waiting then.
:raises: InvalidParameterValue if required DRAC credentials are
missing.
:raises: DracOperationError on an error from python-dracclient.
"""
# TODO(rloo): Support timeouts!
if timeout is not None:
LOG.warning(
"The 'idrac' Power Interface's 'set_power_state' method "
"doesn't support the 'timeout' parameter. Ignoring "
"timeout=%(timeout)s",
{'timeout': timeout})
_set_power_state(task.node, power_state)
_set_power_state(task, power_state, timeout)
@METRICS.timer('DracPower.reboot')
@task_manager.require_exclusive_lock
@ -240,14 +250,7 @@ class DracWSManPower(base.PowerInterface):
missing.
:raises: DracOperationError on an error from python-dracclient.
"""
# TODO(rloo): Support timeouts!
if timeout is not None:
LOG.warning("The 'idrac' Power Interface's 'reboot' method "
"doesn't support the 'timeout' parameter. Ignoring "
"timeout=%(timeout)s",
{'timeout': timeout})
_set_power_state(task.node, states.REBOOT)
_set_power_state(task, states.REBOOT, timeout)
class DracPower(DracWSManPower):

View File

@ -71,7 +71,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch.object(drac_power.LOG, 'warning')
def test_set_power_state(self, mock_log, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
drac_constants.POWER_OFF]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.power.set_power_state(task, states.POWER_OFF)
@ -97,6 +98,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch.object(drac_power.LOG, 'warning')
def test_set_power_state_timeout(self, mock_log, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
drac_constants.POWER_OFF]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
@ -105,7 +108,7 @@ class DracPowerTestCase(test_utils.BaseDracTest):
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_OFF]
mock_client.set_power_state.assert_called_once_with(drac_power_state)
self.assertTrue(mock_log.called)
self.assertFalse(mock_log.called)
@mock.patch.object(drac_power.LOG, 'warning')
def test_reboot_while_powered_on(self, mock_log, mock_get_drac_client):
@ -136,7 +139,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
def test_reboot_while_powered_off(self, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
drac_constants.POWER_ON]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
@ -148,7 +152,9 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch('time.sleep')
def test_reboot_retries_success(self, mock_sleep, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
drac_constants.POWER_OFF,
drac_constants.POWER_ON]
exc = drac_exceptions.DRACOperationFailed(
drac_messages=['The command failed to set RequestedState'])
mock_client.set_power_state.side_effect = [exc, None]

View File

@ -0,0 +1,10 @@
---
fixes:
- |
Fixes ``idrac-wsman`` power interface to wait for the hardware to reach the
target state before returning. For systems where soft power off at the end
of deployment to boot to instance failed and forced hard power off was
used, this left node successfully deployed in off state without any errors.
This broke other workflows expecting node to be on booted into
OS at the end of deployment. Additional information can be found in
`story 2009204 <https://storyboard.openstack.org/#!/story/2009204>`_.