Merge "Fix issue where server fails to reboot" into stable/rocky
This commit is contained in:
commit
d9c9eaf68c
|
@ -15,6 +15,8 @@
|
||||||
DRAC power interface
|
DRAC power interface
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
from ironic_lib import metrics_utils
|
from ironic_lib import metrics_utils
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
from oslo_utils import importutils
|
from oslo_utils import importutils
|
||||||
|
@ -42,6 +44,10 @@ if drac_constants:
|
||||||
|
|
||||||
REVERSE_POWER_STATES = dict((v, k) for (k, v) in POWER_STATES.items())
|
REVERSE_POWER_STATES = dict((v, k) for (k, v) in POWER_STATES.items())
|
||||||
|
|
||||||
|
POWER_STATE_TRIES = 15
|
||||||
|
POWER_STATE_SLEEP = 2
|
||||||
|
POWER_STATE_CHANGE_FAIL = 'The command failed to set RequestedState'
|
||||||
|
|
||||||
|
|
||||||
def _get_power_state(node):
|
def _get_power_state(node):
|
||||||
"""Returns the current power state of the node.
|
"""Returns the current power state of the node.
|
||||||
|
@ -100,18 +106,60 @@ def _set_power_state(node, power_state):
|
||||||
_commit_boot_list_change(node)
|
_commit_boot_list_change(node)
|
||||||
|
|
||||||
client = drac_common.get_drac_client(node)
|
client = drac_common.get_drac_client(node)
|
||||||
target_power_state = REVERSE_POWER_STATES[power_state]
|
tries = POWER_STATE_TRIES
|
||||||
|
|
||||||
try:
|
# Cases have been seen where the iDRAC returns a SYS021 error even when
|
||||||
client.set_power_state(target_power_state)
|
# the server is in the right power state and a valid power state change
|
||||||
except drac_exceptions.BaseClientException as exc:
|
# is attempted. Retry in this case.
|
||||||
LOG.error('DRAC driver failed to set power state for node '
|
while tries > 0:
|
||||||
'%(node_uuid)s to %(power_state)s. '
|
# The iDRAC will return a SYS021 error if the server is powered off
|
||||||
'Reason: %(error)s.',
|
# and a reboot is requested. In this situation, convert the requested
|
||||||
{'node_uuid': node.uuid,
|
# reboot into a power on to avoid this error. To minimize the chance
|
||||||
'power_state': power_state,
|
# of a race condition, it is critical to do this check immediately
|
||||||
'error': exc})
|
# before sending the power state change command. This keeps the
|
||||||
raise exception.DracOperationError(error=exc)
|
# window during which the server could change power states without us
|
||||||
|
# knowing about it as small as possible.
|
||||||
|
calc_power_state = power_state
|
||||||
|
if power_state == states.REBOOT:
|
||||||
|
current_power_state = _get_power_state(node)
|
||||||
|
# If the server is not on, then power it on instead of rebooting
|
||||||
|
if current_power_state != states.POWER_ON:
|
||||||
|
calc_power_state = states.POWER_ON
|
||||||
|
|
||||||
|
target_power_state = REVERSE_POWER_STATES[calc_power_state]
|
||||||
|
|
||||||
|
try:
|
||||||
|
client.set_power_state(target_power_state)
|
||||||
|
break
|
||||||
|
except drac_exceptions.BaseClientException as exc:
|
||||||
|
if (power_state == states.REBOOT
|
||||||
|
and POWER_STATE_CHANGE_FAIL in str(exc)
|
||||||
|
and tries > 0):
|
||||||
|
LOG.warning('DRAC driver failed to set power state for node '
|
||||||
|
'%(node_uuid)s to %(calc_power_state)s. '
|
||||||
|
'Reason: %(error)s. Retrying...',
|
||||||
|
{'node_uuid': node.uuid,
|
||||||
|
'calc_power_state': calc_power_state,
|
||||||
|
'error': exc})
|
||||||
|
tries -= 1
|
||||||
|
time.sleep(POWER_STATE_SLEEP)
|
||||||
|
else:
|
||||||
|
LOG.error('DRAC driver failed to set power state for node '
|
||||||
|
'%(node_uuid)s to %(calc_power_state)s. '
|
||||||
|
'Reason: %(error)s.',
|
||||||
|
{'node_uuid': node.uuid,
|
||||||
|
'calc_power_state': calc_power_state,
|
||||||
|
'error': exc})
|
||||||
|
raise exception.DracOperationError(error=exc)
|
||||||
|
|
||||||
|
if tries <= 0:
|
||||||
|
error_msg = (_('DRAC driver timed out while trying to set the power '
|
||||||
|
'state for node %(node_uuid)s to '
|
||||||
|
'%(calc_power_state)s.') %
|
||||||
|
{'node_uuid': node.uuid,
|
||||||
|
'calc_power_state': calc_power_state})
|
||||||
|
LOG.error(error_msg)
|
||||||
|
raise exception.DracOperationError(error_msg)
|
||||||
|
|
||||||
|
|
||||||
class DracPower(base.PowerInterface):
|
class DracPower(base.PowerInterface):
|
||||||
|
@ -187,10 +235,4 @@ class DracPower(base.PowerInterface):
|
||||||
"timeout=%(timeout)s",
|
"timeout=%(timeout)s",
|
||||||
{'timeout': timeout})
|
{'timeout': timeout})
|
||||||
|
|
||||||
current_power_state = _get_power_state(task.node)
|
_set_power_state(task.node, states.REBOOT)
|
||||||
if current_power_state == states.POWER_ON:
|
|
||||||
target_power_state = states.REBOOT
|
|
||||||
else:
|
|
||||||
target_power_state = states.POWER_ON
|
|
||||||
|
|
||||||
_set_power_state(task.node, target_power_state)
|
|
||||||
|
|
|
@ -144,3 +144,58 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||||
|
|
||||||
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
|
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
|
||||||
mock_client.set_power_state.assert_called_once_with(drac_power_state)
|
mock_client.set_power_state.assert_called_once_with(drac_power_state)
|
||||||
|
|
||||||
|
@mock.patch('time.sleep')
|
||||||
|
def test_reboot_retries_success(self, mock_sleep, mock_get_drac_client):
|
||||||
|
mock_client = mock_get_drac_client.return_value
|
||||||
|
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
|
||||||
|
exc = drac_exceptions.DRACOperationFailed(
|
||||||
|
drac_messages=['The command failed to set RequestedState'])
|
||||||
|
mock_client.set_power_state.side_effect = [exc, None]
|
||||||
|
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=False) as task:
|
||||||
|
task.driver.power.reboot(task)
|
||||||
|
|
||||||
|
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
|
||||||
|
self.assertEqual(2, mock_client.set_power_state.call_count)
|
||||||
|
mock_client.set_power_state.assert_has_calls(
|
||||||
|
[mock.call(drac_power_state),
|
||||||
|
mock.call(drac_power_state)])
|
||||||
|
|
||||||
|
@mock.patch('time.sleep')
|
||||||
|
def test_reboot_retries_fail(self, mock_sleep, mock_get_drac_client):
|
||||||
|
mock_client = mock_get_drac_client.return_value
|
||||||
|
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
|
||||||
|
exc = drac_exceptions.DRACOperationFailed(
|
||||||
|
drac_messages=['The command failed to set RequestedState'])
|
||||||
|
mock_client.set_power_state.side_effect = exc
|
||||||
|
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=False) as task:
|
||||||
|
self.assertRaises(exception.DracOperationError,
|
||||||
|
task.driver.power.reboot, task)
|
||||||
|
|
||||||
|
self.assertEqual(drac_power.POWER_STATE_TRIES,
|
||||||
|
mock_client.set_power_state.call_count)
|
||||||
|
|
||||||
|
@mock.patch('time.sleep')
|
||||||
|
def test_reboot_retries_power_change_success(self, mock_sleep,
|
||||||
|
mock_get_drac_client):
|
||||||
|
mock_client = mock_get_drac_client.return_value
|
||||||
|
mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
|
||||||
|
drac_constants.POWER_ON]
|
||||||
|
exc = drac_exceptions.DRACOperationFailed(
|
||||||
|
drac_messages=['The command failed to set RequestedState'])
|
||||||
|
mock_client.set_power_state.side_effect = [exc, None]
|
||||||
|
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=False) as task:
|
||||||
|
task.driver.power.reboot(task)
|
||||||
|
|
||||||
|
self.assertEqual(2, mock_client.set_power_state.call_count)
|
||||||
|
drac_power_state1 = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
|
||||||
|
drac_power_state2 = drac_power.REVERSE_POWER_STATES[states.REBOOT]
|
||||||
|
mock_client.set_power_state.assert_has_calls(
|
||||||
|
[mock.call(drac_power_state1),
|
||||||
|
mock.call(drac_power_state2)])
|
||||||
|
|
|
@ -112,6 +112,17 @@ if not dracclient:
|
||||||
sys.modules['dracclient.exceptions'] = dracclient.exceptions
|
sys.modules['dracclient.exceptions'] = dracclient.exceptions
|
||||||
dracclient.exceptions.BaseClientException = type('BaseClientException',
|
dracclient.exceptions.BaseClientException = type('BaseClientException',
|
||||||
(Exception,), {})
|
(Exception,), {})
|
||||||
|
|
||||||
|
dracclient.exceptions.DRACRequestFailed = type(
|
||||||
|
'DRACRequestFailed', (dracclient.exceptions.BaseClientException,), {})
|
||||||
|
|
||||||
|
class DRACOperationFailed(dracclient.exceptions.DRACRequestFailed):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super(DRACOperationFailed, self).__init__(
|
||||||
|
'DRAC operation failed. Messages: %(drac_messages)s' % kwargs)
|
||||||
|
|
||||||
|
dracclient.exceptions.DRACOperationFailed = DRACOperationFailed
|
||||||
|
|
||||||
# Now that the external library has been mocked, if anything had already
|
# Now that the external library has been mocked, if anything had already
|
||||||
# loaded any of the drivers, reload them.
|
# loaded any of the drivers, reload them.
|
||||||
if 'ironic.drivers.modules.drac' in sys.modules:
|
if 'ironic.drivers.modules.drac' in sys.modules:
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixed a bug where rebooting a node managed by the ``idrac`` hardware
|
||||||
|
type when using the WS-MAN power interface sometimes fails with a
|
||||||
|
``The command failed to set RequestedState`` error. See bug `2007487
|
||||||
|
<https://storyboard.openstack.org/#!/story/2007487>`_ for details.
|
Loading…
Reference in New Issue