From ee5d4942a1c33736ffe05ec01619142be400c2f4 Mon Sep 17 00:00:00 2001 From: Julian Edwards Date: Fri, 24 Mar 2017 17:32:38 +1000 Subject: [PATCH] Don't retry power status if power action fails The old code blindly required power status even if the power action failed. Now, it will retry the power action only when it detects a retryable failure, and will only poll for power status if the power action is successful. This patch also moves the logic for handling waiting for power status into the conductor so that the logic is standardised between drivers. Change-Id: Ib48056e05d359848386ac057b58921f40b7bdd60 Co-Authored-By: Sam Betts Related-Bug: #1675529 Closes-Bug: #1692895 --- etc/ironic/ironic.conf.sample | 37 ++++- ironic/conductor/utils.py | 22 +++ ironic/conf/conductor.py | 5 + ironic/conf/ipmi.py | 21 ++- ironic/drivers/modules/ipmitool.py | 140 +++++------------- .../unit/drivers/modules/test_ipmitool.py | 109 ++++++++------ .../fix-bug-1675529-479357c217819420.yaml | 14 ++ 7 files changed, 195 insertions(+), 153 deletions(-) create mode 100644 releasenotes/notes/fix-bug-1675529-479357c217819420.yaml diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 6109895ca4..89ba119cff 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -237,7 +237,7 @@ # setting this value, please make sure that every enabled # hardware type will have the same set of enabled storage # interfaces on every ironic-conductor service. (list value) -#enabled_storage_interfaces = noop +#enabled_storage_interfaces = cinder,noop # Default storage interface to be used for nodes that do not # have storage_interface field set. A complete list of storage @@ -1134,6 +1134,12 @@ # Minimum value: 1 #soft_power_off_timeout = 600 +# Number of seconds to wait for power operations to complete +# on the baremetal node before declaring the power operation +# has failed (integer value) +# Minimum value: 2 +#power_state_change_timeout = 30 + [console] @@ -1872,13 +1878,28 @@ # From ironic # -# Maximum time in seconds to retry IPMI operations. There is a -# tradeoff when setting this value. Setting this too low may -# cause older BMCs to crash and require a hard reset. However, -# setting too high can cause the sync power state periodic -# task to hang when there are slow or unresponsive BMCs. -# (integer value) -#retry_timeout = 60 +# Maximum time in seconds to retry, retryable IPMI operations. +# For example if the requested action fails because the BMC is +# busy. There is a tradeoff when setting this value. Setting +# this too low may cause older BMCs to crash and require a +# hard reset. However, setting too high can cause the sync +# power state periodic task to hang when there are slow or +# unresponsive BMCs. (integer value) +#command_retry_timeout = 60 + +# DEPRECATED: Maximum time in seconds to retry IPMI +# operations. There is a tradeoff when setting this value. +# Setting this too low may cause older BMCs to crash and +# require a hard reset. However, setting too high can cause +# the sync power state periodic task to hang when there are +# slow or unresponsive BMCs. (integer value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +# Reason: Option ipmi.command_retry_timeout should be used to +# define IPMI command retries and option +# conductor.power_state_change_timeout should be use to define +# timeout value for waiting for power operations to complete +#retry_timeout = # Minimum time, in seconds, between IPMI operations sent to a # server. There is a risk with some hardware that setting this diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index cd3cb950b4..3ab85628dc 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -14,6 +14,7 @@ from oslo_config import cfg from oslo_log import log +from oslo_service import loopingcall from oslo_utils import excutils from oslo_utils import reflection @@ -68,6 +69,27 @@ def node_set_boot_device(task, device, persistent=False): persistent=persistent) +def node_wait_for_power_state(task, new_state, timeout=None): + retry_timeout = (timeout or CONF.conductor.power_state_change_timeout) + + def _wait(): + status = task.driver.power.get_power_state(task) + if status == new_state: + raise loopingcall.LoopingCallDone(retvalue=status) + # NOTE(sambetts): Return False to trigger BackOffLoopingCall to start + # backing off. + return False + + try: + timer = loopingcall.BackOffLoopingCall(_wait) + return timer.start(initial_delay=1, timeout=retry_timeout).wait() + except loopingcall.LoopingCallTimeOut: + LOG.error('Timed out after %(retry_timeout) secs waiting for power ' + '%(state)s on node %(node_id)s.', + {'state': new_state, 'node_id': task.node.uuid}) + raise exception.PowerStateFailure(pstate=new_state) + + @task_manager.require_exclusive_lock def node_power_action(task, new_state, timeout=None): """Change power state or reset for a node. diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index b62cbd7f71..36ab5a6531 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -145,6 +145,11 @@ opts = [ min=1, help=_('Timeout (in seconds) of soft reboot and soft power ' 'off operation. This value always has to be positive.')), + cfg.IntOpt('power_state_change_timeout', + min=2, default=30, + help=_('Number of seconds to wait for power operations to ' + 'complete on the baremetal node before declaring the ' + 'power operation has failed')), ] diff --git a/ironic/conf/ipmi.py b/ironic/conf/ipmi.py index 10629c5b99..e79daf5bd5 100644 --- a/ironic/conf/ipmi.py +++ b/ironic/conf/ipmi.py @@ -20,14 +20,31 @@ from oslo_config import cfg from ironic.common.i18n import _ opts = [ - cfg.IntOpt('retry_timeout', + cfg.IntOpt('command_retry_timeout', default=60, + help=_('Maximum time in seconds to retry, retryable IPMI ' + 'operations. For example if the requested action fails ' + 'because the BMC is busy. There is a tradeoff when ' + 'setting this value. Setting this too low may cause ' + 'older BMCs to crash and require a hard reset. However, ' + 'setting too high can cause the sync power state ' + 'periodic task to hang when there are slow or ' + 'unresponsive BMCs.')), + cfg.IntOpt('retry_timeout', help=_('Maximum time in seconds to retry IPMI operations. ' 'There is a tradeoff when setting this value. Setting ' 'this too low may cause older BMCs to crash and require ' 'a hard reset. However, setting too high can cause the ' 'sync power state periodic task to hang when there are ' - 'slow or unresponsive BMCs.')), + 'slow or unresponsive BMCs.'), + deprecated_for_removal=True, + deprecated_reason=_('Option ipmi.command_retry_timeout should ' + 'be used to define IPMI command retries ' + 'and option ' + 'conductor.power_state_change_timeout ' + 'should be use to define timeout value for ' + 'waiting for power operations to ' + 'complete')), cfg.IntOpt('min_command_interval', default=5, help=_('Minimum time, in seconds, between IPMI operations ' diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index 65606894c9..a630d8c3e5 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -40,7 +40,6 @@ from ironic_lib import metrics_utils from ironic_lib import utils as ironic_utils from oslo_concurrency import processutils from oslo_log import log as logging -from oslo_service import loopingcall from oslo_utils import excutils from oslo_utils import strutils import six @@ -51,6 +50,7 @@ from ironic.common.i18n import _ from ironic.common import states from ironic.common import utils from ironic.conductor import task_manager +from ironic.conductor import utils as cond_utils from ironic.conf import CONF from ironic.drivers import base from ironic.drivers.modules import console_utils @@ -396,9 +396,11 @@ def _exec_ipmitool(driver_info, command, check_exit_code=None): args.append(option) args.append(driver_info[name]) + # TODO(sambetts) Remove useage of ipmi.retry_timeout in Queens + timeout = CONF.ipmi.retry_timeout or CONF.ipmi.command_retry_timeout + # specify retry timing more precisely, if supported - num_tries = max( - (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1) + num_tries = max((timeout // CONF.ipmi.min_command_interval), 1) if _is_option_supported('timing'): args.append('-R') @@ -407,7 +409,7 @@ def _exec_ipmitool(driver_info, command, check_exit_code=None): args.append('-N') args.append(str(CONF.ipmi.min_command_interval)) - end_time = (time.time() + CONF.ipmi.retry_timeout) + end_time = (time.time() + timeout) while True: num_tries = num_tries - 1 @@ -455,26 +457,8 @@ def _exec_ipmitool(driver_info, command, check_exit_code=None): LAST_CMD_TIME[driver_info['address']] = time.time() -def _sleep_time(iter): - """Return the time-to-sleep for the n'th iteration of a retry loop. - - This implementation increases exponentially. - - :param iter: iteration number - :returns: number of seconds to sleep - - """ - if iter <= 1: - return 1 - return iter ** 2 - - -def _set_and_wait(power_action, driver_info, timeout=None): - """Helper function for DynamicLoopingCall. - - This method changes the power state and polls the BMC until the desired - power state is reached, or CONF.ipmi.retry_timeout would be exceeded by the - next iteration. +def _set_and_wait(task, power_action, driver_info, timeout=None): + """Helper function for performing an IPMI power action This method assumes the caller knows the current power state and does not check it prior to changing the power state. Most BMCs should be fine, but @@ -489,7 +473,8 @@ def _set_and_wait(power_action, driver_info, timeout=None): :returns: one of ironic.common.states """ - retry_timeout = timeout or CONF.ipmi.retry_timeout + # TODO(sambetts) Remove useage of ipmi.retry_timeout in Queens + default_timeout = CONF.ipmi.retry_timeout if power_action == states.POWER_ON: cmd_name = "on" @@ -500,50 +485,27 @@ def _set_and_wait(power_action, driver_info, timeout=None): elif power_action == states.SOFT_POWER_OFF: cmd_name = "soft" target_state = states.POWER_OFF - retry_timeout = timeout or CONF.conductor.soft_power_off_timeout + default_timeout = CONF.conductor.soft_power_off_timeout - def _wait(mutable): - try: - # Only issue power change command once - if mutable['iter'] < 0: - _exec_ipmitool(driver_info, "power %s" % cmd_name) - else: - mutable['power'] = _power_status(driver_info) - except (exception.PasswordFileFailedToCreate, - processutils.ProcessExecutionError, - exception.IPMIFailure): - # Log failures but keep trying - LOG.warning("IPMI power %(state)s failed for node %(node)s.", - {'state': cmd_name, 'node': driver_info['uuid']}) - finally: - mutable['iter'] += 1 + timeout = timeout or default_timeout - if mutable['power'] == target_state: - raise loopingcall.LoopingCallDone() - - sleep_time = _sleep_time(mutable['iter']) - if (sleep_time + mutable['total_time']) > retry_timeout: - # Stop if the next loop would exceed maximum retry_timeout - LOG.error('IPMI power %(state)s timed out after ' - '%(tries)s retries on node %(node_id)s.', - {'state': cmd_name, 'tries': mutable['iter'], - 'node_id': driver_info['uuid']}) - mutable['power'] = states.ERROR - raise loopingcall.LoopingCallDone() - else: - mutable['total_time'] += sleep_time - return sleep_time - - # Use mutable objects so the looped method can change them. - # Start 'iter' from -1 so that the first two checks are one second apart. - status = {'power': None, 'iter': -1, 'total_time': 0} - - timer = loopingcall.DynamicLoopingCall(_wait, status) - timer.start().wait() - return status['power'] + # NOTE(sambetts): Retries for ipmi power action failure will be handled by + # the _exec_ipmitool function, so no need to wrap this call in its own + # retries. + cmd = "power %s" % cmd_name + try: + _exec_ipmitool(driver_info, cmd) + except (exception.PasswordFileFailedToCreate, + processutils.ProcessExecutionError) as e: + LOG.warning("IPMI power action %(cmd)s failed for node %(node_id)s " + "with error: %(error)s.", + {'node_id': driver_info['uuid'], 'cmd': cmd, 'error': e}) + raise exception.IPMIFailure(cmd=cmd) + return cond_utils.node_wait_for_power_state(task, target_state, + timeout=timeout) -def _power_on(driver_info, timeout=None): +def _power_on(task, driver_info, timeout=None): """Turn the power ON for this node. :param driver_info: the ipmitool parameters for accessing a node. @@ -553,10 +515,10 @@ def _power_on(driver_info, timeout=None): :raises: IPMIFailure on an error from ipmitool (from _power_status call). """ - return _set_and_wait(states.POWER_ON, driver_info, timeout=timeout) + return _set_and_wait(task, states.POWER_ON, driver_info, timeout=timeout) -def _power_off(driver_info, timeout=None): +def _power_off(task, driver_info, timeout=None): """Turn the power OFF for this node. :param driver_info: the ipmitool parameters for accessing a node. @@ -566,10 +528,10 @@ def _power_off(driver_info, timeout=None): :raises: IPMIFailure on an error from ipmitool (from _power_status call). """ - return _set_and_wait(states.POWER_OFF, driver_info, timeout=timeout) + return _set_and_wait(task, states.POWER_OFF, driver_info, timeout=timeout) -def _soft_power_off(driver_info, timeout=None): +def _soft_power_off(task, driver_info, timeout=None): """Turn the power SOFT OFF for this node. :param driver_info: the ipmitool parameters for accessing a node. @@ -579,7 +541,8 @@ def _soft_power_off(driver_info, timeout=None): :raises: IPMIFailure on an error from ipmitool (from _power_status call). """ - return _set_and_wait(states.SOFT_POWER_OFF, driver_info, timeout=timeout) + return _set_and_wait(task, states.SOFT_POWER_OFF, driver_info, + timeout=timeout) def _power_status(driver_info): @@ -839,34 +802,20 @@ class IPMIPower(base.PowerInterface): if power_state == states.POWER_ON: driver_utils.ensure_next_boot_device(task, driver_info) - target_state = states.POWER_ON - state = _power_on(driver_info, timeout=timeout) + _power_on(task, driver_info, timeout=timeout) elif power_state == states.POWER_OFF: - target_state = states.POWER_OFF - state = _power_off(driver_info, timeout=timeout) + _power_off(task, driver_info, timeout=timeout) elif power_state == states.SOFT_POWER_OFF: - target_state = states.POWER_OFF - state = _soft_power_off(driver_info, timeout=timeout) + _soft_power_off(task, driver_info, timeout=timeout) elif power_state == states.SOFT_REBOOT: - intermediate_state = _soft_power_off(driver_info, timeout=timeout) - intermediate_target_state = states.POWER_OFF - if intermediate_state != intermediate_target_state: - raise exception.PowerStateFailure( - pstate=(_( - "%(intermediate)s while on %(power_state)s") % - {'intermediate': intermediate_target_state, - 'power_state': power_state})) + _soft_power_off(task, driver_info, timeout=timeout) driver_utils.ensure_next_boot_device(task, driver_info) - target_state = states.POWER_ON - state = _power_on(driver_info, timeout=timeout) + _power_on(task, driver_info, timeout=timeout) else: raise exception.InvalidParameterValue( _("set_power_state called " "with invalid power state %s.") % power_state) - if state != target_state: - raise exception.PowerStateFailure(pstate=target_state) - @METRICS.timer('IPMIPower.reboot') @task_manager.require_exclusive_lock def reboot(self, task, timeout=None): @@ -884,18 +833,9 @@ class IPMIPower(base.PowerInterface): """ driver_info = _parse_driver_info(task.node) - intermediate_state = _power_off(driver_info, timeout=timeout) - if intermediate_state != states.POWER_OFF: - raise exception.PowerStateFailure( - pstate=(_( - "%(power_off)s while on %(reboot)s") % - {'power_off': states.POWER_OFF, - 'reboot': states.REBOOT})) + _power_off(task, driver_info, timeout=timeout) driver_utils.ensure_next_boot_device(task, driver_info) - state = _power_on(driver_info, timeout=timeout) - - if state != states.POWER_ON: - raise exception.PowerStateFailure(pstate=states.POWER_ON) + _power_on(task, driver_info, timeout=timeout) def get_supported_power_states(self, task): """Get a list of the supported power states. diff --git a/ironic/tests/unit/drivers/modules/test_ipmitool.py b/ironic/tests/unit/drivers/modules/test_ipmitool.py index aff0758b1f..fdb8706847 100644 --- a/ironic/tests/unit/drivers/modules/test_ipmitool.py +++ b/ironic/tests/unit/drivers/modules/test_ipmitool.py @@ -341,7 +341,10 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): def setUp(self): super(IPMIToolPrivateMethodTestCase, self).setUp() - self.node = obj_utils.get_test_node( + self.driver_name = "fake_ipmitool" + mgr_utils.mock_the_extension_manager(driver=self.driver_name) + self.driver = driver_factory.get_driver(self.driver_name) + self.node = obj_utils.create_test_node( self.context, driver='fake_ipmitool', driver_info=INFO_DICT) @@ -1252,10 +1255,11 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): mock.call(self.info, "power status"), mock.call(self.info, "power status")] - state = ipmi._power_on(self.info) + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.PowerStateFailure, + ipmi._power_on, task, self.info, timeout=2) self.assertEqual(mock_exec.call_args_list, expected) - self.assertEqual(states.ERROR, state) @mock.patch.object(ipmi, '_exec_ipmitool', autospec=True) @mock.patch('eventlet.greenthread.sleep', autospec=True) @@ -1272,10 +1276,11 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): expected = [mock.call(self.info, "power soft"), mock.call(self.info, "power status")] - state = ipmi._soft_power_off(self.info, timeout=None) + with task_manager.acquire(self.context, self.node.uuid) as task: + state = ipmi._soft_power_off(task, self.info) self.assertEqual(mock_exec.call_args_list, expected) - self.assertEqual(states.POWER_OFF, state) + self.assertEqual(state, states.POWER_OFF) @mock.patch.object(ipmi, '_exec_ipmitool', autospec=True) @mock.patch('eventlet.greenthread.sleep', autospec=True) @@ -1293,10 +1298,27 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): mock.call(self.info, "power status"), mock.call(self.info, "power status")] - state = ipmi._soft_power_off(self.info, timeout=2) + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.PowerStateFailure, + ipmi._soft_power_off, task, self.info, timeout=2) self.assertEqual(mock_exec.call_args_list, expected) - self.assertEqual(states.ERROR, state) + + @mock.patch.object(ipmi, '_power_status', autospec=True) + @mock.patch.object(ipmi, '_exec_ipmitool', autospec=True) + @mock.patch('eventlet.greenthread.sleep', autospec=True) + def test___set_and_wait_no_needless_status_polling( + self, sleep_mock, mock_exec, mock_status, mock_sleep): + # Check that if the call to power state change fails, it doesn't + # call power_status(). + self.config(retry_timeout=2, group='ipmi') + + mock_exec.side_effect = exception.IPMIFailure(cmd='power on') + + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.IPMIFailure, ipmi._power_on, task, + self.info) + self.assertFalse(mock_status.called) class IPMIToolDriverTestCase(db_base.DbTestCase): @@ -1378,7 +1400,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): self.driver.power.set_power_state(task, states.POWER_ON) - mock_on.assert_called_once_with(self.info, timeout=None) + mock_on.assert_called_once_with(task, self.info, timeout=None) self.assertFalse(mock_off.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1393,7 +1415,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.POWER_ON, timeout=2) - mock_on.assert_called_once_with(self.info, timeout=2) + mock_on.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_off.called) @mock.patch.object(driver_utils, 'ensure_next_boot_device', autospec=True) @@ -1410,7 +1432,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.POWER_ON) mock_next_boot.assert_called_once_with(task, self.info) - mock_on.assert_called_once_with(self.info, timeout=None) + mock_on.assert_called_once_with(task, self.info, timeout=None) self.assertFalse(mock_off.called) @mock.patch.object(driver_utils, 'ensure_next_boot_device', autospec=True) @@ -1428,7 +1450,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): timeout=2) mock_next_boot.assert_called_once_with(task, self.info) - mock_on.assert_called_once_with(self.info, timeout=2) + mock_on.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_off.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1443,7 +1465,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): self.driver.power.set_power_state(task, states.POWER_OFF) - mock_off.assert_called_once_with(self.info, timeout=None) + mock_off.assert_called_once_with(task, self.info, timeout=None) self.assertFalse(mock_on.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1459,7 +1481,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.POWER_OFF, timeout=2) - mock_off.assert_called_once_with(self.info, timeout=2) + mock_off.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_on.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1474,7 +1496,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): self.driver.power.set_power_state(task, states.SOFT_POWER_OFF) - mock_off.assert_called_once_with(self.info, timeout=None) + mock_off.assert_called_once_with(task, self.info, timeout=None) self.assertFalse(mock_on.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1490,7 +1512,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.SOFT_POWER_OFF, timeout=2) - mock_off.assert_called_once_with(self.info, timeout=2) + mock_off.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_on.called) @mock.patch.object(driver_utils, 'ensure_next_boot_device', autospec=True) @@ -1507,9 +1529,8 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): self.driver.power.set_power_state(task, states.SOFT_REBOOT) mock_next_boot.assert_called_once_with(task, self.info) - - mock_off.assert_called_once_with(self.info, timeout=None) - mock_on.assert_called_once_with(self.info, timeout=None) + mock_off.assert_called_once_with(task, self.info, timeout=None) + mock_on.assert_called_once_with(task, self.info, timeout=None) @mock.patch.object(driver_utils, 'ensure_next_boot_device', autospec=True) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1527,9 +1548,8 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.SOFT_REBOOT, timeout=2) mock_next_boot.assert_called_once_with(task, self.info) - - mock_off.assert_called_once_with(self.info, timeout=2) - mock_on.assert_called_once_with(self.info, timeout=2) + mock_off.assert_called_once_with(task, self.info, timeout=2) + mock_on.assert_called_once_with(task, self.info, timeout=2) @mock.patch.object(driver_utils, 'ensure_next_boot_device', autospec=True) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1538,7 +1558,8 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): mock_next_boot): self.config(retry_timeout=0, group='ipmi') - mock_off.return_value = states.POWER_ON + mock_off.side_effect = exception.PowerStateFailure( + pstate=states.POWER_ON) with task_manager.acquire(self.context, self.node['uuid']) as task: @@ -1548,7 +1569,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.SOFT_REBOOT, timeout=2) - mock_off.assert_called_once_with(self.info, timeout=2) + mock_off.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_next_boot.called) self.assertFalse(mock_on.called) @@ -1557,7 +1578,8 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): def test_set_power_on_fail(self, mock_off, mock_on): self.config(retry_timeout=0, group='ipmi') - mock_on.return_value = states.ERROR + mock_on.side_effect = exception.PowerStateFailure( + pstate=states.POWER_ON) with task_manager.acquire(self.context, self.node.uuid) as task: self.assertRaises(exception.PowerStateFailure, @@ -1565,7 +1587,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): task, states.POWER_ON) - mock_on.assert_called_once_with(self.info, timeout=None) + mock_on.assert_called_once_with(task, self.info, timeout=None) self.assertFalse(mock_off.called) @mock.patch.object(ipmi, '_power_on', autospec=True) @@ -1573,7 +1595,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): def test_set_power_on_timeout_fail(self, mock_off, mock_on): self.config(retry_timeout=0, group='ipmi') - mock_on.return_value = states.ERROR + mock_on.side_effect = exception.PowerStateFailure(pstate=states.ERROR) with task_manager.acquire(self.context, self.node.uuid) as task: self.assertRaises(exception.PowerStateFailure, @@ -1582,7 +1604,7 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): states.POWER_ON, timeout=2) - mock_on.assert_called_once_with(self.info, timeout=2) + mock_on.assert_called_once_with(task, self.info, timeout=2) self.assertFalse(mock_off.called) def test_set_power_invalid_state(self): @@ -1655,11 +1677,11 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): mock_on.return_value = states.POWER_ON manager.attach_mock(mock_off, 'power_off') manager.attach_mock(mock_on, 'power_on') - expected = [mock.call.power_off(self.info, timeout=None), - mock.call.power_on(self.info, timeout=None)] with task_manager.acquire(self.context, self.node.uuid) as task: + expected = [mock.call.power_off(task, self.info, timeout=None), + mock.call.power_on(task, self.info, timeout=None)] self.driver.power.reboot(task) mock_next_boot.assert_called_once_with(task, self.info) @@ -1671,32 +1693,32 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): def test_reboot_timeout_ok(self, mock_on, mock_off, mock_next_boot): manager = mock.MagicMock() # NOTE(rloo): if autospec is True, then manager.mock_calls is empty - mock_off.return_value = states.POWER_OFF - mock_on.return_value = states.POWER_ON manager.attach_mock(mock_off, 'power_off') manager.attach_mock(mock_on, 'power_on') - expected = [mock.call.power_off(self.info, timeout=2), - mock.call.power_on(self.info, timeout=2)] with task_manager.acquire(self.context, self.node.uuid) as task: + expected = [mock.call.power_off(task, self.info, timeout=2), + mock.call.power_on(task, self.info, timeout=2)] + self.driver.power.reboot(task, timeout=2) mock_next_boot.assert_called_once_with(task, self.info) - self.assertEqual(manager.mock_calls, expected) + self.assertEqual(manager.mock_calls, expected) @mock.patch.object(ipmi, '_power_off', spec_set=types.FunctionType) @mock.patch.object(ipmi, '_power_on', spec_set=types.FunctionType) def test_reboot_fail_power_off(self, mock_on, mock_off): manager = mock.MagicMock() # NOTE(rloo): if autospec is True, then manager.mock_calls is empty - mock_off.return_value = states.ERROR + mock_off.side_effect = exception.PowerStateFailure( + pstate=states.POWER_OFF) manager.attach_mock(mock_off, 'power_off') manager.attach_mock(mock_on, 'power_on') - expected = [mock.call.power_off(self.info, timeout=None)] with task_manager.acquire(self.context, self.node.uuid) as task: + expected = [mock.call.power_off(task, self.info, timeout=None)] self.assertRaises(exception.PowerStateFailure, self.driver.power.reboot, task) @@ -1709,14 +1731,15 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): manager = mock.MagicMock() # NOTE(rloo): if autospec is True, then manager.mock_calls is empty mock_off.return_value = states.POWER_OFF - mock_on.return_value = states.ERROR + mock_on.side_effect = exception.PowerStateFailure( + pstate=states.POWER_ON) manager.attach_mock(mock_off, 'power_off') manager.attach_mock(mock_on, 'power_on') - expected = [mock.call.power_off(self.info, timeout=None), - mock.call.power_on(self.info, timeout=None)] with task_manager.acquire(self.context, self.node.uuid) as task: + expected = [mock.call.power_off(task, self.info, timeout=None), + mock.call.power_on(task, self.info, timeout=None)] self.assertRaises(exception.PowerStateFailure, self.driver.power.reboot, task) @@ -1728,15 +1751,15 @@ class IPMIToolDriverTestCase(db_base.DbTestCase): def test_reboot_timeout_fail(self, mock_on, mock_off): manager = mock.MagicMock() # NOTE(rloo): if autospec is True, then manager.mock_calls is empty - mock_off.return_value = states.POWER_OFF - mock_on.return_value = states.ERROR + mock_on.side_effect = exception.PowerStateFailure( + pstate=states.POWER_ON) manager.attach_mock(mock_off, 'power_off') manager.attach_mock(mock_on, 'power_on') - expected = [mock.call.power_off(self.info, timeout=2), - mock.call.power_on(self.info, timeout=2)] with task_manager.acquire(self.context, self.node.uuid) as task: + expected = [mock.call.power_off(task, self.info, timeout=2), + mock.call.power_on(task, self.info, timeout=2)] self.assertRaises(exception.PowerStateFailure, self.driver.power.reboot, task, timeout=2) diff --git a/releasenotes/notes/fix-bug-1675529-479357c217819420.yaml b/releasenotes/notes/fix-bug-1675529-479357c217819420.yaml new file mode 100644 index 0000000000..6e56caa791 --- /dev/null +++ b/releasenotes/notes/fix-bug-1675529-479357c217819420.yaml @@ -0,0 +1,14 @@ +--- +deprecations: + - | + Configuration option IPMI.retry_timeout is deprecated in favor of new + options IPMI.command_retry_timeout, and + CONDUCTOR.power_state_change_timeout +fixes: + - | + Prevents the IPMI driver from needlessly checking status if the power + change action fails. Additionally stop retrying power actions and power + status polls if we receive a non-retryable error from ipmitool. + https//bugs.launchpad.net/ironic/+bug/1675529. New configuration option + `power_state_change_timeout` added to define how many seconds to wait for a + server to change power state when a power action is requested.