diff --git a/ironic/conf/ipmi.py b/ironic/conf/ipmi.py index 9545bde179..832caae04e 100644 --- a/ironic/conf/ipmi.py +++ b/ironic/conf/ipmi.py @@ -35,6 +35,14 @@ opts = [ 'sent to a server. There is a risk with some hardware ' 'that setting this too low may cause the BMC to crash. ' 'Recommended setting is 5 seconds.')), + cfg.BoolOpt('use_ipmitool_retries', + default=True, + help=_('When set to True and the parameters are supported by ' + 'ipmitool, the number of retries and the retry ' + 'interval are passed to ipmitool as parameters, and ' + 'ipmitool will do the retries. When set to False, ' + 'ironic will retry the ipmitool commands. ' + 'Recommended setting is True')), cfg.BoolOpt('kill_on_timeout', default=True, help=_('Kill `ipmitool` process invoked by ironic to read ' diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index 295d186e00..3871b9dc96 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -479,7 +479,10 @@ def _exec_ipmitool(driver_info, command, check_exit_code=None, if _is_option_supported('timing'): args.append('-R') - args.append(str(num_tries)) + if CONF.ipmi.use_ipmitool_retries: + args.append(str(num_tries)) + else: + args.append('1') args.append('-N') args.append(str(CONF.ipmi.min_command_interval)) @@ -529,9 +532,12 @@ def _exec_ipmitool(driver_info, command, check_exit_code=None, IPMITOOL_RETRYABLE_FAILURES + CONF.ipmi.additional_retryable_ipmi_errors) if x in six.text_type(e)] + # If Ironic is doing retries then retry all errors + retry_failures = (err_list + or not CONF.ipmi.use_ipmitool_retries) if ((time.time() > end_time) or (num_tries == 0) - or not err_list): + or not retry_failures): LOG.error('IPMI Error while attempting "%(cmd)s" ' 'for node %(node)s. Error: %(error)s', {'node': driver_info['uuid'], diff --git a/ironic/tests/unit/drivers/modules/test_ipmitool.py b/ironic/tests/unit/drivers/modules/test_ipmitool.py index a9857f8251..e661b47127 100644 --- a/ironic/tests/unit/drivers/modules/test_ipmitool.py +++ b/ironic/tests/unit/drivers/modules/test_ipmitool.py @@ -1026,6 +1026,63 @@ class IPMIToolPrivateMethodTestCase(Base): mock_support.assert_called_once_with('timing') mock_exec.assert_called_once_with(*args) + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(ipmi, '_make_password_file', _make_password_file_stub) + @mock.patch.object(utils, 'execute', autospec=True) + def test__exec_ipmitool_with_ironic_retries( + self, mock_exec, mock_support): + args = [ + 'ipmitool', + '-I', 'lanplus', + '-H', self.info['address'], + '-L', self.info['priv_level'], + '-U', self.info['username'], + '-v', + '-R', '1', + '-N', '5', + '-f', awesome_password_filename, + 'A', 'B', 'C', + ] + + mock_support.return_value = True + mock_exec.return_value = (None, None) + + self.config(use_ipmitool_retries=False, group='ipmi') + + ipmi._exec_ipmitool(self.info, 'A B C') + + mock_support.assert_called_once_with('timing') + mock_exec.assert_called_once_with(*args) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(ipmi, '_make_password_file', _make_password_file_stub) + @mock.patch.object(utils, 'execute', autospec=True) + def test__exec_ipmitool_with_ironic_retries_multiple( + self, mock_exec, mock_support): + + mock_exec.side_effect = [ + processutils.ProcessExecutionError( + stderr="Unknown" + ), + processutils.ProcessExecutionError( + stderr="Unknown" + ), + processutils.ProcessExecutionError( + stderr="Unknown" + ), + ] + + self.config(min_command_interval=1, group='ipmi') + self.config(command_retry_timeout=3, group='ipmi') + self.config(use_ipmitool_retries=False, group='ipmi') + + self.assertRaises(processutils.ProcessExecutionError, + ipmi._exec_ipmitool, + self.info, 'A B C') + + mock_support.assert_called_once_with('timing') + self.assertEqual(3, mock_exec.call_count) + def test__exec_ipmitool_wait(self): mock_popen = mock.MagicMock() mock_popen.poll.side_effect = [1, 1, 1, 1, 1] diff --git a/releasenotes/notes/ipmi-retries-min-command-interval-070cd7eff5eb74dd.yaml b/releasenotes/notes/ipmi-retries-min-command-interval-070cd7eff5eb74dd.yaml new file mode 100644 index 0000000000..413224b028 --- /dev/null +++ b/releasenotes/notes/ipmi-retries-min-command-interval-070cd7eff5eb74dd.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + When Ironic is doing IPMI retries the configured ``min_command_interval`` + should be used instead of a default value of ``1``, which may be too short + for some BMCs. diff --git a/releasenotes/notes/ipmitool-use_ipmitool_retries-b55b2b8ed5cab603.yaml b/releasenotes/notes/ipmitool-use_ipmitool_retries-b55b2b8ed5cab603.yaml new file mode 100644 index 0000000000..3a051f0285 --- /dev/null +++ b/releasenotes/notes/ipmitool-use_ipmitool_retries-b55b2b8ed5cab603.yaml @@ -0,0 +1,16 @@ +--- +features: + - | + Adds a new ``[ipmi]use_ipmitool_retries`` option. When set to + ``True`` and timing is supported by ipmitool, the number of + retries and command interval will be passed to ipmitool so + that ipmitool will do the retries. When set to ``False``, + ironic will do the retries. Default is ``True``. +issues: + - | + Some BMCs do not support the ``Channel Cipher Suites`` command + that newer versions of ipmitool use. These versions of + ipmitool will resend this command for each ipmitool retry, + resulting in long response times. Setting + ``[ipmi]use_ipmitool_retries`` to ``false`` will avoid this + situation by implementing retries on the ironic level.