From 4d020a68fbcfc49be88841f92acb409bd64cbedd Mon Sep 17 00:00:00 2001 From: Ilya Etingof Date: Wed, 9 May 2018 14:58:39 +0200 Subject: [PATCH] Adds more `ipmitool` errors as retryable This change extends the list of `ipmitool` errors that ironic treats as retryable on failure. Change-Id: I5fddc95404a1725f03bd26da51932c3ece5a5a35 Story: 2001989 Task: 19611 --- ironic/drivers/modules/ipmitool.py | 22 +- .../unit/drivers/modules/test_ipmitool.py | 191 ++++++++++-------- ...able-ipmitool-errors-1c9351a89ff0ec1a.yaml | 9 + 3 files changed, 137 insertions(+), 85 deletions(-) create mode 100644 releasenotes/notes/add-more-retryable-ipmitool-errors-1c9351a89ff0ec1a.yaml diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index b6c505c3c9..9a0399364d 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -121,12 +121,22 @@ ipmitool_command_options = { 'dual_bridge': ['ipmitool', '-m', '0', '-b', '0', '-t', '0', '-B', '0', '-T', '0', '-h']} -# Note(TheJulia): This string is hardcoded in ipmitool's lanplus driver -# and is substituted in return for the error code received from the IPMI -# controller. As of 1.8.15, no internationalization support appears to -# be in ipmitool which means the string should always be returned in this -# form regardless of locale. -IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session'] +# Note(etingof): For more information on IPMI error codes and `ipmitool` +# human interface please refer to: +# https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf +# https://github.com/scottjg/ipmitool/blob/master/lib/ipmi_strings.c#L367 +# +# Note(TheJulia): The strings below are hardcoded in ipmitool and get +# substituted in return for the error code received from the IPMI controller. +# As of 1.8.15, no internationalization support appears to be in ipmitool +# which means the strings should always be returned in this form regardless +# of locale. +IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session', + # Generic completion codes considered retryable + 'Node busy', + 'Timeout', + 'Out of space', + 'BMC initialization in progress'] # NOTE(lucasagomes): A mapping for the boot devices and their hexadecimal # value. For more information about these values see the "Set System Boot diff --git a/ironic/tests/unit/drivers/modules/test_ipmitool.py b/ironic/tests/unit/drivers/modules/test_ipmitool.py index fdf7d48339..2ebd4755dd 100644 --- a/ironic/tests/unit/drivers/modules/test_ipmitool.py +++ b/ironic/tests/unit/drivers/modules/test_ipmitool.py @@ -338,6 +338,114 @@ def _make_password_file_stub(password): yield awesome_password_filename +class IPMIToolPrivateMethodTestCaseMeta(type): + """Generate and inject parametrized test cases""" + + ipmitool_errors = [ + 'insufficient resources for session', + 'Node busy', + 'Timeout', + 'Out of space', + 'BMC initialization in progress' + ] + + def __new__(mcs, name, bases, attrs): + + def gen_test_methods(message): + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def exec_ipmitool_exception_retry( + self, mock_exec, mock_support): + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + mock_exec.side_effect = [ + processutils.ProcessExecutionError( + stderr=message + ), + (None, None) + ] + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to retry twice. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=2, group='ipmi') + + ipmi._exec_ipmitool(self.info, 'A B C') + + mock_support.assert_called_once_with('timing') + self.assertEqual(2, mock_exec.call_count) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def exec_ipmitool_exception_retries_exceeded( + self, mock_exec, mock_support): + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + + mock_exec.side_effect = [processutils.ProcessExecutionError( + stderr=message + )] + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to timeout. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=1, group='ipmi') + + self.assertRaises(processutils.ProcessExecutionError, + ipmi._exec_ipmitool, + self.info, 'A B C') + mock_support.assert_called_once_with('timing') + self.assertEqual(1, mock_exec.call_count) + + @mock.patch.object(ipmi, '_is_option_supported', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def exec_ipmitool_exception_non_retryable_failure( + self, mock_exec, mock_support): + ipmi.LAST_CMD_TIME = {} + mock_support.return_value = False + + # Return a retryable error, then an error that cannot + # be retried thus resulting in a single retry + # attempt by _exec_ipmitool. + mock_exec.side_effect = [ + processutils.ProcessExecutionError( + stderr=message + ), + processutils.ProcessExecutionError( + stderr="Unknown" + ), + ] + + # Directly set the configuration values such that + # the logic will cause _exec_ipmitool to retry up + # to 3 times. + self.config(min_command_interval=1, group='ipmi') + self.config(retry_timeout=3, group='ipmi') + + self.assertRaises(processutils.ProcessExecutionError, + ipmi._exec_ipmitool, + self.info, 'A B C') + mock_support.assert_called_once_with('timing') + self.assertEqual(2, mock_exec.call_count) + + return (exec_ipmitool_exception_retry, + exec_ipmitool_exception_retries_exceeded, + exec_ipmitool_exception_non_retryable_failure) + + # NOTE(etingof): this loop will inject some methods into the + # class being built to be then picked up by unittest to test + # ironic's handling of specific `ipmitool` errors + for ipmi_message in mcs.ipmitool_errors: + for fun in gen_test_methods(ipmi_message): + suffix = ipmi_message.lower().replace(' ', '_') + test_name = "test_%s_%s" % (fun.__name__, suffix) + attrs[test_name] = fun + + return type.__new__(mcs, name, bases, attrs) + + +@six.add_metaclass(IPMIToolPrivateMethodTestCaseMeta) class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): def setUp(self): @@ -359,6 +467,10 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): fixtures.MockPatchObject(time, 'sleep', autospec=True)) self.mock_sleep = mock_sleep_fixture.mock + # NOTE(etingof): besides the conventional unittest methods that follow, + # the metaclass will inject some more `test_` methods aimed at testing + # the handling of specific errors potentially returned by the `ipmitool` + def _mock_system_random_distribution(self): # random.SystemRandom with gauss distribution is used by oslo_service's # BackoffLoopingCall, it multiplies default interval (equals to 1) by @@ -1068,85 +1180,6 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase): mock_exec.assert_called_once_with(*args) self.assertEqual(1, mock_exec.call_count) - @mock.patch.object(ipmi, '_is_option_supported', autospec=True) - @mock.patch.object(utils, 'execute', autospec=True) - def test__exec_ipmitool_exception_retry( - self, mock_exec, mock_support): - - ipmi.LAST_CMD_TIME = {} - mock_support.return_value = False - mock_exec.side_effect = [ - processutils.ProcessExecutionError( - stderr="insufficient resources for session" - ), - (None, None) - ] - - # Directly set the configuration values such that - # the logic will cause _exec_ipmitool to retry twice. - self.config(min_command_interval=1, group='ipmi') - self.config(retry_timeout=2, group='ipmi') - - ipmi._exec_ipmitool(self.info, 'A B C') - - mock_support.assert_called_once_with('timing') - self.assertEqual(2, mock_exec.call_count) - - @mock.patch.object(ipmi, '_is_option_supported', autospec=True) - @mock.patch.object(utils, 'execute', autospec=True) - def test__exec_ipmitool_exception_retries_exceeded( - self, mock_exec, mock_support): - - ipmi.LAST_CMD_TIME = {} - mock_support.return_value = False - - mock_exec.side_effect = [processutils.ProcessExecutionError( - stderr="insufficient resources for session" - )] - - # Directly set the configuration values such that - # the logic will cause _exec_ipmitool to timeout. - self.config(min_command_interval=1, group='ipmi') - self.config(retry_timeout=1, group='ipmi') - - self.assertRaises(processutils.ProcessExecutionError, - ipmi._exec_ipmitool, - self.info, 'A B C') - mock_support.assert_called_once_with('timing') - self.assertEqual(1, mock_exec.call_count) - - @mock.patch.object(ipmi, '_is_option_supported', autospec=True) - @mock.patch.object(utils, 'execute', autospec=True) - def test__exec_ipmitool_exception_non_retryable_failure( - self, mock_exec, mock_support): - - ipmi.LAST_CMD_TIME = {} - mock_support.return_value = False - - # Return a retryable error, then an error that cannot - # be retried thus resulting in a single retry - # attempt by _exec_ipmitool. - mock_exec.side_effect = [ - processutils.ProcessExecutionError( - stderr="insufficient resources for session" - ), - processutils.ProcessExecutionError( - stderr="Unknown" - ), - ] - - # Directly set the configuration values such that - # the logic will cause _exec_ipmitool to retry up - # to 3 times. - self.config(min_command_interval=1, group='ipmi') - self.config(retry_timeout=3, group='ipmi') - - self.assertRaises(processutils.ProcessExecutionError, - ipmi._exec_ipmitool, - self.info, 'A B C') - mock_support.assert_called_once_with('timing') - self.assertEqual(2, mock_exec.call_count) - @mock.patch.object(ipmi, '_is_option_supported', autospec=True) @mock.patch.object(ipmi, '_make_password_file', _make_password_file_stub) @mock.patch.object(utils, 'execute', autospec=True) diff --git a/releasenotes/notes/add-more-retryable-ipmitool-errors-1c9351a89ff0ec1a.yaml b/releasenotes/notes/add-more-retryable-ipmitool-errors-1c9351a89ff0ec1a.yaml new file mode 100644 index 0000000000..c64fbd05b7 --- /dev/null +++ b/releasenotes/notes/add-more-retryable-ipmitool-errors-1c9351a89ff0ec1a.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - Adds more ``ipmitool`` error messages to be treated as retryable by the + ipmitool interfaces (such as power and management hardware interfaces). + Specifically, ``Node busy``, ``Timeout``, ``Out of space`` and + ``BMC initialization in progress`` reporting emitted by ``ipmitool`` + will cause ironic to retry IPMI command. + This change should improve the reliability of IPMI-based communicaton + with BMC.