Adds more `ipmitool` errors as retryable

This change extends the list of `ipmitool` errors that
ironic treats as retryable on failure.

Change-Id: I5fddc95404a1725f03bd26da51932c3ece5a5a35
Story: 2001989
Task: 19611
This commit is contained in:
Ilya Etingof 2018-05-09 14:58:39 +02:00
parent 6ff9a6b14c
commit 4d020a68fb
3 changed files with 137 additions and 85 deletions

View File

@ -121,12 +121,22 @@ ipmitool_command_options = {
'dual_bridge': ['ipmitool', '-m', '0', '-b', '0', '-t', '0',
'-B', '0', '-T', '0', '-h']}
# Note(TheJulia): This string is hardcoded in ipmitool's lanplus driver
# and is substituted in return for the error code received from the IPMI
# controller. As of 1.8.15, no internationalization support appears to
# be in ipmitool which means the string should always be returned in this
# form regardless of locale.
IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session']
# Note(etingof): For more information on IPMI error codes and `ipmitool`
# human interface please refer to:
# https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf
# https://github.com/scottjg/ipmitool/blob/master/lib/ipmi_strings.c#L367
#
# Note(TheJulia): The strings below are hardcoded in ipmitool and get
# substituted in return for the error code received from the IPMI controller.
# As of 1.8.15, no internationalization support appears to be in ipmitool
# which means the strings should always be returned in this form regardless
# of locale.
IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session',
# Generic completion codes considered retryable
'Node busy',
'Timeout',
'Out of space',
'BMC initialization in progress']
# NOTE(lucasagomes): A mapping for the boot devices and their hexadecimal
# value. For more information about these values see the "Set System Boot

View File

@ -338,6 +338,114 @@ def _make_password_file_stub(password):
yield awesome_password_filename
class IPMIToolPrivateMethodTestCaseMeta(type):
"""Generate and inject parametrized test cases"""
ipmitool_errors = [
'insufficient resources for session',
'Node busy',
'Timeout',
'Out of space',
'BMC initialization in progress'
]
def __new__(mcs, name, bases, attrs):
def gen_test_methods(message):
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def exec_ipmitool_exception_retry(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
mock_exec.side_effect = [
processutils.ProcessExecutionError(
stderr=message
),
(None, None)
]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to retry twice.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=2, group='ipmi')
ipmi._exec_ipmitool(self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(2, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def exec_ipmitool_exception_retries_exceeded(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
mock_exec.side_effect = [processutils.ProcessExecutionError(
stderr=message
)]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to timeout.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=1, group='ipmi')
self.assertRaises(processutils.ProcessExecutionError,
ipmi._exec_ipmitool,
self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(1, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def exec_ipmitool_exception_non_retryable_failure(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
# Return a retryable error, then an error that cannot
# be retried thus resulting in a single retry
# attempt by _exec_ipmitool.
mock_exec.side_effect = [
processutils.ProcessExecutionError(
stderr=message
),
processutils.ProcessExecutionError(
stderr="Unknown"
),
]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to retry up
# to 3 times.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=3, group='ipmi')
self.assertRaises(processutils.ProcessExecutionError,
ipmi._exec_ipmitool,
self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(2, mock_exec.call_count)
return (exec_ipmitool_exception_retry,
exec_ipmitool_exception_retries_exceeded,
exec_ipmitool_exception_non_retryable_failure)
# NOTE(etingof): this loop will inject some methods into the
# class being built to be then picked up by unittest to test
# ironic's handling of specific `ipmitool` errors
for ipmi_message in mcs.ipmitool_errors:
for fun in gen_test_methods(ipmi_message):
suffix = ipmi_message.lower().replace(' ', '_')
test_name = "test_%s_%s" % (fun.__name__, suffix)
attrs[test_name] = fun
return type.__new__(mcs, name, bases, attrs)
@six.add_metaclass(IPMIToolPrivateMethodTestCaseMeta)
class IPMIToolPrivateMethodTestCase(db_base.DbTestCase):
def setUp(self):
@ -359,6 +467,10 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase):
fixtures.MockPatchObject(time, 'sleep', autospec=True))
self.mock_sleep = mock_sleep_fixture.mock
# NOTE(etingof): besides the conventional unittest methods that follow,
# the metaclass will inject some more `test_` methods aimed at testing
# the handling of specific errors potentially returned by the `ipmitool`
def _mock_system_random_distribution(self):
# random.SystemRandom with gauss distribution is used by oslo_service's
# BackoffLoopingCall, it multiplies default interval (equals to 1) by
@ -1068,85 +1180,6 @@ class IPMIToolPrivateMethodTestCase(db_base.DbTestCase):
mock_exec.assert_called_once_with(*args)
self.assertEqual(1, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test__exec_ipmitool_exception_retry(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
mock_exec.side_effect = [
processutils.ProcessExecutionError(
stderr="insufficient resources for session"
),
(None, None)
]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to retry twice.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=2, group='ipmi')
ipmi._exec_ipmitool(self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(2, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test__exec_ipmitool_exception_retries_exceeded(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
mock_exec.side_effect = [processutils.ProcessExecutionError(
stderr="insufficient resources for session"
)]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to timeout.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=1, group='ipmi')
self.assertRaises(processutils.ProcessExecutionError,
ipmi._exec_ipmitool,
self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(1, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test__exec_ipmitool_exception_non_retryable_failure(
self, mock_exec, mock_support):
ipmi.LAST_CMD_TIME = {}
mock_support.return_value = False
# Return a retryable error, then an error that cannot
# be retried thus resulting in a single retry
# attempt by _exec_ipmitool.
mock_exec.side_effect = [
processutils.ProcessExecutionError(
stderr="insufficient resources for session"
),
processutils.ProcessExecutionError(
stderr="Unknown"
),
]
# Directly set the configuration values such that
# the logic will cause _exec_ipmitool to retry up
# to 3 times.
self.config(min_command_interval=1, group='ipmi')
self.config(retry_timeout=3, group='ipmi')
self.assertRaises(processutils.ProcessExecutionError,
ipmi._exec_ipmitool,
self.info, 'A B C')
mock_support.assert_called_once_with('timing')
self.assertEqual(2, mock_exec.call_count)
@mock.patch.object(ipmi, '_is_option_supported', autospec=True)
@mock.patch.object(ipmi, '_make_password_file', _make_password_file_stub)
@mock.patch.object(utils, 'execute', autospec=True)

View File

@ -0,0 +1,9 @@
---
fixes:
- Adds more ``ipmitool`` error messages to be treated as retryable by the
ipmitool interfaces (such as power and management hardware interfaces).
Specifically, ``Node busy``, ``Timeout``, ``Out of space`` and
``BMC initialization in progress`` reporting emitted by ``ipmitool``
will cause ironic to retry IPMI command.
This change should improve the reliability of IPMI-based communicaton
with BMC.