Retry on iDRAC SYS518 errors for all requests

When iDRAC is still loading, some of its components are unavailable
for a short period of time and it returns an HTTP 500 error with the
SYS518 error code and the message "iDRAC is currently unable to display
any information because data sources are unavailable.". There is already
a retry on GET requests that fail with a server-side error, but this can
also happen for other requests, for example, DELETE.
This patch adds a retry for all SYS518 failures regardless of the
request method. This helps to fix a known intermittent issue when
deleting a set of volumes one after another while iDRAC is reloading
after deleting each volume.

Co-Authored-By: Swapnil Machikar <swapnil.machikar@yahoo.com>
Change-Id: Ief49157438813c650fdd34d60dfe7ea8ea2d1702
This commit is contained in:
Kamlesh Chauvhan 2022-11-17 15:06:29 +00:00
parent 56efdee0ee
commit 2b760a6c9e
3 changed files with 58 additions and 3 deletions

View File

@ -0,0 +1,8 @@
---
fixes:
- |
Adds retries on iDRAC errors with code SYS518 and message "iDRAC is
currently unable to display any information because data sources are
unavailable." for all request types, in addition to the existing retries
for GET requests. This helps to fix a known intermittent issue when
deleting a set of volumes one after another while iDRAC is reloading
after deleting each volume.

View File

@ -85,6 +85,15 @@ class Connector(object):
"""Close this connector and the associated HTTP session."""
self._session.close()
def check_retry_on_exception(self, exception_msg):
    """Check whether a failed request should be retried.

    A retry is requested when the error text carries the iDRAC ``SYS518``
    code, which iDRAC reports while its data sources are temporarily
    unavailable.

    :param exception_msg: the error message of the failed request; any
        object is accepted and converted with ``str()`` before matching.
    :returns: True if the message contains ``SYS518``, False otherwise.
    """
    # Convert once so the membership test and the log line share one value.
    message = str(exception_msg)
    if 'SYS518' not in message:
        return False

    LOG.debug('iDRAC is not yet ready after previous operation. '
              'Error: %(err)s', {'err': message})
    return True
def _op(self, method, path='', data=None, headers=None, blocking=False,
timeout=60, **extra_session_req_kwargs):
"""Generic RESTful request handler.
@ -197,9 +206,9 @@ class Connector(object):
"%s", e.message)
raise
except exceptions.ServerSideError as e:
if method.lower() != 'get' or self._server_side_retries <= 0:
raise
else:
if ((method.lower() == 'get'
or self.check_retry_on_exception(e.message))
and self._server_side_retries > 0):
LOG.warning('Got server side error %s in response to a '
'GET request, retrying after %d seconds',
e, self._server_side_retries)
@ -208,6 +217,8 @@ class Connector(object):
return self._op(method, path, data=data, headers=headers,
blocking=blocking, timeout=timeout,
**extra_session_req_kwargs)
else:
raise
if blocking and response.status_code == 202:
if not response.headers.get('Location'):

View File

@ -405,6 +405,42 @@ class ConnectorOpTestCase(base.TestCase):
self.assertEqual(10, mock_sleep.call_count)
self.assertEqual(11, self.request.call_count)
@mock.patch('time.sleep', autospec=True)
def test_op_retry_on_server_500_sys518(self, mock_sleep):
    """A DELETE failing with an iDRAC SYS518 error is retried."""
    # Fake error response whose extended info carries the SYS518 id.
    sys518_response = mock.Mock()
    sys518_response.status_code = 500
    sys518_response.json.return_value = {
        "error": {
            "@Message.ExtendedInfo": [
                {'MessageId': 'IDRAC.2.7.SYS518'},
            ],
        },
    }

    fake_reply = self.request.return_value
    fake_reply.status_code = http_client.INTERNAL_SERVER_ERROR
    fake_reply.json.side_effect = exceptions.ServerSideError(
        method='DELETE', url='http://foo.bar', response=sys518_response)

    with self.assertRaises(exceptions.ServerSideError):
        self.conn._op('DELETE', 'http://foo.bar')

    # Expect 10 sleeps across 11 request attempts before the error
    # finally propagates to the caller.
    self.assertEqual(10, mock_sleep.call_count)
    self.assertEqual(11, self.request.call_count)
@mock.patch('time.sleep', autospec=True)
def test_op_retry_on_server_500_other_than_sys518(self, mock_sleep):
    """A DELETE failing with a non-SYS518 server error is not retried."""
    # Fake error response whose extended info carries a different id.
    other_response = mock.Mock()
    other_response.status_code = 500
    other_response.json.return_value = {
        "error": {
            "@Message.ExtendedInfo": [
                {'MessageId': 'IDRAC.2.7.SYS999'},
            ],
        },
    }

    fake_reply = self.request.return_value
    fake_reply.status_code = http_client.INTERNAL_SERVER_ERROR
    fake_reply.json.side_effect = exceptions.ServerSideError(
        method='DELETE', url='http://foo.bar', response=other_response)

    with self.assertRaises(exceptions.ServerSideError):
        self.conn._op('DELETE', 'http://foo.bar')

    # The error must surface after a single attempt, without sleeping.
    self.assertEqual(0, mock_sleep.call_count)
    self.assertEqual(1, self.request.call_count)
def test_access_error(self):
self.conn._auth = None