Merge "Remove redfish cache entry upon errors"

This commit is contained in:
Zuul 2021-12-06 19:34:03 +00:00 committed by Gerrit Code Review
commit 39d91de922
3 changed files with 111 additions and 1 deletions

View File

@ -240,6 +240,16 @@ class SessionCache(object):
# NOTE(etingof): perhaps this session token is no good
if isinstance(exc_val, sushy.exceptions.ConnectionError):
self.__class__._sessions.pop(self._session_key, None)
# NOTE(TheJulia): A hard access error has surfaced, we
# likely need to eliminate the session.
if isinstance(exc_val, sushy.exceptions.AccessError):
self.__class__._sessions.pop(self._session_key, None)
# NOTE(TheJulia): Something very bad has happened, such
# as the session is out of date, and refresh of the SessionService
# failed resulting in an AttributeError surfacing.
# https://storyboard.openstack.org/#!/story/2009719
if isinstance(exc_val, AttributeError):
self.__class__._sessions.pop(self._session_key, None)
@classmethod
def _expire_oldest_session(cls):
@ -364,6 +374,23 @@ def _get_connection(node, lambda_fun, *args):
'auth_type': driver_info['auth_type'],
'node': node.uuid, 'error': e})
raise exception.RedfishConnectionError(node=node.uuid, error=e)
except sushy.exceptions.AccessError as e:
LOG.warning('For node %(node)s, we receieved an authentication '
'access error from address %(address)s with auth_type '
'%(auth_type)s. The client will not be re-used upon '
'the next re-attempt. Please ensure your using the '
'correct credentials. Error: %(error)s',
{'address': driver_info['address'],
'auth_type': driver_info['auth_type'],
'node': node.uuid, 'error': e})
raise exception.RedfishError(node=node.uuid, error=e)
except AttributeError as e:
LOG.warning('For node %(node)s, we receieved at AttributeError '
'when attempting to utilize the client. A new '
'client session shall be used upon the next attempt.'
'Error: %(error)s',
{'node': node.uuid, 'error': e})
raise exception.RedfishError(node=node.uuid, error=e)
try:
return _get_cached_connection(lambda_fun, *args)

View File

@ -426,8 +426,9 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase):
# Redfish specific configurations
self.config(connection_attempts=3, group='redfish')
fake_conn = mock_sushy.return_value
fake_conn = mock.Mock()
fake_conn.get_system.side_effect = sushy.exceptions.ConnectionError()
mock_sushy.return_value = fake_conn
self.assertRaises(exception.RedfishConnectionError,
redfish_utils.get_system, self.node)
@ -477,3 +478,72 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase):
redfish_utils.wait_until_get_system_ready, self.node)
self.assertEqual(fake_conn.get_system.call_count, 2)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(sushy, 'Sushy', autospec=True)
@mock.patch('ironic.drivers.modules.redfish.utils.'
'SessionCache._sessions', {})
def test_get_system_resource_access_error_retry(self, mock_sushy):
# Sushy access errors HTTP Errors
class fake_response(object):
status_code = 401
body = None
def json():
return {}
fake_conn = mock_sushy.return_value
fake_system = mock.Mock()
fake_conn.get_system.side_effect = iter(
[
sushy.exceptions.AccessError(
method='GET',
url='http://path/to/url',
response=fake_response),
fake_system,
])
self.assertRaises(exception.RedfishError,
redfish_utils.get_system, self.node)
# Retry, as in next power sync perhaps
client = redfish_utils.get_system(self.node)
client('foo')
expected_get_system_calls = [
mock.call(self.parsed_driver_info['system_id']),
mock.call(self.parsed_driver_info['system_id']),
]
fake_conn.get_system.assert_has_calls(expected_get_system_calls)
fake_system.assert_called_with('foo')
self.assertEqual(fake_conn.get_system.call_count, 2)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(sushy, 'Sushy', autospec=True)
@mock.patch('ironic.drivers.modules.redfish.utils.'
'SessionCache._sessions', {})
def test_get_system_resource_attribute_error(self, mock_sushy):
fake_conn = mock_sushy.return_value
fake_system = mock.Mock()
fake_conn.get_system.side_effect = iter(
[
AttributeError,
fake_system,
])
# We need to check for AttributeError explicitly as
# otherwise we break existing tests if we try to catch
# it explicitly.
self.assertRaises(exception.RedfishError,
redfish_utils.get_system, self.node)
# Retry, as in next power sync perhaps
client = redfish_utils.get_system(self.node)
client('bar')
expected_get_system_calls = [
mock.call(self.parsed_driver_info['system_id']),
mock.call(self.parsed_driver_info['system_id']),
]
fake_conn.get_system.assert_has_calls(expected_get_system_calls)
fake_system.assert_called_once_with('bar')
self.assertEqual(fake_conn.get_system.call_count, 2)

View File

@ -0,0 +1,13 @@
---
fixes:
- |
Fixes connection caching issues with Redfish BMCs where AccessErrors were
previously not disqualifying the cached connection from being re-used.
Ironic will now explicitly open a new connection instead of using the
previous connection in the cache. Under normal circumstances, the
``sushy`` redfish library would detect and refresh sessions,
however a prior case exists where it may not detect a failure and contain
cached session credential data which is ultimately invalid, blocking
future access to the BMC via Redfish until the cache entry expired or
the ``ironic-conductor`` service was restarted. For more information
please see `story 2009719 <https://storyboard.openstack.org/#!/story/2009719>`_.