Merge "Fix TypeError on agent lookup failure" into stable/rocky
This commit is contained in:
commit
353dfe0915
|
@ -148,12 +148,41 @@ class APIClient(object):
|
||||||
'GET', self.lookup_api,
|
'GET', self.lookup_api,
|
||||||
headers=self._get_ironic_api_version_header(),
|
headers=self._get_ironic_api_version_header(),
|
||||||
params=params)
|
params=params)
|
||||||
except Exception as err:
|
except (requests.exceptions.Timeout,
|
||||||
LOG.exception(
|
requests.exceptions.ConnectTimeout,
|
||||||
'Unhandled error looking up node with addresses %r at %s: %s',
|
requests.exceptions.ConnectionError,
|
||||||
params['addresses'], self.api_url, err,
|
requests.exceptions.ReadTimeout,
|
||||||
|
requests.exceptions.HTTPError) as err:
|
||||||
|
LOG.warning(
|
||||||
|
'Error detected while attempting to perform lookup '
|
||||||
|
'with %s, retrying. Error: %s', self.api_url, err
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
except Exception as err:
|
||||||
|
# NOTE(TheJulia): If you're looking here, and you're wondering
|
||||||
|
# why the retry logic is not working or your investigating a weird
|
||||||
|
# error or even IPA just exiting,
|
||||||
|
# See https://storyboard.openstack.org/#!/story/2007968
|
||||||
|
# To be clear, we're going to try to provide as much detail as
|
||||||
|
# possible in the exit handling
|
||||||
|
msg = ('Unhandled error looking up node with addresses {} at '
|
||||||
|
'{}: {}'.format(params['addresses'], self.api_url, err))
|
||||||
|
# No matter what we do at this point, IPA is going to exit.
|
||||||
|
# This is because we don't know why the exception occured and
|
||||||
|
# we likely should not try to retry as such.
|
||||||
|
# We will attempt to provide as much detail to the logs as
|
||||||
|
# possible as to what occured, although depending on the logging
|
||||||
|
# subsystem, additional errors can occur, thus the additional
|
||||||
|
# handling below.
|
||||||
|
try:
|
||||||
|
LOG.exception(msg)
|
||||||
|
return False
|
||||||
|
except Exception as exc_err:
|
||||||
|
LOG.error(msg)
|
||||||
|
exc_msg = ('Unexpected exception occured while trying to '
|
||||||
|
'log additional detail. Error: {}'.format(exc_err))
|
||||||
|
LOG.error(exc_msg)
|
||||||
|
raise errors.LookupNodeError(msg)
|
||||||
|
|
||||||
if response.status_code != requests.codes.OK:
|
if response.status_code != requests.codes.OK:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
import mock
|
import mock
|
||||||
from oslo_serialization import jsonutils
|
from oslo_serialization import jsonutils
|
||||||
from oslo_service import loopingcall
|
from oslo_service import loopingcall
|
||||||
|
import requests
|
||||||
|
|
||||||
from ironic_python_agent import errors
|
from ironic_python_agent import errors
|
||||||
from ironic_python_agent import hardware
|
from ironic_python_agent import hardware
|
||||||
|
@ -286,6 +287,47 @@ class TestBaseIronicPythonAgent(base.IronicAgentTest):
|
||||||
'node_uuid': 'someuuid'},
|
'node_uuid': 'someuuid'},
|
||||||
params)
|
params)
|
||||||
|
|
||||||
|
@mock.patch.object(ironic_api_client, 'LOG', autospec=True)
|
||||||
|
def test_do_lookup_transient_exceptions(self, mock_log):
|
||||||
|
exc_list = [requests.exceptions.ConnectionError,
|
||||||
|
requests.exceptions.ReadTimeout,
|
||||||
|
requests.exceptions.HTTPError,
|
||||||
|
requests.exceptions.Timeout,
|
||||||
|
requests.exceptions.ConnectTimeout]
|
||||||
|
self.api_client.session.request = mock.Mock()
|
||||||
|
for exc in exc_list:
|
||||||
|
self.api_client.session.request.reset_mock()
|
||||||
|
mock_log.reset_mock()
|
||||||
|
self.api_client.session.request.side_effect = exc
|
||||||
|
error = self.api_client._do_lookup(self.hardware_info,
|
||||||
|
node_uuid=None)
|
||||||
|
self.assertFalse(error)
|
||||||
|
mock_log.error.assert_has_calls([])
|
||||||
|
self.assertEqual(1, mock_log.warning.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(ironic_api_client, 'LOG', autospec=True)
|
||||||
|
def test_do_lookup_unknown_exception(self, mock_log):
|
||||||
|
self.api_client.session.request = mock.Mock()
|
||||||
|
self.api_client.session.request.side_effect = \
|
||||||
|
requests.exceptions.RequestException('meow')
|
||||||
|
self.assertFalse(
|
||||||
|
self.api_client._do_lookup(self.hardware_info,
|
||||||
|
node_uuid=None))
|
||||||
|
self.assertEqual(1, mock_log.exception.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(ironic_api_client, 'LOG', autospec=True)
|
||||||
|
def test_do_lookup_unknown_exception_fallback(self, mock_log):
|
||||||
|
mock_log.exception.side_effect = TypeError
|
||||||
|
self.api_client.session.request = mock.Mock()
|
||||||
|
self.api_client.session.request.side_effect = \
|
||||||
|
requests.exceptions.RequestException('meow')
|
||||||
|
self.assertRaises(errors.LookupNodeError,
|
||||||
|
self.api_client._do_lookup,
|
||||||
|
self.hardware_info,
|
||||||
|
node_uuid=None)
|
||||||
|
self.assertEqual(1, mock_log.exception.call_count)
|
||||||
|
self.assertEqual(2, mock_log.error.call_count)
|
||||||
|
|
||||||
def test_do_lookup_bad_response_code(self):
|
def test_do_lookup_bad_response_code(self):
|
||||||
response = FakeResponse(status_code=400, content={
|
response = FakeResponse(status_code=400, content={
|
||||||
'node': {
|
'node': {
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes retry logic issues with the Agent Lookup which can result in
|
||||||
|
the lookup failing prematurely before being completed, typically
|
||||||
|
resulting in an abrupt end to the agent logging and potentially
|
||||||
|
weird errors like TypeError being reported on the agent process
|
||||||
|
standard error output. For more information see
|
||||||
|
`bug 2007968 <https://storyboard.openstack.org/#!/story/2007968>`_.
|
Loading…
Reference in New Issue