Make placement client keep trying to connect

In Newton, placement is optional, and compute nodes stop trying to
connect to placement altogether once they encounter an error or missing
configuration. We really want them to keep trying, so that enabling
the service pre-upgrade does not require restarting all computes
before they start filling in data.

This patch removes the auto-disable logic and replaces it with a
rate-limited but persistent warning in the logs about the required
nature of placement for the upgrade. If we had messaged the upcoming
requirement better, I think we could have been less chatty here.
However, we know that the message has not been received, so this patch
periodically re-emits the warning and mentions the upgrade specifically.

Conflicts:
        nova/scheduler/client/report.py

NOTE(mriedem): The conflict is due to 3c217acb9 not being in Newton.

Closes-Bug: #1655494
Change-Id: Ie6387afeb239a20d39c00f519e8288f3b3a5e9cb
(cherry picked from commit bbf9b431ee)
This commit is contained in:
Dan Smith 2017-01-10 13:49:19 -08:00 committed by Matt Riedemann
parent 9fb203f17e
commit 13ba33a574
2 changed files with 49 additions and 37 deletions

View File

@ -32,32 +32,42 @@ LOG = logging.getLogger(__name__)
# Module-level shorthands for members of fields.ResourceClass.
VCPU = fields.ResourceClass.VCPU
MEMORY_MB = fields.ResourceClass.MEMORY_MB
DISK_GB = fields.ResourceClass.DISK_GB
# Value warn_limit() resets the per-client countdown to after it logs a
# warning; that many following calls are silenced before the next one.
WARN_EVERY = 10
def warn_limit(self, msg):
    """Log ``msg`` as a warning, but only on some of the calls.

    ``self._warn_count`` is used as a countdown. While it is non-zero a
    call merely decrements it and logs nothing. Once it is zero, the
    countdown is re-armed to ``WARN_EVERY`` and the warning is emitted.

    :param self: object carrying the ``_warn_count`` countdown attribute
    :param msg: warning text passed through to ``LOG.warning``
    """
    if not self._warn_count:
        # Countdown expired: re-arm it first, then nag.
        self._warn_count = WARN_EVERY
        LOG.warning(msg)
        return
    # Still inside the quiet window; just tick the countdown.
    self._warn_count -= 1
def safe_connect(f):
@functools.wraps(f)
def wrapper(self, *a, **k):
try:
# We've failed in a non recoverable way, fully give up.
if self._disabled:
return
return f(self, *a, **k)
except ks_exc.EndpointNotFound:
msg = _LW("The placement API endpoint not found. Optional use of "
"placement API for reporting is now disabled.")
LOG.warning(msg)
self._disabled = True
warn_limit(
self,
_LW('The placement API endpoint not found. Placement is '
'optional in Newton, but required in Ocata. Please '
'enable the placement service before upgrading.'))
except ks_exc.MissingAuthPlugin:
msg = _LW("No authentication information found for placement API. "
"Optional use of placement API for reporting is now "
"disabled.")
LOG.warning(msg)
self._disabled = True
warn_limit(
self,
_LW('No authentication information found for placement '
'API. Placement is optional in Newton, but required '
'in Ocata. Please enable the placement service '
'before upgrading.'))
except ks_exc.Unauthorized:
msg = _LW('Placement service credentials do not work. Optional '
'use of placement API for reporting is now disabled.')
LOG.warning(msg)
self._disabled = True
warn_limit(
self,
_LW('Placement service credentials do not work. '
'Placement is optional in Newton, but required '
'in Ocata. Please enable the placement service '
'before upgrading.'))
except ks_exc.ConnectFailure:
msg = _LW('Placement API service is not responding.')
LOG.warning(msg)
@ -78,9 +88,8 @@ class SchedulerReportClient(object):
auth_plugin = keystone.load_auth_from_conf_options(
CONF, 'placement')
self._client = session.Session(auth=auth_plugin)
# TODO(sdague): use this to disable fully when we don't find
# the endpoint.
self._disabled = False
# NOTE(danms): Keep track of how naggy we've been
self._warn_count = 0
def get(self, url):
return self._client.get(

View File

@ -42,67 +42,59 @@ class SafeConnectedTestCase(test.NoDBTestCase):
def test_missing_endpoint(self, req):
"""Test EndpointNotFound behavior.
A missing endpoint entry should permanently disable the
client. And make future calls to it not happen.
A missing endpoint entry should not explode.
"""
req.side_effect = ks_exc.EndpointNotFound()
self.client._get_resource_provider("fake")
self.assertTrue(self.client._disabled)
# reset the call count to demonstrate that future calls don't
# reset the call count to demonstrate that future calls still
# work
req.reset_mock()
self.client._get_resource_provider("fake")
req.assert_not_called()
self.assertTrue(req.called)
@mock.patch('keystoneauth1.session.Session.request')
def test_missing_auth(self, req):
"""Test Missing Auth handled correctly.
A missing auth configuration should permanently disable the
client. And make future calls to it not happen.
A missing auth configuration should not explode.
"""
req.side_effect = ks_exc.MissingAuthPlugin()
self.client._get_resource_provider("fake")
self.assertTrue(self.client._disabled)
# reset the call count to demonstrate that future calls don't
# reset the call count to demonstrate that future calls still
# work
req.reset_mock()
self.client._get_resource_provider("fake")
req.assert_not_called()
self.assertTrue(req.called)
@mock.patch('keystoneauth1.session.Session.request')
def test_unauthorized(self, req):
"""Test Unauthorized handled correctly.
An unauthorized configuration should permanently disable the
client. And make future calls to it not happen.
An unauthorized configuration should not explode.
"""
req.side_effect = ks_exc.Unauthorized()
self.client._get_resource_provider("fake")
self.assertTrue(self.client._disabled)
# reset the call count to demonstrate that future calls don't
# reset the call count to demonstrate that future calls still
# work
req.reset_mock()
self.client._get_resource_provider("fake")
req.assert_not_called()
self.assertTrue(req.called)
@mock.patch('keystoneauth1.session.Session.request')
def test_connect_fail(self, req):
"""Test Connect Failure handled correctly.
If we get a connect failure, this is transient, and we expect
that this will end up working correctly later. We don't want
to disable the client.
that this will end up working correctly later.
"""
req.side_effect = ks_exc.ConnectFailure()
self.client._get_resource_provider("fake")
self.assertFalse(self.client._disabled)
# reset the call count to demonstrate that future calls do
# work
@ -110,6 +102,17 @@ class SafeConnectedTestCase(test.NoDBTestCase):
self.client._get_resource_provider("fake")
self.assertTrue(req.called)
@mock.patch.object(report, 'LOG')
def test_warning_limit(self, mock_log):
    """Check that warn_limit() throttles repeated warnings.

    Calls warn_limit() WARN_EVERY + 3 times against a stand-in object
    and asserts that two 'warning' calls appear in sequence in the
    patched logger's call list.
    """
    # __init__ is expected to start the countdown at zero
    self.assertEqual(0, self.client._warn_count)
    fake_client = mock.MagicMock()
    fake_client._warn_count = 0
    total_calls = report.WARN_EVERY + 3
    for _ in range(total_calls):
        report.warn_limit(fake_client, 'warning')
    expected_calls = [mock.call('warning')] * 2
    mock_log.warning.assert_has_calls(expected_calls)
class SchedulerReportClientTestCase(test.NoDBTestCase):