Make placement client keep trying to connect

In Newton, placement is optional and computes will stop even trying to connect to placement when they encounter an error or missing configuration. We really want them to keep trying so that enabling the service pre-upgrade does not require restarting all computes to start filling data.

This patch removes the auto-disable logic and replaces it with a limited, but persistent warning to the logs about the required nature of placement for the upgrade.

If we had messaged the upcoming requirement better, I think we could have been less chatty here. However, we know that it's not been received, so this patch periodically re-emits the warning and mentions the upgrade specifically.

Conflicts:
      nova/scheduler/client/report.py

NOTE(mriedem): The conflict is due to 3c217acb9 not being in Newton.

Closes-Bug: #1655494
Change-Id: Ie6387afeb239a20d39c00f519e8288f3b3a5e9cb
(cherry picked from commit bbf9b431ee)
2017-01-10 13:49:19 -08:00 · 2017-01-10 13:49:19 -08:00 · 13ba33a574
parent 9fb203f17e
commit 13ba33a574
2 changed files with 49 additions and 37 deletions
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@ -32,32 +32,42 @@ LOG = logging.getLogger(__name__)
 VCPU = fields.ResourceClass.VCPU
 MEMORY_MB = fields.ResourceClass.MEMORY_MB
 DISK_GB = fields.ResourceClass.DISK_GB
+WARN_EVERY = 10
+
+
+def warn_limit(self, msg):
+    if self._warn_count:
+        self._warn_count -= 1
+    else:
+        self._warn_count = WARN_EVERY
+        LOG.warning(msg)


 def safe_connect(f):
    @functools.wraps(f)
    def wrapper(self, *a, **k):
        try:
-            # We've failed in a non recoverable way, fully give up.
-            if self._disabled:
-                return
            return f(self, *a, **k)
        except ks_exc.EndpointNotFound:
-            msg = _LW("The placement API endpoint not found. Optional use of "
-                      "placement API for reporting is now disabled.")
-            LOG.warning(msg)
-            self._disabled = True
+            warn_limit(
+                self,
+                _LW('The placement API endpoint not found. Placement is '
+                    'optional in Newton, but required in Ocata. Please '
+                    'enable the placement service before upgrading.'))
        except ks_exc.MissingAuthPlugin:
-            msg = _LW("No authentication information found for placement API. "
-                      "Optional use of placement API for reporting is now "
-                      "disabled.")
-            LOG.warning(msg)
-            self._disabled = True
+            warn_limit(
+                self,
+                _LW('No authentication information found for placement '
+                    'API. Placement is optional in Newton, but required '
+                    'in Ocata. Please enable the placement service '
+                    'before upgrading.'))
        except ks_exc.Unauthorized:
-            msg = _LW('Placement service credentials do not work. Optional '
-                      'use of placement API for reporting is now disabled.')
-            LOG.warning(msg)
-            self._disabled = True
+            warn_limit(
+                self,
+                _LW('Placement service credentials do not work. '
+                    'Placement is optional in Newton, but required '
+                    'in Ocata. Please enable the placement service '
+                    'before upgrading.'))
        except ks_exc.ConnectFailure:
            msg = _LW('Placement API service is not responding.')
            LOG.warning(msg)
@ -78,9 +88,8 @@ class SchedulerReportClient(object):
        auth_plugin = keystone.load_auth_from_conf_options(
            CONF, 'placement')
        self._client = session.Session(auth=auth_plugin)
-        # TODO(sdague): use this to disable fully when we don't find
-        # the endpoint.
-        self._disabled = False
+        # NOTE(danms): Keep track of how naggy we've been
+        self._warn_count = 0

    def get(self, url):
        return self._client.get(
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@ -42,67 +42,59 @@ class SafeConnectedTestCase(test.NoDBTestCase):
    def test_missing_endpoint(self, req):
        """Test EndpointNotFound behavior.

-        A missing endpoint entry should permanently disable the
-        client. And make future calls to it not happen.
+        A missing endpoint entry should not explode.
        """
        req.side_effect = ks_exc.EndpointNotFound()
        self.client._get_resource_provider("fake")
-        self.assertTrue(self.client._disabled)

-        # reset the call count to demonstrate that future calls don't
+        # reset the call count to demonstrate that future calls still
        # work
        req.reset_mock()
        self.client._get_resource_provider("fake")
-        req.assert_not_called()
+        self.assertTrue(req.called)

    @mock.patch('keystoneauth1.session.Session.request')
    def test_missing_auth(self, req):
        """Test Missing Auth handled correctly.

-        A missing auth configuration should permanently disable the
-        client. And make future calls to it not happen.
+        A missing auth configuration should not explode.

        """
        req.side_effect = ks_exc.MissingAuthPlugin()
        self.client._get_resource_provider("fake")
-        self.assertTrue(self.client._disabled)

-        # reset the call count to demonstrate that future calls don't
+        # reset the call count to demonstrate that future calls still
        # work
        req.reset_mock()
        self.client._get_resource_provider("fake")
-        req.assert_not_called()
+        self.assertTrue(req.called)

    @mock.patch('keystoneauth1.session.Session.request')
    def test_unauthorized(self, req):
        """Test Unauthorized handled correctly.

-        An unauthorized configuration should permanently disable the
-        client. And make future calls to it not happen.
+        An unauthorized configuration should not explode.

        """
        req.side_effect = ks_exc.Unauthorized()
        self.client._get_resource_provider("fake")
-        self.assertTrue(self.client._disabled)

-        # reset the call count to demonstrate that future calls don't
+        # reset the call count to demonstrate that future calls still
        # work
        req.reset_mock()
        self.client._get_resource_provider("fake")
-        req.assert_not_called()
+        self.assertTrue(req.called)

    @mock.patch('keystoneauth1.session.Session.request')
    def test_connect_fail(self, req):
        """Test Connect Failure handled correctly.

        If we get a connect failure, this is transient, and we expect
-        that this will end up working correctly later. We don't want
-        to disable the client.
+        that this will end up working correctly later.

        """
        req.side_effect = ks_exc.ConnectFailure()
        self.client._get_resource_provider("fake")
-        self.assertFalse(self.client._disabled)

        # reset the call count to demonstrate that future calls do
        # work
@ -110,6 +102,17 @@ class SafeConnectedTestCase(test.NoDBTestCase):
        self.client._get_resource_provider("fake")
        self.assertTrue(req.called)

+    @mock.patch.object(report, 'LOG')
+    def test_warning_limit(self, mock_log):
+        # Assert that __init__ initializes _warn_count as we expect
+        self.assertEqual(0, self.client._warn_count)
+        mock_self = mock.MagicMock()
+        mock_self._warn_count = 0
+        for i in range(0, report.WARN_EVERY + 3):
+            report.warn_limit(mock_self, 'warning')
+        mock_log.warning.assert_has_calls([mock.call('warning'),
+                                           mock.call('warning')])
+

 class SchedulerReportClientTestCase(test.NoDBTestCase):