Added debug for Retry

Urllib3 Retry class's increment method has been used and we don't understand why. Since it's impossible to add logs directly in Retry class, this patch subclasses Retry and adds logging capabilities to log server response and relevant cause if we hit this bug again Signed-off-by: rmiao106 <rmiao@vmware.com> Change-Id: I2bd13ee635879a343c7a05886b397b3ffda5006a
2020-12-09 23:26:58 +00:00 · 2020-12-09 23:26:58 +00:00 · 3481739598
parent f6f86385ae
commit 3481739598
3 changed files with 125 additions and 5 deletions
--- a/vmware_nsxlib/tests/unit/v3/test_cluster.py
+++ b/vmware_nsxlib/tests/unit/v3/test_cluster.py
@ -133,13 +133,15 @@ class RequestsHTTPProviderTestCase(unittest.TestCase):
            mock_get_def_headers.assert_called_once_with(
                mock.ANY, cluster_provider, False, token_provider_inst)

+    @mock.patch("vmware_nsxlib.v3.debug_retry.RetryDebug.from_int")
    @mock.patch("vmware_nsxlib.v3.cluster.NSXHTTPAdapter.__init__")
-    def test_new_connection_with_ca_file(self, mock_adaptor_init):
+    def test_new_connection_with_ca_file(self, mock_adaptor_init, mock_retry):
        mock_api = mock.Mock()
        mock_api.nsxlib_config = mock.Mock()
        mock_api.nsxlib_config.retries = 100
        mock_api.nsxlib_config.insecure = False
        mock_adaptor_init.return_value = None
+        mock_retry.return_value = 100
        provider = cluster.NSXRequestsHTTPProvider()
        with mock.patch.object(cluster.TimeoutSession, 'request',
                               return_value=get_sess_create_resp()):
@ -150,15 +152,19 @@ class RequestsHTTPProviderTestCase(unittest.TestCase):
            self.assertEqual("ca_file", session.verify)
            mock_adaptor_init.assert_called_once_with(
                pool_connections=1, pool_maxsize=1,
-                max_retries=100, pool_block=False, thumbprint=None)
+                max_retries=100, pool_block=False,
+                thumbprint=None)

+    @mock.patch("vmware_nsxlib.v3.debug_retry.RetryDebug.from_int")
    @mock.patch("vmware_nsxlib.v3.cluster.NSXHTTPAdapter.__init__")
-    def test_new_connection_with_thumbprint(self, mock_adaptor_init):
+    def test_new_connection_with_thumbprint(self, mock_adaptor_init,
+                                            mock_retry):
        mock_api = mock.Mock()
        mock_api.nsxlib_config = mock.Mock()
        mock_api.nsxlib_config.retries = 100
        mock_api.nsxlib_config.insecure = False
        mock_adaptor_init.return_value = None
+        mock_retry.return_value = 100
        provider = cluster.NSXRequestsHTTPProvider()
        with mock.patch.object(cluster.TimeoutSession, 'request',
                               return_value=get_sess_create_resp()):
@ -169,7 +175,8 @@ class RequestsHTTPProviderTestCase(unittest.TestCase):
            self.assertIsNone(session.verify)
            mock_adaptor_init.assert_called_once_with(
                pool_connections=1, pool_maxsize=1,
-                max_retries=100, pool_block=False, thumbprint="thumbprint")
+                max_retries=100, pool_block=False,
+                thumbprint="thumbprint")

    def test_validate_connection_keep_alive(self):
        mock_conn = mocks.MockRequestSessionApi()
--- a/vmware_nsxlib/v3/cluster.py
+++ b/vmware_nsxlib/v3/cluster.py
@ -39,6 +39,7 @@ from requests import adapters
 from vmware_nsxlib._i18n import _
 from vmware_nsxlib.v3 import client as nsx_client
 from vmware_nsxlib.v3 import constants
+from vmware_nsxlib.v3.debug_retry import RetryDebug
 from vmware_nsxlib.v3 import exceptions
 from vmware_nsxlib.v3 import utils

@ -224,7 +225,7 @@ class NSXRequestsHTTPProvider(AbstractHTTPProvider):
        # we are pooling with eventlet in the cluster class
        adapter = NSXHTTPAdapter(
            pool_connections=1, pool_maxsize=1,
-            max_retries=config.retries,
+            max_retries=RetryDebug.from_int(config.retries),
            pool_block=False, thumbprint=thumbprint)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
--- a/vmware_nsxlib/v3/debug_retry.py
+++ b/vmware_nsxlib/v3/debug_retry.py
@ -0,0 +1,112 @@
+# Copyright 2020 VMware, Inc.
+# All Rights Reserved
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+#
+
+from collections import namedtuple
+import logging
+import traceback
+
+import six
+from urllib3.exceptions import MaxRetryError
+from urllib3.exceptions import ResponseError
+from urllib3.util.retry import Retry
+
+log = logging.getLogger(__name__)
+
+RequestHistory = namedtuple(
+    "RequestHistory", ["method", "url", "error", "status", "redirect_location"]
+)
+
+
+class RetryDebug(Retry):
+    """Class that adds debugging capabilities of Retry"""
+    def __init__(self, *args, **kw):
+        super(RetryDebug, self).__init__(*args, **kw)
+
+    def increment(self, method=None, url=None, response=None, error=None,
+                  _pool=None, _stacktrace=None, ):
+        log.debug("Retry Increment %s", traceback.format_stack())
+        if self.total is False and error:
+            # Disabled, indicate to re-raise the error.
+            raise six.reraise(type(error), error, _stacktrace)
+
+        total = self.total
+        if total is not None:
+            total -= 1
+
+        connect = self.connect
+        read = self.read
+        redirect = self.redirect
+        status_count = self.status
+        cause = "unknown"
+        status = None
+        redirect_location = None
+
+        if error and self._is_connection_error(error):
+            # Connect retry?
+            if connect is False:
+                raise six.reraise(type(error), error, _stacktrace)
+            elif connect is not None:
+                connect -= 1
+
+        elif error and self._is_read_error(error):
+            # Read retry?
+            if read is False or not self._is_method_retryable(method):
+                raise six.reraise(type(error), error, _stacktrace)
+            elif read is not None:
+                read -= 1
+
+        elif response and response.get_redirect_location():
+            # Redirect retry?
+            if redirect is not None:
+                redirect -= 1
+            cause = "too many redirects"
+            redirect_location = response.get_redirect_location()
+            status = response.status
+
+        else:
+            # Incrementing because of a server error like a 500 in
+            # status_forcelist and a the given method is in the whitelist
+            cause = ResponseError.GENERIC_ERROR
+            if response and response.status:
+                if status_count is not None:
+                    status_count -= 1
+                cause = ResponseError.SPECIFIC_ERROR.format(
+                    status_code=response.status)
+                status = response.status
+
+        history = self.history + (
+            RequestHistory(method, url, error, status, redirect_location),
+        )
+
+        new_retry = self.new(
+            total=total,
+            connect=connect,
+            read=read,
+            redirect=redirect,
+            status=status_count,
+            history=history,
+        )
+
+        if new_retry.is_exhausted():
+            raise MaxRetryError(_pool, url, error or ResponseError(cause))
+
+        # log the cause for this retry
+        log.debug("Cause for retry: %s", cause)
+        # log the server response for this retry
+        log.debug("Response: %s", response)
+        log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
+
+        return new_retry