Browse Source

Merge "Improve exponential backoff for wrap_db_retry"

Zuul 1 year ago
parent
commit
6a91f1e8b4
2 changed files with 90 additions and 7 deletions
  1. 34
    7
      oslo_db/api.py
  2. 56
    0
      oslo_db/tests/test_api.py

+ 34
- 7
oslo_db/api.py View File

@@ -24,6 +24,7 @@ API methods.
24 24
 """
25 25
 
26 26
 import logging
27
+import random
27 28
 import threading
28 29
 import time
29 30
 
@@ -103,15 +104,21 @@ class wrap_db_retry(object):
103 104
 
104 105
     :param exception_checker: checks if an exception should trigger a retry
105 106
     :type exception_checker: callable
107
+
108
+    :param jitter: determine increase retry interval use jitter or not, jitter
109
+           is always interpreted as True for a DBDeadlockError
110
+    :type jitter: bool
106 111
     """
107 112
 
108 113
     def __init__(self, retry_interval=1, max_retries=20,
109 114
                  inc_retry_interval=True,
110 115
                  max_retry_interval=10, retry_on_disconnect=False,
111 116
                  retry_on_deadlock=False,
112
-                 exception_checker=lambda exc: False):
117
+                 exception_checker=lambda exc: False,
118
+                 jitter=False):
113 119
         super(wrap_db_retry, self).__init__()
114 120
 
121
+        self.jitter = jitter
115 122
         self.db_error = (exception.RetryRequest, )
116 123
         # default is that we re-raise anything unexpected
117 124
         self.exception_checker = exception_checker
@@ -127,7 +134,7 @@ class wrap_db_retry(object):
127 134
     def __call__(self, f):
128 135
         @six.wraps(f)
129 136
         def wrapper(*args, **kwargs):
130
-            next_interval = self.retry_interval
137
+            sleep_time = next_interval = self.retry_interval
131 138
             remaining = self.max_retries
132 139
 
133 140
             while True:
@@ -150,12 +157,20 @@ class wrap_db_retry(object):
150 157
                     # NOTE(vsergeyev): We are using patched time module, so
151 158
                     #                  this effectively yields the execution
152 159
                     #                  context to another green thread.
153
-                    time.sleep(next_interval)
160
+                    time.sleep(sleep_time)
154 161
                     if self.inc_retry_interval:
155
-                        next_interval = min(
156
-                            next_interval * 2,
157
-                            self.max_retry_interval
158
-                        )
162
+                        # NOTE(jiangyikun): In order to minimize the chance of
163
+                        # regenerating a deadlock and reduce the average sleep
164
+                        # time, we are using jitter by default when the
165
+                        # deadlock is detected. With the jitter,
166
+                        # sleep_time = [0, next_interval), otherwise, without
167
+                        # the jitter, sleep_time = next_interval.
168
+                        if isinstance(e, exception.DBDeadlock):
169
+                            jitter = True
170
+                        else:
171
+                            jitter = self.jitter
172
+                        sleep_time, next_interval = self._get_inc_interval(
173
+                            next_interval, jitter)
159 174
                     remaining -= 1
160 175
 
161 176
         return wrapper
@@ -170,6 +185,18 @@ class wrap_db_retry(object):
170 185
             return True
171 186
         return self.exception_checker(exc)
172 187
 
188
+    def _get_inc_interval(self, n, jitter):
189
+        # NOTE(jiangyikun): The "n" help us to record the 2 ** retry_times.
190
+        # The "sleep_time" means the real time to sleep:
191
+        # - Without jitter: sleep_time = 2 ** retry_times = n
192
+        # - With jitter:    sleep_time = [0, 2 ** retry_times) < n
193
+        n = n * 2
194
+        if jitter:
195
+            sleep_time = random.uniform(0, n)
196
+        else:
197
+            sleep_time = n
198
+        return min(sleep_time, self.max_retry_interval), n
199
+
173 200
 
174 201
 class DBAPI(object):
175 202
     """Initialize the chosen DB API backend.

+ 56
- 0
oslo_db/tests/test_api.py View File

@@ -253,3 +253,59 @@ class DBRetryRequestCase(DBAPITestCase):
253 253
 
254 254
         self.assertRaises(AttributeError, some_method)
255 255
         self.assertFalse(mock_log.called)
256
+
257
+    @mock.patch('oslo_db.api.time.sleep', return_value=None)
258
+    def test_retry_wrapper_deadlock(self, mock_sleep):
259
+
260
+        # Tests that jitter is False, if the retry wrapper hits a
261
+        # non-deadlock error
262
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
263
+        def some_method_no_deadlock():
264
+            raise exception.RetryRequest(ValueError())
265
+        with mock.patch(
266
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
267
+            mock_get.return_value = 2, 2
268
+            self.assertRaises(ValueError, some_method_no_deadlock)
269
+            mock_get.assert_called_once_with(1, False)
270
+
271
+        # Tests that jitter is True, if the retry wrapper hits a deadlock
272
+        # error.
273
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
274
+        def some_method_deadlock():
275
+            raise exception.DBDeadlock('test')
276
+        with mock.patch(
277
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
278
+            mock_get.return_value = 0.1, 2
279
+            self.assertRaises(exception.DBDeadlock, some_method_deadlock)
280
+            mock_get.assert_called_once_with(1, True)
281
+
282
+        # Tests that jitter is True, if the jitter is enable by user
283
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True, jitter=True)
284
+        def some_method_no_deadlock_exp():
285
+            raise exception.RetryRequest(ValueError())
286
+        with mock.patch(
287
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
288
+            mock_get.return_value = 0.1, 2
289
+            self.assertRaises(ValueError, some_method_no_deadlock_exp)
290
+            mock_get.assert_called_once_with(1, True)
291
+
292
+    def test_wrap_db_retry_get_interval(self):
293
+        x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
294
+                              max_retry_interval=11)
295
+        self.assertEqual(11, x.max_retry_interval)
296
+        for i in (1, 2, 4):
297
+            # With jitter: sleep_time = [0, 2 ** retry_times)
298
+            sleep_time, n = x._get_inc_interval(i, True)
299
+            self.assertEqual(2 * i, n)
300
+            self.assertTrue(2 * i > sleep_time)
301
+            # Without jitter: sleep_time = 2 ** retry_times
302
+            sleep_time, n = x._get_inc_interval(i, False)
303
+            self.assertEqual(2 * i, n)
304
+            self.assertEqual(2 * i, sleep_time)
305
+        for i in (8, 16, 32):
306
+            sleep_time, n = x._get_inc_interval(i, False)
307
+            self.assertEqual(x.max_retry_interval, sleep_time)
308
+            self.assertEqual(2 * i, n)
309
+            sleep_time, n = x._get_inc_interval(i, True)
310
+            self.assertTrue(x.max_retry_interval >= sleep_time)
311
+            self.assertEqual(2 * i, n)

Loading…
Cancel
Save