diff --git a/neutron_lib/db/api.py b/neutron_lib/db/api.py
index ea6aab9db..f66aa0178 100644
--- a/neutron_lib/db/api.py
+++ b/neutron_lib/db/api.py
@@ -36,7 +36,7 @@ from neutron_lib import exceptions
 from neutron_lib.objects import exceptions as obj_exc
 
 
-MAX_RETRIES = 10
+MAX_RETRIES = 20
 OSPROFILER_TRACE_NAMES = {'neutron.db', 'neutron_lib.db'}
 LOG = logging.getLogger(__name__)
 _synchronized = lockutils.synchronized_with_prefix("neutron-")
@@ -148,7 +148,7 @@ def _copy_if_lds(item):
 
 _retry_db_errors = oslo_db_api.wrap_db_retry(
     max_retries=MAX_RETRIES,
-    retry_interval=0.1,
+    retry_interval=0.5,
     inc_retry_interval=True,
     exception_checker=is_retriable
 )
diff --git a/neutron_lib/tests/unit/db/test_api.py b/neutron_lib/tests/unit/db/test_api.py
index 3b38bc37b..c12969569 100644
--- a/neutron_lib/tests/unit/db/test_api.py
+++ b/neutron_lib/tests/unit/db/test_api.py
@@ -129,9 +129,50 @@ class TestDeadLockDecorator(_base.BaseTestCase):
         e = db_exc.DBConnectionError()
         mock.patch('time.sleep').start()
         with testtools.ExpectedException(db_exc.DBConnectionError):
-            # after 10 failures, the inner retry should give up and
+            # after MAX_RETRIES failures, the inner retry should give up and
             # the exception should be tagged to prevent the outer retry
-            self._alt_context_function(context, 11, e)
+            self._alt_context_function(context, db_api.MAX_RETRIES + 1, e)
+
+    def _test_retry_time_cost(self, exc_to_raise):
+        """Assert how long the retry wrapper sleeps for ``exc_to_raise``.
+
+        ``time.sleep`` is patched, so delays are summed instead of waited.
+        """
+        # Per-retry sleep: starts at 0.5s and doubles until it plateaus at
+        # 10s (presumably oslo.db's default max_retry_interval -- confirm).
+        worst_case = [0.5, 1, 2, 4, 8,
+                      10, 10, 10, 10, 10,
+                      10, 10, 10, 10, 10,
+                      10, 10, 10, 10, 10]
+
+        class FakeTime(object):
+            def __init__(self):
+                self.counter = 0
+
+            def sleep(self, t):
+                self.counter += t
+
+        fake_timer = FakeTime()
+
+        def fake_sleep(t):
+            fake_timer.sleep(t)
+
+        e = exc_to_raise()
+        mock.patch('time.sleep', side_effect=fake_sleep).start()
+        with testtools.ExpectedException(exc_to_raise):
+            self._decorated_function(db_api.MAX_RETRIES + 1, e)
+        # NOTE(review): deadlock sleeps appear to be randomized, so only the
+        # upper bound is asserted; other errors must match it exactly.
+        if exc_to_raise == db_exc.DBDeadlock:
+            self.assertLessEqual(fake_timer.counter, sum(worst_case))
+        else:
+            self.assertEqual(sum(worst_case), fake_timer.counter)
+
+    def test_all_deadlock_time_elapsed(self):
+        self._test_retry_time_cost(db_exc.DBDeadlock)
+
+    def test_not_deadlock_time_elapsed(self):
+        self._test_retry_time_cost(db_exc.DBConnectionError)
 
     def test_retry_if_session_inactive_args_not_mutated_after_retries(self):
         context = mock.Mock()
diff --git a/releasenotes/notes/reset-db-retry-settings-49e51cef4c842f69.yaml b/releasenotes/notes/reset-db-retry-settings-49e51cef4c842f69.yaml
new file mode 100644
index 000000000..d5ecbf7a3
--- /dev/null
+++ b/releasenotes/notes/reset-db-retry-settings-49e51cef4c842f69.yaml
@@ -0,0 +1,10 @@
+---
+fixes:
+  - |
+    Increase the DB retry interval and max retry times for the
+    ``retry_db_errors`` decorator in ``neutron_lib.db.api`` to
+    0.5 seconds and 20 times, respectively. For those actions
+    which have a higher chance for DBDeadlock, users should have
+    a higher success rate due to the larger random range and retry
+    times. For more information
+    see bug `1777968 <https://bugs.launchpad.net/neutron/+bug/1777968>`_