From 6596164f51217cc7fabf302ce14ccc9d9beaff1f Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Thu, 13 May 2021 14:14:20 -0500 Subject: [PATCH] Don't spam retries 100s of times a second We can get a TRY_AGAIN because the DB is down or because a command is doing an update that will never pass the verify() condition. If this happens, we shouldn't send 100s of requests per second until we eventually time out or things come back up, so add a simple exponential backoff with a reasonable max retry sleep of 8s. Closes-Bug: #1907836 Change-Id: Ib757f182a4c7fdf25ed9374963ac5210e4d35523 --- ovsdbapp/backend/ovs_idl/transaction.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ovsdbapp/backend/ovs_idl/transaction.py b/ovsdbapp/backend/ovs_idl/transaction.py index e481b8df..091b6b83 100644 --- a/ovsdbapp/backend/ovs_idl/transaction.py +++ b/ovsdbapp/backend/ovs_idl/transaction.py @@ -23,6 +23,7 @@ from ovsdbapp.backend.ovs_idl import idlutils from ovsdbapp import exceptions LOG = logging.getLogger(__name__) +MAX_SLEEP = 8 class Transaction(api.Transaction): @@ -74,6 +75,7 @@ class Transaction(api.Transaction): def do_commit(self): self.start_time = time.time() attempts = 0 + retries = 0 if not self.commands: LOG.debug("There are no commands to commit") return [] @@ -103,7 +105,13 @@ class Transaction(api.Transaction): # TRY_AGAIN until we time out and Connection.run() calls # idl.run() again. So, call idl.run() here just in case. self.api.idl.run() + + # In the event that there is an issue with the txn or the db + # is down, don't spam new txns as fast as we can + time.sleep(min(2 ** retries, self.time_remaining(), MAX_SLEEP)) + retries += 1 continue + retries = 0 if status in (txn.ERROR, txn.NOT_LOCKED): msg = 'OVSDB Error: ' if status == txn.NOT_LOCKED: