Tweak exponential backoff
This commit attempts to tweak and simplify the exponential backoff that we use by making the default interval 1 instead of 3 (so that the backoff won't grow that fast), capping the default maximum wait at 60 seconds (so that we won't wait, e.g., more than 2 minutes as a backoff while waiting for a pod to become active), and introducing a small jitter instead of the fully random choice of wait time that we had before.

Change-Id: Iaf7abb1a82d213ba0aeeec5b5b17760b1622c549
This commit is contained in:
parent
d8892d2e72
commit
9db38c85b2
|
@ -33,10 +33,6 @@ from kuryr_kubernetes import utils
|
||||||
CONF = config.CONF
|
CONF = config.CONF
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Hardcoding 60 seconds as I don't see a scenario when we want to wait more
|
|
||||||
# than a minute for reconnection.
|
|
||||||
MAX_BACKOFF = 60
|
|
||||||
|
|
||||||
|
|
||||||
class K8sClient(object):
|
class K8sClient(object):
|
||||||
# REVISIT(ivc): replace with python-k8sclient if it could be extended
|
# REVISIT(ivc): replace with python-k8sclient if it could be extended
|
||||||
|
@ -298,8 +294,7 @@ class K8sClient(object):
|
||||||
resource_version = m.get('resourceVersion', None)
|
resource_version = m.get('resourceVersion', None)
|
||||||
except (requests.ReadTimeout, requests.ConnectionError,
|
except (requests.ReadTimeout, requests.ConnectionError,
|
||||||
ssl.SSLError, requests.exceptions.ChunkedEncodingError):
|
ssl.SSLError, requests.exceptions.ChunkedEncodingError):
|
||||||
t = utils.exponential_backoff(attempt, min_backoff=0,
|
t = utils.exponential_backoff(attempt)
|
||||||
max_backoff=MAX_BACKOFF)
|
|
||||||
log = LOG.debug
|
log = LOG.debug
|
||||||
if attempt > 0:
|
if attempt > 0:
|
||||||
# Only make it a warning if it's happening again, no need
|
# Only make it a warning if it's happening again, no need
|
||||||
|
|
|
@ -50,9 +50,8 @@ class TestRetryHandler(test_base.TestCase):
|
||||||
'kuryr_kubernetes.clients.get_kubernetes_client'))
|
'kuryr_kubernetes.clients.get_kubernetes_client'))
|
||||||
f_k8s.mock.return_value = self.k8s
|
f_k8s.mock.return_value = self.k8s
|
||||||
|
|
||||||
@mock.patch('random.randint')
|
|
||||||
@mock.patch('time.sleep')
|
@mock.patch('time.sleep')
|
||||||
def test_should_not_sleep(self, m_sleep, m_randint):
|
def test_should_not_sleep(self, m_sleep):
|
||||||
deadline = self.now - 1
|
deadline = self.now - 1
|
||||||
retry = h_retry.Retry(mock.Mock())
|
retry = h_retry.Retry(mock.Mock())
|
||||||
|
|
||||||
|
@ -60,28 +59,25 @@ class TestRetryHandler(test_base.TestCase):
|
||||||
|
|
||||||
self.assertFalse(ret)
|
self.assertFalse(ret)
|
||||||
m_sleep.assert_not_called()
|
m_sleep.assert_not_called()
|
||||||
m_randint.assert_not_called()
|
|
||||||
|
|
||||||
def _test_should_sleep(self, seconds_left, slept):
|
def _test_should_sleep(self, seconds_left, slept):
|
||||||
attempt = 5
|
attempt = 2
|
||||||
timeout = 20
|
timeout = 20
|
||||||
interval = 3
|
interval = 3
|
||||||
randint = 2
|
|
||||||
deadline = self.now + seconds_left
|
deadline = self.now + seconds_left
|
||||||
retry = h_retry.Retry(mock.Mock(), timeout=timeout, interval=interval)
|
retry = h_retry.Retry(mock.Mock(), timeout=timeout, interval=interval)
|
||||||
|
|
||||||
with mock.patch('random.randint') as m_randint, \
|
with mock.patch('random.randint') as m_randint, \
|
||||||
mock.patch('time.sleep') as m_sleep:
|
mock.patch('time.sleep') as m_sleep:
|
||||||
m_randint.return_value = randint
|
m_randint.return_value = 0 # Assume 0 as jitter
|
||||||
|
|
||||||
ret = retry._sleep(deadline, attempt, _EX2())
|
ret = retry._sleep(deadline, attempt, _EX2())
|
||||||
|
|
||||||
self.assertEqual(slept, ret)
|
self.assertEqual(slept, ret)
|
||||||
m_randint.assert_called_once_with(1, 2 ** attempt - 1)
|
|
||||||
m_sleep.assert_called_once_with(slept)
|
m_sleep.assert_called_once_with(slept)
|
||||||
|
|
||||||
def test_should_sleep(self):
|
def test_should_sleep(self):
|
||||||
self._test_should_sleep(7, 6)
|
self._test_should_sleep(20, 12)
|
||||||
|
|
||||||
def test_should_sleep_last(self):
|
def test_should_sleep_last(self):
|
||||||
self._test_should_sleep(5, 5)
|
self._test_should_sleep(5, 5)
|
||||||
|
|
|
@ -42,7 +42,9 @@ VALID_MULTI_POD_POOLS_OPTS = {'noop': ['neutron-vif',
|
||||||
'nested': ['nested-vlan'],
|
'nested': ['nested-vlan'],
|
||||||
}
|
}
|
||||||
DEFAULT_TIMEOUT = 500
|
DEFAULT_TIMEOUT = 500
|
||||||
DEFAULT_INTERVAL = 3
|
DEFAULT_INTERVAL = 1
|
||||||
|
DEFAULT_JITTER = 3
|
||||||
|
MAX_BACKOFF = 60
|
||||||
MAX_ATTEMPTS = 10
|
MAX_ATTEMPTS = 10
|
||||||
|
|
||||||
subnet_caching_opts = [
|
subnet_caching_opts = [
|
||||||
|
@ -110,18 +112,15 @@ def check_suitable_multi_pool_driver_opt(pool_driver, pod_driver):
|
||||||
return pod_driver in VALID_MULTI_POD_POOLS_OPTS.get(pool_driver, [])
|
return pod_driver in VALID_MULTI_POD_POOLS_OPTS.get(pool_driver, [])
|
||||||
|
|
||||||
|
|
||||||
def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL):
|
def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL,
|
||||||
|
max_backoff=MAX_BACKOFF, jitter=DEFAULT_JITTER):
|
||||||
"""Sleep for exponential duration.
|
"""Sleep for exponential duration.
|
||||||
|
|
||||||
This implements a variation of exponential backoff algorithm [1] and
|
|
||||||
ensures that there is a minimal time `interval` to sleep.
|
|
||||||
(expected backoff E(c) = interval * 2 ** c / 2).
|
|
||||||
|
|
||||||
[1] https://en.wikipedia.org/wiki/Exponential_backoff
|
|
||||||
|
|
||||||
:param deadline: sleep timeout duration in seconds.
|
:param deadline: sleep timeout duration in seconds.
|
||||||
:param attempt: attempt count of sleep function.
|
:param attempt: attempt count of sleep function.
|
||||||
:param interval: minimal time interval to sleep
|
:param interval: minimal time interval to sleep
|
||||||
|
:param max_backoff: maximum time to sleep
|
||||||
|
:param jitter: max value of jitter added to the sleep time
|
||||||
:return: the actual time that we've slept
|
:return: the actual time that we've slept
|
||||||
"""
|
"""
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
@ -130,7 +129,8 @@ def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL):
|
||||||
if seconds_left <= 0:
|
if seconds_left <= 0:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
to_sleep = exponential_backoff(attempt, interval)
|
to_sleep = exponential_backoff(attempt, interval, max_backoff=max_backoff,
|
||||||
|
jitter=jitter)
|
||||||
|
|
||||||
if to_sleep > seconds_left:
|
if to_sleep > seconds_left:
|
||||||
to_sleep = seconds_left
|
to_sleep = seconds_left
|
||||||
|
@ -142,17 +142,28 @@ def exponential_sleep(deadline, attempt, interval=DEFAULT_INTERVAL):
|
||||||
return to_sleep
|
return to_sleep
|
||||||
|
|
||||||
|
|
||||||
def exponential_backoff(attempt, interval=DEFAULT_INTERVAL, min_backoff=1,
|
def exponential_backoff(attempt, interval=DEFAULT_INTERVAL,
|
||||||
max_backoff=None):
|
max_backoff=MAX_BACKOFF, jitter=DEFAULT_JITTER):
|
||||||
|
"""Return exponential backoff duration with jitter.
|
||||||
|
|
||||||
|
This implements a variation of exponential backoff algorithm [1] (expected
|
||||||
|
backoff E(c) = interval * 2 ** attempt / 2).
|
||||||
|
|
||||||
|
[1] https://en.wikipedia.org/wiki/Exponential_backoff
|
||||||
|
"""
|
||||||
|
|
||||||
if attempt >= MAX_ATTEMPTS:
|
if attempt >= MAX_ATTEMPTS:
|
||||||
# No need to calculate very long intervals
|
# No need to calculate very long intervals
|
||||||
attempt = MAX_ATTEMPTS
|
attempt = MAX_ATTEMPTS
|
||||||
|
|
||||||
backoff = random.randint(min_backoff, 2 ** attempt - 1) * interval
|
backoff = 2 ** attempt * interval
|
||||||
|
|
||||||
if max_backoff is not None and backoff > max_backoff:
|
if max_backoff is not None and backoff > max_backoff:
|
||||||
backoff = max_backoff
|
backoff = max_backoff
|
||||||
|
|
||||||
|
if jitter:
|
||||||
|
backoff += random.randint(0, jitter)
|
||||||
|
|
||||||
return backoff
|
return backoff
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue