Add min_interval to BackOffLoopingCall

The backoff timer has a few issues that can cause it to get stuck
in an infinite loop and never time out.

  1. The random.gauss() function used to generate random jitter can
     return negative values, so when it does, it makes the elapsed time
     self._error_time go "backward."
  2. The random jitter is used as a multiplier for the self._interval,
     so self._interval can deviate far away from the mean over time and
     walk to zero, causing self._interval to be 0, which will prevent
     the timer from making progress from that point on because idle
     will always evaluate to zero and the elapsed time won't increase.
  3. The evaluated interval doesn't have a lower bound, so over time
     it can get extremely small if jitter (the mean) < 0.5.

This adds a min_interval keyword argument to the BackOffLoopingCall
start() function that defaults to 0.001s and uses it to lower bound
the interval calculations. We'll also take the absolute value of the
return from random.gauss() to prevent elapsed time going backward, and
we'll calculate the running self._interval separately to make it track
the desired growth rate of the backoff and not let it drift with the
random.gauss() values.

Closes-Bug: #1686159

Change-Id: Id17668a34d5cedbe870c9056350a7e9c7196faa7
This commit is contained in:
melanie witt 2017-04-20 15:46:17 +00:00
parent 3be392497a
commit 8f67ed7d53
2 changed files with 57 additions and 23 deletions

View File

@ -290,6 +290,8 @@ class BackOffLoopingCall(LoopingCallBase):
random.gauss(jitter, 0.1), with jitter as the mean for the
distribution. If set below .5, it can cause the calls to
come more rapidly after each failure.
:param min_interval: The minimum interval in seconds between calls to
function.
:raises: LoopingCallTimeout if time spent doing error retries would exceed
timeout.
"""
@ -305,7 +307,7 @@ class BackOffLoopingCall(LoopingCallBase):
self._interval = 1
def start(self, initial_delay=None, starting_interval=1, timeout=300,
max_interval=300, jitter=0.75):
max_interval=300, jitter=0.75, min_interval=0.001):
if self._thread is not None:
raise RuntimeError(self._RUN_ONLY_ONE_MESSAGE)
@ -314,16 +316,20 @@ class BackOffLoopingCall(LoopingCallBase):
self._interval = starting_interval
def _idle_for(success, _elapsed):
random_jitter = self._RNG.gauss(jitter, 0.1)
random_jitter = abs(self._RNG.gauss(jitter, 0.1))
if success:
# Reset error state now that it didn't error...
self._interval = starting_interval
self._error_time = 0
return self._interval * random_jitter
else:
# Perform backoff
self._interval = idle = min(
self._interval * 2 * random_jitter, max_interval)
# Perform backoff, random jitter around the next interval
# bounded by min_interval and max_interval.
idle = max(self._interval * 2 * random_jitter, min_interval)
idle = min(idle, max_interval)
# Calculate the next interval based on the mean, so that the
# backoff grows at the desired rate.
self._interval = max(self._interval * 2 * jitter, min_interval)
# Don't go over timeout, end early if necessary. If
# timeout is 0, keep going.
if timeout > 0 and self._error_time + idle > timeout:

View File

@ -326,16 +326,43 @@ class TestBackOffLoopingCall(test_base.BaseTestCase):
loopingcall.BackOffLoopingCall(false).start()
.wait)
expected_times = [mock.call(1.6000000000000001),
mock.call(2.5600000000000005),
mock.call(4.096000000000001),
mock.call(6.5536000000000021),
mock.call(10.485760000000004),
mock.call(16.777216000000006),
mock.call(26.843545600000013),
mock.call(42.949672960000022),
mock.call(68.719476736000033),
mock.call(109.95116277760006)]
expected_times = [mock.call(1.6),
mock.call(2.4000000000000004),
mock.call(3.6),
mock.call(5.4),
mock.call(8.1),
mock.call(12.15),
mock.call(18.225),
mock.call(27.337500000000002),
mock.call(41.00625),
mock.call(61.509375000000006),
mock.call(92.26406250000001)]
self.assertEqual(expected_times, sleep_mock.call_args_list)
@mock.patch('random.SystemRandom.gauss')
@mock.patch('eventlet.greenthread.sleep')
def test_exponential_backoff_negative_value(self, sleep_mock, random_mock):
def false():
return False
# random.gauss() can return negative values
random_mock.return_value = -.8
self.assertRaises(loopingcall.LoopingCallTimeOut,
loopingcall.BackOffLoopingCall(false).start()
.wait)
expected_times = [mock.call(1.6),
mock.call(2.4000000000000004),
mock.call(3.6),
mock.call(5.4),
mock.call(8.1),
mock.call(12.15),
mock.call(18.225),
mock.call(27.337500000000002),
mock.call(41.00625),
mock.call(61.509375000000006),
mock.call(92.26406250000001)]
self.assertEqual(expected_times, sleep_mock.call_args_list)
@mock.patch('random.SystemRandom.gauss')
@ -379,14 +406,15 @@ class TestBackOffLoopingCall(test_base.BaseTestCase):
max_interval=60)
.wait)
expected_times = [mock.call(1.6000000000000001),
mock.call(2.5600000000000005),
mock.call(4.096000000000001),
mock.call(6.5536000000000021),
mock.call(10.485760000000004),
mock.call(16.777216000000006),
mock.call(26.843545600000013),
mock.call(42.949672960000022),
expected_times = [mock.call(1.6),
mock.call(2.4000000000000004),
mock.call(3.6),
mock.call(5.4),
mock.call(8.1),
mock.call(12.15),
mock.call(18.225),
mock.call(27.337500000000002),
mock.call(41.00625),
mock.call(60),
mock.call(60),
mock.call(60)]