coordination: do not retry the whole heartbeat on failure

The current code tries to re-execute the whole _beat_forever_until_stopped
method when it fails, whereas the intent is to retry only the driver code.

The current behavior triggers an issue on interpreter shutdown:

  https://github.com/gnocchixyz/gnocchi/issues/979

Exception in thread Thread-1 (most likely raised during interpreter shutdown):
Exception in thread Thread-2 (most likely raised during interpreter shutdown):
Traceback (most recent call last):
Traceback (most recent call last):
File "/usr/lib64/python2.7/threading.py", line 812, in __bootstrap_inner
File "/usr/lib64/python2.7/threading.py", line 765, in run
File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 250, in wrapper
File "/usr/lib/python2.7/site-packages/tooz/coordination.py", line 186, in _beat_forever_until_stopped
<type 'exceptions.AttributeError'>: 'NoneType' object has no attribute 'StopWatch'
File "/usr/lib64/python2.7/threading.py", line 812, in __bootstrap_inner
File "/usr/lib64/python2.7/threading.py", line 765, in run
File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 250, in wrapper
File "/usr/lib/python2.7/site-packages/tooz/coordination.py", line 186, in _beat_forever_until_stopped
<type 'exceptions.AttributeError'>: 'NoneType' object has no attribute 'StopWatch'

Change-Id: I558b89558e29c97e6d2d8036f068a593dd7540ab
This commit is contained in:
Julien Danjou 2018-10-05 10:27:11 +02:00
parent ae54cf2f54
commit 140fc3d94b
1 changed files with 6 additions and 3 deletions

View File

@ -23,11 +23,11 @@ import logging
import threading
from oslo_utils import encodeutils
from oslo_utils import excutils
from oslo_utils import netutils
from oslo_utils import timeutils
import six
from stevedore import driver
import tenacity
import tooz
from tooz import _retry
@ -179,12 +179,15 @@ class Heart(object):
return not (self._runner is None
or not self._runner.is_alive())
@excutils.forever_retry_uncaught_exceptions
def _beat_forever_until_stopped(self):
"""Inner beating loop."""
retry = tenacity.Retrying(
wait=tenacity.wait_fixed(1),
before_sleep=tenacity.before_sleep_log(LOG, logging.warning),
)
while not self._dead.is_set():
with timeutils.StopWatch() as w:
wait_until_next_beat = self._driver.heartbeat()
wait_until_next_beat = retry(self._driver.heartbeat)
ran_for = w.elapsed()
has_to_sleep_for = wait_until_next_beat - ran_for
if has_to_sleep_for < 0: