From 140fc3d94b010b04fd89bb49ec6bc92778943d3f Mon Sep 17 00:00:00 2001 From: Julien Danjou Date: Fri, 5 Oct 2018 10:27:11 +0200 Subject: [PATCH] coordination: do not retry the whole heartbeat on fail The current code tries to re-execute the whole _beat_forever_until_stopped method when it fails, whereas what it wants is only to retry the driver code. The current behavior triggers an issue on interpreter shutdown: https://github.com/gnocchixyz/gnocchi/issues/979 Exception in thread Thread-1 (most likely raised during interpreter shutdown): Exception in thread Thread-2 (most likely raised during interpreter shutdown): Traceback (most recent call last): Traceback (most recent call last): File "/usr/lib64/python2.7/threading.py", line 812, in __bootstrap_inner File "/usr/lib64/python2.7/threading.py", line 765, in run File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 250, in wrapper File "/usr/lib/python2.7/site-packages/tooz/coordination.py", line 186, in _beat_forever_until_stopped : 'NoneType' object has no attribute 'StopWatch' File "/usr/lib64/python2.7/threading.py", line 812, in __bootstrap_inner File "/usr/lib64/python2.7/threading.py", line 765, in run File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 250, in wrapper File "/usr/lib/python2.7/site-packages/tooz/coordination.py", line 186, in _beat_forever_until_stopped : 'NoneType' object has no attribute 'StopWatch' Change-Id: I558b89558e29c97e6d2d8036f068a593dd7540ab --- tooz/coordination.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tooz/coordination.py b/tooz/coordination.py index 332764e2..4960fd26 100755 --- a/tooz/coordination.py +++ b/tooz/coordination.py @@ -23,11 +23,11 @@ import logging import threading from oslo_utils import encodeutils -from oslo_utils import excutils from oslo_utils import netutils from oslo_utils import timeutils import six from stevedore import driver +import tenacity import tooz from tooz import _retry @@ 
-179,12 +179,15 @@ class Heart(object): return not (self._runner is None or not self._runner.is_alive()) - @excutils.forever_retry_uncaught_exceptions def _beat_forever_until_stopped(self): """Inner beating loop.""" + retry = tenacity.Retrying( + wait=tenacity.wait_fixed(1), + before_sleep=tenacity.before_sleep_log(LOG, logging.warning), + ) while not self._dead.is_set(): with timeutils.StopWatch() as w: - wait_until_next_beat = self._driver.heartbeat() + wait_until_next_beat = retry(self._driver.heartbeat) ran_for = w.elapsed() has_to_sleep_for = wait_until_next_beat - ran_for if has_to_sleep_for < 0: