From 325324137649cc40791260b6d12df33c0407e039 Mon Sep 17 00:00:00 2001 From: Julien Danjou Date: Mon, 27 Feb 2017 13:55:40 +0100 Subject: [PATCH] Rewrite heartbeat runner with event The current implementation uses an event attached to the Heart class to detect if the thread is done. Since the thread is a daemon, it might be killed after the interpreter is done, especially if an application is killed before calling stop() on the coordinator. This results in the following backtrace: Exception in thread Thread-1 (most likely raised during interpreter shutdown): Traceback (most recent call last): File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner File "/usr/lib/python2.7/threading.py", line 754, in run File "/usr/local/lib/python2.7/dist-packages/oslo_utils/excutils.py", line 250, in wrapper File "/usr/local/lib/python2.7/dist-packages/tooz/coordination.py", line 206, in _beat_forever_until_stopped File "/usr/lib/python2.7/threading.py", line 585, in set File "/usr/lib/python2.7/threading.py", line 407, in notifyAll : 'NoneType' object is not callable This patches simplifies the heart class by using the thread status rather than an event to check if it's alive and wait for it to stop. Change-Id: I03ccbefd3d68e9dd85ea23a3840d606d1ffc80bb --- tooz/coordination.py | 49 +++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/tooz/coordination.py b/tooz/coordination.py index 8ee8a400..d360e8c7 100755 --- a/tooz/coordination.py +++ b/tooz/coordination.py @@ -163,8 +163,6 @@ class Heart(object): event_cls=threading.Event): self._thread_cls = thread_cls self._dead = event_cls() - self._finished = event_cls() - self._finished.set() self._runner = None self._driver = driver self._beats = 0 @@ -177,38 +175,33 @@ class Heart(object): def is_alive(self): """Returns if the heart is beating.""" return not (self._runner is None - or not self._runner.is_alive() - or self._finished.is_set()) + or not self._runner.is_alive()) @excutils.forever_retry_uncaught_exceptions def _beat_forever_until_stopped(self): """Inner beating loop.""" - try: - while not self._dead.is_set(): - with timeutils.StopWatch() as w: - wait_until_next_beat = self._driver.heartbeat() - ran_for = w.elapsed() - if ran_for > wait_until_next_beat: - LOG.warning( - "Heartbeating took too long to execute (it ran for" - " %0.2f seconds which is %0.2f seconds longer than" - " the next heartbeat idle time). This may cause" - " timeouts (in locks, leadership, ...) to" - " happen (which will not end well).", ran_for, - ran_for - wait_until_next_beat) - self._beats += 1 - # NOTE(harlowja): use the event object for waiting and - # not a sleep function since doing that will allow this code - # to terminate early if stopped via the stop() method vs - # having to wait until the sleep function returns. - self._dead.wait(wait_until_next_beat) - finally: - self._finished.set() + while not self._dead.is_set(): + with timeutils.StopWatch() as w: + wait_until_next_beat = self._driver.heartbeat() + ran_for = w.elapsed() + if ran_for > wait_until_next_beat: + LOG.warning( + "Heartbeating took too long to execute (it ran for" + " %0.2f seconds which is %0.2f seconds longer than" + " the next heartbeat idle time). This may cause" + " timeouts (in locks, leadership, ...) to" + " happen (which will not end well).", ran_for, + ran_for - wait_until_next_beat) + self._beats += 1 + # NOTE(harlowja): use the event object for waiting and + # not a sleep function since doing that will allow this code + # to terminate early if stopped via the stop() method vs + # having to wait until the sleep function returns. + self._dead.wait(wait_until_next_beat) def start(self, thread_cls=None): """Starts the heart beating thread (noop if already started).""" if not self.is_alive(): - self._finished.clear() self._dead.clear() self._beats = 0 if thread_cls is None: @@ -223,8 +216,8 @@ class Heart(object): def wait(self, timeout=None): """Wait up to given timeout for the heart beating thread to stop.""" - self._finished.wait(timeout) - return self._finished.is_set() + self._runner.join(timeout) + return self._runner.is_alive() class CoordinationDriver(object):