From 9839152765bc6a58bab2755f02b61bad8db4fe7f Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Fri, 2 Oct 2015 11:08:18 -0700 Subject: [PATCH] Use a stopwatch in 'forever_retry_uncaught_exceptions' We might as well use a oslo.utils stopwatch object to track the periodic 60 seconds logging on exceptions instead of manually using time.time. Change-Id: If47a6e91c92942297d923c54b02d285d075fdc70 --- oslo_utils/excutils.py | 60 ++++++++++------- oslo_utils/tests/test_excutils.py | 107 +++++++++++++++++++++--------- 2 files changed, 113 insertions(+), 54 deletions(-) diff --git a/oslo_utils/excutils.py b/oslo_utils/excutils.py index 33a9431e..65aa24e8 100644 --- a/oslo_utils/excutils.py +++ b/oslo_utils/excutils.py @@ -27,6 +27,7 @@ import six from oslo_utils._i18n import _LE from oslo_utils import reflection +from oslo_utils import timeutils class CausedByException(Exception): @@ -196,42 +197,55 @@ class save_and_reraise_exception(object): def forever_retry_uncaught_exceptions(infunc): + """Decorates provided function with infinite retry behavior. + + If the original function fails it may trigger a logging output to + be written. The function retry delay is **always** one second. + """ def inner_func(*args, **kwargs): - last_log_time = 0 last_exc_message = None - exc_count = 0 + same_failure_count = 0 + unknown_failure_count = 0 + watch = timeutils.StopWatch(duration=60) while True: try: return infunc(*args, **kwargs) except Exception as exc: try: this_exc_message = six.u(str(exc)) - if this_exc_message == last_exc_message: - exc_count += 1 - else: - exc_count = 1 - # Do not log any more frequently than once a minute unless - # the exception message changes - cur_time = int(time.time()) - if (cur_time - last_log_time > 60 or - this_exc_message != last_exc_message): - logging.exception( - _LE('Unexpected exception occurred %d time(s)... ' - 'retrying.') % exc_count) - last_log_time = cur_time - last_exc_message = this_exc_message - exc_count = 0 - # This should be a very rare event. In case it isn't, do - # a sleep. - time.sleep(1) except Exception: + unknown_failure_count += 1 try: logging.exception( - _LE('Unexpected error occurred while ' - 'processing Exception')) + _LE('Unexpected exception occurred %d' + ' time(s)... ') % unknown_failure_count) except Exception: # In case either serialization of the last exception # or logging failed, ignore the error pass - + else: + unknown_failure_count = 0 + if this_exc_message == last_exc_message: + same_failure_count += 1 + else: + same_failure_count = 1 + if this_exc_message != last_exc_message or watch.expired(): + # The watch has expired or the exception message + # changed, so time to log it again... + logging.exception( + _LE('Unexpected exception occurred %d time(s)... ' + 'retrying.') % same_failure_count) + if not watch.has_started(): + watch.start() + else: + watch.restart() + same_failure_count = 0 + last_exc_message = this_exc_message + # This should be a very rare event. In case it isn't, do + # a sleep. + # + # TODO(harlowja): why this is hard coded as one second + # really should be fixed (since I'm not really sure why + # one over some other value was chosen). + time.sleep(1) return inner_func diff --git a/oslo_utils/tests/test_excutils.py b/oslo_utils/tests/test_excutils.py index 60f752d9..3591efc6 100644 --- a/oslo_utils/tests/test_excutils.py +++ b/oslo_utils/tests/test_excutils.py @@ -20,6 +20,7 @@ from oslotest import base as test_base from oslotest import moxstubout from oslo_utils import excutils +from oslo_utils import timeutils mox = moxstubout.mox @@ -154,18 +155,24 @@ class ForeverRetryUncaughtExceptionsTest(test_base.BaseTestCase): def exc_retrier_common_start(self): self.stubs.Set(time, 'sleep', self.my_time_sleep) self.mox.StubOutWithMock(logging, 'exception') - self.mox.StubOutWithMock(time, 'time', + self.mox.StubOutWithMock(timeutils, 'now', use_mock_anything=True) self.mox.StubOutWithMock(self, 'exception_to_raise') - def exc_retrier_sequence(self, exc_id=None, timestamp=None, - exc_count=None): + def exc_retrier_sequence(self, exc_id=None, + exc_count=None, before_timestamp_calls=(), + after_timestamp_calls=()): self.exception_to_raise().AndReturn( Exception('unexpected %d' % exc_id)) - time.time().AndReturn(timestamp) + # Timestamp calls that happen before the logging is possibly triggered. + for timestamp in before_timestamp_calls: + timeutils.now().AndReturn(timestamp) if exc_count != 0: logging.exception(mox.In( 'Unexpected exception occurred %d time(s)' % exc_count)) + # Timestamp calls that happen after the logging is possibly triggered. + for timestamp in after_timestamp_calls: + timeutils.now().AndReturn(timestamp) def exc_retrier_common_end(self): self.exception_to_raise().AndReturn(None) @@ -175,70 +182,108 @@ class ForeverRetryUncaughtExceptionsTest(test_base.BaseTestCase): def test_exc_retrier_1exc_gives_1log(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) + self.exc_retrier_sequence(exc_id=1, exc_count=1, + after_timestamp_calls=[0]) self.exc_retrier_common_end() def test_exc_retrier_same_10exc_1min_gives_1log(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + after_timestamp_calls=[0], exc_count=1) # By design, the following exception don't get logged because they # are within the same minute. for i in range(2, 11): - self.exc_retrier_sequence(exc_id=1, timestamp=i, exc_count=0) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[i], + exc_count=0) self.exc_retrier_common_end() def test_exc_retrier_same_2exc_2min_gives_2logs(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) - self.exc_retrier_sequence(exc_id=1, timestamp=65, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + after_timestamp_calls=[0], exc_count=1) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[65], exc_count=1, + after_timestamp_calls=[65, 66]) self.exc_retrier_common_end() def test_exc_retrier_same_10exc_2min_gives_2logs(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) - self.exc_retrier_sequence(exc_id=1, timestamp=12, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=23, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=34, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=45, exc_count=0) + self.exc_retrier_sequence(exc_id=1, + after_timestamp_calls=[0], exc_count=1) + for ts in [12, 23, 34, 45]: + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[ts], + exc_count=0) # The previous 4 exceptions are counted here - self.exc_retrier_sequence(exc_id=1, timestamp=106, exc_count=5) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[106], + exc_count=5, + after_timestamp_calls=[106, 107]) # Again, the following are not logged due to being within # the same minute - self.exc_retrier_sequence(exc_id=1, timestamp=117, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=128, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=139, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=150, exc_count=0) + for ts in [117, 128, 139, 150]: + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[ts], + exc_count=0) self.exc_retrier_common_end() def test_exc_retrier_mixed_4exc_1min_gives_2logs(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + # The stop watch will be started, + # which will consume one timestamp call. + after_timestamp_calls=[0], exc_count=1) # By design, this second 'unexpected 1' exception is not counted. This # is likely a rare thing and is a sacrifice for code simplicity. - self.exc_retrier_sequence(exc_id=1, timestamp=10, exc_count=0) - self.exc_retrier_sequence(exc_id=2, timestamp=20, exc_count=1) + self.exc_retrier_sequence(exc_id=1, exc_count=0, + # Since the exception will be the same + # the expiry method will be called, which + # uses up a timestamp call. + before_timestamp_calls=[5]) + self.exc_retrier_sequence(exc_id=2, exc_count=1, + # The watch should get reset, which uses + # up two timestamp calls. + after_timestamp_calls=[10, 20]) # Again, trailing exceptions within a minute are not counted. - self.exc_retrier_sequence(exc_id=2, timestamp=30, exc_count=0) + self.exc_retrier_sequence(exc_id=2, exc_count=0, + # Since the exception will be the same + # the expiry method will be called, which + # uses up a timestamp call. + before_timestamp_calls=[25]) self.exc_retrier_common_end() def test_exc_retrier_mixed_4exc_2min_gives_2logs(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + # The stop watch will now be started. + after_timestamp_calls=[0], exc_count=1) # Again, this second exception of the same type is not counted # for the sake of code simplicity. - self.exc_retrier_sequence(exc_id=1, timestamp=10, exc_count=0) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[10], exc_count=0) # The difference between this and the previous case is the log # is also triggered by more than a minute expiring. - self.exc_retrier_sequence(exc_id=2, timestamp=100, exc_count=1) - self.exc_retrier_sequence(exc_id=2, timestamp=110, exc_count=0) + self.exc_retrier_sequence(exc_id=2, exc_count=1, + # The stop watch will now be restarted. + after_timestamp_calls=[100, 105]) + self.exc_retrier_sequence(exc_id=2, + before_timestamp_calls=[110], exc_count=0) self.exc_retrier_common_end() def test_exc_retrier_mixed_4exc_2min_gives_3logs(self): self.exc_retrier_common_start() - self.exc_retrier_sequence(exc_id=1, timestamp=1, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + # The stop watch will now be started. + after_timestamp_calls=[0], exc_count=1) # This time the second 'unexpected 1' exception is counted due # to the same exception occurring same when the minute expires. - self.exc_retrier_sequence(exc_id=1, timestamp=10, exc_count=0) - self.exc_retrier_sequence(exc_id=1, timestamp=100, exc_count=2) - self.exc_retrier_sequence(exc_id=2, timestamp=110, exc_count=1) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[10], exc_count=0) + self.exc_retrier_sequence(exc_id=1, + before_timestamp_calls=[100], + after_timestamp_calls=[100, 105], + exc_count=2) + self.exc_retrier_sequence(exc_id=2, exc_count=1, + after_timestamp_calls=[110, 111]) self.exc_retrier_common_end()