From 7a0118fca0d5a348e940283e201c7657ca553217 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Thu, 10 Nov 2022 13:00:08 +0000 Subject: [PATCH] proxy-server: include suppression time in error limit log Also, add some more test coverage and tidy up some tests. Change-Id: Icf3a1df87d52dc5f95e15e676da229a3323326a2 --- swift/proxy/server.py | 8 +- test/unit/common/test_error_limiter.py | 21 +++-- test/unit/proxy/test_server.py | 104 ++++++++++--------------- 3 files changed, 55 insertions(+), 78 deletions(-) diff --git a/swift/proxy/server.py b/swift/proxy/server.py index ceb13017ce..0f10d87798 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -675,8 +675,8 @@ class Application(object): self.error_limiter.limit(node) self.logger.increment('error_limiter.forced_limit') self.logger.error( - 'Node will be error limited from now: %s, error: %s', - node_to_string(node), + 'Node will be error limited for %.2fs: %s, error: %s', + self.error_limiter.suppression_interval, node_to_string(node), msg) def _error_increment(self, node): @@ -689,8 +689,8 @@ class Application(object): if self.error_limiter.increment(node): self.logger.increment('error_limiter.incremented_limit') self.logger.error( - 'Node will be error limited from now: %s', - node_to_string(node)) + 'Node will be error limited for %.2fs: %s', + self.error_limiter.suppression_interval, node_to_string(node)) def error_occurred(self, node, msg): """ diff --git a/test/unit/common/test_error_limiter.py b/test/unit/common/test_error_limiter.py index f4675f184d..8e22ecfbdd 100644 --- a/test/unit/common/test_error_limiter.py +++ b/test/unit/common/test_error_limiter.py @@ -71,24 +71,21 @@ class TestErrorLimiter(unittest.TestCase): def test_increment(self): node = self.ring.devs[-1] limiter = ErrorLimiter(suppression_interval=60, suppression_limit=10) - last_errors = 0 node_key = limiter.node_key(node) - for _ in range(limiter.suppression_limit): + for i in range(limiter.suppression_limit): self.assertFalse(limiter.increment(node)) - node_errors = limiter.stats.get(node_key) - self.assertGreater(node_errors['errors'], last_errors) + self.assertEqual(i + 1, limiter.stats.get(node_key)['errors']) self.assertFalse(limiter.is_limited(node)) - last_errors = node_errors['errors'] - # One more to make sure it is > suppression_limit - self.assertTrue(limiter.increment(node)) - node_errors = limiter.stats.get(node_key) - self.assertEqual(limiter.suppression_limit + 1, - node_errors['errors']) - self.assertTrue(limiter.is_limited(node)) - last_time = node_errors['last_error'] + # A few more to make sure it is > suppression_limit + for i in range(1, 4): + self.assertTrue(limiter.increment(node)) + self.assertEqual(limiter.suppression_limit + i, + limiter.stats.get(node_key)['errors']) + self.assertTrue(limiter.is_limited(node)) # Simulate time with no errors have gone by. + last_time = limiter.stats.get(node_key)['last_error'] now = last_time + limiter.suppression_interval + 1 with mock.patch('swift.common.error_limiter.time', return_value=now): diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index d716082c01..2f3ac906d1 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -1182,7 +1182,9 @@ class TestProxyServer(unittest.TestCase): expected_info = additional_info.decode('utf8') else: expected_info = additional_info - for i in range(suppression_limit): + + incremented_limit_samples = [] + for i in range(suppression_limit + 1): try: raise Exception('kaboom1!') except Exception as err: @@ -1190,42 +1192,23 @@ class TestProxyServer(unittest.TestCase): app.exception_occurred( node, 'server-type', additional_info) self.assertEqual(i + 1, node_error_count(app, node)) - line = logger.get_lines_for_level('error')[-1] + line = logger.get_lines_for_level('error')[i] self.assertIn('server-type server', line) self.assertIn(expected_info, line) self.assertIn(node['ip'], line) self.assertIn(str(node['port']), line) self.assertIn(node['device'], line) - log_args, log_kwargs = logger.log_dict['error'][-1] + log_args, log_kwargs = logger.log_dict['error'][i] self.assertTrue(log_kwargs['exc_info']) self.assertIs(caught_exc, log_kwargs['exc_info'][1]) - - self.assertEqual(0, logger.get_increment_counts().get( - 'error_limiter.incremented_limit', 0)) - - # One more error to go beyond suppression_limit. - try: - raise Exception('kaboom1!') - except Exception as err: - caught_exc = err - app.exception_occurred(node, 'server-type', additional_info) - self.assertEqual(suppression_limit + 1, - node_error_count(app, node)) - lines = logger.get_lines_for_level('error') - self.assertIn('server-type server', lines[-2]) - self.assertIn(expected_info, lines[-2]) - self.assertIn(node['ip'], lines[-2]) - self.assertIn(str(node['port']), lines[-2]) - self.assertIn(node['device'], lines[-2]) - log_args, log_kwargs = logger.log_dict['error'][-2] - self.assertTrue(log_kwargs['exc_info']) - self.assertIs(caught_exc, log_kwargs['exc_info'][1]) + incremented_limit_samples.append( + logger.get_increment_counts().get( + 'error_limiter.incremented_limit', 0)) + self.assertEqual([0] * 10 + [1], incremented_limit_samples) self.assertEqual( - ('Node will be error limited from now: %s' % + ('Node will be error limited for 60.00s: %s' % node_to_string(node)), - lines[-1]) - self.assertEqual(1, logger.get_increment_counts().get( - 'error_limiter.incremented_limit', 0)) + logger.get_lines_for_level('error')[suppression_limit + 1]) do_test('success') do_test('succès') @@ -1248,33 +1231,36 @@ class TestProxyServer(unittest.TestCase): expected_msg = msg.decode('utf8') else: expected_msg = msg - for i in range(suppression_limit): + incremented_limit_samples = [] + for i in range(suppression_limit + 1): app.error_occurred(node, msg) self.assertEqual(i + 1, node_error_count(app, node)) - line = logger.get_lines_for_level('error')[-1] + line = logger.get_lines_for_level('error')[i] self.assertIn(expected_msg, line) - self.assertIn(node['ip'], line) - self.assertIn(str(node['port']), line) - self.assertIn(node['device'], line) + self.assertIn(node_to_string(node), line) + incremented_limit_samples.append( + logger.get_increment_counts().get( + 'error_limiter.incremented_limit', 0)) - self.assertEqual(0, logger.get_increment_counts().get( - 'error_limiter.incremented_limit', 0)) - - # One more error to go beyond suppression_limit. - app.error_occurred(node, msg) - self.assertEqual(suppression_limit + 1, - node_error_count(app, node)) - lines = logger.get_lines_for_level('error') - self.assertIn(expected_msg, lines[-2]) - self.assertIn(node['ip'], lines[-2]) - self.assertIn(str(node['port']), lines[-2]) - self.assertIn(node['device'], lines[-2]) + self.assertEqual([0] * 10 + [1], incremented_limit_samples) self.assertEqual( - ('Node will be error limited from now: %s' % + ('Node will be error limited for 60.00s: %s' % node_to_string(node)), - lines[-1]) - self.assertEqual(1, logger.get_increment_counts().get( + logger.get_lines_for_level('error')[-1]) + + # error limiting is extended if another error occurs + app.error_occurred(node, msg) + self.assertEqual(suppression_limit + 2, + node_error_count(app, node)) + line = logger.get_lines_for_level('error')[-2] + self.assertIn(expected_msg, line) + self.assertIn(node_to_string(node), line) + self.assertEqual(2, logger.get_increment_counts().get( 'error_limiter.incremented_limit', 0)) + self.assertEqual( + ('Node will be error limited for 60.00s: %s' % + node_to_string(node)), + logger.get_lines_for_level('error')[-1]) do_test('success') do_test('succès') @@ -1368,9 +1354,8 @@ class TestProxyServer(unittest.TestCase): error_lines = app.logger.get_lines_for_level('error') self.assertEqual(1, len(error_lines)) self.assertEqual( - 'Node will be error limited from now: 10.0.0.0:1000/sda, ' - 'error: ERROR Insufficient Storage', - error_lines[0]) + 'Node will be error limited for 60.00s: 10.0.0.0:1000/sda, ' + 'error: ERROR Insufficient Storage', error_lines[0]) self.assertEqual(11, node_error_count(app, node)) self.assertTrue(app.error_limited(node)) @@ -1381,9 +1366,8 @@ class TestProxyServer(unittest.TestCase): error_lines = app.logger.get_lines_for_level('error') self.assertEqual(1, len(error_lines)) self.assertEqual( - 'Node will be error limited from now: 10.0.0.0:1000/sda, ' - 'error: ERROR Insufficient Storage', - error_lines[0]) + 'Node will be error limited for 60.00s: 10.0.0.0:1000/sda, ' + 'error: ERROR Insufficient Storage', error_lines[0]) self.assertEqual(11, node_error_count(app, node)) self.assertTrue(app.error_limited(node)) @@ -3475,10 +3459,8 @@ class TestReplicatedObjectController( 'error_limiter.forced_limit', 0)) line = self.logger.get_lines_for_level('error')[-1] self.assertEqual( - ('Node will be error limited from now: %s, error: %s' % - (node_to_string(error_node), - 'test')), - line) + ('Node will be error limited for 60.00s: %s, error: %s' + % (node_to_string(error_node), 'test')), line) # no error limited checking yet. self.assertEqual( @@ -5339,10 +5321,8 @@ class TestReplicatedObjectController( 'error_limiter.forced_limit', 0)) line = self.logger.get_lines_for_level('error')[-1] self.assertEqual( - ('Node will be error limited from now: %s, error: %s' % - (node_to_string(first_nodes[0]), - 'test')), - line) + ('Node will be error limited for 60.00s: %s, error: %s' + % (node_to_string(first_nodes[0]), 'test')), line) second_nodes = list(self.app.iter_nodes( object_ring, 0, self.logger))