def sharding_check(self, hosts):
    """
    Obtain and print sharding statistics

    Runs the "sharding" recon check against every host, prints aggregate
    statistics for the sharding time and for the attempted / failure /
    success counters, then reports which hosts have the oldest and the
    most recent "sharding_last" completion timestamps.

    :param hosts: set of hosts to check. in the format of:
        set([('127.0.0.1', 6221), ('127.0.0.2', 6231)])
    """
    # Counters copied out of each host's "sharding_stats" dictionary.
    counter_keys = ('attempted', 'failure', 'success')
    # Sections are printed in this fixed order; iterating the stats dict
    # itself would give an arbitrary order on interpreters whose dicts
    # are unordered.
    report_keys = ('sharding_time',) + counter_keys
    stats = dict((key, []) for key in report_keys)
    recon = Scout("sharding", self.verbose,
                  self.suppress_errors, self.timeout)
    print("[%s] Checking on sharders" % self._ptime())
    # Any real timestamp compares lower than infinity, so the first host
    # that reports one always becomes the initial "oldest" candidate.
    least_recent_time = float('inf')
    least_recent_url = None
    most_recent_time = 0
    most_recent_url = None
    for url, response, status, ts_start, ts_end in self.pool.imap(
            recon.scout, hosts):
        if status != 200:
            continue
        stats['sharding_time'].append(response.get('sharding_time', 0))
        shard_stats = response.get('sharding_stats')
        if shard_stats:
            # Sharding has a ton more stats, like "no_change".
            # Not sure if we need them at all, or maybe for -v.
            for stat_key in counter_keys:
                stats[stat_key].append(shard_stats.get(stat_key))
        last = response.get('sharding_last', 0)
        if last is None:
            # An explicit null timestamp is left out of the oldest/newest
            # comparison altogether.
            continue
        if last < least_recent_time:
            least_recent_time = last
            least_recent_url = url
        if last > most_recent_time:
            most_recent_time = last
            most_recent_url = url
    for key in report_keys:
        computed = None
        if stats[key]:
            computed = self._gen_stats(stats[key], name=key)
        if computed is not None and computed['reported'] > 0:
            self._print_stats(computed)
        else:
            print("[%s] - No hosts returned valid data." % key)
    if least_recent_url is not None:
        host = urlparse(least_recent_url).netloc
        if not least_recent_time:
            # 0 is the default used above when a response carries no
            # "sharding_last" key at all.
            print('Oldest completion was NEVER by %s.' % host)
        else:
            elapsed = time.time() - least_recent_time
            elapsed, elapsed_unit = seconds2timeunit(elapsed)
            print('Oldest completion was %s (%d %s ago) by %s.' % (
                self._ptime(least_recent_time),
                elapsed, elapsed_unit, host))
    if most_recent_url is not None:
        # most_recent_url is only set by a timestamp greater than 0, so
        # no NEVER case is needed here.
        host = urlparse(most_recent_url).netloc
        elapsed = time.time() - most_recent_time
        elapsed, elapsed_unit = seconds2timeunit(elapsed)
        print('Most recent completion was %s (%d %s ago) by %s.' % (
            self._ptime(most_recent_time),
            elapsed, elapsed_unit, host))
    print("=" * 79)
@mock.patch('six.moves.builtins.print')
@mock.patch('time.time')
def test_sharding_check(self, mock_now, mock_print):
    """
    Check the summary printed by sharding_check for two container hosts
    that return identical sharding recon data.
    """
    now = 1430000000.0

    def sharding_response():
        # Build a fresh payload per host so the two fake responses never
        # share nested objects.
        zero_counts = {"attempted": 0, "failure": 0, "success": 0}
        return {
            "sharding_last": now - 50,
            "sharding_stats": {
                "attempted": 0, "deferred": 0, "diff": 0,
                "diff_capped": 0, "empty": 0, "failure": 0,
                "hashmatch": 0, "no_change": 0, "remote_merge": 0,
                "remove": 0, "rsync": 0,
                "sharding": {
                    "audit_root": dict(zero_counts),
                    "audit_shard": dict(zero_counts),
                    "cleaved": {
                        "attempted": 0, "failure": 0, "max_time": 0,
                        "min_time": 0, "success": 0},
                    "created": dict(zero_counts),
                    "misplaced": {
                        "attempted": 0, "failure": 0, "found": 0,
                        "placed": 0, "success": 0, "unplaced": 0},
                    "scanned": {
                        "attempted": 0, "failure": 0, "found": 0,
                        "max_time": 0, "min_time": 0, "success": 0},
                    "sharding_candidates": {
                        "found": 0,
                        "top": []},
                    "shrinking_candidates": {
                        "found": 0,
                        "top": []},
                    "visited": {
                        "attempted": 0, "completed": 0, "failure": 0,
                        "skipped": 1381, "success": 0}},
                "start": now - 80,
                "success": 0, "ts_repl": 0},
            "sharding_time": 27.6}

    def dummy_request(*args, **kwargs):
        # Stand-in for pool.imap: one (url, response, status, ts_start,
        # ts_end) tuple per host.
        return [
            ('http://127.0.0.1:6011/recon/sharding',
             sharding_response(), 200, 0, 0),
            ('http://127.0.0.1:6021/recon/sharding',
             sharding_response(), 200, 0, 0),
        ]

    cli = recon.SwiftRecon()
    cli.pool.imap = dummy_request

    # All totals are zero in our test set above. Maybe do better later.
    # Both hosts report the same sharding_last value, so the first host
    # is expected in both the "Oldest" and the "Most recent" line.
    default_calls = [
        mock.call('[sharding_time] low: 27, high: 27, avg: 27.6, ' +
                  'total: 55, Failed: 0.0%, no_result: 0, reported: 2'),
        mock.call('[attempted] low: 0, high: 0, avg: 0.0, ' +
                  'total: 0, Failed: 0.0%, no_result: 0, reported: 2'),
        mock.call('[failure] low: 0, high: 0, avg: 0.0, ' +
                  'total: 0, Failed: 0.0%, no_result: 0, reported: 2'),
        mock.call('[success] low: 0, high: 0, avg: 0.0, ' +
                  'total: 0, Failed: 0.0%, no_result: 0, reported: 2'),
        mock.call('Oldest completion was 2015-04-25 22:12:30 ' +
                  '(1 minutes ago) by 127.0.0.1:6011.'),
        mock.call('Most recent completion was 2015-04-25 22:12:30 ' +
                  '(1 minutes ago) by 127.0.0.1:6011.'),
    ]

    # The clock sits 98 seconds after the reported completion, which the
    # expected output above shows as "1 minutes ago".
    mock_now.return_value = now + 48
    cli.sharding_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)])
    mock_print.assert_has_calls(default_calls, any_order=True)