Add and pipe reconstructor stats through recon
This patch plumbs the object-reconstructor stats that are dropped into the recon cache out through the recon middleware and the swift-recon tool. It adds a '/recon/reconstruction/object' endpoint to the middleware, and the swift-recon tool has grown a '-R' / '--reconstruction' option to access this data from each node. Also includes tests and documentation updates. Change-Id: I98582732ca5ccb2e7d2369b53abf9aa8c0ede00c
This commit is contained in:
parent
a8f1512863
commit
7a105b5ef0
@ -58,6 +58,8 @@ Get updater stats
|
||||
Get expirer stats
|
||||
.IP "\fB-r, --replication\fR"
|
||||
Get replication stats
|
||||
.IP "\fB-R, --reconstruction\fR"
|
||||
Get reconstruction stats
|
||||
.IP "\fB-u, --unmounted\fR"
|
||||
Check cluster for unmounted devices
|
||||
.IP "\fB-d, --diskusage\fR"
|
||||
|
@ -764,7 +764,7 @@ This information can also be queried via the swift-recon command line utility::
|
||||
fhines@ubuntu:~$ swift-recon -h
|
||||
Usage:
|
||||
usage: swift-recon <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d]
|
||||
[-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
|
||||
[-R] [-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
|
||||
|
||||
<server_type> account|container|object
|
||||
Defaults to object server.
|
||||
@ -778,6 +778,7 @@ This information can also be queried via the swift-recon command line utility::
|
||||
--suppress Suppress most connection related errors
|
||||
-a, --async Get async stats
|
||||
-r, --replication Get replication stats
|
||||
-R, --reconstruction Get reconstruction stats
|
||||
--auditor Get auditor stats
|
||||
--updater Get updater stats
|
||||
--expirer Get expirer stats
|
||||
|
@ -431,55 +431,28 @@ class SwiftRecon(object):
|
||||
print("[%s] - No hosts returned valid data." % k)
|
||||
print("=" * 79)
|
||||
|
||||
def replication_check(self, hosts):
|
||||
"""
|
||||
Obtain and print replication statistics
|
||||
def _calculate_least_and_most_recent(self, url_time_data):
|
||||
"""calulate and print the least and most recent urls
|
||||
|
||||
:param hosts: set of hosts to check. in the format of:
|
||||
set([('127.0.0.1', 6220), ('127.0.0.2', 6230)])
|
||||
Given a list of url and time tuples calulate the most and least
|
||||
recent timings and print it out.
|
||||
:param url_time_data: list of url and time tuples: [(url, time_), ..]
|
||||
"""
|
||||
stats = {'replication_time': [], 'failure': [], 'success': [],
|
||||
'attempted': []}
|
||||
recon = Scout("replication/%s" % self.server_type, self.verbose,
|
||||
self.suppress_errors, self.timeout)
|
||||
print("[%s] Checking on replication" % self._ptime())
|
||||
least_recent_time = 9999999999
|
||||
least_recent_url = None
|
||||
most_recent_time = 0
|
||||
most_recent_url = None
|
||||
for url, response, status, ts_start, ts_end in self.pool.imap(
|
||||
recon.scout, hosts):
|
||||
if status == 200:
|
||||
stats['replication_time'].append(
|
||||
response.get('replication_time',
|
||||
response.get('object_replication_time', 0)))
|
||||
repl_stats = response.get('replication_stats')
|
||||
if repl_stats:
|
||||
for stat_key in ['attempted', 'failure', 'success']:
|
||||
stats[stat_key].append(repl_stats.get(stat_key))
|
||||
last = response.get('replication_last',
|
||||
response.get('object_replication_last', 0))
|
||||
if last is None:
|
||||
continue
|
||||
if last < least_recent_time:
|
||||
least_recent_time = last
|
||||
least_recent_url = url
|
||||
if last > most_recent_time:
|
||||
most_recent_time = last
|
||||
most_recent_url = url
|
||||
for k in stats:
|
||||
if stats[k]:
|
||||
if k != 'replication_time':
|
||||
computed = self._gen_stats(stats[k],
|
||||
name='replication_%s' % k)
|
||||
else:
|
||||
computed = self._gen_stats(stats[k], name=k)
|
||||
if computed['reported'] > 0:
|
||||
self._print_stats(computed)
|
||||
else:
|
||||
print("[%s] - No hosts returned valid data." % k)
|
||||
else:
|
||||
print("[%s] - No hosts returned valid data." % k)
|
||||
|
||||
for url, last in url_time_data:
|
||||
if last is None:
|
||||
continue
|
||||
if last < least_recent_time:
|
||||
least_recent_time = last
|
||||
least_recent_url = url
|
||||
if last > most_recent_time:
|
||||
most_recent_time = last
|
||||
most_recent_url = url
|
||||
|
||||
if least_recent_url is not None:
|
||||
host = urlparse(least_recent_url).netloc
|
||||
if not least_recent_time:
|
||||
@ -497,6 +470,79 @@ class SwiftRecon(object):
|
||||
print('Most recent completion was %s (%d %s ago) by %s.' % (
|
||||
self._ptime(most_recent_time),
|
||||
elapsed, elapsed_unit, host))
|
||||
|
||||
def reconstruction_check(self, hosts):
    """
    Obtain and print reconstruction statistics

    Each host that answers with status 200 contributes its
    ``object_reconstruction_time`` to the printed summary and its
    ``object_reconstruction_last`` (0 when absent) to the oldest / most
    recent completion report.

    :param hosts: set of hosts to check. in the format of:
        set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
    """
    durations = []
    completions = []
    scout = Scout("reconstruction/%s" % self.server_type, self.verbose,
                  self.suppress_errors, self.timeout)
    print("[%s] Checking on reconstructors" % self._ptime())
    for url, response, status, _ts_start, _ts_end in self.pool.imap(
            scout.scout, hosts):
        if status != 200:
            # only successful responses are counted
            continue
        durations.append(response.get('object_reconstruction_time'))
        completions.append(
            (url, response.get('object_reconstruction_last', 0)))

    # Only compute a summary when at least one host answered.
    computed = None
    if durations:
        computed = self._gen_stats(durations,
                                   name='object_reconstruction_time')
    if computed is not None and computed['reported'] > 0:
        self._print_stats(computed)
    else:
        print("[object_reconstruction_time] - No hosts returned "
              "valid data.")
    self._calculate_least_and_most_recent(completions)
    print("=" * 79)
|
||||
|
||||
def replication_check(self, hosts):
    """
    Obtain and print replication statistics

    Collects the replication time, the attempted / failure / success
    counters and the last completion timestamp from every host that
    answers with status 200, then prints a summary per statistic followed
    by the oldest / most recent completion report.

    :param hosts: set of hosts to check. in the format of:
        set([('127.0.0.1', 6220), ('127.0.0.2', 6230)])
    """
    stats = {'replication_time': [], 'failure': [], 'success': [],
             'attempted': []}
    completions = []
    scout = Scout("replication/%s" % self.server_type, self.verbose,
                  self.suppress_errors, self.timeout)
    print("[%s] Checking on replication" % self._ptime())
    for url, response, status, _ts_start, _ts_end in self.pool.imap(
            scout.scout, hosts):
        if status != 200:
            # only successful responses are counted
            continue
        # Use the un-prefixed key when present, else the object_-prefixed
        # one, else 0.
        elapsed = response.get(
            'replication_time',
            response.get('object_replication_time', 0))
        stats['replication_time'].append(elapsed)
        counters = response.get('replication_stats')
        if counters:
            for counter in ('attempted', 'failure', 'success'):
                stats[counter].append(counters.get(counter))
        finished_at = response.get(
            'replication_last',
            response.get('object_replication_last', 0))
        completions.append((url, finished_at))

    for key, values in stats.items():
        computed = None
        if values:
            # every statistic except the time is reported with a
            # "replication_" prefix
            if key == 'replication_time':
                label = key
            else:
                label = 'replication_%s' % key
            computed = self._gen_stats(values, name=label)
        if computed is not None and computed['reported'] > 0:
            self._print_stats(computed)
        else:
            print("[%s] - No hosts returned valid data." % key)
    self._calculate_least_and_most_recent(completions)
    print("=" * 79)
|
||||
|
||||
def updater_check(self, hosts):
|
||||
@ -1036,7 +1082,7 @@ class SwiftRecon(object):
|
||||
print("=" * 79)
|
||||
usage = '''
|
||||
usage: %prog <server_type> [<server_type> [<server_type>]]
|
||||
[-v] [--suppress] [-a] [-r] [-u] [-d]
|
||||
[-v] [--suppress] [-a] [-r] [-u] [-d] [-R]
|
||||
[-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
|
||||
[--human-readable]
|
||||
|
||||
@ -1055,6 +1101,8 @@ class SwiftRecon(object):
|
||||
help="Get async stats")
|
||||
args.add_option('--replication', '-r', action="store_true",
|
||||
help="Get replication stats")
|
||||
args.add_option('--reconstruction', '-R', action="store_true",
|
||||
help="Get reconstruction stats")
|
||||
args.add_option('--auditor', action="store_true",
|
||||
help="Get auditor stats")
|
||||
args.add_option('--updater', action="store_true",
|
||||
@ -1094,7 +1142,7 @@ class SwiftRecon(object):
|
||||
help='Also show the lowest COUNT entries in rank \
|
||||
order.')
|
||||
args.add_option('--all', action="store_true",
|
||||
help="Perform all checks. Equal to \t\t\t-arudlqT "
|
||||
help="Perform all checks. Equal to \t\t\t-arRudlqT "
|
||||
"--md5 --sockstat --auditor --updater --expirer "
|
||||
"--driveaudit --validate-servers --swift-versions")
|
||||
args.add_option('--region', type="int",
|
||||
@ -1152,6 +1200,7 @@ class SwiftRecon(object):
|
||||
self.object_auditor_check(hosts)
|
||||
self.updater_check(hosts)
|
||||
self.expirer_check(hosts)
|
||||
self.reconstruction_check(hosts)
|
||||
elif self.server_type == 'container':
|
||||
self.auditor_check(hosts)
|
||||
self.updater_check(hosts)
|
||||
@ -1209,6 +1258,13 @@ class SwiftRecon(object):
|
||||
print("Error: Can't check sharding on non container "
|
||||
"servers.")
|
||||
print("=" * 79)
|
||||
if options.reconstruction:
|
||||
if self.server_type == 'object':
|
||||
self.reconstruction_check(hosts)
|
||||
else:
|
||||
print("Error: Can't check reconstruction stats on "
|
||||
"non object servers.")
|
||||
print("=" * 79)
|
||||
if options.validate_servers:
|
||||
self.server_type_check(hosts)
|
||||
if options.loadstats:
|
||||
|
@ -169,6 +169,13 @@ class ReconMiddleware(object):
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_reconstruction_info(self):
    """
    get reconstruction info

    Looks up the ``object_reconstruction_last`` and
    ``object_reconstruction_time`` keys in the object recon cache.

    :returns: whatever ``_from_recon_cache`` returns for those keys
    """
    wanted_keys = ['object_reconstruction_last',
                   'object_reconstruction_time']
    cache_path = self.object_recon_cache
    return self._from_recon_cache(wanted_keys, cache_path)
|
||||
|
||||
def get_device_info(self):
|
||||
"""get devices"""
|
||||
try:
|
||||
@ -399,6 +406,8 @@ class ReconMiddleware(object):
|
||||
content = self.get_sharding_info()
|
||||
elif rcheck == "relinker":
|
||||
content = self.get_relinker_info()
|
||||
elif rcheck == "reconstruction" and rtype == 'object':
|
||||
content = self.get_reconstruction_info()
|
||||
else:
|
||||
content = "Invalid path: %s" % req.path
|
||||
return Response(request=req, status="404 Not Found",
|
||||
|
@ -648,6 +648,45 @@ aliases = %s
|
||||
self.assertRaises(SystemExit, recon.main)
|
||||
self.assertIn('Invalid Storage Policy', stdout.getvalue())
|
||||
|
||||
def test_calculate_least_and_most_recent(self):
    # Pin "now" so the "N seconds/minutes ago" text is deterministic.
    now = 1517894596

    def check_output(url_times, wanted_output):
        # Capture what the helper prints while time.time() is frozen.
        captured = StringIO()
        with mock.patch('sys.stdout', new=captured), \
                mock.patch('time.time', return_value=now):
            self.recon_instance._calculate_least_and_most_recent(
                url_times)
        self.assertEqual(captured.getvalue(), wanted_output)

    # first the empty set
    check_output([], '')

    # a zero timestamp is reported as NEVER
    never_output = 'Oldest completion was NEVER by my.url.\n'
    check_output([('http://my.url/is/awesome', 0)], never_output)

    # a single entry is both the oldest and the most recent
    single_output = (
        'Oldest completion was 2018-02-06 05:23:11 (5 seconds ago) '
        'by my.url.\n'
        'Most recent completion was 2018-02-06 05:23:11 (5 seconds ago) '
        'by my.url.\n')
    data = [('http://my.url/is/awesome', now - 5)]
    check_output(data, single_output)

    # an older entry takes over the "oldest" line only
    pair_output = (
        'Oldest completion was 2018-02-06 05:06:36 (16 minutes ago) '
        'by a.diff.url.\n'
        'Most recent completion was 2018-02-06 05:23:11 (5 seconds ago) '
        'by my.url.\n')
    data.append(('http://a.diff.url/not/as/awesome', now - 1000))
    check_output(data, pair_output)

    # now throw larger sets at it; the extra ages all fall strictly
    # between the two extremes, so the expected output is unchanged
    for extra in (5, 10, 40, 100):
        data.extend([
            ('http://extra.%d.url/blah' % (extra + r),
             now - random.randint(6, 999)) for r in range(extra)])
        random.shuffle(data)
        check_output(data, pair_output)
|
||||
|
||||
|
||||
class TestReconCommands(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@ -1067,6 +1106,41 @@ class TestReconCommands(unittest.TestCase):
|
||||
cli.sharding_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)])
|
||||
mock_print.assert_has_calls(default_calls, any_order=True)
|
||||
|
||||
@mock.patch('six.moves.builtins.print')
@mock.patch('time.time')
def test_reconstruction_check(self, mock_now, mock_print):
    now = 1430000000.0

    def dummy_request(*args, **kwargs):
        # Stands in for pool.imap: two hosts, both answering 200.
        first = ('http://127.0.0.1:6011/recon/reconstruction',
                 {"object_reconstruction_last": now,
                  "object_reconstruction_time": 42},
                 200, 0, 0)
        second = ('http://127.0.0.1:6021/recon/reconstruction',
                  {"object_reconstruction_last": now,
                   "object_reconstruction_time": 23},
                  200, 0, 0)
        return [first, second]

    cli = recon.SwiftRecon()
    cli.pool.imap = dummy_request

    stats_line = ('[object_reconstruction_time] low: 23, high: 42, '
                  'avg: 32.5, total: 65, Failed: 0.0%, no_result: 0, '
                  'reported: 2')
    oldest_line = ('Oldest completion was 2015-04-25 22:13:20 '
                   '(42 seconds ago) by 127.0.0.1:6011.')
    newest_line = ('Most recent completion was 2015-04-25 22:13:20 '
                   '(42 seconds ago) by 127.0.0.1:6011.')
    default_calls = [
        mock.call(stats_line),
        mock.call(oldest_line),
        mock.call(newest_line),
    ]

    mock_now.return_value = now + 42
    cli.reconstruction_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)])
    # We need any_order=True because the order of calls depends on the dict
    # that is returned from the recon middleware, thus can't rely on it
    mock_print.assert_has_calls(default_calls, any_order=True)
|
||||
|
||||
@mock.patch('six.moves.builtins.print')
|
||||
@mock.patch('time.time')
|
||||
def test_load_check(self, mock_now, mock_print):
|
||||
@ -1077,16 +1151,11 @@ class TestReconCommands(unittest.TestCase):
|
||||
('http://127.0.0.1:6010/recon/load',
|
||||
{"1m": 0.2, "5m": 0.4, "15m": 0.25,
|
||||
"processes": 10000, "tasks": "1/128"},
|
||||
200,
|
||||
0,
|
||||
0),
|
||||
200, 0, 0),
|
||||
('http://127.0.0.1:6020/recon/load',
|
||||
{"1m": 0.4, "5m": 0.8, "15m": 0.75,
|
||||
"processes": 9000, "tasks": "1/200"},
|
||||
200,
|
||||
0,
|
||||
0),
|
||||
]
|
||||
200, 0, 0)]
|
||||
|
||||
cli = recon.SwiftRecon()
|
||||
cli.pool.imap = dummy_request
|
||||
|
@ -168,6 +168,9 @@ class FakeRecon(object):
|
||||
def fake_relinker(self):
|
||||
return {"relinktest": "1"}
|
||||
|
||||
def fake_reconstruction(self):
    # Fixed marker payload returned in place of real reconstruction info.
    marker = {'reconstructiontest': "1"}
    return marker
|
||||
|
||||
def fake_updater(self, recon_type):
|
||||
self.fake_updater_rtype = recon_type
|
||||
return {'updatertest': "1"}
|
||||
@ -807,6 +810,21 @@ class TestReconSuccess(TestCase):
|
||||
rv = self.app.get_replication_info('unrecognized_recon_type')
|
||||
self.assertIsNone(rv)
|
||||
|
||||
def test_get_reconstruction(self):
    # Prime the fake cache, then check both the lookup that was made and
    # the value handed back.
    cached = {
        "object_reconstruction_time": 0.2615511417388916,
        "object_reconstruction_last": 1357969645.25}
    self.fakecache.fakeout_calls = []
    self.fakecache.fakeout = cached

    rv = self.app.get_reconstruction_info()

    wanted_call = ((['object_reconstruction_last',
                     'object_reconstruction_time'],
                    '/var/cache/swift/object.recon'), {})
    self.assertEqual(self.fakecache.fakeout_calls, [wanted_call])
    wanted_info = {
        "object_reconstruction_time": 0.2615511417388916,
        "object_reconstruction_last": 1357969645.25}
    self.assertEqual(rv, wanted_info)
|
||||
|
||||
def test_get_updater_info_container(self):
|
||||
from_cache_response = {"container_updater_sweep": 18.476239919662476}
|
||||
self.fakecache.fakeout_calls = []
|
||||
@ -1333,6 +1351,7 @@ class TestReconMiddleware(unittest.TestCase):
|
||||
self.app.get_async_info = self.frecon.fake_async
|
||||
self.app.get_device_info = self.frecon.fake_get_device_info
|
||||
self.app.get_replication_info = self.frecon.fake_replication
|
||||
self.app.get_reconstruction_info = self.frecon.fake_reconstruction
|
||||
self.app.get_auditor_info = self.frecon.fake_auditor
|
||||
self.app.get_updater_info = self.frecon.fake_updater
|
||||
self.app.get_expirer_info = self.frecon.fake_expirer
|
||||
@ -1380,6 +1399,13 @@ class TestReconMiddleware(unittest.TestCase):
|
||||
resp = self.app(req.environ, start_response)
|
||||
self.assertEqual(resp, get_device_resp)
|
||||
|
||||
def test_reconstruction_info(self):
    # A GET on the object reconstruction path must return the JSON body
    # produced by the faked get_reconstruction_info.
    request = Request.blank('/recon/reconstruction/object',
                            environ={'REQUEST_METHOD': 'GET'})
    body = self.app(request.environ, start_response)
    wanted_body = [b'{"reconstructiontest": "1"}']
    self.assertEqual(body, wanted_body)
|
||||
|
||||
def test_recon_get_replication_notype(self):
|
||||
get_replication_resp = [b'{"replicationtest": "1"}']
|
||||
req = Request.blank('/recon/replication',
|
||||
|
Loading…
Reference in New Issue
Block a user