Add swift-recon feature to track swift-drive-audit error count
This is a follow-on from a previous commit which added recon info for swift-drive-audit (https://review.openstack.org/#/c/122468/). Here, the "--driveaudit" option is added to the swift-recon tool. This feature gives the statistics for the system-wide drive errors flagged by swift-drive-audit. An example of the output is as follows: (verbose mode) swift-recon --driveaudit -v =============================================================================== --> Starting reconnaissance on 5 hosts =============================================================================== [2015-03-11 17:13:39] Checking drive-audit errors -> http://1.2.3.4:6000/recon/driveaudit: {'drive_audit_errors': 14} -> http://1.2.3.5:6000/recon/driveaudit: {'drive_audit_errors': 0} -> http://1.2.3.6:6000/recon/driveaudit: {'drive_audit_errors': 37} -> http://1.2.3.7:6000/recon/driveaudit: {'drive_audit_errors': 101} -> http://1.2.3.8:6000/recon/driveaudit: {'drive_audit_errors': 0} [drive_audit_errors] low: 0, high: 101, avg: 30.4, total: 152, Failed: 0.0%, no_result: 0, reported: 5 =============================================================================== Change-Id: Ia16c52a9d613eeb3de1a5a428d88dd1233631912
This commit is contained in:
parent
61f14f0e90
commit
0a46793662
@ -176,6 +176,7 @@ if __name__ == '__main__':
|
||||
if not devices:
|
||||
logger.error("Error: No devices found!")
|
||||
recon_errors = {}
|
||||
total_errors = 0
|
||||
for device in devices:
|
||||
recon_errors[device['mount_point']] = 0
|
||||
errors = get_errors(error_re, log_file_pattern, minutes, logger)
|
||||
@ -198,8 +199,10 @@ if __name__ == '__main__':
|
||||
comment_fstab(mount_point)
|
||||
unmounts += 1
|
||||
recon_errors[mount_point] = count
|
||||
total_errors += count
|
||||
recon_file = recon_cache_path + "/drive.recon"
|
||||
dump_recon_cache(recon_errors, recon_file, logger)
|
||||
dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger)
|
||||
|
||||
if unmounts == 0:
|
||||
logger.info("No drives were unmounted")
|
||||
|
@ -330,6 +330,27 @@ class SwiftRecon(object):
|
||||
print("[async_pending] - No hosts returned valid data.")
|
||||
print("=" * 79)
|
||||
|
||||
def driveaudit_check(self, hosts):
    """
    Collect the drive-audit error count reported by each host and print
    summary statistics for them.

    Only hosts that answer with status 200 contribute a value; if no
    host reports, a "no valid data" line is printed instead.

    :param hosts: set of hosts to check. in the format of:
        set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
    """
    scout = Scout("driveaudit", self.verbose, self.suppress_errors,
                  self.timeout)
    print("[%s] Checking drive-audit errors" % self._ptime())
    errors_by_url = {}
    for url, response, status in self.pool.imap(scout.scout, hosts):
        # Skip hosts that did not return a successful response.
        if status != 200:
            continue
        errors_by_url[url] = response['drive_audit_errors']
    summary = self._gen_stats(errors_by_url.values(),
                              'drive_audit_errors')
    has_data = summary['reported'] > 0
    if has_data:
        self._print_stats(summary)
    else:
        print("[drive_audit_errors] - No hosts returned valid data.")
    print("=" * 79)
|
||||
|
||||
def umount_check(self, hosts):
|
||||
"""
|
||||
Check for and print unmounted drives
|
||||
@ -930,6 +951,8 @@ class SwiftRecon(object):
|
||||
"local copy")
|
||||
args.add_option('--sockstat', action="store_true",
|
||||
help="Get cluster socket usage stats")
|
||||
args.add_option('--driveaudit', action="store_true",
|
||||
help="Get drive audit error stats")
|
||||
args.add_option('--top', type='int', metavar='COUNT', default=0,
|
||||
help='Also show the top COUNT entries in rank order.')
|
||||
args.add_option('--all', action="store_true",
|
||||
@ -992,6 +1015,7 @@ class SwiftRecon(object):
|
||||
self.quarantine_check(hosts)
|
||||
self.socket_usage(hosts)
|
||||
self.server_type_check(hosts)
|
||||
self.driveaudit_check(hosts)
|
||||
else:
|
||||
if options.async:
|
||||
if self.server_type == 'object':
|
||||
@ -1033,6 +1057,8 @@ class SwiftRecon(object):
|
||||
self.quarantine_check(hosts)
|
||||
if options.sockstat:
|
||||
self.socket_usage(hosts)
|
||||
if options.driveaudit:
|
||||
self.driveaudit_check(hosts)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -53,6 +53,8 @@ class ReconMiddleware(object):
|
||||
'container.recon')
|
||||
self.account_recon_cache = os.path.join(self.recon_cache_path,
|
||||
'account.recon')
|
||||
self.drive_recon_cache = os.path.join(self.recon_cache_path,
|
||||
'drive.recon')
|
||||
self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
|
||||
self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
|
||||
self.rings = [self.account_ring_path, self.container_ring_path]
|
||||
@ -124,6 +126,11 @@ class ReconMiddleware(object):
|
||||
return self._from_recon_cache(['async_pending'],
|
||||
self.object_recon_cache)
|
||||
|
||||
def get_driveaudit_error(self):
    """
    get # of drive audit errors

    Looks up the 'drive_audit_errors' key through _from_recon_cache,
    using the drive recon cache path, and returns that call's result.
    """
    cache_keys = ['drive_audit_errors']
    return self._from_recon_cache(cache_keys, self.drive_recon_cache)
|
||||
|
||||
def get_replication_info(self, recon_type):
|
||||
"""get replication info"""
|
||||
if recon_type == 'account':
|
||||
@ -359,6 +366,8 @@ class ReconMiddleware(object):
|
||||
content = self.get_socket_info()
|
||||
elif rcheck == "version":
|
||||
content = self.get_version()
|
||||
elif rcheck == "driveaudit":
|
||||
content = self.get_driveaudit_error()
|
||||
else:
|
||||
content = "Invalid path: %s" % req.path
|
||||
return Response(request=req, status="404 Not Found",
|
||||
|
@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase):
|
||||
% ex)
|
||||
self.assertFalse(expected)
|
||||
|
||||
def test_drive_audit_check(self):
    """
    driveaudit_check must print a [drive_audit_errors] summary line
    whose statistics match the per-host counts served by the patched
    Scout.scout.
    """
    hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6020),
             ('127.0.0.1', 6030), ('127.0.0.1', 6040)]
    # sample json response from http://<host>:<port>/recon/driveaudit
    responses = {6010: {'drive_audit_errors': 15},
                 6020: {'drive_audit_errors': 0},
                 6030: {'drive_audit_errors': 257},
                 6040: {'drive_audit_errors': 56}}
    # <low> <high> <avg> <total> <Failed> <no_result> <reported>
    expected = (0, 257, 82.0, 328, 0.0, 0, 4)

    def mock_scout_driveaudit(app, host):
        url = 'http://%s:%s/recon/driveaudit' % host
        response = responses[host[1]]
        status = 200
        return url, response, status

    stdout = StringIO()
    patches = [
        mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit),
        mock.patch('sys.stdout', new=stdout),
    ]
    with nested(*patches):
        self.recon_instance.driveaudit_check(hosts)

    output = stdout.getvalue()
    # Raw string: "\[" and "\]" are regex escapes, not string escapes.
    r = re.compile(r"\[drive_audit_errors(.*)\](.*)")
    lines = output.splitlines()
    self.assertTrue(lines)
    matched = 0
    for line in lines:
        m = r.match(line)
        if m:
            matched += 1
            self.assertEqual(m.group(2),
                             " low: %s, high: %s, avg: %s, total: %s,"
                             " Failed: %s%%, no_result: %s, reported: %s"
                             % expected)
    # Without this the test would pass even if no summary line was
    # printed at all.
    self.assertTrue(matched, "no [drive_audit_errors] line in output")
|
||||
|
||||
|
||||
class TestReconCommands(unittest.TestCase):
|
||||
def setUp(self):
|
||||
|
@ -172,6 +172,9 @@ class FakeRecon(object):
|
||||
def fake_sockstat(self):
    """Return a fixed dict standing in for socket stats."""
    canned = {'sockstattest': "1"}
    return canned
|
||||
|
||||
def fake_driveaudit(self):
    """Return a fixed dict standing in for the drive-audit lookup."""
    canned = {'driveaudittest': "1"}
    return canned
|
||||
|
||||
def nocontent(self):
    """Return None, i.e. a lookup that yields no content."""
    result = None
    return result
|
||||
|
||||
@ -829,6 +832,15 @@ class TestReconSuccess(TestCase):
|
||||
(('/proc/net/sockstat', 'r'), {}),
|
||||
(('/proc/net/sockstat6', 'r'), {})])
|
||||
|
||||
def test_get_driveaudit_info(self):
    """
    get_driveaudit_error must request 'drive_audit_errors' from the
    drive.recon cache file and return the cached value unchanged.
    """
    from_cache_response = {'drive_audit_errors': 7}
    self.fakecache.fakeout = from_cache_response
    rv = self.app.get_driveaudit_error()
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(self.fakecache.fakeout_calls,
                     [((['drive_audit_errors'],
                        '/var/cache/swift/drive.recon'), {})])
    self.assertEqual(rv, {'drive_audit_errors': 7})
|
||||
|
||||
|
||||
class TestReconMiddleware(unittest.TestCase):
|
||||
|
||||
@ -857,6 +869,7 @@ class TestReconMiddleware(unittest.TestCase):
|
||||
self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5
|
||||
self.app.get_quarantine_count = self.frecon.fake_quarantined
|
||||
self.app.get_socket_info = self.frecon.fake_sockstat
|
||||
self.app.get_driveaudit_error = self.frecon.fake_driveaudit
|
||||
|
||||
def test_recon_get_mem(self):
|
||||
get_mem_resp = ['{"memtest": "1"}']
|
||||
@ -1084,5 +1097,12 @@ class TestReconMiddleware(unittest.TestCase):
|
||||
resp = self.app(req.environ, start_response)
|
||||
self.assertEquals(resp, 'FAKE APP')
|
||||
|
||||
def test_recon_get_driveaudit(self):
    """
    A GET on /recon/driveaudit must return the JSON-encoded result of
    the (faked) drive-audit lookup.
    """
    get_driveaudit_resp = ['{"driveaudittest": "1"}']
    req = Request.blank('/recon/driveaudit',
                        environ={'REQUEST_METHOD': 'GET'})
    resp = self.app(req.environ, start_response)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(resp, get_driveaudit_resp)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user