Add swift-recon feature to track swift-drive-audit error count

This is a follow-on from a previous commit which added recon info
for swift-drive-audit (https://review.openstack.org/#/c/122468/).

Here, the "--driveaudit" option is added to the swift-recon tool. This
feature gives the statistics for the system-wide drive errors flagged
by swift-drive-audit. An example of the output is as follows:
(verbose mode)

swift-recon --driveaudit -v
===============================================================================
--> Starting reconnaissance on 5 hosts
===============================================================================
[2015-03-11 17:13:39] Checking drive-audit errors
-> http://1.2.3.4:6000/recon/driveaudit: {'drive_audit_errors': 14}
-> http://1.2.3.5:6000/recon/driveaudit: {'drive_audit_errors': 0}
-> http://1.2.3.6:6000/recon/driveaudit: {'drive_audit_errors': 37}
-> http://1.2.3.7:6000/recon/driveaudit: {'drive_audit_errors': 101}
-> http://1.2.3.8:6000/recon/driveaudit: {'drive_audit_errors': 0}
[drive_audit_errors] low: 0, high: 101, avg: 30.4, total: 152, Failed: 0.0%, no_result: 0, reported: 5
===============================================================================

Change-Id: Ia16c52a9d613eeb3de1a5a428d88dd1233631912
This commit is contained in:
Lorcan 2015-03-12 15:40:39 +00:00
parent 61f14f0e90
commit 0a46793662
5 changed files with 95 additions and 0 deletions

View File

@ -176,6 +176,7 @@ if __name__ == '__main__':
if not devices:
logger.error("Error: No devices found!")
recon_errors = {}
total_errors = 0
for device in devices:
recon_errors[device['mount_point']] = 0
errors = get_errors(error_re, log_file_pattern, minutes, logger)
@ -198,8 +199,10 @@ if __name__ == '__main__':
comment_fstab(mount_point)
unmounts += 1
recon_errors[mount_point] = count
total_errors += count
recon_file = recon_cache_path + "/drive.recon"
dump_recon_cache(recon_errors, recon_file, logger)
dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger)
if unmounts == 0:
logger.info("No drives were unmounted")

View File

@ -330,6 +330,27 @@ class SwiftRecon(object):
print("[async_pending] - No hosts returned valid data.")
print("=" * 79)
def driveaudit_check(self, hosts):
    """
    Query every host for its drive-audit error count and print summary
    statistics for the values that came back.

    :param hosts: set of hosts to check. in the format of:
        set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]
    """
    scout = Scout("driveaudit", self.verbose, self.suppress_errors,
                  self.timeout)
    print("[%s] Checking drive-audit errors" % self._ptime())
    # Only hosts that answered with HTTP 200 contribute a sample.
    errors_by_url = dict(
        (url, response['drive_audit_errors'])
        for url, response, status in self.pool.imap(scout.scout, hosts)
        if status == 200)
    stats = self._gen_stats(errors_by_url.values(), 'drive_audit_errors')
    if stats['reported'] > 0:
        self._print_stats(stats)
    else:
        print("[drive_audit_errors] - No hosts returned valid data.")
    print("=" * 79)
def umount_check(self, hosts):
"""
Check for and print unmounted drives
@ -930,6 +951,8 @@ class SwiftRecon(object):
"local copy")
args.add_option('--sockstat', action="store_true",
help="Get cluster socket usage stats")
args.add_option('--driveaudit', action="store_true",
help="Get drive audit error stats")
args.add_option('--top', type='int', metavar='COUNT', default=0,
help='Also show the top COUNT entries in rank order.')
args.add_option('--all', action="store_true",
@ -992,6 +1015,7 @@ class SwiftRecon(object):
self.quarantine_check(hosts)
self.socket_usage(hosts)
self.server_type_check(hosts)
self.driveaudit_check(hosts)
else:
if options.async:
if self.server_type == 'object':
@ -1033,6 +1057,8 @@ class SwiftRecon(object):
self.quarantine_check(hosts)
if options.sockstat:
self.socket_usage(hosts)
if options.driveaudit:
self.driveaudit_check(hosts)
def main():

View File

@ -53,6 +53,8 @@ class ReconMiddleware(object):
'container.recon')
self.account_recon_cache = os.path.join(self.recon_cache_path,
'account.recon')
self.drive_recon_cache = os.path.join(self.recon_cache_path,
'drive.recon')
self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
self.rings = [self.account_ring_path, self.container_ring_path]
@ -124,6 +126,11 @@ class ReconMiddleware(object):
return self._from_recon_cache(['async_pending'],
self.object_recon_cache)
def get_driveaudit_error(self):
    """
    Look up the 'drive_audit_errors' entry in the drive recon cache.

    :returns: whatever _from_recon_cache yields for that single key
    """
    wanted_keys = ['drive_audit_errors']
    return self._from_recon_cache(wanted_keys, self.drive_recon_cache)
def get_replication_info(self, recon_type):
"""get replication info"""
if recon_type == 'account':
@ -359,6 +366,8 @@ class ReconMiddleware(object):
content = self.get_socket_info()
elif rcheck == "version":
content = self.get_version()
elif rcheck == "driveaudit":
content = self.get_driveaudit_error()
else:
content = "Invalid path: %s" % req.path
return Response(request=req, status="404 Not Found",

View File

@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase):
% ex)
self.assertFalse(expected)
def test_drive_audit_check(self):
    # Run driveaudit_check against four fake hosts and verify the summary
    # statistics line that it prints to stdout.
    hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6020),
             ('127.0.0.1', 6030), ('127.0.0.1', 6040)]
    # sample json response from http://<host>:<port>/recon/driveaudit
    responses = {6010: {'drive_audit_errors': 15},
                 6020: {'drive_audit_errors': 0},
                 6030: {'drive_audit_errors': 257},
                 6040: {'drive_audit_errors': 56}}
    # <low> <high> <avg> <total> <Failed> <no_result> <reported>
    expected = (0, 257, 82.0, 328, 0.0, 0, 4)

    def mock_scout_driveaudit(app, host):
        # Stand-in for Scout.scout: always succeeds with the canned
        # response registered for the host's port.
        url = 'http://%s:%s/recon/driveaudit' % host
        response = responses[host[1]]
        status = 200
        return url, response, status

    stdout = StringIO()
    patches = [
        mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit),
        mock.patch('sys.stdout', new=stdout),
    ]
    with nested(*patches):
        self.recon_instance.driveaudit_check(hosts)

    output = stdout.getvalue()
    # Raw string: "\[" and "\]" are not valid string escapes.
    r = re.compile(r"\[drive_audit_errors(.*)\](.*)")
    lines = output.splitlines()
    self.assertTrue(lines)
    matched = 0
    for line in lines:
        m = r.match(line)
        if m:
            matched += 1
            self.assertEqual(m.group(2),
                             " low: %s, high: %s, avg: %s, total: %s,"
                             " Failed: %s%%, no_result: %s, reported: %s"
                             % expected)
    # Without this the test would pass vacuously when the stats line is
    # never printed at all.
    self.assertTrue(matched, "no [drive_audit_errors] line in output")
class TestReconCommands(unittest.TestCase):
def setUp(self):

View File

@ -172,6 +172,9 @@ class FakeRecon(object):
def fake_sockstat(self):
return {'sockstattest': "1"}
def fake_driveaudit(self):
    """Return a fixed placeholder payload for the driveaudit check."""
    canned = {'driveaudittest': "1"}
    return canned
def nocontent(self):
return None
@ -829,6 +832,15 @@ class TestReconSuccess(TestCase):
(('/proc/net/sockstat', 'r'), {}),
(('/proc/net/sockstat6', 'r'), {})])
def test_get_driveaudit_info(self):
    # Prime the fake cache, then check both the lookup that
    # get_driveaudit_error() issued and the value it handed back.
    cached = {'drive_audit_errors': 7}
    self.fakecache.fakeout = cached
    result = self.app.get_driveaudit_error()
    expected_calls = [((['drive_audit_errors'],
                        '/var/cache/swift/drive.recon'), {})]
    self.assertEquals(self.fakecache.fakeout_calls, expected_calls)
    self.assertEquals(result, {'drive_audit_errors': 7})
class TestReconMiddleware(unittest.TestCase):
@ -857,6 +869,7 @@ class TestReconMiddleware(unittest.TestCase):
self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5
self.app.get_quarantine_count = self.frecon.fake_quarantined
self.app.get_socket_info = self.frecon.fake_sockstat
self.app.get_driveaudit_error = self.frecon.fake_driveaudit
def test_recon_get_mem(self):
get_mem_resp = ['{"memtest": "1"}']
@ -1084,5 +1097,12 @@ class TestReconMiddleware(unittest.TestCase):
resp = self.app(req.environ, start_response)
self.assertEquals(resp, 'FAKE APP')
def test_recon_get_driveaudit(self):
    # A GET on /recon/driveaudit should come back as the JSON-encoded
    # payload of the stubbed driveaudit handler.
    request = Request.blank('/recon/driveaudit',
                            environ={'REQUEST_METHOD': 'GET'})
    body = self.app(request.environ, start_response)
    self.assertEquals(body, ['{"driveaudittest": "1"}'])
if __name__ == '__main__':
unittest.main()