Add swift-recon feature to track swift-drive-audit error count

This is a follow-on from a previous commit which added recon info for swift-drive-audit (https://review.openstack.org/#/c/122468/). Here, the "--driveaudit" option is added to the swift-recon tool. This feature gives the statistics for the system-wide drive errors flagged by swift-drive-audit. An example of the output is as follows: (verbose mode) swift-recon --driveaudit -v =============================================================================== --> Starting reconnaissance on 5 hosts =============================================================================== [2015-03-11 17:13:39] Checking drive-audit errors -> http://1.2.3.4:6000/recon/driveaudit: {'drive_audit_errors': 14} -> http://1.2.3.5:6000/recon/driveaudit: {'drive_audit_errors': 0} -> http://1.2.3.6:6000/recon/driveaudit: {'drive_audit_errors': 37} -> http://1.2.3.7:6000/recon/driveaudit: {'drive_audit_errors': 101} -> http://1.2.3.8:6000/recon/driveaudit: {'drive_audit_errors': 0} [drive_audit_errors] low: 0, high: 101, avg: 30.4, total: 152, Failed: 0.0%, no_result: 0, reported: 5 =============================================================================== Change-Id: Ia16c52a9d613eeb3de1a5a428d88dd1233631912
2015-03-12 15:40:39 +00:00 · 2015-03-12 15:40:39 +00:00 · 0a46793662
commit 0a46793662
parent 61f14f0e90
5 changed files with 95 additions and 0 deletions
--- a/bin/swift-drive-audit
+++ b/bin/swift-drive-audit
@ -176,6 +176,7 @@ if __name__ == '__main__':
    if not devices:
        logger.error("Error: No devices found!")
    recon_errors = {}
+    total_errors = 0
    for device in devices:
        recon_errors[device['mount_point']] = 0
    errors = get_errors(error_re, log_file_pattern, minutes, logger)
@ -198,8 +199,10 @@ if __name__ == '__main__':
                        comment_fstab(mount_point)
                        unmounts += 1
                    recon_errors[mount_point] = count
+                    total_errors += count
    recon_file = recon_cache_path + "/drive.recon"
    dump_recon_cache(recon_errors, recon_file, logger)
+    dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger)

    if unmounts == 0:
        logger.info("No drives were unmounted")
--- a/swift/cli/recon.py
+++ b/swift/cli/recon.py
@ -330,6 +330,27 @@ class SwiftRecon(object):
            print("[async_pending] - No hosts returned valid data.")
        print("=" * 79)

+    def driveaudit_check(self, hosts):
+        """
+        Obtain and print drive audit error statistics
+
+        :param hosts: set of hosts to check. in the format of:
+            set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]
+        """
+        scan = {}
+        recon = Scout("driveaudit", self.verbose, self.suppress_errors,
+                      self.timeout)
+        print("[%s] Checking drive-audit errors" % self._ptime())
+        for url, response, status in self.pool.imap(recon.scout, hosts):
+            if status == 200:
+                scan[url] = response['drive_audit_errors']
+        stats = self._gen_stats(scan.values(), 'drive_audit_errors')
+        if stats['reported'] > 0:
+            self._print_stats(stats)
+        else:
+            print("[drive_audit_errors] - No hosts returned valid data.")
+        print("=" * 79)
+
    def umount_check(self, hosts):
        """
        Check for and print unmounted drives
@ -930,6 +951,8 @@ class SwiftRecon(object):
                        "local copy")
        args.add_option('--sockstat', action="store_true",
                        help="Get cluster socket usage stats")
+        args.add_option('--driveaudit', action="store_true",
+                        help="Get drive audit error stats")
        args.add_option('--top', type='int', metavar='COUNT', default=0,
                        help='Also show the top COUNT entries in rank order.')
        args.add_option('--all', action="store_true",
@ -992,6 +1015,7 @@ class SwiftRecon(object):
            self.quarantine_check(hosts)
            self.socket_usage(hosts)
            self.server_type_check(hosts)
+            self.driveaudit_check(hosts)
        else:
            if options.async:
                if self.server_type == 'object':
@ -1033,6 +1057,8 @@ class SwiftRecon(object):
                self.quarantine_check(hosts)
            if options.sockstat:
                self.socket_usage(hosts)
+            if options.driveaudit:
+                self.driveaudit_check(hosts)


 def main():
--- a/swift/common/middleware/recon.py
+++ b/swift/common/middleware/recon.py
@ -53,6 +53,8 @@ class ReconMiddleware(object):
                                                  'container.recon')
        self.account_recon_cache = os.path.join(self.recon_cache_path,
                                                'account.recon')
+        self.drive_recon_cache = os.path.join(self.recon_cache_path,
+                                              'drive.recon')
        self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
        self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
        self.rings = [self.account_ring_path, self.container_ring_path]
@ -124,6 +126,11 @@ class ReconMiddleware(object):
        return self._from_recon_cache(['async_pending'],
                                      self.object_recon_cache)

+    def get_driveaudit_error(self):
+        """get # of drive audit errors"""
+        return self._from_recon_cache(['drive_audit_errors'],
+                                      self.drive_recon_cache)
+
    def get_replication_info(self, recon_type):
        """get replication info"""
        if recon_type == 'account':
@ -359,6 +366,8 @@ class ReconMiddleware(object):
            content = self.get_socket_info()
        elif rcheck == "version":
            content = self.get_version()
+        elif rcheck == "driveaudit":
+            content = self.get_driveaudit_error()
        else:
            content = "Invalid path: %s" % req.path
            return Response(request=req, status="404 Not Found",
--- a/test/unit/cli/test_recon.py
+++ b/test/unit/cli/test_recon.py
@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase):
                                  % ex)
        self.assertFalse(expected)

+    def test_drive_audit_check(self):
+        hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6020),
+                 ('127.0.0.1', 6030), ('127.0.0.1', 6040)]
+        # sample json response from http://<host>:<port>/recon/driveaudit
+        responses = {6010: {'drive_audit_errors': 15},
+                     6020: {'drive_audit_errors': 0},
+                     6030: {'drive_audit_errors': 257},
+                     6040: {'drive_audit_errors': 56}}
+        # <low> <high> <avg> <total> <Failed> <no_result> <reported>
+        expected = (0, 257, 82.0, 328, 0.0, 0, 4)
+
+        def mock_scout_driveaudit(app, host):
+            url = 'http://%s:%s/recon/driveaudit' % host
+            response = responses[host[1]]
+            status = 200
+            return url, response, status
+
+        stdout = StringIO()
+        patches = [
+            mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit),
+            mock.patch('sys.stdout', new=stdout),
+        ]
+        with nested(*patches):
+            self.recon_instance.driveaudit_check(hosts)
+
+        output = stdout.getvalue()
+        r = re.compile("\[drive_audit_errors(.*)\](.*)")
+        lines = output.splitlines()
+        self.assertTrue(lines)
+        for line in lines:
+            m = r.match(line)
+            if m:
+                self.assertEquals(m.group(2),
+                                  " low: %s, high: %s, avg: %s, total: %s,"
+                                  " Failed: %s%%, no_result: %s, reported: %s"
+                                  % expected)
+

 class TestReconCommands(unittest.TestCase):
    def setUp(self):
--- a/test/unit/common/middleware/test_recon.py
+++ b/test/unit/common/middleware/test_recon.py
@ -172,6 +172,9 @@ class FakeRecon(object):
    def fake_sockstat(self):
        return {'sockstattest': "1"}

+    def fake_driveaudit(self):
+        return {'driveaudittest': "1"}
+
    def nocontent(self):
        return None

@ -829,6 +832,15 @@ class TestReconSuccess(TestCase):
            (('/proc/net/sockstat', 'r'), {}),
            (('/proc/net/sockstat6', 'r'), {})])

+    def test_get_driveaudit_info(self):
+        from_cache_response = {'drive_audit_errors': 7}
+        self.fakecache.fakeout = from_cache_response
+        rv = self.app.get_driveaudit_error()
+        self.assertEquals(self.fakecache.fakeout_calls,
+                          [((['drive_audit_errors'],
+                             '/var/cache/swift/drive.recon'), {})])
+        self.assertEquals(rv, {'drive_audit_errors': 7})
+

 class TestReconMiddleware(unittest.TestCase):

@ -857,6 +869,7 @@ class TestReconMiddleware(unittest.TestCase):
        self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5
        self.app.get_quarantine_count = self.frecon.fake_quarantined
        self.app.get_socket_info = self.frecon.fake_sockstat
+        self.app.get_driveaudit_error = self.frecon.fake_driveaudit

    def test_recon_get_mem(self):
        get_mem_resp = ['{"memtest": "1"}']
@ -1084,5 +1097,12 @@ class TestReconMiddleware(unittest.TestCase):
        resp = self.app(req.environ, start_response)
        self.assertEquals(resp, 'FAKE APP')

+    def test_recon_get_driveaudit(self):
+        get_driveaudit_resp = ['{"driveaudittest": "1"}']
+        req = Request.blank('/recon/driveaudit',
+                            environ={'REQUEST_METHOD': 'GET'})
+        resp = self.app(req.environ, start_response)
+        self.assertEquals(resp, get_driveaudit_resp)
+
 if __name__ == '__main__':
    unittest.main()