From fbe5836d86a02cf256115c4e7c14dd34be4c5100 Mon Sep 17 00:00:00 2001
From: "Carlos L. Torres"
Date: Tue, 3 Mar 2015 18:09:32 -0600
Subject: [PATCH] [report] Improve reports data and units

- Adds a new median metric to both the CLI and HTML reports.
- Adds support for a table header that consolidates the units of each
  metric and gives the table a title.
- Re-organizes metrics as order statistics (min, percentiles, max),
  followed by the aggregate statistics (avg).
- Includes unit tests.

Change-Id: Icd154c9f00b5e9d6df11797685fe8f92c4fdbb1b
---
 rally/benchmark/processing/plot.py            | 14 +++--
 rally/benchmark/processing/utils.py           | 21 +++++++
 rally/cmd/cliutils.py                         | 63 +++++++++++++++++--
 rally/cmd/commands/task.py                    | 52 ++++++++-------
 tests/unit/benchmark/processing/test_plot.py  |  7 ++-
 tests/unit/benchmark/processing/test_utils.py | 24 +++++++
 tests/unit/cmd/test_cliutils.py               | 15 +++++
 7 files changed, 158 insertions(+), 38 deletions(-)

diff --git a/rally/benchmark/processing/plot.py b/rally/benchmark/processing/plot.py
index 7641dbbd4e..390f4f25ab 100644
--- a/rally/benchmark/processing/plot.py
+++ b/rally/benchmark/processing/plot.py
@@ -234,14 +234,15 @@ def _get_atomic_action_durations(result):
         if durations:
             data = [action,
                     round(min(durations), 3),
-                    round(utils.mean(durations), 3),
-                    round(max(durations), 3),
+                    round(utils.median(durations), 3),
                     round(utils.percentile(durations, 0.90), 3),
                     round(utils.percentile(durations, 0.95), 3),
+                    round(max(durations), 3),
+                    round(utils.mean(durations), 3),
                     "%.1f%%" % (len(durations) * 100.0 / len(raw)),
                     len(raw)]
         else:
-            data = [action, None, None, None, None, None, 0, len(raw)]
+            data = [action, None, None, None, None, None, None, 0, len(raw)]
 
         # Save 'total' - it must be appended last
         if action == "total":
@@ -261,10 +262,11 @@ def _process_results(results):
     for result in results:
         table_cols = ["Action",
                       "Min (sec)",
-                      "Avg (sec)",
+                      "Median (sec)",
+                      "90%ile (sec)",
+                      "95%ile (sec)",
                       "Max (sec)",
-                      "90 percentile",
-                      "95 percentile",
+                      "Avg (sec)",
                       "Success",
                       "Count"]
         table_rows = _get_atomic_action_durations(result)
diff --git a/rally/benchmark/processing/utils.py b/rally/benchmark/processing/utils.py
index 3b9ec5fbcd..d8808ee29f 100644
--- a/rally/benchmark/processing/utils.py
+++ b/rally/benchmark/processing/utils.py
@@ -15,6 +15,7 @@
 
 import math
 
+from rally.common.i18n import _
 from rally import exceptions
 
 
@@ -31,6 +32,26 @@ def mean(values):
     return math.fsum(values) / len(values)
 
 
+def median(values):
+    """Find the sample median of a list of values.
+
+    :parameter values: non-empty list of numbers
+
+    :returns: float value
+    """
+    if not values:
+        raise ValueError(_("no median for empty data"))
+
+    values = sorted(values)
+    size = len(values)
+
+    if size % 2 == 1:
+        return values[size // 2]
+    else:
+        index = size // 2
+        return (values[index - 1] + values[index]) / 2.0
+
+
 def percentile(values, percent):
     """Find the percentile of a list of values.
 
diff --git a/rally/cmd/cliutils.py b/rally/cmd/cliutils.py
index c77f71ba44..48b2991e51 100644
--- a/rally/cmd/cliutils.py
+++ b/rally/cmd/cliutils.py
@@ -86,7 +86,7 @@ def validate_args(fn, *args, **kwargs):
 
 def print_list(objs, fields, formatters=None, sortby_index=0,
                mixed_case_fields=None, field_labels=None,
-               print_header=True, print_border=True,
+               table_label=None, print_header=True, print_border=True,
                out=sys.stdout):
     """Print a list of objects as a table, one row per object.
 
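Before continuing with the cliutils changes, a minimal usage sketch of the
median() helper added above in rally/benchmark/processing/utils.py (the
duration values are illustrative, not from a real run):

# Minimal usage sketch of median(); sample durations are made up.
from rally.benchmark.processing import utils

odd = [0.9, 1.2, 1.4, 2.0, 5.3]
print(utils.median(odd))    # -> 1.4, the middle element of the sorted sample

even = [0.9, 1.2, 1.4, 2.0]
print(utils.median(even))   # -> 1.3, the mean of the two middle elements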
@@ -98,6 +98,7 @@ def print_list(objs, fields, formatters=None, sortby_index=0,
                               have mixed case names (e.g., 'serverId')
     :param field_labels: Labels to use in the heading of the table, default to
                          fields.
+    :param table_label: Label to use as header for the whole table.
     :param print_header: print table header.
     :param print_border: print table border.
     :param out: stream to write output to.
@@ -136,14 +137,64 @@ def print_list(objs, fields, formatters=None, sortby_index=0,
         pt.left_padding_width = 0
         pt.right_padding_width = 1
 
-    outstr = pt.get_string(header=print_header,
-                           border=print_border,
-                           **kwargs) + "\n"
+    table_body = pt.get_string(header=print_header,
+                               border=print_border,
+                               **kwargs) + "\n"
+
+    table_header = ""
+
+    if table_label:
+        table_width = table_body.index("\n")
+        table_header = make_table_header(table_label, table_width)
+        table_header += "\n"
 
     if six.PY3:
-        out.write(encodeutils.safe_encode(outstr).decode())
+        if table_header:
+            out.write(encodeutils.safe_encode(table_header).decode())
+        out.write(encodeutils.safe_encode(table_body).decode())
     else:
-        out.write(encodeutils.safe_encode(outstr))
+        if table_header:
+            out.write(encodeutils.safe_encode(table_header))
+        out.write(encodeutils.safe_encode(table_body))
+
+
+def make_table_header(table_label, table_width,
+                      junction_char="+", horizontal_char="-",
+                      vertical_char="|"):
+    """Generalized way to make a table header string.
+
+    :param table_label: label to print on header
+    :param table_width: total width of table
+    :param junction_char: character used where vertical and
+                          horizontal lines meet.
+    :param horizontal_char: character used for horizontal lines.
+    :param vertical_char: character used for vertical lines.
+
+    :returns: string
+    """
+
+    if len(table_label) >= (table_width - 2):
+        raise ValueError(_("Table header %s is longer than the total "
+                           "width of the table.") % table_label)
+
+    label_and_space_width = table_width - len(table_label) - 2
+    padding = 0 if label_and_space_width % 2 == 0 else 1
+
+    half_table_width = label_and_space_width // 2
+    left_spacing = (" " * half_table_width)
+    right_spacing = (" " * (half_table_width + padding))
+
+    border_line = "".join((junction_char,
+                           (horizontal_char * (table_width - 2)),
+                           junction_char,))
+
+    label_line = "".join((vertical_char,
+                          left_spacing,
+                          table_label,
+                          right_spacing,
+                          vertical_char,))
+
+    return "\n".join((border_line, label_line,))
 
 
 def make_header(text, size=80, symbol="-"):
diff --git a/rally/cmd/commands/task.py b/rally/cmd/commands/task.py
index 3b1823aba1..2682a80da6 100644
--- a/rally/cmd/commands/task.py
+++ b/rally/cmd/commands/task.py
@@ -325,11 +325,12 @@ class TaskCommands(object):
                 print(json.dumps(key["kw"], indent=2))
 
             raw = result["data"]["raw"]
-            table_cols = ["action", "min (sec)", "avg (sec)", "max (sec)",
-                          "90 percentile", "95 percentile", "success",
-                          "count"]
-            float_cols = ["min (sec)", "avg (sec)", "max (sec)",
-                          "90 percentile", "95 percentile"]
+            table_cols = ["action", "min", "median",
+                          "90%ile", "95%ile", "max",
+                          "avg", "success", "count"]
+            float_cols = ["min", "median",
+                          "90%ile", "95%ile", "max",
+                          "avg"]
             formatters = dict(zip(float_cols,
                                   [cliutils.pretty_float_formatter(col, 3)
                                    for col in float_cols]))
@@ -340,20 +341,22 @@ class TaskCommands(object):
                 durations = actions_data[action]
                 if durations:
                     data = [action,
-                            min(durations),
-                            utils.mean(durations),
-                            max(durations),
-                            utils.percentile(durations, 0.90),
-                            utils.percentile(durations, 0.95),
+                            round(min(durations), 3),
+                            round(utils.median(durations), 3),
+                            round(utils.percentile(durations, 0.90), 3),
+                            round(utils.percentile(durations, 0.95), 3),
+                            round(max(durations), 3),
+                            round(utils.mean(durations), 3),
                             "%.1f%%" % (len(durations) * 100.0 / len(raw)),
                             len(raw)]
                 else:
-                    data = [action, None, None, None, None, None,
+                    data = [action, None, None, None, None, None, None,
                             "0.0%", len(raw)]
                 table_rows.append(rutils.Struct(**dict(zip(table_cols, data))))
 
             cliutils.print_list(table_rows, fields=table_cols,
-                                formatters=formatters)
+                                formatters=formatters,
+                                table_label="Response Times (sec)")
 
             if iterations_data:
                 _print_iterations_data(raw)
@@ -371,10 +374,11 @@ class TaskCommands(object):
                 keys = set()
                 for ssr in ssrs:
                     keys.update(ssr.keys())
-                headers = ["key", "max", "avg", "min",
-                           "90 pecentile", "95 pecentile"]
-                float_cols = ["max", "avg", "min",
-                              "90 pecentile", "95 pecentile"]
+                headers = ["key", "min", "median",
+                           "90%ile", "95%ile", "max",
+                           "avg"]
+                float_cols = ["min", "median", "90%ile",
+                              "95%ile", "max", "avg"]
                 formatters = dict(zip(float_cols,
                                       [cliutils.pretty_float_formatter(col, 3)
                                        for col in float_cols]))
@@ -384,18 +388,20 @@ class TaskCommands(object):
 
                     if values:
                         row = [str(key),
-                               max(values),
-                               utils.mean(values),
-                               min(values),
-                               utils.percentile(values, 0.90),
-                               utils.percentile(values, 0.95)]
+                               round(min(values), 3),
+                               round(utils.median(values), 3),
+                               round(utils.percentile(values, 0.90), 3),
+                               round(utils.percentile(values, 0.95), 3),
+                               round(max(values), 3),
+                               round(utils.mean(values), 3)]
                     else:
-                        row = [str(key)] + ["n/a"] * 5
+                        row = [str(key)] + ["n/a"] * 6
                     table_rows.append(rutils.Struct(**dict(zip(headers, row))))
 
                 print("\nScenario Specific Results\n")
                 cliutils.print_list(table_rows, fields=headers,
-                                    formatters=formatters)
+                                    formatters=formatters,
+                                    table_label="Response Times (sec)")
 
                 for result in raw:
                     errors = result["scenario_output"].get("errors")
diff --git a/tests/unit/benchmark/processing/test_plot.py b/tests/unit/benchmark/processing/test_plot.py
index a22d652b2f..f46a00463c 100644
--- a/tests/unit/benchmark/processing/test_plot.py
+++ b/tests/unit/benchmark/processing/test_plot.py
@@ -65,10 +65,11 @@ class PlotTestCase(test.TestCase):
         results = [result_(i) for i in (0, 1, 2)]
         table_cols = ["Action",
                       "Min (sec)",
-                      "Avg (sec)",
+                      "Median (sec)",
+                      "90%ile (sec)",
+                      "95%ile (sec)",
                       "Max (sec)",
-                      "90 percentile",
-                      "95 percentile",
+                      "Avg (sec)",
                       "Success",
                       "Count"]
         atomic_durations = [["atomic_1"], ["atomic_2"]]
diff --git a/tests/unit/benchmark/processing/test_utils.py b/tests/unit/benchmark/processing/test_utils.py
index 002068aaef..ed938ab8d8 100644
--- a/tests/unit/benchmark/processing/test_utils.py
+++ b/tests/unit/benchmark/processing/test_utils.py
@@ -44,6 +44,30 @@ class MathTestCase(test.TestCase):
         self.assertRaises(exceptions.InvalidArgumentsException,
                           utils.mean, lst)
 
+    def test_median_single_value(self):
+        lst = [5]
+        result = utils.median(lst)
+        self.assertEqual(5, result)
+
+    def test_median_odd_sized_list(self):
+        lst = [1, 2, 3, 4, 5]
+        result = utils.median(lst)
+        self.assertEqual(3, result)
+
+    def test_median_even_sized_list(self):
+        lst = [1, 2, 3, 4]
+        result = utils.median(lst)
+        self.assertEqual(2.5, result)
+
+    def test_median_empty_list(self):
+        lst = []
+        self.assertRaises(ValueError,
+                          utils.median, lst)
+
+        lst = None
+        self.assertRaises(ValueError,
+                          utils.median, lst)
+
     def _compare_items_lists(self, list1, list2):
         """Items lists comparison, compatible with Python 2.6/2.7.
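Ahead of the tests below, a worked sketch of the centering arithmetic inside
make_table_header(), using the same label and the odd-width case the tests
exercise (variable names mirror the implementation; the assert is
illustrative):

# Worked example of the header-centering arithmetic for width 39.
table_label = "Response Times (sec)"                        # len() == 20
table_width = 39                                            # includes both "|" borders

label_and_space_width = table_width - len(table_label) - 2  # 17 spacing columns
padding = 0 if label_and_space_width % 2 == 0 else 1        # 17 is odd, so 1 extra space
half_table_width = label_and_space_width // 2               # 8

label_line = ("|" + " " * half_table_width + table_label
              + " " * (half_table_width + padding) + "|")
assert len(label_line) == table_width                       # 1 + 8 + 20 + 9 + 1 == 39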
diff --git a/tests/unit/cmd/test_cliutils.py b/tests/unit/cmd/test_cliutils.py
index 56df851dfa..ba2f12e20b 100644
--- a/tests/unit/cmd/test_cliutils.py
+++ b/tests/unit/cmd/test_cliutils.py
@@ -54,6 +54,21 @@ class CliUtilsTestCase(test.TestCase):
         h1 = cliutils.make_header("msg", size=4, symbol="=")
         self.assertEqual(h1, "====\n msg\n====\n")
 
+    def test_make_table_header(self):
+        actual = cliutils.make_table_header("Response Times (sec)", 40)
+        expected = "\n".join(
+            ("+--------------------------------------+",
+             "|         Response Times (sec)         |",)
+        )
+        self.assertEqual(expected, actual)
+
+        actual = cliutils.make_table_header("Response Times (sec)", 39)
+        expected = "\n".join(
+            ("+-------------------------------------+",
+             "|        Response Times (sec)         |",)
+        )
+        self.assertEqual(expected, actual)
+
     def test_pretty_float_formatter_rounding(self):
         test_table_rows = {"test_header": 6.56565}
         self.__dict__.update(**test_table_rows)
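End to end, the new keyword is used like the sketch below; the Row class is a
hypothetical stand-in for rally.common.utils.Struct, and the metric values are
made up:

# Sketch of print_list() with the new table_label keyword.
import sys

from rally.cmd import cliutils


class Row(object):
    # Hypothetical stand-in for rally.common.utils.Struct:
    # exposes keyword arguments as attributes, as print_list() expects.
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


rows = [Row(action="nova.boot_server", min=0.911, median=1.27, max=5.31),
        Row(action="total", min=1.102, median=1.63, max=6.04)]
cliutils.print_list(rows, fields=["action", "min", "median", "max"],
                    table_label="Response Times (sec)", out=sys.stdout)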