From fbe5836d86a02cf256115c4e7c14dd34be4c5100 Mon Sep 17 00:00:00 2001
From: "Carlos L. Torres"
Date: Tue, 3 Mar 2015 18:09:32 -0600
Subject: [PATCH] [report] Improve reports data and units

- Adds a new median metric to both the CLI and HTML reports.
- Adds support for a table header that consolidates the units of each
  metric and gives the table a title.
- Re-organizes metrics as order statistics (min, percentiles, max),
  followed by the aggregate statistics (avg).
- Includes unit tests.

Change-Id: Icd154c9f00b5e9d6df11797685fe8f92c4fdbb1b
---
 rally/benchmark/processing/plot.py            | 14 +++--
 rally/benchmark/processing/utils.py           | 21 +++++++
 rally/cmd/cliutils.py                         | 63 +++++++++++++++++--
 rally/cmd/commands/task.py                    | 52 ++++++++-------
 tests/unit/benchmark/processing/test_plot.py  |  7 ++-
 tests/unit/benchmark/processing/test_utils.py | 24 +++++++
 tests/unit/cmd/test_cliutils.py               | 15 +++++
 7 files changed, 158 insertions(+), 38 deletions(-)

diff --git a/rally/benchmark/processing/plot.py b/rally/benchmark/processing/plot.py
index 7641dbbd4e..390f4f25ab 100644
--- a/rally/benchmark/processing/plot.py
+++ b/rally/benchmark/processing/plot.py
@@ -234,14 +234,15 @@ def _get_atomic_action_durations(result):
         if durations:
             data = [action,
                     round(min(durations), 3),
-                    round(utils.mean(durations), 3),
-                    round(max(durations), 3),
+                    round(utils.median(durations), 3),
                     round(utils.percentile(durations, 0.90), 3),
                     round(utils.percentile(durations, 0.95), 3),
+                    round(max(durations), 3),
+                    round(utils.mean(durations), 3),
                     "%.1f%%" % (len(durations) * 100.0 / len(raw)),
                     len(raw)]
         else:
-            data = [action, None, None, None, None, None, 0, len(raw)]
+            data = [action, None, None, None, None, None, None, 0, len(raw)]
 
         # Save 'total' - it must be appended last
         if action == "total":
@@ -261,10 +262,11 @@ def _process_results(results):
     for result in results:
         table_cols = ["Action",
                       "Min (sec)",
-                      "Avg (sec)",
+                      "Median (sec)",
+                      "90%ile (sec)",
+                      "95%ile (sec)",
                       "Max (sec)",
-                      "90 percentile",
-                      "95 percentile",
+                      "Avg (sec)",
                       "Success",
                       "Count"]
         table_rows = _get_atomic_action_durations(result)
diff --git a/rally/benchmark/processing/utils.py b/rally/benchmark/processing/utils.py
index 3b9ec5fbcd..d8808ee29f 100644
--- a/rally/benchmark/processing/utils.py
+++ b/rally/benchmark/processing/utils.py
@@ -15,6 +15,7 @@
 
 import math
 
+from rally.common.i18n import _
 from rally import exceptions
 
 
@@ -31,6 +32,26 @@ def mean(values):
     return math.fsum(values) / len(values)
 
 
+def median(values):
+    """Find the sample median of a list of values.
+
+    :parameter values: non-empty list of numbers
+
+    :returns: float value
+    """
+    if not values:
+        raise ValueError(_("no median for empty data"))
+
+    values = sorted(values)
+    size = len(values)
+
+    if size % 2 == 1:
+        return values[size // 2]
+    else:
+        index = size // 2
+        return (values[index - 1] + values[index]) / 2.0
+
+
 def percentile(values, percent):
     """Find the percentile of a list of values.
 
diff --git a/rally/cmd/cliutils.py b/rally/cmd/cliutils.py
index c77f71ba44..48b2991e51 100644
--- a/rally/cmd/cliutils.py
+++ b/rally/cmd/cliutils.py
@@ -86,7 +86,7 @@ def validate_args(fn, *args, **kwargs):
 
 def print_list(objs, fields, formatters=None, sortby_index=0,
                mixed_case_fields=None, field_labels=None,
-               print_header=True, print_border=True,
+               table_label=None, print_header=True, print_border=True,
                out=sys.stdout):
     """Print a list of objects as a table, one row per object.
 
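Before continuing with the cliutils changes, a minimal usage sketch of the
median() helper added above in rally/benchmark/processing/utils.py (the
duration values are illustrative, not from a real run):

# Minimal usage sketch of median(); sample durations are made up.
from rally.benchmark.processing import utils

odd = [0.9, 1.2, 1.4, 2.0, 5.3]
print(utils.median(odd))    # -> 1.4, the middle element of the sorted sample

even = [0.9, 1.2, 1.4, 2.0]
print(utils.median(even))   # -> 1.3, the mean of the two middle elements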
@@ -98,6 +98,7 @@ def print_list(objs, fields, formatters=None, sortby_index=0,
                               have mixed case names (e.g., 'serverId')
     :param field_labels: Labels to use in the heading of the table, default to
                          fields.
+    :param table_label: Label to use as header for the whole table.
     :param print_header: print table header.
     :param print_border: print table border.
     :param out: stream to write output to.
@@ -136,14 +137,64 @@ def print_list(objs, fields, formatters=None, sortby_index=0,
         pt.left_padding_width = 0
         pt.right_padding_width = 1
 
-    outstr = pt.get_string(header=print_header,
-                           border=print_border,
-                           **kwargs) + "\n"
+    table_body = pt.get_string(header=print_header,
+                               border=print_border,
+                               **kwargs) + "\n"
+
+    table_header = ""
+
+    if table_label:
+        table_width = table_body.index("\n")
+        table_header = make_table_header(table_label, table_width)
+        table_header += "\n"
 
     if six.PY3:
-        out.write(encodeutils.safe_encode(outstr).decode())
+        if table_header:
+            out.write(encodeutils.safe_encode(table_header).decode())
+        out.write(encodeutils.safe_encode(table_body).decode())
     else:
-        out.write(encodeutils.safe_encode(outstr))
+        if table_header:
+            out.write(encodeutils.safe_encode(table_header))
+        out.write(encodeutils.safe_encode(table_body))
+
+
+def make_table_header(table_label, table_width,
+                      junction_char="+", horizontal_char="-",
+                      vertical_char="|"):
+    """Generalized way to make a table header string.
+
+    :param table_label: label to print on header
+    :param table_width: total width of table
+    :param junction_char: character used where vertical and
+                          horizontal lines meet.
+    :param horizontal_char: character used for horizontal lines.
+    :param vertical_char: character used for vertical lines.
+
+    :returns: string
+    """
+
+    if len(table_label) >= (table_width - 2):
+        raise ValueError(_("Table header %s is longer than the total "
+                           "width of the table.") % table_label)
+
+    label_and_space_width = table_width - len(table_label) - 2
+    padding = 0 if label_and_space_width % 2 == 0 else 1
+
+    half_table_width = label_and_space_width // 2
+    left_spacing = (" " * half_table_width)
+    right_spacing = (" " * (half_table_width + padding))
+
+    border_line = "".join((junction_char,
+                           (horizontal_char * (table_width - 2)),
+                           junction_char,))
+
+    label_line = "".join((vertical_char,
+                          left_spacing,
+                          table_label,
+                          right_spacing,
+                          vertical_char,))
+
+    return "\n".join((border_line, label_line,))
 
 
 def make_header(text, size=80, symbol="-"):
diff --git a/rally/cmd/commands/task.py b/rally/cmd/commands/task.py
index 3b1823aba1..2682a80da6 100644
--- a/rally/cmd/commands/task.py
+++ b/rally/cmd/commands/task.py
@@ -325,11 +325,12 @@ class TaskCommands(object):
                 print(json.dumps(key["kw"], indent=2))
 
             raw = result["data"]["raw"]
-            table_cols = ["action", "min (sec)", "avg (sec)", "max (sec)",
-                          "90 percentile", "95 percentile", "success",
-                          "count"]
-            float_cols = ["min (sec)", "avg (sec)", "max (sec)",
-                          "90 percentile", "95 percentile"]
+            table_cols = ["action", "min", "median",
+                          "90%ile", "95%ile", "max",
+                          "avg", "success", "count"]
+            float_cols = ["min", "median",
+                          "90%ile", "95%ile", "max",
+                          "avg"]
             formatters = dict(zip(float_cols,
                                   [cliutils.pretty_float_formatter(col, 3)
                                    for col in float_cols]))
@@ -340,20 +341,22 @@ class TaskCommands(object):
                 durations = actions_data[action]
                 if durations:
                     data = [action,
-                            min(durations),
-                            utils.mean(durations),
-                            max(durations),
-                            utils.percentile(durations, 0.90),
-                            utils.percentile(durations, 0.95),
+                            round(min(durations), 3),
+                            round(utils.median(durations), 3),
+                            round(utils.percentile(durations, 0.90), 3),
+                            round(utils.percentile(durations, 0.95), 3),
+                            round(max(durations), 3),
+                            round(utils.mean(durations), 3),
                             "%.1f%%" % (len(durations) * 100.0 / len(raw)),
                             len(raw)]
                 else:
-                    data = [action, None, None, None, None, None,
+                    data = [action, None, None, None, None, None, None,
                             "0.0%", len(raw)]
                 table_rows.append(rutils.Struct(**dict(zip(table_cols, data))))
 
             cliutils.print_list(table_rows, fields=table_cols,
-                                formatters=formatters)
+                                formatters=formatters,
+                                table_label="Response Times (sec)")
 
             if iterations_data:
                 _print_iterations_data(raw)
@@ -371,10 +374,11 @@ class TaskCommands(object):
                 keys = set()
                 for ssr in ssrs:
                     keys.update(ssr.keys())
-                headers = ["key", "max", "avg", "min",
-                           "90 pecentile", "95 pecentile"]
-                float_cols = ["max", "avg", "min",
-                              "90 pecentile", "95 pecentile"]
+                headers = ["key", "min", "median",
+                           "90%ile", "95%ile", "max",
+                           "avg"]
+                float_cols = ["min", "median", "90%ile",
+                              "95%ile", "max", "avg"]
                 formatters = dict(zip(float_cols,
                                       [cliutils.pretty_float_formatter(col, 3)
                                        for col in float_cols]))
@@ -384,18 +388,20 @@ class TaskCommands(object):
 
                     if values:
                         row = [str(key),
-                               max(values),
-                               utils.mean(values),
-                               min(values),
-                               utils.percentile(values, 0.90),
-                               utils.percentile(values, 0.95)]
+                               round(min(values), 3),
+                               round(utils.median(values), 3),
+                               round(utils.percentile(values, 0.90), 3),
+                               round(utils.percentile(values, 0.95), 3),
+                               round(max(values), 3),
+                               round(utils.mean(values), 3)]
                     else:
-                        row = [str(key)] + ["n/a"] * 5
+                        row = [str(key)] + ["n/a"] * 6
                     table_rows.append(rutils.Struct(**dict(zip(headers, row))))
 
                 print("\nScenario Specific Results\n")
                 cliutils.print_list(table_rows, fields=headers,
-                                    formatters=formatters)
+                                    formatters=formatters,
+                                    table_label="Response Times (sec)")
 
                 for result in raw:
                     errors = result["scenario_output"].get("errors")
diff --git a/tests/unit/benchmark/processing/test_plot.py b/tests/unit/benchmark/processing/test_plot.py
index a22d652b2f..f46a00463c 100644
--- a/tests/unit/benchmark/processing/test_plot.py
+++ b/tests/unit/benchmark/processing/test_plot.py
@@ -65,10 +65,11 @@ class PlotTestCase(test.TestCase):
         results = [result_(i) for i in (0, 1, 2)]
         table_cols = ["Action",
                       "Min (sec)",
-                      "Avg (sec)",
+                      "Median (sec)",
+                      "90%ile (sec)",
+                      "95%ile (sec)",
                       "Max (sec)",
-                      "90 percentile",
-                      "95 percentile",
+                      "Avg (sec)",
                       "Success",
                       "Count"]
         atomic_durations = [["atomic_1"], ["atomic_2"]]
diff --git a/tests/unit/benchmark/processing/test_utils.py b/tests/unit/benchmark/processing/test_utils.py
index 002068aaef..ed938ab8d8 100644
--- a/tests/unit/benchmark/processing/test_utils.py
+++ b/tests/unit/benchmark/processing/test_utils.py
@@ -44,6 +44,30 @@ class MathTestCase(test.TestCase):
         self.assertRaises(exceptions.InvalidArgumentsException,
                           utils.mean, lst)
 
+    def test_median_single_value(self):
+        lst = [5]
+        result = utils.median(lst)
+        self.assertEqual(5, result)
+
+    def test_median_odd_sized_list(self):
+        lst = [1, 2, 3, 4, 5]
+        result = utils.median(lst)
+        self.assertEqual(3, result)
+
+    def test_median_even_sized_list(self):
+        lst = [1, 2, 3, 4]
+        result = utils.median(lst)
+        self.assertEqual(2.5, result)
+
+    def test_median_empty_list(self):
+        lst = []
+        self.assertRaises(ValueError,
+                          utils.median, lst)
+
+        lst = None
+        self.assertRaises(ValueError,
+                          utils.median, lst)
+
     def _compare_items_lists(self, list1, list2):
         """Items lists comparison, compatible with Python 2.6/2.7.
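Ahead of the tests below, a worked sketch of the centering arithmetic inside
make_table_header(), using the same label and the odd-width case the tests
exercise (variable names mirror the implementation; the assert is
illustrative):

# Worked example of the header-centering arithmetic for width 39.
table_label = "Response Times (sec)"                        # len() == 20
table_width = 39                                            # includes both "|" borders

label_and_space_width = table_width - len(table_label) - 2  # 17 spacing columns
padding = 0 if label_and_space_width % 2 == 0 else 1        # 17 is odd, so 1 extra space
half_table_width = label_and_space_width // 2               # 8

label_line = ("|" + " " * half_table_width + table_label
              + " " * (half_table_width + padding) + "|")
assert len(label_line) == table_width                       # 1 + 8 + 20 + 9 + 1 == 39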
diff --git a/tests/unit/cmd/test_cliutils.py b/tests/unit/cmd/test_cliutils.py
index 56df851dfa..ba2f12e20b 100644
--- a/tests/unit/cmd/test_cliutils.py
+++ b/tests/unit/cmd/test_cliutils.py
@@ -54,6 +54,21 @@ class CliUtilsTestCase(test.TestCase):
         h1 = cliutils.make_header("msg", size=4, symbol="=")
         self.assertEqual(h1, "====\n msg\n====\n")
 
+    def test_make_table_header(self):
+        actual = cliutils.make_table_header("Response Times (sec)", 40)
+        expected = "\n".join(
+            ("+--------------------------------------+",
+             "|         Response Times (sec)         |",)
+        )
+        self.assertEqual(expected, actual)
+
+        actual = cliutils.make_table_header("Response Times (sec)", 39)
+        expected = "\n".join(
+            ("+-------------------------------------+",
+             "|        Response Times (sec)         |",)
+        )
+        self.assertEqual(expected, actual)
+
     def test_pretty_float_formatter_rounding(self):
         test_table_rows = {"test_header": 6.56565}
         self.__dict__.update(**test_table_rows)
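End to end, the new keyword is used like the sketch below; the Row class is a
hypothetical stand-in for rally.common.utils.Struct, and the metric values are
made up:

# Sketch of print_list() with the new table_label keyword.
import sys

from rally.cmd import cliutils


class Row(object):
    # Hypothetical stand-in for rally.common.utils.Struct:
    # exposes keyword arguments as attributes, as print_list() expects.
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


rows = [Row(action="nova.boot_server", min=0.911, median=1.27, max=5.31),
        Row(action="total", min=1.102, median=1.63, max=6.04)]
cliutils.print_list(rows, fields=["action", "min", "median", "max"],
                    table_label="Response Times (sec)", out=sys.stdout)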