From fa1f62644c8061d198847bde658832fa3c44848b Mon Sep 17 00:00:00 2001
From: Sergey Skripnick
Date: Tue, 8 Jul 2014 15:28:08 +0300
Subject: [PATCH] Reduce number of points in generated charts

In some cases the number of iterations is too large for direct charting.
Also reduce the number of digits after the decimal point.

Change-Id: I7ae526ae41500a8a0c33a7014e93cde210687df9
---
 rally-scenarios/rally.yaml                |  12 ++
 rally/benchmark/processing/plot.py        | 133 ++++++++++++++++------
 rally/benchmark/processing/src/index.mako |   1 -
 tests/benchmark/processing/test_plot.py   |  66 +++++++++--
 4 files changed, 170 insertions(+), 42 deletions(-)

diff --git a/rally-scenarios/rally.yaml b/rally-scenarios/rally.yaml
index da5258f11b..92db1367bc 100644
--- a/rally-scenarios/rally.yaml
+++ b/rally-scenarios/rally.yaml
@@ -166,6 +166,18 @@
           tenants: 1
           users_per_tenant: 1
 
+    -
+      args:
+        exception_probability: 0.05
+      runner:
+        type: "constant"
+        times: 2042
+        concurrency: 1
+      context:
+        users:
+          tenants: 1
+          users_per_tenant: 1
+
   Dummy.dummy_with_scenario_output:
     -
       runner:
diff --git a/rally/benchmark/processing/plot.py b/rally/benchmark/processing/plot.py
index 7fc9271bf2..6acec401c3 100644
--- a/rally/benchmark/processing/plot.py
+++ b/rally/benchmark/processing/plot.py
@@ -23,55 +23,112 @@ from rally.benchmark.processing.charts import histogram as histo
 from rally.benchmark.processing import utils
 
 
-def _process_main_duration(result):
+def _prepare_data(data, reduce_rows=1000):
+    """Prepare data to be displayed.
 
-    pie = filter(lambda t: not t["error"], result["result"])
-    num_successful_iterations = len(pie)
-    stacked_area = map(
-        lambda t: {"idle_duration": 0, "duration": 0} if t["error"] else t,
-        result["result"])
-    histogram_data = filter(None, map(
-        lambda t: t["duration"] if not t["error"] else None,
-        result["result"]))
+    * replace errors with zero values
+    * reduce number of rows if necessary
+    * count errors
+    """
+    def _append(d1, d2):
+        for k, v in d1.iteritems():
+            v.append(d2[k])
+
+    def _merge(d1, d2):
+        for k, v in d1.iteritems():
+            v[-1] = (v[-1] + d2[k]) / 2.0
+
+    zero_atomic_actions = {}
+    for row in data["result"]:
+        # find the first non-error result to get the atomic action names
+        if not row["error"] and "atomic_actions" in row:
+            zero_atomic_actions = dict([(a["action"], 0)
+                                        for a in row["atomic_actions"]])
+            break
+
+    total_durations = {"duration": [], "idle_duration": []}
+    atomic_durations = dict([(a, []) for a in zero_atomic_actions])
+    num_errors = 0
+
+    # To determine which rows should be merged we use a "factor",
+    # e.g. if we have 100 rows and need to reduce them to 75, then we
+    # should merge (with the previous row) every 4th row.
+    # If we increment "store" by 0.25 on each iteration, then we
+    # get store >= 1 every 4th iteration.
+
+    data_size = len(data["result"])
+    factor = (data_size - reduce_rows + 1) / float(data_size)
+    if factor < 0:
+        factor = 0.0
+    store = 0.0
+
+    for row in data["result"]:
+        row.setdefault("atomic_actions", zero_atomic_actions)
+        if row["error"]:
+            new_row_total = {"duration": 0, "idle_duration": 0}
+            new_row_atomic = zero_atomic_actions
+            num_errors += 1
+        else:
+            new_row_total = {
+                "duration": row["duration"],
+                "idle_duration": row["idle_duration"],
+            }
+            new_row_atomic = dict([(a["action"], a["duration"])
+                                   for a in row["atomic_actions"]])
+        if store < 1:
+            _append(total_durations, new_row_total)
+            _append(atomic_durations, new_row_atomic)
+        else:
+            _merge(total_durations, new_row_total)
+            _merge(atomic_durations, new_row_atomic)
+            store -= 1
+        store += factor
+
+    return {
+        "total_durations": total_durations,
+        "atomic_durations": atomic_durations,
+        "num_errors": num_errors,
+    }
+
+
+def _process_main_duration(result, data):
+    histogram_data = [r["duration"] for r in result["result"]
+                      if not r["error"]]
     histograms = []
-    if num_successful_iterations > 0:
+    if histogram_data:
         hvariety = histo.hvariety(histogram_data)
         for i in range(len(hvariety)):
             histograms.append(histo.Histogram(histogram_data,
                                               hvariety[i]['number_of_bins'],
                                               hvariety[i]['method']))
 
+    stacked_area = []
+    for key in "duration", "idle_duration":
+        stacked_area.append({
+            "key": key,
+            "values": list(enumerate([round(d, 2) for d in
+                                      data["total_durations"][key]], start=1)),
+        })
+
     return {
         "pie": [
-            {"key": "success", "value": len(pie)},
-            {"key": "errors",
-             "value": len(result["result"]) - len(pie)}
-        ],
-        "iter": [
-            {
-                "key": "duration",
-                "values": [[i + 1, v["duration"]]
-                           for i, v in enumerate(stacked_area)]
-            },
-            {
-                "key": "idle_duration",
-                "values": [[i + 1, v["idle_duration"]]
-                           for i, v in enumerate(stacked_area)]
-            }
+            {"key": "success", "value": len(histogram_data)},
+            {"key": "errors", "value": data["num_errors"]},
         ],
+        "iter": stacked_area,
         "histogram": [
             {
                 "key": "task",
                 "method": histogram.method,
-                "values": [{"x": x, "y": y}
+                "values": [{"x": round(x, 2), "y": y}
                            for x, y in zip(histogram.x_axis,
                                            histogram.y_axis)]
             } for histogram in histograms
         ],
     }
 
 
-def _process_atomic(result):
+def _process_atomic(result, data):
 
     def avg(lst, key=None):
         lst = lst if not key else map(lambda x: x[key], lst)
@@ -108,17 +165,16 @@ def _process_atomic(result):
     if stacked_area:
         pie = copy.deepcopy(stacked_area)
         histogram_data = copy.deepcopy(stacked_area)
-        for i, data in enumerate(result["result"]):
+        for i, res in enumerate(result["result"]):
             # in case of error put (order, 0.0) to all actions of stacked area
-            if data["error"]:
+            if res["error"]:
                 for k in range(len(stacked_area)):
                     stacked_area[k]["values"].append([i + 1, 0.0])
                 continue
 
             # in case of non error put real durations to pie and stacked area
-            for j, action in enumerate(data["atomic_actions"]):
+            for j, action in enumerate(res["atomic_actions"]):
                 pie[j]["values"].append(action["duration"])
-                stacked_area[j]["values"].append([i + 1, action["duration"]])
                 histogram_data[j]["values"].append(action["duration"])
 
     histograms = [[] for atomic_action in range(len(histogram_data))]
@@ -129,13 +185,21 @@
                                          hvariety[v]['number_of_bins'],
                                          hvariety[v]['method'],
                                          atomic_action['key']))
 
+    stacked_area = []
+    for name, durations in data["atomic_durations"].iteritems():
+        stacked_area.append({
+            "key": name,
+            "values": list(enumerate([round(d, 2) for d in durations],
+                                     start=1)),
+        })
+
     return {
         "histogram": [[
             {
                 "key": action.key,
                 "disabled": i,
                 "method": action.method,
-                "values": [{"x": x, "y": y}
+                "values": [{"x": round(x, 2), "y": y}
                            for x, y in zip(action.x_axis, action.y_axis)]
             } for action in atomic_action_list]
             for i, atomic_action_list in enumerate(histograms)
@@ -201,11 +265,12 @@ def _process_results(results):
         info = result["key"]
         config = {}
         config[info["name"]] = [info["kw"]]
+        data = _prepare_data(result)
        output.append({
             "name": "%s (task #%d)" % (info["name"], info["pos"]),
             "config": config,
-            "duration": _process_main_duration(result),
-            "atomic": _process_atomic(result),
+            "duration": _process_main_duration(result, data),
+            "atomic": _process_atomic(result, data),
             "table_rows": table_rows,
             "table_cols": table_cols
         })
diff --git a/rally/benchmark/processing/src/index.mako b/rally/benchmark/processing/src/index.mako
index 8446b8a534..ce8d90b917 100644
--- a/rally/benchmark/processing/src/index.mako
+++ b/rally/benchmark/processing/src/index.mako
@@ -58,7 +58,6 @@
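
Note (not part of the patch): the sketch below is only an illustration of the
"factor"/"store" reduction that _prepare_data() applies, written against a
plain list of durations instead of the per-iteration dictionaries; the helper
name reduce_points and its max_points argument are invented for this example.

    # Illustration only: every time the accumulated "store" reaches 1, the
    # current value is averaged into the previous point instead of being
    # appended as a new one, so the output stays near max_points in length.
    def reduce_points(values, max_points=1000):
        size = len(values)
        factor = (size - max_points + 1) / float(size)
        if factor < 0:
            factor = 0.0    # fewer rows than max_points: keep everything

        reduced = []
        store = 0.0
        for value in values:
            if store < 1:
                reduced.append(value)                      # new chart point
            else:
                reduced[-1] = (reduced[-1] + value) / 2.0  # merge with previous
                store -= 1
            store += factor
        return reduced

    durations = [i * 0.1 for i in range(2042)]   # e.g. a 2042-iteration task
    print(len(reduce_points(durations)))         # prints roughly 1000

With this merging, the stacked-area and atomic-action charts stay at about
reduce_rows points no matter how many iterations a task ran, which is
presumably what the times: 2042 Dummy scenario added in rally-scenarios/rally.yaml
is meant to exercise.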