From 1fc89d3931b5c3c855b4be2655d5bd6b199eef51 Mon Sep 17 00:00:00 2001 From: Mikhail Dubov Date: Tue, 3 Feb 2015 12:35:24 +0300 Subject: [PATCH] Abort scenario execution on SLA failure * Change the API of the SLA classes: - add_iteration() - processes a single iteration result from the queue - result() - returns the SLA result based on the data processed so far. It now also reports success when there is no iteration data. * Add a new SLAChecker class: - add_iteration() - checks a single iteration result against all configured SLAs - results() - returns cumulative SLA results for all SLAs * Change the benchmark engine so that it supports scenario runner aborts on SLA failure * Support the "abort on SLA failure" feature in the CLI: rally task start ... --abort-on-sla-failure * Modify the SLA detailed messages (cleaner text and fewer decimal places) * Update unit and functional tests accordingly. Also remove the use of the deprecated "max_failure_percent" SLA from the functional tests. Change-Id: I91894a81649815428fd1ac7afcfce9cf47160fc9 --- rally/api.py | 8 +- rally/benchmark/engine.py | 23 ++- rally/benchmark/sla/base.py | 196 ++++++++++++++---------- rally/cmd/commands/task.py | 14 +- tests/functional/test_cli_task.py | 213 +++++++++++++++++++++++++- tests/unit/benchmark/sla/test_base.py | 205 ++++++++++++++++--------- tests/unit/benchmark/test_engine.py | 59 ++++++- tests/unit/cmd/commands/test_task.py | 5 +- tests/unit/test_api.py | 4 +- tools/rally.bash_completion | 2 +- 10 files changed, 556 insertions(+), 173 deletions(-) diff --git a/rally/api.py b/rally/api.py index e6ed91db25..7b8010240c 100644 --- a/rally/api.py +++ b/rally/api.py @@ -168,7 +168,7 @@ class Task(object): benchmark_engine.validate() @classmethod - def start(cls, deployment, config, task=None): + def start(cls, deployment, config, task=None, abort_on_sla_failure=False): """Start a task. Task is a list of benchmarks that will be called one by one, results of @@ -177,13 +177,17 @@ class Task(object): :param deployment: UUID or name of the deployment :param config: a dict with a task configuration :param task: Task object. If None, it will be created + :param abort_on_sla_failure: if True, the execution of a benchmark + scenario will stop when any SLA check + for it fails """ deployment = objects.Deployment.get(deployment) task = task or objects.Task(deployment_uuid=deployment["uuid"]) LOG.info("Benchmark Task %s on Deployment %s" % (task["uuid"], deployment["uuid"])) benchmark_engine = engine.BenchmarkEngine( - config, task, admin=deployment["admin"], users=deployment["users"]) + config, task, admin=deployment["admin"], users=deployment["users"], + abort_on_sla_failure=abort_on_sla_failure) try: benchmark_engine.validate() diff --git a/rally/benchmark/engine.py b/rally/benchmark/engine.py index 2d456c8023..776929adcf 100644 --- a/rally/benchmark/engine.py +++ b/rally/benchmark/engine.py @@ -93,18 +93,22 @@ class BenchmarkEngine(object): engine.run() # to run config """ - def __init__(self, config, task, admin=None, users=None): + def __init__(self, config, task, admin=None, users=None, + abort_on_sla_failure=False): """BenchmarkEngine constructor.
:param config: The configuration with specified benchmark scenarios :param task: The current task which is being performed :param admin: Dict with admin credentials :param users: List of dicts with user credentials + :param abort_on_sla_failure: True if the execution should be stopped + when some SLA check fails """ self.config = config self.task = task self.admin = admin and objects.Endpoint(**admin) or None self.users = map(lambda u: objects.Endpoint(**u), users or []) + self.abort_on_sla_failure = abort_on_sla_failure @rutils.log_task_wrapper(LOG.info, _("Task validation check cloud.")) def _check_cloud(self): @@ -221,7 +225,7 @@ class BenchmarkEngine(object): is_done = threading.Event() consumer = threading.Thread( target=self.consume_results, - args=(key, self.task, runner.result_queue, is_done)) + args=(key, self.task, is_done, runner)) consumer.start() context_obj = self._prepare_context(kw.get("context", {}), name, self.admin) @@ -240,7 +244,7 @@ class BenchmarkEngine(object): consumer.join() self.task.update_status(consts.TaskStatus.FINISHED) - def consume_results(self, key, task, result_queue, is_done): + def consume_results(self, key, task, is_done, runner): """Consume scenario runner results from queue and send them to db. Has to be run from different thread simultaneously with the runner.run @@ -248,22 +252,25 @@ class BenchmarkEngine(object): :param key: Scenario identifier :param task: Running task - :param result_queue: Deque with runner results :param is_done: Event which is set from the runner thread after the runner finishes it's work. + :param runner: ScenarioRunner object that was used to run a task """ results = [] + sla_checker = base_sla.SLAChecker(key["kw"]) while True: - if result_queue: - result = result_queue.popleft() + if runner.result_queue: + result = runner.result_queue.popleft() results.append(result) + success = sla_checker.add_iteration(result) + if self.abort_on_sla_failure and not success: + runner.abort() elif is_done.isSet(): break else: time.sleep(0.1) - sla = base_sla.SLA.check_all(key["kw"], results) task.append_results(key, {"raw": results, "load_duration": self.duration, "full_duration": self.full_duration, - "sla": sla}) + "sla": sla_checker.results()}) diff --git a/rally/benchmark/sla/base.py b/rally/benchmark/sla/base.py index 8351cb310b..03718c68b2 100644 --- a/rally/benchmark/sla/base.py +++ b/rally/benchmark/sla/base.py @@ -24,24 +24,43 @@ import abc import jsonschema import six -from rally.benchmark.processing import utils as putils from rally.common.i18n import _ from rally.common import utils from rally import consts from rally import exceptions -class SLAResult(object): +class SLAChecker(object): + """Base SLA checker class.""" - def __init__(self, success=True, msg=None): - self.success = success - self.msg = msg + def __init__(self, config): + self.config = config + self.sla_criteria = [SLA.get_by_name(name)(criterion_value) + for name, criterion_value + in config.get("sla", {}).items()] + + def add_iteration(self, iteration): + """Process the result of a single iteration. + + The call to add_iteration() will return True if all the SLA checks + passed, and False otherwise. 
+ + :param iteration: iteration result object + """ + return all([sla.add_iteration(iteration) for sla in self.sla_criteria]) + + def results(self): + return [sla.result() for sla in self.sla_criteria] @six.add_metaclass(abc.ABCMeta) class SLA(object): """Factory for criteria classes.""" + def __init__(self, criterion_value): + self.criterion_value = criterion_value + self.success = True + @staticmethod def validate(config): properties = dict([(c.OPTION_NAME, c.CONFIG_SCHEMA) @@ -53,36 +72,6 @@ class SLA(object): } jsonschema.validate(config, schema) - @staticmethod - @abc.abstractmethod - def check(criterion_value, result): - """Check if task succeeded according to criterion. - - :param criterion_value: Criterion value specified in configuration - :param result: result object - :returns: True if success - """ - - @staticmethod - def check_all(config, result): - """Check all SLA criteria. - - :param config: sla related config for a task - :param result: Result of a task - :returns: A list of sla results - """ - - results = [] - opt_name_map = dict([(c.OPTION_NAME, c) - for c in utils.itersubclasses(SLA)]) - - for name, criterion in six.iteritems(config.get("sla", {})): - check_result = opt_name_map[name].check(criterion, result) - results.append({"criterion": name, - "success": check_result.success, - "detail": check_result.msg}) - return results - @staticmethod def get_by_name(name): """Returns SLA by name or config option name.""" @@ -91,23 +80,56 @@ class SLA(object): return sla raise exceptions.NoSuchSLA(name=name) + @abc.abstractmethod + def add_iteration(self, iteration): + """Process the result of a single iteration and perform a SLA check. + + The call to add_iteration() will return True if the SLA check passed, + and False otherwise. + + :param iteration: iteration result object + :returns: True if the SLA check passed, False otherwise + """ + + def result(self): + """Returns the SLA result dict corresponding to the current state.""" + return { + "criterion": self.OPTION_NAME, + "success": self.success, + "detail": self.details() + } + + @abc.abstractmethod + def details(self): + """Returns the string describing the current results of the SLA.""" + + def status(self): + """Return "Passed" or "Failed" depending on the current SLA status.""" + return "Passed" if self.success else "Failed" + class FailureRateDeprecated(SLA): """[Deprecated] Failure rate in percents.""" OPTION_NAME = "max_failure_percent" CONFIG_SCHEMA = {"type": "number", "minimum": 0.0, "maximum": 100.0} - @staticmethod - def check(criterion_value, result): - errors = len([x for x in result if x["error"]]) - error_rate = errors * 100.0 / len(result) if len(result) > 0 else 100.0 - if criterion_value < error_rate: - success = False - else: - success = True - msg = (_("Maximum failure percent %s%% failures, actually %s%%") % - (criterion_value * 100.0, error_rate)) - return SLAResult(success, msg) + def __init__(self, criterion_value): + super(FailureRateDeprecated, self).__init__(criterion_value) + self.errors = 0 + self.total = 0 + self.error_rate = 0.0 + + def add_iteration(self, iteration): + self.total += 1 + if iteration["error"]: + self.errors += 1 + self.error_rate = self.errors * 100.0 / self.total + self.success = self.error_rate <= self.criterion_value + return self.success + + def details(self): + return (_("Maximum failure rate %s%% <= %s%% - %s") % + (self.criterion_value, self.error_rate, self.status())) class FailureRate(SLA): @@ -122,20 +144,26 @@ class FailureRate(SLA): } } - @staticmethod - def 
check(criterion_value, result): - min_percent = criterion_value.get("min", 0) - max_percent = criterion_value.get("max", 100) - errors = len([x for x in result if x["error"]]) - error_rate = errors * 100.0 / len(result) if len(result) > 0 else 100.0 + def __init__(self, criterion_value): + super(FailureRate, self).__init__(criterion_value) + self.min_percent = self.criterion_value.get("min", 0) + self.max_percent = self.criterion_value.get("max", 100) + self.errors = 0 + self.total = 0 + self.error_rate = 0.0 - success = min_percent <= error_rate <= max_percent + def add_iteration(self, iteration): + self.total += 1 + if iteration["error"]: + self.errors += 1 + self.error_rate = self.errors * 100.0 / self.total + self.success = self.min_percent <= self.error_rate <= self.max_percent + return self.success - msg = (_("Maximum failure rate percent %s%% failures, minimum failure " - "rate percent %s%% failures, actually %s%%") % - (max_percent, min_percent, error_rate)) - - return SLAResult(success, msg) + def details(self): + return (_("Failure rate criteria %.2f%% <= %.2f%% <= %.2f%% - %s") % + (self.min_percent, self.error_rate, self.max_percent, + self.status())) class IterationTime(SLA): @@ -144,31 +172,41 @@ class IterationTime(SLA): CONFIG_SCHEMA = {"type": "number", "minimum": 0.0, "exclusiveMinimum": True} - @staticmethod - def check(criterion_value, result): - duration = 0 - success = True - for i in result: - if i["duration"] >= duration: - duration = i["duration"] - if i["duration"] > criterion_value: - success = False - msg = (_("Maximum seconds per iteration %ss, found with %ss") % - (criterion_value, duration)) - return SLAResult(success, msg) + def __init__(self, criterion_value): + super(IterationTime, self).__init__(criterion_value) + self.max_iteration_time = 0.0 + + def add_iteration(self, iteration): + if iteration["duration"] > self.max_iteration_time: + self.max_iteration_time = iteration["duration"] + self.success = self.max_iteration_time <= self.criterion_value + return self.success + + def details(self): + return (_("Maximum seconds per iteration %.2fs <= %.2fs - %s") % + (self.max_iteration_time, self.criterion_value, self.status())) class MaxAverageDuration(SLA): - """Maximum average duration for one iteration in seconds.""" + """Maximum average duration of one iteration in seconds.""" OPTION_NAME = "max_avg_duration" CONFIG_SCHEMA = {"type": "number", "minimum": 0.0, "exclusiveMinimum": True} - @staticmethod - def check(criterion_value, result): - durations = [r["duration"] for r in result if not r.get("error")] - avg = putils.mean(durations) - success = avg < criterion_value - msg = (_("Maximum average duration per iteration %ss, found with %ss") - % (criterion_value, avg)) - return SLAResult(success, msg) + def __init__(self, criterion_value): + super(MaxAverageDuration, self).__init__(criterion_value) + self.total_duration = 0.0 + self.iterations = 0 + self.avg = 0.0 + + def add_iteration(self, iteration): + if not iteration.get("error"): + self.total_duration += iteration["duration"] + self.iterations += 1 + self.avg = self.total_duration / self.iterations + self.success = self.avg <= self.criterion_value + return self.success + + def details(self): + return (_("Maximum average duration of one iteration %.2fs <= %.2fs - " + "%s") % (self.avg, self.criterion_value, self.status())) diff --git a/rally/cmd/commands/task.py b/rally/cmd/commands/task.py index 6438ad8189..ca76e727d9 100644 --- a/rally/cmd/commands/task.py +++ b/rally/cmd/commands/task.py @@ -180,9 
+180,13 @@ class TaskCommands(object): @cliutils.args("--tag", help="Tag for this task") @cliutils.args("--no-use", action="store_false", dest="do_use", help="Don't set new task as default for future operations") + @cliutils.args("--abort-on-sla-failure", action="store_true", + dest="abort_on_sla_failure", + help="Abort the execution of a benchmark scenario when " + "any SLA check for it fails") @envutils.with_default_deployment(cli_arg_name="deployment") def start(self, task, deployment=None, task_args=None, task_args_file=None, - tag=None, do_use=False): + tag=None, do_use=False, abort_on_sla_failure=False): """Start benchmark task. :param task: a file with yaml/json task @@ -193,6 +197,11 @@ class TaskCommands(object): is jinja2 template. :param deployment: UUID or name of a deployment :param tag: optional tag for this task + :param do_use: if True, the new task will be stored as the default one + for future operations + :param abort_on_sla_failure: if True, the execution of a benchmark + scenario will stop when any SLA check + for it fails """ try: input_task = self._load_task(task, task_args, task_args_file) @@ -207,7 +216,8 @@ class TaskCommands(object): print("Benchmarking... This can take a while...\n") print("To track task status use:\n") print("\trally task status\n\tor\n\trally task detailed\n") - api.Task.start(deployment, input_task, task=task) + api.Task.start(deployment, input_task, task=task, + abort_on_sla_failure=abort_on_sla_failure) self.detailed(task_id=task["uuid"]) if do_use: use.UseCommands().task(task["uuid"]) diff --git a/tests/functional/test_cli_task.py b/tests/functional/test_cli_task.py index 81db910d81..26a1023f3b 100644 --- a/tests/functional/test_cli_task.py +++ b/tests/functional/test_cli_task.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+import json import os import re import unittest @@ -243,6 +244,210 @@ class TaskTestCase(unittest.TestCase): r"(?P[0-9a-f\-]{36}): started", output) self.assertIsNotNone(result) + def _test_start_abort_on_sla_failure_success(self, cfg, times): + rally = utils.Rally() + with mock.patch.dict("os.environ", utils.TEST_ENV): + deployment_id = envutils.get_global("RALLY_DEPLOYMENT") + config = utils.TaskConfig(cfg) + rally(("task start --task %(task_file)s " + "--deployment %(deployment_id)s --abort-on-sla-failure") % + {"task_file": config.filename, + "deployment_id": deployment_id}) + results = json.loads(rally("task results")) + iterations_completed = len(results[0]["result"]) + self.assertEqual(times, iterations_completed) + + def test_start_abort_on_sla_failure_success_constant(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "constant", + "times": times, + "concurrency": 5 + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure_success(cfg, times) + + def test_start_abort_on_sla_failure_success_serial(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "serial", + "times": times + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure_success(cfg, times) + + def test_start_abort_on_sla_failure_success_rps(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "rps", + "times": times, + "rps": 20 + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure_success(cfg, times) + + def _test_start_abort_on_sla_failure(self, cfg, times): + rally = utils.Rally() + with mock.patch.dict("os.environ", utils.TEST_ENV): + deployment_id = envutils.get_global("RALLY_DEPLOYMENT") + config = utils.TaskConfig(cfg) + rally(("task start --task %(task_file)s " + "--deployment %(deployment_id)s --abort-on-sla-failure") % + {"task_file": config.filename, + "deployment_id": deployment_id}) + results = json.loads(rally("task results")) + iterations_completed = len(results[0]["result"]) + # NOTE(msdubov): Change '<=' to '<' as soon as we fix the runners. 
+ self.assertTrue(iterations_completed <= times) + + def test_start_abort_on_sla_failure_max_seconds_constant(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "constant", + "times": times, + "concurrency": 5 + }, + "sla": { + "max_seconds_per_iteration": 0.01 + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + + def test_start_abort_on_sla_failure_max_seconds_serial(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "serial", + "times": times + }, + "sla": { + "max_seconds_per_iteration": 0.01 + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + + def test_start_abort_on_sla_failure_max_seconds_rps(self): + times = 100 + cfg = { + "Dummy.dummy": [ + { + "args": { + "sleep": 0.1 + }, + "runner": { + "type": "rps", + "times": times, + "rps": 20 + }, + "sla": { + "max_seconds_per_iteration": 0.01 + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + + def test_start_abort_on_sla_failure_max_failure_rate_constant(self): + times = 100 + cfg = { + "Dummy.dummy_exception": [ + { + "runner": { + "type": "constant", + "times": times, + "concurrency": 5 + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + + def test_start_abort_on_sla_failure_max_failure_rate_serial(self): + times = 100 + cfg = { + "Dummy.dummy_exception": [ + { + "runner": { + "type": "serial", + "times": times + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + + def test_start_abort_on_sla_failure_max_failure_rate_rps(self): + times = 100 + cfg = { + "Dummy.dummy_exception": [ + { + "runner": { + "type": "rps", + "times": times, + "rps": 20 + }, + "sla": { + "failure_rate": {"max": 0.0} + } + } + ] + } + self._test_start_abort_on_sla_failure(cfg, times) + # NOTE(oanufriev): Not implemented def test_abort(self): pass @@ -251,7 +456,7 @@ class TaskTestCase(unittest.TestCase): class SLATestCase(unittest.TestCase): def _get_sample_task_config(self, max_seconds_per_iteration=4, - max_failure_percent=0): + failure_rate_max=0): return { "KeystoneBasic.create_and_list_users": [ { @@ -265,7 +470,7 @@ class SLATestCase(unittest.TestCase): }, "sla": { "max_seconds_per_iteration": max_seconds_per_iteration, - "max_failure_percent": max_failure_percent, + "failure_rate": {"max": failure_rate_max} } } ] @@ -289,9 +494,9 @@ class SLATestCase(unittest.TestCase): "detail": mock.ANY, "pos": 0, "status": "PASS"}, {"benchmark": "KeystoneBasic.create_and_list_users", - "criterion": "max_failure_percent", + "criterion": "failure_rate", "detail": mock.ANY, - "pos": 0, "status": "PASS"}, + "pos": 0, "status": "PASS"} ] data = rally("task sla_check --json", getjson=True) self.assertEqual(expected, data) diff --git a/tests/unit/benchmark/sla/test_base.py b/tests/unit/benchmark/sla/test_base.py index 5350004cc2..6230a30d54 100644 --- a/tests/unit/benchmark/sla/test_base.py +++ b/tests/unit/benchmark/sla/test_base.py @@ -25,10 +25,32 @@ class TestCriterion(base.SLA): OPTION_NAME = "test_criterion" CONFIG_SCHEMA = {"type": "integer"} - @staticmethod - def check(criterion_value, result): - return base.SLAResult(criterion_value == result, - msg="detail") + def add_iteration(self, iteration): + self.success = self.criterion_value == iteration + return self.success + + def details(self): + return "detail" + + +class SLACheckerTestCase(test.TestCase): + + def 
test_add_iteration_and_results(self): + sla_checker = base.SLAChecker({"sla": {"test_criterion": 42}}) + + iteration = {"key": {"name": "fake", "pos": 0}, "data": 42} + self.assertTrue(sla_checker.add_iteration(iteration["data"])) + expected_result = [{"criterion": "test_criterion", + "detail": "detail", + "success": True}] + self.assertEqual(expected_result, sla_checker.results()) + + iteration["data"] = 43 + self.assertFalse(sla_checker.add_iteration(iteration["data"])) + expected_result = [{"criterion": "test_criterion", + "detail": "detail", + "success": False}] + self.assertEqual(expected_result, sla_checker.results()) class BaseSLATestCase(test.TestCase): @@ -52,39 +74,24 @@ class BaseSLATestCase(test.TestCase): self.assertRaises(jsonschema.ValidationError, base.SLA.validate, {"test_criterion": 42.0}) - def test_check_all(self): - config = { - "sla": {"test_criterion": 42}, - } - result = {"key": {"kw": config, "name": "fake", "pos": 0}, - "data": 42} - results = list(base.SLA.check_all(config, result["data"])) - expected = [{"criterion": "test_criterion", - "detail": "detail", - "success": True}] - self.assertEqual(expected, results) - result["data"] = 43 - results = list(base.SLA.check_all(config, result["data"])) - expected = [{"criterion": "test_criterion", - "detail": "detail", - "success": False}] - self.assertEqual(expected, results) - class FailureRateDeprecatedTestCase(test.TestCase): - def test_check(self): - result = [ - {"error": ["error"]}, - {"error": []}, - ] # one error and one success. 50% success rate - # 50% < 75.0% - self.assertTrue(base.FailureRateDeprecated.check(75.0, result).success) - # 50% > 25% - self.assertFalse(base.FailureRateDeprecated.check(25, result).success) - def test_check_with_no_results(self): - result = [] - self.assertFalse(base.FailureRateDeprecated.check(10, result).success) + def test_result(self): + sla1 = base.FailureRateDeprecated(75.0) + sla2 = base.FailureRateDeprecated(25.0) + # 50% failure rate + for sla in [sla1, sla2]: + sla.add_iteration({"error": ["error"]}) + sla.add_iteration({"error": []}) + self.assertTrue(sla1.result()["success"]) # 50% < 75.0% + self.assertFalse(sla2.result()["success"]) # 50% > 25.0% + self.assertEqual("Passed", sla1.status()) + self.assertEqual("Failed", sla2.status()) + + def test_result_no_iterations(self): + sla = base.FailureRateDeprecated(10.0) + self.assertTrue(sla.result()["success"]) class FailureRateTestCase(test.TestCase): @@ -103,30 +110,60 @@ class FailureRateTestCase(test.TestCase): base.IterationTime.validate, {"failure_rate": {"max": 101}}) - def test_check_min(self): - result = [{"error": ["error"]}, {"error": []}, {"error": ["error"]}, - {"error": ["error"]}, ] # 75% failure rate - self.assertFalse(base.FailureRate.check({"min": 80}, result).success) - self.assertTrue(base.FailureRate.check({"min": 60.5}, result).success) + def test_result_min(self): + sla1 = base.FailureRate({"min": 80.0}) + sla2 = base.FailureRate({"min": 60.5}) + # 75% failure rate + for sla in [sla1, sla2]: + sla.add_iteration({"error": ["error"]}) + sla.add_iteration({"error": []}) + sla.add_iteration({"error": ["error"]}) + sla.add_iteration({"error": ["error"]}) + self.assertFalse(sla1.result()["success"]) # 80.0% > 75.0% + self.assertTrue(sla2.result()["success"]) # 60.5% < 75.0% + self.assertEqual("Failed", sla1.status()) + self.assertEqual("Passed", sla2.status()) - def test_check_max(self): - result = [{"error": ["error"]}, {"error": []}] # 50% failure rate - self.assertFalse(base.FailureRate.check({"max": 25}, 
result).success) - self.assertTrue(base.FailureRate.check({"max": 75.0}, result).success) + def test_result_max(self): + sla1 = base.FailureRate({"max": 25.0}) + sla2 = base.FailureRate({"max": 75.0}) + # 50% failure rate + for sla in [sla1, sla2]: + sla.add_iteration({"error": ["error"]}) + sla.add_iteration({"error": []}) + self.assertFalse(sla1.result()["success"]) # 25.0% < 50.0% + self.assertTrue(sla2.result()["success"]) # 75.0% > 50.0% + self.assertEqual("Failed", sla1.status()) + self.assertEqual("Passed", sla2.status()) - def test_check_min_max(self): - result = [{"error": ["error"]}, {"error": []}, {"error": []}, - {"error": []}] # 25% failure rate - self.assertFalse(base.FailureRate.check({"min": 50, "max": 90}, result) - .success) - self.assertFalse(base.FailureRate.check({"min": 5, "max": 20}, result) - .success) - self.assertTrue(base.FailureRate.check({"min": 24.9, "max": 25.1}, - result).success) + def test_result_min_max(self): + sla1 = base.FailureRate({"min": 50, "max": 90}) + sla2 = base.FailureRate({"min": 5, "max": 20}) + sla3 = base.FailureRate({"min": 24.9, "max": 25.1}) + # 25% failure rate + for sla in [sla1, sla2, sla3]: + sla.add_iteration({"error": ["error"]}) + sla.add_iteration({"error": []}) + sla.add_iteration({"error": []}) + sla.add_iteration({"error": []}) + self.assertFalse(sla1.result()["success"]) # 25.0% < 50.0% + self.assertFalse(sla2.result()["success"]) # 25.0% > 20.0% + self.assertTrue(sla3.result()["success"]) # 24.9% < 25.0% < 25.1% + self.assertEqual("Failed", sla1.status()) + self.assertEqual("Failed", sla2.status()) + self.assertEqual("Passed", sla3.status()) - def test_check_empty_result(self): - result = [] - self.assertFalse(base.FailureRate.check({"max": 10.0}, result).success) + def test_result_no_iterations(self): + sla = base.FailureRate({"max": 10.0}) + self.assertTrue(sla.result()["success"]) + + def test_add_iteration(self): + sla = base.FailureRate({"max": 35.0}) + self.assertTrue(sla.add_iteration({"error": []})) + self.assertTrue(sla.add_iteration({"error": []})) + self.assertTrue(sla.add_iteration({"error": []})) + self.assertTrue(sla.add_iteration({"error": ["error"]})) # 33% + self.assertFalse(sla.add_iteration({"error": ["error"]})) # 40% class IterationTimeTestCase(test.TestCase): @@ -137,13 +174,28 @@ class IterationTimeTestCase(test.TestCase): self.assertRaises(jsonschema.ValidationError, base.IterationTime.validate, properties) - def test_check(self): - result = [ - {"duration": 3.14}, - {"duration": 6.28}, - ] - self.assertTrue(base.IterationTime.check(42, result).success) - self.assertFalse(base.IterationTime.check(3.62, result).success) + def test_result(self): + sla1 = base.IterationTime(42) + sla2 = base.IterationTime(3.62) + for sla in [sla1, sla2]: + sla.add_iteration({"duration": 3.14}) + sla.add_iteration({"duration": 6.28}) + self.assertTrue(sla1.result()["success"]) # 42 > 6.28 + self.assertFalse(sla2.result()["success"]) # 3.62 < 6.28 + self.assertEqual("Passed", sla1.status()) + self.assertEqual("Failed", sla2.status()) + + def test_result_no_iterations(self): + sla = base.IterationTime(42) + self.assertTrue(sla.result()["success"]) + + def test_add_iteration(self): + sla = base.IterationTime(4.0) + self.assertTrue(sla.add_iteration({"duration": 3.14})) + self.assertTrue(sla.add_iteration({"duration": 2.0})) + self.assertTrue(sla.add_iteration({"duration": 3.99})) + self.assertFalse(sla.add_iteration({"duration": 4.5})) + self.assertFalse(sla.add_iteration({"duration": 3.8})) class 
MaxAverageDurationTestCase(test.TestCase): @@ -154,10 +206,25 @@ class MaxAverageDurationTestCase(test.TestCase): self.assertRaises(jsonschema.ValidationError, base.MaxAverageDuration.validate, properties) - def test_check(self): - result = [ - {"duration": 3.14}, - {"duration": 6.28}, - ] - self.assertTrue(base.MaxAverageDuration.check(42, result).success) - self.assertFalse(base.MaxAverageDuration.check(3.62, result).success) + def test_result(self): + sla1 = base.MaxAverageDuration(42) + sla2 = base.MaxAverageDuration(3.62) + for sla in [sla1, sla2]: + sla.add_iteration({"duration": 3.14}) + sla.add_iteration({"duration": 6.28}) + self.assertTrue(sla1.result()["success"]) # 42 > avg([3.14, 6.28]) + self.assertFalse(sla2.result()["success"]) # 3.62 < avg([3.14, 6.28]) + self.assertEqual("Passed", sla1.status()) + self.assertEqual("Failed", sla2.status()) + + def test_result_no_iterations(self): + sla = base.MaxAverageDuration(42) + self.assertTrue(sla.result()["success"]) + + def test_add_iteration(self): + sla = base.MaxAverageDuration(4.0) + self.assertTrue(sla.add_iteration({"duration": 3.5})) + self.assertTrue(sla.add_iteration({"duration": 2.5})) + self.assertTrue(sla.add_iteration({"duration": 5.0})) # avg = 3.667 + self.assertFalse(sla.add_iteration({"duration": 7.0})) # avg = 4.5 + self.assertTrue(sla.add_iteration({"duration": 1.0})) # avg = 3.8 diff --git a/tests/unit/benchmark/test_engine.py b/tests/unit/benchmark/test_engine.py index e3998f2f34..50de242a42 100644 --- a/tests/unit/benchmark/test_engine.py +++ b/tests/unit/benchmark/test_engine.py @@ -303,17 +303,68 @@ class BenchmarkEngineTestCase(test.TestCase): self.assertEqual(result, expected_result) mock_meta.assert_called_once_with(name, "context") - @mock.patch("rally.benchmark.sla.base.SLA.check_all") - def test_consume_results(self, mock_check_all): + @mock.patch("rally.benchmark.sla.base.SLAChecker") + def test_consume_results(self, mock_sla): + mock_sla_instance = mock.MagicMock() + mock_sla.return_value = mock_sla_instance key = {"kw": {"fake": 2}, "name": "fake", "pos": 0} task = mock.MagicMock() config = { "a.benchmark": [{"context": {"context_a": {"a": 1}}}], } + runner = mock.MagicMock() + runner.result_queue = collections.deque([1, 2]) is_done = mock.MagicMock() is_done.isSet.side_effect = [False, False, True] eng = engine.BenchmarkEngine(config, task) eng.duration = 123 eng.full_duration = 456 - eng.consume_results(key, task, collections.deque([1, 2]), is_done) - mock_check_all.assert_called_once_with({"fake": 2}, [1, 2]) + eng.consume_results(key, task, is_done, runner) + mock_sla.assert_called_once_with({"fake": 2}) + expected_iteration_calls = [mock.call(1), mock.call(2)] + self.assertEqual(expected_iteration_calls, + mock_sla_instance.add_iteration.mock_calls) + + @mock.patch("rally.benchmark.sla.base.SLAChecker") + def test_consume_results_sla_failure_abort(self, mock_sla): + mock_sla_instance = mock.MagicMock() + mock_sla.return_value = mock_sla_instance + mock_sla_instance.add_iteration.side_effect = [True, True, False, + False] + key = {"kw": {"fake": 2}, "name": "fake", "pos": 0} + task = mock.MagicMock() + config = { + "a.benchmark": [{"context": {"context_a": {"a": 1}}}], + } + runner = mock.MagicMock() + runner.result_queue = collections.deque([1, 2, 3, 4]) + is_done = mock.MagicMock() + is_done.isSet.side_effect = [False, False, False, False, True] + eng = engine.BenchmarkEngine(config, task, abort_on_sla_failure=True) + eng.duration = 123 + eng.full_duration = 456 + eng.consume_results(key, 
task, is_done, runner) + mock_sla.assert_called_once_with({"fake": 2}) + self.assertTrue(runner.abort.called) + + @mock.patch("rally.benchmark.sla.base.SLAChecker") + def test_consume_results_sla_failure_continue(self, mock_sla): + mock_sla_instance = mock.MagicMock() + mock_sla.return_value = mock_sla_instance + mock_sla_instance.add_iteration.side_effect = [True, True, False, + False] + key = {"kw": {"fake": 2}, "name": "fake", "pos": 0} + task = mock.MagicMock() + config = { + "a.benchmark": [{"context": {"context_a": {"a": 1}}}], + } + runner = mock.MagicMock() + runner.result_queue = collections.deque([1, 2, 3, 4]) + is_done = mock.MagicMock() + is_done.isSet.side_effect = [False, False, False, False, True] + eng = engine.BenchmarkEngine(config, task, abort_on_sla_failure=False) + eng.duration = 123 + eng.full_duration = 456 + eng.consume_results(key, task, is_done, runner) + mock_sla.assert_called_once_with({"fake": 2}) + self.assertEqual(0, runner.abort.call_count) diff --git a/tests/unit/cmd/commands/test_task.py b/tests/unit/cmd/commands/test_task.py index 645dee9301..6d7c8d26e2 100644 --- a/tests/unit/cmd/commands/test_task.py +++ b/tests/unit/cmd/commands/test_task.py @@ -112,7 +112,8 @@ class TaskCommandsTestCase(test.TestCase): task_path = "path_to_config.json" self.task.start(task_path, deployment_id) mock_api.assert_called_once_with(deployment_id, {"some": "json"}, - task=mock_create_task.return_value) + task=mock_create_task.return_value, + abort_on_sla_failure=False) mock_load.assert_called_once_with(task_path, None, None) @mock.patch("rally.cmd.commands.task.TaskCommands._load_task", @@ -142,7 +143,7 @@ class TaskCommandsTestCase(test.TestCase): mock_api.Task.create.assert_called_once_with("deployment", "tag") mock_api.Task.start.assert_called_once_with( "deployment", mock_load.return_value, - task=mock_api.Task.create.return_value) + task=mock_api.Task.create.return_value, abort_on_sla_failure=False) @mock.patch("rally.cmd.commands.task.api") def test_abort(self, mock_api): diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py index 0c957b2760..c5eb54575a 100644 --- a/tests/unit/test_api.py +++ b/tests/unit/test_api.py @@ -115,9 +115,9 @@ class TaskAPITestCase(test.TestCase): mock_engine.assert_has_calls([ mock.call("config", mock_task.return_value, admin=mock_deployment_get.return_value["admin"], - users=[]), + users=[], abort_on_sla_failure=False), mock.call().validate(), - mock.call().run(), + mock.call().run() ]) mock_task.assert_called_once_with( diff --git a/tools/rally.bash_completion b/tools/rally.bash_completion index 3eb77b902f..234a821a2e 100644 --- a/tools/rally.bash_completion +++ b/tools/rally.bash_completion @@ -32,7 +32,7 @@ _rally() OPTS["task_report"]="--tasks --out --open" OPTS["task_results"]="--uuid" OPTS["task_sla_check"]="--uuid --json" - OPTS["task_start"]="--deployment --task --task-args --task-args-file --tag --no-use" + OPTS["task_start"]="--deployment --task --task-args --task-args-file --tag --no-use --abort-on-sla-failure" OPTS["task_status"]="--uuid" OPTS["task_validate"]="--deployment --task --task-args --task-args-file" OPTS["use_deployment"]="--deployment"
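
For reference, a minimal sketch of how the new SLAChecker API introduced by this patch is driven, mirroring BenchmarkEngine.consume_results(). This is illustrative only and not part of the patch: the SLA limits and the fake iteration dicts below are invented for the example, and it assumes rally with this change applied is importable.

from rally.benchmark.sla import base as base_sla

# Benchmark kwargs as they would appear in a task config ("sla" section only).
config = {
    "sla": {
        "failure_rate": {"max": 25.0},
        "max_seconds_per_iteration": 4.0,
    }
}

sla_checker = base_sla.SLAChecker(config)

# Fake iteration results; in the engine these are popped from runner.result_queue.
iterations = [
    {"error": [], "duration": 1.2},
    {"error": [], "duration": 3.9},
    {"error": ["boom"], "duration": 0.5},  # failure_rate reaches ~33% > 25%
]

for iteration in iterations:
    if not sla_checker.add_iteration(iteration):
        # With --abort-on-sla-failure the engine calls runner.abort() at this point.
        break

# Cumulative per-criterion results, stored under "sla" in the task results, e.g.
# [{"criterion": "failure_rate", "success": False, "detail": "..."}, ...]
print(sla_checker.results())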