Added new atomic scenarios
We need to add more small atomic destructive scenarios to run them in parallel with load and performance tests. Each small destructive scenario should only perform some destructive action and then recover the cloud, without any additional checks in the destructive scenario. Change-Id: I8f87a1d935786d05cdba76608d2efd28b3cc927f
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -18,3 +18,4 @@ doc/source/api/
|
||||
.testrepository/
|
||||
.tox/
|
||||
.installed
|
||||
.idea*
|
||||
|
||||
34
README.md
34
README.md
@@ -7,36 +7,30 @@ Introduction
|
||||
HAOS is a suite of HA/destructive tests for OpenStack clouds. These tests
|
||||
are written as Rally plugins and are executed by Rally and in
|
||||
parallel with the load/performance tests to simulate some disaster/failover
|
||||
scenarios with the OpenStack clouds. HAOS uses Shaker for remote execution
|
||||
of commands on OpenStack nodes and for data-plane performance load.
|
||||
scenarios with the OpenStack clouds. HAOS uses HAOS agent for remote execution
|
||||
of commands on OpenStack nodes and virtual machines in the cloud.
|
||||
|
||||
|
||||
How to install
|
||||
--------------
|
||||
|
||||
1. Clone the repository:
|
||||
```bash
|
||||
git clone git://git.openstack.org/stackforge/haos
|
||||
```
|
||||
|
||||
2. Make sure that ``sshpass`` is installed - on Ubuntu do ``sudo apt-get install sshpass``
|
||||
|
||||
3. Fill in your ``openrc`` file based on the sample provided in ``etc/openrc``
|
||||
|
||||
``git clone git://git.openstack.org/stackforge/haos``
|
||||
2. Make sure that ``sshpass`` is installed - for example, on Ubuntu execute the following command: ``sudo apt-get install sshpass``
|
||||
3. Edit etc/openrc.local file, set IP addresses, credentials and parameters for your cloud
|
||||
4. Import ``openrc`` into your environment by doing
|
||||
```bash
|
||||
source etc/openrc.local
|
||||
```
|
||||
|
||||
``source etc/openrc.local``
|
||||
5. Run tox:
|
||||
```bash
|
||||
tox -erun
|
||||
```
|
||||
``tox -e run``
|
||||
|
||||
How to run tests
|
||||
----------------
|
||||
|
||||
Run scenario with the command:
|
||||
```bash
|
||||
tox -erun <scenario>
|
||||
```
|
||||
``tox -e run <scenario>``
|
||||
|
||||
How to run tests on MOS environments
|
||||
------------------------------------
|
||||
|
||||
Run scenario with the command:
|
||||
``tox -e run-for-mos <scenario>``
|
||||
@@ -1,62 +0,0 @@
|
||||
# Copyright (c) 2015 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
|
||||
from rally.benchmark.context import base
|
||||
from rally.benchmark.context.cleanup import manager as resource_manager
|
||||
from rally.common import log as logging
|
||||
from rally import consts
|
||||
|
||||
from haos.remote import server
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@base.context(name="cloud", order=800)
class CloudNodesContext(base.Context):
    """This context allows to define the list of nodes in the cloud.

    Connection settings are taken from HAOS_* environment variables and
    published into the context; a HAOS server endpoint is then used to
    build the remote-control callable shared with the scenarios.
    """

    CONFIG_SCHEMA = {
        "type": "object",
        "$schema": consts.JSON_SCHEMA,
        "additionalProperties": False,
        "properties": {
        }
    }

    def setup(self):
        """Read HAOS_* env vars into the context and attach remote control.

        :raises ValueError: if a mandatory env var (one with a None
            default) is neither set in the environment nor defaulted.
        """
        # Vars defaulting to None are mandatory; numeric defaults are
        # fallbacks used when the env var is absent.
        env_vars = {
            'HAOS_SERVER_ENDPOINT': None,
            'HAOS_IMAGE': None,
            'HAOS_FLAVOR': None,
            'HAOS_JOIN_TIMEOUT': 100,
            'HAOS_COMMAND_TIMEOUT': 10
        }

        for var, def_value in env_vars.items():
            value = os.environ.get(var) or def_value
            if not value:
                # Bug fix: the message was passed as two separate args to
                # ValueError and the %s was never interpolated.
                raise ValueError('Env var %s must be set' % var)
            self.context[var.lower()] = value

        boss_inst = server.Server(self.context["haos_server_endpoint"])
        self.context["haos_remote_control"] = boss_inst.remote_control

    def cleanup(self):
        """This method is called after the task finish."""
        resource_manager.cleanup(names=["nova.servers"],
                                 users=self.context.get("users", []))
|
||||
@@ -1,5 +1,16 @@
|
||||
import os
|
||||
|
||||
from rally.benchmark.context import base
|
||||
from rally.benchmark.context.cleanup import manager as resource_manager
|
||||
from rally.common import log as logging
|
||||
from rally import consts
|
||||
from rally import exceptions
|
||||
|
||||
from haos.remote import server
|
||||
from haos.remote import ssh_remote_control
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@base.context(name="cloud_nodes", order=800)
|
||||
@@ -19,21 +30,9 @@ class CloudNodesContext(base.Context):
|
||||
"type": "object",
|
||||
"default": {}
|
||||
},
|
||||
"shaker_endpoint": {
|
||||
"remote_control_type": {
|
||||
"type": "string",
|
||||
"default": ""
|
||||
},
|
||||
"shaker_image": {
|
||||
"type": "string",
|
||||
"default": "TestVM"
|
||||
},
|
||||
"default_flavor": {
|
||||
"type": "string",
|
||||
"default": "m1.micro"
|
||||
},
|
||||
"shaker": {
|
||||
"type": "object",
|
||||
"default": {}
|
||||
"default": "ssh"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -41,12 +40,40 @@ class CloudNodesContext(base.Context):
|
||||
def setup(self):
|
||||
"""This method is called before the task start."""
|
||||
self.context["controllers"] = self.config.get("controllers")
|
||||
remote_control_type = self.config.get("remote_control_type")
|
||||
self.context["remote_control_type"] = remote_control_type
|
||||
power_control_node = self.config.get("power_control_node")
|
||||
self.context["power_control_node"] = power_control_node
|
||||
self.context["shaker_endpoint"] = self.config.get("shaker_endpoint")
|
||||
self.context["shaker_image"] = self.config.get("shaker_image")
|
||||
self.context["default_flavor"] = self.config.get("default_flavor")
|
||||
|
||||
env_vars = {
|
||||
'HAOS_SERVER_ENDPOINT': None,
|
||||
'HAOS_IMAGE': None,
|
||||
'HAOS_FLAVOR': None,
|
||||
'HAOS_JOIN_TIMEOUT': 100,
|
||||
'HAOS_COMMAND_TIMEOUT': 10
|
||||
}
|
||||
|
||||
for var, def_value in env_vars.items():
|
||||
value = os.environ.get(var) or def_value
|
||||
if value:
|
||||
self.context[var.lower()] = value
|
||||
else:
|
||||
LOG.debug('Env var %s must be set'.format(var))
|
||||
|
||||
if self.context["remote_control_type"] == "ssh":
|
||||
ssh = ssh_remote_control.SSHConnection()
|
||||
self.context["haos_remote_control"] = ssh.remote_control
|
||||
elif self.context["remote_control_type"] == "haos_agents":
|
||||
boss_inst = server.Server(self.context["haos_server_endpoint"])
|
||||
self.context["haos_remote_control"] = boss_inst.remote_control
|
||||
else:
|
||||
msg = "remote_control_type {0} doesn't implemented yet.".format(
|
||||
self.context["remote_control_type"]
|
||||
)
|
||||
raise exceptions.RallyException(msg)
|
||||
|
||||
def cleanup(self):
|
||||
"""This method is called after the task finish."""
|
||||
self.context["controllers"] = []
|
||||
resource_manager.cleanup(names=["nova.servers"],
|
||||
users=self.context.get("users", []))
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from rally.benchmark.context import base
|
||||
from rally import consts
|
||||
|
||||
from haos.rally.utils import run_command
|
||||
|
||||
|
||||
@base.context(name="recover_cloud", order=900)
|
||||
class CloudNodesContext(base.Context):
|
||||
@@ -20,31 +18,6 @@ class CloudNodesContext(base.Context):
|
||||
}
|
||||
}
|
||||
|
||||
def check_rabbitmq_cluster_status(self, controllers):
|
||||
command = "rabbitmqctl cluster_status"
|
||||
|
||||
for controller in controllers:
|
||||
nodes = []
|
||||
active_nodes = []
|
||||
|
||||
output = run_command(self.context, controller["agent_endpoint"],
|
||||
command)
|
||||
rabbit_nodes = lambda str: [node for node in str.split("'")
|
||||
if "rabbit" in node]
|
||||
for line in output.splitlines():
|
||||
if "running_nodes" in line:
|
||||
active_nodes = rabbit_nodes(line)
|
||||
elif "nodes" in line:
|
||||
nodes = rabbit_nodes(line)
|
||||
|
||||
if not nodes or len(active_nodes) < len(nodes):
|
||||
return False
|
||||
|
||||
for node in nodes:
|
||||
if node not in active_nodes:
|
||||
return False
|
||||
return True
|
||||
|
||||
def setup(self):
|
||||
"""This method is called before the task start."""
|
||||
self.context["recover_commands"] = []
|
||||
@@ -53,12 +26,3 @@ class CloudNodesContext(base.Context):
|
||||
def cleanup(self):
|
||||
"""This method is called after the task finish."""
|
||||
pass
|
||||
# for action in self.context["recover_commands"]:
|
||||
# run_command(self.context, action["node"], action["command"],
|
||||
# action["executor"])
|
||||
# time.sleep(action.get("timeout", 0))
|
||||
#
|
||||
# controllers = self.context["controllers"]
|
||||
# if "rabbitmq_cluster_status" in self.context["checks"]:
|
||||
# if self.check_rabbitmq_cluster_status(controllers) is False:
|
||||
# raise Exception("RabbitMQ cluster wasn't recovered")
|
||||
|
||||
@@ -162,7 +162,7 @@ class BaseDisaster(neutron_utils.NeutronScenario,
|
||||
|
||||
# Add tcp rule for 22 port and icmp rule
|
||||
def add_rules_for_ping(self):
|
||||
#self._clients = self._admin_clients
|
||||
# self._clients = self._admin_clients
|
||||
sec_groups = self._list_security_groups()
|
||||
|
||||
self.clients("nova").security_group_rules.create(
|
||||
|
||||
34
haos/rally/plugin/power_off_random_controller.py
Normal file
34
haos/rally/plugin/power_off_random_controller.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import random
|
||||
import time
|
||||
|
||||
from haos.rally.plugin import base_disaster
|
||||
from rally.benchmark.scenarios import base
|
||||
from rally.common import log as logging
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ControllerShutdown(base_disaster.BaseDisaster):
    """Atomic destructive scenario: power-cycle one random controller."""

    @base.scenario()
    def power_off_and_on_one_controller(self):
        """Select one controller at random, power it off, then on again.

        Hardware power commands are executed on the dedicated power
        control node; after each command the scenario sleeps for the
        controller-specific timeout so the node can go down / come up.

        Setup:
            OpenStack cloud with at least 3 controllers.
        """
        # random.choice picks a random element directly -- idiomatic
        # replacement for indexing with random.randint(0, len - 1).
        controller = random.choice(self.context["controllers"])
        power_control_node = self.context["power_control_node"]

        self.run_remote_command(power_control_node,
                                command=controller["hardware_power_off_cmd"])
        time.sleep(controller["power_off_timeout"])

        self.run_remote_command(power_control_node,
                                command=controller["hardware_power_on_cmd"])
        time.sleep(controller["power_on_timeout"])
|
||||
@@ -1,26 +0,0 @@
|
||||
import random
|
||||
|
||||
from rally.benchmark.scenarios import base
|
||||
|
||||
from haos.rally.plugin import base_disaster
|
||||
|
||||
|
||||
class RabbitMQDisaster(base_disaster.BaseDisaster):
    """Destructive scenario: power off a controller and verify the cloud."""

    @base.scenario()
    def power_off_one_controller(self):
        """Power off one controller and verify cloud

        Setup:
            OpenStack cloud with at least 3 controllers

        Scenario:
            1. Power off one controller
            2. Verify cloud: create VM 10 times
        """
        controller_id = random.randint(0, len(self.context["controllers"]) - 1)
        self.power_off_controller(controller_id)

        # Bug fix: xrange is Python 2 only; range works on both 2 and 3.
        for i in range(10):
            self.boot_server("test{0}".format(i))
|
||||
24
haos/rally/plugin/run_command_on_random_controller.py
Normal file
24
haos/rally/plugin/run_command_on_random_controller.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import random
|
||||
|
||||
from haos.rally.plugin import base_disaster
|
||||
from rally.benchmark.scenarios import base
|
||||
from rally.common import log as logging
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RunCommand(base_disaster.BaseDisaster):
    """Atomic scenario: run an arbitrary shell command on one controller."""

    @base.scenario()
    def run_command_on_random_controller(self, command='', timeout=300):
        """This scenario executes bash command on random controller

        :param command: command which should be executed
        :param timeout: how long we will wait for command execution
        """
        # random.choice picks a random element directly -- idiomatic
        # replacement for indexing with random.randint(0, len - 1).
        controller = random.choice(self.context["controllers"])

        LOG.info('Running command on controller: %s', controller)
        self.run_remote_command(controller, command, timeout)
|
||||
29
haos/remote/ssh_remote_control.py
Normal file
29
haos/remote/ssh_remote_control.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import paramiko
|
||||
|
||||
from rally.common import log as logging
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run(host, username, password, command, timeout):
    """Execute *command* on *host* over SSH and return its output streams.

    :param host: hostname or IP address to connect to
    :param username: SSH user name
    :param password: SSH password
    :param command: shell command line to execute
    :param timeout: per-command timeout passed to exec_command
    :returns: (stdout, stderr) file-like channel objects
    """
    msg = 'Running command "{0}" on server {1}'
    LOG.info(msg.format(command, host))

    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(host, username=username, password=password)
    _, ssh_stdout, ssh_stderr = ssh.exec_command(command, timeout=timeout)

    # Bug fix: the original busy-waited on exit_status_ready() with a
    # bare ``pass`` loop, burning a CPU core until the command finished;
    # recv_exit_status() blocks without spinning.
    ssh_stdout.channel.recv_exit_status()

    # NOTE(review): the client is deliberately not closed here so the
    # returned stdout/stderr channels remain readable by the caller --
    # this leaks the connection; confirm callers don't need ssh.close().
    return ssh_stdout, ssh_stderr
|
||||
|
||||
|
||||
class SSHConnection(object):
    """Remote-control backend that executes commands over plain SSH."""

    def remote_control(self, host, command, timeout=30):
        """Run *command* on *host* (a dict with connection details)."""
        address = host["public_ip"]
        user = host["user"]
        password = host["password"]
        return run(address, user, password, command, timeout)
|
||||
36
haos/scenarios/block_mysql_port_on_random_controller.json
Normal file
36
haos/scenarios/block_mysql_port_on_random_controller.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"RunCommand.run_command_on_random_controller": [
|
||||
{
|
||||
"args": {
|
||||
"command": "iptables -I INPUT 1 -p tcp --destination-port galeracheck -j DROP && sleep 20 && iptables -D INPUT -p tcp --destination-port galeracheck -j DROP",
|
||||
"timeout": 300
|
||||
},
|
||||
"runner": {
|
||||
"type": "serial",
|
||||
"times": 10
|
||||
},
|
||||
"context": {
|
||||
"cloud_nodes": {
|
||||
"controllers": [
|
||||
{
|
||||
"public_ip": "172.16.0.4",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
},
|
||||
{
|
||||
"public_ip": "172.16.0.5",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
},
|
||||
{
|
||||
"public_ip": "172.16.0.6",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
}
|
||||
],
|
||||
"remote_control_type": "ssh"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
36
haos/scenarios/block_rabbitmq_port_on_random_controller.json
Normal file
36
haos/scenarios/block_rabbitmq_port_on_random_controller.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"RunCommand.run_command_on_random_controller": [
|
||||
{
|
||||
"args": {
|
||||
"command": "iptables -I INPUT 1 -p tcp --destination-port 5673 -j DROP && sleep 20 && iptables -D INPUT -p tcp --destination-port 5673 -j DROP",
|
||||
"timeout": 300
|
||||
},
|
||||
"runner": {
|
||||
"type": "serial",
|
||||
"times": 10
|
||||
},
|
||||
"context": {
|
||||
"cloud_nodes": {
|
||||
"controllers": [
|
||||
{
|
||||
"public_ip": "172.16.0.4",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
},
|
||||
{
|
||||
"public_ip": "172.16.0.5",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
},
|
||||
{
|
||||
"public_ip": "172.16.0.6",
|
||||
"user": "root",
|
||||
"password": "secret"
|
||||
}
|
||||
],
|
||||
"remote_control_type": "ssh"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
{
|
||||
"NeutronDisaster.destroy_non_primary_controller": [
|
||||
{
|
||||
"runner": {
|
||||
"type": "serial",
|
||||
"times": 1
|
||||
},
|
||||
"context": {
|
||||
"users": {
|
||||
"tenants": 1,
|
||||
"users_per_tenant": 1
|
||||
},
|
||||
"roles":[
|
||||
"admin"
|
||||
],
|
||||
"recover_cloud": {
|
||||
"checks": ["rabbitmq_cluster_status"]
|
||||
},
|
||||
"network": {
|
||||
"networks_per_tenant": 2
|
||||
},
|
||||
"cloud_nodes": {
|
||||
"controllers": [
|
||||
{
|
||||
"agent_endpoint": "node-1.domain.tld"
|
||||
},
|
||||
{
|
||||
"agent_endpoint": "node-2.domain.tld"
|
||||
},
|
||||
{
|
||||
"agent_endpoint": "node-3.domain.tld"
|
||||
}
|
||||
],
|
||||
"shaker_endpoint": "172.18.76.21:5999",
|
||||
"shaker_image": "2fb29a22-b351-4466-83ff-21446097b8c9",
|
||||
"default_flavor": "18"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
{
|
||||
"NeutronDisaster.drop_rabbit_port": [
|
||||
{
|
||||
"runner": {
|
||||
"type": "serial",
|
||||
"times": 1
|
||||
},
|
||||
"context": {
|
||||
"users": {
|
||||
"tenants": 1,
|
||||
"users_per_tenant": 1
|
||||
},
|
||||
"recover_cloud": {
|
||||
"checks": ["rabbitmq_cluster_status"]
|
||||
},
|
||||
"cloud_nodes": {
|
||||
"controllers": [
|
||||
{
|
||||
"agent_endpoint": "node-1.domain.tld"
|
||||
},
|
||||
{
|
||||
"agent_endpoint": "node-2.domain.tld"
|
||||
},
|
||||
{
|
||||
"agent_endpoint": "node-3.domain.tld"
|
||||
}
|
||||
],
|
||||
"shaker_endpoint": "172.18.76.21:5999",
|
||||
"shaker_image": "2fb29a22-b351-4466-83ff-21446097b8c9",
|
||||
"default_flavor": "18"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
40
haos/scenarios/power_off_and_on_random_controller.json
Normal file
40
haos/scenarios/power_off_and_on_random_controller.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"ControllerShutdown.power_off_and_on_one_controller": [
|
||||
{
|
||||
"runner": {
|
||||
"type": "serial",
|
||||
"times": 1
|
||||
},
|
||||
"context": {
|
||||
"cloud_nodes": {
|
||||
"controllers": [
|
||||
{
|
||||
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-1 --type headless",
|
||||
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-1 poweroff",
|
||||
"power_off_timeout": 180,
|
||||
"power_on_timeout": 90
|
||||
},
|
||||
{
|
||||
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-2 --type headless",
|
||||
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-2 poweroff",
|
||||
"power_off_timeout": 180,
|
||||
"power_on_timeout": 90
|
||||
},
|
||||
{
|
||||
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-3 --type headless",
|
||||
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-3 poweroff",
|
||||
"power_off_timeout": 180,
|
||||
"power_on_timeout": 90
|
||||
}
|
||||
],
|
||||
"power_control_node": {
|
||||
"public_ip": "172.18.78.30",
|
||||
"user": "xwizard",
|
||||
"password": "xWizard707"
|
||||
},
|
||||
"remote_control_type": "ssh"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -5,5 +5,5 @@ TOP_DIR=$(cd $(dirname "$0") && pwd)
|
||||
SCENARIO=$1
|
||||
|
||||
if [ ! -z ${SCENARIO} ]; then
|
||||
rally --verbose --plugin-path ${TOP_DIR}/../haos/rally/context,${TOP_DIR}/../haos/rally/plugin task start ${SCENARIO}
|
||||
rally --debug --plugin-path ${TOP_DIR}/../haos/rally/context,${TOP_DIR}/../haos/rally/plugin task start ${SCENARIO}
|
||||
fi
|
||||
|
||||
6
tox.ini
6
tox.ini
@@ -23,6 +23,12 @@ commands =
|
||||
bash tools/run_rally.sh {posargs}
|
||||
whitelist_externals = bash
|
||||
|
||||
[testenv:run-for-mos]
|
||||
commands =
|
||||
bash tools/make_env.sh
|
||||
bash tools/run_rally.sh {posargs}
|
||||
whitelist_externals = bash
|
||||
|
||||
[flake8]
|
||||
# E125 continuation line does not distinguish itself from next logical line
|
||||
ignore = E125
|
||||
|
||||
Reference in New Issue
Block a user