Add Spark integration test
This integration test includes spinning up a cluster, running an EDP job,
and deleting the cluster. Cluster scaling is not currently included. The
Spark example included is SparkPi.

Implements: blueprint edp-spark-integration-tests

Change-Id: I9cdb3ab29d3364024f5cbe42b3cf4bae398cd547
parent 90187b0322
commit bab13bc26d
etc/edp-examples/edp-spark/NOTICE.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
+This example includes software developed by The Apache Software
+Foundation (http://www.apache.org/).
etc/edp-examples/edp-spark/README.rst (new file, 8 lines)
@@ -0,0 +1,8 @@
+Example Spark Job
+=================
+
+This example contains the compiled classes for SparkPi extracted from
+the example jar distributed with Apache Spark version 1.0.0.
+
+SparkPi example estimates Pi. It can take a single optional integer
+argument specifying the number of slices (tasks) to use.
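For readers who have not seen the upstream example, the following is a rough, self-contained Python sketch of what SparkPi computes: a Monte Carlo estimate of Pi. It is illustrative only; the actual job in spark-example.jar is the compiled class shipped with Spark 1.0.0, and this sketch runs the sampling serially rather than as Spark tasks. The function name and sample count are invented for the sketch:

    import random


    def estimate_pi(slices=2, samples_per_slice=100000):
        # Sample points in the square [-1, 1] x [-1, 1] and count how many
        # land inside the unit circle; the ratio approaches pi / 4.
        # "slices" mirrors the optional argument described in the README.
        total = slices * samples_per_slice
        inside = 0
        for _ in range(total):
            x = random.random() * 2 - 1
            y = random.random() * 2 - 1
            if x * x + y * y <= 1:
                inside += 1
        return 4.0 * inside / total


    if __name__ == '__main__':
        # The integration test passes '4' as the slices argument.
        print(estimate_pi(slices=4))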
etc/edp-examples/edp-spark/spark-example.jar (new binary file, not shown)
@@ -547,6 +547,71 @@ HDP2_CONFIG_OPTS = [
 ]
+
+
+SPARK_CONFIG_GROUP = cfg.OptGroup(name='SPARK')
+SPARK_CONFIG_OPTS = [
+    cfg.StrOpt('PLUGIN_NAME',
+               default='spark',
+               help='Name of plugin.'),
+    cfg.StrOpt('IMAGE_ID',
+               default=None,
+               help='ID for image which is used for cluster creation. Also '
+                    'you can specify image name or tag of image instead of '
+                    'image ID. If you do not specify image related '
+                    'parameters, then image for cluster creation will be '
+                    'chosen by tag "sahara_i_tests".'),
+    cfg.StrOpt('IMAGE_NAME',
+               default=None,
+               help='Name for image which is used for cluster creation. Also '
+                    'you can specify image ID or tag of image instead of '
+                    'image name. If you do not specify image related '
+                    'parameters, then image for cluster creation will be '
+                    'chosen by tag "sahara_i_tests".'),
+    cfg.StrOpt('IMAGE_TAG',
+               default=None,
+               help='Tag for image which is used for cluster creation. Also '
+                    'you can specify image ID or image name instead of tag of '
+                    'image. If you do not specify image related parameters, '
+                    'then image for cluster creation will be chosen by '
+                    'tag "sahara_i_tests".'),
+    cfg.ListOpt('MASTER_NODE_PROCESSES',
+                default=['namenode', 'master'],
+                help='A list of processes that will be launched '
+                     'on master node'),
+    cfg.ListOpt('WORKER_NODE_PROCESSES',
+                default=['datanode', 'slave'],
+                help='A list of processes that will be launched '
+                     'on worker nodes'),
+    cfg.StrOpt('HADOOP_VERSION',
+               default='1.0.0',
+               help='Version of Spark (even though it says "HADOOP").'),
+    cfg.StrOpt('HADOOP_USER',
+               default='hdfs',
+               help='Username which is used for access to Hadoop services.'),
+    cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS',
+                default={
+                    'master': 7077,
+                    'namenode': 8020,
+                    'datanode': 50075
+                },
+                help='Spark process map with ports for spark plugin.'
+                ),
+    cfg.DictOpt('PROCESS_NAMES',
+                default={
+                    'nn': 'namenode',
+                    'tt': 'tasktracker',
+                    'dn': 'datanode'
+                },
+                help='Names for namenode, tasktracker and datanode '
+                     'processes.'),
+    cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN',
+                default=True,
+                help='If this flag is True, then all tests for Spark plugin '
+                     'will be skipped.'),
+    cfg.BoolOpt('SKIP_EDP_TEST', default=False),
+    cfg.BoolOpt('SKIP_SCALING_TEST', default=False)
+]
 
 
 def register_config(config, config_group, config_opts):
     config.register_group(config_group)
     config.register_opts(config_opts, config_group)
@@ -578,6 +643,7 @@ class ITConfig:
         register_config(cfg.CONF, HDP2_CONFIG_GROUP, HDP2_CONFIG_OPTS)
         register_config(
             cfg.CONF, VANILLA_TWO_CONFIG_GROUP, VANILLA_TWO_CONFIG_OPTS)
+        register_config(cfg.CONF, SPARK_CONFIG_GROUP, SPARK_CONFIG_OPTS)
 
         cfg.CONF(
             [], project='Sahara_integration_tests',
@@ -590,3 +656,4 @@ class ITConfig:
         self.cdh_config = cfg.CONF.CDH
         self.hdp_config = cfg.CONF.HDP
         self.hdp2_config = cfg.CONF.HDP2
+        self.spark_config = cfg.CONF.SPARK
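The hunks above follow the usual oslo.config pattern: the SPARK options are declared in a named group, registered on the global cfg.CONF, and then exposed to the tests as cfg.CONF.SPARK. The block below is a minimal, standalone sketch of that pattern with a trimmed-down option list; the oslo_config import path and the private ConfigOpts instance are assumptions made so the sketch runs on its own, not part of this change:

    from oslo_config import cfg

    SPARK_CONFIG_GROUP = cfg.OptGroup(name='SPARK')
    SPARK_CONFIG_OPTS = [
        cfg.StrOpt('PLUGIN_NAME', default='spark', help='Name of plugin.'),
        cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN', default=True),
    ]


    def register_config(config, config_group, config_opts):
        # Same helper as in the diff: create the group, then attach its options.
        config.register_group(config_group)
        config.register_opts(config_opts, config_group)


    if __name__ == '__main__':
        conf = cfg.ConfigOpts()
        register_config(conf, SPARK_CONFIG_GROUP, SPARK_CONFIG_OPTS)
        conf([], project='example')  # no CLI args; values come from defaults
        # Once registered, the group is reachable as an attribute, which is
        # what ITConfig relies on when it stores cfg.CONF.SPARK.
        print(conf.SPARK.PLUGIN_NAME)                # 'spark'
        print(conf.SPARK.SKIP_ALL_TESTS_FOR_PLUGIN)  # True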
@@ -30,6 +30,8 @@ class EDPJobInfo(object):
     PIG_PATH = 'etc/edp-examples/pig-job/'
     JAVA_PATH = 'etc/edp-examples/edp-java/'
     MAPREDUCE_PATH = 'etc/edp-examples/edp-mapreduce/'
+    SPARK_PATH = 'etc/edp-examples/edp-spark/'
+
     HADOOP2_JAVA_PATH = 'etc/edp-examples/hadoop2/edp-java/'
 
     def read_pig_example_script(self):
@@ -81,6 +83,18 @@ class EDPJobInfo(object):
             }
         }
 
+    def read_spark_example_jar(self):
+        return open(self.SPARK_PATH + 'spark-example.jar').read()
+
+    def spark_example_configs(self):
+        return {
+            'configs': {
+                'edp.java.main_class':
+                    'org.apache.spark.examples.SparkPi'
+            },
+            'args': ['4']
+        }
+
 
 class EDPTest(base.ITestCase):
     def setUp(self):
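These two helpers are all the gating test needs to describe the job: the jar contents plus a config dict naming the SparkPi main class and a default argument. As a hypothetical usage note (not part of the change), a caller running from the sahara tree could adjust the slice count before submitting, roughly like this:

    from sahara.tests.integration.tests.edp import EDPJobInfo

    info = EDPJobInfo()
    spark_jar = info.read_spark_example_jar()     # contents of spark-example.jar
    spark_configs = info.spark_example_configs()
    spark_configs['args'] = ['8']                 # eight slices instead of the default four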
@@ -227,7 +241,9 @@ class EDPTest(base.ITestCase):
 
         # Java jobs don't use data sources. Input/output paths must
         # be passed as args with corresponding username/password configs
-        if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
+        if not edp.compare_job_type(job_type,
+                                    edp.JOB_TYPE_JAVA,
+                                    edp.JOB_TYPE_SPARK):
             input_id = self._create_data_source(
                 'input-%s' % str(uuid.uuid4())[:8], 'swift',
                 swift_input_url)
@@ -265,6 +281,10 @@ class EDPTest(base.ITestCase):
         if not configs:
             configs = {}
 
+        # TODO(tmckay): for spark we don't have support for swift
+        # yet. When we do, we'll need something here to set up
+        # swift paths and we can use a spark wordcount job
+
         # Append the input/output paths with the swift configs
         # if the caller has requested it...
         if edp.compare_job_type(
@@ -195,6 +195,8 @@ class HDP2GatingTest(swift.SwiftTest, scaling.ScalingTest,
         self._edp_test()
 
     @testcase.attr('hdp2')
+    @testcase.skipIf(config.SKIP_ALL_TESTS_FOR_PLUGIN,
+                     'All tests for HDP2 plugin were skipped')
     def test_hdp2_plugin_gating(self):
         self._prepare_test()
         self._create_rm_nn_ng_template()
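The same skip guard is applied to the new Spark gating test below. One detail worth noting: skipIf receives a plain boolean, so the flag is evaluated when the class body executes (at import time), which is why the gating tests read their plugin config as a class attribute. A minimal, self-contained illustration follows; the DemoTest class and SKIP_DEMO flag are invented for the sketch and stand in for the real config value:

    import testtools
    from testtools import testcase

    SKIP_DEMO = True  # stands in for config.SKIP_ALL_TESTS_FOR_PLUGIN


    class DemoTest(testtools.TestCase):

        @testcase.attr('demo')
        @testcase.skipIf(SKIP_DEMO, 'demo test was skipped')
        def test_demo(self):
            # Never reached while SKIP_DEMO is True; the runner reports a skip.
            self.assertTrue(True)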
sahara/tests/integration/tests/gating/test_spark_gating.py (new file, 158 lines)
@@ -0,0 +1,158 @@
+# Copyright 2014 OpenStack Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from testtools import testcase
+
+from sahara.tests.integration.configs import config as cfg
+from sahara.tests.integration.tests import base as b
+from sahara.tests.integration.tests import edp
+from sahara.tests.integration.tests import scaling
+from sahara.tests.integration.tests import swift
+from sahara.utils import edp as utils_edp
+
+
+class SparkGatingTest(swift.SwiftTest, scaling.ScalingTest,
+                      edp.EDPTest):
+
+    config = cfg.ITConfig().spark_config
+    SKIP_EDP_TEST = config.SKIP_EDP_TEST
+
+    def setUp(self):
+        super(SparkGatingTest, self).setUp()
+        self.cluster_id = None
+        self.cluster_template_id = None
+        self.ng_template_ids = []
+
+    def _prepare_test(self):
+        self.spark_config = cfg.ITConfig().spark_config
+        self.floating_ip_pool = self.common_config.FLOATING_IP_POOL
+        self.internal_neutron_net = None
+        if self.common_config.NEUTRON_ENABLED:
+            self.internal_neutron_net = self.get_internal_neutron_net_id()
+            self.floating_ip_pool = (
+                self.get_floating_ip_pool_id_for_neutron_net())
+
+        self.spark_config.IMAGE_ID, self.spark_config.SSH_USERNAME = (
+            self.get_image_id_and_ssh_username(self.spark_config))
+
+    @b.errormsg("Failure while 'm-nn' node group template creation: ")
+    def _create_m_nn_ng_template(self):
+        template = {
+            'name': 'test-node-group-template-spark-m-nn',
+            'plugin_config': self.spark_config,
+            'description': 'test node group template for Spark plugin',
+            'node_processes': self.spark_config.MASTER_NODE_PROCESSES,
+            'floating_ip_pool': self.floating_ip_pool,
+            'node_configs': {}
+        }
+        self.ng_tmpl_m_nn_id = self.create_node_group_template(**template)
+        self.ng_template_ids.append(self.ng_tmpl_m_nn_id)
+
+    @b.errormsg("Failure while 's-dn' node group template creation: ")
+    def _create_s_dn_ng_template(self):
+        template = {
+            'name': 'test-node-group-template-spark-s-dn',
+            'plugin_config': self.spark_config,
+            'description': 'test node group template for Spark plugin',
+            'node_processes': self.spark_config.WORKER_NODE_PROCESSES,
+            'floating_ip_pool': self.floating_ip_pool,
+            'node_configs': {}
+        }
+        self.ng_tmpl_s_dn_id = self.create_node_group_template(**template)
+        self.ng_template_ids.append(self.ng_tmpl_s_dn_id)
+
+    @b.errormsg("Failure while cluster template creation: ")
+    def _create_cluster_template(self):
+        template = {
+            'name': 'test-cluster-template-spark',
+            'plugin_config': self.spark_config,
+            'description': 'test cluster template for Spark plugin',
+            'cluster_configs': {
+            },
+            'node_groups': [
+                {
+                    'name': 'master-node',
+                    'node_group_template_id': self.ng_tmpl_m_nn_id,
+                    'count': 1
+                },
+                {
+                    'name': 'worker-node',
+                    'node_group_template_id': self.ng_tmpl_s_dn_id,
+                    'count': 1
+                }
+            ],
+            'net_id': self.internal_neutron_net
+        }
+        self.cluster_template_id = self.create_cluster_template(**template)
+
+    @b.errormsg("Failure while cluster creation: ")
+    def _create_cluster(self):
+        cluster_name = '%s-%s' % (self.common_config.CLUSTER_NAME,
+                                  self.spark_config.PLUGIN_NAME)
+        cluster = {
+            'name': cluster_name,
+            'plugin_config': self.spark_config,
+            'cluster_template_id': self.cluster_template_id,
+            'description': 'test cluster',
+            'cluster_configs': {}
+        }
+        self.create_cluster(**cluster)
+        self.cluster_info = self.get_cluster_info(self.spark_config)
+        self.await_active_workers_for_namenode(self.cluster_info['node_info'],
+                                               self.spark_config)
+
+    @b.errormsg("Failure while EDP testing: ")
+    def _check_edp(self):
+        self._edp_test()
+
+    def _edp_test(self):
+        # check spark
+        spark_jar = self.edp_info.read_spark_example_jar()
+        spark_configs = self.edp_info.spark_example_configs()
+        self.edp_testing(utils_edp.JOB_TYPE_SPARK,
+                         job_data_list=[{'jar': spark_jar}],
+                         lib_data_list=[],
+                         configs=spark_configs)
+
+    @b.errormsg("Failure while cluster scaling: ")
+    def _check_scaling(self):
+        pass
+
+    @b.errormsg("Failure while EDP testing after cluster scaling: ")
+    def _check_edp_after_scaling(self):
+        # Leave this blank until scaling is implemented
+        pass
+
+    @testcase.attr('spark')
+    @testcase.skipIf(config.SKIP_ALL_TESTS_FOR_PLUGIN,
+                     'All tests for Spark plugin were skipped')
+    def test_spark_plugin_gating(self):
+
+        self._prepare_test()
+        self._create_m_nn_ng_template()
+        self._create_s_dn_ng_template()
+        self._create_cluster_template()
+        self._create_cluster()
+
+        self._check_edp()
+
+        if not self.spark_config.SKIP_SCALING_TEST:
+            self._check_scaling()
+            self._check_edp_after_scaling()
+
+    def tearDown(self):
+        self.delete_objects(self.cluster_id, self.cluster_template_id,
+                            self.ng_template_ids)
+        super(SparkGatingTest, self).tearDown()