[CDH] Fix problem with launching Spark jobs

Sahara EDP should select SPARK_YARN_HISTORY_SERVER node
as the master node for running Spark jobs.
Also uploaded spark job to cdh_flow on CI.

Closes-bug: 1490012
Change-Id: I09a745b8c84e6cca2fb2924e85d845aafc8e1134
This commit is contained in:
Vitaly Gridnev 2015-09-04 16:20:17 +03:00
parent 19eb3f0071
commit 4824ca3dc6
6 changed files with 66 additions and 5 deletions

View File

@ -53,6 +53,7 @@ clusters:
- HDFS_SECONDARYNAMENODE - HDFS_SECONDARYNAMENODE
- HIVE_METASTORE - HIVE_METASTORE
- HIVE_SERVER2 - HIVE_SERVER2
- SPARK_YARN_HISTORY_SERVER
auto_security_group: true auto_security_group: true
cluster_template: cluster_template:
name: cdh540 name: cdh540
@ -71,4 +72,4 @@ clusters:
scenario: scenario:
- run_jobs - run_jobs
- sentry - sentry
edp_jobs_flow: hadoop_2 edp_jobs_flow: cdh_flow

View File

@ -169,3 +169,63 @@ edp_jobs_flow:
args: args:
- 10 - 10
- 10 - 10
cdh_flow:
- type: Pig
input_datasource:
type: swift
source: etc/edp-examples/edp-pig/trim-spaces/data/input
output_datasource:
type: hdfs
destination: /user/hadoop/edp-output
main_lib:
type: swift
source: etc/edp-examples/edp-pig/trim-spaces/example.pig
additional_libs:
- type: swift
source: etc/edp-examples/edp-pig/trim-spaces/udf.jar
- type: MapReduce
input_datasource:
type: swift
source: etc/edp-examples/edp-pig/trim-spaces/data/input
output_datasource:
type: hdfs
destination: /user/hadoop/edp-output
additional_libs:
- type: database
source: etc/edp-examples/edp-mapreduce/edp-mapreduce.jar
configs:
mapred.mapper.class: org.apache.oozie.example.SampleMapper
mapred.reducer.class: org.apache.oozie.example.SampleReducer
- type: MapReduce.Streaming
input_datasource:
type: swift
source: etc/edp-examples/edp-pig/trim-spaces/data/input
output_datasource:
type: hdfs
destination: /user/hadoop/edp-output
configs:
edp.streaming.mapper: /bin/cat
edp.streaming.reducer: /usr/bin/wc
- type: Java
additional_libs:
- type: database
source: etc/edp-examples/hadoop2/edp-java/hadoop-mapreduce-examples-2.6.0.jar
configs:
edp.java.main_class: org.apache.hadoop.examples.QuasiMonteCarlo
args:
- 10
- 10
- type: Spark
input_datasource:
type: swift
source: etc/edp-examples/edp-spark/sample_input.txt
main_lib:
type: database
source: etc/edp-examples/edp-spark/spark-wordcount.jar
configs:
edp.java.main_class: sahara.edp.spark.SparkWordCount
edp.spark.adapt_for_swift: true
fs.swift.service.sahara.username: ${OS_USERNAME}
fs.swift.service.sahara.password: ${OS_PASSWORD}
args:
- '{input_datasource}'

View File

@ -79,7 +79,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
def __init__(self, cluster): def __init__(self, cluster):
super(EdpSparkEngine, self).__init__(cluster) super(EdpSparkEngine, self).__init__(cluster)
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER") self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
self.plugin_params["spark-user"] = "sudo -u spark " self.plugin_params["spark-user"] = "sudo -u spark "
self.plugin_params["spark-submit"] = "spark-submit" self.plugin_params["spark-submit"] = "spark-submit"
self.plugin_params["deploy-mode"] = "cluster" self.plugin_params["deploy-mode"] = "cluster"

View File

@ -82,7 +82,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
def __init__(self, cluster): def __init__(self, cluster):
super(EdpSparkEngine, self).__init__(cluster) super(EdpSparkEngine, self).__init__(cluster)
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER") self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
self.plugin_params["spark-user"] = "sudo -u spark " self.plugin_params["spark-user"] = "sudo -u spark "
self.plugin_params["spark-submit"] = "spark-submit" self.plugin_params["spark-submit"] = "spark-submit"
self.plugin_params["deploy-mode"] = "cluster" self.plugin_params["deploy-mode"] = "cluster"

View File

@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
class TestClouderaPlugin(tests.TestSpark): class TestClouderaPlugin(tests.TestSpark):
def setUp(self): def setUp(self):
super(TestClouderaPlugin, self).setUp() super(TestClouderaPlugin, self).setUp()
self.master_host = "CLOUDERA_MANAGER" self.master_host = "SPARK_YARN_HISTORY_SERVER"
self.engine_class = edp_engine.EdpSparkEngine self.engine_class = edp_engine.EdpSparkEngine
self.spark_user = "sudo -u spark " self.spark_user = "sudo -u spark "
self.spark_submit = "spark-submit" self.spark_submit = "spark-submit"

View File

@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
class TestClouderaPlugin(tests.TestSpark): class TestClouderaPlugin(tests.TestSpark):
def setUp(self): def setUp(self):
super(TestClouderaPlugin, self).setUp() super(TestClouderaPlugin, self).setUp()
self.master_host = "CLOUDERA_MANAGER" self.master_host = "SPARK_YARN_HISTORY_SERVER"
self.engine_class = edp_engine.EdpSparkEngine self.engine_class = edp_engine.EdpSparkEngine
self.spark_user = "sudo -u spark " self.spark_user = "sudo -u spark "
self.spark_submit = "spark-submit" self.spark_submit = "spark-submit"