[CDH] Fix problem with launching Spark jobs
Sahara EDP should select the SPARK_YARN_HISTORY_SERVER node as the master node for running Spark jobs. Also adds a Spark job to cdh_flow on CI.

Closes-bug: 1490012
Change-Id: I09a745b8c84e6cca2fb2924e85d845aafc8e1134
This commit is contained in:
parent
19eb3f0071
commit
4824ca3dc6
@ -53,6 +53,7 @@ clusters:
|
||||
- HDFS_SECONDARYNAMENODE
|
||||
- HIVE_METASTORE
|
||||
- HIVE_SERVER2
|
||||
- SPARK_YARN_HISTORY_SERVER
|
||||
auto_security_group: true
|
||||
cluster_template:
|
||||
name: cdh540
|
||||
@ -71,4 +72,4 @@ clusters:
|
||||
scenario:
|
||||
- run_jobs
|
||||
- sentry
|
||||
edp_jobs_flow: hadoop_2
|
||||
edp_jobs_flow: cdh_flow
|
||||
|
@ -169,3 +169,63 @@ edp_jobs_flow:
|
||||
args:
|
||||
- 10
|
||||
- 10
|
||||
cdh_flow:
|
||||
- type: Pig
|
||||
input_datasource:
|
||||
type: swift
|
||||
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||
output_datasource:
|
||||
type: hdfs
|
||||
destination: /user/hadoop/edp-output
|
||||
main_lib:
|
||||
type: swift
|
||||
source: etc/edp-examples/edp-pig/trim-spaces/example.pig
|
||||
additional_libs:
|
||||
- type: swift
|
||||
source: etc/edp-examples/edp-pig/trim-spaces/udf.jar
|
||||
- type: MapReduce
|
||||
input_datasource:
|
||||
type: swift
|
||||
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||
output_datasource:
|
||||
type: hdfs
|
||||
destination: /user/hadoop/edp-output
|
||||
additional_libs:
|
||||
- type: database
|
||||
source: etc/edp-examples/edp-mapreduce/edp-mapreduce.jar
|
||||
configs:
|
||||
mapred.mapper.class: org.apache.oozie.example.SampleMapper
|
||||
mapred.reducer.class: org.apache.oozie.example.SampleReducer
|
||||
- type: MapReduce.Streaming
|
||||
input_datasource:
|
||||
type: swift
|
||||
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||
output_datasource:
|
||||
type: hdfs
|
||||
destination: /user/hadoop/edp-output
|
||||
configs:
|
||||
edp.streaming.mapper: /bin/cat
|
||||
edp.streaming.reducer: /usr/bin/wc
|
||||
- type: Java
|
||||
additional_libs:
|
||||
- type: database
|
||||
source: etc/edp-examples/hadoop2/edp-java/hadoop-mapreduce-examples-2.6.0.jar
|
||||
configs:
|
||||
edp.java.main_class: org.apache.hadoop.examples.QuasiMonteCarlo
|
||||
args:
|
||||
- 10
|
||||
- 10
|
||||
- type: Spark
|
||||
input_datasource:
|
||||
type: swift
|
||||
source: etc/edp-examples/edp-spark/sample_input.txt
|
||||
main_lib:
|
||||
type: database
|
||||
source: etc/edp-examples/edp-spark/spark-wordcount.jar
|
||||
configs:
|
||||
edp.java.main_class: sahara.edp.spark.SparkWordCount
|
||||
edp.spark.adapt_for_swift: true
|
||||
fs.swift.service.sahara.username: ${OS_USERNAME}
|
||||
fs.swift.service.sahara.password: ${OS_PASSWORD}
|
||||
args:
|
||||
- '{input_datasource}'
|
||||
|
@ -79,7 +79,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
|
||||
|
||||
def __init__(self, cluster):
|
||||
super(EdpSparkEngine, self).__init__(cluster)
|
||||
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER")
|
||||
self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
|
||||
self.plugin_params["spark-user"] = "sudo -u spark "
|
||||
self.plugin_params["spark-submit"] = "spark-submit"
|
||||
self.plugin_params["deploy-mode"] = "cluster"
|
||||
|
@ -82,7 +82,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
|
||||
|
||||
def __init__(self, cluster):
|
||||
super(EdpSparkEngine, self).__init__(cluster)
|
||||
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER")
|
||||
self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
|
||||
self.plugin_params["spark-user"] = "sudo -u spark "
|
||||
self.plugin_params["spark-submit"] = "spark-submit"
|
||||
self.plugin_params["deploy-mode"] = "cluster"
|
||||
|
@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
|
||||
class TestClouderaPlugin(tests.TestSpark):
|
||||
def setUp(self):
|
||||
super(TestClouderaPlugin, self).setUp()
|
||||
self.master_host = "CLOUDERA_MANAGER"
|
||||
self.master_host = "SPARK_YARN_HISTORY_SERVER"
|
||||
self.engine_class = edp_engine.EdpSparkEngine
|
||||
self.spark_user = "sudo -u spark "
|
||||
self.spark_submit = "spark-submit"
|
||||
|
@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
|
||||
class TestClouderaPlugin(tests.TestSpark):
|
||||
def setUp(self):
|
||||
super(TestClouderaPlugin, self).setUp()
|
||||
self.master_host = "CLOUDERA_MANAGER"
|
||||
self.master_host = "SPARK_YARN_HISTORY_SERVER"
|
||||
self.engine_class = edp_engine.EdpSparkEngine
|
||||
self.spark_user = "sudo -u spark "
|
||||
self.spark_submit = "spark-submit"
|
||||
|
Loading…
Reference in New Issue
Block a user