[CDH] Fix problem with launching Spark jobs
Sahara EDP should select SPARK_YARN_HISTORY_SERVER node as the master node for running Spark jobs. Also uploaded spark job to cdh_flow on CI. Closes-bug: 1490012 Change-Id: I09a745b8c84e6cca2fb2924e85d845aafc8e1134
This commit is contained in:
parent
19eb3f0071
commit
4824ca3dc6
@ -53,6 +53,7 @@ clusters:
|
|||||||
- HDFS_SECONDARYNAMENODE
|
- HDFS_SECONDARYNAMENODE
|
||||||
- HIVE_METASTORE
|
- HIVE_METASTORE
|
||||||
- HIVE_SERVER2
|
- HIVE_SERVER2
|
||||||
|
- SPARK_YARN_HISTORY_SERVER
|
||||||
auto_security_group: true
|
auto_security_group: true
|
||||||
cluster_template:
|
cluster_template:
|
||||||
name: cdh540
|
name: cdh540
|
||||||
@ -71,4 +72,4 @@ clusters:
|
|||||||
scenario:
|
scenario:
|
||||||
- run_jobs
|
- run_jobs
|
||||||
- sentry
|
- sentry
|
||||||
edp_jobs_flow: hadoop_2
|
edp_jobs_flow: cdh_flow
|
||||||
|
@ -169,3 +169,63 @@ edp_jobs_flow:
|
|||||||
args:
|
args:
|
||||||
- 10
|
- 10
|
||||||
- 10
|
- 10
|
||||||
|
cdh_flow:
|
||||||
|
- type: Pig
|
||||||
|
input_datasource:
|
||||||
|
type: swift
|
||||||
|
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||||
|
output_datasource:
|
||||||
|
type: hdfs
|
||||||
|
destination: /user/hadoop/edp-output
|
||||||
|
main_lib:
|
||||||
|
type: swift
|
||||||
|
source: etc/edp-examples/edp-pig/trim-spaces/example.pig
|
||||||
|
additional_libs:
|
||||||
|
- type: swift
|
||||||
|
source: etc/edp-examples/edp-pig/trim-spaces/udf.jar
|
||||||
|
- type: MapReduce
|
||||||
|
input_datasource:
|
||||||
|
type: swift
|
||||||
|
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||||
|
output_datasource:
|
||||||
|
type: hdfs
|
||||||
|
destination: /user/hadoop/edp-output
|
||||||
|
additional_libs:
|
||||||
|
- type: database
|
||||||
|
source: etc/edp-examples/edp-mapreduce/edp-mapreduce.jar
|
||||||
|
configs:
|
||||||
|
mapred.mapper.class: org.apache.oozie.example.SampleMapper
|
||||||
|
mapred.reducer.class: org.apache.oozie.example.SampleReducer
|
||||||
|
- type: MapReduce.Streaming
|
||||||
|
input_datasource:
|
||||||
|
type: swift
|
||||||
|
source: etc/edp-examples/edp-pig/trim-spaces/data/input
|
||||||
|
output_datasource:
|
||||||
|
type: hdfs
|
||||||
|
destination: /user/hadoop/edp-output
|
||||||
|
configs:
|
||||||
|
edp.streaming.mapper: /bin/cat
|
||||||
|
edp.streaming.reducer: /usr/bin/wc
|
||||||
|
- type: Java
|
||||||
|
additional_libs:
|
||||||
|
- type: database
|
||||||
|
source: etc/edp-examples/hadoop2/edp-java/hadoop-mapreduce-examples-2.6.0.jar
|
||||||
|
configs:
|
||||||
|
edp.java.main_class: org.apache.hadoop.examples.QuasiMonteCarlo
|
||||||
|
args:
|
||||||
|
- 10
|
||||||
|
- 10
|
||||||
|
- type: Spark
|
||||||
|
input_datasource:
|
||||||
|
type: swift
|
||||||
|
source: etc/edp-examples/edp-spark/sample_input.txt
|
||||||
|
main_lib:
|
||||||
|
type: database
|
||||||
|
source: etc/edp-examples/edp-spark/spark-wordcount.jar
|
||||||
|
configs:
|
||||||
|
edp.java.main_class: sahara.edp.spark.SparkWordCount
|
||||||
|
edp.spark.adapt_for_swift: true
|
||||||
|
fs.swift.service.sahara.username: ${OS_USERNAME}
|
||||||
|
fs.swift.service.sahara.password: ${OS_PASSWORD}
|
||||||
|
args:
|
||||||
|
- '{input_datasource}'
|
||||||
|
@ -79,7 +79,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
|
|||||||
|
|
||||||
def __init__(self, cluster):
|
def __init__(self, cluster):
|
||||||
super(EdpSparkEngine, self).__init__(cluster)
|
super(EdpSparkEngine, self).__init__(cluster)
|
||||||
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER")
|
self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
|
||||||
self.plugin_params["spark-user"] = "sudo -u spark "
|
self.plugin_params["spark-user"] = "sudo -u spark "
|
||||||
self.plugin_params["spark-submit"] = "spark-submit"
|
self.plugin_params["spark-submit"] = "spark-submit"
|
||||||
self.plugin_params["deploy-mode"] = "cluster"
|
self.plugin_params["deploy-mode"] = "cluster"
|
||||||
|
@ -82,7 +82,7 @@ class EdpSparkEngine(edp_spark_engine.SparkJobEngine):
|
|||||||
|
|
||||||
def __init__(self, cluster):
|
def __init__(self, cluster):
|
||||||
super(EdpSparkEngine, self).__init__(cluster)
|
super(EdpSparkEngine, self).__init__(cluster)
|
||||||
self.master = u.get_instance(cluster, "CLOUDERA_MANAGER")
|
self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
|
||||||
self.plugin_params["spark-user"] = "sudo -u spark "
|
self.plugin_params["spark-user"] = "sudo -u spark "
|
||||||
self.plugin_params["spark-submit"] = "spark-submit"
|
self.plugin_params["spark-submit"] = "spark-submit"
|
||||||
self.plugin_params["deploy-mode"] = "cluster"
|
self.plugin_params["deploy-mode"] = "cluster"
|
||||||
|
@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
|
|||||||
class TestClouderaPlugin(tests.TestSpark):
|
class TestClouderaPlugin(tests.TestSpark):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super(TestClouderaPlugin, self).setUp()
|
super(TestClouderaPlugin, self).setUp()
|
||||||
self.master_host = "CLOUDERA_MANAGER"
|
self.master_host = "SPARK_YARN_HISTORY_SERVER"
|
||||||
self.engine_class = edp_engine.EdpSparkEngine
|
self.engine_class = edp_engine.EdpSparkEngine
|
||||||
self.spark_user = "sudo -u spark "
|
self.spark_user = "sudo -u spark "
|
||||||
self.spark_submit = "spark-submit"
|
self.spark_submit = "spark-submit"
|
||||||
|
@ -20,7 +20,7 @@ from sahara.tests.unit.service.edp.spark import base as tests
|
|||||||
class TestClouderaPlugin(tests.TestSpark):
|
class TestClouderaPlugin(tests.TestSpark):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
super(TestClouderaPlugin, self).setUp()
|
super(TestClouderaPlugin, self).setUp()
|
||||||
self.master_host = "CLOUDERA_MANAGER"
|
self.master_host = "SPARK_YARN_HISTORY_SERVER"
|
||||||
self.engine_class = edp_engine.EdpSparkEngine
|
self.engine_class = edp_engine.EdpSparkEngine
|
||||||
self.spark_user = "sudo -u spark "
|
self.spark_user = "sudo -u spark "
|
||||||
self.spark_submit = "spark-submit"
|
self.spark_submit = "spark-submit"
|
||||||
|
Loading…
Reference in New Issue
Block a user