From ab0ac92d283bbdec3abb523880c64484289d624c Mon Sep 17 00:00:00 2001
From: Shu Yingya
Date: Tue, 27 Feb 2018 19:15:38 +0800
Subject: [PATCH] Fix Spark EDP job failure in vanilla 2.8.2

Vanilla should specify the hadoop-openstack package corresponding to
the plugin version in the Spark configuration. In vanilla 2.8.2, the
hadoop-openstack version was still hard-coded to 2.7.1, so Spark EDP
jobs failed with "No such file".

Change-Id: I5b54d69def7b457715ed60da3663a0153fe94be8
---
 .../plugins/vanilla/hadoop2/config_helper.py | 18 -------------
 .../plugins/vanilla/v2_7_1/config_helper.py  | 27 ++++++++++++++++++-
 .../plugins/vanilla/v2_8_2/config_helper.py  | 27 ++++++++++++++++++-
 .../vanilla/hadoop2/test_config_helper.py    | 13 ---------
 .../vanilla/v2_7_1/test_config_helper.py     | 18 +++++--------
 .../vanilla/v2_8_2/test_config_helper.py     | 18 +++++--------
 6 files changed, 66 insertions(+), 55 deletions(-)

diff --git a/sahara/plugins/vanilla/hadoop2/config_helper.py b/sahara/plugins/vanilla/hadoop2/config_helper.py
index 9624ca3..007ac94 100644
--- a/sahara/plugins/vanilla/hadoop2/config_helper.py
+++ b/sahara/plugins/vanilla/hadoop2/config_helper.py
@@ -85,20 +85,9 @@ PRIORITY_1_CONFS = [
     'yarn.scheduler.minimum-allocation-vcores'
 ]
 
-_default_executor_classpath = ":".join(
-    ['/opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.7.1.jar'])
-
 SPARK_CONFS = {
     'Spark': {
         "OPTIONS": [
-            {
-                'name': 'Executor extra classpath',
-                'description': 'Value for spark.executor.extraClassPath'
-                               ' in spark-defaults.conf'
-                               ' (default: %s)' % _default_executor_classpath,
-                'default': '%s' % _default_executor_classpath,
-                'priority': 2,
-            },
             {
                 'name': 'Spark home',
                 'description': 'The location of the spark installation'
@@ -278,13 +267,6 @@ def is_data_locality_enabled(pctx, cluster):
                                       ENABLE_DATA_LOCALITY.name, cluster)
 
 
-def _get_spark_opt_default(opt_name):
-    for opt in SPARK_CONFS["Spark"]["OPTIONS"]:
-        if opt_name == opt["name"]:
-            return opt["default"]
-    return None
-
-
 def generate_spark_env_configs(cluster):
     configs = []
 
diff --git a/sahara/plugins/vanilla/v2_7_1/config_helper.py b/sahara/plugins/vanilla/v2_7_1/config_helper.py
index 0c4fb60..3c1f060 100644
--- a/sahara/plugins/vanilla/v2_7_1/config_helper.py
+++ b/sahara/plugins/vanilla/v2_7_1/config_helper.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
+
 from oslo_config import cfg
 import six
 
@@ -41,6 +43,22 @@ OOZIE_DEFAULT = x.load_hadoop_xml_defaults(
 HIVE_DEFAULT = x.load_hadoop_xml_defaults(
     'plugins/vanilla/v2_7_1/resources/hive-default.xml')
 
+_default_executor_classpath = ":".join(
+    ['/opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.7.1.jar'])
+
+SPARK_CONFS = copy.deepcopy(c_helper.SPARK_CONFS)
+
+SPARK_CONFS['Spark']['OPTIONS'].append(
+    {
+        'name': 'Executor extra classpath',
+        'description': 'Value for spark.executor.extraClassPath'
+                       ' in spark-defaults.conf'
+                       ' (default: %s)' % _default_executor_classpath,
+        'default': '%s' % _default_executor_classpath,
+        'priority': 2,
+    }
+)
+
 XML_CONFS = {
     "Hadoop": [CORE_DEFAULT],
     "HDFS": [HDFS_DEFAULT],
@@ -83,9 +101,16 @@ def _init_all_configs():
     return configs
 
 
+def _get_spark_opt_default(opt_name):
+    for opt in SPARK_CONFS["Spark"]["OPTIONS"]:
+        if opt_name == opt["name"]:
+            return opt["default"]
+    return None
+
+
 def _get_spark_configs():
     spark_configs = []
-    for service, config_items in six.iteritems(c_helper.SPARK_CONFS):
+    for service, config_items in six.iteritems(SPARK_CONFS):
         for item in config_items['OPTIONS']:
             cfg = p.Config(name=item["name"],
                            description=item["description"],
diff --git a/sahara/plugins/vanilla/v2_8_2/config_helper.py b/sahara/plugins/vanilla/v2_8_2/config_helper.py
index 447ff93..280330a 100644
--- a/sahara/plugins/vanilla/v2_8_2/config_helper.py
+++ b/sahara/plugins/vanilla/v2_8_2/config_helper.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
+
 from oslo_config import cfg
 import six
 
@@ -41,6 +43,22 @@ OOZIE_DEFAULT = x.load_hadoop_xml_defaults(
 HIVE_DEFAULT = x.load_hadoop_xml_defaults(
     'plugins/vanilla/v2_8_2/resources/hive-default.xml')
 
+_default_executor_classpath = ":".join(
+    ['/opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.8.2.jar'])
+
+SPARK_CONFS = copy.deepcopy(c_helper.SPARK_CONFS)
+
+SPARK_CONFS['Spark']['OPTIONS'].append(
+    {
+        'name': 'Executor extra classpath',
+        'description': 'Value for spark.executor.extraClassPath'
+                       ' in spark-defaults.conf'
+                       ' (default: %s)' % _default_executor_classpath,
+        'default': '%s' % _default_executor_classpath,
+        'priority': 2,
+    }
+)
+
 XML_CONFS = {
     "Hadoop": [CORE_DEFAULT],
     "HDFS": [HDFS_DEFAULT],
@@ -83,9 +101,16 @@ def _init_all_configs():
     return configs
 
 
+def _get_spark_opt_default(opt_name):
+    for opt in SPARK_CONFS["Spark"]["OPTIONS"]:
+        if opt_name == opt["name"]:
+            return opt["default"]
+    return None
+
+
 def _get_spark_configs():
     spark_configs = []
-    for service, config_items in six.iteritems(c_helper.SPARK_CONFS):
+    for service, config_items in six.iteritems(SPARK_CONFS):
         for item in config_items['OPTIONS']:
             cfg = p.Config(name=item["name"],
                            description=item["description"],
diff --git a/sahara/tests/unit/plugins/vanilla/hadoop2/test_config_helper.py b/sahara/tests/unit/plugins/vanilla/hadoop2/test_config_helper.py
index 261b997..a105272 100644
--- a/sahara/tests/unit/plugins/vanilla/hadoop2/test_config_helper.py
+++ b/sahara/tests/unit/plugins/vanilla/hadoop2/test_config_helper.py
@@ -123,19 +123,6 @@ class TestConfigHelper(base.SaharaTestCase):
         get_config_value.assert_called_once_with(self.pctx, target, name,
                                                  self.cluster)
 
-    def test_get_spark_opt_default(self):
-        c_helper.SPARK_CONFS = {'Spark': {
-            'OPTIONS': [{'name': 'test_name',
-                         'default': 'test'}]}
-        }
-        opt_name = 'tt'
-        default = c_helper._get_spark_opt_default(opt_name)
-        self.assertIsNone(default)
-
-        opt_name = 'test_name'
-        default = c_helper._get_spark_opt_default(opt_name)
-        self.assertEqual(default, 'test')
-
     def test_generate_spark_env_configs(self):
         configs = 'HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop\n' \
                   'YARN_CONF_DIR=/opt/hadoop/etc/hadoop'
diff --git a/sahara/tests/unit/plugins/vanilla/v2_7_1/test_config_helper.py b/sahara/tests/unit/plugins/vanilla/v2_7_1/test_config_helper.py
index 2794591..70f80c5 100644
--- a/sahara/tests/unit/plugins/vanilla/v2_7_1/test_config_helper.py
+++ b/sahara/tests/unit/plugins/vanilla/v2_7_1/test_config_helper.py
@@ -16,7 +16,6 @@
 import mock
 
 from sahara.plugins import provisioning as p
-from sahara.plugins.vanilla.hadoop2 import config_helper as h_helper
 from sahara.plugins.vanilla.v2_7_1 import config_helper as v_helper
 from sahara.tests.unit import base
 
@@ -49,17 +48,14 @@ class TestConfigHelper(base.SaharaTestCase):
         init_configs = v_helper._init_all_configs()
         self.assertEqual(init_configs, configs)
 
+    def test_get_spark_opt_default(self):
+        opt_name = 'Executor extra classpath'
+        _default_executor_classpath = ":".join(
+            ['/opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.7.1.jar'])
+        default = v_helper._get_spark_opt_default(opt_name)
+        self.assertEqual(default, _default_executor_classpath)
+
     def test_get_spark_configs(self):
-        h_helper.SPARK_CONFS = {
-            'Spark': {
-                'OPTIONS': [{
-                    'name': 'test',
-                    'description': 'This is a test',
-                    'default': 'default',
-                    'priority': 1
-                }]
-            }
-        }
         spark_configs = v_helper._get_spark_configs()
         for i in spark_configs:
             self.assertIsInstance(i, p.Config)
diff --git a/sahara/tests/unit/plugins/vanilla/v2_8_2/test_config_helper.py b/sahara/tests/unit/plugins/vanilla/v2_8_2/test_config_helper.py
index 4c70be3..f0d67b2 100644
--- a/sahara/tests/unit/plugins/vanilla/v2_8_2/test_config_helper.py
+++ b/sahara/tests/unit/plugins/vanilla/v2_8_2/test_config_helper.py
@@ -16,7 +16,6 @@
 import mock
 
 from sahara.plugins import provisioning as p
-from sahara.plugins.vanilla.hadoop2 import config_helper as h_helper
 from sahara.plugins.vanilla.v2_8_2 import config_helper as v_helper
 from sahara.tests.unit import base
 
@@ -49,17 +48,14 @@ class TestConfigHelper(base.SaharaTestCase):
         init_configs = v_helper._init_all_configs()
         self.assertEqual(init_configs, configs)
 
+    def test_get_spark_opt_default(self):
+        opt_name = 'Executor extra classpath'
+        _default_executor_classpath = ":".join(
+            ['/opt/hadoop/share/hadoop/tools/lib/hadoop-openstack-2.8.2.jar'])
+        default = v_helper._get_spark_opt_default(opt_name)
+        self.assertEqual(default, _default_executor_classpath)
+
     def test_get_spark_configs(self):
-        h_helper.SPARK_CONFS = {
-            'Spark': {
-                'OPTIONS': [{
-                    'name': 'test',
-                    'description': 'This is a test',
-                    'default': 'default',
-                    'priority': 1
-                }]
-            }
-        }
         spark_configs = v_helper._get_spark_configs()
         for i in spark_configs:
             self.assertIsInstance(i, p.Config)
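
A note on the pattern this patch applies: each version module deep-copies the
shared SPARK_CONFS from the hadoop2 base helper and appends its own "Executor
extra classpath" option, so the default jar path always matches the plugin's
Hadoop version. Below is a minimal standalone sketch of that pattern;
make_version_confs is a hypothetical helper written for illustration (the
actual patch repeats the block in each version module rather than sharing a
factory):

    import copy

    # Base options shared by every plugin version (a stand-in for the trimmed
    # SPARK_CONFS left in sahara/plugins/vanilla/hadoop2/config_helper.py).
    SPARK_CONFS = {
        'Spark': {
            'OPTIONS': [
                {
                    'name': 'Spark home',
                    'description': 'The location of the spark installation'
                                   ' (default: /opt/spark)',
                    'default': '/opt/spark',
                    'priority': 2,
                },
            ]
        }
    }

    def make_version_confs(hadoop_version):
        # The deep copy is essential: a shallow copy would share the OPTIONS
        # list, so the 2.7.1 and 2.8.2 modules would append into the same
        # list object and pollute each other's defaults.
        confs = copy.deepcopy(SPARK_CONFS)
        classpath = ('/opt/hadoop/share/hadoop/tools/lib/'
                     'hadoop-openstack-%s.jar' % hadoop_version)
        confs['Spark']['OPTIONS'].append({
            'name': 'Executor extra classpath',
            'description': 'Value for spark.executor.extraClassPath'
                           ' in spark-defaults.conf'
                           ' (default: %s)' % classpath,
            'default': classpath,
            'priority': 2,
        })
        return confs

    def get_spark_opt_default(confs, opt_name):
        # Same lookup that the patch moves from the base helper into each
        # version module as _get_spark_opt_default().
        for opt in confs['Spark']['OPTIONS']:
            if opt_name == opt['name']:
                return opt['default']
        return None

    if __name__ == '__main__':
        confs = make_version_confs('2.8.2')
        # Prints the 2.8.2 jar rather than the previously hard-coded 2.7.1
        # one, which is the behavior the new unit tests pin down.
        print(get_spark_opt_default(confs, 'Executor extra classpath'))

Appending the version-specific option in each module keeps the shared hadoop2
helper version-agnostic, which is also why _get_spark_opt_default moves down
into the version modules: it must read the per-version SPARK_CONFS, not the
base one.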