diff --git a/sahara/plugins/vanilla/v2_3_0/config.py b/sahara/plugins/vanilla/v2_3_0/config.py index 01a503f..235e768 100644 --- a/sahara/plugins/vanilla/v2_3_0/config.py +++ b/sahara/plugins/vanilla/v2_3_0/config.py @@ -20,6 +20,7 @@ from sahara.plugins.general import utils from sahara.plugins.vanilla.v2_3_0 import config_helper as c_helper from sahara.plugins.vanilla.v2_3_0 import oozie_helper as o_helper from sahara.swift import swift_helper as swift +from sahara.topology import topology_helper as th from sahara.utils import files as f from sahara.utils import xmlutils as x @@ -39,6 +40,7 @@ def configure_cluster(cluster): instances.append(instance) configure_instances(instances) + configure_topology_data(cluster) def configure_instances(instances): @@ -113,6 +115,11 @@ def _get_hadoop_configs(node_group): confs['Hadoop'].update(swift_configs) + if c_helper.is_data_locality_enabled(cluster): + confs['Hadoop'].update(th.TOPOLOGY_CONFIG) + confs['Hadoop'].update({"topology.script.file.name": + HADOOP_CONF_DIR + "/topology.sh"}) + return confs, c_helper.get_env_configs() @@ -233,6 +240,13 @@ def _post_configuration(instance): r.execute_command('chmod +x /tmp/post_conf.sh') r.execute_command('sudo /tmp/post_conf.sh') + if c_helper.is_data_locality_enabled(instance.node_group.cluster): + t_script = HADOOP_CONF_DIR + '/topology.sh' + r.write_file_to(t_script, f.get_file_text( + 'plugins/vanilla/v2_3_0/resources/topology.sh'), + run_as_root=True) + r.execute_command('chmod +x ' + t_script, run_as_root=True) + def _get_hadoop_dirs(node_group): dirs = {} @@ -268,3 +282,16 @@ def _merge_configs(a, b): update(a) update(b) return res + + +def configure_topology_data(cluster): + if c_helper.is_data_locality_enabled(cluster): + LOG.info("Node group awareness is not implemented in YARN yet " + "so enable_hypervisor_awareness set to False explicitly") + tpl_map = th.generate_topology_map(cluster, is_node_awareness=False) + topology_data = "\n".join( + [k + " " + v for k, v in tpl_map.items()]) + "\n" + for ng in cluster.node_groups: + for i in ng.instances: + i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data", + topology_data, run_as_root=True) diff --git a/sahara/plugins/vanilla/v2_3_0/config_helper.py b/sahara/plugins/vanilla/v2_3_0/config_helper.py index 54a49d2..7548c20 100644 --- a/sahara/plugins/vanilla/v2_3_0/config_helper.py +++ b/sahara/plugins/vanilla/v2_3_0/config_helper.py @@ -13,12 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +from oslo.config import cfg + from sahara import exceptions as ex from sahara.openstack.common import log as logging from sahara.plugins import provisioning as p from sahara.utils import types as types from sahara.utils import xmlutils as x +CONF = cfg.CONF +CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper") + LOG = logging.getLogger(__name__) CORE_DEFAULT = x.load_hadoop_xml_defaults( @@ -69,6 +74,10 @@ ENABLE_MYSQL = p.Config('Enable MySQL', 'general', 'cluster', config_type="bool", priority=1, default_value=True, is_optional=True) +ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster', + config_type="bool", priority=1, + default_value=True, is_optional=True) + HIDDEN_CONFS = [ 'dfs.hosts', 'dfs.hosts.exclude', @@ -170,7 +179,10 @@ def _init_env_configs(): def _init_general_configs(): - return [ENABLE_SWIFT, ENABLE_MYSQL] + configs = [ENABLE_SWIFT, ENABLE_MYSQL] + if CONF.enable_data_locality: + configs.append(ENABLE_DATA_LOCALITY) + return configs # Initialise plugin Hadoop configurations @@ -220,3 +232,10 @@ def get_config_value(service, name, cluster=None): def is_mysql_enabled(cluster): return get_config_value( ENABLE_MYSQL.applicable_target, ENABLE_MYSQL.name, cluster) + + +def is_data_locality_enabled(cluster): + if not CONF.enable_data_locality: + return False + return get_config_value(ENABLE_DATA_LOCALITY.applicable_target, + ENABLE_DATA_LOCALITY.name, cluster) diff --git a/sahara/plugins/vanilla/v2_3_0/resources/topology.sh b/sahara/plugins/vanilla/v2_3_0/resources/topology.sh new file mode 100755 index 0000000..658b3b4 --- /dev/null +++ b/sahara/plugins/vanilla/v2_3_0/resources/topology.sh @@ -0,0 +1,20 @@ +#!/bin/bash +HADOOP_CONF=/opt/hadoop/etc/hadoop + +while [ $# -gt 0 ] ; do + nodeArg=$1 + exec< ${HADOOP_CONF}/topology.data + result="" + while read line ; do + ar=( $line ) + if [ "${ar[0]}" = "$nodeArg" ] ; then + result="${ar[1]}" + fi + done + shift + if [ -z "$result" ] ; then + echo -n "/default/rack " + else + echo -n "$result " + fi +done \ No newline at end of file diff --git a/sahara/plugins/vanilla/v2_3_0/scaling.py b/sahara/plugins/vanilla/v2_3_0/scaling.py index ccfd8ed..8cddc7a 100644 --- a/sahara/plugins/vanilla/v2_3_0/scaling.py +++ b/sahara/plugins/vanilla/v2_3_0/scaling.py @@ -29,6 +29,7 @@ def scale_cluster(cluster, instances): _update_include_files(cluster) run.refresh_hadoop_nodes(cluster) run.refresh_yarn_nodes(cluster) + config.configure_topology_data(cluster) for instance in instances: run.start_instance(instance) @@ -73,6 +74,8 @@ def decommission_nodes(cluster, instances): _update_include_files(cluster) _clear_exclude_files(cluster) + config.configure_topology_data(cluster) + def _update_exclude_files(cluster, instances): datanodes = _get_instances_with_service(instances, 'datanode')