Added rack awareness to Hadoop 2 in vanilla plugin

* Hadoop 2.3.0 in the vanilla plugin is now able to push topology data to
  datanodes and nodemanagers
* Only 3-level topology (rack awareness) is pushed to nodes. 4-level
  topology is not, because it is not implemented in YARN yet (see YARN-18
  and YARN-19): https://issues.apache.org/jira/browse/HADOOP-8468
* Even if an admin configures a 4-level topology, Sahara will force it
  down to 3 levels.
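
For illustration (hypothetical host and rack names, not part of this change):
a 3-level topology maps each node to a rack path such as

    192.168.1.10 /datacenter1/rack1

while a 4-level topology would also encode the hypervisor (node group),
e.g. /datacenter1/rack1/nodegroup1, which YARN cannot yet consume.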

Co-Authored-By: Sergey Reshetnyak <sreshetniak@mirantis.com>

Partially implements: blueprint vanilla-plugin-hadoop-2

Change-Id: If5b93dde1f6aaaf619def60634da8c8819a3c75a
commit 1513ff4071 (parent e07eab10be)
Author: Alexander Ignatov
Date:   2014-04-02 11:58:47 +04:00

4 changed files with 70 additions and 1 deletion


@@ -20,6 +20,7 @@ from sahara.plugins.general import utils
 from sahara.plugins.vanilla.v2_3_0 import config_helper as c_helper
 from sahara.plugins.vanilla.v2_3_0 import oozie_helper as o_helper
 from sahara.swift import swift_helper as swift
+from sahara.topology import topology_helper as th
 from sahara.utils import files as f
 from sahara.utils import xmlutils as x
@@ -39,6 +40,7 @@ def configure_cluster(cluster):
             instances.append(instance)
 
     configure_instances(instances)
+    configure_topology_data(cluster)
 
 
 def configure_instances(instances):
@@ -113,6 +115,11 @@ def _get_hadoop_configs(node_group):
         confs['Hadoop'].update(swift_configs)
 
+    if c_helper.is_data_locality_enabled(cluster):
+        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
+        confs['Hadoop'].update({"topology.script.file.name":
+                                HADOOP_CONF_DIR + "/topology.sh"})
+
     return confs, c_helper.get_env_configs()
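
For reference, assuming the default HADOOP_CONF_DIR of /opt/hadoop/etc/hadoop
(the path hard-coded in topology.sh below), the resulting core-site.xml entry
would look roughly like:

    <property>
      <name>topology.script.file.name</name>
      <value>/opt/hadoop/etc/hadoop/topology.sh</value>
    </property>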
@@ -233,6 +240,13 @@ def _post_configuration(instance):
         r.execute_command('chmod +x /tmp/post_conf.sh')
         r.execute_command('sudo /tmp/post_conf.sh')
 
+        if c_helper.is_data_locality_enabled(instance.node_group.cluster):
+            t_script = HADOOP_CONF_DIR + '/topology.sh'
+            r.write_file_to(t_script, f.get_file_text(
+                'plugins/vanilla/v2_3_0/resources/topology.sh'),
+                run_as_root=True)
+            r.execute_command('chmod +x ' + t_script, run_as_root=True)
+
 
 def _get_hadoop_dirs(node_group):
     dirs = {}
@@ -268,3 +282,16 @@ def _merge_configs(a, b):
     update(a)
     update(b)
     return res
+
+
+def configure_topology_data(cluster):
+    if c_helper.is_data_locality_enabled(cluster):
+        LOG.info("Node group awareness is not implemented in YARN yet, "
+                 "so enable_hypervisor_awareness is set to False explicitly")
+        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
+        topology_data = "\n".join(
+            [k + " " + v for k, v in tpl_map.items()]) + "\n"
+        for ng in cluster.node_groups:
+            for i in ng.instances:
+                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
+                                         topology_data, run_as_root=True)
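
The generated topology.data file contains one "<host> <rack-path>" pair per
line, the format the topology.sh script below expects. A hypothetical example:

    192.168.1.10 /dc1/rack1
    192.168.1.11 /dc1/rack1
    192.168.1.12 /dc1/rack2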


@@ -13,12 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from oslo.config import cfg
+
 from sahara import exceptions as ex
 from sahara.openstack.common import log as logging
 from sahara.plugins import provisioning as p
 from sahara.utils import types as types
 from sahara.utils import xmlutils as x
 
+CONF = cfg.CONF
+CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper")
+
 LOG = logging.getLogger(__name__)
 
 CORE_DEFAULT = x.load_hadoop_xml_defaults(
@@ -69,6 +74,10 @@ ENABLE_MYSQL = p.Config('Enable MySQL', 'general', 'cluster',
                         config_type="bool", priority=1,
                         default_value=True, is_optional=True)
 
+ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
+                                config_type="bool", priority=1,
+                                default_value=True, is_optional=True)
+
 HIDDEN_CONFS = [
     'dfs.hosts',
     'dfs.hosts.exclude',
@@ -170,7 +179,10 @@ def _init_env_configs():
 
 def _init_general_configs():
-    return [ENABLE_SWIFT, ENABLE_MYSQL]
+    configs = [ENABLE_SWIFT, ENABLE_MYSQL]
+    if CONF.enable_data_locality:
+        configs.append(ENABLE_DATA_LOCALITY)
+    return configs
 
 
 # Initialise plugin Hadoop configurations
@@ -220,3 +232,10 @@ def get_config_value(service, name, cluster=None):
 def is_mysql_enabled(cluster):
     return get_config_value(
         ENABLE_MYSQL.applicable_target, ENABLE_MYSQL.name, cluster)
+
+
+def is_data_locality_enabled(cluster):
+    if not CONF.enable_data_locality:
+        return False
+    return get_config_value(ENABLE_DATA_LOCALITY.applicable_target,
+                            ENABLE_DATA_LOCALITY.name, cluster)
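
Note the double gate: data locality must be enabled both globally in
sahara.conf and per cluster via the 'Enable Data Locality' config. Assuming
the option is registered under [DEFAULT] (the unqualified import_opt above
suggests so), the global side would be:

    [DEFAULT]
    enable_data_locality = true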


@@ -0,0 +1,20 @@
+#!/bin/bash
+HADOOP_CONF=/opt/hadoop/etc/hadoop
+
+while [ $# -gt 0 ] ; do
+    nodeArg=$1
+    exec < ${HADOOP_CONF}/topology.data
+    result=""
+    while read line ; do
+        ar=( $line )
+        if [ "${ar[0]}" = "$nodeArg" ] ; then
+            result="${ar[1]}"
+        fi
+    done
+    shift
+    if [ -z "$result" ] ; then
+        echo -n "/default/rack "
+    else
+        echo -n "$result "
+    fi
+done
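
Hadoop invokes the script with one or more node addresses and reads back one
rack path per argument. With the hypothetical topology.data sample above:

    $ ./topology.sh 192.168.1.10 192.168.1.99
    /dc1/rack1 /default/rack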


@@ -29,6 +29,7 @@ def scale_cluster(cluster, instances):
     _update_include_files(cluster)
     run.refresh_hadoop_nodes(cluster)
     run.refresh_yarn_nodes(cluster)
+    config.configure_topology_data(cluster)
 
     for instance in instances:
         run.start_instance(instance)
@@ -73,6 +74,8 @@ def decommission_nodes(cluster, instances):
     _update_include_files(cluster)
     _clear_exclude_files(cluster)
 
+    config.configure_topology_data(cluster)
+
 
 def _update_exclude_files(cluster, instances):
     datanodes = _get_instances_with_service(instances, 'datanode')