# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
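
"""Configuration logic for the vanilla plugin on Hadoop 2.

Generates per-service XML and environment configs, pushes them to the
cluster instances and applies post-configuration steps such as writing
topology data for data locality.
"""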

from oslo_config import cfg
from oslo_log import log as logging
import six

from sahara import context
from sahara.i18n import _
from sahara.i18n import _LW
from sahara.plugins import utils
from sahara.plugins.vanilla.hadoop2 import config_helper as c_helper
from sahara.plugins.vanilla.hadoop2 import oozie_helper as o_helper
from sahara.plugins.vanilla import utils as vu
from sahara.swift import swift_helper as swift
from sahara.topology import topology_helper as th
from sahara.utils import cluster_progress_ops as cpo
from sahara.utils import configs as s_cfg
from sahara.utils import files as f
from sahara.utils import proxy
from sahara.utils import xmlutils as x

CONF = cfg.CONF
LOG = logging.getLogger(__name__)

HADOOP_CONF_DIR = '/opt/hadoop/etc/hadoop'
OOZIE_CONF_DIR = '/opt/oozie/conf'
HIVE_CONF_DIR = '/opt/hive/conf'
HADOOP_USER = 'hadoop'
HADOOP_GROUP = 'hadoop'

PORTS_MAP = {
    "namenode": [50070, 9000],
    "secondarynamenode": [50090],
    "resourcemanager": [8088, 8032],
    "historyserver": [19888],
    "datanode": [50010, 50075, 50020],
    "nodemanager": [8042],
    "oozie": [11000],
    "hiveserver": [9999, 10000]
}
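

# Entry point for cluster configuration: optionally creates a proxy user
# for Hive/Swift access (only when use_identity_api_v3 and
# use_domain_for_proxy_users are set, a hiveserver exists and Swift is
# enabled), then configures every instance and the topology data.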
def configure_cluster(pctx, cluster):
    LOG.debug("Configuring cluster")
    if (CONF.use_identity_api_v3 and CONF.use_domain_for_proxy_users and
            vu.get_hiveserver(cluster) and
            c_helper.is_swift_enabled(pctx, cluster)):
        cluster = proxy.create_proxy_user_for_cluster(cluster)

    instances = utils.get_instances(cluster)
    configure_instances(pctx, instances)
    configure_topology_data(pctx, cluster)


def configure_instances(pctx, instances):
    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id, _("Configure instances"), len(instances))

    for instance in instances:
        with context.set_current_instance_id(instance.instance_id):
            _configure_instance(pctx, instance)


@cpo.event_wrapper(True)
def _configure_instance(pctx, instance):
    _provisioning_configs(pctx, instance)
    _post_configuration(pctx, instance)


def _provisioning_configs(pctx, instance):
    xmls, env = _generate_configs(pctx, instance)
    _push_xml_configs(instance, xmls)
    _push_env_configs(instance, env)


def _generate_configs(pctx, instance):
    hadoop_xml_confs = _get_hadoop_configs(pctx, instance)
    user_xml_confs, user_env_confs = _get_user_configs(
        pctx, instance.node_group)
    xml_confs = s_cfg.merge_configs(user_xml_confs, hadoop_xml_confs)
    env_confs = s_cfg.merge_configs(pctx['env_confs'], user_env_confs)

    return xml_confs, env_confs
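

# Builds the plugin-side configuration as a dict keyed by logical service
# name ('Hadoop', 'HDFS', 'YARN', 'MapReduce', 'JobFlow', 'Hive'); each
# service is later serialized to its matching *-site.xml file by
# _push_xml_configs().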
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': 'hive',
                'javax.jdo.option.ConnectionPassword': 'pass',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs


def _get_user_configs(pctx, node_group):
    ng_xml_confs, ng_env_confs = _separate_configs(node_group.node_configs,
                                                   pctx['env_confs'])
    cl_xml_confs, cl_env_confs = _separate_configs(
        node_group.cluster.cluster_configs, pctx['env_confs'])

    xml_confs = s_cfg.merge_configs(cl_xml_confs, ng_xml_confs)
    env_confs = s_cfg.merge_configs(cl_env_confs, ng_env_confs)
    return xml_confs, env_confs
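

# Splits user-supplied configs into environment settings and XML properties:
# parameters listed in pctx['env_confs'] for a service go to the env bucket,
# everything else is treated as an XML property.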
def _separate_configs(configs, all_env_configs):
    xml_configs = {}
    env_configs = {}
    for service, params in six.iteritems(configs):
        for param, value in six.iteritems(params):
            if all_env_configs.get(service, {}).get(param):
                if not env_configs.get(service):
                    env_configs[service] = {}
                env_configs[service][param] = value
            else:
                if not xml_configs.get(service):
                    xml_configs[service] = {}
                xml_configs[service][param] = value

    return xml_configs, env_configs


def _generate_xml(configs):
    xml_confs = {}
    for service, confs in six.iteritems(configs):
        xml_confs[service] = x.create_hadoop_xml(confs)

    return xml_confs
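

# Rewrites the heap-size exports in hadoop-env.sh, yarn-env.sh and
# mapred-env.sh on the instance; the expected per-service values (e.g.
# configs['HDFS']['NameNode Heap Size']) are interpreted as megabytes.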
def _push_env_configs(instance, configs):
    nn_heap = configs['HDFS']['NameNode Heap Size']
    snn_heap = configs['HDFS']['SecondaryNameNode Heap Size']
    dn_heap = configs['HDFS']['DataNode Heap Size']
    rm_heap = configs['YARN']['ResourceManager Heap Size']
    nm_heap = configs['YARN']['NodeManager Heap Size']
    hs_heap = configs['MapReduce']['JobHistoryServer Heap Size']

    with instance.remote() as r:
        r.replace_remote_string(
            '%s/hadoop-env.sh' % HADOOP_CONF_DIR,
            'export HADOOP_NAMENODE_OPTS=.*',
            'export HADOOP_NAMENODE_OPTS="-Xmx%dm"' % nn_heap)
        r.replace_remote_string(
            '%s/hadoop-env.sh' % HADOOP_CONF_DIR,
            'export HADOOP_SECONDARYNAMENODE_OPTS=.*',
            'export HADOOP_SECONDARYNAMENODE_OPTS="-Xmx%dm"' % snn_heap)
        r.replace_remote_string(
            '%s/hadoop-env.sh' % HADOOP_CONF_DIR,
            'export HADOOP_DATANODE_OPTS=.*',
            'export HADOOP_DATANODE_OPTS="-Xmx%dm"' % dn_heap)
        r.replace_remote_string(
            '%s/yarn-env.sh' % HADOOP_CONF_DIR,
            '\\#export YARN_RESOURCEMANAGER_HEAPSIZE=.*',
            'export YARN_RESOURCEMANAGER_HEAPSIZE=%d' % rm_heap)
        r.replace_remote_string(
            '%s/yarn-env.sh' % HADOOP_CONF_DIR,
            '\\#export YARN_NODEMANAGER_HEAPSIZE=.*',
            'export YARN_NODEMANAGER_HEAPSIZE=%d' % nm_heap)
        r.replace_remote_string(
            '%s/mapred-env.sh' % HADOOP_CONF_DIR,
            'export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=.*',
            'export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=%d' % hs_heap)
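

# Maps each generated service section to its on-disk configuration file
# (e.g. 'Hadoop' -> core-site.xml, 'JobFlow' -> oozie-site.xml) and writes
# the rendered XML to the instance; sections without a mapping are skipped.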
def _push_xml_configs(instance, configs):
    xmls = _generate_xml(configs)
    service_to_conf_map = {
        'Hadoop': '%s/core-site.xml' % HADOOP_CONF_DIR,
        'HDFS': '%s/hdfs-site.xml' % HADOOP_CONF_DIR,
        'YARN': '%s/yarn-site.xml' % HADOOP_CONF_DIR,
        'MapReduce': '%s/mapred-site.xml' % HADOOP_CONF_DIR,
        'JobFlow': '%s/oozie-site.xml' % OOZIE_CONF_DIR,
        'Hive': '%s/hive-site.xml' % HIVE_CONF_DIR
    }
    xml_confs = {}
    for service, confs in six.iteritems(xmls):
        if service not in service_to_conf_map.keys():
            continue

        xml_confs[service_to_conf_map[service]] = confs

    _push_configs_to_instance(instance, xml_confs)


def _push_configs_to_instance(instance, configs):
    LOG.debug("Push configs to instance {instance}".format(
        instance=instance.instance_name))
    with instance.remote() as r:
        for fl, data in six.iteritems(configs):
            r.write_file_to(fl, data, run_as_root=True)
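

# Renders the post_conf.template shell script with the instance's directory
# layout, uploads it and runs it via sudo; when data locality is enabled the
# topology.sh script is also installed into HADOOP_CONF_DIR.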
def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(pctx,
                                             instance.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(t_script, f.get_file_text(
                'plugins/vanilla/hadoop2/resources/topology.sh'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)


def _get_hadoop_dirs(instance):
    dirs = {}
    storage_paths = instance.storage_paths()
    dirs['hadoop_name_dirs'] = _make_hadoop_paths(
        storage_paths, '/hdfs/namenode')
    dirs['hadoop_data_dirs'] = _make_hadoop_paths(
        storage_paths, '/hdfs/datanode')
    dirs['hadoop_log_dir'] = _make_hadoop_paths(
        storage_paths, '/hadoop/logs')[0]
    dirs['hadoop_secure_dn_log_dir'] = _make_hadoop_paths(
        storage_paths, '/hadoop/logs/secure')[0]
    dirs['yarn_log_dir'] = _make_hadoop_paths(
        storage_paths, '/yarn/logs')[0]

    return dirs


def _make_hadoop_paths(paths, hadoop_dir):
    return [path + hadoop_dir for path in paths]
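

# Writes a topology.data file (one "<node> <topology path>" line per entry
# from th.generate_topology_map) to every instance when data locality is
# enabled; node-group awareness is explicitly disabled for YARN.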
@cpo.event_wrapper(
    True, step=_("Configure topology data"), param=('cluster', 1))
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.warning(_LW("Node group awareness is not implemented in YARN "
                        "yet, so enable_hypervisor_awareness is set to "
                        "False explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)


def get_open_ports(node_group):
    ports = []
    for key in PORTS_MAP:
        if key in node_group.node_processes:
            ports += PORTS_MAP[key]
    return ports