Implementation of Vanilla Plugin
* Implemented configure_cluster and start_cluster methods * Cluster configuring is service:process specific * Added basic unit tests * Added basic validation Implements blueprint vanilla-hadoop-plugin Change-Id: I51c55557b6bb073a7eae7eefc2f21cc2c2df0385
This commit is contained in:
parent
10ef73c80b
commit
fa62f6872a
180
savanna/plugins/vanilla/config_helper.py
Normal file
180
savanna/plugins/vanilla/config_helper.py
Normal file
@ -0,0 +1,180 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import pkg_resources as pkg
|
||||
import xml.dom.minidom as xml
|
||||
|
||||
import jinja2 as j2
|
||||
|
||||
from savanna.plugins import provisioning as p
|
||||
from savanna import version
|
||||
|
||||
|
||||
def _load_xml_default_configs(file_name):
|
||||
doc = xml.parse(
|
||||
pkg.resource_filename(version.version_info.package,
|
||||
'plugins/vanilla/resources/%s' % file_name)
|
||||
)
|
||||
|
||||
properties = doc.getElementsByTagName("name")
|
||||
return [prop.childNodes[0].data for prop in properties]
|
||||
|
||||
|
||||
CORE_DEFAULT = _load_xml_default_configs('core-default.xml')
|
||||
HDFS_DEFAULT = _load_xml_default_configs('hdfs-default.xml')
|
||||
MAPRED_DEFAULT = _load_xml_default_configs('mapred-default.xml')
|
||||
|
||||
XML_CONFS = {
|
||||
"HDFS": [CORE_DEFAULT, HDFS_DEFAULT],
|
||||
"MAPREDUCE": [MAPRED_DEFAULT]
|
||||
}
|
||||
|
||||
# TODO(aignatov): Environmental configs could be more complex
|
||||
ENV_CONFS = {
|
||||
"MAPREDUCE": {
|
||||
'job_tracker_heap_size': 'HADOOP_JOBTRACKER_OPTS=\\"-Xmx%sm\\"',
|
||||
'task_tracker_heap_size': 'HADOOP_TASKTRACKER_OPTS=\\"-Xmx%sm\\"'
|
||||
},
|
||||
"HDFS": {
|
||||
'name_node_heap_size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"',
|
||||
'data_node_heap_size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _initialise_configs():
|
||||
configs = []
|
||||
for service, config_lists in XML_CONFS.iteritems():
|
||||
for config_list in config_lists:
|
||||
for config_name in config_list:
|
||||
# TODO(aignatov): Need to add default values and types
|
||||
configs.append(
|
||||
p.Config(config_name, service, "node", is_optional=True))
|
||||
|
||||
for service, config_items in ENV_CONFS.iteritems():
|
||||
for name, param_format_str in config_items.iteritems():
|
||||
configs.append(p.Config(name, service, "node", default_value=1024))
|
||||
|
||||
return configs
|
||||
|
||||
# Initialise plugin Hadoop configurations
|
||||
PLUGIN_CONFIGS = _initialise_configs()
|
||||
|
||||
|
||||
def get_plugin_configs():
|
||||
return PLUGIN_CONFIGS
|
||||
|
||||
|
||||
def _create_xml(configs, global_conf):
|
||||
doc = xml.Document()
|
||||
|
||||
pi = doc.createProcessingInstruction('xml-stylesheet',
|
||||
'type="text/xsl" '
|
||||
'href="configuration.xsl"')
|
||||
doc.insertBefore(pi, doc.firstChild)
|
||||
|
||||
# Create the <configuration> base element
|
||||
configuration = doc.createElement("configuration")
|
||||
doc.appendChild(configuration)
|
||||
|
||||
for prop_name, prop_value in configs.items():
|
||||
if prop_name in global_conf:
|
||||
# Create the <property> element
|
||||
property = doc.createElement("property")
|
||||
configuration.appendChild(property)
|
||||
|
||||
# Create a <name> element in <property>
|
||||
name = doc.createElement("name")
|
||||
property.appendChild(name)
|
||||
|
||||
# Give the <name> element some hadoop config name
|
||||
name_text = doc.createTextNode(prop_name)
|
||||
name.appendChild(name_text)
|
||||
|
||||
# Create a <value> element in <property>
|
||||
value = doc.createElement("value")
|
||||
property.appendChild(value)
|
||||
|
||||
# Give the <value> element some hadoop config value
|
||||
value_text = doc.createTextNode(prop_value)
|
||||
value.appendChild(value_text)
|
||||
|
||||
# Return newly created XML
|
||||
return doc.toprettyxml(indent=" ")
|
||||
|
||||
|
||||
def generate_xml_configs(configs, nn_hostname, jt_hostname=None):
|
||||
# inserting common configs depends on provisioned VMs and HDFS placement
|
||||
# TODO(aignatov): should be moved to cluster context
|
||||
cfg = {
|
||||
'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
|
||||
'dfs.name.dir': '/mnt/lib/hadoop/hdfs/namenode',
|
||||
'dfs.data.dir': '/mnt/lib/hadoop/hdfs/datanode',
|
||||
}
|
||||
|
||||
if jt_hostname:
|
||||
mr_cfg = {
|
||||
'mapred.job.tracker': '%s:8021' % jt_hostname,
|
||||
'mapred.system.dir': '/mnt/mapred/mapredsystem',
|
||||
'mapred.local.dir': '/mnt/lib/hadoop/mapred'
|
||||
}
|
||||
cfg.update(mr_cfg)
|
||||
|
||||
# inserting user-defined configs
|
||||
for key, value in extract_xml_confs(configs):
|
||||
cfg[key] = value
|
||||
|
||||
# invoking applied configs to appropriate xml files
|
||||
xml_configs = {
|
||||
'core-site': _create_xml(cfg, CORE_DEFAULT),
|
||||
'mapred-site': _create_xml(cfg, MAPRED_DEFAULT),
|
||||
'hdfs-site': _create_xml(cfg, HDFS_DEFAULT)
|
||||
}
|
||||
|
||||
return xml_configs
|
||||
|
||||
|
||||
def extract_environment_confs(configs):
|
||||
"""Returns list of Hadoop parameters which should be passed via environment
|
||||
"""
|
||||
lst = []
|
||||
for service, srv_confs in configs.items():
|
||||
for param_name, param_value in srv_confs.items():
|
||||
for cfg_name, cfg_format_str in ENV_CONFS[service].items():
|
||||
if param_name == cfg_name and param_value is not None:
|
||||
lst.append(cfg_format_str % param_value)
|
||||
return lst
|
||||
|
||||
|
||||
def extract_xml_confs(configs):
|
||||
"""Returns list of Hadoop parameters which should be passed into general
|
||||
configs like core-site.xml
|
||||
"""
|
||||
lst = []
|
||||
for service, srv_confs in configs.items():
|
||||
for param_name, param_value in srv_confs.items():
|
||||
for cfg_list in XML_CONFS[service]:
|
||||
if param_name in cfg_list and param_value is not None:
|
||||
lst.append((param_name, param_value))
|
||||
return lst
|
||||
|
||||
|
||||
env = j2.Environment(loader=j2.PackageLoader('savanna',
|
||||
'plugins/vanilla/resources'))
|
||||
|
||||
|
||||
def render_template(template_name, **kwargs):
|
||||
template = env.get_template('%s.template' % template_name)
|
||||
return template.render(**kwargs)
|
36
savanna/plugins/vanilla/exceptions.py
Normal file
36
savanna/plugins/vanilla/exceptions.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import savanna.exceptions as e
|
||||
|
||||
|
||||
class NotSingleNameNodeException(e.SavannaException):
|
||||
def __init__(self, nn_count):
|
||||
self.message = "Hadoop cluster should contain only 1 NameNode " \
|
||||
"instance. Actual NN count is %s" % nn_count
|
||||
self.code = "NOT_SINGLE_NAME_NODE"
|
||||
|
||||
|
||||
class NotSingleJobTrackerException(e.SavannaException):
|
||||
def __init__(self, jt_count):
|
||||
self.message = "Hadoop cluster should contain 0 or 1 JobTracker " \
|
||||
"instances. Actual JT count is %s" % jt_count
|
||||
self.code = "NOT_SINGLE_JOB_TRACKER"
|
||||
|
||||
|
||||
class TaskTrackersWithoutJobTracker(e.SavannaException):
|
||||
def __init__(self):
|
||||
self.message = "TaskTrackers cannot be configures without JobTracker"
|
||||
self.code = "TASK_TRACKERS_WITHOUT_JOB_TRACKER"
|
@ -13,10 +13,22 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from savanna.openstack.common import log as logging
|
||||
from savanna.plugins import provisioning as p
|
||||
from savanna.plugins.vanilla import config_helper as c_helper
|
||||
from savanna.plugins.vanilla import exceptions as ex
|
||||
from savanna.plugins.vanilla import utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VanillaProvider(p.ProvisioningPluginBase):
|
||||
def __init__(self):
|
||||
self.processes = {
|
||||
"HDFS": ["namenode", "datanode", "secondarynamenode"],
|
||||
"MAPREDUCE": ["tasktracker", "jobtracker"]
|
||||
}
|
||||
|
||||
def get_plugin_opts(self):
|
||||
return []
|
||||
|
||||
@ -32,28 +44,113 @@ class VanillaProvider(p.ProvisioningPluginBase):
|
||||
"cluster without any management consoles.")
|
||||
|
||||
def get_versions(self):
|
||||
return ['1.1.1']
|
||||
return ['1.1.2']
|
||||
|
||||
def get_configs(self, hadoop_version):
|
||||
return [
|
||||
p.Config('Task Tracker heap size', 'mapreduce', "node",
|
||||
default_value='1024M')
|
||||
]
|
||||
return c_helper.get_plugin_configs()
|
||||
|
||||
def get_node_processes(self, hadoop_version):
|
||||
return {
|
||||
'mapreduce': ['jobtracker', 'tasktracker'],
|
||||
'hdfs': ['namenode', 'datanode']
|
||||
}
|
||||
return self.processes
|
||||
|
||||
def validate(self, cluster):
|
||||
pass
|
||||
nn_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "namenode")])
|
||||
if nn_count is not 1:
|
||||
raise ex.NotSingleNameNodeException(nn_count)
|
||||
|
||||
jt_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "jobtracker")])
|
||||
|
||||
if jt_count not in [0, 1]:
|
||||
raise ex.NotSingleJobTrackerException(jt_count)
|
||||
|
||||
tt_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "tasktracker")])
|
||||
if jt_count is 0 and tt_count > 0:
|
||||
raise ex.TaskTrackersWithoutJobTracker()
|
||||
|
||||
def update_infra(self, cluster):
|
||||
pass
|
||||
|
||||
def configure_cluster(self, cluster):
|
||||
pass
|
||||
for ng in cluster.node_groups:
|
||||
for inst in ng.instances:
|
||||
inst.remote.execute_command(
|
||||
'sudo chown -R $USER:$USER /etc/hadoop'
|
||||
)
|
||||
|
||||
self._extract_configs(cluster)
|
||||
self._push_configs_to_nodes(cluster)
|
||||
|
||||
def start_cluster(self, cluster):
|
||||
pass
|
||||
nn_instance = utils.get_namenode(cluster)
|
||||
jt_instance = utils.get_jobtracker(cluster)
|
||||
|
||||
nn_instance.remote.execute_command(
|
||||
'sudo su -c /usr/sbin/start-dfs.sh hadoop >>'
|
||||
' /tmp/savanna-hadoop-start-dfs.log')
|
||||
|
||||
LOG.info("HDFS service at '%s' has been started", nn_instance.hostname)
|
||||
|
||||
if jt_instance:
|
||||
jt_instance.remote.execute_command(
|
||||
'sudo su -c /usr/sbin/start-mapred.sh hadoop >>'
|
||||
' /tmp/savanna-hadoop-start-mapred.log')
|
||||
LOG.info("MAPREDUCE service at '%s' has been started",
|
||||
jt_instance.hostname)
|
||||
|
||||
LOG.info('Cluster %s has been started successfully' % cluster.name)
|
||||
|
||||
def _extract_configs(self, cluster):
|
||||
nn = utils.get_namenode(cluster)
|
||||
jt = utils.get_jobtracker(cluster)
|
||||
for ng in cluster.node_groups:
|
||||
#TODO(aignatov): setup_script should be replaced with remote calls
|
||||
ng.extra = {
|
||||
'xml': c_helper.generate_xml_configs(ng.configuration,
|
||||
nn.hostname,
|
||||
jt.hostname
|
||||
if jt else None),
|
||||
'setup_script': c_helper.render_template(
|
||||
'setup-general.sh',
|
||||
args={
|
||||
'env_configs': c_helper.extract_environment_confs(
|
||||
ng.configuration)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
def _push_configs_to_nodes(self, cluster):
|
||||
for ng in cluster.node_groups:
|
||||
for inst in ng.instances:
|
||||
inst.remote.write_file_to('/etc/hadoop/core-site.xml',
|
||||
ng.extra['xml']['core-site'])
|
||||
inst.remote.write_file_to('/etc/hadoop/mapred-site.xml',
|
||||
ng.extra['xml']['mapred-site'])
|
||||
inst.remote.write_file_to('/etc/hadoop/hdfs-site.xml',
|
||||
ng.extra['xml']['hdfs-site'])
|
||||
inst.remote.write_file_to('/tmp/savanna-hadoop-init.sh',
|
||||
ng.extra['setup_script'])
|
||||
inst.remote.execute_command(
|
||||
'sudo chmod 0500 /tmp/savanna-hadoop-init.sh'
|
||||
)
|
||||
inst.remote.execute_command(
|
||||
'sudo /tmp/savanna-hadoop-init.sh '
|
||||
'>> /tmp/savanna-hadoop-init.log 2>&1')
|
||||
|
||||
nn = utils.get_namenode(cluster)
|
||||
jt = utils.get_jobtracker(cluster)
|
||||
|
||||
nn.remote.write_file_to('/etc/hadoop/slaves',
|
||||
utils.generate_host_names(
|
||||
utils.get_datanodes(cluster)))
|
||||
nn.remote.write_file_to('/etc/hadoop/masters',
|
||||
utils.generate_host_names(
|
||||
utils.get_secondarynamenodes(cluster)))
|
||||
nn.remote.execute_command(
|
||||
"sudo su -c 'hadoop namenode -format' hadoop")
|
||||
|
||||
if jt and nn.instance_id != jt.instance_id:
|
||||
jt.remote.write_file_to('/etc/hadoop/slaves',
|
||||
utils.generate_host_names(
|
||||
utils.get_tasktrackers(cluster)))
|
||||
|
20
savanna/plugins/vanilla/resources/README.rst
Normal file
20
savanna/plugins/vanilla/resources/README.rst
Normal file
@ -0,0 +1,20 @@
|
||||
Apache Hadoop Configurations for Savanna
|
||||
========================================
|
||||
|
||||
This directory contains default XML configuration files:
|
||||
|
||||
* core-default.xml,
|
||||
* hdfs-default.xml,
|
||||
* mapred-default.xml
|
||||
|
||||
These files are applied for Savanna's plugin of Apache Hadoop version 1.1.2.
|
||||
|
||||
Files were taken from here:
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/hdfs/hdfs-default.xml
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/mapred/mapred-default.xml
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/core/core-default.xml
|
||||
|
||||
XML configs are used to expose default Hadoop configurations to the users through
|
||||
the Savanna's REST API. It allows users to override some config values which will
|
||||
be pushed to the provisioned VMs running Hadoop services as part of appropriate
|
||||
xml config.
|
580
savanna/plugins/vanilla/resources/core-default.xml
Normal file
580
savanna/plugins/vanilla/resources/core-default.xml
Normal file
@ -0,0 +1,580 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
|
||||
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||
<!-- wish to modify from this file into core-site.xml and change them -->
|
||||
<!-- there. If core-site.xml does not already exist, create it. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
<!--- global properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/tmp/hadoop-${user.name}</value>
|
||||
<description>A base for other temporary directories.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.native.lib</name>
|
||||
<value>true</value>
|
||||
<description>Should native hadoop libraries, if present, be used.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.filter.initializers</name>
|
||||
<value></value>
|
||||
<description>A comma separated list of class names. Each class in the list
|
||||
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
|
||||
Filter will be initialized. Then, the Filter will be applied to all user
|
||||
facing jsp and servlet web pages. The ordering of the list defines the
|
||||
ordering of the filters.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.group.mapping</name>
|
||||
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
|
||||
<description>Class for user to group mapping (get groups for a given user)
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.authorization</name>
|
||||
<value>false</value>
|
||||
<description>Is service-level authorization enabled?</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.authentication</name>
|
||||
<value>simple</value>
|
||||
<description>Possible values are simple (no authentication), and kerberos
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.token.service.use_ip</name>
|
||||
<value>true</value>
|
||||
<description>Controls whether tokens always use IP addresses. DNS changes
|
||||
will not be detected if this option is enabled. Existing client connections
|
||||
that break will always reconnect to the IP of the original host. New clients
|
||||
will connect to the host's new IP but fail to locate a token. Disabling
|
||||
this option will allow existing and new clients to detect an IP change and
|
||||
continue to locate the new host's token.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.use-weak-http-crypto</name>
|
||||
<value>false</value>
|
||||
<description>If enabled, use KSSL to authenticate HTTP connections to the
|
||||
NameNode. Due to a bug in JDK6, using KSSL requires one to configure
|
||||
Kerberos tickets to use encryption types that are known to be
|
||||
cryptographically weak. If disabled, SPNEGO will be used for HTTP
|
||||
authentication, which supports stronger encryption types.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!--
|
||||
<property>
|
||||
<name>hadoop.security.service.user.name.key</name>
|
||||
<value></value>
|
||||
<description>Name of the kerberos principal of the user that owns
|
||||
a given service daemon
|
||||
</description>
|
||||
</property>
|
||||
-->
|
||||
|
||||
<!--- logging properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.size</name>
|
||||
<value>10000000</value>
|
||||
<description>The max size of each log file</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.count</name>
|
||||
<value>10</value>
|
||||
<description>The max number of log files</description>
|
||||
</property>
|
||||
|
||||
<!-- i/o properties -->
|
||||
<property>
|
||||
<name>io.file.buffer.size</name>
|
||||
<value>4096</value>
|
||||
<description>The size of buffer for use in sequence files.
|
||||
The size of this buffer should probably be a multiple of hardware
|
||||
page size (4096 on Intel x86), and it determines how much data is
|
||||
buffered during read and write operations.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.bytes.per.checksum</name>
|
||||
<value>512</value>
|
||||
<description>The number of bytes per checksum. Must not be larger than
|
||||
io.file.buffer.size.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.skip.checksum.errors</name>
|
||||
<value>false</value>
|
||||
<description>If true, when a checksum error is encountered while
|
||||
reading a sequence file, entries are skipped, instead of throwing an
|
||||
exception.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.compression.codecs</name>
|
||||
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
|
||||
<description>A list of the compression codec classes that can be used
|
||||
for compression/decompression.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.serializations</name>
|
||||
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
||||
<description>A list of serialization classes that can be used for
|
||||
obtaining serializers and deserializers.</description>
|
||||
</property>
|
||||
|
||||
<!-- file system properties -->
|
||||
|
||||
<property>
|
||||
<name>fs.default.name</name>
|
||||
<value>file:///</value>
|
||||
<description>The name of the default file system. A URI whose
|
||||
scheme and authority determine the FileSystem implementation. The
|
||||
uri's scheme determines the config property (fs.SCHEME.impl) naming
|
||||
the FileSystem implementation class. The uri's authority is used to
|
||||
determine the host, port, etc. for a filesystem.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.trash.interval</name>
|
||||
<value>0</value>
|
||||
<description>Number of minutes between trash checkpoints.
|
||||
If zero, the trash feature is disabled.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.file.impl</name>
|
||||
<value>org.apache.hadoop.fs.LocalFileSystem</value>
|
||||
<description>The FileSystem for file: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hdfs.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
|
||||
<description>The FileSystem for hdfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
|
||||
<description>The FileSystem for s3: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3n.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
|
||||
<description>The FileSystem for s3n: (Native S3) uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.kfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
|
||||
<description>The FileSystem for kfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hsftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HsftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.webhdfs.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.web.WebHdfsFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ftp.impl</name>
|
||||
<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
|
||||
<description>The FileSystem for ftp: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ramfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
|
||||
<description>The FileSystem for ramfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.har.impl</name>
|
||||
<value>org.apache.hadoop.fs.HarFileSystem</value>
|
||||
<description>The filesystem for Hadoop archives. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.har.impl.disable.cache</name>
|
||||
<value>true</value>
|
||||
<description>Don't cache 'har' filesystem instances.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary images to merge.
|
||||
If this is a comma-delimited list of directories then the image is
|
||||
replicated in all of the directories for redundancy.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.edits.dir</name>
|
||||
<value>${fs.checkpoint.dir}</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary edits to merge.
|
||||
If this is a comma-delimited list of directoires then teh edits is
|
||||
replicated in all of the directoires for redundancy.
|
||||
Default value is same as fs.checkpoint.dir
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.period</name>
|
||||
<value>3600</value>
|
||||
<description>The number of seconds between two periodic checkpoints.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.size</name>
|
||||
<value>67108864</value>
|
||||
<description>The size of the current edit log (in bytes) that triggers
|
||||
a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>fs.s3.block.size</name>
|
||||
<value>67108864</value>
|
||||
<description>Block size to use when writing files to S3.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.buffer.dir</name>
|
||||
<value>${hadoop.tmp.dir}/s3</value>
|
||||
<description>Determines where on the local filesystem the S3 filesystem
|
||||
should store files before sending them to S3
|
||||
(or after retrieving them from S3).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.maxRetries</name>
|
||||
<value>4</value>
|
||||
<description>The maximum number of retries for reading or writing files to S3,
|
||||
before we signal failure to the application.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.sleepTimeSeconds</name>
|
||||
<value>10</value>
|
||||
<description>The number of seconds to sleep between each S3 retry.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>local.cache.size</name>
|
||||
<value>10737418240</value>
|
||||
<description>The limit on the size of cache you want to keep, set by default
|
||||
to 10GB. This will act as a soft limit on the cache directory for out of band data.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.compress.blocksize</name>
|
||||
<value>1000000</value>
|
||||
<description>The minimum block size for compression in block compressed
|
||||
SequenceFiles.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.lazydecompress</name>
|
||||
<value>true</value>
|
||||
<description>Should values of block-compressed SequenceFiles be decompressed
|
||||
only when necessary.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.sorter.recordlimit</name>
|
||||
<value>1000000</value>
|
||||
<description>The limit on number of records to be kept in memory in a spill
|
||||
in SequenceFiles.Sorter
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.size</name>
|
||||
<value>1048576</value>
|
||||
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
|
||||
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
|
||||
Larger values minimize the number of filters, which slightly increases the performance,
|
||||
but may waste too much space if the total number of keys is usually much smaller
|
||||
than this number.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.error.rate</name>
|
||||
<value>0.005</value>
|
||||
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
|
||||
As this value decreases, the size of BloomFilter-s increases exponentially. This
|
||||
value is the probability of encountering false positives (default is 0.5%).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.util.hash.type</name>
|
||||
<value>murmur</value>
|
||||
<description>The default implementation of Hash. Currently this can take one of the
|
||||
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- ipc properties -->
|
||||
|
||||
<property>
|
||||
<name>ipc.client.idlethreshold</name>
|
||||
<value>4000</value>
|
||||
<description>Defines the threshold number of connections after which
|
||||
connections will be inspected for idleness.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.kill.max</name>
|
||||
<value>10</value>
|
||||
<description>Defines the maximum number of clients to disconnect in one go.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connection.maxidletime</name>
|
||||
<value>10000</value>
|
||||
<description>The maximum time in msec after which a client will bring down the
|
||||
connection to the server.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>Indicates the number of retries a client will make to establish
|
||||
a server connection.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.listen.queue.size</name>
|
||||
<value>128</value>
|
||||
<description>Indicates the length of the listen queue for servers accepting
|
||||
client connections.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the server. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the client. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- Web Interface Configuration -->
|
||||
|
||||
<property>
|
||||
<name>webinterface.private.actions</name>
|
||||
<value>false</value>
|
||||
<description> If set to true, the web interfaces of JT and NN may contain
|
||||
actions, such as kill job, delete file, etc., that should
|
||||
not be exposed to public. Enable this option if the interfaces
|
||||
are only reachable by those who have the right authorization.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Proxy Configuration -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.default</name>
|
||||
<value>org.apache.hadoop.net.StandardSocketFactory</value>
|
||||
<description> Default SocketFactory to use. This parameter is expected to be
|
||||
formatted as "package.FactoryClassName".
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
|
||||
<value></value>
|
||||
<description> SocketFactory to use to connect to a DFS. If null or empty, use
|
||||
hadoop.rpc.socket.class.default. This socket factory is also used by
|
||||
DFSClient to create sockets to DataNodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>hadoop.socks.server</name>
|
||||
<value></value>
|
||||
<description> Address (host:port) of the SOCKS server to be used by the
|
||||
SocksSocketFactory.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Rack Configuration -->
|
||||
|
||||
<property>
|
||||
<name>topology.node.switch.mapping.impl</name>
|
||||
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
|
||||
<description> The default implementation of the DNSToSwitchMapping. It
|
||||
invokes a script specified in topology.script.file.name to resolve
|
||||
node names. If the value for topology.script.file.name is not set, the
|
||||
default value of DEFAULT_RACK is returned for all node names.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.file.name</name>
|
||||
<value></value>
|
||||
<description> The script name that should be invoked to resolve DNS names to
|
||||
NetworkTopology names. Example: the script would take host.foo.bar as an
|
||||
argument, and return /rack1 as the output.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.number.args</name>
|
||||
<value>100</value>
|
||||
<description> The max number of args that the script configured with
|
||||
topology.script.file.name should be run with. Each arg is an
|
||||
IP address.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.uid.cache.secs</name>
|
||||
<value>14400</value>
|
||||
<description> NativeIO maintains a cache from UID to UserName. This is
|
||||
the timeout for an entry in that cache. </description>
|
||||
</property>
|
||||
|
||||
<!-- HTTP web-consoles Authentication -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.type</name>
|
||||
<value>simple</value>
|
||||
<description>
|
||||
Defines authentication used for Oozie HTTP endpoint.
|
||||
Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.token.validity</name>
|
||||
<value>36000</value>
|
||||
<description>
|
||||
Indicates how long (in seconds) an authentication token is valid before it has
|
||||
to be renewed.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.signature.secret.file</name>
|
||||
<value>${user.home}/hadoop-http-auth-signature-secret</value>
|
||||
<description>
|
||||
The signature secret for signing the authentication tokens.
|
||||
If not set a random secret is generated at startup time.
|
||||
The same secret should be used for JT/NN/DN/TT configurations.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.cookie.domain</name>
|
||||
<value></value>
|
||||
<description>
|
||||
The domain to use for the HTTP cookie that stores the authentication token.
|
||||
In order to authentiation to work correctly across all Hadoop nodes web-consoles
|
||||
the domain must be correctly set.
|
||||
IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
|
||||
For this setting to work properly all nodes in the cluster must be configured
|
||||
to generate URLs with hostname.domain names on it.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Indicates if anonymous requests are allowed when using 'simple' authentication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.kerberos.principal</name>
|
||||
<value>HTTP/localhost@LOCALHOST</value>
|
||||
<description>
|
||||
Indicates the Kerberos principal to be used for HTTP endpoint.
|
||||
The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO specification.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.kerberos.keytab</name>
|
||||
<value>${user.home}/hadoop.keytab</value>
|
||||
<description>
|
||||
Location of the keytab file with the credentials for the principal.
|
||||
Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.relaxed.worker.version.check</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
By default datanodes refuse to connect to namenodes if their build
|
||||
revision (svn revision) do not match, and tasktrackers refuse to
|
||||
connect to jobtrackers if their build version (version, revision,
|
||||
user, and source checksum) do not match. This option changes the
|
||||
behavior of hadoop workers to only check for a version match (eg
|
||||
"1.0.2") but ignore the other build fields (revision, user, and
|
||||
source checksum).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
547
savanna/plugins/vanilla/resources/hdfs-default.xml
Normal file
547
savanna/plugins/vanilla/resources/hdfs-default.xml
Normal file
@ -0,0 +1,547 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
|
||||
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||
<!-- wish to modify from this file into hdfs-site.xml and change them -->
|
||||
<!-- there. If hdfs-site.xml does not already exist, create it. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.logging.level</name>
|
||||
<value>info</value>
|
||||
<description>The logging level for dfs namenode. Other values are "dir"(trac
|
||||
e namespace mutations), "block"(trace block under/over replications and block
|
||||
creations/deletions), or "all".</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.secondary.http.address</name>
|
||||
<value>0.0.0.0:50090</value>
|
||||
<description>
|
||||
The secondary namenode http server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.address</name>
|
||||
<value>0.0.0.0:50010</value>
|
||||
<description>
|
||||
The datanode server address and port for data transfer.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.http.address</name>
|
||||
<value>0.0.0.0:50075</value>
|
||||
<description>
|
||||
The datanode http server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.ipc.address</name>
|
||||
<value>0.0.0.0:50020</value>
|
||||
<description>
|
||||
The datanode ipc server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.handler.count</name>
|
||||
<value>3</value>
|
||||
<description>The number of server threads for the datanode.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.http.address</name>
|
||||
<value>0.0.0.0:50070</value>
|
||||
<description>
|
||||
The address and the base port where the dfs namenode web ui will listen on.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.enable</name>
|
||||
<value>false</value>
|
||||
<description>Decide if HTTPS(SSL) is supported on HDFS
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.need.client.auth</name>
|
||||
<value>false</value>
|
||||
<description>Whether SSL client certificate authentication is required
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.server.keystore.resource</name>
|
||||
<value>ssl-server.xml</value>
|
||||
<description>Resource file from which ssl server keystore
|
||||
information will be extracted
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.client.keystore.resource</name>
|
||||
<value>ssl-client.xml</value>
|
||||
<description>Resource file from which ssl client keystore
|
||||
information will be extracted
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.https.address</name>
|
||||
<value>0.0.0.0:50475</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.address</name>
|
||||
<value>0.0.0.0:50470</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.dns.interface</name>
|
||||
<value>default</value>
|
||||
<description>The name of the Network Interface from which a data node should
|
||||
report its IP address.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.dns.nameserver</name>
|
||||
<value>default</value>
|
||||
<description>The host name or IP address of the name server (DNS)
|
||||
which a DataNode should use to determine the host name used by the
|
||||
NameNode for communication and display purposes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.considerLoad</name>
|
||||
<value>true</value>
|
||||
<description>Decide if chooseTarget considers the target's load or not
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.default.chunk.view.size</name>
|
||||
<value>32768</value>
|
||||
<description>The number of bytes to view for a file on the browser.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.du.reserved</name>
|
||||
<value>0</value>
|
||||
<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.name.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/name</value>
|
||||
<description>Determines where on the local filesystem the DFS name node
|
||||
should store the name table(fsimage). If this is a comma-delimited list
|
||||
of directories then the name table is replicated in all of the
|
||||
directories, for redundancy. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.name.edits.dir</name>
|
||||
<value>${dfs.name.dir}</value>
|
||||
<description>Determines where on the local filesystem the DFS name node
|
||||
should store the transaction (edits) file. If this is a comma-delimited list
|
||||
of directories then the transaction file is replicated in all of the
|
||||
directories, for redundancy. Default value is same as dfs.name.dir
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.web.ugi</name>
|
||||
<value>webuser,webgroup</value>
|
||||
<description>The user account used by the web interface.
|
||||
Syntax: USERNAME,GROUP1,GROUP2, ...
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.permissions</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
If "true", enable permission checking in HDFS.
|
||||
If "false", permission checking is turned off,
|
||||
but all other behavior is unchanged.
|
||||
Switching from one parameter value to the other does not change the mode,
|
||||
owner or group of files or directories.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.permissions.supergroup</name>
|
||||
<value>supergroup</value>
|
||||
<description>The name of the group of super-users.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.token.enable</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
If "true", access tokens are used as capabilities for accessing datanodes.
|
||||
If "false", no access tokens are checked on accessing datanodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.key.update.interval</name>
|
||||
<value>600</value>
|
||||
<description>
|
||||
Interval in minutes at which namenode updates its access keys.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.token.lifetime</name>
|
||||
<value>600</value>
|
||||
<description>The lifetime of access tokens in minutes.</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>dfs.data.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/data</value>
|
||||
<description>Determines where on the local filesystem an DFS data node
|
||||
should store its blocks. If this is a comma-delimited
|
||||
list of directories, then data will be stored in all named
|
||||
directories, typically on different devices.
|
||||
Directories that do not exist are ignored.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.data.dir.perm</name>
|
||||
<value>755</value>
|
||||
<description>Permissions for the directories on on the local filesystem where
|
||||
the DFS data node store its blocks. The permissions can either be octal or
|
||||
symbolic.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
<description>Default block replication.
|
||||
The actual number of replications can be specified when the file is created.
|
||||
The default is used if replication is not specified in create time.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.max</name>
|
||||
<value>512</value>
|
||||
<description>Maximal block replication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.min</name>
|
||||
<value>1</value>
|
||||
<description>Minimal block replication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.size</name>
|
||||
<value>67108864</value>
|
||||
<description>The default block size for new files.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.df.interval</name>
|
||||
<value>60000</value>
|
||||
<description>Disk usage statistics refresh interval in msec.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.block.write.retries</name>
|
||||
<value>3</value>
|
||||
<description>The number of retries for writing blocks to the data nodes,
|
||||
before we signal failure to the application.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.blockreport.intervalMsec</name>
|
||||
<value>3600000</value>
|
||||
<description>Determines block reporting interval in milliseconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.blockreport.initialDelay</name> <value>0</value>
|
||||
<description>Delay for first block report in seconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.heartbeat.interval</name>
|
||||
<value>3</value>
|
||||
<description>Determines datanode heartbeat interval in seconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.handler.count</name>
|
||||
<value>10</value>
|
||||
<description>The number of server threads for the namenode.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.safemode.threshold.pct</name>
|
||||
<value>0.999f</value>
|
||||
<description>
|
||||
Specifies the percentage of blocks that should satisfy
|
||||
the minimal replication requirement defined by dfs.replication.min.
|
||||
Values less than or equal to 0 mean not to wait for any particular
|
||||
percentage of blocks before exiting safemode.
|
||||
Values greater than 1 will make safe mode permanent.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.safemode.min.datanodes</name>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Specifies the number of datanodes that must be considered alive
|
||||
before the name node exits safemode.
|
||||
Values less than or equal to 0 mean not to take the number of live
|
||||
datanodes into account when deciding whether to remain in safe mode
|
||||
during startup.
|
||||
Values greater than the number of datanodes in the cluster
|
||||
will make safe mode permanent.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.safemode.extension</name>
|
||||
<value>30000</value>
|
||||
<description>
|
||||
Determines extension of safe mode in milliseconds
|
||||
after the threshold level is reached.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.balance.bandwidthPerSec</name>
|
||||
<value>1048576</value>
|
||||
<description>
|
||||
Specifies the maximum amount of bandwidth that each datanode
|
||||
can utilize for the balancing purpose in term of
|
||||
the number of bytes per second.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.hosts</name>
|
||||
<value></value>
|
||||
<description>Names a file that contains a list of hosts that are
|
||||
permitted to connect to the namenode. The full pathname of the file
|
||||
must be specified. If the value is empty, all hosts are
|
||||
permitted.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.hosts.exclude</name>
|
||||
<value></value>
|
||||
<description>Names a file that contains a list of hosts that are
|
||||
not permitted to connect to the namenode. The full pathname of the
|
||||
file must be specified. If the value is empty, no hosts are
|
||||
excluded.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.max.objects</name>
|
||||
<value>0</value>
|
||||
<description>The maximum number of files, directories and blocks
|
||||
dfs supports. A value of zero indicates no limit to the number
|
||||
of objects that dfs supports.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.decommission.interval</name>
|
||||
<value>30</value>
|
||||
<description>Namenode periodicity in seconds to check if decommission is
|
||||
complete.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.decommission.nodes.per.interval</name>
|
||||
<value>5</value>
|
||||
<description>The number of nodes namenode checks if decommission is complete
|
||||
in each dfs.namenode.decommission.interval.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.interval</name>
|
||||
<value>3</value>
|
||||
<description>The periodicity in seconds with which the namenode computes
|
||||
repliaction work for datanodes. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.access.time.precision</name>
|
||||
<value>3600000</value>
|
||||
<description>The access time for HDFS file is precise upto this value.
|
||||
The default value is 1 hour. Setting a value of 0 disables
|
||||
access times for HDFS.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.support.append</name>
|
||||
<description>
|
||||
This option is no longer supported. HBase no longer requires that
|
||||
this option be enabled as sync is now enabled by default. See
|
||||
HADOOP-8230 for additional information.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.key.update-interval</name>
|
||||
<value>86400000</value>
|
||||
<description>The update interval for master key for delegation tokens
|
||||
in the namenode in milliseconds.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.token.max-lifetime</name>
|
||||
<value>604800000</value>
|
||||
<description>The maximum lifetime in milliseconds for which a delegation
|
||||
token is valid.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.token.renew-interval</name>
|
||||
<value>86400000</value>
|
||||
<description>The renewal interval for delegation token in milliseconds.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.failed.volumes.tolerated</name>
|
||||
<value>0</value>
|
||||
<description>The number of volumes that are allowed to
|
||||
fail before a datanode stops offering service. By default
|
||||
any volume failure will cause a datanode to shutdown.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.max.xcievers</name>
|
||||
<value>4096</value>
|
||||
<description>Specifies the maximum number of threads to use for transferring data
|
||||
in and out of the DN.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>false</value>
|
||||
<description>Whether clients should use datanode hostnames when
|
||||
connecting to datanodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.use.datanode.hostname</name>
|
||||
<value>false</value>
|
||||
<description>Whether datanodes should use datanode hostnames when
|
||||
connecting to other datanodes for data transfer.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.local.interfaces</name>
|
||||
<value></value>
|
||||
<description>A comma separated list of network interface names to use
|
||||
for data transfer between the client and datanodes. When creating
|
||||
a connection to read from or write to a datanode, the client
|
||||
chooses one of the specified interfaces at random and binds its
|
||||
socket to the IP of that interface. Individual names may be
|
||||
specified as either an interface name (eg "eth0"), a subinterface
|
||||
name (eg "eth0:0"), or an IP address (which may be specified using
|
||||
CIDR notation to match a range of IPs).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.kerberos.internal.spnego.principal</name>
|
||||
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
|
||||
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.invalidate.work.pct.per.iteration</name>
|
||||
<value>0.32f</value>
|
||||
<description>
|
||||
*Note*: Advanced property. Change with caution.
|
||||
This determines the percentage amount of block
|
||||
invalidations (deletes) to do over a single DN heartbeat
|
||||
deletion command. The final deletion count is determined by applying this
|
||||
percentage to the number of live nodes in the system.
|
||||
The resultant number is the number of blocks from the deletion list
|
||||
chosen for proper invalidation over a single heartbeat of a single DN.
|
||||
Value should be a positive, non-zero percentage in float notation (X.Yf),
|
||||
with 1.0f meaning 100%.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.replication.work.multiplier.per.iteration</name>
|
||||
<value>2</value>
|
||||
<description>
|
||||
*Note*: Advanced property. Change with caution.
|
||||
This determines the total amount of block transfers to begin in
|
||||
parallel at a DN, for replication, when such a command list is being
|
||||
sent over a DN heartbeat by the NN. The actual number is obtained by
|
||||
multiplying this multiplier with the total number of live nodes in the
|
||||
cluster. The result number is the number of blocks to begin transfers
|
||||
immediately for, per DN heartbeat. This number can be any positive,
|
||||
non-zero integer.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.check.stale.datanode</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
Indicate whether or not to check "stale" datanodes whose
|
||||
heartbeat messages have not been received by the namenode
|
||||
for more than a specified time interval. If this configuration
|
||||
parameter is set as true, the stale datanodes will be moved to
|
||||
the end of the target node list for reading. The writing will
|
||||
also try to avoid stale nodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.stale.datanode.interval</name>
|
||||
<value>30000</value>
|
||||
<description>
|
||||
Default time interval for marking a datanode as "stale", i.e., if
|
||||
the namenode has not received heartbeat msg from a datanode for
|
||||
more than this time interval, the datanode will be marked and treated
|
||||
as "stale" by default.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
1282
savanna/plugins/vanilla/resources/mapred-default.xml
Normal file
1282
savanna/plugins/vanilla/resources/mapred-default.xml
Normal file
File diff suppressed because it is too large
Load Diff
20
savanna/plugins/vanilla/resources/setup-general.sh.template
Normal file
20
savanna/plugins/vanilla/resources/setup-general.sh.template
Normal file
@ -0,0 +1,20 @@
#!/bin/bash

echo "----- Setting up Hadoop environment config"

{% for envconf in env_configs -%}
echo "{{envconf}}" >> /tmp/hadoop-env.sh
{% endfor %}

cat /etc/hadoop/hadoop-env.sh >> /tmp/hadoop-env.sh
mv /tmp/hadoop-env.sh /etc/hadoop/hadoop-env.sh


echo "----- Setting directory ownership and permissions"

#TODO(aignatov): Need to pass /mnt via args in the future, when the HDFS placement feature is ready
chown -R hadoop:hadoop /mnt
chmod -R 755 /mnt

{% block master %}
{% endblock %}
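setup-general.sh.template above is a plain Jinja2 template: env_configs is expected to be a list of lines to append to hadoop-env.sh, and the empty master block leaves room for a master-specific child template. A minimal rendering sketch, where the inline template string and the example value are stand-ins, not the plugin's actual rendering code:

import jinja2

# Stand-in for the contents of setup-general.sh.template.
template_text = (
    '{% for envconf in env_configs -%}\n'
    'echo "{{envconf}}" >> /tmp/hadoop-env.sh\n'
    '{% endfor %}'
)

# Each entry in env_configs becomes one appended line in hadoop-env.sh.
script = jinja2.Template(template_text).render(
    env_configs=['HADOOP_HEAPSIZE=1024'])  # example value only
print(script)  # -> echo "HADOOP_HEAPSIZE=1024" >> /tmp/hadoop-env.sh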
51
savanna/plugins/vanilla/utils.py
Normal file
@ -0,0 +1,51 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def get_node_groups(cluster, proc_list=list()):
    proc_list = [proc_list] if type(proc_list) in [str, unicode] else proc_list
    return [ng for ng in cluster.node_groups
            if set(proc_list).issubset(ng.node_processes)]


def get_instances(cluster, proc_list=list()):
    nodes = get_node_groups(cluster, proc_list)
    return reduce(lambda a, b: a + b.instances, nodes, [])


def get_namenode(cluster):
    nn = get_instances(cluster, "namenode")
    return nn[0] if nn else None


def get_jobtracker(cluster):
    jt = get_instances(cluster, "jobtracker")
    return jt[0] if jt else None


def get_datanodes(cluster):
    return get_instances(cluster, 'datanode')


def get_tasktrackers(cluster):
    return get_instances(cluster, 'tasktracker')


def get_secondarynamenodes(cluster):
    return get_instances(cluster, 'secondarynamenode')


def generate_host_names(nodes):
    return "\n".join([n.hostname for n in nodes])
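These helpers are the bridge from a cluster model to concrete hosts. A hypothetical usage sketch; the cluster variable is assumed to be an already-populated savanna.db.models.Cluster, and the idea of a slaves file is only an example, not something this commit shows:

from savanna.plugins.vanilla import utils as u

# 'cluster' is assumed to already exist, with node groups whose
# node_processes include "namenode", "datanode", "tasktracker", etc.
namenode = u.get_namenode(cluster)                    # single instance, or None
datanodes = u.get_datanodes(cluster)                  # list of instances
slaves_content = u.generate_host_names(datanodes)     # "host1\nhost2\n..."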
0
savanna/tests/unit/plugins/__init__.py
Normal file
0
savanna/tests/unit/plugins/vanilla/__init__.py
Normal file
53
savanna/tests/unit/plugins/vanilla/test_plugin.py
Normal file
@ -0,0 +1,53 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest2

from savanna.db import models as m
from savanna.plugins.vanilla import exceptions as ex
from savanna.plugins.vanilla import plugin as p


class VanillaPluginTest(unittest2.TestCase):
    def setUp(self):
        self.pl = p.VanillaProvider()
        self.cl = m.Cluster("cluster1", "tenant1", "vanilla", "1.1.2")
        self.ng1 = m.NodeGroup("nn", "f1", ["namenode"], 1)
        self.ng2 = m.NodeGroup("jt", "f1", ["jobtracker"], 1)
        self.ng3 = m.NodeGroup("tt", "f1", ["tasktracker"], 10)

    def test_validate(self):
        self.cl.node_groups = [self.ng1]
        self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleNameNodeException):
            self.ng1.count = 0
            self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleNameNodeException):
            self.ng1.count = 2
            self.pl.validate(self.cl)
        self.ng1.count = 1

        self.cl.node_groups.append(self.ng2)
        self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleJobTrackerException):
            self.ng2.count = 2
            self.pl.validate(self.cl)

        self.cl.node_groups.append(self.ng3)
        self.ng2.count = 1
        self.pl.validate(self.cl)
        with self.assertRaises(ex.TaskTrackersWithoutJobTracker):
            self.ng2.count = 0
            self.pl.validate(self.cl)
58
savanna/tests/unit/plugins/vanilla/test_utils.py
Normal file
@ -0,0 +1,58 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest2

from savanna.db import models as m
from savanna.plugins.vanilla import utils as u


class VanillaUtilsTest(unittest2.TestCase):
    def setUp(self):
        self.c1 = m.Cluster("cluster1", "tenant1", "vanilla", "1.1.2")
        self.ng1 = m.NodeGroup("master", "f1", ["jt", "nn"], 1)
        self.ng2 = m.NodeGroup("workers", "f1", ["tt", "dn"], 3)
        self.ng3 = m.NodeGroup("sn", "f1", ["dn"], 1)
        self.c1.node_groups = [self.ng1, self.ng2, self.ng3]

        self.ng1.instances = [m.Instance("ng1", "i1", "master")]
        self.ng2.instances = [m.Instance("ng2", "i2", "worker1"),
                              m.Instance("ng2", "i3", "worker2"),
                              m.Instance("ng2", "i4", "worker3")]
        self.ng3.instances = [m.Instance("ng3", "i5", "sn")]

    def test_get_node_groups(self):
        self.assertListEqual(u.get_node_groups(self.c1), self.c1.node_groups)
        self.assertListEqual(u.get_node_groups(self.c1, ["wrong-process"]), [])
        self.assertListEqual(u.get_node_groups(self.c1, ['dn', 'tt']),
                             [self.ng2])
        self.assertListEqual(u.get_node_groups(self.c1, 'dn'),
                             [self.ng2, self.ng3])
        self.assertListEqual(u.get_node_groups(self.c1, ['dn']),
                             [self.ng2, self.ng3])
        self.assertListEqual(u.get_node_groups(self.c1, ['jt', 'tt']), [])

    def test_get_instances(self):
        self.assertEquals(len(u.get_instances(self.c1)), 5)
        self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
        self.assertListEqual(u.get_instances(self.c1, 'nn'),
                             self.ng1.instances)
        self.assertListEqual(u.get_instances(self.c1, 'dn'),
                             self.ng2.instances + self.ng3.instances)

    def test_generate_lines_from_list(self):
        self.assertEquals(u.generate_host_names(self.ng2.instances),
                          "worker1\nworker2\nworker3")
        self.assertEquals(u.generate_host_names([]), "")
4
setup.py
@ -27,8 +27,8 @@ setuptools.setup(
     cmdclass=common_setup.get_cmdclass(),
     packages=setuptools.find_packages(exclude=['bin']),
     package_data={'savanna': [
-        'resources/*.template',
-        'resources/*.xml'
+        'plugins/vanilla/resources/*.template',
+        'plugins/vanilla/resources/*.xml'
     ]},
     install_requires=requires,
     dependency_links=depend_links,
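The package_data change is what ships the plugin's XML defaults and shell template with the installed package. A minimal lookup sketch, assuming pkg_resources (setuptools) is available; the file name is just one of the resources added in this commit:

import pkg_resources

# Resolves to the installed location of a resource declared in package_data.
path = pkg_resources.resource_filename(
    'savanna', 'plugins/vanilla/resources/setup-general.sh.template')
with open(path) as f:
    template_text = f.read()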