diff --git a/MANIFEST.in b/MANIFEST.in
index 88d11b96..3c90fee1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -9,6 +9,7 @@ include savanna/db/migration/alembic_migrations/versions/README
recursive-include savanna/locale *
+include savanna/plugins/intel/resources/*.xml
include savanna/plugins/vanilla/resources/*.xml
include savanna/plugins/vanilla/resources/*.sh
include savanna/plugins/vanilla/resources/*.sql
diff --git a/savanna/exceptions.py b/savanna/exceptions.py
index 40cbba35..ea142876 100644
--- a/savanna/exceptions.py
+++ b/savanna/exceptions.py
@@ -182,3 +182,10 @@ class ThreadException(SavannaException):
self.message = "An error occurred in thread '%s': %s" % (
thread_description, str(e))
self.code = "THREAD_EXCEPTION"
+
+
+class NotImplementedException(SavannaException):
+ code = "NOT_IMPLEMENTED"
+
+ def __init__(self, feature):
+ self.message = "Feature '%s' is not implemented" % feature
diff --git a/savanna/plugins/intel/__init__.py b/savanna/plugins/intel/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/savanna/plugins/intel/client/__init__.py b/savanna/plugins/intel/client/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/savanna/plugins/intel/client/client.py b/savanna/plugins/intel/client/client.py
new file mode 100644
index 00000000..0617094b
--- /dev/null
+++ b/savanna/plugins/intel/client/client.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.plugins.intel.client import cluster
+from savanna.plugins.intel.client import nodes
+from savanna.plugins.intel.client import params
+from savanna.plugins.intel.client import rest as r
+from savanna.plugins.intel.client import services
+
+
+class IntelClient(object):
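+    """Facade for the IDH manager REST API.
+
+    Groups the cluster, nodes, params and services sub-clients behind a
+    single object sharing one REST connection.
+    """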
+ def __init__(self, manager_ip, cluster_name):
+ #TODO(alazarev) make credentials configurable (bug #1262881)
+ self.rest = r.RESTClient(manager_ip, 'admin', 'admin')
+ self.cluster_name = cluster_name
+ self._ctx = self
+
+ self.cluster = cluster.Cluster(self)
+ self.nodes = nodes.Nodes(self)
+ self.params = params.Params(self)
+ self.services = services.Services(self)
diff --git a/savanna/plugins/intel/client/cluster.py b/savanna/plugins/intel/client/cluster.py
new file mode 100644
index 00000000..2cc62dac
--- /dev/null
+++ b/savanna/plugins/intel/client/cluster.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.plugins.intel.client import context as c
+from savanna.plugins.intel.client import session
+
+
+class Cluster(c.IntelContext):
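+    """Cluster-level calls: create, get, install software, upload keys."""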
+ def create(self):
+ url = '/cluster'
+ data = {
+ 'name': self.cluster_name,
+ 'dnsresolution': True,
+ 'acceptlicense': True
+ }
+
+ return self.rest.post(url, data)
+
+ def get(self):
+ url = '/cluster/%s' % self.cluster_name
+ return self.rest.get(url)
+
+ def install_software(self, nodes):
+ _nodes = [{'hostname': host} for host in nodes]
+ url = '/cluster/%s/nodes/commands/installsoftware' % self.cluster_name
+ session_id = self.rest.post(url, _nodes)['sessionID']
+ return session.wait(self, session_id)
+
+ def upload_authzkeyfile(self, authzkeyfile):
+ url = '/cluster/%s/upload/authzkey' % self.cluster_name
+ return self.rest.post(url,
+ files={'file': authzkeyfile})['upload result']
diff --git a/savanna/plugins/intel/client/context.py b/savanna/plugins/intel/client/context.py
new file mode 100644
index 00000000..13897e8b
--- /dev/null
+++ b/savanna/plugins/intel/client/context.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class IntelContext(object):
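+    """Shares the cluster name and REST client between API wrappers."""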
+ def __init__(self, ctx):
+ self._ctx = ctx._ctx
+ self.cluster_name = ctx.cluster_name
+ self.rest = ctx.rest
diff --git a/savanna/plugins/intel/client/nodes.py b/savanna/plugins/intel/client/nodes.py
new file mode 100644
index 00000000..45cefcda
--- /dev/null
+++ b/savanna/plugins/intel/client/nodes.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.plugins.intel.client import context as c
+from savanna.plugins.intel.client import session
+from savanna.plugins.intel import exceptions as iex
+
+
+class Nodes(c.IntelContext):
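+    """Node management: add, list, configure, stop and delete nodes."""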
+ def add(self, nodes, rack, username, path_to_key, keypass=''):
+ hosts = {
+ 'method': 'useauthzkeyfile',
+ 'nodeinfo': map(lambda host: {
+ 'hostname': host,
+ 'username': username,
+ 'passphrase': keypass,
+ 'authzkeyfile': path_to_key,
+ 'rackName': rack
+ }, nodes)
+ }
+
+ url = '/cluster/%s/nodes' % self.cluster_name
+ resp = self.rest.post(url, hosts)['items']
+
+ for node_info in resp:
+ if node_info['info'] != 'Connected':
+ raise iex.IntelPluginException(
+ 'Error adding nodes: %s' % node_info['iporhostname'])
+
+ def get(self):
+ url = '/cluster/%s/nodes' % self.cluster_name
+ return self.rest.get(url)
+
+ def get_status(self, node):
+ url = '/cluster/%s/nodes/%s' % (self.cluster_name, node)
+ return self.rest.get(url)['status']
+
+ def delete(self, node):
+ url = '/cluster/%s/nodes/%s' % (self.cluster_name, node)
+ return self.rest.delete(url)
+
+ def config(self, force=False):
+ url = ('/cluster/%s/nodes/commands/confignodes/%s'
+ % (self.cluster_name, 'force' if force else 'noforce'))
+
+ session_id = self.rest.post(url)['sessionID']
+ return session.wait(self, session_id)
+
+ def stop(self, nodes):
+ url = '/cluster/%s/nodes/commands/stopnodes' % self.cluster_name
+ data = [{'hostname': host} for host in nodes]
+
+ return self.rest.post(url, data)
diff --git a/savanna/plugins/intel/client/params.py b/savanna/plugins/intel/client/params.py
new file mode 100644
index 00000000..239bb70f
--- /dev/null
+++ b/savanna/plugins/intel/client/params.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna import exceptions
+from savanna.plugins.intel.client import context as c
+
+
+class BaseParams(c.IntelContext):
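+    """Configuration parameters of a single service (add and update)."""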
+ def __init__(self, ctx, service):
+ super(BaseParams, self).__init__(ctx)
+ self.service = service
+
+ def add(self, item, value, desc=''):
+ data = {
+ 'editdesc': desc,
+ 'items': [
+ {
+ 'type': self.service,
+ 'item': item,
+ 'value': value,
+ 'desc': desc
+ }
+ ]
+ }
+ url = ('/cluster/%s/configuration/%s'
+ % (self.cluster_name, self.service))
+ return self.rest.post(url, data)
+
+ def update(self, item, value, desc='', nodes=None):
+ data = {
+ 'editdesc': desc,
+ 'items': [
+ {
+ 'type': self.service,
+ 'item': item,
+ 'value': value
+ }
+ ]
+ }
+ if nodes:
+ data = {
+ 'editdesc': desc,
+ 'items': map(lambda node: {
+ 'type': self.service,
+ 'item': item,
+ 'value': value,
+ 'hostname': node
+ }, nodes)
+ }
+
+ url = ('/cluster/%s/configuration/%s'
+ % (self.cluster_name, self.service))
+ return self.rest.put(url, data)
+
+ def get(self, hosts, item):
+ raise exceptions.NotImplementedException("BaseParams.get")
+
+
+class Params(c.IntelContext):
+ def __init__(self, ctx):
+ super(Params, self).__init__(ctx)
+ self.hadoop = BaseParams(self, 'hadoop')
+ self.hdfs = BaseParams(self, 'hdfs')
+ self.mapred = BaseParams(self, 'mapred')
diff --git a/savanna/plugins/intel/client/rest.py b/savanna/plugins/intel/client/rest.py
new file mode 100644
index 00000000..19ebc1fa
--- /dev/null
+++ b/savanna/plugins/intel/client/rest.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import requests
+from requests import auth
+
+from savanna.openstack.common import log as logging
+from savanna.plugins.intel import exceptions as iex
+
+
+LOG = logging.getLogger(__name__)
+
+
+class RESTClient(object):
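+    """Thin wrapper over the manager REST API using HTTP basic auth."""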
+ def __init__(self, manager_ip, auth_username, auth_password):
+ #TODO(alazarev) make port configurable (bug #1262895)
+ self.base_url = ('https://%s:9443/restapi/intelcloud/api/v1'
+ % manager_ip)
+ LOG.debug("Connecting to manager with URL of %s", self.base_url)
+
+ self.auth = auth.HTTPBasicAuth(auth_username, auth_password)
+
+ def get(self, url):
+ url = self.base_url + url
+ LOG.debug("Sending GET to URL of %s", url)
+ r = requests.get(url, verify=False, auth=self.auth)
+ return self._check_response(r)
+
+ def post(self, url, data=None, files=None):
+ url = self.base_url + url
+ LOG.debug("Sending POST to URL '%s' (%s files): %s", url,
+ len(files) if files else 0,
+ data if data else 'no data')
+ r = requests.post(url, data=json.dumps(data) if data else None,
+ verify=False, auth=self.auth, files=files)
+ return self._check_response(r)
+
+ def delete(self, url):
+ url = self.base_url + url
+ LOG.debug("Sending DELETE to URL of %s", url)
+ r = requests.delete(url, verify=False, auth=self.auth)
+ return self._check_response(r)
+
+ def put(self, url, data=None):
+ url = self.base_url + url
+ if data:
+ LOG.debug("Sending PUT to URL of %s: %s", url, data)
+ r = requests.put(url, data=json.dumps(data), verify=False,
+ auth=self.auth)
+ else:
+ LOG.debug("Sending PUT to URL of %s with no data", url)
+ r = requests.put(url, verify=False, auth=self.auth)
+
+ return self._check_response(r)
+
+ def _check_response(self, resp):
+ LOG.debug("Response with HTTP code %s, and content of %s",
+ resp.status_code, resp.text)
+ if not resp.ok:
+ raise iex.IntelPluginException(
+ "Request to manager returned with code '%s', reason '%s' "
+ "and message '%s'" % (resp.status_code, resp.reason,
+ json.loads(resp.text)['message']))
+ else:
+ return json.loads(resp.text)
diff --git a/savanna/plugins/intel/client/services.py b/savanna/plugins/intel/client/services.py
new file mode 100644
index 00000000..bc8d1e74
--- /dev/null
+++ b/savanna/plugins/intel/client/services.py
@@ -0,0 +1,137 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna import context
+from savanna.openstack.common import log as logging
+from savanna.plugins.intel.client import context as c
+from savanna.plugins.intel.client import session
+from savanna.plugins.intel import exceptions as iex
+
+LOG = logging.getLogger(__name__)
+
+
+class BaseService(c.IntelContext):
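+    """Start/stop, status and role assignment for one IDH service."""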
+ def __init__(self, ctx, service_name):
+ super(BaseService, self).__init__(ctx)
+ self.service = service_name
+
+ def start(self):
+ url = ('/cluster/%s/services/%s/commands/start'
+ % (self.cluster_name, self.service))
+
+ self.rest.post(url)
+
+ timeout = 120
+ cur_time = 0
+ while cur_time < timeout:
+ context.sleep(2)
+ if self.status() == 'running':
+ break
+ else:
+ cur_time += 2
+ else:
+ raise iex.IntelPluginException(
+ "Service '%s' has failed to start in %s seconds"
+ % (self.service, timeout))
+
+ def stop(self):
+ url = ('/cluster/%s/services/%s/commands/stop'
+ % (self.cluster_name, self.service))
+
+ return self.rest.post(url)
+
+ def status(self):
+ url = '/cluster/%s/services' % self.cluster_name
+ statuses = self.rest.get(url)['items']
+ for st in statuses:
+ if st['serviceName'] == self.service:
+ return st['status']
+
+ raise iex.IntelPluginException(
+ "Service '%s' is not installed on cluster '%s'"
+ % (self.service, self.cluster_name))
+
+ def get_nodes(self):
+ url = '/cluster/%s/services/%s' % (self.cluster_name, self.service)
+ return self.rest.get(url)
+
+ def add_nodes(self, role, nodes):
+ url = ('/cluster/%s/services/%s/roles'
+ % (self.cluster_name, self.service))
+
+ data = map(lambda host: {
+ 'rolename': role,
+ 'hostname': host
+ }, nodes)
+
+ return self.rest.post(url, data)
+
+
+class HDFSService(BaseService):
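+    """HDFS-specific calls: format, decommission, datanode status."""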
+ def format(self, force=False):
+ url = ('/cluster/%s/services/hdfs/commands/hdfsformat/%s'
+ % (self.cluster_name, 'force' if force else 'noforce'))
+
+ session_id = self.rest.post(url)['sessionID']
+ return session.wait(self, session_id)
+
+ def decommission_nodes(self, nodes, force=False):
+ url = ('/cluster/%s/nodes/commands/decommissionnodes/%s'
+ % (self.cluster_name, 'force' if force else 'noforce'))
+ data = map(lambda host: {
+ 'hostname': host
+ }, nodes)
+
+ return self.rest.post(url, data)
+
+ def get_datanodes_status(self):
+ url = '/cluster/%s/nodes/commands/datanodes/status' % self.cluster_name
+ return self.rest.get(url)['items']
+
+ def get_datanode_status(self, datanode):
+ stats = self.get_datanodes_status()
+ for stat in stats:
+ if stat['hostname'] == datanode:
+ return stat['status'].strip()
+
+ raise iex.IntelPluginException(
+ "Datanode service is is not installed on node '%s'" % datanode)
+
+
+class Services(c.IntelContext):
+ def __init__(self, ctx):
+ super(Services, self).__init__(ctx)
+ self.hdfs = HDFSService(self, 'hdfs')
+ self.mapred = BaseService(self, 'mapred')
+ self.hive = BaseService(self, 'hive')
+ self.oozie = BaseService(self, 'oozie')
+
+ def add(self, services):
+ _services = map(lambda service: {
+ 'serviceName': service,
+ 'type': service
+ }, services)
+ url = '/cluster/%s/services' % self.cluster_name
+
+ return self.rest.post(url, _services)
+
+ def get_services(self):
+ url = '/cluster/%s/services' % self.cluster_name
+
+ return self.rest.get(url)
+
+ def delete_service(self, service):
+ url = '/cluster/%s/services/%s' % (self.cluster_name, service)
+ return self.rest.delete(url)
diff --git a/savanna/plugins/intel/client/session.py b/savanna/plugins/intel/client/session.py
new file mode 100644
index 00000000..d4b02342
--- /dev/null
+++ b/savanna/plugins/intel/client/session.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna import context
+from savanna.openstack.common import log as logging
+from savanna.plugins.intel import exceptions as iex
+
+LOG = logging.getLogger(__name__)
+
+
+def get(ctx, session_id):
+ url = '/cluster/%s/session/%s' % (ctx.cluster_name, session_id)
+ return ctx.rest.get(url)
+
+
+def wait(ctx, session_id):
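+    """Poll a manager session until _ALLFINISH or the timeout expires."""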
+ #TODO(lazarev) add check on savanna cluster state (exit on delete)
+ #TODO(alazarev) make configurable (bug #1262897)
+ timeout = 4*60*60 # 4 hours
+ cur_time = 0
+ while cur_time < timeout:
+ info_items = get(ctx, session_id)['items']
+ for item in info_items:
+ progress = item['nodeprogress']
+ if progress['info'].strip() == '_ALLFINISH':
+ return
+ else:
+ context.sleep(10)
+ cur_time += 10
+
+ debug_msg = 'Hostname: %s\nInfo: %s'
+ debug_msg = debug_msg % (progress['hostname'], progress['info'])
+ LOG.debug(debug_msg)
+ else:
+ raise iex.IntelPluginException(
+ "Cluster '%s' has failed to start in %s minutes"
+ % (ctx.cluster_name, timeout / 60))
diff --git a/savanna/plugins/intel/config_helper.py b/savanna/plugins/intel/config_helper.py
new file mode 100644
index 00000000..9b4cefee
--- /dev/null
+++ b/savanna/plugins/intel/config_helper.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.plugins import provisioning as p
+from savanna.utils import xmlutils as x
+
+
+CORE_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale(
+ 'plugins/intel/resources/hadoop-default.xml')
+
+HDFS_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale(
+ 'plugins/intel/resources/hdfs-default.xml')
+
+MAPRED_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale(
+ 'plugins/intel/resources/mapred-default.xml')
+
+XML_CONFS = {
+ "Hadoop": [CORE_DEFAULT],
+ "HDFS": [HDFS_DEFAULT],
+ "MapReduce": [MAPRED_DEFAULT]
+}
+
+IDH_TARBALL_URL = p.Config('IDH tarball URL', 'general', 'cluster', priority=1,
+ default_value='http://repo1.intelhadoop.com:3424/'
+ 'setup/setup-intelhadoop-'
+ '2.5.1-en-evaluation.RHEL.tar.gz')
+
+OS_REPO_URL = p.Config('OS repository URL', 'general', 'cluster', priority=1,
+ is_optional=True,
+ default_value='http://mirror.centos.org/'
+ 'centos-6/6/os/x86_64')
+
+IDH_REPO_URL = p.Config('IDH repository URL', 'general', 'cluster',
+ priority=1, is_optional=True,
+ default_value='http://repo1.intelhadoop.com:3424'
+ '/evaluation/en/RHEL/2.5.1/rpm')
+
+ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
+ config_type="bool", priority=1,
+ default_value=True, is_optional=True)
+
+HIDDEN_CONFS = ['fs.default.name', 'dfs.name.dir', 'dfs.data.dir',
+ 'mapred.job.tracker', 'mapred.system.dir', 'mapred.local.dir']
+
+CLUSTER_WIDE_CONFS = ['dfs.block.size', 'dfs.permissions', 'dfs.replication',
+ 'dfs.replication.min', 'dfs.replication.max',
+ 'io.file.buffer.size', 'mapreduce.job.counters.max',
+ 'mapred.output.compress', 'io.compression.codecs',
+ 'mapred.output.compression.codec',
+ 'mapred.output.compression.type',
+ 'mapred.compress.map.output',
+ 'mapred.map.output.compression.codec']
+
+PRIORITY_1_CONFS = ['dfs.datanode.du.reserved',
+ 'dfs.datanode.failed.volumes.tolerated',
+ 'dfs.datanode.max.xcievers', 'dfs.datanode.handler.count',
+ 'dfs.namenode.handler.count', 'mapred.child.java.opts',
+ 'mapred.jobtracker.maxtasks.per.job',
+ 'mapred.job.tracker.handler.count',
+ 'mapred.map.child.java.opts',
+ 'mapred.reduce.child.java.opts',
+ 'io.sort.mb', 'mapred.tasktracker.map.tasks.maximum',
+ 'mapred.tasktracker.reduce.tasks.maximum']
+
+PRIORITY_1_CONFS += CLUSTER_WIDE_CONFS
+
+CFG_TYPE = {
+ "Boolean": "bool",
+ "String": "string",
+ "Integer": "int",
+ "Choose": "string",
+ "Class": "string",
+ "Directory": "string",
+ "Float": "string",
+ "Int_range": "string",
+}
+
+
+def _initialise_configs():
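+    """Build Savanna Config objects from the bundled *-default.xml files."""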
+ configs = []
+
+ for service, config_lists in XML_CONFS.iteritems():
+ for config_list in config_lists:
+ for config in config_list:
+ if config['name'] not in HIDDEN_CONFS:
+ cfg = p.Config(
+ config['name'], service, "cluster", is_optional=True,
+ config_type="string",
+ default_value=str(config['value']),
+ description=config['description'])
+
+ if config.get('type'):
+ cfg.config_type = CFG_TYPE[config['type']]
+ if cfg.config_type == 'bool':
+ cfg.default_value = cfg.default_value == 'true'
+ if cfg.config_type == 'int':
+ if cfg.default_value:
+ cfg.default_value = int(cfg.default_value)
+ else:
+ cfg.config_type = 'string'
+ if config['name'] in PRIORITY_1_CONFS:
+ cfg.priority = 1
+ configs.append(cfg)
+
+ configs.append(IDH_TARBALL_URL)
+ configs.append(IDH_REPO_URL)
+ configs.append(OS_REPO_URL)
+ configs.append(ENABLE_SWIFT)
+ return configs
+
+
+PLUGIN_CONFIGS = _initialise_configs()
+
+
+def get_plugin_configs():
+ return PLUGIN_CONFIGS
diff --git a/savanna/plugins/intel/exceptions.py b/savanna/plugins/intel/exceptions.py
new file mode 100644
index 00000000..fccb5500
--- /dev/null
+++ b/savanna/plugins/intel/exceptions.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import savanna.exceptions as e
+
+
+class NotSingleManagerException(e.SavannaException):
+ message = "Intel hadoop cluster should contain only 1 Intel " \
+ "Manager instance. Actual manager count is %s"
+
+ def __init__(self, mng_count):
+ self.message = self.message % mng_count
+ self.code = "NOT_SINGLE_MANAGER"
+
+
+class IntelPluginException(e.SavannaException):
+ def __init__(self, message):
+ self.message = message
+ self.code = "INTEL_PLUGIN_EXCEPTION"
diff --git a/savanna/plugins/intel/installer.py b/savanna/plugins/intel/installer.py
new file mode 100644
index 00000000..3d1eca90
--- /dev/null
+++ b/savanna/plugins/intel/installer.py
@@ -0,0 +1,411 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import six
+import telnetlib
+
+from savanna import conductor
+from savanna import context
+from savanna.openstack.common import log as logging
+from savanna.plugins.general import utils as u
+from savanna.plugins.intel.client import client as c
+from savanna.plugins.intel import config_helper as c_helper
+from savanna.plugins.intel import exceptions as iex
+from savanna.swift import swift_helper as swift
+from savanna.utils import crypto
+
+
+conductor = conductor.API
+LOG = logging.getLogger(__name__)
+
+_INST_CONF_TEMPLATE = """
+network_interface=eth0
+mode=silent
+accept_jdk_license=accept
+how_to_setup_os_repo=2
+os_repo=%s
+os_repo_username=
+os_repo_password=
+os_repo_proxy=
+how_to_setup_idh_repo=1
+idh_repo=%s
+idh_repo_username=
+idh_repo_password=
+idh_repo_proxy=
+firewall_selinux_setting=1"""
+
+SWIFT_LIB_URL = \
+ 'http://savanna-files.mirantis.com/hadoop-swift/hadoop-swift-latest.jar'
+
+
+def install_manager(cluster):
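+    """Install the IDH manager on the 'manager' node.
+
+    Downloads and unpacks the IDH tarball, runs the silent installer and
+    waits until the manager answers on port 9443.
+    """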
+ LOG.info("Starting Install Manager Process")
+ mng_instance = u.get_instance(cluster, 'manager')
+
+ idh_tarball_path = \
+ cluster.cluster_configs['general'].get('IDH tarball URL')
+ if not idh_tarball_path:
+ idh_tarball_path = c_helper.IDH_TARBALL_URL.default_value
+
+ idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1]
+ idh_dir = idh_tarball_filename[:idh_tarball_filename.find('.tar.gz')]
+ LOG.info("IDH tgz will be retrieved from: \'%s\'", idh_tarball_path)
+
+ idh_repo = cluster.cluster_configs['general'].get('IDH repository URL')
+ if not idh_repo:
+ idh_repo = c_helper.IDH_REPO_URL.default_value
+
+ os_repo = cluster.cluster_configs['general'].get('OS repository URL')
+ if not os_repo:
+ os_repo = c_helper.OS_REPO_URL.default_value
+
+ idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir
+
+ with mng_instance.remote() as r:
+ LOG.info("Download IDH manager ")
+ try:
+ r.execute_command('curl -O %s 2>&1' % idh_tarball_path)
+ except Exception as e:
+ raise RuntimeError("Unable to download IDH manager from %s",
+ idh_tarball_path, e)
+
+ # unpack archive
+ LOG.info("Unpack manager %s ", idh_tarball_filename)
+ try:
+ r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename)
+ except Exception as e:
+ raise RuntimeError("Unable to unpack tgz %s",
+ idh_tarball_filename, e)
+
+ # install idh
+ LOG.debug("Install manager with %s : ", idh_install_cmd)
+ inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo)
+ r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf)
+ #TODO(alazarev) make timeout configurable (bug #1262897)
+ r.execute_command(idh_install_cmd, timeout=3600)
+
+        # fix nginx permissions bug
+ r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp '
+ '/var/lib/nginx/tmp/client_body')
+
+        # wait for the IDH manager to start
+        #TODO(alazarev) make timeout configurable (bug #1262897)
+        timeout = 600
+        cur_time = 0
+        LOG.debug("Waiting up to %s seconds for Manager to start", timeout)
+        while cur_time < timeout:
+            try:
+                telnetlib.Telnet(mng_instance.management_ip, 9443)
+                break
+            except IOError:
+                cur_time += 2
+                context.sleep(2)
+        else:
+            message = ("IDH Manager failed to start in %s minutes on node "
+                       "'%s' of cluster '%s'"
+                       % (timeout / 60, mng_instance.management_ip,
+                          cluster.name))
+ LOG.error(message)
+ raise iex.IntelPluginException(message)
+
+
+def configure_os(cluster):
+ instances = u.get_instances(cluster)
+ configure_os_from_instances(cluster, instances)
+
+
+def create_hadoop_ssh_keys(cluster):
+ private_key, public_key = crypto.generate_key_pair()
+ extra = {
+ 'hadoop_private_ssh_key': private_key,
+ 'hadoop_public_ssh_key': public_key
+ }
+ return conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
+
+
+def configure_os_from_instances(cluster, instances):
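+    """Prepare instances for IDH: set hostname, disable SELinux and
+    iptables, create the 'hadoop' user and optionally fetch the Swift jar.
+    """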
+ for instance in instances:
+ with instance.remote() as remote:
+ LOG.debug("Configuring OS settings on %s : ", instance.hostname())
+
+ # configure hostname, RedHat/Centos specific
+ remote.replace_remote_string('/etc/sysconfig/network',
+ 'HOSTNAME=.*',
+ 'HOSTNAME=%s' % instance.hostname())
+            # disable SELinux and iptables because the Intel distribution
+            # requires them to be off
+ remote.execute_command('sudo /usr/sbin/setenforce 0')
+ remote.replace_remote_string('/etc/selinux/config',
+ 'SELINUX=.*', 'SELINUX=disabled')
+ # disable iptables
+ remote.execute_command('sudo /sbin/service iptables stop')
+ remote.execute_command('sudo /sbin/chkconfig iptables off')
+
+ # create 'hadoop' user
+ remote.write_files_to({
+ 'id_rsa': cluster.extra.get('hadoop_private_ssh_key'),
+ 'authorized_keys': cluster.extra.get('hadoop_public_ssh_key')
+ })
+ remote.execute_command(
+ 'sudo useradd hadoop && '
+ 'sudo sh -c \'echo "hadoop ALL=(ALL) NOPASSWD:ALL" '
+ '>> /etc/sudoers\' && '
+ 'sudo mkdir -p /home/hadoop/.ssh/ && '
+ 'sudo mv id_rsa authorized_keys /home/hadoop/.ssh && '
+ 'sudo chown -R hadoop:hadoop /home/hadoop/.ssh && '
+ 'sudo chmod 600 /home/hadoop/.ssh/{id_rsa,authorized_keys}')
+
+ swift_enable = \
+ cluster.cluster_configs['general'].get('Enable Swift')
+ if not swift_enable:
+ swift_enable = c_helper.ENABLE_SWIFT.default_value
+
+ if swift_enable:
+ swift_lib_path = '/usr/lib/hadoop/lib/hadoop-swift-latest.jar'
+ cmd = ('sudo curl \'%s\' -o %s --create-dirs'
+ % (SWIFT_LIB_URL, swift_lib_path))
+ remote.execute_command(cmd)
+
+
+def _configure_services(client, cluster):
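+    """Register services in the manager and assign roles to hosts."""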
+ nn_host = u.get_namenode(cluster).hostname()
+ snn = u.get_secondarynamenodes(cluster)
+ snn_host = snn[0].hostname() if snn else None
+ jt_host = u.get_jobtracker(cluster).hostname()
+ dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)]
+ tt_hosts = [tt.hostname() for tt in u.get_tasktrackers(cluster)]
+
+    oozie = u.get_oozie(cluster)
+    oozie_host = oozie.hostname() if oozie else None
+    hive = u.get_hiveserver(cluster)
+    hive_host = hive.hostname() if hive else None
+
+ services = []
+ if u.get_namenode(cluster):
+ services += ['hdfs']
+
+ if u.get_jobtracker(cluster):
+ services += ['mapred']
+
+ if oozie_host:
+ services += ['oozie']
+
+ if hive_host:
+ services += ['hive']
+
+ LOG.debug("Add services: %s" % ', '.join(services))
+ client.services.add(services)
+
+ LOG.debug("Assign roles to hosts")
+ client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])
+
+ client.services.hdfs.add_nodes('DataNode', dn_hosts)
+ if snn:
+ client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])
+
+ if oozie_host:
+ client.services.oozie.add_nodes('Oozie', [oozie_host])
+
+ if hive_host:
+ client.services.hive.add_nodes('HiveServer', [hive_host])
+
+ client.services.mapred.add_nodes('JobTracker', [jt_host])
+ client.services.mapred.add_nodes('TaskTracker', tt_hosts)
+
+
+def _configure_storage(client, cluster):
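+    """Set dfs.name.dir and dfs.data.dir from node group storage paths."""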
+ datanode_ng = u.get_node_groups(cluster, 'datanode')[0]
+ storage_paths = datanode_ng.storage_paths()
+ dn_hosts = [i.hostname() for i in u.get_datanodes(cluster)]
+
+ name_dir_param = ",".join(
+ [st_path + '/dfs/name' for st_path in storage_paths])
+ data_dir_param = ",".join(
+ [st_path + '/dfs/data' for st_path in storage_paths])
+ client.params.hdfs.update('dfs.name.dir', name_dir_param)
+ client.params.hdfs.update('dfs.data.dir', data_dir_param, nodes=dn_hosts)
+
+
+def _configure_swift(client, cluster):
+ swift_enable = cluster.cluster_configs['general'].get('Enable Swift')
+ if swift_enable is None or swift_enable:
+ swift_configs = swift.get_swift_configs()
+ for conf in swift_configs:
+ client.params.hadoop.add(conf['name'], conf['value'])
+
+
+def _add_user_params(client, cluster):
+ for p in six.iteritems(cluster.cluster_configs["Hadoop"]):
+ client.params.hadoop.update(p[0], p[1])
+
+ for p in six.iteritems(cluster.cluster_configs["HDFS"]):
+ client.params.hdfs.update(p[0], p[1])
+
+ for p in six.iteritems(cluster.cluster_configs["MapReduce"]):
+ client.params.mapred.update(p[0], p[1])
+
+
+def install_cluster(cluster):
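+    """Create the cluster in the manager, add nodes, install software,
+    push configuration and format HDFS.
+    """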
+ mng_instance = u.get_instance(cluster, 'manager')
+ mng_ip = mng_instance.management_ip
+
+ all_hosts = list(set([i.hostname() for i in u.get_instances(cluster)]))
+
+ client = c.IntelClient(mng_ip, cluster.name)
+
+ LOG.info("Create cluster")
+ client.cluster.create()
+
+ LOG.info("Add nodes to cluster")
+ rack = '/Default'
+ client.nodes.add(all_hosts, rack, 'hadoop',
+ '/home/hadoop/.ssh/id_rsa')
+
+ LOG.info("Install software")
+ client.cluster.install_software(all_hosts)
+
+ LOG.info("Configure services")
+ _configure_services(client, cluster)
+
+ LOG.info("Deploy cluster")
+ client.nodes.config(force=True)
+
+ LOG.info("Provisioning configs")
+ # cinder and ephemeral drive support
+ _configure_storage(client, cluster)
+ # swift support
+ _configure_swift(client, cluster)
+ # user configs
+ _add_user_params(client, cluster)
+
+ LOG.info("Format HDFS")
+ client.services.hdfs.format()
+
+
+def start_cluster(cluster):
+ client = c.IntelClient(
+ u.get_instance(cluster, 'manager').management_ip, cluster.name)
+
+ LOG.debug("Starting hadoop services")
+ client.services.hdfs.start()
+
+ client.services.mapred.start()
+ if u.get_hiveserver(cluster):
+ client.services.hive.start()
+
+ if u.get_oozie(cluster):
+ client.services.oozie.start()
+
+
+def scale_cluster(cluster, instances):
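+    """Add new instances to the IDH cluster and start their services."""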
+ scale_ins_hosts = [i.hostname() for i in instances]
+ dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)]
+ tt_hosts = [tt.hostname() for tt in u.get_tasktrackers(cluster)]
+ to_scale_dn = []
+ to_scale_tt = []
+ for i in scale_ins_hosts:
+ if i in dn_hosts:
+ to_scale_dn.append(i)
+
+ if i in tt_hosts:
+ to_scale_tt.append(i)
+
+ mng_ip = u.get_instance(cluster, 'manager').management_ip
+ client = c.IntelClient(mng_ip, cluster.name)
+ rack = '/Default'
+ client.nodes.add(scale_ins_hosts, rack, 'hadoop',
+ cluster.extra['manager_authzkeyfile_path'])
+ client.cluster.install_software(scale_ins_hosts)
+
+ if to_scale_tt:
+ client.services.mapred.add_nodes('TaskTracker', to_scale_tt)
+
+ if to_scale_dn:
+ client.services.hdfs.add_nodes('DataNode', to_scale_dn)
+
+ client.nodes.config()
+
+ if to_scale_dn:
+ client.services.hdfs.start()
+
+ if to_scale_tt:
+ client.services.mapred.start()
+
+
+def decommission_nodes(cluster, instances):
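+    """Decommission datanodes, stop services on removed instances and
+    delete the nodes from the IDH manager.
+    """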
+ dec_hosts = [i.hostname() for i in instances]
+ dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)]
+    tt_hosts = [tt.hostname() for tt in u.get_tasktrackers(cluster)]
+
+ mng_ip = u.get_instances(cluster, 'manager')[0].management_ip
+ client = c.IntelClient(mng_ip, cluster.name)
+
+ dec_dn_hosts = []
+ for dec_host in dec_hosts:
+ if dec_host in dn_hosts:
+ dec_dn_hosts.append(dec_host)
+
+ if dec_dn_hosts:
+ client.services.hdfs.decommission_nodes(dec_dn_hosts)
+
+ #TODO(alazarev) make timeout configurable (bug #1262897)
+ timeout = 14400 # 4 hours
+ cur_time = 0
+ for host in dec_dn_hosts:
+ while cur_time < timeout:
+ if client.services.hdfs.get_datanode_status(
+ host) == 'Decomissioned':
+ break
+ context.sleep(5)
+ cur_time += 5
+ else:
+ LOG.warn("Failed to decomission node '%s' of cluster '%s' "
+ "in %s minutes" % (host, cluster.name, timeout/60))
+
+ client.nodes.stop(dec_hosts)
+
+    # wait for services to stop
+ #TODO(alazarev) make timeout configurable (bug #1262897)
+ timeout = 600 # 10 minutes
+ cur_time = 0
+ for instance in instances:
+ while cur_time < timeout:
+ stopped = True
+ if instance.hostname() in dn_hosts:
+ code, out = instance.remote().execute_command(
+ 'sudo /sbin/service hadoop-datanode status',
+ raise_when_error=False)
+ if out.strip() != 'datanode is stopped':
+ stopped = False
+ if out.strip() == 'datanode dead but pid file exists':
+ instance.remote().execute_command(
+ 'sudo rm -f '
+ '/var/run/hadoop/hadoop-hadoop-datanode.pid')
+ if instance.hostname() in tt_hosts:
+ code, out = instance.remote().execute_command(
+ 'sudo /sbin/service hadoop-tasktracker status',
+ raise_when_error=False)
+ if out.strip() != 'tasktracker is stopped':
+ stopped = False
+ if stopped:
+ break
+ else:
+ context.sleep(5)
+ cur_time += 5
+ else:
+ LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
+ "in %s minutes" % (instance, cluster.name, timeout/60))
+
+ for node in dec_hosts:
+ LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
+ client.nodes.delete(node)
diff --git a/savanna/plugins/intel/plugin.py b/savanna/plugins/intel/plugin.py
new file mode 100644
index 00000000..3a2fcde9
--- /dev/null
+++ b/savanna/plugins/intel/plugin.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna import conductor
+from savanna import context
+from savanna.openstack.common import log as logging
+from savanna.plugins.general import exceptions as ex
+from savanna.plugins.general import utils as u
+from savanna.plugins.intel import config_helper as c_helper
+from savanna.plugins.intel import exceptions as i_ex
+from savanna.plugins.intel import installer as ins
+from savanna.plugins import provisioning as p
+
+conductor = conductor.API
+LOG = logging.getLogger(__name__)
+
+
+class IDHProvider(p.ProvisioningPluginBase):
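+    """Provisioning plugin for the Intel Distribution for Apache Hadoop."""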
+ def __init__(self):
+ self.processes = {
+ "Manager": ["manager"],
+ "HDFS": ["namenode", "datanode", "secondarynamenode"],
+ "MapReduce": ["jobtracker", "tasktracker"],
+ "Hadoop": [] # for hadoop parameters in UI
+ }
+
+ def get_description(self):
+ return \
+ 'The IDH OpenStack plugin works with project ' \
+ 'Savanna to automate the deployment of the Intel Distribution ' \
+            'of Apache Hadoop on OpenStack-based ' \
+ 'public & private clouds'
+
+ def get_node_processes(self, hadoop_version):
+ return self.processes
+
+ def get_versions(self):
+ return ['2.5.1']
+
+ def get_title(self):
+ return "Intel(R) Distribution for Apache Hadoop* Software"
+
+ def get_configs(self, hadoop_version):
+ return c_helper.get_plugin_configs()
+
+ def get_hive_config_path(self):
+ return '/etc/hive/conf/hive-site.xml'
+
+ def configure_cluster(self, cluster):
+ LOG.info("Configure IDH cluster")
+ cluster = ins.create_hadoop_ssh_keys(cluster)
+ ins.configure_os(cluster)
+ ins.install_manager(cluster)
+ ins.install_cluster(cluster)
+
+ def start_cluster(self, cluster):
+ LOG.info("Start IDH cluster")
+ ins.start_cluster(cluster)
+ self._set_cluster_info(cluster)
+
+ def validate(self, cluster):
+ nn_count = sum([ng.count for ng
+ in u.get_node_groups(cluster, 'namenode')])
+ if nn_count != 1:
+ raise ex.NotSingleNameNodeException(nn_count)
+
+ jt_count = sum([ng.count for ng
+ in u.get_node_groups(cluster, 'jobtracker')])
+ if jt_count > 1:
+ raise ex.NotSingleJobTrackerException(jt_count)
+
+ tt_count = sum([ng.count for ng
+ in u.get_node_groups(cluster, 'tasktracker')])
+ if jt_count == 0 and tt_count > 0:
+ raise ex.TaskTrackersWithoutJobTracker()
+
+ mng_count = sum([ng.count for ng
+ in u.get_node_groups(cluster, 'manager')])
+ if mng_count != 1:
+ raise i_ex.NotSingleManagerException(mng_count)
+
+ def scale_cluster(self, cluster, instances):
+ ins.configure_os_from_instances(cluster, instances)
+ ins.scale_cluster(cluster, instances)
+
+ def decommission_nodes(self, cluster, instances):
+ ins.decommission_nodes(cluster, instances)
+
+ def validate_scaling(self, cluster, existing, additional):
+ self._validate_additional_ng_scaling(cluster, additional)
+ self._validate_existing_ng_scaling(cluster, existing)
+
+ def _get_scalable_processes(self):
+ return ["datanode", "tasktracker"]
+
+ def _get_by_id(self, lst, id):
+ for obj in lst:
+ if obj.id == id:
+ return obj
+
+ def _validate_additional_ng_scaling(self, cluster, additional):
+ jt = u.get_jobtracker(cluster)
+ scalable_processes = self._get_scalable_processes()
+
+ for ng_id in additional:
+ ng = self._get_by_id(cluster.node_groups, ng_id)
+ if not set(ng.node_processes).issubset(scalable_processes):
+ raise ex.NodeGroupCannotBeScaled(
+ ng.name, "Intel plugin cannot scale nodegroup"
+ " with processes: " +
+ ' '.join(ng.node_processes))
+ if not jt and 'tasktracker' in ng.node_processes:
+ raise ex.NodeGroupCannotBeScaled(
+ ng.name, "Intel plugin cannot scale node group with "
+ "processes which have no master-processes run "
+ "in cluster")
+
+ def _validate_existing_ng_scaling(self, cluster, existing):
+ scalable_processes = self._get_scalable_processes()
+ dn_to_delete = 0
+ for ng in cluster.node_groups:
+ if ng.id in existing:
+ if ng.count > existing[ng.id] and "datanode" in \
+ ng.node_processes:
+ dn_to_delete += ng.count - existing[ng.id]
+ if not set(ng.node_processes).issubset(scalable_processes):
+ raise ex.NodeGroupCannotBeScaled(
+ ng.name, "Intel plugin cannot scale nodegroup"
+ " with processes: " +
+ ' '.join(ng.node_processes))
+
+ def _set_cluster_info(self, cluster):
+ mng = u.get_instances(cluster, 'manager')[0]
+ nn = u.get_namenode(cluster)
+ jt = u.get_jobtracker(cluster)
+ oozie = u.get_oozie(cluster)
+
+ #TODO(alazarev) make port configurable (bug #1262895)
+ info = {'IDH Manager': {
+ 'Web UI': 'https://%s:9443' % mng.management_ip
+ }}
+
+ if jt:
+ #TODO(alazarev) make port configurable (bug #1262895)
+ info['MapReduce'] = {
+ 'Web UI': 'http://%s:50030' % jt.management_ip
+ }
+ if nn:
+ #TODO(alazarev) make port configurable (bug #1262895)
+ info['HDFS'] = {
+ 'Web UI': 'http://%s:50070' % nn.management_ip
+ }
+
+ if oozie:
+ #TODO(alazarev) make port configurable (bug #1262895)
+ info['JobFlow'] = {
+ 'Oozie': 'http://%s:11000' % oozie.management_ip
+ }
+
+ ctx = context.ctx()
+ conductor.cluster_update(ctx, cluster, {'info': info})
diff --git a/savanna/plugins/intel/resources/configuration.xsd b/savanna/plugins/intel/resources/configuration.xsd
new file mode 100644
index 00000000..b5573d09
--- /dev/null
+++ b/savanna/plugins/intel/resources/configuration.xsd
@@ -0,0 +1,103 @@
diff --git a/savanna/plugins/intel/resources/hadoop-default.xml b/savanna/plugins/intel/resources/hadoop-default.xml
new file mode 100644
index 00000000..6c85ee58
--- /dev/null
+++ b/savanna/plugins/intel/resources/hadoop-default.xml
@@ -0,0 +1,1371 @@
+
+
+
+
+
+
+ default.heap.size
+ 4096
+ Integer
+ perf
+
+ The default heap size of cluster
+
+ true
+
+ Default memory size for datanode, jobtracker and hbase master.
+
+
+
+ hadoop.extra.classpath
+
+ true
+ String
+ basic
+
+ Extra Java CLASSPATH element
+
+ true
+
+ Extra Java CLASSPATH elements. Will be appended to the value of HADOOP_CLASSPATH in hadoop-env.sh.
+
+
+
+ hadoop.tmp.dir
+ true
+ /tmp/hadoop-${user.name}
+ String
+ filesystem
+
+ The temp directory for hadoop
+
+
+ A base for other temporary directories.
+
+
+
+ hadoop.native.lib
+ true
+ true
+ Boolean
+ basic
+
+ use the native hadoop libraries
+
+
+ Should native hadoop libraries, if present, be used.
+
+
+
+ hadoop.http.filter.initializers
+
+ true
+ String
+ network
+
+ hadoop web filter
+
+
+ A comma separated list of class names. Each class in the list
+ must extend org.apache.hadoop.http.FilterInitializer. The
+ corresponding Filter will be initialized. Then, the Filter will be
+ applied to all user viewing jsp and servlet web pages. The ordering
+ of the list defines the ordering of the filters.
+
+
+
+ hadoop.security.group.mapping
+ org.apache.hadoop.security.ShellBasedUnixGroupsMapping
+ Class
+ security
+
+ hadoop user mapping
+
+
+ Class for user to group mapping (get groups for a given user)
+
+
+
+ hadoop.security.authorization
+ false
+ Boolean
+ security
+
+ hadoop security authorization
+
+
+ Is service-level authorization enabled?
+
+
+
+ hadoop.security.authentication
+ simple
+ Choose
+ simple,kerberos
+ security
+
+ hadoop security authorization level
+
+
+ possible values are simple (no authentication), and kerberos
+
+
+
+ hadoop.security.token.service.use_ip
+ true
+ Boolean
+ security
+
+ Controls whether tokens always use IP addresses
+
+
+
+ Controls whether tokens always use IP addresses. DNS changes will not
+ be detected if this option is enabled. Existing client connections
+ that break will always reconnect to the IP of the original host. New
+ clients will connect to the host's new IP but fail to locate a token.
+ Disabling this option will allow existing and new clients to detect
+ an IP change and continue to locate the new host's token.
+
+
+
+
+
+ hadoop.workaround.non.threadsafe.getpwuid
+ false
+ Boolean
+ security
+
+ hadoop thread safe
+
+
+ Some operating systems or authentication modules are known to
+ have broken implementations of getpwuid_r and getpwgid_r, such that these
+ calls are not thread-safe. Symptoms of this problem include JVM crashes
+ with a stack trace inside these functions. If your system exhibits this
+ issue, enable this configuration parameter to include a lock around the
+ calls as a workaround.
+ An incomplete list of some systems known to have this issue is available
+ at http://wiki.apache.org/hadoop/KnownBrokenPwuidImplementations
+
+
+
+ hadoop.kerberos.kinit.command
+ true
+ kinit
+ String
+ security
+
+ Kerberos credentials
+
+
+ Used to periodically renew Kerberos credentials when provided
+ to Hadoop. The default setting assumes that kinit is in the PATH of users
+ running the Hadoop client. Change this to the absolute path to kinit if this
+ is not the case.
+
+
+
+
+ hadoop.logfile.size
+ false
+ 10000000
+ Integer
+ perf
+
+ log file size
+
+
+ The max size of each log file
+
+
+
+ hadoop.logfile.count
+ false
+ 10
+ Integer
+ perf
+
+ log file number
+
+
+ The max number of log files
+
+
+
+
+ io.file.buffer.size
+ 4096
+ Integer
+ perf
+
+ File buffer size
+
+
+ The size of buffer for use in sequence files.
+ The size of this buffer should probably be a multiple of hardware
+ page size (4096 on Intel x86), and it determines how much data is
+ buffered during read and write operations.
+
+ basic
+
+ true
+
+
+ io.bytes.per.checksum
+ 512
+ 512
+ 4096
+ Integer
+ perf
+
+ Checksum size
+
+
+ The number of bytes per checksum. Must not be larger than
+ io.file.buffer.size.
+
+ ioconf
+
+ false
+
+
+ dfs.datanode.protocol.client-request.client-verification-field.exists
+ false
+ true
+ Boolean
+ io
+
+ checksum.client-verification-field
+
+
+ Allow DataNode to skip loading Checksum files.
+
+ true
+
+
+ io.skip.checksum.errors
+ false
+ Boolean
+ io
+
+ Skip checksum errors
+
+
+ If true, when a checksum error is encountered while
+ reading a sequence file, entries are skipped, instead of throwing an
+ exception.
+
+ ioconf
+
+ true,false
+ false
+
+
+ io.compression.codecs
+ org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec
+ Class
+ perf
+
+ Compression codecs
+
+
+ A list of the compression codec classes that can be used
+ for compression/decompression.
+
+
+
+ io.serializations
+ org.apache.hadoop.io.serializer.WritableSerialization
+ Class
+ perf
+
+ IO serializations
+
+
+ A list of serialization classes that can be used for
+ obtaining serializers and deserializers.
+
+
+
+
+
+ fs.default.name
+ false
+ file:///
+ String
+ filesystem
+
+ File System name
+
+ true
+
+ The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.
+
+
+
+
+ hadoop.namenode
+
+ true
+ true
+ String
+ namenode
+
+ NameNode name
+
+
+ Server name for namenode.
+
+
+
+ hadoop.namenode.port
+ 8020
+ String
+ namenode
+
+ NameNode port
+
+ true
+
+ Server port for namenode.
+
+
+
+ fs.trash.interval
+ 0
+ Integer
+ filesystem
+
+ Interval to delete checkpoints
+
+
+ Number of minutes after which the checkpoints
+ get deleted. If zero, the trash feature is disabled.
+
+
+
+ fs.file.impl
+ org.apache.hadoop.fs.LocalFileSystem
+ Class
+ filesystem
+
+ File system for file implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for file: uris.
+
+
+
+ fs.hdfs.impl
+ org.apache.hadoop.hdfs.DistributedFileSystem
+ Class
+ filesystem
+
+ File system for hdfs implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for hdfs: uris.
+
+
+
+ fs.webhdfs.impl
+ org.apache.hadoop.hdfs.web.WebHdfsFileSystem
+ Class
+ filesystem
+
+ File system for webhdfs implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for webhdfs: uris.
+
+
+
+ fs.s3.impl
+ org.apache.hadoop.fs.s3.S3FileSystem
+ Class
+ filesystem
+
+ File system for s3 implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for s3: uris.
+
+
+
+
+ fs.s3n.impl
+ org.apache.hadoop.fs.s3native.NativeS3FileSystem
+ Class
+ filesystem
+
+ File system for s3n implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for s3n: (Native S3) uris.
+
+
+
+ fs.kfs.impl
+ org.apache.hadoop.fs.kfs.KosmosFileSystem
+ Class
+ filesystem
+
+ File system for kfs implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for kfs: uris.
+
+
+
+ fs.hftp.impl
+ org.apache.hadoop.hdfs.HftpFileSystem
+ Class
+ filesystem
+
+ File system for hftp implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for hftp: uris.
+
+
+
+ fs.hsftp.impl
+ org.apache.hadoop.hdfs.HsftpFileSystem
+ Class
+ filesystem
+
+ The FileSystem for hsftp
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for hsftp: uris
+
+
+
+ fs.ftp.impl
+ org.apache.hadoop.fs.ftp.FTPFileSystem
+ Class
+ filesystem
+
+ File system for ftp implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for ftp: uris.
+
+
+
+ fs.ramfs.impl
+ org.apache.hadoop.fs.InMemoryFileSystem
+ Class
+ filesystem
+
+ File system for ramfs implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The FileSystem for ramfs: uris.
+
+
+
+ fs.har.impl
+ org.apache.hadoop.fs.HarFileSystem
+ Class
+ filesystem
+
+ File system for hadoop archives implementation
+
+ org.apache.hadoop.fs.FileSystem
+
+ The filesystem for Hadoop archives.
+
+
+
+ fs.har.impl.disable.cache
+ true
+ Boolean
+ filesystem
+
+ Cache hadoop archive instance
+
+
+ Don't cache 'har' filesystem instances.
+
+
+
+ fs.checkpoint.dir
+ ${hadoop.tmp.dir}/dfs/namesecondary
+ ${hadoop.tmp.dir}/dfs/namesecondary
+ /hadoop/namesecondary
+ Directory
+ filesystem
+
+ Checkpoint directory
+
+
+ Determines where on the local filesystem the DFS secondary
+ name node should store the temporary images to merge.
+ If this is a comma-delimited list of directories then the image is
+ replicated in all of the directories for redundancy.
+
+
+
+ fs.checkpoint.edits.dir
+ ${fs.checkpoint.dir}
+ Directory
+ filesystem
+
+ Checkpoint edit directory
+
+
+ Determines where on the local filesystem the DFS secondary
+ name node should store the temporary edits to merge.
+ If this is a comma-delimited list of directories then the edits are
+ replicated in all of the directories for redundancy.
+ Default value is same as fs.checkpoint.dir
+
+
+
+ fs.checkpoint.period
+ 3600
+ Integer
+ filesystem
+
+ Checkpoint period
+
+
+ The number of seconds between two periodic checkpoints.
+
+
+
+ fs.checkpoint.size
+ 67108864
+ Integer
+ filesystem
+
+ Edit log size to checkpoint
+
+
+ The size of the current edit log (in bytes) that triggers
+ a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
+
+
+
+ fs.s3.block.size
+ 67108864
+ Integer
+ filesystem
+
+ Block size to write to S3
+
+
+ Block size to use when writing files to S3.
+
+
+
+ fs.s3.buffer.dir
+ ${hadoop.tmp.dir}/s3
+ Directory
+ filesystem
+
+ Buffer file directory
+
+
+ Determines where on the local filesystem the S3 filesystem
+ should store files before sending them to S3
+ (or after retrieving them from S3).
+
+
+
+
+ fs.s3.maxRetries
+ 4
+ Integer
+ filesystem
+
+ Maximum retries number to reading or writing file to S3
+
+
+ The maximum number of retries for reading or writing files to S3,
+ before we signal failure to the application.
+
+
+
+
+ fs.s3.sleepTimeSeconds
+ 10
+ Integer
+ filesystem
+
+ Seconds to sleep between every retries to reading or writing file to S3
+
+
+ The number of seconds to sleep between each S3 retry.
+
+
+
+
+ fs.automatic.close
+ true
+ Boolean
+ filesystem
+
+ FileSystem auto close
+
+
+ By default, FileSystem instances are automatically closed at program
+ exit using a JVM shutdown hook. Setting this property to false disables this
+ behavior. This is an advanced option that should only be used by server applications
+ requiring a more carefully orchestrated shutdown sequence.
+
+
+
+
+ fs.s3n.block.size
+ 67108864
+ Integer
+ filesystem
+
+ Block size
+
+
+ Block size to use when reading files using the Native S3
+ filesystem (s3n: URIs).
+
+
+
+ local.cache.size
+ 10737418240
+ Integer
+ perf
+
+ cache size
+
+
+
+ The limit on the size of cache you want to keep, set by default to
+ 10GB. This will act as a soft limit on the cache directory for out of
+ band data.
+
+
+
+
+ io.seqfile.compress.blocksize
+ 1000000
+ Integer
+ io
+
+ Minimum block size for compression
+
+
+ The minimum block size for compression in block compressed
+ SequenceFiles.
+
+
+
+
+ io.seqfile.lazydecompress
+ true
+ Boolean
+ io
+
+ Lazy decompress
+
+
+ Should values of block-compressed SequenceFiles be decompressed
+ only when necessary.
+
+
+
+
+ io.seqfile.sorter.recordlimit
+ 1000000
+ Integer
+ io
+
+ Limit number of records
+
+
+ The limit on number of records to be kept in memory in a spill
+ in SequenceFiles.Sorter
+
+
+
+
+ io.mapfile.bloom.size
+ 1048576
+ Integer
+ io
+
+ The size of BloomFilters
+
+
+ The size of BloomFilter-s used in BloomMapFile. Each time this many
+ keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
+ Larger values minimize the number of filters, which slightly increases the performance,
+ but may waste too much space if the total number of keys is usually much smaller
+ than this number.
+
+
+
+
+ io.mapfile.bloom.error.rate
+ 0.005
+ Float
+ io
+
+ Rate of false positives in BloomFilter
+
+
+ The rate of false positives in BloomFilter-s used in BloomMapFile.
+ As this value decreases, the size of BloomFilter-s increases exponentially. This
+ value is the probability of encountering false positives (default is 0.5%).
+
+
+
+
+ hadoop.util.hash.type
+ murmur
+ Choose
+ murmur,jenkins
+ basic
+
+ Hash type
+
+
+ The default implementation of Hash. Currently this can take one of the
+ two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
+
+
+
+
+
+ ipc.client.idlethreshold
+ 4000
+ Integer
+ ipc
+
+ Connection threshold
+
+
+ Defines the threshold number of connections after which
+ connections will be inspected for idleness.
+
+
+
+
+ ipc.client.kill.max
+ 10
+ Integer
+ ipc
+
+ Maximum clients number
+
+
+ Defines the maximum number of clients to disconnect in one go.
+
+
+
+
+ ipc.client.connection.maxidletime
+ 10000
+ Integer
+ ipc
+
+ Maximum time for connection
+
+
+ The maximum time in msec after which a client will bring down the
+ connection to the server.
+
+
+
+
+ ipc.client.connect.max.retries
+ 10
+ Integer
+ ipc
+
+ Maximum number retries
+
+
+ Indicates the number of retries a client will make to establish
+ a server connection.
+
+
+
+
+ ipc.server.listen.queue.size
+ 128
+ Integer
+ ipc
+
+ Length of the server listen queue
+
+
+ Indicates the length of the listen queue for servers accepting
+ client connections.
+
+
+
+
+ ipc.server.tcpnodelay
+ false
+ Boolean
+ ipc
+
+ Turn on Nagle's algorithm
+
+
+ Turn on/off Nagle's algorithm for the TCP socket connection on
+ the server. Setting to true disables the algorithm and may decrease latency
+ with a cost of more/smaller packets.
+
+
+
+
+ ipc.client.tcpnodelay
+ false
+ Boolean
+ ipc
+
+ Whether to turn on Nagle's algorithm on the client
+
+
+ Turn on/off Nagle's algorithm for the TCP socket connection on
+ the client. Setting to true disables the algorithm and may decrease latency
+ with a cost of more/smaller packets.
+
+
+
+
+ webinterface.private.actions
+ false
+ Boolean
+ network
+
+ Web interfaces
+
+
+ If set to true, the web interfaces of JT and NN may contain
+ actions, such as kill job, delete file, etc., that should
+ not be exposed to the public. Enable this option if the interfaces
+ are only reachable by those who have the right authorization.
+
+
+
+
+
+ hadoop.rpc.socket.factory.class.default
+ org.apache.hadoop.net.StandardSocketFactory
+ Class
+ proxy
+
+ Socketfactory Class
+
+
+ Default SocketFactory to use. This parameter is expected to be
+ formatted as "package.FactoryClassName".
+
+
+
+
+ hadoop.rpc.socket.factory.class.ClientProtocol
+
+ Class
+ proxy
+
+ Socketfactory Class to use to Connect to DFS
+
+
+ SocketFactory to use to connect to a DFS. If null or empty, use
+ hadoop.rpc.socket.class.default. This socket factory is also used by
+ DFSClient to create sockets to DataNodes.
+
+
+
+
+ hadoop.socks.server
+
+ true
+ String
+ proxy
+
+ Address used by SocksSocketfactory
+
+
+ Address (host:port) of the SOCKS server to be used by the
+ SocksSocketFactory.
+
+
+
+
+
+ topology.node.switch.mapping.impl
+ org.apache.hadoop.net.ScriptBasedMapping
+ Class
+ org.apache.hadoop.net.DNSToSwitchMapping
+ rack
+
+ Topology node switch mapping implementation
+
+
+ The default implementation of the DNSToSwitchMapping. It
+ invokes a script specified in topology.script.file.name to resolve
+ node names. If the value for topology.script.file.name is not set, the
+ default value of DEFAULT_RACK is returned for all node names.
+
+
+
+
+ topology.script.file.name
+ false
+
+
+ /usr/lib/intelcloud/rackmap.sh
+ String
+ rack
+
+ The script name to get NetworkTopology name
+
+ true
+
+ The script name that should be invoked to resolve DNS names to
+ NetworkTopology names. Example: the script would take host.foo.bar as an
+ argument, and return /rack1 as the output.
+
+
+
+
+ topology.script.number.args
+ false
+ 100
+ 100
+ 1
+ Integer
+ rack
+
+ The number of topology script args
+
+ true
+
+ The max number of args that the script configured with
+ topology.script.file.name should be run with. Each arg is an
+ IP address.
+
+
+
+
+ hadoop.security.uid.cache.secs
+ 14400
+ Integer
+ security
+
+ The timeout for cache from UID to UserName
+
+
+ NativeIO maintains a cache from UID to UserName. This is
+ the timeout for an entry in that cache.
+
+
+
+
+ hadoop.http.authentication.type
+ simple
+
+ Choose
+ simple,kerberos,${hadoop.security.authentication}
+ security
+
+ Authentication type for HTTP endpoint
+
+
+
+ Defines authentication used for Oozie HTTP endpoint.
+ Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
+
+
+
+
+ hadoop.http.authentication.token.validity
+ 36000
+ Integer
+ security
+
+ Authentication token validity
+
+
+
+ Indicates how long (in seconds) an authentication token is valid before it has
+ to be renewed.
+
+
+
+
+ hadoop.http.authentication.signature.secret
+ hadoop
+ String
+ security
+
+ The signature secret for signing the Authentication token
+
+
+
+ The signature secret for signing the authentication tokens.
+ If not set a random secret is generated at startup time.
+ The same secret should be used for JT/NN/DN/TT configurations.
+
+
+
+
+ hadoop.http.authentication.cookie.domain
+
+ true
+ String
+ security
+
+ The domain to store authentication token
+
+
+
+ The domain to use for the HTTP cookie that stores the authentication token.
+ In order for authentication to work correctly across the web consoles of all
+ Hadoop nodes, the domain must be correctly set.
+ IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
+ For this setting to work properly, all nodes in the cluster must be configured
+ to generate URLs with hostname.domain names in them.
+
+
+
+
+ hadoop.http.authentication.simple.anonymous.allowed
+ true
+ Boolean
+ security
+
+ Simple authentication anonymous allowance
+
+
+
+ Indicates if anonymous requests are allowed when using 'simple' authentication.
+
+
+
+
+ hadoop.http.authentication.kerberos.principal
+ true
+ 1
+ HTTP/localhost@LOCALHOST
+ String
+ security
+
+ Authentication principal
+
+
+ Indicates the Kerberos principal to be used for HTTP endpoint.
+ The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO
+ specification.
+
+
+
+
+ hadoop.http.authentication.kerberos.keytab
+ true
+ ${user.home}/hadoop.keytab
+ String
+ security
+
+ Location of keytab file
+
+
+
+ Location of the keytab file with the credentials for the principal.
+ Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop.
+
+
+
+
+
+ hadoop.security.group.mapping.ldap.url
+ true
+
+ true
+ String
+ security
+
+ The URL of the LDAP server
+
+
+
+ The URL of the LDAP server to use for resolving user groups when using
+ the LdapGroupsMapping user to group mapping.
+
+
+
+
+ hadoop.security.group.mapping.ldap.ssl
+ true
+ false
+ Boolean
+ security
+
+ Use SSL connecting to LDAP server
+
+
+
+ Whether or not to use SSL when connecting to the LDAP server.
+
+
+
+
+ hadoop.security.group.mapping.ldap.ssl.keystore
+ true
+
+ true
+ String
+ security
+
+ File path to the SSL keystore
+
+
+
+ File path to the SSL keystore that contains the SSL certificate required
+ by the LDAP server.
+
+
+
+
+ hadoop.security.group.mapping.ldap.ssl.keystore.password.file
+ true
+
+ true
+ String
+ security
+
+ File path containing the password of LDAP SSL keystore
+
+
+
+ The path to a file containing the password of the LDAP SSL keystore.
+ IMPORTANT: This file should be readable only by the Unix user running
+ the daemons.
+
+
+
+
+ hadoop.security.group.mapping.ldap.bind.user
+ true
+
+ true
+ String
+ security
+
+ The user name to bind as when connecting to the LDAP server
+
+
+
+ The distinguished name of the user to bind as when connecting to the LDAP
+ server. This may be left blank if the LDAP server supports anonymous binds.
+
+
+
+
+ hadoop.security.group.mapping.ldap.bind.password.file
+ true
+
+ true
+ String
+ security
+
+ The path to a file containing the password of the bind user
+
+
+
+ The path to a file containing the password of the bind user.
+ IMPORTANT: This file should be readable only by the Unix user running
+ the daemons.
+
+
+
+
+ hadoop.security.group.mapping.ldap.base
+ true
+
+ true
+ String
+ security
+
+ The search base for the LDAP connection
+
+
+
+ The search base for the LDAP connection. This is a distinguished name,
+ and will typically be the root of the LDAP directory.
+
+
+
+
+ hadoop.security.group.mapping.ldap.search.filter.user
+ true
+ (&(objectClass=user)(sAMAccountName={0}))
+ String
+ security
+
+ A filter to use when searching for LDAP users
+
+
+
+ An additional filter to use when searching for LDAP users. The default will
+ usually be appropriate for Active Directory installations. If connecting to
+ an LDAP server with a non-AD schema, this should be replaced with
+ (&(objectClass=inetOrgPerson)(uid={0})). {0} is a special string used to
+ denote where the username fits into the filter.
+
+
+
+
+ hadoop.security.group.mapping.ldap.search.filter.group
+ true
+ (objectClass=group)
+ String
+ security
+
+ A filter to use when searching for LDAP groups
+
+
+
+ An additional filter to use when searching for LDAP groups. This should be
+ changed when resolving groups against a non-Active Directory installation.
+ posixGroups are currently not a supported group class.
+
+
+
+
+ hadoop.security.group.mapping.ldap.search.attr.member
+ true
+ member
+ String
+ security
+
+ The attribute identifying the users that are members of the group
+
+
+
+ The attribute of the group object that identifies the users that are
+ members of the group. The default will usually be appropriate for
+ any LDAP installation.
+
+
+
+
+ hadoop.security.group.mapping.ldap.search.attr.group.name
+ cn
+ String
+ security
+
+ The attribute identifying the group name
+
+
+
+ The attribute of the group object that identifies the group name. The
+ default will usually be appropriate for all LDAP systems.
+
+
+
+
+ basic
+ Hadoop Basic
+ true
+ false
+
+
+ ioconf
+ Hadoop IO
+ true
+ false
+
+
+ basic
+ Basic Configuration
+ Basic configurations that get Hadoop running.
+
+
+ perf
+ Performance
+ Configurations that affect Hadoop's performance
+
+
+ security
+ Security
+ Security configurations like Kerberos.
+
+
+ network
+ Network Setting
+ Network Setting.
+
+
+ filesystem
+ File System
+ File System configurations.
+
+
+ namenode
+ Namenode
+ Configurations for HDFS Namenode.
+
+
+ io
+ IO Configuration
+ IO Configuration
+
+
+ ipc
+ IPC
+ IPC Configuration
+
+
+ proxy
+ Proxy Configuration
+ Proxy Configuration
+
+
+ rack
+ Rack Configuration
+ Rack Configuration
+
+
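The core-default.xml entries above pair each Hadoop parameter with a default value, a declared type, and a category. As a rough illustration of how a plugin could consume such a bundled defaults file, here is a minimal, self-contained Python sketch using only the standard library. The element names it looks for (property/name/value/description) are assumed from the usual Hadoop *-default.xml layout and may not match the Intel resource schema exactly, and the file path in the usage stanza is purely hypothetical; the real plugin would most likely feed these entries into its own config helpers instead.

# Illustrative sketch only, under the assumptions stated above.
import xml.etree.ElementTree as ET


def load_hadoop_defaults(path):
    """Return a list of {name, value, description} dicts parsed from the file."""
    configs = []
    for prop in ET.parse(path).getroot().iter('property'):
        name = prop.findtext('name')
        if not name:
            continue
        configs.append({
            'name': name,
            'value': prop.findtext('value', default=''),
            'description': (prop.findtext('description') or '').strip(),
        })
    return configs


if __name__ == '__main__':
    # Hypothetical path; a plugin would read its bundled resource instead.
    for cfg in load_hadoop_defaults('core-default.xml')[:5]:
        print(cfg['name'], '=', cfg['value'])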
diff --git a/savanna/plugins/intel/resources/hdfs-default.xml b/savanna/plugins/intel/resources/hdfs-default.xml
new file mode 100644
index 00000000..e86aa73a
--- /dev/null
+++ b/savanna/plugins/intel/resources/hdfs-default.xml
@@ -0,0 +1,1193 @@
+
+
+
+
+
+
+
+ hadoop.namenode.memory
+ 0
+ Integer
+ 1
+ true
+ true
+ namenode
+
+ Namenode server memory size
+
+
+ Default size for namenode server memory.
+
+
+
+ hadoop.secondary.namenode.memory
+ 0
+ Integer
+ 1
+ true
+ true
+ namenode
+
+ Secondary namenode server memory size
+
+
+ Default size for secondary namenode server memory.
+
+
+
+ hadoop.datanode.memory
+ 4096
+ Integer
+ 1
+ true
+ true
+ datanode
+
+ Datanode server memory size
+
+
+ Default size for datanode server memory.
+
+
+
+ dfs.namenode.logging.level
+ info
+ String
+ namenode
+
+ Namenode logging level
+
+
+ The logging level for dfs namenode. Other values are "dir" (trace
+ namespace mutations), "block" (trace block under/over replications and block
+ creations/deletions), or "all".
+
+
+
+ dfs.secondary.http.address
+ 0.0.0.0:50090
+ String
+ false
+ namenode
+
+ Secondary namenode http server address and port
+
+
+
+ The secondary namenode http server address and port.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.datanode.address
+ 0.0.0.0:50010
+ String
+ datanode
+
+ Address datanode server listens to
+
+
+
+ The address where the datanode server will listen to.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.datanode.http.address
+ 0.0.0.0:50075
+ String
+ datanode
+
+ Datanode http server address and port
+
+
+
+ The datanode http server address and port.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.datanode.ipc.address
+ 0.0.0.0:50020
+ String
+ datanode
+
+ Datanode ipc server address and port
+
+
+
+ The datanode ipc server address and port.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.block.local-path-access.user
+
+ hbase
+ String
+ datanode
+
+ DFS short circuit read allowed user list
+
+
+ Users in this list can read HDFS blocks directly from the local file system
+ instead of reading through the DataNode, thus improving performance.
+ The list is comma-separated.
+
+ true
+
+
+ dfs.datanode.handler.count
+ 3
+ 3
+ 100
+ Integer
+ datanode
+
+ Datanode server threads count
+
+
+ The number of server threads for the datanode.
+
+
+
+ dfs.datanode.max.xcievers
+ 32768
+ Integer
+ datanode
+
+ Datanode service threads count
+
+
+ Number of threads for the datanode service.
+
+
+
+ dfs.http.address
+ 0.0.0.0:50070
+ String
+ false
+ basic
+
+ Namenode Web UI address and port
+
+
+
+ The address and the base port where the dfs namenode web ui will listen on.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.https.enable
+ false
+ Boolean
+ security
+
+ Whether to support HTTPS
+
+
+ Decide if HTTPS(SSL) is supported on HDFS
+
+
+
+
+ dfs.https.need.client.auth
+ false
+ Boolean
+ security
+
+ Whether to require SSL client certificate authentication
+
+
+ Whether SSL client certificate authentication is required
+
+
+
+
+ dfs.https.server.keystore.resource
+ ssl-server.xml
+ String
+ security
+
+ Resource file to extract SSL server keystore information
+
+
+ Resource file from which ssl server keystore
+ information will be extracted
+
+
+
+
+ dfs.https.client.keystore.resource
+ ssl-client.xml
+ String
+ security
+
+ Resource file to extract SSL client keystore information
+
+
+ Resource file from which ssl client keystore
+ information will be extracted
+
+
+
+
+ dfs.datanode.https.address
+ 0.0.0.0:50475
+ String
+ datanode
+
+ Datanode https server address and port
+
+
+
+ The datanode https server address and port.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ dfs.https.address
+ 0.0.0.0:50470
+ String
+ basic
+
+ dfs https address and port
+
+
+ dfs https address and port
+
+
+
+ dfs.datanode.dns.interface
+ default
+ String
+ datanode
+
+ Network Interface name from which Datanode should report its IP address
+
+
+ The name of the Network Interface from which a data node should
+ report its IP address.
+
+
+
+
+ dfs.datanode.dns.nameserver
+ default
+ String
+ datanode
+
+ Datanode name server address
+
+
+ The host name or IP address of the name server (DNS)
+ which a DataNode should use to determine the host name used by the
+ NameNode for communication and display purposes.
+
+
+
+
+ dfs.replication.considerLoad
+ true
+ Boolean
+ perf
+
+ Whether chooseTarget considers the target's load or not
+
+
+ Decide if chooseTarget considers the target's load or not
+
+
+
+
+ dfs.default.chunk.view.size
+ 32768
+ Integer
+ basic
+
+ Chunk size to view on a browser
+
+
+ The number of bytes to view for a file on the browser.
+
+
+
+
+ dfs.datanode.du.reserved
+ 0
+ Integer
+ datanode
+
+ Reserved space in bytes per volume
+
+
+ Reserved space in bytes per volume. Always leave this much space free for non dfs use.
+
+
+
+
+ dfs.name.dir
+ /hadoop/drbd/hadoop_image,/hadoop/hadoop_image_local
+ Directory
+ basic
+
+ DFS fsimage file directory
+
+
+ Determines where on the local filesystem the DFS name node
+ should store the name table(fsimage). If this is a comma-delimited list
+ of directories then the name table is replicated in all of the
+ directories, for redundancy.
+
+ NameNode-system
+
+
+
+ dfs.name.edits.dir
+ ${dfs.name.dir}
+ Directory
+ basic
+
+ DFS edits file directory
+
+
+ Determines where on the local filesystem the DFS name node
+ should store the transaction (edits) file. If this is a comma-delimited list
+ of directories then the transaction file is replicated in all of the
+ directories, for redundancy. Default value is same as dfs.name.dir
+
+
+
+
+ dfs.web.ugi
+ webuser,webgroup
+ String
+ basic
+
+ user account used by the web interface
+
+
+ The user account used by the web interface.
+ Syntax: USERNAME,GROUP1,GROUP2, ...
+
+
+
+
+ dfs.permissions
+ true
+ true
+ Boolean
+ security
+
+ Whether to enable permission checking in HDFS
+
+
+
+ If "true", enable permission checking in HDFS.
+ If "false", permission checking is turned off,
+ but all other behavior is unchanged.
+ Switching from one parameter value to the other does not change the mode,
+ owner or group of files or directories.
+
+
+
+
+ dfs.permissions.extended
+ true
+ false
+ Boolean
+ security
+
+ Whether to enable permission extension in HDFS
+
+
+
+ If "true", enable permission extension in HDFS.
+ If "false", permission extension is turned off.
+
+
+
+
+ dfs.permissions.extended.permissions.file
+ true
+
+ String
+ true
+ security
+
+ Configuration file for extension rules
+
+
+
+ If extended permissions are enabled, the configuration file for
+ extension rules should be specified here.
+
+
+
+
+ dfs.permissions.supergroup
+ supergroup
+ String
+ security
+
+ super-users group name
+
+
+ The name of the group of super-users.
+
+
+
+ dfs.block.access.token.enable
+ false
+ Boolean
+ security
+
+ Access tokens for accessing datanodes
+
+
+
+ If "true", access tokens are used as capabilities for accessing datanodes.
+ If "false", no access tokens are checked on accessing datanodes.
+
+
+
+
+ dfs.block.access.key.update.interval
+ 600
+ Integer
+ security
+
+ Interval at which namenode updates its access keys
+
+
+
+ Interval in minutes at which namenode updates its access keys.
+
+
+
+
+ dfs.block.access.token.lifetime
+ 600
+ Integer
+ security
+
+ Access tokens lifetime
+
+
+ The lifetime of access tokens in minutes.
+
+
+
+ dfs.data.dir
+ ${hadoop.tmp.dir}/dfs/data
+ ${hadoop.tmp.dir}/dfs/data
+
+ Directory
+ false
+
+ basic
+
+ Local filesystem directory datanode stores its blocks
+
+
+ Determines where on the local filesystem a DFS data node
+ should store its blocks. If this is a comma-delimited
+ list of directories, then data will be stored in all named
+ directories, typically on different devices.
+ Directories that do not exist are ignored.
+
+
+ 2
+
+
+ dfs.datanode.data.dir.perm
+ 755
+ 755
+ 755
+ Integer
+ security
+
+ Local filesystem directory permissions
+
+
+ Permissions for the directories on the local filesystem where
+ the DFS data node stores its blocks. The permissions can either be octal or
+ symbolic.
+
+
+
+ dfs.replication
+ 3
+ Integer
+ basic
+
+ Block replication count
+
+
+ Default block replication.
+ The actual number of replications can be specified when the file is created.
+ The default is used if replication is not specified at create time.
+
+
+ basic
+
+
+
+ dfs.replication.max
+ 512
+ Integer
+ basic
+
+ Maximal block replication count
+
+
+ Maximal block replication.
+
+
+
+
+ dfs.replication.min
+ 1
+ Integer
+ basic
+
+ Minimal block replication count
+
+
+ Minimal block replication.
+
+
+
+
+ dfs.block.size
+ 67108864
+ 67108864
+ 134217728
+ Integer
+ basic
+
+ Default block size
+
+
+ The default block size for new files.
+
+ basic
+
+
+
+ dfs.df.interval
+ 60000
+ Integer
+ basic
+
+ Disk usage statistics refresh interval
+
+
+ Disk usage statistics refresh interval in msec.
+
+
+
+ dfs.client.block.write.retries
+ 3
+ Integer
+ perf
+
+ Writing to datanodes retry times
+
+
+ The number of retries for writing blocks to the data nodes,
+ before we signal failure to the application.
+
+
+
+
+ dfs.blockreport.intervalMsec
+ 3600000
+ Integer
+ basic
+
+ Block reporting interval
+
+
+ Determines block reporting interval in milliseconds.
+
+
+
+ dfs.blockreport.initialDelay
+ 0
+ Integer
+ basic
+
+ Delay for first block report
+
+
+ Delay for first block report in seconds.
+
+
+
+ dfs.datanode.directoryscan.interval
+ 21600
+ Integer
+ datanode
+
+ Datanode scan data interval
+
+
+ Interval in seconds for Datanode to scan data directories and
+ reconcile the difference between blocks in memory and on the disk.
+
+
+
+
+ dfs.datanode.directoryscan.threads
+ 1
+ Integer
+ datanode
+
+ Number of threads to use when scanning volumes to
+ generate block reports
+
+
+ Number of threads to use when scanning volumes to
+ generate block reports. A value greater than one means
+ volumes will be scanned in parallel.
+
+
+
+ dfs.heartbeat.interval
+ 3
+ Integer
+ datanode
+
+ Datanode heartbeat
+
+
+ Determines datanode heartbeat interval in seconds.
+
+
+
+ dfs.namenode.handler.count
+ 10
+ 10
+ 100
+ Integer
+ namenode
+
+ Namenode server threads count
+
+
+ The number of server threads for the namenode.
+
+ NameNode-system
+
+
+
+ dfs.safemode.threshold.pct
+ 0.999f
+ Float
+ basic
+
+ percentage of blocks that should satisfy
+ the minimal replication requirement defined by dfs.replication.min.
+
+
+
+
+ Specifies the percentage of blocks that should satisfy
+ the minimal replication requirement defined by dfs.replication.min.
+ Values less than or equal to 0 mean not to wait for any particular
+ percentage of blocks before exiting safemode.
+ Values greater than 1 will make safe mode permanent.
+
+
+
+
+ dfs.safemode.min.datanodes
+ 0
+ Integer
+ true
+ basic
+
+ number of datanodes that must be considered alive
+ before the name node exits safemode.
+
+
+
+ Specifies the number of datanodes that must be considered alive
+ before the name node exits safemode.
+ Values less than or equal to 0 mean not to take the number of live
+ datanodes into account when deciding whether to remain in safe mode
+ during startup.
+ Values greater than the number of datanodes in the cluster
+ will make safe mode permanent.
+
+
+
+
+ dfs.safemode.extension
+ 30000
+ Integer
+ basic
+
+ Safe mode extension time
+
+
+
+ Determines extension of safe mode in milliseconds
+ after the threshold level is reached.
+
+
+
+
+ dfs.balance.bandwidthPerSec
+ 1048576
+ 1048576
+ 104857600
+ Integer
+ datanode
+
+ Maximal amount of bandwidth that each datanode
+ can utilize for the balancing purpose
+
+
+
+ Specifies the maximal amount of bandwidth that each datanode
+ can utilize for the balancing purpose in terms of
+ the number of bytes per second.
+
+
+
+
+ dfs.hosts
+
+ String
+ true
+ basic
+
+ File that contains a list of hosts
+ permitted to connect to the namenode
+
+
+ Names a file that contains a list of hosts that are
+ permitted to connect to the namenode. The full pathname of the file
+ must be specified. If the value is empty, all hosts are
+ permitted.
+
+
+
+ dfs.hosts.exclude
+
+ String
+ true
+ basic
+
+ File that contains a list of hosts
+ not permitted to connect to the namenode
+
+
+ Names a file that contains a list of hosts that are
+ not permitted to connect to the namenode. The full pathname of the
+ file must be specified. If the value is empty, no hosts are
+ excluded.
+
+
+
+ dfs.max.objects
+ 0
+ Integer
+ basic
+
+ The maximum number of files, directories and blocks
+ dfs supports
+
+
+ The maximum number of files, directories and blocks
+ dfs supports. A value of zero indicates no limit to the number
+ of objects that dfs supports.
+
+
+
+
+ dfs.namenode.decommission.interval
+ 30
+ Integer
+ namenode
+
+ Namenode periodicity to check if decommission is complete
+
+
+ Namenode periodicity in seconds to check if decommission is
+ complete.
+
+
+
+ dfs.namenode.decommission.nodes.per.interval
+ 5
+ Integer
+ namenode
+
+ The number of nodes namenode checks if decommission is complete
+
+
+ The number of nodes namenode checks if decommission is complete
+ in each dfs.namenode.decommission.interval.
+
+
+
+ dfs.replication.interval
+ 3
+ Integer
+ basic
+
+ The periodicity in seconds with which the namenode computes
+ replication work for datanodes.
+
+
+ The periodicity in seconds with which the namenode computes
+ replication work for datanodes.
+
+
+
+ dfs.access.time.precision
+ 3600000
+ Integer
+ basic
+
+ HDFS file access time precision
+
+
+ The access time for an HDFS file is precise up to this value.
+ The default value is 1 hour. Setting a value of 0 disables
+ access times for HDFS.
+
+
+
+
+ dfs.support.append
+ true
+ Boolean
+ basic
+
+ Whether HDFS allows appends
+
+
+ Does HDFS allow appends to files?
+
+
+
+ dfs.datanode.plugins
+
+ String
+ true
+ datanode
+
+ Datanode plugins to be activated
+
+
+ Comma-separated list of datanode plug-ins to be activated.
+
+
+
+
+ dfs.namenode.plugins
+
+ String
+ true
+ namenode
+
+ Namenode plugins to be activated
+
+
+ Comma-separated list of namenode plug-ins to be activated.
+
+
+
+
+ dfs.datanode.failed.volumes.tolerated
+ 0
+ Integer
+ datanode
+
+ The number of volumes that are allowed to
+ fail before a datanode stops offering service
+
+
+ The number of volumes that are allowed to
+ fail before a datanode stops offering service. By default
+ any volume failure will cause a datanode to shutdown.
+
+
+
+
+ dfs.namenode.delegation.key.update-interval
+ 86400000
+ Integer
+ namenode
+
+ Namenode delegation key update interval
+
+
+ The update interval for master key for delegation tokens
+ in the namenode in milliseconds.
+
+
+
+
+ dfs.namenode.delegation.token.max-lifetime
+ 604800000
+ Integer
+ namenode
+
+ Namenode delegation key maximum lifetime
+
+
+ The maximum lifetime in milliseconds for which a delegation
+ token is valid.
+
+
+
+
+ dfs.namenode.delegation.token.renew-interval
+ 86400000
+ Integer
+ namenode
+
+ Namenode delegation key renewal interval
+
+
+ The renewal interval for delegation token in milliseconds.
+
+
+
+
+ dfs.drbd.name.dir
+ /hadoop/drbd
+ Directory
+ true
+ true
+ basic
+
+ Remote backup directory
+
+
+ Sets the remote backup directory; this directory must also be set in "dfs.name.dir".
+
+ NameNode-system
+
+
+
+ hdfs.shortcut.reader.user
+
+ String
+ true
+ true
+ datanode
+
+ User when reading local data in datanode
+
+
+ The user to use when reading local data on the datanode.
+ An empty value disables the feature.
+
+
+
+ namenode.memory.weight
+
+ 50
+ Integer
+ true
+ namenode
+
+ Weight of namenode heapsize
+
+
+ The weight of the namenode heap size; the default value is 50.
+
+ advanced
+
+
+
+ secondary.namenode.memory.weight
+
+ 50
+ Integer
+ true
+ namenode
+
+ Weight of secondary namenode heapsize
+
+
+ The weight of the secondary namenode heap size; the default value is 50.
+
+
+
+
+ dfs.socket.timeout
+ 120000
+ Integer
+ basic
+
+ Timeout for socket connection
+
+
+ Timeout for socket connection.
+
+
+
+ dfs.replication.adjust
+ false
+ Boolean
+ replication
+
+ Whether to adjust the replication count
+
+
+
+ If true, adjust the replication count according to how often blocks are accessed.
+
+
+
+
+ dfs.replication.historyWindow
+ 1440
+ Integer
+ replication
+
+ Retention cycle time
+
+
+
+ Determines the retention cycle time of file visit statistics (in minutes). The default value is 1 day.
+
+
+
+
+ dfs.replication.adjustTimer
+ 720
+ Integer
+ replication
+
+ Replication adjusting interval
+
+
+
+ Determines the file's replication adjusting interval (in minutes). The default value is 6 hours.
+
+
+
+
+ dfs.replication.adjust.maxPercent
+ 0.1
+ Float
+ replication
+
+ Replica adjusting max percentage of disk space
+
+
+
+ The max percentage of disk space for replica adjusting.
+
+
+
+
+ dfs.replication.reserved.datanode.number.percent
+ 0.1
+ Float
+ replication
+
+ Percentage of datanode numbers reserved to others
+
+
+
+ The percentage of datanodes reserved for others.
+
+
+
+
+ dfs.replication.adjust.blockWeight
+ 10
+ Integer
+ replication
+
+ Number of blocks for adjust weight
+
+
+
+ The number of blocks to be regarded as a basic unit when adjusting weight.
+
+
+
+
+
+ basic
+ HDFS Basic
+ true
+ false
+
+
+ NameNode-system
+ NameNode Configuration
+ true
+ false
+ ""
+
+
+ advanced
+ Advanced Configuration
+ true
+ false
+ ""
+
+
+ basic
+ Basic Configuration
+ Basic configurations that get HDFS running.
+
+
+ perf
+ Performance
+ Configurations that affect Hadoop's performance
+
+
+ namenode
+ Namenode Configuration
+ Configurations for Namenode.
+
+
+ datanode
+ Datanode Configuration
+ Configurations for Datanode.
+
+
+ security
+ Security
+ Security configurations like permission control.
+
+
+ replication
+ Replication
+ Configurations for replication
+
+
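The hdfs-default.xml entries above declare every default as a string alongside a type label (Integer, Boolean, Float, String, Class, Directory, and so on). A plugin that validates or merges user overrides would need to coerce those strings into typed values first; the helper below is a minimal sketch of that step under the assumption that the type labels map directly to Python types. The coerce_default name is invented for illustration and is not part of the Savanna API.

# Illustrative sketch only, under the assumptions stated above.
def coerce_default(type_name, raw_value):
    """Convert a raw string default to a Python value based on its declared type."""
    if raw_value is None or raw_value == '':
        return None
    if type_name == 'Integer':
        return int(raw_value)
    if type_name == 'Float':
        # Some defaults carry a Java-style suffix, e.g. "0.999f".
        return float(raw_value.rstrip('fF'))
    if type_name == 'Boolean':
        return raw_value.strip().lower() == 'true'
    # String, Class, Directory, Choose, Int_range, etc. stay as plain strings.
    return raw_value


# Quick self-checks against defaults seen in the resource files above.
assert coerce_default('Integer', '67108864') == 67108864
assert coerce_default('Boolean', 'true') is True
assert coerce_default('Float', '0.999f') == 0.999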
diff --git a/savanna/plugins/intel/resources/mapred-default.xml b/savanna/plugins/intel/resources/mapred-default.xml
new file mode 100644
index 00000000..107f962a
--- /dev/null
+++ b/savanna/plugins/intel/resources/mapred-default.xml
@@ -0,0 +1,2678 @@
+
+
+
+
+
+
+
+ hadoop.jobtracker.memory
+ 4096
+ Integer
+ jobtracker
+
+ JobTracker server memory size
+
+ 1
+ true
+ true
+
+ Default size for jobtracker server memory.
+
+
+
+ hadoop.tasktracker.slot.memory
+ 4096
+ Integer
+ tasktracker
+
+ Total memory size for tasktracker map and reduce slot
+
+ 1
+ true
+ true
+
+ Total memory size for tasktracker map and reduce slot.
+
+
+
+ hadoop.tasktracker.memory
+ 4096
+ Integer
+ tasktracker
+
+ Default size for tasktracker server memory
+
+ 1
+ true
+ true
+
+ Default size for tasktracker server memory.
+
+
+
+ hadoop.job.history.location
+
+ true
+ String
+ basic
+
+ job history location
+
+
+ If the job tracker is static, the history files are stored
+ in this single well-known place. If no value is set here, by default
+ it is in the local file system at ${hadoop.log.dir}/history.
+
+
+
+
+ hadoop.job.history.user.location
+
+ true
+ String
+ basic
+
+ user specified history location
+
+
+ User can specify a location to store the history files of
+ a particular job. If nothing is specified, the logs are stored in
+ output directory. The files are stored in "_logs/history/" in the directory.
+ User can stop logging by giving the value "none".
+
+
+
+
+ mapred.job.tracker.history.completed.location
+
+ true
+ String
+ basic
+
+ The completed job history files location
+
+
+ The completed job history files are stored at this single well
+ known location. If nothing is specified, the files are stored at
+ ${hadoop.job.history.location}/done.
+
+
+
+
+
+ io.sort.factor
+ 10
+ 10
+ 100
+ Integer
+ perf
+
+ IO sort factor
+
+
+ The number of streams to merge at once while sorting
+ files. This determines the number of open file handles.
+
+
+
+ io.sort.mb
+ 100
+ 100
+ 200
+ Integer
+ perf
+
+ Total amount of buffer memory to use while sorting
+ files,
+
+
+ The total amount of buffer memory to use while sorting
+ files, in megabytes. By default, gives each merge stream 1MB, which
+ should minimize seeks.
+
+
+
+ io.sort.record.percent
+ 0.05
+ Float
+ perf
+
+ The percentage of io.sort.mb dedicated to tracking record
+ boundaries
+
+
+ The percentage of io.sort.mb dedicated to tracking record
+ boundaries. Let this value be r, io.sort.mb be x. The maximum number
+ of records collected before the collection thread must block is equal
+ to (r * x) / 4
+
+
+
+ io.sort.spill.percent
+ 0.80
+ Float
+ perf
+
+ buffer limit
+
+
+
+ The soft limit in either the buffer or record collection
+ buffers. Once reached, a thread will begin to spill the contents to disk
+ in the background. Note that this does not imply any chunking of data to
+ the spill. A value less than 0.5 is not recommended.
+
+
+
+ io.map.index.skip
+ 0
+ Integer
+ io
+
+ Number of index entries to skip between each entry
+
+
+ Number of index entries to skip between each entry.
+ Zero by default. Setting this to values larger than zero can
+ facilitate opening large map files using less memory.
+
+
+
+ mapred.job.tracker
+ false
+ local
+
+ true
+ String
+ jobtracker
+
+ The host and port that the MapReduce job tracker runs
+ at
+
+
+ The host that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+
+
+
+
+ mapred.jobtracker
+ true
+
+ true
+ String
+ true
+ jobtracker
+
+ The address that the job tracker runs at
+
+ directory
+
+ The host and port that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+
+
+
+
+
+ mapred.jobtracker.port
+ 54311
+ Integer
+ jobtracker
+
+ The port that the MapReduce job tracker runs
+ at
+
+ true
+
+ The port that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+
+
+
+
+ hadoop.jobtracker.thrift.port
+ 9290
+ Integer
+ jobtracker
+
+ The port for jobtracker thrift server.
+
+ true
+
+ The port for jobtracker thrift server.
+
+
+
+ mapred.job.tracker.http.address
+ 0.0.0.0:50030
+ String
+ jobtracker
+
+ The job tracker http server address and port
+
+
+
+ The job tracker http server address and port the server will listen on.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ mapred.job.tracker.handler.count
+ 10
+ 10
+ 60
+ Integer
+ jobtracker
+
+ The number of server threads for the JobTracker.
+
+
+
+ The number of server threads for the JobTracker. This should be roughly
+ 4% of the number of tasktracker nodes.
+
+
+
+
+ mapred.task.tracker.report.address
+ 127.0.0.1:0
+ String
+ tasktracker
+
+ The interface and port that task tracker server listens on
+
+
+ The interface and port that task tracker server listens on.
+ Since it is only connected to by the tasks, it uses the local interface.
+ EXPERT ONLY. Should only be changed if your host does not have the loopback
+ interface.
+
+
+
+ mapred.local.dir
+ ${hadoop.tmp.dir}/mapred/local
+
+ true
+ Directory
+ basic
+
+ MapReduce local directory
+
+
+ 2
+
+ The local directory where MapReduce stores intermediate
+ data files. May be a comma-separated list of
+ directories on different devices in order to spread disk i/o.
+ Directories that do not exist are ignored.
+
+
+
+
+ mapred.system.dir
+ ${hadoop.tmp.dir}/mapred/system
+ Directory
+ basic
+
+ The directory where MapReduce stores control files.
+
+ false
+
+ The directory where MapReduce stores control files.
+
+
+
+ mapreduce.jobtracker.staging.root.dir
+ ${hadoop.tmp.dir}/mapred/staging
+ Directory
+ basic
+
+ The root of the staging area for users' job files
+
+ false
+
+ The root of the staging area for users' job files.
+ In practice, this should be the directory where users' home
+ directories are located (usually /user).
+
+
+
+
+ mapred.temp.dir
+ ${hadoop.tmp.dir}/mapred/temp
+ Directory
+ basic
+
+ A shared directory for temporary files.
+
+ false
+
+ A shared directory for temporary files.
+
+
+
+ mapred.local.dir.minspacestart
+ 0
+ Integer
+ basic
+
+ minimum space in mapred.local.dir to start task
+
+
+ If the space in mapred.local.dir drops under this,
+ do not ask for more tasks. Value in bytes.
+
+
+
+ mapred.local.dir.minspacekill
+ 0
+ Integer
+ basic
+
+ mapred.local.dir minimum space kill
+
+
+ If the space in mapred.local.dir drops under this,
+ do not ask more tasks until all the current ones have finished and
+ cleaned up. Also, to save the rest of the tasks we have running,
+ kill one of them, to clean up some space. Start with the reduce tasks,
+ then go with the ones that have finished the least.
+ Value in bytes.
+
+
+
+
+ mapred.tasktracker.expiry.interval
+ 600000
+ 600000
+ 180000
+ Integer
+ tasktracker
+
+ TaskTracker expiry interval
+
+
+ Expert: The time-interval, in milliseconds, after which
+ a tasktracker is declared 'lost' if it doesn't send heartbeats.
+
+
+
+
+ mapred.tasktracker.instrumentation
+ false
+ org.apache.hadoop.mapred.TaskTrackerMetricsSource
+ Class
+ tasktracker
+
+ TaskTracker instrumentation class
+
+
+ Expert: The instrumentation class to associate with each TaskTracker.
+
+
+
+
+ mapred.tasktracker.resourcecalculatorplugin
+
+ true
+ String
+ tasktracker
+
+ tasktracker resource calculator plugin
+
+
+
+ Name of the class whose instance will be used to query resource information
+ on the tasktracker. The class must be an instance of
+ org.apache.hadoop.util.ResourceCalculatorPlugin. If the value is null, the
+ tasktracker attempts to use a class appropriate to the platform.
+ Currently, the only platform supported is Linux.
+
+
+
+
+ mapred.tasktracker.taskmemorymanager.monitoring-interval
+ 5000
+ Integer
+ tasktracker
+
+ TaskTracker monitoring interval
+
+
+ The interval, in milliseconds, for which the tasktracker waits
+ between two cycles of monitoring its tasks' memory usage. Used only if
+ tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory.
+
+
+
+
+ mapred.tasktracker.tasks.sleeptime-before-sigkill
+ 5000
+ Integer
+ tasktracker
+
+ TaskTracker sleeptime before sending a SIGKILL
+
+
+ The time, in milliseconds, the tasktracker waits for sending a
+ SIGKILL to a process, after it has been sent a SIGTERM.
+
+
+
+ mapred.map.tasks
+ 2
+ Integer
+ map
+
+ map count
+
+
+ The default number of map tasks per job.
+ Ignored when mapred.job.tracker is "local".
+
+
+
+
+ mapred.reduce.tasks
+ 1
+ Integer
+ reduce
+
+ reduce count
+
+
+
+ The default number of reduce tasks per job. Typically set to 99%
+ of the cluster's reduce capacity, so that if a node fails the reduces can
+ still be executed in a single wave.
+ Ignored when mapred.job.tracker is "local".
+
+
+
+
+ mapreduce.tasktracker.outofband.heartbeat
+ false
+ true
+ Boolean
+ tasktracker
+
+ TaskTracker out-of-band heartbeat
+
+
+ Expert: Set this to true to let the tasktracker send an
+ out-of-band heartbeat on task-completion for better latency.
+
+
+
+
+ mapreduce.tasktracker.outofband.heartbeat.damper
+ 1000000
+ Integer
+ tasktracker
+
+ out-of-band heartbeat damper
+
+
+ When out-of-band heartbeats are enabled, provides
+ damping to avoid overwhelming the JobTracker if too many out-of-band
+ heartbeats would occur. The damping is calculated such that the
+ heartbeat interval is divided by (T*D + 1) where T is the number
+ of completed tasks and D is the damper value.
+ Setting this to a high value like the default provides no damping --
+ as soon as any task finishes, a heartbeat will be sent. Setting this
+ parameter to 0 is equivalent to disabling the out-of-band heartbeat feature.
+ A value of 1 would indicate that, after one task has completed, the
+ time to wait before the next heartbeat would be 1/2 the usual time.
+ After two tasks have finished, it would be 1/3 the usual time, etc.
+
+
+
+
+ mapred.jobtracker.restart.recover
+ false
+ Boolean
+ job
+
+ Whether enable job recovery
+
+
+ "true" to enable (job) recovery upon restart,
+ "false" to start afresh
+
+
+
+
+ mapred.jobtracker.job.history.block.size
+ 3145728
+ Integer
+ job
+
+ The block size of the job history file
+
+
+ The block size of the job history file. Since the job recovery
+ uses job history, its important to dump job history to disk as
+ soon as possible. Note that this is an expert level parameter.
+ The default value is set to 3 MB.
+
+
+
+
+ mapreduce.job.split.metainfo.maxsize
+ 10000000
+ Integer
+ job
+
+ The maximum permissible size of the split metainfo file
+
+
+ The maximum permissible size of the split metainfo file.
+ The JobTracker won't attempt to read split metainfo files bigger than
+ the configured value.
+ No limits if set to -1.
+
+
+
+
+ mapred.jobtracker.taskScheduler
+ false
+ org.apache.hadoop.mapred.JobQueueTaskScheduler
+ Class
+ basic
+
+ The class responsible for scheduling the tasks.
+
+
+ The class responsible for scheduling the tasks.
+
+
+
+ mapred.jobtracker.taskScheduler.maxRunningTasksPerJob
+
+ true
+ Integer
+ job
+
+ The maximum number of running tasks for a job before
+ it gets preempted
+
+ false
+
+ The maximum number of running tasks for a job before
+ it gets preempted. No limits if undefined.
+
+
+
+
+ mapred.map.max.attempts
+ 4
+ Integer
+ map
+
+ Map maximum attempt times
+
+
+ Expert: The maximum number of attempts per map task.
+ In other words, the framework will try to execute a map task this many
+ times before giving up on it.
+
+
+
+
+ mapred.reduce.max.attempts
+ 4
+ Integer
+ reduce
+
+ Reduce maximum attempt times
+
+
+ Expert: The maximum number of attempts per reduce task.
+ In other words, the framework will try to execute a reduce task this many
+ times before giving up on it.
+
+
+
+
+ mapred.reduce.parallel.copies
+ 5
+ 20
+ Integer
+ perf
+
+ The default number of parallel transfers run by reduce
+ during the copy(shuffle) phase
+
+
+ The default number of parallel transfers run by reduce
+ during the copy(shuffle) phase.
+
+
+
+
+ mapreduce.reduce.shuffle.maxfetchfailures
+ 10
+ Integer
+ reduce
+
+ reducer maximum fetch failures
+
+
+ The maximum number of times a reducer tries to
+ fetch a map output before it reports it.
+
+
+
+
+ mapreduce.reduce.shuffle.connect.timeout
+ 180000
+ Integer
+ reduce
+
+ reducer connect timeout
+
+
+ Expert: The maximum amount of time (in milliseconds) a reduce
+ task spends in trying to connect to a tasktracker for getting map output.
+
+
+
+
+ mapreduce.reduce.shuffle.read.timeout
+ 180000
+ Integer
+ reduce
+
+ reducer read timeout
+
+
+ Expert: The maximum amount of time (in milliseconds) a reduce
+ task waits for map output data to be available for reading after obtaining
+ connection.
+
+
+
+
+ mapred.task.timeout
+ 600000
+ 180000
+ Integer
+ basic
+
+ task timeout value
+
+
+ The number of milliseconds before a task will be
+ terminated if it neither reads an input, writes an output, nor
+ updates its status string.
+
+
+
+
+ mapred.tasktracker.map.tasks.maximum
+ 2
+
+ true
+ Integer
+ tasktracker
+
+ The maximum number of map tasks that will be run
+ simultaneously by a task tracker
+
+ true
+
+ 2
+
+ The maximum number of map tasks that will be run
+ simultaneously by a task tracker.
+
+
+
+
+ mapred.tasktracker.reduce.tasks.maximum
+ 2
+
+ true
+ Integer
+ tasktracker
+
+ The maximum number of reduce tasks that will be run
+ simultaneously by a task tracker.
+
+ true
+
+ 2
+
+ The maximum number of reduce tasks that will be run
+ simultaneously by a task tracker.
+
+
+
+
+ mapred.jobtracker.completeuserjobs.maximum
+ 100
+ Integer
+ job
+
+ maximum number of complete jobs to keep around per user
+
+
+ The maximum number of complete jobs per user to keep around
+ before delegating them to the job history.
+
+
+
+ mapreduce.reduce.input.limit
+ -1
+ Integer
+ reduce
+
+ The limit on the input size of the reduce
+
+
+ The limit on the input size of the reduce. If the estimated
+ input size of the reduce is greater than this value, job is failed. A
+ value of -1 means that there is no limit set.
+
+
+
+ mapred.job.tracker.retiredjobs.cache.size
+ 1000
+ Integer
+ job
+
+ The number of retired job status to keep in the cache.
+
+
+ The number of retired job status to keep in the cache.
+
+
+
+ mapred.job.tracker.jobhistory.lru.cache.size
+ 5
+ Integer
+ job
+
+ The number of job history files loaded in memory
+
+
+ The number of job history files loaded in memory. The jobs are
+ loaded when they are first accessed. The cache is cleared based on LRU.
+
+
+
+
+ mapred.jobtracker.instrumentation
+ false
+ org.apache.hadoop.mapred.JobTrackerMetricsSource
+ Class
+ jobtracker
+
+ JobTracker instrumentation class
+
+
+ Expert: The instrumentation class to associate with each JobTracker.
+
+
+
+ mapred.child.java.opts
+ -Xmx200m
+ -Xmx512m -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=1 -XX:ParallelGCThreads=1
+ true
+ String
+ tasktracker
+
+ Java opts for the task tracker child processes
+
+
+ Java opts for the task tracker child processes.
+ The following symbol, if present, will be interpolated: @taskid@ is replaced
+ by current TaskID. Any other occurrences of '@' will go unchanged.
+ For example, to enable verbose gc logging to a file named for the taskid in
+ /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+ -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+ The configuration variable mapred.child.ulimit can be used to control the
+ maximum virtual memory of the child processes.
+
+
+
+
+ mapred.child.heapsize
+ 200
+ Integer
+ basic
+
+ The JVM heapsize used by each map task and reduce task.
+
+ true
+ true
+
+ The JVM heapsize used by each map task and reduce task.
+
+
+
+ mapred.child.env
+
+ true
+ Integer
+ tasktracker
+
+ tasktracker child process environment variable
+
+
+ User added environment variables for the task tracker child
+ processes. Example :
+ 1) A=foo This will set the env variable A to foo
+ 2) B=$B:c This inherits the tasktracker's B env variable.
+
+
+
+
+ mapred.child.ulimit
+
+ true
+
+ String
+ basic
+
+ MapReduce maximum virtual memory
+
+
+ The maximum virtual memory, in KB, of a process launched by the
+ Map-Reduce framework. This can be used to control both the Mapper/Reducer
+ tasks and applications using Hadoop Pipes, Hadoop Streaming etc.
+ By default it is left unspecified to let cluster admins control it via
+ limits.conf and other such relevant mechanisms.
+ Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
+ JavaVM, else the VM might not start.
+
+
+
+
+ mapred.cluster.map.memory.mb
+ -1
+ Integer
+ map
+
+ map virtual memory size
+
+
+ The size, in terms of virtual memory, of a single map slot
+ in the Map-Reduce framework, used by the scheduler.
+ A job can ask for multiple slots for a single map task via
+ mapred.job.map.memory.mb, upto the limit specified by
+ mapred.cluster.max.map.memory.mb, if the scheduler supports the feature.
+ The value of -1 indicates that this feature is turned off.
+
+
+
+
+ mapred.cluster.reduce.memory.mb
+ -1
+ Integer
+ reduce
+
+ reduce virtual memory size
+
+
+ The size, in terms of virtual memory, of a single reduce slot
+ in the Map-Reduce framework, used by the scheduler.
+ A job can ask for multiple slots for a single reduce task via
+ mapred.job.reduce.memory.mb, upto the limit specified by
+ mapred.cluster.max.reduce.memory.mb, if the scheduler supports the feature.
+ The value of -1 indicates that this feature is turned off.
+
+
+
+
+ mapred.cluster.max.map.memory.mb
+ -1
+ Integer
+ map
+
+ map maximum virtual memory size
+
+
+ The maximum size, in terms of virtual memory, of a single map
+ task launched by the Map-Reduce framework, used by the scheduler.
+ A job can ask for multiple slots for a single map task via
+ mapred.job.map.memory.mb, upto the limit specified by
+ mapred.cluster.max.map.memory.mb, if the scheduler supports the feature.
+ The value of -1 indicates that this feature is turned off.
+
+
+
+
+ mapred.cluster.max.reduce.memory.mb
+ -1
+ Integer
+ reduce
+
+ reduce maximum virtual memory size
+
+
+ The maximum size, in terms of virtual memory, of a single reduce
+ task launched by the Map-Reduce framework, used by the scheduler.
+ A job can ask for multiple slots for a single reduce task via
+ mapred.job.reduce.memory.mb, upto the limit specified by
+ mapred.cluster.max.reduce.memory.mb, if the scheduler supports the feature.
+ The value of -1 indicates that this feature is turned off.
+
+
+
+
+ mapred.job.map.memory.mb
+ -1
+ Integer
+ map
+
+ map virtual memory size
+
+
+ The size, in terms of virtual memory, of a single map task
+ for the job.
+ A job can ask for multiple slots for a single map task, rounded up to the
+ next multiple of mapred.cluster.map.memory.mb and upto the limit
+ specified by mapred.cluster.max.map.memory.mb, if the scheduler supports
+ the feature.
+ The value of -1 indicates that this feature is turned off iff
+ mapred.cluster.map.memory.mb is also turned off (-1).
+
+
+
+
+ mapred.job.reduce.memory.mb
+ -1
+ Integer
+ reduce
+
+ reduce virtual memory size
+
+
+ The size, in terms of virtual memory, of a single reduce task
+ for the job.
+ A job can ask for multiple slots for a single map task, rounded up to the
+ next multiple of mapred.cluster.reduce.memory.mb and upto the limit
+ specified by mapred.cluster.max.reduce.memory.mb, if the scheduler supports
+ the feature.
+ The value of -1 indicates that this feature is turned off iff
+ mapred.cluster.reduce.memory.mb is also turned off (-1).
+
+
+
+
+ mapred.child.tmp
+ ./tmp
+ String
+ basic
+
+ tmp directory for map and reduce tasks
+
+
+ To set the value of tmp directory for map and reduce tasks.
+ If the value is an absolute path, it is directly assigned. Otherwise, it is
+ prepended with task's working directory. The java tasks are executed with
+ option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
+ streaming are set with environment variable,
+ TMPDIR='the absolute path of the tmp dir'
+
+
+
+
+ mapred.inmem.merge.threshold
+ 1000
+ Integer
+ perf
+
+ in-memory merge threshold
+
+
+ The threshold, in terms of the number of files,
+ for the in-memory merge process. When we accumulate this many files
+ we initiate the in-memory merge and spill to disk. A value of 0 or less
+ indicates that no threshold is used and the merge is triggered only by
+ the ramfs's memory consumption.
+
+
+
+
+ mapred.job.shuffle.merge.percent
+ 0.66
+ Float
+ perf
+
+ The usage threshold at which an in-memory merge will be
+ initiated
+
+
+ The usage threshold at which an in-memory merge will be
+ initiated, expressed as a percentage of the total memory allocated to
+ storing in-memory map outputs, as defined by
+ mapred.job.shuffle.input.buffer.percent.
+
+
+
+
+ mapred.job.shuffle.input.buffer.percent
+ 0.70
+ Float
+ perf
+
+ memory to store map outputs during the shuffle
+
+
+ The percentage of memory to be allocated from the maximum heap
+ size to storing map outputs during the shuffle.
+
+
+
+
+ mapred.job.reduce.input.buffer.percent
+ 0.0
+ Float
+ perf
+
+ The percentage of memory- relative to the maximum heap size- to
+ retain map outputs during the reduce
+
+
+ The percentage of memory- relative to the maximum heap size- to
+ retain map outputs during the reduce. When the shuffle is concluded, any
+ remaining map outputs in memory must consume less than this threshold before
+ the reduce can begin.
+
+
+
+
+ mapred.map.tasks.speculative.execution
+ true
+ true
+ Boolean
+ map
+
+ whether to execute multiple instances of map task in parallel
+
+
+ If true, then multiple instances of some map tasks
+ may be executed in parallel.
+
+
+
+ mapred.reduce.tasks.speculative.execution
+ true
+ true
+ false
+ Boolean
+ reduce
+
+ whether to execute multiple instances of reduce task in parallel
+
+
+ If true, then multiple instances of some reduce tasks
+ may be executed in parallel.
+
+
+
+ mapred.job.reuse.jvm.num.tasks
+ 1
+ Integer
+ perf
+
+ Number of tasks to run per JVM
+
+
+ How many tasks to run per jvm. If set to -1, there is
+ no limit.
+
+
+
+
+ mapred.min.split.size
+ 0
+ Integer
+ map
+
+ The minimum size chunk that map input should be split
+ into
+
+
+ The minimum size chunk that map input should be split
+ into. Note that some file formats may have minimum split sizes that
+ take priority over this setting.
+
+
+
+ mapred.jobtracker.maxtasks.per.job
+ -1
+ Integer
+ job
+
+ The maximum number of tasks for a single job
+
+
+ The maximum number of tasks for a single job.
+ A value of -1 indicates that there is no maximum.
+
+
+
+ mapred.submit.replication
+ 10
+ Integer
+ job
+
+ The replication level for submitted job files
+
+
+ The replication level for submitted job files. This
+ should be around the square root of the number of nodes.
+
+
+
+
+ mapred.tasktracker.dns.interface
+ default
+ String
+ tasktracker
+
+ The name of the Network Interface from which a task
+ tracker should report its IP address
+
+
+ The name of the Network Interface from which a task
+ tracker should report its IP address.
+
+
+
+
+ mapred.tasktracker.dns.nameserver
+ default
+ String
+ tasktracker
+
+ TaskTracker name server address
+
+
+ The host name or IP address of the name server (DNS)
+ which a TaskTracker should use to determine the host name used by
+ the JobTracker for communication and display purposes.
+
+
+
+
+ tasktracker.http.threads
+ 40
+ 40
+ 60
+ Integer
+ perf
+
+ The number of worker threads for the http server
+
+
+ The number of worker threads for the http server. This is
+ used for map output fetching.
+
+
+
+
+ mapred.task.tracker.http.address
+ 0.0.0.0:50060
+ String
+ tasktracker
+
+ The task tracker http server address and port
+
+
+
+ The task tracker http server address and port.
+ If the port is 0 then the server will start on a free port.
+
+
+
+
+ keep.failed.task.files
+ false
+ Boolean
+ basic
+
+ Whether to keep files for failed tasks
+
+
+ Should the files for failed tasks be kept. This should only be
+ used on jobs that are failing, because the storage is never
+ reclaimed. It also prevents the map outputs from being erased
+ from the reduce directory as they are consumed.
+
+
+
+
+ mapred.output.compress
+ false
+ Boolean
+ job
+
+ Whether to compress job output
+
+
+ Should the job outputs be compressed?
+
+
+
+
+ mapred.output.compression.type
+ RECORD
+ RECORD
+ BLOCK
+ String
+ job
+
+ compress type of job outputs
+
+
+ If the job outputs are to be compressed as SequenceFiles, how should
+ they be compressed? Should be one of NONE, RECORD or BLOCK.
+
+
+
+
+ mapred.output.compression.codec
+ org.apache.hadoop.io.compress.DefaultCodec
+ Class
+ job
+
+ job outputs compress codec class
+
+
+ If the job outputs are compressed, how should they be compressed?
+
+
+
+
+ mapred.compress.map.output
+ false
+ false
+ true
+ Boolean
+ map
+
+ Whether to compress map outputs
+
+
+ Should the outputs of the maps be compressed before being
+ sent across the network. Uses SequenceFile compression.
+
+
+
+
+ mapred.map.output.compression.codec
+ org.apache.hadoop.io.compress.DefaultCodec
+ org.apache.hadoop.io.compress.DefaultCodec
+ org.apache.hadoop.io.compress.SnappyCodec
+ Class
+ map
+
+ map outputs compression class
+
+
+ If the map outputs are compressed, how should they be
+ compressed?
+
+
+
+
+ map.sort.class
+ org.apache.hadoop.util.QuickSort
+ Class
+ basic
+
+ The default sort class for sorting keys.
+
+
+ The default sort class for sorting keys.
+
+
+
+ mapred.userlog.limit.kb
+ 0
+ Integer
+ user
+
+ The maximum size of user-logs of each task in KB
+
+
+ The maximum size of user-logs of each task in KB. 0 disables the cap.
+
+
+
+
+ mapred.userlog.retain.hours
+ 24
+ Integer
+ user
+
+ user log maximum retain hours
+
+
+ The maximum time, in hours, for which the user-logs are to be
+ retained after the job completion.
+
+
+
+ mapred.user.jobconf.limit
+ 5242880
+ Integer
+ user
+
+ The maximum allowed size of the user jobconf
+
+
+ The maximum allowed size of the user jobconf. The
+ default is set to 5 MB
+
+
+
+ mapred.hosts
+
+ true
+ Integer
+ jobtracker
+
+ Names a file that contains the list of nodes that may
+ connect to the jobtracker.
+
+
+ Names a file that contains the list of nodes that may
+ connect to the jobtracker. If the value is empty, all hosts are
+ permitted.
+
+
+
+ mapred.hosts.exclude
+
+ true
+ String
+ jobtracker
+
+ Names a file that contains the list of hosts that
+ should be excluded by the jobtracker
+
+
+ Names a file that contains the list of hosts that
+ should be excluded by the jobtracker. If the value is empty, no
+ hosts are excluded.
+
+
+
+ mapred.heartbeats.in.second
+ 100
+ Integer
+ jobtracker
+
+ Approximate number of heart-beats that could arrive
+ at JobTracker in a second
+
+
+ Expert: Approximate number of heart-beats that could arrive
+ at JobTracker in a second. Assuming each RPC can be processed
+ in 10msec, the default value is made 100 RPCs in a second.
+
+
+
+
+ mapred.max.tracker.blacklists
+ 4
+ Integer
+ tasktracker
+
+ TaskTracker maximum number of blacklists
+
+
+ The number of blacklists for a tasktracker by various jobs
+ after which the tasktracker will be marked as potentially
+ faulty and is a candidate for graylisting across all jobs.
+ (Unlike blacklisting, this is advisory; the tracker remains
+ active. However, it is reported as graylisted in the web UI,
+ with the expectation that chronically graylisted trackers
+ will be manually decommissioned.) This value is tied to
+ mapred.jobtracker.blacklist.fault-timeout-window; faults
+ older than the window width are forgiven, so the tracker
+ will recover from transient problems. It will also become
+ healthy after a restart.
+
+
+
+
+ mapred.jobtracker.blacklist.fault-timeout-window
+ 180
+ Integer
+ tasktracker
+
+ The timeout (in minutes) after which per-job tasktracker
+ faults are forgiven
+
+
+ The timeout (in minutes) after which per-job tasktracker
+ faults are forgiven. The window is logically a circular
+ buffer of time-interval buckets whose width is defined by
+ mapred.jobtracker.blacklist.fault-bucket-width; when the
+ "now" pointer moves across a bucket boundary, the previous
+ contents (faults) of the new bucket are cleared. In other
+ words, the timeout's granularity is determined by the bucket
+ width.
+
+
+
+
+ mapred.jobtracker.blacklist.fault-bucket-width
+ 15
+ Integer
+ tasktracker
+
+ TaskTracker fault timeout window bucket width
+
+
+ The width (in minutes) of each bucket in the tasktracker
+ fault timeout window. Each bucket is reused in a circular
+ manner after a full timeout-window interval (defined by
+ mapred.jobtracker.blacklist.fault-timeout-window).
+
+
+
+
+ mapred.max.tracker.failures
+ 4
+ Integer
+ tasktracker
+
+ TaskTracker maximum failures
+
+
+ The number of task-failures on a tasktracker of a given job
+ after which new tasks of that job aren't assigned to it.
+
+
+
+
+ jobclient.output.filter
+ FAILED
+ String
+ user
+
+ The filter for controlling the output of the task's userlogs sent
+ to the console of the JobClient
+
+
+ The filter for controlling the output of the task's userlogs sent
+ to the console of the JobClient.
+ The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and
+ ALL.
+
+
+
+
+ jobclient.completion.poll.interval
+ 5000
+ Integer
+ user
+
+ JobClient polling JobTracker interval
+
+
+ The interval (in milliseconds) between which the JobClient
+ polls the JobTracker for updates about job status. You may want to set this
+ to a lower value to make tests run faster on a single node system. Adjusting
+ this value in production may lead to unwanted client-server traffic.
+
+
+
+
+ jobclient.progress.monitor.poll.interval
+ 1000
+ Integer
+ user
+
+ The interval (in milliseconds) between which the JobClient
+ reports status to the console and checks for job completion
+
+
+ The interval (in milliseconds) between which the JobClient
+ reports status to the console and checks for job completion. You may want to set this
+ to a lower value to make tests run faster on a single node system. Adjusting
+ this value in production may lead to unwanted client-server traffic.
+
+
+
+
+ mapred.job.tracker.persist.jobstatus.active
+ false
+ Boolean
+ job
+
+ Indicates if persistency of job status information is
+ active or not.
+
+
+ Indicates if persistency of job status information is
+ active or not.
+
+
+
+
+ mapred.job.tracker.persist.jobstatus.hours
+ 0
+ Integer
+ job
+
+ The number of hours job status information is persisted in DFS.
+
+
+ The number of hours job status information is persisted in DFS.
+ The job status information will be available after it drops off the memory
+ queue and between jobtracker restarts. With a zero value the job status
+ information is not persisted at all in DFS.
+
+
+
+
+ mapred.job.tracker.persist.jobstatus.dir
+ /jobtracker/jobsInfo
+ Directory
+ job
+
+ The directory where the job status information is persisted
+
+
+ The directory where the job status information is persisted
+ in a file system to be available after it drops off the memory queue and
+ between jobtracker restarts.
+
+
+
+
+ mapreduce.job.complete.cancel.delegation.tokens
+ true
+ Boolean
+ job
+
+ Whether to unregister delegation tokens from renewal
+
+
+ If false, do not unregister/cancel delegation tokens
+ from renewal, because the same tokens may be used by spawned jobs.
+
+
+
+
+ mapred.task.profile
+ false
+ Boolean
+ basic
+
+ Whether to collect profiler information
+
+
+ Whether the system should collect profiler
+ information for some of the tasks in this job. The information is stored
+ in the user log directory. The value is "true" if task profiling
+ is enabled.
+
+
+
+ mapred.task.profile.maps
+ 0-2
+ Int_range
+ map
+
+ ranges of map tasks to profile
+
+
+ To set the ranges of map tasks to profile.
+ mapred.task.profile has to be set to true for the value to be accounted.
+
+
+
+
+ mapred.task.profile.reduces
+ 0-2
+ Int_range
+ reduce
+
+ ranges of reduce tasks to profile
+
+
+ To set the ranges of reduce tasks to profile.
+ mapred.task.profile has to be set to true for the value to be accounted.
+
+
+
+
+ mapred.line.input.format.linespermap
+ 1
+ Integer
+ basic
+
+ Number of lines per split in NLineInputFormat
+
+
+ Number of lines per split in NLineInputFormat.
+
+
+
+
+ mapred.skip.attempts.to.start.skipping
+ 2
+ Integer
+ skip
+
+ The number of Task attempts AFTER which skip mode
+ will be kicked off
+
+
+ The number of Task attempts AFTER which skip mode
+ will be kicked off. When skip mode is kicked off, the
+ task reports to the TaskTracker the range of records which it
+ will process next, so that on failures the TaskTracker knows
+ which records are possibly the bad ones. On further executions,
+ those are skipped.
+
+
+
+
+ mapred.skip.map.auto.incr.proc.count
+ true
+ Boolean
+ skip
+
+ Whether to increment SkipBadRecords.COUNTER_MAP_PROCESSED_GROUPS
+
+
+ The flag which if set to true,
+ SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS is incremented
+ by MapRunner after invoking the map function. This value must be set to
+ false for applications which process the records asynchronously
+ or buffer the input records. For example streaming.
+ In such cases applications should increment this counter on their own.
+
+
+
+
+ mapred.skip.reduce.auto.incr.proc.count
+ true
+ Boolean
+ skip
+
+ Whether to increment SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS
+
+
+ The flag which if set to true,
+ SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS is incremented
+ by framework after invoking the reduce function. This value must be set to
+ false for applications which process the records asynchronously
+ or buffer the input records. For example streaming.
+ In such cases applications should increment this counter on their own.
+
+
+
+
+ mapred.skip.out.dir
+
+ true
+ String
+ skip
+
+ skipped records output directory
+
+
+ If no value is specified here, the skipped records are
+ written to the output directory at _logs/skip.
+ User can stop writing skipped records by giving the value "none".
+
+
+
+
+ mapred.skip.map.max.skip.records
+ 0
+ Integer
+ skip
+
+ The number of acceptable skip records surrounding the bad
+ record PER bad record in mapper
+
+
+ The number of acceptable skip records surrounding the bad
+ record PER bad record in mapper. The number includes the bad record as well.
+ To turn the feature of detection/skipping of bad records off, set the
+ value to 0.
+ The framework tries to narrow down the skipped range by retrying
+ until this threshold is met OR all attempts get exhausted for this task.
+ Set the value to Long.MAX_VALUE to indicate that framework need not try to
+ narrow down. Whatever records (depends on application) get skipped are
+ acceptable.
+
+
+
+
+ mapred.skip.reduce.max.skip.groups
+ 0
+ Integer
+ skip
+
+ The number of acceptable skip groups surrounding the bad
+ group PER bad group in reducer
+
+
+ The number of acceptable skip groups surrounding the bad
+ group PER bad group in reducer. The number includes the bad group as well.
+ To turn the feature of detection/skipping of bad groups off, set the
+ value to 0.
+ The framework tries to narrow down the skipped range by retrying
+ until this threshold is met OR all attempts get exhausted for this task.
+ Set the value to Long.MAX_VALUE to indicate that framework need not try to
+ narrow down. Whatever groups (depends on application) get skipped are
+ acceptable.
+
+
+
+
+
+
+ job.end.retry.attempts
+ 0
+ Integer
+ basic
+
+ hadoop retry attempts to contact notification URL
+
+
+ Indicates how many times hadoop should attempt to contact the
+ notification URL
+
+
+
+ job.end.retry.interval
+ 30000
+ Integer
+ basic
+
+ notification URL retry interval
+
+
+ Indicates time in milliseconds between notification URL retry
+ calls
+
+
+
+
+ hadoop.rpc.socket.factory.class.JobSubmissionProtocol
+
+ true
+ String
+ basic
+
+ SocketFactory to use to connect to a Map/Reduce master (JobTracker)
+
+
+ SocketFactory to use to connect to a Map/Reduce master
+ (JobTracker). If null or empty, then use hadoop.rpc.socket.class.default.
+
+
+
+
+ mapred.task.cache.levels
+ 2
+ Integer
+ perf
+
+ max level of the task cache
+
+
+ This is the max level of the task cache. For example, if
+ the level is 2, the tasks cached are at the host level and at the rack
+ level.
+
+
+
+
+ mapred.queue.names
+ false
+ default
+ String
+ jobtracker
+
+ list of queues configured for this jobtracker
+
+
+ Comma separated list of queues configured for this jobtracker.
+ Jobs are added to queues and schedulers can configure different
+ scheduling properties for the various queues. To configure a property
+ for a queue, the name of the queue must match the name specified in this
+ value. Queue properties that are common to all schedulers are configured
+ here with the naming convention, mapred.queue.$QUEUE-NAME.$PROPERTY-NAME,
+ for e.g. mapred.queue.default.submit-job-acl.
+ The number of queues configured in this parameter could depend on the
+ type of scheduler being used, as specified in
+ mapred.jobtracker.taskScheduler. For example, the JobQueueTaskScheduler
+ supports only a single queue, which is the default configured here.
+ Before adding more queues, ensure that the scheduler you've configured
+ supports multiple queues.
+
+
+
+
+ mapred.acls.enabled
+ true
+ false
+ Boolean
+ basic
+
+ Whether to enable ACL check
+
+
+ Specifies whether ACLs should be checked
+ for authorization of users for doing various queue and job level operations.
+ ACLs are disabled by default. If enabled, access control checks are made by
+ JobTracker and TaskTracker when requests are made by users for queue
+ operations like submit job to a queue and kill a job in the queue and job
+ operations like viewing the job-details (See mapreduce.job.acl-view-job)
+ or for modifying the job (See mapreduce.job.acl-modify-job) using
+ Map/Reduce APIs, RPCs or via the console and web user interfaces.
+
+
+
+
+ mapred.queue.default.state
+ RUNNING
+ String
+ basic
+
+ job state value
+
+
+
+ This value defines the state the default queue is in.
+ The value can be either "STOPPED" or "RUNNING".
+ This value can be changed at runtime.
+
+
+
+
+ mapred.job.queue.name
+ default
+ String
+ job
+
+ Queue to which a job is submitted
+
+
+ Queue to which a job is submitted. This must match one of the
+ queues defined in mapred.queue.names for the system. Also, the ACL setup
+ for the queue must allow the current user to submit a job to the queue.
+ Before specifying a queue, ensure that the system is configured with
+ the queue, and access is allowed for submitting jobs to the queue.
+
+
+
+
+ mapreduce.job.acl-modify-job
+
+
+ String
+ true
+ job
+
+ Job specific access-control list for 'modifying' the job
+
+
+ Job specific access-control list for 'modifying' the job. It
+ is only used if authorization is enabled in Map/Reduce by setting the
+ configuration property mapred.acls.enabled to true.
+ This specifies the list of users and/or groups who can do modification
+ operations on the job. For specifying a list of users and groups the
+ format to use is "user1,user2 group1,group". If set to '*', it allows all
+ users/groups to modify this job. If set to ' '(i.e. space), it allows
+ none. This configuration is used to guard all the modifications with respect
+ to this job and takes care of all the following operations:
+ o killing this job
+ o killing a task of this job, failing a task of this job
+ o setting the priority of this job
+ Each of these operations are also protected by the per-queue level ACL
+ "acl-administer-jobs" configured via mapred-queues.xml. So a caller should
+ have the authorization to satisfy either the queue-level ACL or the
+ job-level ACL.
+ Irrespective of this ACL configuration, job-owner, the user who started the
+ cluster, cluster administrators configured via
+ mapreduce.cluster.administrators and queue administrators of the queue to
+ which this job is submitted to configured via
+ mapred.queue.queue-name.acl-administer-jobs in mapred-queue-acls.xml can
+ do all the modification operations on a job.
+ By default, nobody else besides job-owner, the user who started the cluster,
+ cluster administrators and queue administrators can perform modification
+ operations on a job.
+
+
+
+
+ mapreduce.job.acl-view-job
+
+
+ String
+ true
+ job
+
+ Job specific access-control list for 'viewing' the job
+
+
+ Job specific access-control list for 'viewing' the job. It is
+ only used if authorization is enabled in Map/Reduce by setting the
+ configuration property mapred.acls.enabled to true.
+ This specifies the list of users and/or groups who can view private details
+ about the job. For specifying a list of users and groups the
+ format to use is "user1,user2 group1,group". If set to '*', it allows all
+ users/groups to view this job. If set to ' '(i.e. space), it allows
+ none. This configuration is used to guard some of the job-views and at
+ present only protects APIs that can return possibly sensitive information
+ of the job-owner like
+ o job-level counters
+ o task-level counters
+ o tasks' diagnostic information
+ o task-logs displayed on the TaskTracker web-UI and
+ o job.xml showed by the JobTracker's web-UI
+ Every other piece of information of jobs is still accessible by any other
+ user, for e.g., JobStatus, JobProfile, list of jobs in the queue, etc.
+ Irrespective of this ACL configuration, job-owner, the user who started the
+ cluster, cluster administrators configured via
+ mapreduce.cluster.administrators and queue administrators of the queue to
+ which this job is submitted to configured via
+ mapred.queue.queue-name.acl-administer-jobs in mapred-queue-acls.xml can do
+ all the view operations on a job.
+ By default, nobody else besides job-owner, the user who started the
+ cluster, cluster administrators and queue administrators can perform
+ view operations on a job.
+
+
+
+
+ mapred.tasktracker.indexcache.mb
+ 10
+ Integer
+ tasktracker
+
+ The maximum memory that a task tracker allows for the index cache
+
+
+ The maximum memory that a task tracker allows for the
+ index cache that is used when serving map outputs to reducers.
+
+
+
+
+ mapred.combine.recordsBeforeProgress
+ 10000
+ Integer
+ basic
+
+ The number of records to process during combine output collection
+ before sending a progress notification to the TaskTracker.
+
+
+ The number of records to process during combine output collection
+ before sending a progress notification to the TaskTracker.
+
+
+
+
+ mapred.merge.recordsBeforeProgress
+ 10000
+ Integer
+ basic
+
+ The number of records to process during merge before
+ sending a progress notification to the TaskTracker.
+
+
+ The number of records to process during merge before
+ sending a progress notification to the TaskTracker.
+
+
+
+
+ mapred.reduce.slowstart.completed.maps
+ 0.05
+ Float
+ job
+
+ Fraction of the number of maps in the job which should be
+ complete before reduces are scheduled for the job
+
+
+ Fraction of the number of maps in the job which should be
+ complete before reduces are scheduled for the job.
+
+
+
+
+ mapred.task.tracker.task-controller
+ org.apache.hadoop.mapred.DefaultTaskController
+ Class
+ basic
+
+ TaskController which is used to launch and manage task execution
+
+
+ TaskController which is used to launch and manage task execution
+
+
+
+ mapreduce.tasktracker.group
+
+ true
+ String
+ tasktracker
+
+ Group to which TaskTracker belongs
+
+
+ Expert: Group to which TaskTracker belongs. If
+ LinuxTaskController is configured via mapreduce.tasktracker.taskcontroller,
+ the group owner of the task-controller binary should be same as this group.
+
+
+
+
+ mapred.disk.healthChecker.interval
+ 60000
+ Integer
+ health
+
+ node health check interval
+
+
+ How often the TaskTracker checks the health of its
+ local directories. Configuring this to a value smaller than the
+ heartbeat interval is equivalent to setting this to the heartbeat
+ interval value.
+
+
+
+
+
+ mapred.healthChecker.script.path
+
+ true
+ Integer
+ health
+
+ node health script path
+
+
+ Absolute path to the script which is
+ periodically run by the node health monitoring service to determine if
+ the node is healthy or not. If the value of this key is empty or the
+ file does not exist in the location configured here, the node health
+ monitoring service is not started.
+
+
+
+ mapred.healthChecker.interval
+ 60000
+ Integer
+ health
+
+ Frequency of the node health script to be run
+
+
+ Frequency of the node health script to be run
+
+
+
+ mapred.healthChecker.script.timeout
+ 600000
+ Integer
+ health
+
+ node health script timeout
+
+
+ Time after which the node health script is killed if
+ unresponsive, and the script is considered to have failed.
+
+
+
+ mapred.healthChecker.script.args
+
+ true
+ String
+ health
+
+ Comma separated list of arguments which are to be passed to
+ the node health script when it is launched.
+
+
+ Comma separated list of arguments which are to be passed to
+ the node health script when it is launched.
+
+
+
+
+ mapreduce.job.counters.limit
+ 120
+ Integer
+ job
+
+ Limit on the number of counters allowed per job.
+
+
+ Limit on the number of counters allowed per job.
+
+
+
+ mapreduce.slot.memory.weight
+
+ 50
+ Integer
+ tasktracker
+
+ TaskTracker reserve heapsize weight
+
+ true
+ directory
+
+ The weight of the tasktracker reserve heapsize; the default value is 50.
+
+
+
+
+
+ mapred.scheduler
+ fair
+ String
+ basic
+
+ Task Schedule
+
+ true
+ advanced
+
+ Map/Reduce Task schedule method. If the Capacity scheduler has been selected, then additional items need to be configured.
+
+
+ fair,capacity
+
+
+ mapred.map.reduce.ratio
+ 2
+ Integer
+ basic
+
+ The map/reduce ratio.
+
+ true
+
+ The map/reduce ratio.
+
+
+
+ mapred.cpu.ratio.max
+ 1.5
+ Float
+ basic
+
+ The cpu max usage ratio.
+
+ true
+
+ The cpu max usage ratio.
+
+
+
+
+ mapred.fairscheduler.preemption
+ false
+ true
+ Boolean
+ perf
+
+ Whether to enable fairscheduler preemption.
+
+
+ Whether to enable fairscheduler preemption.
+
+
+
+ mapred.fairscheduler.assignmultiple
+ false
+ true
+ Boolean
+ perf
+
+ Whether to assign multiple tasks per heartbeat
+
+
+ Whether to assign multiple tasks per heartbeat
+
+
+
+ mapred.fairscheduler.sizebasedweight
+ false
+ true
+ Boolean
+ perf
+
+ size based weight
+
+
+ size based weight
+
+
+
+ mapred.fairscheduler.poolnameproperty
+ false
+ mapred.queue.name
+ String
+ perf
+
+ pool name property
+
+
+ job.set("mapred.queue.name",pool); // pool is set to either 'high' or 'low'
+
+
+
+ mapred.capacity-scheduler.maximum-system-jobs
+ 3000
+ Integer
+ basic
+
+ Maximum number of jobs in the system which can be initialized,
+ concurrently, by the CapacityScheduler
+
+
+ Maximum number of jobs in the system which can be initialized,
+ concurrently, by the CapacityScheduler.
+
+
+
+ mapred.capacity-scheduler.queue.default.capacity
+ 100
+ Integer
+ basic
+
+ Percentage of the number of slots in the cluster that are
+ to be available for jobs in this queue.
+
+
+ Percentage of the number of slots in the cluster that are
+ to be available for jobs in this queue.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.maximum-capacity
+ -1
+ Integer
+ scheduling
+
+ per-queue maximum-capacity
+
+
+
+ maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+ This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+ The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+ Default value of -1 implies a queue can use complete capacity of the cluster.
+ This property could be used to curtail certain jobs which are long running in nature from occupying more than a
+ certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of
+ other queues being affected.
+ One important thing to note is that maximum-capacity is a percentage, so based on the cluster's capacity
+ the max capacity would change. So if a large number of nodes or racks get added to the cluster, the max capacity in
+ absolute terms would increase accordingly.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.supports-priority
+ false
+ Boolean
+ scheduling
+
+ Whether to take jobs' priorities into account in scheduling decisions
+
+
+ If true, priorities of jobs will be taken into
+ account in scheduling decisions.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.minimum-user-limit-percent
+ 100
+ Integer
+ scheduling
+
+ maximum limit on per-user's allocated percentage of resources
+
+
+
+ Each queue enforces a limit on the percentage of resources
+ allocated to a user at any given time, if there is competition for them.
+ This user limit can vary between a minimum and maximum value. The former
+ depends on the number of users who have submitted jobs, and the latter is
+ set to this property value. For example, suppose the value of this
+ property is 25. If two users have submitted jobs to a queue, no single
+ user can use more than 50% of the queue resources. If a third user submits
+ a job, no single user can use more than 33% of the queue resources. With 4
+ or more users, no user can use more than 25% of the queue's resources. A
+ value of 100 implies no user limits are imposed.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.user-limit-factor
+ 1
+ Integer
+ scheduling
+
+ The multiple of the queue capacity which can be configured to
+ allow a single user to acquire more slots.
+
+
+
+ The multiple of the queue capacity which can be configured to
+ allow a single user to acquire more slots.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks
+ 200000
+ Integer
+ scheduling
+
+ The maximum number of tasks, across all jobs in the queue,
+ which can be initialized concurrently.
+
+
+ The maximum number of tasks, across all jobs in the queue,
+ which can be initialized concurrently. Once the queue's jobs exceed this
+ limit they will be queued on disk.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user
+ 100000
+ Integer
+ scheduling
+
+ The maximum number of tasks per-user, across all of the
+ user's jobs in the queue, which can be initialized concurrently
+
+
+ The maximum number of tasks per-user, across all of the
+ user's jobs in the queue, which can be initialized concurrently. Once the
+ user's jobs exceed this limit they will be queued on disk.
+
+
+
+
+ mapred.capacity-scheduler.queue.default.init-accept-jobs-factor
+ 10
+ Integer
+ scheduling
+
+ The multiple of (maximum-system-jobs * queue-capacity) used to
+ determine the number of jobs which are accepted by the scheduler.
+
+
+ The multiple of (maximum-system-jobs * queue-capacity) used to
+ determine the number of jobs which are accepted by the scheduler.
+
+
+
+
+
+
+ mapred.capacity-scheduler.default-supports-priority
+ false
+ Boolean
+ scheduling
+
+ If true, priorities of jobs will be taken into
+ account in scheduling decisions by default in a job queue.
+
+
+ If true, priorities of jobs will be taken into
+ account in scheduling decisions by default in a job queue.
+
+
+
+ mapred.capacity-scheduler.default-minimum-user-limit-percent
+ 100
+ Integer
+ scheduling
+
+ The percentage of the resources limited to a particular user
+ for the job queue at any given point of time by default.
+
+
+ The percentage of the resources limited to a particular user
+ for the job queue at any given point of time by default.
+
+
+
+ mapred.capacity-scheduler.default-user-limit-factor
+ 1
+ Integer
+ scheduling
+
+ The default multiple of queue-capacity which is used to
+ determine the amount of slots a single user can consume concurrently.
+
+
+
+ The default multiple of queue-capacity which is used to
+ determine the amount of slots a single user can consume concurrently.
+
+
+
+
+ mapred.capacity-scheduler.default-maximum-active-tasks-per-queue
+ 200000
+ Integer
+ scheduling
+
+ The default maximum number of tasks, across all jobs in the
+ queue, which can be initialized concurrently
+
+
+ The default maximum number of tasks, across all jobs in the
+ queue, which can be initialized concurrently. Once the queue's jobs exceed
+ this limit they will be queued on disk.
+
+
+
+
+ mapred.capacity-scheduler.default-maximum-active-tasks-per-user
+ 100000
+ Integer
+ scheduling
+
+ The default maximum number of tasks per-user, across all of
+ the user's jobs in the queue, which can be initialized concurrently
+
+
+ The default maximum number of tasks per-user, across all of
+ the user's jobs in the queue, which can be initialized concurrently. Once
+ the user's jobs exceed this limit they will be queued on disk.
+
+
+
+
+ mapred.capacity-scheduler.default-init-accept-jobs-factor
+ 10
+ Integer
+ scheduling
+
+ The default multiple of (maximum-system-jobs * queue-capacity)
+ used to determine the number of jobs which are accepted by the scheduler.
+
+
+
+ The default multiple of (maximum-system-jobs * queue-capacity)
+ used to determine the number of jobs which are accepted by the scheduler.
+
+
+
+
+
+ mapred.capacity-scheduler.init-poll-interval
+ 5000
+ Integer
+ scheduling
+
+ The amount of time in milliseconds which is used to poll
+ the job queues for jobs to initialize.
+
+
+ The amount of time in milliseconds which is used to poll
+ the job queues for jobs to initialize.
+
+
+
+ mapred.capacity-scheduler.init-worker-threads
+ 5
+ Integer
+ scheduling
+
+ Number of worker threads which would be used by the
+ initialization poller to initialize jobs in a set of queues
+
+
+ Number of worker threads which would be used by the
+ initialization poller to initialize jobs in a set of queues.
+ If the number mentioned in the property is equal to the number of
+ job queues, then a single thread would initialize jobs in one queue.
+ If it is lower, then a thread would get a set of queues assigned.
+ If the number is greater, then the number of threads would be equal
+ to the number of job queues.
+
+
+
+
+ resmon.joblevel.metrics.enabled
+ false
+ Boolean
+ basic
+
+ Whether to enable joblevel metrics
+
+ true
+
+ Whether to enable joblevel metrics
+
+
+
+ mapred.profiling
+ false
+ Boolean
+ profiling
+
+ Whether to enable profiling or not
+
+
+ The flag that determines whether the profiling feature is enabled or not.
+
+
+
+
+ directory
+ Configuration
+ true
+ false
+
+
+ advanced
+ Advanced Configuration
+ true
+ false
+
+
+ basic
+ Basic Configuration
Basic configurations that get MapReduce running.
+
+
+ perf
+ Performance
+ Configurations that affect Hadoop's performance
+
+
+ jobtracker
+ Jobtracker Configuration
+ Configurations for Jobtracker.
+
+
+ tasktracker
+ Tasktracker Configuration
+ Configurations for Tasktracker.
+
+
+ map
+ Map Configuration
+ Configurations for Map job.
+
+
+ reduce
+ Reduce Configuration
+ Configurations for Reduce job.
+
+
+ io
+ IO Configuration
+ Configurations for Input/Output.
+
+
+ job
+ Job Configuration
+ Configurations for a job.
+
+
+ user
+ Configurations for user
+ Configurations for user.
+
+
+ scheduling
+ Configurations for scheduling
+ Configurations for scheduling.
+
+
+ profiling
+ Configurations for profiling
+ Configurations for profiling.
+
+
+ skip
+ Configurations for skip mode
+ Configurations for skip mode.
+
+
+ health
+ Configurations for health check
+ Configurations for health check.
+
+
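
The entries above are the MapReduce defaults shipped with the Intel plugin; each one carries a property name, default value, value type, category, and short/long English descriptions. The new `load_hadoop_xml_defaults_with_type_and_locale` helper (added to `savanna/utils/xmlutils.py` later in this diff) reads them from `<property>` elements containing `<name>`, `<value>`, `<valuetype>` and a `<description>`/`<en>` pair. Below is a stand-alone sketch of that element layout and extraction, using only the standard library; the sample property text is taken from the entries above, while the exact markup of the shipped file is an assumption inferred from the loader.

```python
from xml.dom import minidom

SAMPLE = """
<configuration>
  <property>
    <name>mapred.hosts.exclude</name>
    <value></value>
    <valuetype>String</valuetype>
    <description>
      <en>Names a file that contains the list of hosts that
          should be excluded by the jobtracker.</en>
    </description>
  </property>
</configuration>
"""


def _text(element, name):
    # Same idea as xmlutils._get_text_from_node: the first child's node
    # value, or '' when the tag is absent or has no text.
    nodes = element.getElementsByTagName(name) if element else None
    return (nodes[0].firstChild.nodeValue
            if nodes and nodes[0].hasChildNodes() else '')


doc = minidom.parseString(SAMPLE)
for prop in doc.getElementsByTagName('property'):
    descr_nodes = prop.getElementsByTagName('description')
    descr = descr_nodes[0] if descr_nodes else None
    print({
        'name': _text(prop, 'name'),
        'value': _text(prop, 'value'),
        'type': _text(prop, 'valuetype'),
        # _adjust_field in xmlutils normalizes description whitespace;
        # the join/split below approximates that.
        'description': ' '.join(_text(descr, 'en').split()),
    })
```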
diff --git a/savanna/tests/unit/plugins/intel/__init__.py b/savanna/tests/unit/plugins/intel/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/savanna/tests/unit/plugins/intel/client/__init__.py b/savanna/tests/unit/plugins/intel/client/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/savanna/tests/unit/plugins/intel/client/response.py b/savanna/tests/unit/plugins/intel/client/response.py
new file mode 100644
index 00000000..b2b867fa
--- /dev/null
+++ b/savanna/tests/unit/plugins/intel/client/response.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+
+class Response:
+ def __init__(self, data=None, ok=True, status_code=200):
+ self.text = json.dumps(data)
+ self.ok = ok
+ self.status_code = status_code
+ self.reason = None
+
+
+def make_resp(data=None, ok=True, status_code=200):
+ return Response(data, ok, status_code)
diff --git a/savanna/tests/unit/plugins/intel/client/test_client.py b/savanna/tests/unit/plugins/intel/client/test_client.py
new file mode 100644
index 00000000..7565bdb7
--- /dev/null
+++ b/savanna/tests/unit/plugins/intel/client/test_client.py
@@ -0,0 +1,310 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mock
+
+from savanna import exceptions as ex
+from savanna.plugins.intel.client import client as c
+from savanna.plugins.intel import exceptions as iex
+from savanna.tests.unit import base
+from savanna.tests.unit.plugins.intel.client import response as r
+
+
+SESSION_POST_DATA = {'sessionID': '123'}
+SESSION_GET_DATA = {"items": [
+ {
+ "nodeprogress": {
+ "hostname": 'host',
+ 'info': '_ALLFINISH\n'
+ }
+ }
+]}
+
+
+class TestClient(base.DbTestCase):
+
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_cluster_op(self, get, post):
+ client = c.IntelClient('qwe', 'rty')
+
+ data = {'lelik': 'bolik'}
+
+ post.return_value = r.make_resp(data)
+ self.assertEqual(client.cluster.create(), data)
+
+ get.return_value = r.make_resp(data)
+ self.assertEqual(client.cluster.get(), data)
+
+ post.return_value = r.make_resp(SESSION_POST_DATA)
+ get.return_value = r.make_resp(SESSION_GET_DATA)
+ client.cluster.install_software(['bla-bla'])
+
+ self.assertEqual(post.call_count, 2)
+ self.assertEqual(get.call_count, 2)
+
+ @mock.patch('requests.delete')
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_nodes_op(self, get, post, delete):
+ client = c.IntelClient('qwe', 'rty')
+
+ # add
+ post.return_value = r.make_resp(data={
+ "items": [
+ {
+ "iporhostname": "n1",
+ "info": "Connected"
+ },
+ {
+ "iporhostname": "n2",
+ "info": "Connected"
+ }
+ ]
+ })
+ client.nodes.add(['n1', 'n2'], 'hadoop', '/Def', '/tmp/key')
+ post.return_value = r.make_resp(data={
+ "items": [
+ {
+ "iporhostname": "n1",
+ "info": "bla-bla"
+ }
+ ]
+ })
+ self.assertRaises(iex.IntelPluginException, client.nodes.add,
+ ['n1'], 'hadoop', '/Def', '/tmp/key')
+
+ # config
+ post.return_value = r.make_resp(SESSION_POST_DATA)
+ get.return_value = r.make_resp(SESSION_GET_DATA)
+ client.nodes.config()
+
+ # delete
+ delete.return_value = r.make_resp()
+ client.nodes.delete(['n1'])
+
+ # get
+ get.return_value = r.make_resp()
+ client.nodes.get()
+
+ # get_status
+ get.return_value = r.make_resp(data={"status": "running"})
+ client.nodes.get_status(['n1'])
+
+ # stop_nodes
+ post.return_value = r.make_resp()
+ client.nodes.stop(['n1'])
+
+ self.assertEqual(delete.call_count, 1)
+ self.assertEqual(post.call_count, 4)
+ self.assertEqual(get.call_count, 3)
+
+ @mock.patch('requests.put')
+ @mock.patch('requests.post')
+ def test_params_op(self, post, put):
+ client = c.IntelClient('qwe', 'rty')
+ post.return_value = r.make_resp()
+ put.return_value = r.make_resp()
+
+ # add
+ client.params.hdfs.add('lelik', 'bolik')
+ client.params.hadoop.add('lelik', 'bolik')
+ client.params.mapred.add('lelik', 'bolik')
+
+ # get
+ self.assertRaises(ex.NotImplementedException, client.params.hdfs.get,
+ ['n1'], 'lelik')
+ self.assertRaises(ex.NotImplementedException, client.params.hadoop.get,
+ ['n1'], 'lelik')
+ self.assertRaises(ex.NotImplementedException, client.params.mapred.get,
+ ['n1'], 'lelik')
+
+ # update
+ client.params.hdfs.update('lelik', 'bolik', nodes=['n1'])
+ client.params.hdfs.update('lelik', 'bolik')
+ client.params.hadoop.update('lelik', 'bolik', nodes=['n1'])
+ client.params.hadoop.update('lelik', 'bolik')
+ client.params.mapred.update('lelik', 'bolik', nodes=['n1'])
+ client.params.mapred.update('lelik', 'bolik')
+
+ self.assertEqual(post.call_count, 3)
+ self.assertEqual(put.call_count, 6)
+
+ @mock.patch('savanna.context.sleep', lambda x: None)
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_base_services_op(self, get, post):
+ client = c.IntelClient('qwe', 'rty')
+
+ # start
+ post.return_value = r.make_resp()
+ get.return_value = r.make_resp(data={
+ "items": [
+ {
+ "serviceName": "hdfs",
+ "status": "running"
+ },
+ {
+ "serviceName": "mapred",
+ "status": "running"
+ }
+ ]})
+ client.services.hdfs.start()
+ client.services.mapred.start()
+
+ get.return_value = r.make_resp(data={
+ "items": [
+ {
+ "serviceName": "hdfs",
+ "status": "stopped"
+ },
+ {
+ "serviceName": "mapred",
+ "status": "stopped"
+ }
+ ]
+ })
+
+ self.assertRaises(iex.IntelPluginException,
+ client.services.hdfs.start)
+ self.assertRaises(iex.IntelPluginException,
+ client.services.mapred.start)
+
+ # stop
+ post.return_value = r.make_resp()
+ client.services.hdfs.stop()
+ client.services.mapred.stop()
+
+ # service
+ get.return_value = r.make_resp(data={
+ "items": [
+ {
+ "serviceName": "bla-bla",
+ "status": "fail"
+ }
+ ]
+ })
+
+ self.assertRaises(iex.IntelPluginException,
+ client.services.hdfs.status)
+ self.assertRaises(iex.IntelPluginException,
+ client.services.mapred.status)
+
+ # get_nodes
+ get.return_value = r.make_resp()
+ client.services.hdfs.get_nodes()
+ client.services.mapred.get_nodes()
+
+ # add_nodes
+ post.return_value = r.make_resp()
+ client.services.hdfs.add_nodes('DataNode', ['n1', 'n2'])
+ client.services.mapred.add_nodes('NameNode', ['n1', 'n2'])
+
+ self.assertEqual(get.call_count, 126)
+ self.assertEqual(post.call_count, 8)
+
+ @mock.patch('requests.delete')
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_services_op(self, get, post, delete):
+ client = c.IntelClient('qwe', 'rty')
+
+ # add
+ post.return_value = r.make_resp()
+ client.services.add(['hdfs', 'mapred'])
+
+ # get_services
+ get.return_value = r.make_resp()
+ client.services.get_services()
+
+ # delete_service
+ delete.return_value = r.make_resp()
+ client.services.delete_service('hdfs')
+
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_hdfs_services_op(self, get, post):
+ client = c.IntelClient('qwe', 'rty')
+
+ # format
+ get.return_value = r.make_resp(SESSION_GET_DATA)
+ post.return_value = r.make_resp(SESSION_POST_DATA)
+ client.services.hdfs.format()
+
+ # decommission
+ post.return_value = r.make_resp()
+ client.services.hdfs.decommission_nodes(['n1'])
+
+ # get status
+ get.return_value = r.make_resp(data={
+ "items": [
+ {
+ "hostname": "n1",
+ "status": "start"
+ }
+ ]
+ })
+ client.services.hdfs.get_datanodes_status()
+ self.assertEqual(client.services.hdfs.get_datanode_status('n1'),
+ 'start')
+ self.assertRaises(iex.IntelPluginException,
+ client.services.hdfs.get_datanode_status, 'n2')
+
+ self.assertEqual(get.call_count, 4)
+ self.assertEqual(post.call_count, 2)
+
+ @mock.patch('savanna.context.sleep', lambda x: None)
+ @mock.patch('requests.post')
+ @mock.patch('requests.get')
+ def test_session_op(self, get, post):
+ client = c.IntelClient('qwe', 'rty')
+
+ data1 = {
+ "items": [
+ {
+ "nodeprogress": {
+ "hostname": 'host',
+ 'info': 'info\n'
+ }
+ }
+ ]
+ }
+ data2 = {
+ "items": [
+ {
+ "nodeprogress": {
+ "hostname": 'host',
+ 'info': '_ALLFINISH\n'
+ }
+ }
+ ]
+ }
+
+ get.side_effect = (r.make_resp(data1), r.make_resp(data2))
+ post.return_value = r.make_resp(SESSION_POST_DATA)
+
+ client.services.hdfs.format()
+
+ self.assertEqual(get.call_count, 2)
+ self.assertEqual(post.call_count, 1)
+
+ @mock.patch('requests.get')
+ def test_rest_client(self, get):
+ client = c.IntelClient('qwe', 'rty')
+ get.return_value = r.make_resp(ok=False, status_code=500, data={
+ "message": "message"
+ })
+ self.assertRaises(iex.IntelPluginException,
+ client.services.get_services)
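
These tests exercise the client's REST layer without real HTTP traffic: `requests.get`/`post`/`put`/`delete` are patched and return the `Response` stub from `response.py`, whose `text` attribute carries JSON exactly as the client expects. Below is a stand-alone sketch of that pattern with an arbitrary URL and payload; it is illustrative only, not additional test code from the patch.

```python
import json

import mock


class FakeResponse(object):
    # Mirrors savanna/tests/unit/plugins/intel/client/response.py: only the
    # attributes the plugin's REST client reads are provided.
    def __init__(self, data=None, ok=True, status_code=200):
        self.text = json.dumps(data)
        self.ok = ok
        self.status_code = status_code
        self.reason = None


with mock.patch('requests.get') as get:
    get.return_value = FakeResponse({'items': []})

    import requests
    resp = requests.get('http://manager.example/api/services')  # no real I/O

    assert resp.ok
    assert json.loads(resp.text) == {'items': []}
```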
diff --git a/savanna/tests/unit/plugins/intel/test_plugin.py b/savanna/tests/unit/plugins/intel/test_plugin.py
new file mode 100644
index 00000000..0fe46a19
--- /dev/null
+++ b/savanna/tests/unit/plugins/intel/test_plugin.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.plugins.general import exceptions as g_ex
+from savanna.plugins.intel import config_helper as c_helper
+from savanna.plugins.intel import exceptions as i_ex
+from savanna.plugins.intel import plugin as p
+from savanna.tests.unit import base
+from savanna.tests.unit.plugins.intel import test_utils as tu
+
+
+class TestIDHPlugin(base.DbTestCase):
+ def test_get_configs(self):
+ plugin = p.IDHProvider()
+ configs = plugin.get_configs('2.5.0')
+
+ self.assertIn(c_helper.IDH_REPO_URL, configs)
+ self.assertIn(c_helper.IDH_TARBALL_URL, configs)
+ self.assertIn(c_helper.OS_REPO_URL, configs)
+
+ def test_validate(self):
+ plugin = p.IDHProvider()
+
+ ng_mng = tu.make_ng_dict('mng', 'f1', ['manager'], 1)
+ ng_nn = tu.make_ng_dict('nn', 'f1', ['namenode'], 1)
+ ng_jt = tu.make_ng_dict('jt', 'f1', ['jobtracker'], 1)
+ ng_dn = tu.make_ng_dict('dn', 'f1', ['datanode'], 2)
+ ng_tt = tu.make_ng_dict('tt', 'f1', ['tasktracker'], 2)
+
+ cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0',
+ [ng_nn] + [ng_dn])
+ self.assertRaises(i_ex.NotSingleManagerException, plugin.validate, cl)
+
+ cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', [ng_mng])
+ self.assertRaises(g_ex.NotSingleNameNodeException, plugin.validate, cl)
+
+ cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0',
+ [ng_mng] + [ng_nn] * 2)
+ self.assertRaises(g_ex.NotSingleNameNodeException, plugin.validate, cl)
+
+ cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0',
+ [ng_mng] + [ng_nn] + [ng_tt])
+ self.assertRaises(g_ex.TaskTrackersWithoutJobTracker,
+ plugin.validate, cl)
+
+ cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0',
+ [ng_mng] + [ng_nn] + [ng_jt] * 2 + [ng_tt])
+ self.assertRaises(g_ex.NotSingleJobTrackerException,
+ plugin.validate, cl)
diff --git a/savanna/tests/unit/plugins/intel/test_utils.py b/savanna/tests/unit/plugins/intel/test_utils.py
new file mode 100644
index 00000000..04cb0c6d
--- /dev/null
+++ b/savanna/tests/unit/plugins/intel/test_utils.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2013 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from savanna.conductor import resource as r
+
+
+def create_cluster(name, tenant, plugin, version, node_groups, **kwargs):
+ dct = {'name': name, 'tenant_id': tenant, 'plugin_name': plugin,
+ 'hadoop_version': version, 'node_groups': node_groups}
+ dct.update(kwargs)
+ return r.ClusterResource(dct)
+
+
+def make_ng_dict(name, flavor, processes, count, instances=[]):
+ return {'name': name, 'flavor_id': flavor, 'node_processes': processes,
+ 'count': count, 'instances': instances}
+
+
+def make_inst_dict(inst_id, inst_name):
+ return {'instance_id': inst_id, 'instance_name': inst_name}
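
test_plugin.py above composes clusters from these helpers; here is a minimal stand-alone sketch of the same usage, with arbitrary names and flavor id (attribute access on the resulting resource is assumed from how the plugin's validate() consumes it).

```python
from savanna.tests.unit.plugins.intel import test_utils as tu

# One manager, one namenode, one jobtracker -- the same shape test_plugin.py
# builds; 'f1' is an arbitrary flavor id.
ng_mng = tu.make_ng_dict('mng', 'f1', ['manager'], 1)
ng_nn = tu.make_ng_dict('nn', 'f1', ['namenode'], 1)
ng_jt = tu.make_ng_dict('jt', 'f1', ['jobtracker'], 1)

cluster = tu.create_cluster('cl1', 't1', 'intel', '2.5.0',
                            [ng_mng, ng_nn, ng_jt])
print(cluster.name, cluster.hadoop_version, len(cluster.node_groups))
```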
diff --git a/savanna/tests/unit/resources/test-default-with-type-and-locale.xml b/savanna/tests/unit/resources/test-default-with-type-and-locale.xml
new file mode 100644
index 00000000..c4b86fb8
--- /dev/null
+++ b/savanna/tests/unit/resources/test-default-with-type-and-locale.xml
@@ -0,0 +1,43 @@
+
+
+
+
+ name1
+ value1
+ String
+
+ descr1
+
+
+
+
+ name2
+ value2
+
+ descr2
+
+
+
+
+ name3
+
+ String
+
+ descr3
+
+
+
+
+ name4
+ String
+
+ descr4
+
+
+
+
+ name5
+ value5
+ String
+
+
diff --git a/savanna/tests/unit/utils/test_xml_utils.py b/savanna/tests/unit/utils/test_xml_utils.py
index 2b3b8202..5611cdba 100644
--- a/savanna/tests/unit/utils/test_xml_utils.py
+++ b/savanna/tests/unit/utils/test_xml_utils.py
@@ -36,6 +36,24 @@ class XMLUtilsTestCase(unittest2.TestCase):
x.load_hadoop_xml_defaults(
'tests/unit/resources/test-default.xml'))
+ def test_load_xml_defaults_with_type_and_locale(self):
+ expected = [
+ {'name': u'name1', 'value': u'value1', 'type': u'String',
+ 'description': 'descr1'},
+ {'name': u'name2', 'value': u'value2', 'type': u'',
+ 'description': 'descr2'},
+ {'name': u'name3', 'value': '', 'type': u'String',
+ 'description': 'descr3'},
+ {'name': u'name4', 'value': '', 'type': u'String',
+ 'description': 'descr4'},
+ {'name': u'name5', 'value': u'value5', 'type': u'String',
+ 'description': ''}]
+ actual = x.load_hadoop_xml_defaults_with_type_and_locale(
+ 'tests/unit/resources/test-default-with-type-and-locale.xml')
+ self.assertListEqual(
+ expected,
+ actual)
+
def test_adjust_description(self):
self.assertEqual(x._adjust_field("\n"), "")
self.assertEqual(x._adjust_field("\n "), "")
diff --git a/savanna/utils/xmlutils.py b/savanna/utils/xmlutils.py
index 95306422..7e2464cf 100644
--- a/savanna/utils/xmlutils.py
+++ b/savanna/utils/xmlutils.py
@@ -36,6 +36,27 @@ def load_hadoop_xml_defaults(file_name):
return configs
+def load_hadoop_xml_defaults_with_type_and_locale(file_name):
+ doc = load_xml_document(file_name)
+ configs = []
+ prop = doc.getElementsByTagName('property')
+ for elements in prop:
+ configs.append({
+ 'name': _get_text_from_node(elements, 'name'),
+ 'value': _get_text_from_node(elements, 'value'),
+ 'type': _get_text_from_node(elements, 'valuetype'),
+ 'description': _adjust_field(
+ _get_text_from_node(
+ _get_node_element(elements, 'description'), 'en'))
+ })
+ return configs
+
+
+def _get_node_element(element, name):
+ element = element.getElementsByTagName(name)
+ return element[0] if element and element[0].hasChildNodes() else None
+
+
def create_hadoop_xml(configs, config_filter=None):
doc = xml.Document()
@@ -68,7 +89,7 @@ def load_xml_document(file_name):
def _get_text_from_node(element, name):
- element = element.getElementsByTagName(name)
+ element = element.getElementsByTagName(name) if element else None
return element[0].firstChild.nodeValue if (
element and element[0].hasChildNodes()) else ''
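
For reference, a short usage sketch of the new helper, mirroring the unit test above; as in the test, the resource path is given relative to the savanna package.

```python
from savanna.utils import xmlutils as x

# Each returned entry is a dict with 'name', 'value', 'type' (taken from
# <valuetype>) and the English 'description', with '' for missing fields.
configs = x.load_hadoop_xml_defaults_with_type_and_locale(
    'tests/unit/resources/test-default-with-type-and-locale.xml')

for cfg in configs:
    print(cfg['name'], cfg['type'], repr(cfg['value']))
```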
diff --git a/setup.cfg b/setup.cfg
index 64b96a35..e5cffc29 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,7 @@ console_scripts =
savanna.cluster.plugins =
vanilla = savanna.plugins.vanilla.plugin:VanillaProvider
hdp = savanna.plugins.hdp.ambariplugin:AmbariPlugin
+ idh = savanna.plugins.intel.plugin:IDHProvider
savanna.infrastructure.engine =
savanna = savanna.service.instances:SavannaInfrastructureEngine