diff --git a/MANIFEST.in b/MANIFEST.in index 88d11b96..3c90fee1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,6 +9,7 @@ include savanna/db/migration/alembic_migrations/versions/README recursive-include savanna/locale * +include savanna/plugins/intel/resources/*.xml include savanna/plugins/vanilla/resources/*.xml include savanna/plugins/vanilla/resources/*.sh include savanna/plugins/vanilla/resources/*.sql diff --git a/savanna/exceptions.py b/savanna/exceptions.py index 40cbba35..ea142876 100644 --- a/savanna/exceptions.py +++ b/savanna/exceptions.py @@ -182,3 +182,10 @@ class ThreadException(SavannaException): self.message = "An error occurred in thread '%s': %s" % ( thread_description, str(e)) self.code = "THREAD_EXCEPTION" + + +class NotImplementedException(SavannaException): + code = "NOT_IMPLEMENTED" + + def __init__(self, feature): + self.message = "Feature '%s' is not implemented" % feature diff --git a/savanna/plugins/intel/__init__.py b/savanna/plugins/intel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/savanna/plugins/intel/client/__init__.py b/savanna/plugins/intel/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/savanna/plugins/intel/client/client.py b/savanna/plugins/intel/client/client.py new file mode 100644 index 00000000..0617094b --- /dev/null +++ b/savanna/plugins/intel/client/client.py @@ -0,0 +1,33 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from savanna.plugins.intel.client import cluster +from savanna.plugins.intel.client import nodes +from savanna.plugins.intel.client import params +from savanna.plugins.intel.client import rest as r +from savanna.plugins.intel.client import services + + +class IntelClient(): + def __init__(self, manager_ip, cluster_name): + #TODO(alazarev) make credentials configurable (bug #1262881) + self.rest = r.RESTClient(manager_ip, 'admin', 'admin') + self.cluster_name = cluster_name + self._ctx = self + + self.cluster = cluster.Cluster(self) + self.nodes = nodes.Nodes(self) + self.params = params.Params(self) + self.services = services.Services(self) diff --git a/savanna/plugins/intel/client/cluster.py b/savanna/plugins/intel/client/cluster.py new file mode 100644 index 00000000..2cc62dac --- /dev/null +++ b/savanna/plugins/intel/client/cluster.py @@ -0,0 +1,44 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
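To make the composition in client/client.py above easier to review, here is a rough usage sketch of how the plugin drives the composed client. It is not part of the patch: the manager IP, cluster name and hostnames are invented, and the call order simply mirrors installer.install_cluster further down in this diff.

```python
# Illustrative sketch only -- not part of the patch.
from savanna.plugins.intel.client import client as c


def provision_sketch(manager_ip, cluster_name, hostnames):
    # The RESTClient is created with the hard-coded admin/admin
    # credentials noted in the TODO (bug #1262881).
    client = c.IntelClient(manager_ip, cluster_name)

    client.cluster.create()                      # POST /cluster
    client.nodes.add(hostnames, '/Default',      # POST /cluster/<name>/nodes
                     'hadoop', '/home/hadoop/.ssh/id_rsa')
    client.cluster.install_software(hostnames)   # blocks on a manager session
    client.services.add(['hdfs', 'mapred'])
    client.services.hdfs.format()
    client.services.hdfs.start()
```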
+ +from savanna.plugins.intel.client import context as c +from savanna.plugins.intel.client import session + + +class Cluster(c.IntelContext): + def create(self): + url = '/cluster' + data = { + 'name': self.cluster_name, + 'dnsresolution': True, + 'acceptlicense': True + } + + return self.rest.post(url, data) + + def get(self): + url = '/cluster/%s' % self.cluster_name + return self.rest.get(url) + + def install_software(self, nodes): + _nodes = [{'hostname': host} for host in nodes] + url = '/cluster/%s/nodes/commands/installsoftware' % self.cluster_name + session_id = self.rest.post(url, _nodes)['sessionID'] + return session.wait(self, session_id) + + def upload_authzkeyfile(self, authzkeyfile): + url = '/cluster/%s/upload/authzkey' % self.cluster_name + return self.rest.post(url, + files={'file': authzkeyfile})['upload result'] diff --git a/savanna/plugins/intel/client/context.py b/savanna/plugins/intel/client/context.py new file mode 100644 index 00000000..13897e8b --- /dev/null +++ b/savanna/plugins/intel/client/context.py @@ -0,0 +1,21 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class IntelContext(object): + def __init__(self, ctx): + self._ctx = ctx._ctx + self.cluster_name = ctx.cluster_name + self.rest = ctx.rest diff --git a/savanna/plugins/intel/client/nodes.py b/savanna/plugins/intel/client/nodes.py new file mode 100644 index 00000000..45cefcda --- /dev/null +++ b/savanna/plugins/intel/client/nodes.py @@ -0,0 +1,65 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
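The IntelContext base class in client/context.py exists only to share one RESTClient instance and the cluster name across the per-area wrappers. A new API area would follow the same pattern; the Alerts class and its URL below are hypothetical and shown purely for illustration.

```python
# Illustrative only: "Alerts" and its endpoint are invented; the base class
# and its attributes come from client/context.py in this patch.
from savanna.plugins.intel.client import context as c


class Alerts(c.IntelContext):
    """A hypothetical API area reusing the shared RESTClient."""

    def list(self):
        # self.rest and self.cluster_name were copied from the parent
        # IntelClient by IntelContext.__init__.
        url = '/cluster/%s/alerts' % self.cluster_name
        return self.rest.get(url)
```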
+ +from savanna.plugins.intel.client import context as c +from savanna.plugins.intel.client import session +from savanna.plugins.intel import exceptions as iex + + +class Nodes(c.IntelContext): + def add(self, nodes, rack, username, path_to_key, keypass=''): + hosts = { + 'method': 'useauthzkeyfile', + 'nodeinfo': map(lambda host: { + 'hostname': host, + 'username': username, + 'passphrase': keypass, + 'authzkeyfile': path_to_key, + 'rackName': rack + }, nodes) + } + + url = '/cluster/%s/nodes' % self.cluster_name + resp = self.rest.post(url, hosts)['items'] + + for node_info in resp: + if node_info['info'] != 'Connected': + raise iex.IntelPluginException( + 'Error adding nodes: %s' % node_info['iporhostname']) + + def get(self): + url = '/cluster/%s/nodes' % self.cluster_name + return self.rest.get(url) + + def get_status(self, node): + url = '/cluster/%s/nodes/%s' % (self.cluster_name, node) + return self.rest.get(url)['status'] + + def delete(self, node): + url = '/cluster/%s/nodes/%s' % (self.cluster_name, node) + return self.rest.delete(url) + + def config(self, force=False): + url = ('/cluster/%s/nodes/commands/confignodes/%s' + % (self.cluster_name, 'force' if force else 'noforce')) + + session_id = self.rest.post(url)['sessionID'] + return session.wait(self, session_id) + + def stop(self, nodes): + url = '/cluster/%s/nodes/commands/stopnodes' % self.cluster_name + data = [{'hostname': host} for host in nodes] + + return self.rest.post(url, data) diff --git a/savanna/plugins/intel/client/params.py b/savanna/plugins/intel/client/params.py new file mode 100644 index 00000000..239bb70f --- /dev/null +++ b/savanna/plugins/intel/client/params.py @@ -0,0 +1,76 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
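Since Nodes.add in client/nodes.py fails the whole operation as soon as one host is not reported as 'Connected', a unit-test style sketch of that behaviour may help reviewers. The mocked manager reply is invented; only the request and response field names come from the patch, and the sketch assumes the mock library already used by Savanna's unit tests.

```python
# Illustrative test sketch, not part of the patch.
import mock

from savanna.plugins.intel.client import nodes
from savanna.plugins.intel import exceptions as iex


def test_add_fails_when_a_node_is_not_connected():
    ctx = mock.Mock()
    ctx.cluster_name = 'cluster-1'
    # Invented manager reply for POST /cluster/cluster-1/nodes
    ctx.rest.post.return_value = {
        'items': [{'info': 'Connect failed', 'iporhostname': 'worker-1'}]
    }

    n = nodes.Nodes(ctx)
    try:
        n.add(['worker-1'], '/Default', 'hadoop', '/home/hadoop/.ssh/id_rsa')
    except iex.IntelPluginException as e:
        assert 'worker-1' in e.message
    else:
        raise AssertionError('IntelPluginException was expected')
```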
+ +from savanna import exceptions +from savanna.plugins.intel.client import context as c + + +class BaseParams(c.IntelContext): + def __init__(self, ctx, service): + super(BaseParams, self).__init__(ctx) + self.service = service + + def add(self, item, value, desc=''): + data = { + 'editdesc': desc, + 'items': [ + { + 'type': self.service, + 'item': item, + 'value': value, + 'desc': desc + } + ] + } + url = ('/cluster/%s/configuration/%s' + % (self.cluster_name, self.service)) + return self.rest.post(url, data) + + def update(self, item, value, desc='', nodes=None): + data = { + 'editdesc': desc, + 'items': [ + { + 'type': self.service, + 'item': item, + 'value': value + } + ] + } + if nodes: + data = { + 'editdesc': desc, + 'items': map(lambda node: { + 'type': self.service, + 'item': item, + 'value': value, + 'hostname': node + }, nodes) + } + + url = ('/cluster/%s/configuration/%s' + % (self.cluster_name, self.service)) + return self.rest.put(url, data) + + def get(self, hosts, item): + raise exceptions.NotImplementedException("BaseParams.get") + + +class Params(c.IntelContext): + def __init__(self, ctx): + super(Params, self).__init__(ctx) + self.hadoop = BaseParams(self, 'hadoop') + self.hdfs = BaseParams(self, 'hdfs') + self.mapred = BaseParams(self, 'mapred') diff --git a/savanna/plugins/intel/client/rest.py b/savanna/plugins/intel/client/rest.py new file mode 100644 index 00000000..19ebc1fa --- /dev/null +++ b/savanna/plugins/intel/client/rest.py @@ -0,0 +1,78 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
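For clarity, the two request bodies that BaseParams.update in client/params.py can produce look like the dictionaries below. The property names, values and hostnames are example data; the field names are the ones built in the patch.

```python
# Illustrative only: example payloads for PUT /cluster/<name>/configuration/hdfs

# Cluster-wide update -- one item, no hostname
cluster_wide = {
    'editdesc': '',
    'items': [{'type': 'hdfs', 'item': 'dfs.replication', 'value': '3'}]
}

# Per-node update -- one item per hostname
per_node = {
    'editdesc': '',
    'items': [
        {'type': 'hdfs', 'item': 'dfs.data.dir',
         'value': '/mnt/data', 'hostname': host}
        for host in ['worker-1', 'worker-2']
    ]
}
```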
+ +import json +import requests +from requests import auth + +from savanna.openstack.common import log as logging +from savanna.plugins.intel import exceptions as iex + + +LOG = logging.getLogger(__name__) + + +class RESTClient(): + def __init__(self, manager_ip, auth_username, auth_password): + #TODO(alazarev) make port configurable (bug #1262895) + self.base_url = ('https://%s:9443/restapi/intelcloud/api/v1' + % manager_ip) + LOG.debug("Connecting to manager with URL of %s", self.base_url) + + self.auth = auth.HTTPBasicAuth(auth_username, auth_password) + + def get(self, url): + url = self.base_url + url + LOG.debug("Sending GET to URL of %s", url) + r = requests.get(url, verify=False, auth=self.auth) + return self._check_response(r) + + def post(self, url, data=None, files=None): + url = self.base_url + url + LOG.debug("Sending POST to URL '%s' (%s files): %s", url, + len(files) if files else 0, + data if data else 'no data') + r = requests.post(url, data=json.dumps(data) if data else None, + verify=False, auth=self.auth, files=files) + return self._check_response(r) + + def delete(self, url): + url = self.base_url + url + LOG.debug("Sending DELETE to URL of %s", url) + r = requests.delete(url, verify=False, auth=self.auth) + return self._check_response(r) + + def put(self, url, data=None): + url = self.base_url + url + if data: + LOG.debug("Sending PUT to URL of %s: %s", url, data) + r = requests.put(url, data=json.dumps(data), verify=False, + auth=self.auth) + else: + LOG.debug("Sending PUT to URL of %s with no data", url) + r = requests.put(url, verify=False, auth=self.auth) + + return self._check_response(r) + + def _check_response(self, resp): + LOG.debug("Response with HTTP code %s, and content of %s", + resp.status_code, resp.text) + if not resp.ok: + raise iex.IntelPluginException( + "Request to manager returned with code '%s', reason '%s' " + "and message '%s'" % (resp.status_code, resp.reason, + json.loads(resp.text)['message'])) + else: + return json.loads(resp.text) diff --git a/savanna/plugins/intel/client/services.py b/savanna/plugins/intel/client/services.py new file mode 100644 index 00000000..bc8d1e74 --- /dev/null +++ b/savanna/plugins/intel/client/services.py @@ -0,0 +1,137 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
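The error path of RESTClient._check_response in client/rest.py is worth a second look: any non-2xx reply is turned into an IntelPluginException carrying the manager's JSON "message" field. A test-style sketch with requests.get stubbed out shows the behaviour; the fake response and IP address are invented.

```python
# Illustrative sketch (not part of the patch); uses the mock library.
import mock

from savanna.plugins.intel.client import rest
from savanna.plugins.intel import exceptions as iex


def test_get_raises_on_error_response():
    fake_resp = mock.Mock(ok=False, status_code=500, reason='Server Error',
                          text='{"message": "boom"}')
    with mock.patch('requests.get', return_value=fake_resp):
        client = rest.RESTClient('10.0.0.2', 'admin', 'admin')
        try:
            client.get('/cluster/demo')
        except iex.IntelPluginException as e:
            # The manager's JSON error message ends up in the exception text.
            assert 'boom' in e.message
        else:
            raise AssertionError('IntelPluginException was expected')
```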
+ +from savanna import context +from savanna.openstack.common import log as logging +from savanna.plugins.intel.client import context as c +from savanna.plugins.intel.client import session +from savanna.plugins.intel import exceptions as iex + +LOG = logging.getLogger(__name__) + + +class BaseService(c.IntelContext): + def __init__(self, ctx, service_name): + super(BaseService, self).__init__(ctx) + self.service = service_name + + def start(self): + url = ('/cluster/%s/services/%s/commands/start' + % (self.cluster_name, self.service)) + + self.rest.post(url) + + timeout = 120 + cur_time = 0 + while cur_time < timeout: + context.sleep(2) + if self.status() == 'running': + break + else: + cur_time += 2 + else: + raise iex.IntelPluginException( + "Service '%s' has failed to start in %s seconds" + % (self.service, timeout)) + + def stop(self): + url = ('/cluster/%s/services/%s/commands/stop' + % (self.cluster_name, self.service)) + + return self.rest.post(url) + + def status(self): + url = '/cluster/%s/services' % self.cluster_name + statuses = self.rest.get(url)['items'] + for st in statuses: + if st['serviceName'] == self.service: + return st['status'] + + raise iex.IntelPluginException( + "Service '%s' is not installed on cluster '%s'" + % (self.service, self.cluster_name)) + + def get_nodes(self): + url = '/cluster/%s/services/%s' % (self.cluster_name, self.service) + return self.rest.get(url) + + def add_nodes(self, role, nodes): + url = ('/cluster/%s/services/%s/roles' + % (self.cluster_name, self.service)) + + data = map(lambda host: { + 'rolename': role, + 'hostname': host + }, nodes) + + return self.rest.post(url, data) + + +class HDFSService(BaseService): + def format(self, force=False): + url = ('/cluster/%s/services/hdfs/commands/hdfsformat/%s' + % (self.cluster_name, 'force' if force else 'noforce')) + + session_id = self.rest.post(url)['sessionID'] + return session.wait(self, session_id) + + def decommission_nodes(self, nodes, force=False): + url = ('/cluster/%s/nodes/commands/decommissionnodes/%s' + % (self.cluster_name, 'force' if force else 'noforce')) + data = map(lambda host: { + 'hostname': host + }, nodes) + + return self.rest.post(url, data) + + def get_datanodes_status(self): + url = '/cluster/%s/nodes/commands/datanodes/status' % self.cluster_name + return self.rest.get(url)['items'] + + def get_datanode_status(self, datanode): + stats = self.get_datanodes_status() + for stat in stats: + if stat['hostname'] == datanode: + return stat['status'].strip() + + raise iex.IntelPluginException( + "Datanode service is is not installed on node '%s'" % datanode) + + +class Services(c.IntelContext): + def __init__(self, ctx): + super(Services, self).__init__(ctx) + self.hdfs = HDFSService(self, 'hdfs') + self.mapred = BaseService(self, 'mapred') + self.hive = BaseService(self, 'hive') + self.oozie = BaseService(self, 'oozie') + + def add(self, services): + _services = map(lambda service: { + 'serviceName': service, + 'type': service + }, services) + url = '/cluster/%s/services' % self.cluster_name + + return self.rest.post(url, _services) + + def get_services(self): + url = '/cluster/%s/services' % self.cluster_name + + return self.rest.get(url) + + def delete_service(self, service): + url = '/cluster/%s/services/%s' % (self.cluster_name, service) + return self.rest.delete(url) diff --git a/savanna/plugins/intel/client/session.py b/savanna/plugins/intel/client/session.py new file mode 100644 index 00000000..d4b02342 --- /dev/null +++ b/savanna/plugins/intel/client/session.py @@ 
-0,0 +1,49 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from savanna import context +from savanna.openstack.common import log as logging +from savanna.plugins.intel import exceptions as iex + +LOG = logging.getLogger(__name__) + + +def get(ctx, session_id): + url = '/cluster/%s/session/%s' % (ctx.cluster_name, session_id) + return ctx.rest.get(url) + + +def wait(ctx, session_id): + #TODO(lazarev) add check on savanna cluster state (exit on delete) + #TODO(alazarev) make configurable (bug #1262897) + timeout = 4*60*60 # 4 hours + cur_time = 0 + while cur_time < timeout: + info_items = get(ctx, session_id)['items'] + for item in info_items: + progress = item['nodeprogress'] + if progress['info'].strip() == '_ALLFINISH': + return + else: + context.sleep(10) + cur_time += 10 + + debug_msg = 'Hostname: %s\nInfo: %s' + debug_msg = debug_msg % (progress['hostname'], progress['info']) + LOG.debug(debug_msg) + else: + raise iex.IntelPluginException( + "Cluster '%s' has failed to start in %s minutes" + % (ctx.cluster_name, timeout / 60)) diff --git a/savanna/plugins/intel/config_helper.py b/savanna/plugins/intel/config_helper.py new file mode 100644 index 00000000..9b4cefee --- /dev/null +++ b/savanna/plugins/intel/config_helper.py @@ -0,0 +1,128 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
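The polling loop in client/session.py's wait() consumes the session document returned by GET /cluster/<name>/session/<id>. The field names ('items', 'nodeprogress', 'hostname', 'info', '_ALLFINISH') come from the patch; the hostnames and progress strings below are invented examples.

```python
# Illustrative only: the session documents wait() distinguishes between.
finished = {
    'items': [
        {'nodeprogress': {'hostname': 'worker-1', 'info': '_ALLFINISH'}},
    ]
}

still_running = {
    'items': [
        {'nodeprogress': {'hostname': 'worker-1',
                          'info': 'installing hadoop packages'}},
    ]
}
# wait() returns as soon as an item reports '_ALLFINISH'; otherwise it sleeps
# 10 seconds per unfinished item and polls again, giving up with an
# IntelPluginException after 4 hours.
```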
+ +from savanna.plugins import provisioning as p +from savanna.utils import xmlutils as x + + +CORE_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale( + 'plugins/intel/resources/hadoop-default.xml') + +HDFS_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale( + 'plugins/intel/resources/hdfs-default.xml') + +MAPRED_DEFAULT = x.load_hadoop_xml_defaults_with_type_and_locale( + 'plugins/intel/resources/mapred-default.xml') + +XML_CONFS = { + "Hadoop": [CORE_DEFAULT], + "HDFS": [HDFS_DEFAULT], + "MapReduce": [MAPRED_DEFAULT] +} + +IDH_TARBALL_URL = p.Config('IDH tarball URL', 'general', 'cluster', priority=1, + default_value='http://repo1.intelhadoop.com:3424/' + 'setup/setup-intelhadoop-' + '2.5.1-en-evaluation.RHEL.tar.gz') + +OS_REPO_URL = p.Config('OS repository URL', 'general', 'cluster', priority=1, + is_optional=True, + default_value='http://mirror.centos.org/' + 'centos-6/6/os/x86_64') + +IDH_REPO_URL = p.Config('IDH repository URL', 'general', 'cluster', + priority=1, is_optional=True, + default_value='http://repo1.intelhadoop.com:3424' + '/evaluation/en/RHEL/2.5.1/rpm') + +ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster', + config_type="bool", priority=1, + default_value=True, is_optional=True) + +HIDDEN_CONFS = ['fs.default.name', 'dfs.name.dir', 'dfs.data.dir', + 'mapred.job.tracker', 'mapred.system.dir', 'mapred.local.dir'] + +CLUSTER_WIDE_CONFS = ['dfs.block.size', 'dfs.permissions', 'dfs.replication', + 'dfs.replication.min', 'dfs.replication.max', + 'io.file.buffer.size', 'mapreduce.job.counters.max', + 'mapred.output.compress', 'io.compression.codecs', + 'mapred.output.compression.codec', + 'mapred.output.compression.type', + 'mapred.compress.map.output', + 'mapred.map.output.compression.codec'] + +PRIORITY_1_CONFS = ['dfs.datanode.du.reserved', + 'dfs.datanode.failed.volumes.tolerated', + 'dfs.datanode.max.xcievers', 'dfs.datanode.handler.count', + 'dfs.namenode.handler.count', 'mapred.child.java.opts', + 'mapred.jobtracker.maxtasks.per.job', + 'mapred.job.tracker.handler.count', + 'mapred.map.child.java.opts', + 'mapred.reduce.child.java.opts', + 'io.sort.mb', 'mapred.tasktracker.map.tasks.maximum', + 'mapred.tasktracker.reduce.tasks.maximum'] + +PRIORITY_1_CONFS += CLUSTER_WIDE_CONFS + +CFG_TYPE = { + "Boolean": "bool", + "String": "string", + "Integer": "int", + "Choose": "string", + "Class": "string", + "Directory": "string", + "Float": "string", + "Int_range": "string", +} + + +def _initialise_configs(): + configs = [] + + for service, config_lists in XML_CONFS.iteritems(): + for config_list in config_lists: + for config in config_list: + if config['name'] not in HIDDEN_CONFS: + cfg = p.Config( + config['name'], service, "cluster", is_optional=True, + config_type="string", + default_value=str(config['value']), + description=config['description']) + + if config.get('type'): + cfg.config_type = CFG_TYPE[config['type']] + if cfg.config_type == 'bool': + cfg.default_value = cfg.default_value == 'true' + if cfg.config_type == 'int': + if cfg.default_value: + cfg.default_value = int(cfg.default_value) + else: + cfg.config_type = 'string' + if config['name'] in PRIORITY_1_CONFS: + cfg.priority = 1 + configs.append(cfg) + + configs.append(IDH_TARBALL_URL) + configs.append(IDH_REPO_URL) + configs.append(OS_REPO_URL) + configs.append(ENABLE_SWIFT) + return configs + + +PLUGIN_CONFIGS = _initialise_configs() + + +def get_plugin_configs(): + return PLUGIN_CONFIGS diff --git a/savanna/plugins/intel/exceptions.py b/savanna/plugins/intel/exceptions.py new file 
mode 100644 index 00000000..fccb5500 --- /dev/null +++ b/savanna/plugins/intel/exceptions.py @@ -0,0 +1,31 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import savanna.exceptions as e + + +class NotSingleManagerException(e.SavannaException): + message = "Intel hadoop cluster should contain only 1 Intel " \ + "Manager instance. Actual manager count is %s" + + def __init__(self, mng_count): + self.message = self.message % mng_count + self.code = "NOT_SINGLE_MANAGER" + + +class IntelPluginException(e.SavannaException): + def __init__(self, message): + self.message = message + self.code = "INTEL_PLUGIN_EXCEPTION" diff --git a/savanna/plugins/intel/installer.py b/savanna/plugins/intel/installer.py new file mode 100644 index 00000000..3d1eca90 --- /dev/null +++ b/savanna/plugins/intel/installer.py @@ -0,0 +1,411 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
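Stepping back to config_helper.py, the conversion performed by _initialise_configs may be easier to follow with one concrete entry. The dict below is an example of what the XML loader returns for a property that ships in hadoop-default.xml; the coercion steps mirror the code above.

```python
# Illustrative walk-through of the type coercion in _initialise_configs.
from savanna.plugins.intel import config_helper as c_helper

entry = {'name': 'io.skip.checksum.errors', 'value': 'false',
         'type': 'Boolean', 'description': 'Skip checksum errors'}

config_type = c_helper.CFG_TYPE[entry['type']]   # 'Boolean' -> 'bool'
default_value = entry['value']
if config_type == 'bool':
    default_value = default_value == 'true'      # 'false' -> False
elif config_type == 'int':
    if default_value:
        default_value = int(default_value)
    else:
        config_type = 'string'

assert (config_type, default_value) == ('bool', False)
# The resulting values feed p.Config(..., config_type='bool',
# default_value=False, is_optional=True).
```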
+ +import six +import telnetlib + +from savanna import conductor +from savanna import context +from savanna.openstack.common import log as logging +from savanna.plugins.general import utils as u +from savanna.plugins.intel.client import client as c +from savanna.plugins.intel import config_helper as c_helper +from savanna.plugins.intel import exceptions as iex +from savanna.swift import swift_helper as swift +from savanna.utils import crypto + + +conductor = conductor.API +LOG = logging.getLogger(__name__) + +_INST_CONF_TEMPLATE = """ +network_interface=eth0 +mode=silent +accept_jdk_license=accept +how_to_setup_os_repo=2 +os_repo=%s +os_repo_username= +os_repo_password= +os_repo_proxy= +how_to_setup_idh_repo=1 +idh_repo=%s +idh_repo_username= +idh_repo_password= +idh_repo_proxy= +firewall_selinux_setting=1""" + +SWIFT_LIB_URL = \ + 'http://savanna-files.mirantis.com/hadoop-swift/hadoop-swift-latest.jar' + + +def install_manager(cluster): + LOG.info("Starting IDH manager installation") + mng_instance = u.get_instance(cluster, 'manager') + + idh_tarball_path = \ + cluster.cluster_configs['general'].get('IDH tarball URL') + if not idh_tarball_path: + idh_tarball_path = c_helper.IDH_TARBALL_URL.default_value + + idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1] + idh_dir = idh_tarball_filename[:idh_tarball_filename.find('.tar.gz')] + LOG.info("IDH tarball will be retrieved from '%s'", idh_tarball_path) + + idh_repo = cluster.cluster_configs['general'].get('IDH repository URL') + if not idh_repo: + idh_repo = c_helper.IDH_REPO_URL.default_value + + os_repo = cluster.cluster_configs['general'].get('OS repository URL') + if not os_repo: + os_repo = c_helper.OS_REPO_URL.default_value + + idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir + + with mng_instance.remote() as r: + LOG.info("Downloading IDH manager") + try: + r.execute_command('curl -O %s 2>&1' % idh_tarball_path) + except Exception as e: + raise RuntimeError("Unable to download IDH manager from %s: %s" + % (idh_tarball_path, e)) + + # unpack archive + LOG.info("Unpacking manager tarball %s", idh_tarball_filename) + try: + r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename) + except Exception as e: + raise RuntimeError("Unable to unpack tarball %s: %s" + % (idh_tarball_filename, e)) + + # install idh + LOG.debug("Installing manager with command: %s", idh_install_cmd) + inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo) + r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf) + #TODO(alazarev) make timeout configurable (bug #1262897) + r.execute_command(idh_install_cmd, timeout=3600) + + # fix nginx permissions bug + r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp ' + '/var/lib/nginx/tmp/client_body') + + # wait for IDH manager to start + #TODO(alazarev) make timeout configurable (bug #1262897) + timeout = 600 + cur_time = 0 + LOG.debug("Waiting up to %s seconds for manager to start", timeout) + while cur_time < timeout: + try: + telnetlib.Telnet(mng_instance.management_ip, 9443) + break + except IOError: + cur_time += 2 + context.sleep(2) + else: + message = ("IDH Manager failed to start in %s minutes on node '%s' " + "of cluster '%s'" + % (timeout / 60, mng_instance.management_ip, cluster.name)) + LOG.error(message) + raise iex.IntelPluginException(message) + + +def configure_os(cluster): + instances = u.get_instances(cluster) + configure_os_from_instances(cluster, instances) + + +def create_hadoop_ssh_keys(cluster): + private_key, public_key = crypto.generate_key_pair() + extra = { + 'hadoop_private_ssh_key': private_key,
'hadoop_public_ssh_key': public_key + } + return conductor.cluster_update(context.ctx(), cluster, {'extra': extra}) + + +def configure_os_from_instances(cluster, instances): + for instance in instances: + with instance.remote() as remote: + LOG.debug("Configuring OS settings on %s : ", instance.hostname()) + + # configure hostname, RedHat/Centos specific + remote.replace_remote_string('/etc/sysconfig/network', + 'HOSTNAME=.*', + 'HOSTNAME=%s' % instance.hostname()) + # disable selinux and iptables, because Intel distribution requires + # this to be off + remote.execute_command('sudo /usr/sbin/setenforce 0') + remote.replace_remote_string('/etc/selinux/config', + 'SELINUX=.*', 'SELINUX=disabled') + # disable iptables + remote.execute_command('sudo /sbin/service iptables stop') + remote.execute_command('sudo /sbin/chkconfig iptables off') + + # create 'hadoop' user + remote.write_files_to({ + 'id_rsa': cluster.extra.get('hadoop_private_ssh_key'), + 'authorized_keys': cluster.extra.get('hadoop_public_ssh_key') + }) + remote.execute_command( + 'sudo useradd hadoop && ' + 'sudo sh -c \'echo "hadoop ALL=(ALL) NOPASSWD:ALL" ' + '>> /etc/sudoers\' && ' + 'sudo mkdir -p /home/hadoop/.ssh/ && ' + 'sudo mv id_rsa authorized_keys /home/hadoop/.ssh && ' + 'sudo chown -R hadoop:hadoop /home/hadoop/.ssh && ' + 'sudo chmod 600 /home/hadoop/.ssh/{id_rsa,authorized_keys}') + + swift_enable = \ + cluster.cluster_configs['general'].get('Enable Swift') + if not swift_enable: + swift_enable = c_helper.ENABLE_SWIFT.default_value + + if swift_enable: + swift_lib_path = '/usr/lib/hadoop/lib/hadoop-swift-latest.jar' + cmd = ('sudo curl \'%s\' -o %s --create-dirs' + % (SWIFT_LIB_URL, swift_lib_path)) + remote.execute_command(cmd) + + +def _configure_services(client, cluster): + nn_host = u.get_namenode(cluster).hostname() + snn = u.get_secondarynamenodes(cluster) + snn_host = snn[0].hostname() if snn else None + jt_host = u.get_jobtracker(cluster).hostname() + dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)] + tt_hosts = [tt.hostname() for tt in u.get_tasktrackers(cluster)] + + oozie_host = u.get_oozie(cluster).hostname() if u.get_oozie( + cluster) else None + hive_host = u.get_hiveserver(cluster).hostname() if u.get_hiveserver( + cluster) else None + + services = [] + if u.get_namenode(cluster): + services += ['hdfs'] + + if u.get_jobtracker(cluster): + services += ['mapred'] + + if oozie_host: + services += ['oozie'] + + if hive_host: + services += ['hive'] + + LOG.debug("Add services: %s" % ', '.join(services)) + client.services.add(services) + + LOG.debug("Assign roles to hosts") + client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host]) + + client.services.hdfs.add_nodes('DataNode', dn_hosts) + if snn: + client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host]) + + if oozie_host: + client.services.oozie.add_nodes('Oozie', [oozie_host]) + + if hive_host: + client.services.hive.add_nodes('HiveServer', [hive_host]) + + client.services.mapred.add_nodes('JobTracker', [jt_host]) + client.services.mapred.add_nodes('TaskTracker', tt_hosts) + + +def _configure_storage(client, cluster): + datanode_ng = u.get_node_groups(cluster, 'datanode')[0] + storage_paths = datanode_ng.storage_paths() + dn_hosts = [i.hostname() for i in u.get_datanodes(cluster)] + + name_dir_param = ",".join( + [st_path + '/dfs/name' for st_path in storage_paths]) + data_dir_param = ",".join( + [st_path + '/dfs/data' for st_path in storage_paths]) + client.params.hdfs.update('dfs.name.dir', name_dir_param) + 
client.params.hdfs.update('dfs.data.dir', data_dir_param, nodes=dn_hosts) + + +def _configure_swift(client, cluster): + swift_enable = cluster.cluster_configs['general'].get('Enable Swift') + if swift_enable is None or swift_enable: + swift_configs = swift.get_swift_configs() + for conf in swift_configs: + client.params.hadoop.add(conf['name'], conf['value']) + + +def _add_user_params(client, cluster): + for p in six.iteritems(cluster.cluster_configs["Hadoop"]): + client.params.hadoop.update(p[0], p[1]) + + for p in six.iteritems(cluster.cluster_configs["HDFS"]): + client.params.hdfs.update(p[0], p[1]) + + for p in six.iteritems(cluster.cluster_configs["MapReduce"]): + client.params.mapred.update(p[0], p[1]) + + +def install_cluster(cluster): + mng_instance = u.get_instance(cluster, 'manager') + mng_ip = mng_instance.management_ip + + all_hosts = list(set([i.hostname() for i in u.get_instances(cluster)])) + + client = c.IntelClient(mng_ip, cluster.name) + + LOG.info("Create cluster") + client.cluster.create() + + LOG.info("Add nodes to cluster") + rack = '/Default' + client.nodes.add(all_hosts, rack, 'hadoop', + '/home/hadoop/.ssh/id_rsa') + + LOG.info("Install software") + client.cluster.install_software(all_hosts) + + LOG.info("Configure services") + _configure_services(client, cluster) + + LOG.info("Deploy cluster") + client.nodes.config(force=True) + + LOG.info("Provisioning configs") + # cinder and ephemeral drive support + _configure_storage(client, cluster) + # swift support + _configure_swift(client, cluster) + # user configs + _add_user_params(client, cluster) + + LOG.info("Format HDFS") + client.services.hdfs.format() + + +def start_cluster(cluster): + client = c.IntelClient( + u.get_instance(cluster, 'manager').management_ip, cluster.name) + + LOG.debug("Starting hadoop services") + client.services.hdfs.start() + + client.services.mapred.start() + if u.get_hiveserver(cluster): + client.services.hive.start() + + if u.get_oozie(cluster): + client.services.oozie.start() + + +def scale_cluster(cluster, instances): + scale_ins_hosts = [i.hostname() for i in instances] + dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)] + tt_hosts = [tt.hostname() for tt in u.get_tasktrackers(cluster)] + to_scale_dn = [] + to_scale_tt = [] + for i in scale_ins_hosts: + if i in dn_hosts: + to_scale_dn.append(i) + + if i in tt_hosts: + to_scale_tt.append(i) + + mng_ip = u.get_instance(cluster, 'manager').management_ip + client = c.IntelClient(mng_ip, cluster.name) + rack = '/Default' + client.nodes.add(scale_ins_hosts, rack, 'hadoop', + cluster.extra['manager_authzkeyfile_path']) + client.cluster.install_software(scale_ins_hosts) + + if to_scale_tt: + client.services.mapred.add_nodes('TaskTracker', to_scale_tt) + + if to_scale_dn: + client.services.hdfs.add_nodes('DataNode', to_scale_dn) + + client.nodes.config() + + if to_scale_dn: + client.services.hdfs.start() + + if to_scale_tt: + client.services.mapred.start() + + +def decommission_nodes(cluster, instances): + dec_hosts = [i.hostname() for i in instances] + dn_hosts = [dn.hostname() for dn in u.get_datanodes(cluster)] + tt_hosts = [dn.hostname() for dn in u.get_tasktrackers(cluster)] + + mng_ip = u.get_instances(cluster, 'manager')[0].management_ip + client = c.IntelClient(mng_ip, cluster.name) + + dec_dn_hosts = [] + for dec_host in dec_hosts: + if dec_host in dn_hosts: + dec_dn_hosts.append(dec_host) + + if dec_dn_hosts: + client.services.hdfs.decommission_nodes(dec_dn_hosts) + + #TODO(alazarev) make timeout configurable (bug 
#1262897) + timeout = 14400 # 4 hours + cur_time = 0 + for host in dec_dn_hosts: + while cur_time < timeout: + if client.services.hdfs.get_datanode_status( + host) == 'Decomissioned': + break + context.sleep(5) + cur_time += 5 + else: + LOG.warn("Failed to decomission node '%s' of cluster '%s' " + "in %s minutes" % (host, cluster.name, timeout/60)) + + client.nodes.stop(dec_hosts) + + # wait stop services + #TODO(alazarev) make timeout configurable (bug #1262897) + timeout = 600 # 10 minutes + cur_time = 0 + for instance in instances: + while cur_time < timeout: + stopped = True + if instance.hostname() in dn_hosts: + code, out = instance.remote().execute_command( + 'sudo /sbin/service hadoop-datanode status', + raise_when_error=False) + if out.strip() != 'datanode is stopped': + stopped = False + if out.strip() == 'datanode dead but pid file exists': + instance.remote().execute_command( + 'sudo rm -f ' + '/var/run/hadoop/hadoop-hadoop-datanode.pid') + if instance.hostname() in tt_hosts: + code, out = instance.remote().execute_command( + 'sudo /sbin/service hadoop-tasktracker status', + raise_when_error=False) + if out.strip() != 'tasktracker is stopped': + stopped = False + if stopped: + break + else: + context.sleep(5) + cur_time += 5 + else: + LOG.warn("Failed to stop services on node '%s' of cluster '%s' " + "in %s minutes" % (instance, cluster.name, timeout/60)) + + for node in dec_hosts: + LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name)) + client.nodes.delete(node) diff --git a/savanna/plugins/intel/plugin.py b/savanna/plugins/intel/plugin.py new file mode 100644 index 00000000..3a2fcde9 --- /dev/null +++ b/savanna/plugins/intel/plugin.py @@ -0,0 +1,173 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
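The validation rules implemented in IDHProvider.validate() below read more easily against a concrete cluster layout. The sketch shows a node-group arrangement that passes them; the group names and counts are an example only.

```python
# Illustrative only: a layout accepted by IDHProvider.validate() below.
node_groups = [
    {'name': 'manager', 'node_processes': ['manager'], 'count': 1},
    {'name': 'master', 'node_processes': ['namenode', 'jobtracker'],
     'count': 1},
    {'name': 'workers', 'node_processes': ['datanode', 'tasktracker'],
     'count': 3},
]
# validate() requires exactly one 'manager' and exactly one 'namenode',
# allows at most one 'jobtracker', and rejects tasktrackers when no
# jobtracker is present.
```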
+ +from savanna import conductor +from savanna import context +from savanna.openstack.common import log as logging +from savanna.plugins.general import exceptions as ex +from savanna.plugins.general import utils as u +from savanna.plugins.intel import config_helper as c_helper +from savanna.plugins.intel import exceptions as i_ex +from savanna.plugins.intel import installer as ins +from savanna.plugins import provisioning as p + +conductor = conductor.API +LOG = logging.getLogger(__name__) + + +class IDHProvider(p.ProvisioningPluginBase): + def __init__(self): + self.processes = { + "Manager": ["manager"], + "HDFS": ["namenode", "datanode", "secondarynamenode"], + "MapReduce": ["jobtracker", "tasktracker"], + "Hadoop": [] # for hadoop parameters in UI + } + + def get_description(self): + return \ + 'The IDH OpenStack plugin works with project ' \ + 'Savanna to automate the deployment of the Intel Distribution ' \ + 'of Apache Hadoop on OpenStack based ' \ + 'public & private clouds' + + def get_node_processes(self, hadoop_version): + return self.processes + + def get_versions(self): + return ['2.5.1'] + + def get_title(self): + return "Intel(R) Distribution for Apache Hadoop* Software" + + def get_configs(self, hadoop_version): + return c_helper.get_plugin_configs() + + def get_hive_config_path(self): + return '/etc/hive/conf/hive-site.xml' + + def configure_cluster(self, cluster): + LOG.info("Configure IDH cluster") + cluster = ins.create_hadoop_ssh_keys(cluster) + ins.configure_os(cluster) + ins.install_manager(cluster) + ins.install_cluster(cluster) + + def start_cluster(self, cluster): + LOG.info("Start IDH cluster") + ins.start_cluster(cluster) + self._set_cluster_info(cluster) + + def validate(self, cluster): + nn_count = sum([ng.count for ng + in u.get_node_groups(cluster, 'namenode')]) + if nn_count != 1: + raise ex.NotSingleNameNodeException(nn_count) + + jt_count = sum([ng.count for ng + in u.get_node_groups(cluster, 'jobtracker')]) + if jt_count > 1: + raise ex.NotSingleJobTrackerException(jt_count) + + tt_count = sum([ng.count for ng + in u.get_node_groups(cluster, 'tasktracker')]) + if jt_count == 0 and tt_count > 0: + raise ex.TaskTrackersWithoutJobTracker() + + mng_count = sum([ng.count for ng + in u.get_node_groups(cluster, 'manager')]) + if mng_count != 1: + raise i_ex.NotSingleManagerException(mng_count) + + def scale_cluster(self, cluster, instances): + ins.configure_os_from_instances(cluster, instances) + ins.scale_cluster(cluster, instances) + + def decommission_nodes(self, cluster, instances): + ins.decommission_nodes(cluster, instances) + + def validate_scaling(self, cluster, existing, additional): + self._validate_additional_ng_scaling(cluster, additional) + self._validate_existing_ng_scaling(cluster, existing) + + def _get_scalable_processes(self): + return ["datanode", "tasktracker"] + + def _get_by_id(self, lst, id): + for obj in lst: + if obj.id == id: + return obj + + def _validate_additional_ng_scaling(self, cluster, additional): + jt = u.get_jobtracker(cluster) + scalable_processes = self._get_scalable_processes() + + for ng_id in additional: + ng = self._get_by_id(cluster.node_groups, ng_id) + if not set(ng.node_processes).issubset(scalable_processes): + raise ex.NodeGroupCannotBeScaled( + ng.name, "Intel plugin cannot scale nodegroup" + " with processes: " + + ' '.join(ng.node_processes)) + if not jt and 'tasktracker' in ng.node_processes: + raise ex.NodeGroupCannotBeScaled( + ng.name, "Intel plugin cannot scale node group with " + "processes which have no 
master-processes run " + "in cluster") + + def _validate_existing_ng_scaling(self, cluster, existing): + scalable_processes = self._get_scalable_processes() + dn_to_delete = 0 + for ng in cluster.node_groups: + if ng.id in existing: + if ng.count > existing[ng.id] and "datanode" in \ + ng.node_processes: + dn_to_delete += ng.count - existing[ng.id] + if not set(ng.node_processes).issubset(scalable_processes): + raise ex.NodeGroupCannotBeScaled( + ng.name, "Intel plugin cannot scale nodegroup" + " with processes: " + + ' '.join(ng.node_processes)) + + def _set_cluster_info(self, cluster): + mng = u.get_instances(cluster, 'manager')[0] + nn = u.get_namenode(cluster) + jt = u.get_jobtracker(cluster) + oozie = u.get_oozie(cluster) + + #TODO(alazarev) make port configurable (bug #1262895) + info = {'IDH Manager': { + 'Web UI': 'https://%s:9443' % mng.management_ip + }} + + if jt: + #TODO(alazarev) make port configurable (bug #1262895) + info['MapReduce'] = { + 'Web UI': 'http://%s:50030' % jt.management_ip + } + if nn: + #TODO(alazarev) make port configurable (bug #1262895) + info['HDFS'] = { + 'Web UI': 'http://%s:50070' % nn.management_ip + } + + if oozie: + #TODO(alazarev) make port configurable (bug #1262895) + info['JobFlow'] = { + 'Oozie': 'http://%s:11000' % oozie.management_ip + } + + ctx = context.ctx() + conductor.cluster_update(ctx, cluster, {'info': info}) diff --git a/savanna/plugins/intel/resources/configuration.xsd b/savanna/plugins/intel/resources/configuration.xsd new file mode 100644 index 00000000..b5573d09 --- /dev/null +++ b/savanna/plugins/intel/resources/configuration.xsd @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/savanna/plugins/intel/resources/hadoop-default.xml b/savanna/plugins/intel/resources/hadoop-default.xml new file mode 100644 index 00000000..6c85ee58 --- /dev/null +++ b/savanna/plugins/intel/resources/hadoop-default.xml @@ -0,0 +1,1371 @@ + + + + + + + default.heap.size + 4096 + Integer + perf + + The default heap size of cluster + + true + + Default memory size for datanode, jobtracker and hbase master. + + + + hadoop.extra.classpath + + true + String + basic + + Extra Java CLASSPATH element + + true + + Extra Java CLASSPATH elements. Will be appended to the value of HADOOP_CLASSPATH in hadoop-env.sh. + + + + hadoop.tmp.dir + true + /tmp/hadoop-${user.name} + String + filesystem + + The temp directory for hadoop + + + A base for other temporary directories. + + + + hadoop.native.lib + true + true + Boolean + basic + + use the native hadoop libraries + + + Should native hadoop libraries, if present, be used. + + + + hadoop.http.filter.initializers + + true + String + network + + hadoop web filter + + + A comma separated list of class names. Each class in the list + must extend org.apache.hadoop.http.FilterInitializer. The + corresponding Filter will be initialized. Then, the Filter will be + applied to all user viewing jsp and servlet web pages. The ordering + of the list defines the ordering of the filters. 
+ + + + hadoop.security.group.mapping + org.apache.hadoop.security.ShellBasedUnixGroupsMapping + Class + security + + hadoop user mapping + + + Class for user to group mapping (get groups for a given user) + + + + hadoop.security.authorization + false + Boolean + security + + hadoop security authorization + + + Is service-level authorization enabled? + + + + hadoop.security.authentication + simple + Choose + simple,kerberos + security + + hadoop security authorization level + + + possible values are simple (no authentication), and kerberos + + + + hadoop.security.token.service.use_ip + true + Boolean + security + + Controls whether tokens always use IP addresses + + + + Controls whether tokens always use IP addresses. DNS changes will not + be detected if this option is enabled. Existing client connections + that break will always reconnect to the IP of the original host. New + clients will connect to the host's new IP but fail to locate a token. + Disabling this option will allow existing and new clients to detect + an IP change and continue to locate the new host's token. + + + + + + hadoop.workaround.non.threadsafe.getpwuid + false + Boolean + security + + hadoop thread safe + + + Some operating systems or authentication modules are known to + have broken implementations of getpwuid_r and getpwgid_r, such that these + calls are not thread-safe. Symptoms of this problem include JVM crashes + with a stack trace inside these functions. If your system exhibits this + issue, enable this configuration parameter to include a lock around the + calls as a workaround. + An incomplete list of some systems known to have this issue is available + at http://wiki.apache.org/hadoop/KnownBrokenPwuidImplementations + + + + hadoop.kerberos.kinit.command + true + kinit + String + security + + Kerberos credentials + + + Used to periodically renew Kerberos credentials when provided + to Hadoop. The default setting assumes that kinit is in the PATH of users + running the Hadoop client. Change this to the absolute path to kinit if this + is not the case. + + + + + hadoop.logfile.size + false + 10000000 + Integer + perf + + log file size + + + The max size of each log file + + + + hadoop.logfile.count + false + 10 + Integer + perf + + log file number + + + The max number of log files + + + + + io.file.buffer.size + 4096 + Integer + perf + + File buffer size + + + The size of buffer for use in sequence files. + The size of this buffer should probably be a multiple of hardware + page size (4096 on Intel x86), and it determines how much data is + buffered during read and write operations. + + basic +
TextItem
+ true +
+ + io.bytes.per.checksum + 512 + 512 + 4096 + Integer + perf + + Checksum size + + + The number of bytes per checksum. Must not be larger than + io.file.buffer.size. + + ioconf +
TextItem
+ false +
+ + dfs.datanode.protocol.client-request.client-verification-field.exists + false + true + Boolean + io + + checksum.client-verification-field + + + Allow DataNode to skip loading Checksum files. + + true + + + io.skip.checksum.errors + false + Boolean + io + + Skip checksum errors + + + If true, when a checksum error is encountered while + reading a sequence file, entries are skipped, instead of throwing an + exception. + + ioconf +
RadioGroupItem
+ true,false + false +
+ + io.compression.codecs + org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec + Class + perf + + Compression codecs + + + A list of the compression codec classes that can be used + for compression/decompression. + + + + io.serializations + org.apache.hadoop.io.serializer.WritableSerialization + Class + perf + + Io serializaions + + + A list of serialization classes that can be used for + obtaining serializers and deserializers. + + + + + + fs.default.name + false + file:/// + String + filesystem + + File System name + + true + + The name of the default file system. A URI whose + scheme and authority determine the FileSystem implementation. The + uri's scheme determines the config property (fs.SCHEME.impl) naming + the FileSystem implementation class. The uri's authority is used to + determine the host, port, etc. for a filesystem. + + + + + hadoop.namenode + + true + true + String + namenode + + NameNode name + + + Server name for namenode. + + + + hadoop.namenode.port + 8020 + String + namenode + + NameNode port + + true + + Server port for namenode. + + + + fs.trash.interval + 0 + Integer + filesystem + + Interval to delete checkpoints + + + Number of minutes after which the checkpoints + get deleted. If zero, the trash feature is disabled. + + + + fs.file.impl + org.apache.hadoop.fs.LocalFileSystem + Class + filesystem + + File system for file implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for file: uris. + + + + fs.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + Class + filesystem + + File system for hdfs implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for hdfs: uris. + + + + fs.webhdfs.impl + org.apache.hadoop.hdfs.web.WebHdfsFileSystem + Class + filesystem + + File system for webhdfs implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for webhdfs: uris. + + + + fs.s3.impl + org.apache.hadoop.fs.s3.S3FileSystem + Class + filesystem + + File system for s3 implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for s3: uris. + + + + + fs.s3n.impl + org.apache.hadoop.fs.s3native.NativeS3FileSystem + Class + filesystem + + File system for s3n implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for s3n: (Native S3) uris. + + + + fs.kfs.impl + org.apache.hadoop.fs.kfs.KosmosFileSystem + Class + filesystem + + File system for kfs implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for kfs: uris. + + + + fs.hftp.impl + org.apache.hadoop.hdfs.HftpFileSystem + Class + filesystem + + File system for hftp implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for hftp: uris. + + + + fs.hsftp.impl + org.apache.hadoop.hdfs.HsftpFileSystem + Class + filesystem + + The FileSystem for hsftp + + org.apache.hadoop.fs.FileSystem + + The FileSystem for hsftp: uris + + + + fs.ftp.impl + org.apache.hadoop.fs.ftp.FTPFileSystem + Class + filesystem + + File system for ftp implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for ftp: uris. + + + + fs.ramfs.impl + org.apache.hadoop.fs.InMemoryFileSystem + Class + filesystem + + File system for ramfs implementation + + org.apache.hadoop.fs.FileSystem + + The FileSystem for ramfs: uris. 
+ + + + fs.har.impl + org.apache.hadoop.fs.HarFileSystem + Class + filesystem + + File system for hadoop archives implementation + + org.apache.hadoop.fs.FileSystem + + The filesystem for Hadoop archives. + + + + fs.har.impl.disable.cache + true + Boolean + filesystem + + Cache hadoop archive instance + + + Don't cache 'har' filesystem instances. + + + + fs.checkpoint.dir + ${hadoop.tmp.dir}/dfs/namesecondary + ${hadoop.tmp.dir}/dfs/namesecondary + /hadoop/namesecondary + Directory + filesystem + + Checkpoint directory + + + Determines where on the local filesystem the DFS secondary + name node should store the temporary images to merge. + If this is a comma-delimited list of directories then the image is + replicated in all of the directories for redundancy. + + + + fs.checkpoint.edits.dir + ${fs.checkpoint.dir} + Directory + filesystem + + Checkpoint edit directory + + + Determines where on the local filesystem the DFS secondary + name node should store the temporary edits to merge. + If this is a comma-delimited list of directoires then the edits is + replicated in all of the directoires for redundancy. + Default value is same as fs.checkpoint.dir + + + + fs.checkpoint.period + 3600 + Integer + filesystem + + Checkpoint period + + + The number of seconds between two periodic checkpoints. + + + + fs.checkpoint.size + 67108864 + Integer + filesystem + + Edit log size to checkpoint + + + The size of the current edit log (in bytes) that triggers + a periodic checkpoint even if the fs.checkpoint.period hasn't expired. + + + + fs.s3.block.size + 67108864 + Integer + filesystem + + Block size to write to S3 + + + Block size to use when writing files to S3. + + + + fs.s3.buffer.dir + ${hadoop.tmp.dir}/s3 + Directory + filesystem + + Buffer file directory + + + Determines where on the local filesystem the S3 filesystem + should store files before sending them to S3 + (or after retrieving them from S3). + + + + + fs.s3.maxRetries + 4 + Integer + filesystem + + Maximum retries number to reading or writing file to S3 + + + The maximum number of retries for reading or writing files to S3, + before we signal failure to the application. + + + + + fs.s3.sleepTimeSeconds + 10 + Integer + filesystem + + Seconds to sleep between every retries to reading or writing file to S3 + + + The number of seconds to sleep between each S3 retry. + + + + + fs.automatic.close + true + Boolean + filesystem + + FileSystem auto close + + + By default, FileSystem instances are automatically closed at program + exit using a JVM shutdown hook. Setting this property to false disables this + behavior. This is an advanced option that should only be used by server applications + requiring a more carefully orchestrated shutdown sequence. + + + + + fs.s3n.block.size + 67108864 + Integer + filesystem + + Block size + + + Block size to use when reading files using the Native S3 + filesystem (s3n: URIs). + + + + local.cache.size + 10737418240 + Integer + perf + + cache size + + + + The limit on the size of cache you want to keep, set by default to + 10GB. This will act as a soft limit on the cache directory for out of + band data. + + + + + io.seqfile.compress.blocksize + 1000000 + Integer + io + + Minum block size for compression + + + The minimum block size for compression in block compressed + SequenceFiles. + + + + + io.seqfile.lazydecompress + true + Boolean + io + + Lazy decompress + + + Should values of block-compressed SequenceFiles be decompressed + only when necessary. 
+ + + + + io.seqfile.sorter.recordlimit + 1000000 + Integer + io + + Limit number of records + + + The limit on number of records to be kept in memory in a spill + in SequenceFiles.Sorter + + + + + io.mapfile.bloom.size + 1048576 + Integer + io + + The size of BloomFilters + + + The size of BloomFilter-s used in BloomMapFile. Each time this many + keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter). + Larger values minimize the number of filters, which slightly increases the performance, + but may waste too much space if the total number of keys is usually much smaller + than this number. + + + + + io.mapfile.bloom.error.rate + 0.005 + Float + io + + Rate of false positives in BloomFilter + + + The rate of false positives in BloomFilter-s used in BloomMapFile. + As this value decreases, the size of BloomFilter-s increases exponentially. This + value is the probability of encountering false positives (default is 0.5%). + + + + + hadoop.util.hash.type + murmur + Choose + murmur,jenkins + basic + + Hash type + + + The default implementation of Hash. Currently this can take one of the + two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash. + + + + + + ipc.client.idlethreshold + 4000 + Integer + ipc + + Connection threshold + + + Defines the threshold number of connections after which + connections will be inspected for idleness. + + + + + ipc.client.kill.max + 10 + Integer + ipc + + Maximum clients number + + + Defines the maximum number of clients to disconnect in one go. + + + + + ipc.client.connection.maxidletime + 10000 + Integer + ipc + + Maximum time for connection + + + The maximum time in msec after which a client will bring down the + connection to the server. + + + + + ipc.client.connect.max.retries + 10 + Integer + ipc + + Maximum number retries + + + Indicates the number of retries a client will make to establish + a server connection. + + + + + ipc.server.listen.queue.size + 128 + Integer + ipc + + Length of the server listen queue + + + Indicates the length of the listen queue for servers accepting + client connections. + + + + + ipc.server.tcpnodelay + false + Boolean + ipc + + Turn on Nagle's algorithem + + + Turn on/off Nagle's algorithm for the TCP socket connection on + the server. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + ipc.client.tcpnodelay + false + Boolean + ipc + + Whether to Turn on Nagle's algorithem on the client + + + Turn on/off Nagle's algorithm for the TCP socket connection on + the client. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + webinterface.private.actions + false + Boolean + network + + Web interfaces + + + If set to true, the web interfaces of JT and NN may contain + actions, such as kill job, delete file, etc., that should + not be exposed to public. Enable this option if the interfaces + are only reachable by those who have the right authorization. + + + + + + hadoop.rpc.socket.factory.class.default + org.apache.hadoop.net.StandardSocketFactory + Class + proxy + + Socketfactory Class + + + Default SocketFactory to use. This parameter is expected to be + formatted as "package.FactoryClassName". + + + + + hadoop.rpc.socket.factory.class.ClientProtocol + + Class + proxy + + Socketfactory Class to use to Connect to DFS + + + SocketFactory to use to connect to a DFS. If null or empty, use + hadoop.rpc.socket.class.default. 
This socket factory is also used by + DFSClient to create sockets to DataNodes. + + + + + hadoop.socks.server + + true + String + proxy + + Address used by SocksSocketfactory + + + Address (host:port) of the SOCKS server to be used by the + SocksSocketFactory. + + + + + + topology.node.switch.mapping.impl + org.apache.hadoop.net.ScriptBasedMapping + Class + org.apache.hadoop.net.DNSToSwitchMapping + rack + + Topology node switch mapping implemention + + + The default implementation of the DNSToSwitchMapping. It + invokes a script specified in topology.script.file.name to resolve + node names. If the value for topology.script.file.name is not set, the + default value of DEFAULT_RACK is returned for all node names. + + + + + topology.script.file.name + false + + + /usr/lib/intelcloud/rackmap.sh + String + rack + + The script name to get NetworkTopology name + + true + + The script name that should be invoked to resolve DNS names to + NetworkTopology names. Example: the script would take host.foo.bar as an + argument, and return /rack1 as the output. + + + + + topology.script.number.args + false + 100 + 100 + 1 + Integer + rack + + The number of topology script args + + true + + The max number of args that the script configured with + topology.script.file.name should be run with. Each arg is an + IP address. + + + + + hadoop.security.uid.cache.secs + 14400 + Integer + security + + The timeout for cache from UID to UserName + + + NativeIO maintains a cache from UID to UserName. This is + the timeout for an entry in that cache. + + + + + hadoop.http.authentication.type + simple + + Choose + simple,kerberos,${hadoop.security.authentication} + security + + Authentication type for HTTP endpoint + + + + Defines authentication used for Oozie HTTP endpoint. + Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME# + + + + + hadoop.http.authentication.token.validity + 36000 + Integer + security + + Authentication token validity + + + + Indicates how long (in seconds) an authentication token is valid before it has + to be renewed. + + + + + hadoop.http.authentication.signature.secret + hadoop + String + security + + The signature secret for signing the Authentication token + + + + The signature secret for signing the authentication tokens. + If not set a random secret is generated at startup time. + The same secret should be used for JT/NN/DN/TT configurations. + + + + + hadoop.http.authentication.cookie.domain + + true + String + security + + The domain to store authentication token + + + + The domain to use for the HTTP cookie that stores the authentication token. + In order to authentiation to work correctly across all Hadoop nodes web-consoles + the domain must be correctly set. + IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings. + For this setting to work properly all nodes, the cluster must be configured + to generate URLs with hostname.domain names on it. + + + + + hadoop.http.authentication.simple.anonymous.allowed + true + Boolean + security + + Simple authentication anonymous allowance + + + + Indicates if anonymous requests are allowed when using 'simple' authentication. + + + + + hadoop.http.authentication.kerberos.principal + true + 1 + HTTP/localhost@LOCALHOST + String + security + + Authentication principal + + + Indicates the Kerberos principal to be used for HTTP endpoint. + The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO + specification. 
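Editor's note: topology.script.file.name above expects an executable that receives up to topology.script.number.args host names or IP addresses per invocation and prints one rack path per argument (the shipped default points at /usr/lib/intelcloud/rackmap.sh). A minimal sketch of such a script in Python; the host-to-rack table is hypothetical:

#!/usr/bin/env python
# Hypothetical rack-mapping script of the form expected by
# topology.script.file.name: it receives up to topology.script.number.args
# host names / IP addresses and prints one rack path per argument.
import sys

# Example mapping only; a real script would consult site inventory.
RACK_MAP = {
    '10.0.0.11': '/rack1',
    '10.0.0.12': '/rack1',
    '10.0.0.21': '/rack2',
}
DEFAULT_RACK = '/default-rack'

if __name__ == '__main__':
    for host in sys.argv[1:]:
        print(RACK_MAP.get(host, DEFAULT_RACK))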
+ + + + + hadoop.http.authentication.kerberos.keytab + true + ${user.home}/hadoop.keytab + String + security + + Location of keytab file + + + + Location of the keytab file with the credentials for the principal. + Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop. + + + + + + hadoop.security.group.mapping.ldap.url + true + + true + String + security + + The URL of the LDAP server + + + + The URL of the LDAP server to use for resolving user groups when using + the LdapGroupsMapping user to group mapping. + + + + + hadoop.security.group.mapping.ldap.ssl + true + false + Boolean + security + + Use SSL connecting to LDAP server + + + + Whether or not to use SSL when connecting to the LDAP server. + + + + + hadoop.security.group.mapping.ldap.ssl.keystore + true + + true + String + security + + File path to the SSL keystore + + + + File path to the SSL keystore that contains the SSL certificate required + by the LDAP server. + + + + + hadoop.security.group.mapping.ldap.ssl.keystore.password.file + true + + true + String + security + + File path containing the password of LDAP SSL keystore + + + + The path to a file containing the password of the LDAP SSL keystore. + IMPORTANT: This file should be readable only by the Unix user running + the daemons. + + + + + hadoop.security.group.mapping.ldap.bind.user + true + + true + String + security + + The user name to bind as when connecting to the LDAP server + + + + The distinguished name of the user to bind as when connecting to the LDAP + server. This may be left blank if the LDAP server supports anonymous binds. + + + + + hadoop.security.group.mapping.ldap.bind.password.file + true + + true + String + security + + The path to a file containing the password of the bind user + + + + The path to a file containing the password of the bind user. + IMPORTANT: This file should be readable only by the Unix user running + the daemons. + + + + + hadoop.security.group.mapping.ldap.base + true + + true + String + security + + The search base for the LDAP connection + + + + The search base for the LDAP connection. This is a distinguished name, + and will typically be the root of the LDAP directory. + + + + + hadoop.security.group.mapping.ldap.search.filter.user + true + (&(objectClass=user)(sAMAccountName={0})) + String + security + + An filter to use when searching for LDAP users + + + + An additional filter to use when searching for LDAP users. The default will + usually be appropriate for Active Directory installations. If connecting to + an LDAP server with a non-AD schema, this should be replaced with + (&(objectClass=inetOrgPerson)(uid={0}). {0} is a special string used to + denote where the username fits into the filter. + + + + + hadoop.security.group.mapping.ldap.search.filter.group + true + (objectClass=group) + String + security + + An filter to use when searching for LDAP groups + + + + An additional filter to use when searching for LDAP groups. This should be + changed when resolving groups against a non-Active Directory installation. + posixGroups are currently not a supported group class. + + + + + hadoop.security.group.mapping.ldap.search.attr.member + true + member + String + security + + The attribute identifying the users that are members of the group + + + + The attribute of the group object that identifies the users that are + members of the group. The default will usually be appropriate for + any LDAP installation. 
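Editor's note: hadoop.security.group.mapping.ldap.search.filter.user above uses {0} as the placeholder for the username being looked up. A tiny illustration of that substitution (the real lookup is done by LdapGroupsMapping on the Hadoop side; the username here is an example):

# Illustration of the {0} placeholder in the LDAP user search filter
# described above (default filter shown; the username is an example).
user_filter = '(&(objectClass=user)(sAMAccountName={0}))'
print(user_filter.format('jdoe'))
# -> (&(objectClass=user)(sAMAccountName=jdoe))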
+ + + + + hadoop.security.group.mapping.ldap.search.attr.group.name + cn + String + security + + The attribute identifying the group name + + + + The attribute of the group object that identifies the group name. The + default will usually be appropriate for all LDAP systems. + + + + + basic + Hadoop Basic + true + false + + + ioconf + Hadoop IO + true + false + + + basic + Basic Configuration + Basic configurations that get Hadoop running. + + + perf + Performance + Configurations that affect Hadoop's performance + + + security + Security + Security configurations like Kerberos. + + + network + Network Setting + Network Setting. + + + filesystem + File System + File System configurations. + + + namenode + Namenode + Configurations for HDFS Namenode. + + + io + IO Configuration + IO Configuration + + + ipc + IPC + IPC Configuration + + + proxy + Proxy Configuration + Proxy Configuration + + + rack + Rack Configuration + Rack Configuration + +
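Editor's note: the definitions above describe the core-layer properties the plugin can manage. As a sketch of how a few of them would look once rendered into the standard Hadoop core-site.xml <property>/<name>/<value> layout; the selection of names and values here is only an example:

# Sketch: render a few of the core-layer properties described above into
# the standard Hadoop core-site.xml <property> layout.
from xml.sax.saxutils import escape

overrides = {
    'fs.checkpoint.period': '3600',
    'webinterface.private.actions': 'false',
    'topology.script.number.args': '100',
}


def to_core_site(props):
    lines = ['<configuration>']
    for name, value in sorted(props.items()):
        lines.append('  <property>')
        lines.append('    <name>%s</name>' % escape(name))
        lines.append('    <value>%s</value>' % escape(value))
        lines.append('  </property>')
    lines.append('</configuration>')
    return '\n'.join(lines)


print(to_core_site(overrides))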
diff --git a/savanna/plugins/intel/resources/hdfs-default.xml b/savanna/plugins/intel/resources/hdfs-default.xml new file mode 100644 index 00000000..e86aa73a --- /dev/null +++ b/savanna/plugins/intel/resources/hdfs-default.xml @@ -0,0 +1,1193 @@ + + + + + + + + hadoop.namenode.memory + 0 + Integer + 1 + true + true + namenode + + Namenode server memory size + + + Default size for namenode server memory. + + + + hadoop.secondary.namenode.memory + 0 + Integer + 1 + true + true + namenode + + Secondary namenode server memory size + + + Default size for secondary namenode server memory. + + + + hadoop.datanode.memory + 4096 + Integer + 1 + true + true + datanode + + Datanode server memory size + + + Default size for datanode server memory. + + + + dfs.namenode.logging.level + info + String + namenode + + Namenode logging level + + + The logging level for dfs namenode. Other values are "dir"(trac +e namespace mutations), "block"(trace block under/over replications and block +creations/deletions), or "all". + + + + dfs.secondary.http.address + 0.0.0.0:50090 + String + false + namenode + + Secondary namenode http server address and port + + + + The secondary namenode http server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.address + 0.0.0.0:50010 + String + datanode + + Address datanode server listens to + + + + The address where the datanode server will listen to. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.http.address + 0.0.0.0:50075 + String + datanode + + Datanode http server address and port + + + + The datanode http server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.ipc.address + 0.0.0.0:50020 + String + datanode + + Datanode ipc server adderss and port + + + + The datanode ipc server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.block.local-path-access.user + + hbase + String + datanode + + DFS short circuit read allowed user list + + + The user in this list can directly read the local file system HDFS block, + instead of reading through DataNode, thus improving performance, + The list is seperated by comma. + + true + + + dfs.datanode.handler.count + 3 + 3 + 100 + Integer + datanode + + Datanode server threads count + + + The number of server threads for the datanode. + + + + dfs.datanode.max.xcievers + 32768 + Integer + datanode + + Datanode service threads count + + + Number of threads for the datanode service. + + + + dfs.http.address + 0.0.0.0:50070 + String + false + basic + + Namenode Web UI address and port + + + + The address and the base port where the dfs namenode web ui will listen on. + If the port is 0 then the server will start on a free port. 
+ + + + + dfs.https.enable + false + Boolean + security + + Whether to support HTTPS + + + Decide if HTTPS(SSL) is supported on HDFS + + + + + dfs.https.need.client.auth + false + Boolean + security + + Whether require SSL client certificate authentication + + + Whether SSL client certificate authentication is required + + + + + dfs.https.server.keystore.resource + ssl-server.xml + String + security + + Resource file to extract SSL server keystore information + + + Resource file from which ssl server keystore + information will be extracted + + + + + dfs.https.client.keystore.resource + ssl-client.xml + String + security + + Resource file to extract SSL client keystore information + + + Resource file from which ssl client keystore + information will be extracted + + + + + dfs.datanode.https.address + 0.0.0.0:50475 + String + datanode + + Datanode https server address and port + + + + The datanode https server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.https.address + 0.0.0.0:50470 + String + basic + + dfs https address and port + + + dfs https address and port + + + + dfs.datanode.dns.interface + default + String + datanode + + Network Interface name from which Datanode should report its IP address + + + The name of the Network Interface from which a data node should + report its IP address. + + + + + dfs.datanode.dns.nameserver + default + String + datanode + + Datanode name server address + + + The host name or IP address of the name server (DNS) + which a DataNode should use to determine the host name used by the + NameNode for communication and display purposes. + + + + + dfs.replication.considerLoad + true + Boolean + perf + + Whether chooseTarget considers the target's load or not + + + Decide if chooseTarget considers the target's load or not + + + + + dfs.default.chunk.view.size + 32768 + Integer + basic + + Chunk size to view on a browser + + + The number of bytes to view for a file on the browser. + + + + + dfs.datanode.du.reserved + 0 + Integer + datanode + + Reserved space in bytes per volume + + + Reserved space in bytes per volume. Always leave this much space free for non dfs use. + + + + + dfs.name.dir + /hadoop/drbd/hadoop_image,/hadoop/hadoop_image_local + Directory + basic + + DFS fsimage file directory + + + Determines where on the local filesystem the DFS name node + should store the name table(fsimage). If this is a comma-delimited list + of directories then the name table is replicated in all of the + directories, for redundancy. + + NameNode-system +
TextItem
+
+ + dfs.name.edits.dir + ${dfs.name.dir} + Directory + basic + + DFS edits file directory + + + Determines where on the local filesystem the DFS name node + should store the transaction (edits) file. If this is a comma-delimited list + of directories then the transaction file is replicated in all of the + directories, for redundancy. Default value is same as dfs.name.dir + + + + + dfs.web.ugi + webuser,webgroup + String + basic + + user account used by the web interface + + + The user account used by the web interface. + Syntax: USERNAME,GROUP1,GROUP2, ... + + + + + dfs.permissions + true + true + Boolean + security + + Whether enable permission checking in HDFS + + + + If "true", enable permission checking in HDFS. + If "false", permission checking is turned off, + but all other behavior is unchanged. + Switching from one parameter value to the other does not change the mode, + owner or group of files or directories. + + + + + dfs.permissions.extended + true + false + Boolean + security + + Whether to enable permission extension in HDFS + + + + If "true", enable permission extension in HDFS. + If "false", permission extension is turned off. + + + + + dfs.permissions.extended.permissions.file + true + + String + true + security + + Configuration file for extension rules + + + + If extended permissions is enabled, then the needed configuration file should be + configured here for extension rules. + + + + + dfs.permissions.supergroup + supergroup + String + security + + super-users group name + + + The name of the group of super-users. + + + + dfs.block.access.token.enable + false + Boolean + security + + Access tokens for accessing datanodes + + + + If "true", access tokens are used as capabilities for accessing datanodes. + If "false", no access tokens are checked on accessing datanodes. + + + + + dfs.block.access.key.update.interval + 600 + Integer + security + + Interval at which namenode updates its access keys + + + + Interval in minutes at which namenode updates its access keys. + + + + + dfs.block.access.token.lifetime + 600 + Integer + security + + Access tokens lifetime + + + The lifetime of access tokens in minutes. + + + + dfs.data.dir + ${hadoop.tmp.dir}/dfs/data + ${hadoop.tmp.dir}/dfs/data + + Directory + false + + basic + + Local filesystem directory datanode stores its blocks + + + Determines where on the local filesystem an DFS data node + should store its blocks. If this is a comma-delimited + list of directories, then data will be stored in all named + directories, typically on different devices. + Directories that do not exist are ignored. + + + 2 + + + dfs.datanode.data.dir.perm + 755 + 755 + 755 + Integer + security + + Local filesystem directory permissions + + + Permissions for the directories on on the local filesystem where + the DFS data node store its blocks. The permissions can either be octal or + symbolic. + + + + dfs.replication + 3 + Integer + basic + + Block replication count + + + Default block replication. + The actual number of replications can be specified when the file is created. + The default is used if replication is not specified in create time. + + + basic +
TextItem
+
+ + dfs.replication.max + 512 + Integer + basic + + Maximal block replication count + + + Maximal block replication. + + + + + dfs.replication.min + 1 + Integer + basic + + Minimal block replication count + + + Minimal block replication. + + + + + dfs.block.size + 67108864 + 67108864 + 134217728 + Integer + basic + + Default block size + + + The default block size for new files. + + basic +
TextItem
+
+ + dfs.df.interval + 60000 + Integer + basic + + Disk usage statistics refresh interval + + + Disk usage statistics refresh interval in msec. + + + + dfs.client.block.write.retries + 3 + Integer + perf + + Writing to datanodes retry times + + + The number of retries for writing blocks to the data nodes, + before we signal failure to the application. + + + + + dfs.blockreport.intervalMsec + 3600000 + Integer + basic + + Block reporting interval + + + Determines block reporting interval in milliseconds. + + + + dfs.blockreport.initialDelay + 0 + Integer + basic + + Delay for first block report + + + Delay for first block report in seconds. + + + + dfs.datanode.directoryscan.interval + 21600 + Integer + datanode + + Datanode scan data interval + + + Interval in seconds for Datanode to scan data directories and + reconcile the difference between blocks in memory and on the disk. + + + + + dfs.datanode.directoryscan.threads + 1 + Integer + datanode + + Number of threads to use when scanning volumes to + generate block reports + + + Number of threads to use when scanning volumes to + generate block reports. A value greater than one means + volumes will be scanned in parallel. + + + + dfs.heartbeat.interval + 3 + Integer + datanode + + Datanode heartbeat + + + Determines datanode heartbeat interval in seconds. + + + + dfs.namenode.handler.count + 10 + 10 + 100 + Integer + namenode + + Namenode server threads count + + + The number of server threads for the namenode. + + NameNode-system +
TextItem
+
+ + dfs.safemode.threshold.pct + 0.999f + Float + basic + + percentage of blocks that should satisfy + the minimal replication requirement defined by dfs.replication.min. + + + + + Specifies the percentage of blocks that should satisfy + the minimal replication requirement defined by dfs.replication.min. + Values less than or equal to 0 mean not to wait for any particular + percentage of blocks before exiting safemode. + Values greater than 1 will make safe mode permanent. + + + + + dfs.safemode.min.datanodes + 0 + Integer + true + basic + + number of datanodes that must be considered alive + before the name node exits safemode. + + + + Specifies the number of datanodes that must be considered alive + before the name node exits safemode. + Values less than or equal to 0 mean not to take the number of live + datanodes into account when deciding whether to remain in safe mode + during startup. + Values greater than the number of datanodes in the cluster + will make safe mode permanent. + + + + + dfs.safemode.extension + 30000 + Integer + basic + + Safe mode extension time + + + + Determines extension of safe mode in milliseconds + after the threshold level is reached. + + + + + dfs.balance.bandwidthPerSec + 1048576 + 1048576 + 104857600 + Integer + datanode + + Maximal amount of bandwidth that each datanode + can utilize for the balancing purpose + + + + Specifies the maximal amount of bandwidth that each datanode + can utilize for the balancing purpose in terms of + the number of bytes per second. + + + + + dfs.hosts + + String + true + basic + + File that contains a list of hosts + permitted to connect to the namenode + + + Names a file that contains a list of hosts that are + permitted to connect to the namenode. The full pathname of the file + must be specified. If the value is empty, all hosts are + permitted. + + + + dfs.hosts.exclude + + String + true + basic + + File that contains a list of hosts + not permitted to connect to the namenode + + + Names a file that contains a list of hosts that are + not permitted to connect to the namenode. The full pathname of the + file must be specified. If the value is empty, no hosts are + excluded. + + + + dfs.max.objects + 0 + Integer + basic + + The maximum number of files, directories and blocks + dfs supports + + + The maximum number of files, directories and blocks + dfs supports. A value of zero indicates no limit to the number + of objects that dfs supports. + + + + + dfs.namenode.decommission.interval + 30 + Integer + namenode + + Namenode periodicity to check if decommission is complete + + + Namenode periodicity in seconds to check if decommission is + complete. + + + + dfs.namenode.decommission.nodes.per.interval + 5 + Integer + namenode + + The number of nodes namenode checks if decommission is complete + + + The number of nodes namenode checks if decommission is complete + in each dfs.namenode.decommission.interval. + + + + dfs.replication.interval + 3 + Integer + basic + + The periodicity in seconds with which the namenode computes + repliaction work for datanodes. + + + The periodicity in seconds with which the namenode computes + repliaction work for datanodes. + + + + dfs.access.time.precision + 3600000 + Integer + basic + + HDFS file access time precision + + + The access time for HDFS file is precise upto this value. + The default value is 1 hour. Setting a value of 0 disables + access times for HDFS. + + + + + dfs.support.append + true + Boolean + basic + + Whether HDFS allows appends + + + Does HDFS allow appends to files? 
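Editor's note: dfs.safemode.threshold.pct, dfs.safemode.min.datanodes and dfs.safemode.extension above combine into the namenode's safemode exit decision. A simplified model of that rule as described (not the actual namenode code):

# Simplified model of the safemode exit rule described above.
# Defaults from this file: threshold 0.999, min datanodes 0, extension 30000 ms.
def may_leave_safemode(blocks_meeting_min_replication, total_blocks,
                       live_datanodes, threshold_pct=0.999,
                       min_datanodes=0):
    # threshold <= 0 means "do not wait for any particular percentage";
    # threshold > 1 effectively makes safemode permanent.
    if threshold_pct > 0:
        if total_blocks > 0:
            reached = (float(blocks_meeting_min_replication) / total_blocks
                       >= threshold_pct)
        else:
            reached = True
        if not reached:
            return False
    # min_datanodes <= 0 means the live datanode count is not considered.
    if min_datanodes > 0 and live_datanodes < min_datanodes:
        return False
    # The real namenode additionally waits dfs.safemode.extension
    # milliseconds after the threshold is reached.
    return True


assert may_leave_safemode(999, 1000, 5)       # 99.9% of blocks reported
assert not may_leave_safemode(990, 1000, 5)   # below the 0.999 threshold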
+ + + + dfs.datanode.plugins + + String + true + datanode + + Datanode plugins to be activated + + + Comma-separated list of datanode plug-ins to be activated. + + + + + dfs.namenode.plugins + + String + true + namenode + + Namenode plugins to be activated + + + Comma-separated list of namenode plug-ins to be activated. + + + + + dfs.datanode.failed.volumes.tolerated + 0 + Integer + datanode + + The number of volumes that are allowed to + fail before a datanode stops offering service + + + The number of volumes that are allowed to + fail before a datanode stops offering service. By default + any volume failure will cause a datanode to shutdown. + + + + + dfs.namenode.delegation.key.update-interval + 86400000 + Integer + namenode + + Namenode delegation key update interval + + + The update interval for master key for delegation tokens + in the namenode in milliseconds. + + + + + dfs.namenode.delegation.token.max-lifetime + 604800000 + Integer + namenode + + Namenode delegation key maximum lifetime + + + The maximum lifetime in milliseconds for which a delegation + token is valid. + + + + + dfs.namenode.delegation.token.renew-interval + 86400000 + Integer + namenode + + Namenode delegation key renewal interval + + + The renewal interval for delegation token in milliseconds. + + + + + dfs.drbd.name.dir + /hadoop/drbd + Directory + true + true + basic + + Remote backup directory + + + Setting remote backup directory, this directory must be set in "dfs.name.dir". + + NameNode-system +
TextItem
+
+ + hdfs.shortcut.reader.user + + String + true + true + datanode + + User when reading local data in datanode + + + Which user do you want to use when reading local data in datanode. + Empty value will disable the feature. + + + + namenode.memory.weight + + 50 + Integer + true + namenode + + Weight of namenode heapsize + + + The weight of namenode heapsize and the default value is 50. + + advanced +
TextItem
+
+ + secondary.namenode.memory.weight + + 50 + Integer + true + namenode + + Weight of secondary namenode heapsize + + + The weight of secondary namenode heapsize and the default value is 50. + + + + + dfs.socket.timeout + 120000 + Integer + basic + + Timeout for socket connection + + + Timeout for socket connection. + + + + dfs.replication.adjust + false + Boolean + replication + + Whether to adjust the replication count + + + + If it is true, adjust the number of replication according to visiting numbers of blocks. + + + + + dfs.replication.historyWindow + 1440 + Integer + replication + + Retention cycle time + + + + Determines retention cycle time of file visit statistics,(in Minutes), The default value is 1 day. + + + + + dfs.replication.adjustTimer + 720 + Integer + replication + + Replication adjusting interval + + + + Determines the file's replication adjusting interval(in Minutes), The default value is 6 hours. + + + + + dfs.replication.adjust.maxPercent + 0.1 + Float + replication + + Replica adjusting max percentage of disk space + + + + The max percentage of disk space for replica adjusting. + + + + + dfs.replication.reserved.datanode.number.percent + 0.1 + Float + replication + + Percentage of datanode numbers reserved to others + + + + The percentage of Datanode numbers for reserving to others. + + + + + dfs.replication.adjust.blockWeight + 10 + Integer + replication + + Number of blocks for adjust weight + + + + The number of blocks to be regarded as a basic unit for adjust weight. + + + + + + basic + HDFS Basic + true + false + + + NameNode-system + NameNode Configuration + true + false + "" + + + advanced + Advanced Configuration + true + false + "" + + + basic + Basic Configuration + Basic configurations that get HDFS running. + + + perf + Performance + Configurations that affect Hadoop's performance + + + namenode + Namenode Configuration + Configurations for Namenode. + + + datanode + Datanode Configuration + Configurations for Datanode. + + + security + Security + Security configurations like permission control. + + + replication + Replication + Configurations for replication + +
diff --git a/savanna/plugins/intel/resources/mapred-default.xml b/savanna/plugins/intel/resources/mapred-default.xml new file mode 100644 index 00000000..107f962a --- /dev/null +++ b/savanna/plugins/intel/resources/mapred-default.xml @@ -0,0 +1,2678 @@ + + + + + + + + hadoop.jobtracker.memory + 4096 + Integer + jobtracker + + JobTracker server memory size + + 1 + true + true + + Default size for jobtracker server memory. + + + + hadoop.tasktracker.slot.memory + 4096 + Integer + tasktracker + + Total memory size for tasktracker map and reduce slot + + 1 + true + true + + Total memory size for tasktracker map and reduce slot. + + + + hadoop.tasktracker.memory + 4096 + Integer + tasktracker + + Default size for tasktracker server memory + + 1 + true + true + + Default size for tasktracker server memory. + + + + hadoop.job.history.location + + true + String + basic + + job history location + + + If job tracker is static the history files are stored + in this single well known place. If No value is set here, by default, + it is in the local file system at ${hadoop.log.dir}/history. + + + + + hadoop.job.history.user.location + + true + String + basic + + user specified history location + + + User can specify a location to store the history files of + a particular job. If nothing is specified, the logs are stored in + output directory. The files are stored in "_logs/history/" in the directory. + User can stop logging by giving the value "none". + + + + + mapred.job.tracker.history.completed.location + + true + String + basic + + The completed job history files location + + + The completed job history files are stored at this single well + known location. If nothing is specified, the files are stored at + ${hadoop.job.history.location}/done. + + + + + + io.sort.factor + 10 + 10 + 100 + Integer + perf + + IO sort factor + + + The number of streams to merge at once while sorting + files. This determines the number of open file handles. + + + + io.sort.mb + 100 + 100 + 200 + Integer + perf + + Total amount of buffer memory to use while sorting + files, + + + The total amount of buffer memory to use while sorting + files, in megabytes. By default, gives each merge stream 1MB, which + should minimize seeks. + + + + io.sort.record.percent + 0.05 + Float + perf + + The percentage of io.sort.mb dedicated to tracking record + boundaries + + + The percentage of io.sort.mb dedicated to tracking record + boundaries. Let this value be r, io.sort.mb be x. The maximum number + of records collected before the collection thread must block is equal + to (r * x) / 4 + + + + io.sort.spill.percent + 0.80 + Float + perf + + buffer limit + + + + The soft limit in either the buffer or record collection + buffers. Once reached, a thread will begin to spill the contents to disk + in the background. Note that this does not imply any chunking of data to + the spill. A value less than 0.5 is not recommended. + + + + io.map.index.skip + 0 + Integer + io + + Number of index entries to skip between each entry + + + Number of index entries to skip between each entry. + Zero by default. Setting this to values larger than zero can + facilitate opening large map files using less memory. + + + + mapred.job.tracker + false + local + + true + String + jobtracker + + The host and port that the MapReduce job tracker runs + at + + + The host that the MapReduce job tracker runs + at. If "local", then jobs are run in-process as a single map + and reduce task. 
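Editor's note: the io.sort.mb and io.sort.spill.percent properties above size the map-side sort buffer and its soft spill limit. Illustrative arithmetic using the defaults from this file:

# Illustration of the sort-buffer sizing knobs described above,
# using the defaults from this file.
IO_SORT_MB = 100               # io.sort.mb
IO_SORT_SPILL_PERCENT = 0.80   # io.sort.spill.percent (soft limit)

buffer_bytes = IO_SORT_MB * 1024 * 1024
spill_threshold_bytes = int(buffer_bytes * IO_SORT_SPILL_PERCENT)

print('background spill starts at ~%d MB of the %d MB buffer'
      % (spill_threshold_bytes // (1024 * 1024), IO_SORT_MB))
# background spill starts at ~80 MB of the 100 MB buffer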
+ + + + + mapred.jobtracker + true + + true + String + true + jobtracker + + The address that the job tracker runs at + + directory + + The host and port that the MapReduce job tracker runs + at. If "local", then jobs are run in-process as a single map + and reduce task. + + +
TextItem
+
+ + mapred.jobtracker.port + 54311 + Integer + jobtracker + + The port that the MapReduce job tracker runs + at + + true + + The port that the MapReduce job tracker runs + at. If "local", then jobs are run in-process as a single map + and reduce task. + + + + + hadoop.jobtracker.thrift.port + 9290 + Integer + jobtracker + + The port for jobtracker thrift server. + + true + + The port for jobtracker thrift server. + + + + mapred.job.tracker.http.address + 0.0.0.0:50030 + String + jobtracker + + The job tracker http server address and port + + + + The job tracker http server address and port the server will listen on. + If the port is 0 then the server will start on a free port. + + + + + mapred.job.tracker.handler.count + 10 + 10 + 60 + Integer + jobtracker + + The number of server threads for the JobTracker. + + + + The number of server threads for the JobTracker. This should be roughly + 4% of the number of tasktracker nodes. + + + + + mapred.task.tracker.report.address + 127.0.0.1:0 + String + tasktracker + + The interface and port that task tracker server listens on + + + The interface and port that task tracker server listens on. + Since it is only connected to by the tasks, it uses the local interface. + EXPERT ONLY. Should only be changed if your host does not have the loopback + interface. + + + + mapred.local.dir + ${hadoop.tmp.dir}/mapred/local + + true + Directory + basic + + MapReduce local directory + + + 2 + + The local directory where MapReduce stores intermediate + data files. May be a comma-separated list of + directories on different devices in order to spread disk i/o. + Directories that do not exist are ignored. + + + + + mapred.system.dir + ${hadoop.tmp.dir}/mapred/system + Directory + basic + + The directory where MapReduce stores control files. + + false + + The directory where MapReduce stores control files. + + + + mapreduce.jobtracker.staging.root.dir + ${hadoop.tmp.dir}/mapred/staging + Directory + basic + + The root of the staging area for users' job files + + false + + The root of the staging area for users' job files + In practice, this should be the directory where users' home + directories are located (usually /user) + + + + + mapred.temp.dir + ${hadoop.tmp.dir}/mapred/temp + Directory + basic + + A shared directory for temporary files. + + false + + A shared directory for temporary files. + + + + mapred.local.dir.minspacestart + 0 + Integer + basic + + minimum space in mapred.local.dir to start task + + + If the space in mapred.local.dir drops under this, + do not ask for more tasks. Value in bytes. + + + + mapred.local.dir.minspacekill + 0 + Integer + basic + + mapred.local.dir minimum space kill + + + If the space in mapred.local.dir drops under this, + do not ask more tasks until all the current ones have finished and + cleaned up. Also, to save the rest of the tasks we have running, + kill one of them, to clean up some space. Start with the reduce tasks, + then go with the ones that have finished the least. + Value in bytes. + + + + + mapred.tasktracker.expiry.interval + 600000 + 600000 + 180000 + Integer + tasktracker + + TaskTracker expiry interval + + + Expert: The time-interval, in miliseconds, after which + a tasktracker is declared 'lost' if it doesn't send heartbeats. + + + + + mapred.tasktracker.instrumentation + false + org.apache.hadoop.mapred.TaskTrackerMetricsSource + Class + tasktracker + + TaskTracker instrumentation class + + + Expert: The instrumentation class to associate with each TaskTracker. 
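Editor's note: mapred.job.tracker.handler.count above is recommended to be roughly 4% of the number of tasktracker nodes. A small sketch of that sizing rule; clamping to the shipped default of 10 for small clusters is an assumption made here, not something the description mandates:

# Sizing rule quoted above for mapred.job.tracker.handler.count:
# roughly 4% of the number of tasktracker nodes (default 10).
DEFAULT_HANDLER_COUNT = 10


def jobtracker_handler_count(num_tasktrackers):
    return max(DEFAULT_HANDLER_COUNT, int(round(0.04 * num_tasktrackers)))


print(jobtracker_handler_count(50))    # small cluster -> stays at 10
print(jobtracker_handler_count(1000))  # -> 40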
+ + + + + mapred.tasktracker.resourcecalculatorplugin + + true + String + tasktracker + + tasktracker resource calculator plugin + + + + Name of the class whose instance will be used to query resource information + on the tasktracker.The class must be an instance of + org.apache.hadoop.util.ResourceCalculatorPlugin. If the value is null, the + tasktracker attempts to use a class appropriate to the platform. + Currently, the only platform supported is Linux. + + + + + mapred.tasktracker.taskmemorymanager.monitoring-interval + 5000 + Integer + tasktracker + + TaskTracker monitoring interval + + + The interval, in milliseconds, for which the tasktracker waits + between two cycles of monitoring its tasks' memory usage. Used only if + tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory. + + + + + mapred.tasktracker.tasks.sleeptime-before-sigkill + 5000 + Integer + tasktracker + + TaskTracker sleeptime before sending a SIGKILL + + + The time, in milliseconds, the tasktracker waits for sending a + SIGKILL to a process, after it has been sent a SIGTERM. + + + + mapred.map.tasks + 2 + Integer + map + + map count + + + The default number of map tasks per job. + Ignored when mapred.job.tracker is "local". + + + + + mapred.reduce.tasks + 1 + Integer + reduce + + reduce count + + + + The default number of reduce tasks per job. Typically set to 99% + of the cluster's reduce capacity, so that if a node fails the reduces can + still be executed in a single wave. + Ignored when mapred.job.tracker is "local". + + + + + mapreduce.tasktracker.outofband.heartbeat + false + true + Boolean + tasktracker + + TaskTracker out-of-band heartbeat + + + Expert: Set this to true to let the tasktracker send an + out-of-band heartbeat on task-completion for better latency. + + + + + mapreduce.tasktracker.outofband.heartbeat.damper + 1000000 + Integer + tasktracker + + out-of-band heartbeat damper + + + When out-of-band heartbeats are enabled, provides + damping to avoid overwhelming the JobTracker if too many out-of-band + heartbeats would occur. The damping is calculated such that the + heartbeat interval is divided by (T*D + 1) where T is the number + of completed tasks and D is the damper value. + Setting this to a high value like the default provides no damping -- + as soon as any task finishes, a heartbeat will be sent. Setting this + parameter to 0 is equivalent to disabling the out-of-band heartbeat feature. + A value of 1 would indicate that, after one task has completed, the + time to wait before the next heartbeat would be 1/2 the usual time. + After two tasks have finished, it would be 1/3 the usual time, etc. + + + + + mapred.jobtracker.restart.recover + false + Boolean + job + + Whether enable job recovery + + + "true" to enable (job) recovery upon restart, + "false" to start afresh + + + + + mapred.jobtracker.job.history.block.size + 3145728 + Integer + job + + The block size of the job history file + + + The block size of the job history file. Since the job recovery + uses job history, its important to dump job history to disk as + soon as possible. Note that this is an expert level parameter. + The default value is set to 3 MB. + + + + + mapreduce.job.split.metainfo.maxsize + 10000000 + Integer + job + + The maximum permissible size of the split metainfo file + + + The maximum permissible size of the split metainfo file. + The JobTracker won't attempt to read split metainfo files bigger than + the configured value. + No limits if set to -1. 
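Editor's note: mapreduce.tasktracker.outofband.heartbeat.damper above defines its effect with an explicit formula — the heartbeat interval is divided by (T*D + 1), where T is the number of completed tasks and D is the damper. A direct transcription; the 3000 ms base interval is just an example value:

# The damping formula quoted above for
# mapreduce.tasktracker.outofband.heartbeat.damper: the heartbeat interval
# is divided by (T*D + 1), T = completed tasks, D = damper value.
def damped_heartbeat_interval(base_interval_ms, completed_tasks, damper):
    # damper = 0 leaves the full interval in place (out-of-band heartbeats
    # effectively disabled); a very large damper shrinks the wait towards
    # zero as soon as any task completes (no damping).
    return float(base_interval_ms) / (completed_tasks * damper + 1)


# With damper = 1: half the usual wait after one completed task,
# a third after two, matching the description above.
print(damped_heartbeat_interval(3000, 1, 1))  # 1500.0
print(damped_heartbeat_interval(3000, 2, 1))  # 1000.0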
+ + + + + mapred.jobtracker.taskScheduler + false + org.apache.hadoop.mapred.JobQueueTaskScheduler + Class + basic + + The class responsible for scheduling the tasks. + + + The class responsible for scheduling the tasks. + + + + mapred.jobtracker.taskScheduler.maxRunningTasksPerJob + + true + Integer + job + + The maximum number of running tasks for a job before + it gets preempted + + false + + The maximum number of running tasks for a job before + it gets preempted. No limits if undefined. + + + + + mapred.map.max.attempts + 4 + Integer + map + + Map maximum attempt times + + + Expert: The maximum number of attempts per map task. + In other words, framework will try to execute a map task these many number + of times before giving up on it. + + + + + mapred.reduce.max.attempts + 4 + Integer + reduce + + Reduce maximum attempt times + + + Expert: The maximum number of attempts per reduce task. + In other words, framework will try to execute a reduce task these many number + of times before giving up on it. + + + + + mapred.reduce.parallel.copies + 5 + 20 + Integer + perf + + The default number of parallel transfers run by reduce + during the copy(shuffle) phase + + + The default number of parallel transfers run by reduce + during the copy(shuffle) phase. + + + + + mapreduce.reduce.shuffle.maxfetchfailures + 10 + Integer + reduce + + reducer maximum fetch failures + + + The maximum number of times a reducer tries to + fetch a map output before it reports it. + + + + + mapreduce.reduce.shuffle.connect.timeout + 180000 + Integer + reduce + + reducer connect timeout + + + Expert: The maximum amount of time (in milli seconds) a reduce + task spends in trying to connect to a tasktracker for getting map output. + + + + + mapreduce.reduce.shuffle.read.timeout + 180000 + Integer + reduce + + reducer read timeout + + + Expert: The maximum amount of time (in milli seconds) a reduce + task waits for map output data to be available for reading after obtaining + connection. + + + + + mapred.task.timeout + 600000 + 180000 + Integer + basic + + task timeout value + + + The number of milliseconds before a task will be + terminated if it neither reads an input, writes an output, nor + updates its status string. + + + + + mapred.tasktracker.map.tasks.maximum + 2 + + true + Integer + tasktracker + + The maximum number of map tasks that will be run + simultaneously by a task tracker + + true + + 2 + + The maximum number of map tasks that will be run + simultaneously by a task tracker. + + + + + mapred.tasktracker.reduce.tasks.maximum + 2 + + true + Integer + tasktracker + + The maximum number of reduce tasks that will be run + simultaneously by a task tracker. + + true + + 2 + + The maximum number of reduce tasks that will be run + simultaneously by a task tracker. + + + + + mapred.jobtracker.completeuserjobs.maximum + 100 + Integer + job + + maximum number of complete jobs to keep around per user + + + The maximum number of complete jobs per user to keep around + before delegating them to the job history. + + + + mapreduce.reduce.input.limit + -1 + Integer + reduce + + The limit on the input size of the reduce + + + The limit on the input size of the reduce. If the estimated + input size of the reduce is greater than this value, job is failed. A + value of -1 means that there is no limit set. + + + + mapred.job.tracker.retiredjobs.cache.size + 1000 + Integer + job + + The number of retired job status to keep in the cache. + + + The number of retired job status to keep in the cache. 
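Editor's note: mapred.tasktracker.map.tasks.maximum and mapred.tasktracker.reduce.tasks.maximum above are per-tasktracker slot counts; together with the node count they give the cluster's task capacity, which is what the earlier mapred.reduce.tasks guidance (about 99% of reduce capacity) refers to. Illustrative arithmetic only:

# Illustrative slot arithmetic for the per-tasktracker maximums above
# (defaults: 2 map slots and 2 reduce slots per tasktracker).
def cluster_capacity(num_tasktrackers, map_slots=2, reduce_slots=2):
    return num_tasktrackers * map_slots, num_tasktrackers * reduce_slots


map_capacity, reduce_capacity = cluster_capacity(20)
# mapred.reduce.tasks is typically set to ~99% of the reduce capacity
suggested_reduces = int(0.99 * reduce_capacity)
print(map_capacity, reduce_capacity, suggested_reduces)  # 40 40 39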
+ + + + mapred.job.tracker.jobhistory.lru.cache.size + 5 + Integer + job + + The number of job history files loaded in memory + + + The number of job history files loaded in memory. The jobs are + loaded when they are first accessed. The cache is cleared based on LRU. + + + + + mapred.jobtracker.instrumentation + false + org.apache.hadoop.mapred.JobTrackerMetricsSource + Class + jobtracker + + JobTracker instrumentation class + + + Expert: The instrumentation class to associate with each JobTracker. + + + + mapred.child.java.opts + -Xmx200m + -Xmx512m -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=1 -XX:ParallelGCThreads=1 + true + String + tasktracker + + Java opts for the task tracker child processes + + + Java opts for the task tracker child processes. + The following symbol, if present, will be interpolated: @taskid@ is replaced + by current TaskID. Any other occurrences of '@' will go unchanged. + For example, to enable verbose gc logging to a file named for the taskid in + /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: + -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc + The configuration variable mapred.child.ulimit can be used to control the + maximum virtual memory of the child processes. + + + + + mapred.child.heapsize + 200 + Integer + basic + + The JVM heapsize used by each map task and reduce task. + + true + true + + The JVM heapsize used by each map task and reduce task. + + + + mapred.child.env + + true + Integer + tasktracker + + tasktracker child process environment variable + + + User added environment variables for the task tracker child + processes. Example : + 1) A=foo This will set the env variable A to foo + 2) B=$B:c This is inherit tasktracker's B env variable. + + + + + mapred.child.ulimit + + true + + String + basic + + MapReduce maximum virtual memory + + + The maximum virtual memory, in KB, of a process launched by the + Map-Reduce framework. This can be used to control both the Mapper/Reducer + tasks and applications using Hadoop Pipes, Hadoop Streaming etc. + By default it is left unspecified to let cluster admins control it via + limits.conf and other such relevant mechanisms. + Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to + JavaVM, else the VM might not start. + + + + + mapred.cluster.map.memory.mb + -1 + Integer + map + + map virtual memory size + + + The size, in terms of virtual memory, of a single map slot + in the Map-Reduce framework, used by the scheduler. + A job can ask for multiple slots for a single map task via + mapred.job.map.memory.mb, upto the limit specified by + mapred.cluster.max.map.memory.mb, if the scheduler supports the feature. + The value of -1 indicates that this feature is turned off. + + + + + mapred.cluster.reduce.memory.mb + -1 + Integer + reduce + + reduce virtual memory size + + + The size, in terms of virtual memory, of a single reduce slot + in the Map-Reduce framework, used by the scheduler. + A job can ask for multiple slots for a single reduce task via + mapred.job.reduce.memory.mb, upto the limit specified by + mapred.cluster.max.reduce.memory.mb, if the scheduler supports the feature. + The value of -1 indicates that this feature is turned off. + + + + + mapred.cluster.max.map.memory.mb + -1 + Integer + map + + map maximum virtual memory size + + + The maximum size, in terms of virtual memory, of a single map + task launched by the Map-Reduce framework, used by the scheduler. 
+ A job can ask for multiple slots for a single map task via + mapred.job.map.memory.mb, upto the limit specified by + mapred.cluster.max.map.memory.mb, if the scheduler supports the feature. + The value of -1 indicates that this feature is turned off. + + + + + mapred.cluster.max.reduce.memory.mb + -1 + Integer + reduce + + reduce maximum virtual memory size + + + The maximum size, in terms of virtual memory, of a single reduce + task launched by the Map-Reduce framework, used by the scheduler. + A job can ask for multiple slots for a single reduce task via + mapred.job.reduce.memory.mb, upto the limit specified by + mapred.cluster.max.reduce.memory.mb, if the scheduler supports the feature. + The value of -1 indicates that this feature is turned off. + + + + + mapred.job.map.memory.mb + -1 + Integer + map + + map virtual memory size + + + The size, in terms of virtual memory, of a single map task + for the job. + A job can ask for multiple slots for a single map task, rounded up to the + next multiple of mapred.cluster.map.memory.mb and upto the limit + specified by mapred.cluster.max.map.memory.mb, if the scheduler supports + the feature. + The value of -1 indicates that this feature is turned off iff + mapred.cluster.map.memory.mb is also turned off (-1). + + + + + mapred.job.reduce.memory.mb + -1 + Integer + reduce + + reduce virtual memory size + + + The size, in terms of virtual memory, of a single reduce task + for the job. + A job can ask for multiple slots for a single map task, rounded up to the + next multiple of mapred.cluster.reduce.memory.mb and upto the limit + specified by mapred.cluster.max.reduce.memory.mb, if the scheduler supports + the feature. + The value of -1 indicates that this feature is turned off iff + mapred.cluster.reduce.memory.mb is also turned off (-1). + + + + + mapred.child.tmp + ./tmp + String + basic + + tmp directory for map and reduce tasks + + + To set the value of tmp directory for map and reduce tasks. + If the value is an absolute path, it is directly assigned. Otherwise, it is + prepended with task's working directory. The java tasks are executed with + option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and + streaming are set with environment variable, + TMPDIR='the absolute path of the tmp dir' + + + + + mapred.inmem.merge.threshold + 1000 + Integer + perf + + in-memory merge threshold + + + The threshold, in terms of the number of files + for the in-memory merge process. When we accumulate threshold number of files + we initiate the in-memory merge and spill to disk. A value of 0 or less than + 0 indicates we want to DON'T have any threshold and instead depend only on + the ramfs's memory consumption to trigger the merge. + + + + + mapred.job.shuffle.merge.percent + 0.66 + Float + perf + + The usage threshold at which an in-memory merge will be + initiated + + + The usage threshold at which an in-memory merge will be + initiated, expressed as a percentage of the total memory allocated to + storing in-memory map outputs, as defined by + mapred.job.shuffle.input.buffer.percent. + + + + + mapred.job.shuffle.input.buffer.percent + 0.70 + Float + perf + + memory to store map outputs during the shuffle + + + The percentage of memory to be allocated from the maximum heap + size to storing map outputs during the shuffle. 
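Editor's note: the mapred.cluster.map.memory.mb / mapred.job.map.memory.mb pair above is described as follows — a job's per-map memory request is rounded up to the next multiple of the cluster slot size, capped by mapred.cluster.max.map.memory.mb, with -1 meaning the feature is off. A sketch of that rule (not the scheduler implementation; treating "off" as one slot per task is a simplification made here):

import math


# Sketch of the memory-based slot accounting described above for
# mapred.job.map.memory.mb against mapred.cluster.map.memory.mb.
def map_slots_requested(job_mb, cluster_slot_mb, cluster_max_mb):
    if job_mb == -1 or cluster_slot_mb == -1:
        return 1  # feature off; simplification: one slot per map task
    if cluster_max_mb != -1 and job_mb > cluster_max_mb:
        raise ValueError('exceeds mapred.cluster.max.map.memory.mb')
    # "rounded up to the next multiple of mapred.cluster.map.memory.mb"
    return int(math.ceil(float(job_mb) / cluster_slot_mb))


print(map_slots_requested(-1, -1, -1))        # defaults: feature off -> 1
print(map_slots_requested(3072, 2048, 8192))  # 3 GB job, 2 GB slots -> 2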
+ + + + + mapred.job.reduce.input.buffer.percent + 0.0 + Float + perf + + The percentage of memory- relative to the maximum heap size- to + retain map outputs during the reduce + + + The percentage of memory- relative to the maximum heap size- to + retain map outputs during the reduce. When the shuffle is concluded, any + remaining map outputs in memory must consume less than this threshold before + the reduce can begin. + + + + + mapred.map.tasks.speculative.execution + true + true + Boolean + map + + whether to execute multiple instances of map task in parallel + + + If true, then multiple instances of some map tasks + may be executed in parallel. + + + + mapred.reduce.tasks.speculative.execution + true + true + false + Boolean + reduce + + whether to execute multiple instances of reduce task in parallel + + + If true, then multiple instances of some reduce tasks + may be executed in parallel. + + + + mapred.job.reuse.jvm.num.tasks + 1 + Integer + perf + + Number of taks to run per jvm + + + How many tasks to run per jvm. If set to -1, there is + no limit. + + + + + mapred.min.split.size + 0 + Integer + map + + The minimum size chunk that map input should be split + into + + + The minimum size chunk that map input should be split + into. Note that some file formats may have minimum split sizes that + take priority over this setting. + + + + mapred.jobtracker.maxtasks.per.job + -1 + Integer + job + + The maximum number of tasks for a single job + + + The maximum number of tasks for a single job. + A value of -1 indicates that there is no maximum. + + + + mapred.submit.replication + 10 + Integer + job + + The replication level for submitted job files + + + The replication level for submitted job files. This + should be around the square root of the number of nodes. + + + + + mapred.tasktracker.dns.interface + default + String + tasktracker + + The name of the Network Interface from which a task + tracker should report its IP address + + + The name of the Network Interface from which a task + tracker should report its IP address. + + + + + mapred.tasktracker.dns.nameserver + default + String + tasktracker + + TaskTracker name server address + + + The host name or IP address of the name server (DNS) + which a TaskTracker should use to determine the host name used by + the JobTracker for communication and display purposes. + + + + + tasktracker.http.threads + 40 + 40 + 60 + Integer + perf + + The number of worker threads that for the http server + + + The number of worker threads that for the http server. This is + used for map output fetching + + + + + mapred.task.tracker.http.address + 0.0.0.0:50060 + String + tasktracker + + The task tracker http server address and port + + + + The task tracker http server address and port. + If the port is 0 then the server will start on a free port. + + + + + keep.failed.task.files + false + Boolean + basic + + Whether to keep files for failed tasks + + + Should the files for failed tasks be kept. This should only be + used on jobs that are failing, because the storage is never + reclaimed. It also prevents the map outputs from being erased + from the reduce directory as they are consumed. + + + + + mapred.output.compress + false + Boolean + job + + Whether to compress job output + + + Should the job outputs be compressed? + + + + + mapred.output.compression.type + RECORD + RECORD + BLOCK + String + job + + compress type of job outputs + + + If the job outputs are to compressed as SequenceFiles, how should + they be compressed? 
Should be one of NONE, RECORD or BLOCK. + + + + + mapred.output.compression.codec + org.apache.hadoop.io.compress.DefaultCodec + Class + job + + job outputs compress codec class + + + If the job outputs are compressed, how should they be compressed? + + + + + mapred.compress.map.output + false + false + true + Boolean + map + + Whether to compress map outputs + + + Should the outputs of the maps be compressed before being + sent across the network. Uses SequenceFile compression. + + + + + mapred.map.output.compression.codec + org.apache.hadoop.io.compress.DefaultCodec + org.apache.hadoop.io.compress.DefaultCodec + org.apache.hadoop.io.compress.SnappyCodec + Class + map + + map outputs compression class + + + If the map outputs are compressed, how should they be + compressed? + + + + + map.sort.class + org.apache.hadoop.util.QuickSort + Class + basic + + The default sort class for sorting keys. + + + The default sort class for sorting keys. + + + + mapred.userlog.limit.kb + 0 + Integer + user + + The maximum size of user-logs of each task in KB + + + The maximum size of user-logs of each task in KB. 0 disables the cap. + + + + + mapred.userlog.retain.hours + 24 + Integer + user + + user log maximum retain hours + + + The maximum time, in hours, for which the user-logs are to be + retained after the job completion. + + + + mapred.user.jobconf.limit + 5242880 + Integer + user + + The maximum allowed size of the user jobconf + + + The maximum allowed size of the user jobconf. The + default is set to 5 MB + + + + mapred.hosts + + true + Integer + jobtracker + + Names a file that contains the list of nodes that may + connect to the jobtracker. + + + Names a file that contains the list of nodes that may + connect to the jobtracker. If the value is empty, all hosts are + permitted. + + + + mapred.hosts.exclude + + true + String + jobtracker + + Names a file that contains the list of hosts that + should be excluded by the jobtracker + + + Names a file that contains the list of hosts that + should be excluded by the jobtracker. If the value is empty, no + hosts are excluded. + + + + mapred.heartbeats.in.second + 100 + Integer + jobtracker + + Approximate number of heart-beats that could arrive + at JobTracker in a second + + + Expert: Approximate number of heart-beats that could arrive + at JobTracker in a second. Assuming each RPC can be processed + in 10msec, the default value is made 100 RPCs in a second. + + + + + mapred.max.tracker.blacklists + 4 + Integer + tasktracker + + TaskTracker maximum number of blacklists + + + The number of blacklists for a tasktracker by various jobs + after which the tasktracker will be marked as potentially + faulty and is a candidate for graylisting across all jobs. + (Unlike blacklisting, this is advisory; the tracker remains + active. However, it is reported as graylisted in the web UI, + with the expectation that chronically graylisted trackers + will be manually decommissioned.) This value is tied to + mapred.jobtracker.blacklist.fault-timeout-window; faults + older than the window width are forgiven, so the tracker + will recover from transient problems. It will also become + healthy after a restart. + + + + + mapred.jobtracker.blacklist.fault-timeout-window + 180 + Integer + tasktracker + + The timeout (in minutes) after which per-job tasktracker + faults are forgiven + + + The timeout (in minutes) after which per-job tasktracker + faults are forgiven. 
The window is logically a circular + buffer of time-interval buckets whose width is defined by + mapred.jobtracker.blacklist.fault-bucket-width; when the + "now" pointer moves across a bucket boundary, the previous + contents (faults) of the new bucket are cleared. In other + words, the timeout's granularity is determined by the bucket + width. + + + + + mapred.jobtracker.blacklist.fault-bucket-width + 15 + Integer + tasktracker + + TaskTracker fault timeout window bucket width + + + The width (in minutes) of each bucket in the tasktracker + fault timeout window. Each bucket is reused in a circular + manner after a full timeout-window interval (defined by + mapred.jobtracker.blacklist.fault-timeout-window). + + + + + mapred.max.tracker.failures + 4 + Integer + tasktracker + + TaskTracker maximum failures + + + The number of task-failures on a tasktracker of a given job + after which new tasks of that job aren't assigned to it. + + + + + jobclient.output.filter + FAILED + String + user + + he filter for controlling the output of the task's userlogs sent + to the console of the JobClient + + + The filter for controlling the output of the task's userlogs sent + to the console of the JobClient. + The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and + ALL. + + + + + jobclient.completion.poll.interval + 5000 + Integer + user + + JobClient polling JobTracker interval + + + The interval (in milliseconds) between which the JobClient + polls the JobTracker for updates about job status. You may want to set this + to a lower value to make tests run faster on a single node system. Adjusting + this value in production may lead to unwanted client-server traffic. + + + + + jobclient.progress.monitor.poll.interval + 1000 + Integer + user + + The interval (in milliseconds) between which the JobClient + reports status to the console and checks for job completion + + + The interval (in milliseconds) between which the JobClient + reports status to the console and checks for job completion. You may want to set this + to a lower value to make tests run faster on a single node system. Adjusting + this value in production may lead to unwanted client-server traffic. + + + + + mapred.job.tracker.persist.jobstatus.active + false + Boolean + job + + Indicates if persistency of job status information is + active or not. + + + Indicates if persistency of job status information is + active or not. + + + + + mapred.job.tracker.persist.jobstatus.hours + 0 + Integer + job + + he number of hours job status information is persisted in DFS. + + + The number of hours job status information is persisted in DFS. + The job status information will be available after it drops of the memory + queue and between jobtracker restarts. With a zero value the job status + information is not persisted at all in DFS. + + + + + mapred.job.tracker.persist.jobstatus.dir + /jobtracker/jobsInfo + Directory + job + + The directory where the job status information is persisted + + + The directory where the job status information is persisted + in a file system to be available after it drops of the memory queue and + between jobtracker restarts. 
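Editor's note: mapred.jobtracker.blacklist.fault-timeout-window and mapred.jobtracker.blacklist.fault-bucket-width above describe a circular buffer of time buckets whose width sets the timeout's granularity. A toy model of that bookkeeping (not the JobTracker code; the indexing scheme is an interpretation of the description):

# Toy model of the circular blacklist fault window described above:
# a 180-minute window divided into 15-minute buckets.
FAULT_TIMEOUT_WINDOW_MIN = 180  # mapred.jobtracker.blacklist.fault-timeout-window
FAULT_BUCKET_WIDTH_MIN = 15     # mapred.jobtracker.blacklist.fault-bucket-width

num_buckets = FAULT_TIMEOUT_WINDOW_MIN // FAULT_BUCKET_WIDTH_MIN


def bucket_for(minutes_since_start):
    # which circular bucket a fault recorded at this time falls into
    return (minutes_since_start // FAULT_BUCKET_WIDTH_MIN) % num_buckets


print(num_buckets)      # 12
print(bucket_for(200))  # 1 (the window has wrapped around once)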
+ + + + + mapreduce.job.complete.cancel.delegation.tokens + true + Boolean + job + + Whether to unregister delegation tokens from renewal + + + if false - do not unregister/cancel delegation tokens + from renewal, because same tokens may be used by spawned jobs + + + + + mapred.task.profile + false + Boolean + basic + + Whether to collect profiler information + + + To set whether the system should collect profiler + information for some of the tasks in this job? The information is stored + in the user log directory. The value is "true" if task profiling + is enabled. + + + + mapred.task.profile.maps + 0-2 + Int_range + map + + ranges of map tasks to profile + + + To set the ranges of map tasks to profile. + mapred.task.profile has to be set to true for the value to be accounted. + + + + + mapred.task.profile.reduces + 0-2 + Int_range + reduce + + ranges of reduce tasks to profile + + + To set the ranges of reduce tasks to profile. + mapred.task.profile has to be set to true for the value to be accounted. + + + + + mapred.line.input.format.linespermap + 1 + Integer + basic + + Number of lines per split in NLineInputFormat + + + Number of lines per split in NLineInputFormat. + + + + + mapred.skip.attempts.to.start.skipping + 2 + Integer + skip + + he number of Task attempts AFTER which skip mode + will be kicked off + + + The number of Task attempts AFTER which skip mode + will be kicked off. When skip mode is kicked off, the + tasks reports the range of records which it will process + next, to the TaskTracker. So that on failures, TT knows which + ones are possibly the bad records. On further executions, + those are skipped. + + + + + mapred.skip.map.auto.incr.proc.count + true + Boolean + skip + + Whether to increment SkipBadRecords.COUNTER_MAP_PROCESSED_GROUPS + + + The flag which if set to true, + SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS is incremented + by MapRunner after invoking the map function. This value must be set to + false for applications which process the records asynchronously + or buffer the input records. For example streaming. + In such cases applications should increment this counter on their own. + + + + + mapred.skip.reduce.auto.incr.proc.count + true + Boolean + skip + + Whether to increment SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS + + + The flag which if set to true, + SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS is incremented + by framework after invoking the reduce function. This value must be set to + false for applications which process the records asynchronously + or buffer the input records. For example streaming. + In such cases applications should increment this counter on their own. + + + + + mapred.skip.out.dir + + true + String + skip + + skipped records output directory + + + If no value is specified here, the skipped records are + written to the output directory at _logs/skip. + User can stop writing skipped records by giving the value "none". + + + + + mapred.skip.map.max.skip.records + 0 + Integer + skip + + The number of acceptable skip records surrounding the bad + record PER bad record in mapper + + + The number of acceptable skip records surrounding the bad + record PER bad record in mapper. The number includes the bad record as well. + To turn the feature of detection/skipping of bad records off, set the + value to 0. + The framework tries to narrow down the skipped range by retrying + until this threshold is met OR all attempts get exhausted for this task. 
+ Set the value to Long.MAX_VALUE to indicate that framework need not try to + narrow down. Whatever records(depends on application) get skipped are + acceptable. + + + + + mapred.skip.reduce.max.skip.groups + 0 + Integer + skip + + The number of acceptable skip groups surrounding the bad + group PER bad group in reducer + + + The number of acceptable skip groups surrounding the bad + group PER bad group in reducer. The number includes the bad group as well. + To turn the feature of detection/skipping of bad groups off, set the + value to 0. + The framework tries to narrow down the skipped range by retrying + until this threshold is met OR all attempts get exhausted for this task. + Set the value to Long.MAX_VALUE to indicate that framework need not try to + narrow down. Whatever groups(depends on application) get skipped are + acceptable. + + + + + + + job.end.retry.attempts + 0 + Integer + basic + + hadoop retry attempts to contact notification URL + + + Indicates how many times hadoop should attempt to contact the + notification URL + + + + job.end.retry.interval + 30000 + Integer + basic + + notifcation URL retry interval + + + Indicates time in milliseconds between notification URL retry + calls + + + + + hadoop.rpc.socket.factory.class.JobSubmissionProtocol + + true + String + basic + + SocketFactory to use to connect to a Map/Reduce master (JobTracker) + + + SocketFactory to use to connect to a Map/Reduce master + (JobTracker). If null or empty, then use hadoop.rpc.socket.class.default. + + + + + mapred.task.cache.levels + 2 + Integer + perf + + max level of the task cache + + + This is the max level of the task cache. For example, if + the level is 2, the tasks cached are at the host level and at the rack + level. + + + + + mapred.queue.names + false + default + String + jobtracker + + list of queues configured for this jobtracker + + + Comma separated list of queues configured for this jobtracker. + Jobs are added to queues and schedulers can configure different + scheduling properties for the various queues. To configure a property + for a queue, the name of the queue must match the name specified in this + value. Queue properties that are common to all schedulers are configured + here with the naming convention, mapred.queue.$QUEUE-NAME.$PROPERTY-NAME, + for e.g. mapred.queue.default.submit-job-acl. + The number of queues configured in this parameter could depend on the + type of scheduler being used, as specified in + mapred.jobtracker.taskScheduler. For example, the JobQueueTaskScheduler + supports only a single queue, which is the default configured here. + Before adding more queues, ensure that the scheduler you've configured + supports multiple queues. + + + + + mapred.acls.enabled + true + false + Boolean + basic + + Whether to enable ACL check + + + Specifies whether ACLs should be checked + for authorization of users for doing various queue and job level operations. + ACLs are disabled by default. If enabled, access control checks are made by + JobTracker and TaskTracker when requests are made by users for queue + operations like submit job to a queue and kill a job in the queue and job + operations like viewing the job-details (See mapreduce.job.acl-view-job) + or for modifying the job (See mapreduce.job.acl-modify-job) using + Map/Reduce APIs, RPCs or via the console and web user interfaces. + + + + + mapred.queue.default.state + RUNNING + String + basic + + job state value + + + + This values defines the state , default queue is in. 
+ the values can be either "STOPPED" or "RUNNING" + This value can be changed at runtime. + + + + + mapred.job.queue.name + default + String + job + + Queue to which a job is submitted + + + Queue to which a job is submitted. This must match one of the + queues defined in mapred.queue.names for the system. Also, the ACL setup + for the queue must allow the current user to submit a job to the queue. + Before specifying a queue, ensure that the system is configured with + the queue, and access is allowed for submitting jobs to the queue. + + + + + mapreduce.job.acl-modify-job + + + String + true + job + + Job specific access-control list for 'modifying' the job + + + Job specific access-control list for 'modifying' the job. It + is only used if authorization is enabled in Map/Reduce by setting the + configuration property mapred.acls.enabled to true. + This specifies the list of users and/or groups who can do modification + operations on the job. For specifying a list of users and groups the + format to use is "user1,user2 group1,group". If set to '*', it allows all + users/groups to modify this job. If set to ' '(i.e. space), it allows + none. This configuration is used to guard all the modifications with respect + to this job and takes care of all the following operations: + o killing this job + o killing a task of this job, failing a task of this job + o setting the priority of this job + Each of these operations are also protected by the per-queue level ACL + "acl-administer-jobs" configured via mapred-queues.xml. So a caller should + have the authorization to satisfy either the queue-level ACL or the + job-level ACL. + Irrespective of this ACL configuration, job-owner, the user who started the + cluster, cluster administrators configured via + mapreduce.cluster.administrators and queue administrators of the queue to + which this job is submitted to configured via + mapred.queue.queue-name.acl-administer-jobs in mapred-queue-acls.xml can + do all the modification operations on a job. + By default, nobody else besides job-owner, the user who started the cluster, + cluster administrators and queue administrators can perform modification + operations on a job. + + + + + mapreduce.job.acl-view-job + + + String + true + job + + Job specific access-control list for 'viewing' the job + + + Job specific access-control list for 'viewing' the job. It is + only used if authorization is enabled in Map/Reduce by setting the + configuration property mapred.acls.enabled to true. + This specifies the list of users and/or groups who can view private details + about the job. For specifying a list of users and groups the + format to use is "user1,user2 group1,group". If set to '*', it allows all + users/groups to view this job. If set to ' '(i.e. space), it allows + none. This configuration is used to guard some of the job-views and at + present only protects APIs that can return possibly sensitive information + of the job-owner like + o job-level counters + o task-level counters + o tasks' diagnostic information + o task-logs displayed on the TaskTracker web-UI and + o job.xml showed by the JobTracker's web-UI + Every other piece of information of jobs is still accessible by any other + user, for e.g., JobStatus, JobProfile, list of jobs in the queue, etc. 
+ Irrespective of this ACL configuration, job-owner, the user who started the + cluster, cluster administrators configured via + mapreduce.cluster.administrators and queue administrators of the queue to + which this job is submitted to configured via + mapred.queue.queue-name.acl-administer-jobs in mapred-queue-acls.xml can do + all the view operations on a job. + By default, nobody else besides job-owner, the user who started the + cluster, cluster administrators and queue administrators can perform + view operations on a job. + + + + + mapred.tasktracker.indexcache.mb + 10 + Integer + tasktracker + + The maximum memory that a task tracker allows for the index cache + + + The maximum memory that a task tracker allows for the + index cache that is used when serving map outputs to reducers. + + + + + mapred.combine.recordsBeforeProgress + 10000 + Integer + basic + + The number of records to process during combine output collection + before sending a progress notification to the TaskTracker. + + + The number of records to process during combine output collection + before sending a progress notification to the TaskTracker. + + + + + mapred.merge.recordsBeforeProgress + 10000 + Integer + basic + + The number of records to process during merge before + sending a progress notification to the TaskTracker. + + + The number of records to process during merge before + sending a progress notification to the TaskTracker. + + + + + mapred.reduce.slowstart.completed.maps + 0.05 + Float + job + + Fraction of the number of maps in the job which should be + complete before reduces are scheduled for the job + + + Fraction of the number of maps in the job which should be + complete before reduces are scheduled for the job. + + + + + mapred.task.tracker.task-controller + org.apache.hadoop.mapred.DefaultTaskController + Class + basic + + TaskController which is used to launch and manage task execution + + + TaskController which is used to launch and manage task execution + + + + mapreduce.tasktracker.group + + true + String + tasktracker + + Group to which TaskTracker belongs + + + Expert: Group to which TaskTracker belongs. If + LinuxTaskController is configured via mapreduce.tasktracker.taskcontroller, + the group owner of the task-controller binary should be same as this group. + + + + + mapred.disk.healthChecker.interval + 60000 + Integer + health + + node health check interval + + + How often the TaskTracker checks the health of its + local directories. Configuring this to a value smaller than the + heartbeat interval is equivalent to setting this to heartbeat + interval value. + + + + + + mapred.healthChecker.script.path + + true + Integer + health + + node health script path + + + Absolute path to the script which is + periodically run by the node health monitoring service to determine if + the node is healthy or not. If the value of this key is empty or the + file does not exist in the location configured here, the node health + monitoring service is not started. + + + + mapred.healthChecker.interval + 60000 + Integer + health + + Frequency of the node health script to be run + + + Frequency of the node health script to be run + + + + mapred.healthChecker.script.timeout + 600000 + Integer + health + + node health script timeout + + + Time after node health script should be killed if + unresponsive and considered that the script has failed. 
+ + + + mapred.healthChecker.script.args + + true + String + health + + List of arguments which are to be passed to + node health script when it is being launched comma seperated. + + + List of arguments which are to be passed to + node health script when it is being launched comma seperated. + + + + + mapreduce.job.counters.limit + 120 + Integer + job + + Limit on the number of counters allowed per job. + + + Limit on the number of counters allowed per job. + + + + mapreduce.slot.memory.weight + + 50 + Integer + tasktracker + + TaskTracker reserve heapsize weight + + true + directory + + The weight of tasktracker reserve heapsize and the default value is 50. + +
TextItem
+
+ + + mapred.scheduler + fair + String + basic + + Task Schedule + + true + advanced + + Map/Reduce task scheduling method. If the Capacity scheduler is selected, additional items must be configured. +
RadioGroupItem
+ fair,capacity +
+ + mapred.map.reduce.ratio + 2 + Integer + basic + + The map/reduce ratio. + + true + + The map/reduce ratio. + + + + mapred.cpu.ratio.max + 1.5 + Float + basic + + The cpu max usage ratio. + + true + + The cpu max usage ratio. + + + + + mapred.fairscheduler.preemption + false + true + Boolean + perf + + Whether to enable fairscheduler preemption. + + + Whether to enable fairscheduler preemption. + + + + mapred.fairscheduler.assignmultiple + false + true + Boolean + perf + + Whether to assign multiple + + + Whether to assign multiple + + + + mapred.fairscheduler.sizebasedweight + false + true + Boolean + perf + + size based weight + + + size based weight + + + + mapred.fairscheduler.poolnameproperty + false + mapred.queue.name + String + perf + + pool name property + + + job.set("mapred.queue.name",pool); // pool is set to either 'high' or 'low' + + + + mapred.capacity-scheduler.maximum-system-jobs + 3000 + Integer + basic + + Maximum number of jobs in the system which can be initialized, + concurrently, by the CapacityScheduler + + + Maximum number of jobs in the system which can be initialized, + concurrently, by the CapacityScheduler. + + + + mapred.capacity-scheduler.queue.default.capacity + 100 + Integer + basic + + Percentage of the number of slots in the cluster that are + to be available for jobs in this queue. + + + Percentage of the number of slots in the cluster that are + to be available for jobs in this queue. + + + + + mapred.capacity-scheduler.queue.default.maximum-capacity + -1 + Integer + scheduling + + per-queue maximum-capacity + + + + maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster. + This provides a means to limit how much excess capacity a queue can use. By default, there is no limit. + The maximum-capacity of a queue can only be greater than or equal to its minimum capacity. + Default value of -1 implies a queue can use complete capacity of the cluster. + This property could be to curtail certain jobs which are long running in nature from occupying more than a + certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of + other queues being affected. + One important thing to note is that maximum-capacity is a percentage , so based on the cluster's capacity + the max capacity would change. So if large number of nodes or racks get added to the cluster , max Capacity in + absolute terms would increase accordingly. + + + + + mapred.capacity-scheduler.queue.default.supports-priority + false + Boolean + scheduling + + Whether to take jobs' priorities into account in scheduling decisions + + + If true, priorities of jobs will be taken into + account in scheduling decisions. + + + + + mapred.capacity-scheduler.queue.default.minimum-user-limit-percent + 100 + Integer + scheduling + + maximum limit on per-user's allocated percentage of resources + + + + Each queue enforces a limit on the percentage of resources + allocated to a user at any given time, if there is competition for them. + This user limit can vary between a minimum and maximum value. The former + depends on the number of users who have submitted jobs, and the latter is + set to this property value. For example, suppose the value of this + property is 25. If two users have submitted jobs to a queue, no single + user can use more than 50% of the queue resources. If a third user submits + a job, no single user can use more than 33% of the queue resources. 
With 4 + or more users, no user can use more than 25% of the queue's resources. A + value of 100 implies no user limits are imposed. + + + + + mapred.capacity-scheduler.queue.default.user-limit-factor + 1 + Integer + scheduling + + The multiple of the queue capacity which can be configured to + allow a single user to acquire more slots. + + + + The multiple of the queue capacity which can be configured to + allow a single user to acquire more slots. + + + + + mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks + 200000 + Integer + scheduling + + The maximum number of tasks, across all jobs in the queue, + which can be initialized concurrently. + + + The maximum number of tasks, across all jobs in the queue, + which can be initialized concurrently. Once the queue's jobs exceed this + limit they will be queued on disk. + + + + + mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user + 100000 + Integer + scheduling + + The maximum number of tasks per-user, across all the of the + user's jobs in the queue, which can be initialized concurrently + + + The maximum number of tasks per-user, across all the of the + user's jobs in the queue, which can be initialized concurrently. Once the + user's jobs exceed this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.queue.default.init-accept-jobs-factor + 10 + Integer + scheduling + + The multipe of (maximum-system-jobs * queue-capacity) used to + determine the number of jobs which are accepted by the scheduler. + + + The multipe of (maximum-system-jobs * queue-capacity) used to + determine the number of jobs which are accepted by the scheduler. + + + + + + + mapred.capacity-scheduler.default-supports-priority + false + Boolean + scheduling + + If true, priorities of jobs will be taken into + account in scheduling decisions by default in a job queue. + + + If true, priorities of jobs will be taken into + account in scheduling decisions by default in a job queue. + + + + mapred.capacity-scheduler.default-minimum-user-limit-percent + 100 + Integer + scheduling + + The percentage of the resources limited to a particular user + for the job queue at any given point of time by default. + + + The percentage of the resources limited to a particular user + for the job queue at any given point of time by default. + + + + mapred.capacity-scheduler.default-user-limit-factor + 1 + Integer + scheduling + + The default multiple of queue-capacity which is used to + determine the amount of slots a single user can consume concurrently. + + + + The default multiple of queue-capacity which is used to + determine the amount of slots a single user can consume concurrently. + + + + + mapred.capacity-scheduler.default-maximum-active-tasks-per-queue + 200000 + Integer + scheduling + + The default maximum number of tasks, across all jobs in the + queue, which can be initialized concurrently + + + The default maximum number of tasks, across all jobs in the + queue, which can be initialized concurrently. Once the queue's jobs exceed + this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.default-maximum-active-tasks-per-user + 100000 + Integer + scheduling + + The default maximum number of tasks per-user, across all the of + the user's jobs in the queue, which can be initialized concurrently + + + The default maximum number of tasks per-user, across all the of + the user's jobs in the queue, which can be initialized concurrently. 
Once + the user's jobs exceed this limit they will be queued on disk. + + + + + mapred.capacity-scheduler.default-init-accept-jobs-factor + 10 + Integer + scheduling + + The default multipe of (maximum-system-jobs * queue-capacity) + used to determine the number of jobs which are accepted by the scheduler. + + + + The default multipe of (maximum-system-jobs * queue-capacity) + used to determine the number of jobs which are accepted by the scheduler. + + + + + + mapred.capacity-scheduler.init-poll-interval + 5000 + Integer + scheduling + + The amount of time in miliseconds which is used to poll + the job queues for jobs to initialize. + + + The amount of time in miliseconds which is used to poll + the job queues for jobs to initialize. + + + + mapred.capacity-scheduler.init-worker-threads + 5 + Integer + scheduling + + Number of worker threads which would be used by + Initialization poller to initialize jobs in a set of queue + + + Number of worker threads which would be used by + Initialization poller to initialize jobs in a set of queue. + If number mentioned in property is equal to number of job queues + then a single thread would initialize jobs in a queue. If lesser + then a thread would get a set of queues assigned. If the number + is greater then number of threads would be equal to number of + job queues. + + + + + resmon.joblevel.metrics.enabled + false + Boolean + basic + + Whether to enable joblevel metrics + + true + + Whether to enable joblevel metrics + + + + mapred.profiling + false + Boolean + profiling + + Whether to enable profiling or not + + + The flag to determine whether Profiling Feature enable or not. + + + + + directory + Configuration + true + false + + + advanced + Advanced Configuration + true + false + + + basic + Basic Configuration + Basic configurations that get HDFS running. + + + perf + Performance + Configurations that affect Hadoop's performance + + + jobtracker + Jobtracker Configuration + Configurations for Jobtracker. + + + tasktracker + Tasktracker Configuration + Configurations for Tasktracker. + + + map + Map Configuration + Configurations for Map job. + + + reduce + Reduce Configuration + Configurations for Reduce job. + + + io + IO Configuration + Configurations for Input/Output. + + + job + Job Configuration + Configurations for a job. + + + user + Configurations for user + Configurations for user. + + + scheduling + Configurations for scheduling + Configurations for scheduling. + + + profiling + Configurations for profiling + Configurations for profiling. + + + skip + Configurations for skip mode + Configurations for skip mode. + + + health + Configurations for health check + Configurations for health check. + +
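Each property entry above appears to carry a name, a default value, a value type, a scope/section tag (matching the section list at the end of the file), a short caption and an English description. As a minimal, hedged usage sketch: the format can be read with the loader this patch adds to savanna/utils/xmlutils.py further below. The resource path used here is hypothetical, and the snippet assumes the Savanna tree is importable.

```python
# Minimal sketch (not part of this patch): reading typed, localized defaults
# with the loader added to savanna/utils/xmlutils.py later in this change.
# The resource path below is a placeholder; the actual file name under
# savanna/plugins/intel/resources/ is not shown in this hunk.
from savanna.utils import xmlutils as x

configs = x.load_hadoop_xml_defaults_with_type_and_locale(
    'plugins/intel/resources/hadoop-default.xml')  # hypothetical path

# Each entry is a plain dict with 'name', 'value', 'type' and 'description',
# matching the expectations in the new test_xml_utils.py test below.
for cfg in configs:
    print('%s (%s) = %s' % (cfg['name'], cfg['type'], cfg['value']))
```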
diff --git a/savanna/tests/unit/plugins/intel/__init__.py b/savanna/tests/unit/plugins/intel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/savanna/tests/unit/plugins/intel/client/__init__.py b/savanna/tests/unit/plugins/intel/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/savanna/tests/unit/plugins/intel/client/response.py b/savanna/tests/unit/plugins/intel/client/response.py new file mode 100644 index 00000000..b2b867fa --- /dev/null +++ b/savanna/tests/unit/plugins/intel/client/response.py @@ -0,0 +1,28 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + + +class Response: + def __init__(self, data=None, ok=True, status_code=200): + self.text = json.dumps(data) + self.ok = ok + self.status_code = status_code + self.reason = None + + +def make_resp(data=None, ok=True, status_code=200): + return Response(data, ok, status_code) diff --git a/savanna/tests/unit/plugins/intel/client/test_client.py b/savanna/tests/unit/plugins/intel/client/test_client.py new file mode 100644 index 00000000..7565bdb7 --- /dev/null +++ b/savanna/tests/unit/plugins/intel/client/test_client.py @@ -0,0 +1,310 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock + +from savanna import exceptions as ex +from savanna.plugins.intel.client import client as c +from savanna.plugins.intel import exceptions as iex +from savanna.tests.unit import base +from savanna.tests.unit.plugins.intel.client import response as r + + +SESSION_POST_DATA = {'sessionID': '123'} +SESSION_GET_DATA = {"items": [ + { + "nodeprogress": { + "hostname": 'host', + 'info': '_ALLFINISH\n' + } + } +]} + + +class TestClient(base.DbTestCase): + + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_cluster_op(self, get, post): + client = c.IntelClient('qwe', 'rty') + + data = {'lelik': 'bolik'} + + post.return_value = r.make_resp(data) + self.assertEqual(client.cluster.create(), data) + + get.return_value = r.make_resp(data) + self.assertEqual(client.cluster.get(), data) + + post.return_value = r.make_resp(SESSION_POST_DATA) + get.return_value = r.make_resp(SESSION_GET_DATA) + client.cluster.install_software(['bla-bla']) + + self.assertEqual(post.call_count, 2) + self.assertEqual(get.call_count, 2) + + @mock.patch('requests.delete') + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_nodes_op(self, get, post, delete): + client = c.IntelClient('qwe', 'rty') + + # add + post.return_value = r.make_resp(data={ + "items": [ + { + "iporhostname": "n1", + "info": "Connected" + }, + { + "iporhostname": "n2", + "info": "Connected" + } + ] + }) + client.nodes.add(['n1', 'n2'], 'hadoop', '/Def', '/tmp/key') + post.return_value = r.make_resp(data={ + "items": [ + { + "iporhostname": "n1", + "info": "bla-bla" + } + ] + }) + self.assertRaises(iex.IntelPluginException, client.nodes.add, + ['n1'], 'hadoop', '/Def', '/tmp/key') + + # config + post.return_value = r.make_resp(SESSION_POST_DATA) + get.return_value = r.make_resp(SESSION_GET_DATA) + client.nodes.config() + + # delete + delete.return_value = r.make_resp() + client.nodes.delete(['n1']) + + # get + get.return_value = r.make_resp() + client.nodes.get() + + # get_status + get.return_value = r.make_resp(data={"status": "running"}) + client.nodes.get_status(['n1']) + + # stop_nodes + post.return_value = r.make_resp() + client.nodes.stop(['n1']) + + self.assertEqual(delete.call_count, 1) + self.assertEqual(post.call_count, 4) + self.assertEqual(get.call_count, 3) + + @mock.patch('requests.put') + @mock.patch('requests.post') + def test_params_op(self, post, put): + client = c.IntelClient('qwe', 'rty') + post.return_value = r.make_resp() + put.return_value = r.make_resp() + + # add + client.params.hdfs.add('lelik', 'bolik') + client.params.hadoop.add('lelik', 'bolik') + client.params.mapred.add('lelik', 'bolik') + + # get + self.assertRaises(ex.NotImplementedException, client.params.hdfs.get, + ['n1'], 'lelik') + self.assertRaises(ex.NotImplementedException, client.params.hadoop.get, + ['n1'], 'lelik') + self.assertRaises(ex.NotImplementedException, client.params.mapred.get, + ['n1'], 'lelik') + + # update + client.params.hdfs.update('lelik', 'bolik', nodes=['n1']) + client.params.hdfs.update('lelik', 'bolik') + client.params.hadoop.update('lelik', 'bolik', nodes=['n1']) + client.params.hadoop.update('lelik', 'bolik') + client.params.mapred.update('lelik', 'bolik', nodes=['n1']) + client.params.mapred.update('lelik', 'bolik') + + self.assertEqual(post.call_count, 3) + self.assertEqual(put.call_count, 6) + + @mock.patch('savanna.context.sleep', lambda x: None) + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_base_services_op(self, get, post): + client = c.IntelClient('qwe', 
'rty') + + # start + post.return_value = r.make_resp() + get.return_value = r.make_resp(data={ + "items": [ + { + "serviceName": "hdfs", + "status": "running" + }, + { + "serviceName": "mapred", + "status": "running" + } + ]}) + client.services.hdfs.start() + client.services.mapred.start() + + get.return_value = r.make_resp(data={ + "items": [ + { + "serviceName": "hdfs", + "status": "stopped" + }, + { + "serviceName": "mapred", + "status": "stopped" + } + ] + }) + + self.assertRaises(iex.IntelPluginException, + client.services.hdfs.start) + self.assertRaises(iex.IntelPluginException, + client.services.mapred.start) + + # stop + post.return_value = r.make_resp() + client.services.hdfs.stop() + client.services.mapred.stop() + + # service + get.return_value = r.make_resp(data={ + "items": [ + { + "serviceName": "bla-bla", + "status": "fail" + } + ] + }) + + self.assertRaises(iex.IntelPluginException, + client.services.hdfs.status) + self.assertRaises(iex.IntelPluginException, + client.services.mapred.status) + + # get_nodes + get.return_value = r.make_resp() + client.services.hdfs.get_nodes() + client.services.mapred.get_nodes() + + # add_nodes + post.return_value = r.make_resp() + client.services.hdfs.add_nodes('DataNode', ['n1', 'n2']) + client.services.mapred.add_nodes('NameNode', ['n1', 'n2']) + + self.assertEqual(get.call_count, 126) + self.assertEqual(post.call_count, 8) + + @mock.patch('requests.delete') + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_services_op(self, get, post, delete): + client = c.IntelClient('qwe', 'rty') + + # add + post.return_value = r.make_resp() + client.services.add(['hdfs', 'mapred']) + + # get_services + get.return_value = r.make_resp() + client.services.get_services() + + # delete_service + delete.return_value = r.make_resp() + client.services.delete_service('hdfs') + + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_hdfs_services_op(self, get, post): + client = c.IntelClient('qwe', 'rty') + + # format + get.return_value = r.make_resp(SESSION_GET_DATA) + post.return_value = r.make_resp(SESSION_POST_DATA) + client.services.hdfs.format() + + # decommission + post.return_value = r.make_resp() + client.services.hdfs.decommission_nodes(['n1']) + + # get status + get.return_value = r.make_resp(data={ + "items": [ + { + "hostname": "n1", + "status": "start" + } + ] + }) + client.services.hdfs.get_datanodes_status() + self.assertEqual(client.services.hdfs.get_datanode_status('n1'), + 'start') + self.assertRaises(iex.IntelPluginException, + client.services.hdfs.get_datanode_status, 'n2') + + self.assertEqual(get.call_count, 4) + self.assertEqual(post.call_count, 2) + + @mock.patch('savanna.context.sleep', lambda x: None) + @mock.patch('requests.post') + @mock.patch('requests.get') + def test_session_op(self, get, post): + client = c.IntelClient('qwe', 'rty') + + data1 = { + "items": [ + { + "nodeprogress": { + "hostname": 'host', + 'info': 'info\n' + } + } + ] + } + data2 = { + "items": [ + { + "nodeprogress": { + "hostname": 'host', + 'info': '_ALLFINISH\n' + } + } + ] + } + + get.side_effect = (r.make_resp(data1), r.make_resp(data2)) + post.return_value = r.make_resp(SESSION_POST_DATA) + + client.services.hdfs.format() + + self.assertEqual(get.call_count, 2) + self.assertEqual(post.call_count, 1) + + @mock.patch('requests.get') + def test_rest_client(self, get): + client = c.IntelClient('qwe', 'rty') + get.return_value = r.make_resp(ok=False, status_code=500, data={ + "message": "message" + }) + 
self.assertRaises(iex.IntelPluginException, + client.services.get_services) diff --git a/savanna/tests/unit/plugins/intel/test_plugin.py b/savanna/tests/unit/plugins/intel/test_plugin.py new file mode 100644 index 00000000..0fe46a19 --- /dev/null +++ b/savanna/tests/unit/plugins/intel/test_plugin.py @@ -0,0 +1,61 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from savanna.plugins.general import exceptions as g_ex +from savanna.plugins.intel import config_helper as c_helper +from savanna.plugins.intel import exceptions as i_ex +from savanna.plugins.intel import plugin as p +from savanna.tests.unit import base +from savanna.tests.unit.plugins.intel import test_utils as tu + + +class TestIDHPlugin(base.DbTestCase): + def test_get_configs(self): + plugin = p.IDHProvider() + configs = plugin.get_configs('2.5.0') + + self.assertIn(c_helper.IDH_REPO_URL, configs) + self.assertIn(c_helper.IDH_TARBALL_URL, configs) + self.assertIn(c_helper.OS_REPO_URL, configs) + + def test_validate(self): + plugin = p.IDHProvider() + + ng_mng = tu.make_ng_dict('mng', 'f1', ['manager'], 1) + ng_nn = tu.make_ng_dict('nn', 'f1', ['namenode'], 1) + ng_jt = tu.make_ng_dict('jt', 'f1', ['jobtracker'], 1) + ng_dn = tu.make_ng_dict('dn', 'f1', ['datanode'], 2) + ng_tt = tu.make_ng_dict('tt', 'f1', ['tasktracker'], 2) + + cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', + [ng_nn] + [ng_dn]) + self.assertRaises(i_ex.NotSingleManagerException, plugin.validate, cl) + + cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', [ng_mng]) + self.assertRaises(g_ex.NotSingleNameNodeException, plugin.validate, cl) + + cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', + [ng_mng] + [ng_nn] * 2) + self.assertRaises(g_ex.NotSingleNameNodeException, plugin.validate, cl) + + cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', + [ng_mng] + [ng_nn] + [ng_tt]) + self.assertRaises(g_ex.TaskTrackersWithoutJobTracker, + plugin.validate, cl) + + cl = tu.create_cluster('cl1', 't1', 'intel', '2.5.0', + [ng_mng] + [ng_nn] + [ng_jt] * 2 + [ng_tt]) + self.assertRaises(g_ex.NotSingleJobTrackerException, + plugin.validate, cl) diff --git a/savanna/tests/unit/plugins/intel/test_utils.py b/savanna/tests/unit/plugins/intel/test_utils.py new file mode 100644 index 00000000..04cb0c6d --- /dev/null +++ b/savanna/tests/unit/plugins/intel/test_utils.py @@ -0,0 +1,32 @@ +# Copyright (c) 2013 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from savanna.conductor import resource as r + + +def create_cluster(name, tenant, plugin, version, node_groups, **kwargs): + dct = {'name': name, 'tenant_id': tenant, 'plugin_name': plugin, + 'hadoop_version': version, 'node_groups': node_groups} + dct.update(kwargs) + return r.ClusterResource(dct) + + +def make_ng_dict(name, flavor, processes, count, instances=[]): + return {'name': name, 'flavor_id': flavor, 'node_processes': processes, + 'count': count, 'instances': instances} + + +def make_inst_dict(inst_id, inst_name): + return {'instance_id': inst_id, 'instance_name': inst_name} diff --git a/savanna/tests/unit/resources/test-default-with-type-and-locale.xml b/savanna/tests/unit/resources/test-default-with-type-and-locale.xml new file mode 100644 index 00000000..c4b86fb8 --- /dev/null +++ b/savanna/tests/unit/resources/test-default-with-type-and-locale.xml @@ -0,0 +1,43 @@ + + + + + name1 + value1 + String + + descr1 + + + + + name2 + value2 + + descr2 + + + + + name3 + + String + + descr3 + + + + + name4 + String + + descr4 + + + + + name5 + value5 + String + + diff --git a/savanna/tests/unit/utils/test_xml_utils.py b/savanna/tests/unit/utils/test_xml_utils.py index 2b3b8202..5611cdba 100644 --- a/savanna/tests/unit/utils/test_xml_utils.py +++ b/savanna/tests/unit/utils/test_xml_utils.py @@ -36,6 +36,24 @@ class XMLUtilsTestCase(unittest2.TestCase): x.load_hadoop_xml_defaults( 'tests/unit/resources/test-default.xml')) + def test_load_xml_defaults_with_type_and_locale(self): + expected = [ + {'name': u'name1', 'value': u'value1', 'type': u'String', + 'description': 'descr1'}, + {'name': u'name2', 'value': u'value2', 'type': u'', + 'description': 'descr2'}, + {'name': u'name3', 'value': '', 'type': u'String', + 'description': 'descr3'}, + {'name': u'name4', 'value': '', 'type': u'String', + 'description': 'descr4'}, + {'name': u'name5', 'value': u'value5', 'type': u'String', + 'description': ''}] + actual = x.load_hadoop_xml_defaults_with_type_and_locale( + 'tests/unit/resources/test-default-with-type-and-locale.xml') + self.assertListEqual( + expected, + actual) + def test_adjust_description(self): self.assertEqual(x._adjust_field("\n"), "") self.assertEqual(x._adjust_field("\n "), "") diff --git a/savanna/utils/xmlutils.py b/savanna/utils/xmlutils.py index 95306422..7e2464cf 100644 --- a/savanna/utils/xmlutils.py +++ b/savanna/utils/xmlutils.py @@ -36,6 +36,27 @@ def load_hadoop_xml_defaults(file_name): return configs +def load_hadoop_xml_defaults_with_type_and_locale(file_name): + doc = load_xml_document(file_name) + configs = [] + prop = doc.getElementsByTagName('property') + for elements in prop: + configs.append({ + 'name': _get_text_from_node(elements, 'name'), + 'value': _get_text_from_node(elements, 'value'), + 'type': _get_text_from_node(elements, 'valuetype'), + 'description': _adjust_field( + _get_text_from_node( + _get_node_element(elements, 'description'), 'en')) + }) + return configs + + +def _get_node_element(element, name): + element = element.getElementsByTagName(name) + return element[0] if element and element[0].hasChildNodes() else None + + def create_hadoop_xml(configs, config_filter=None): doc = xml.Document() @@ -68,7 +89,7 @@ def load_xml_document(file_name): def _get_text_from_node(element, name): - element = element.getElementsByTagName(name) + element = element.getElementsByTagName(name) if element else None return element[0].firstChild.nodeValue if ( element and element[0].hasChildNodes()) else '' diff --git a/setup.cfg b/setup.cfg index 
64b96a35..e5cffc29 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,7 @@ console_scripts =
 savanna.cluster.plugins =
     vanilla = savanna.plugins.vanilla.plugin:VanillaProvider
     hdp = savanna.plugins.hdp.ambariplugin:AmbariPlugin
+    idh = savanna.plugins.intel.plugin:IDHProvider
 savanna.infrastructure.engine =
     savanna = savanna.service.instances:SavannaInfrastructureEngine
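With the `idh` entry point registered above, the provider can be resolved like any other `savanna.cluster.plugins` plugin. The sketch below is illustrative only: it assumes plain `pkg_resources` entry-point iteration rather than Savanna's own plugin manager, and it calls the same method exercised in the new test_plugin.py.

```python
# Illustrative sketch (not part of this patch): resolving the newly
# registered 'idh' entry point. Direct use of pkg_resources is an
# assumption; Savanna's plugin manager may load plugins differently.
import pkg_resources

for ep in pkg_resources.iter_entry_points('savanna.cluster.plugins'):
    if ep.name == 'idh':
        provider_cls = ep.load()   # savanna.plugins.intel.plugin:IDHProvider
        provider = provider_cls()
        print(provider.get_configs('2.5.0'))
```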