
Move the savanna subdir to sahara

Rename the subdirectory and replace all instances
of "import savanna" with "import sahara" and all
instances of "from savanna" with "from sahara"
(a sketch of the mechanical rewrite follows the list below).

* Replaced mock patches like mock.patch('savanna...
* Updated the config generator script
* Renamed entry points in setup.cfg
* Fixed hacking checks
* Manually renamed references in alembic scripts to keep migrations working
* Fixed doc building
* Renamed itests directories
* Updated .gitignore
* Removed the locale dir after rebase
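
The rewrite itself is mechanical. A minimal sketch of the subtree move and
import replacement, for illustration only (the actual change was made once
and reviewed by hand; this script is hypothetical):

# Hypothetical sketch of the rename described above. Run from the repo root.
import os
import re

os.rename('savanna', 'sahara')  # move the subtree

pattern = re.compile(r'\b(import|from) savanna\b')
for dirpath, _dirnames, filenames in os.walk('sahara'):
    for filename in filenames:
        if not filename.endswith('.py'):
            continue
        path = os.path.join(dirpath, filename)
        with open(path) as f:
            text = f.read()
        new_text = pattern.sub(r'\1 sahara', text)
        if new_text != text:
            with open(path, 'w') as f:
                f.write(new_text)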

Co-Authored-By: Alexander Ignatov <aignatov@mirantis.com>

Change-Id: Ia77252c24046c3e7283c0a7b96d11636020b949c
Partially implements: blueprint savanna-renaming-service
Authored by Trevor McKay; committed by Alexander Ignatov. Commit 8578f2f19a
  11  .gitignore
   0  sahara/__init__.py
   0  sahara/plugins/__init__.py
   0  sahara/plugins/vanilla/__init__.py
  62  sahara/plugins/vanilla/abstractversionhandler.py
  79  sahara/plugins/vanilla/plugin.py
   0  sahara/plugins/vanilla/v1_2_1/__init__.py
 451  sahara/plugins/vanilla/v1_2_1/config_helper.py
  45  sahara/plugins/vanilla/v1_2_1/mysql_helper.py
  62  sahara/plugins/vanilla/v1_2_1/oozie_helper.py
  26  sahara/plugins/vanilla/v1_2_1/resources/README.rst
 632  sahara/plugins/vanilla/v1_2_1/resources/core-default.xml
   9  sahara/plugins/vanilla/v1_2_1/resources/create_hive_db.sql
   4  sahara/plugins/vanilla/v1_2_1/resources/create_oozie_db.sql
 709  sahara/plugins/vanilla/v1_2_1/resources/hdfs-default.xml
1873  sahara/plugins/vanilla/v1_2_1/resources/hive-default.xml
1328  sahara/plugins/vanilla/v1_2_1/resources/mapred-default.xml
1929  sahara/plugins/vanilla/v1_2_1/resources/oozie-default.xml
  20  sahara/plugins/vanilla/v1_2_1/resources/topology.sh
 107  sahara/plugins/vanilla/v1_2_1/run_scripts.py
 101  sahara/plugins/vanilla/v1_2_1/scaling.py
 484  sahara/plugins/vanilla/v1_2_1/versionhandler.py
   0  sahara/plugins/vanilla/v2_3_0/__init__.py
 247  sahara/plugins/vanilla/v2_3_0/config.py
 173  sahara/plugins/vanilla/v2_3_0/config_helper.py
  23  sahara/plugins/vanilla/v2_3_0/resources/README.rst
1293  sahara/plugins/vanilla/v2_3_0/resources/core-default.xml
1607  sahara/plugins/vanilla/v2_3_0/resources/hdfs-default.xml
1741  sahara/plugins/vanilla/v2_3_0/resources/mapred-default.xml
  31  sahara/plugins/vanilla/v2_3_0/resources/post_conf.template
1052  sahara/plugins/vanilla/v2_3_0/resources/yarn-default.xml
  54  sahara/plugins/vanilla/v2_3_0/run_scripts.py
 124  sahara/plugins/vanilla/v2_3_0/scaling.py
  46  sahara/plugins/vanilla/v2_3_0/utils.py
  85  sahara/plugins/vanilla/v2_3_0/validation.py
 104  sahara/plugins/vanilla/v2_3_0/versionhandler.py
  53  sahara/plugins/vanilla/versionfactory.py
   0  sahara/tests/__init__.py
   0  sahara/tests/unit/__init__.py
  64  sahara/tests/unit/base.py
   0  sahara/tests/unit/plugins/__init__.py
   0  sahara/tests/unit/plugins/vanilla/__init__.py
   0  sahara/tests/unit/plugins/vanilla/v1_2_1/__init__.py
  54  sahara/tests/unit/plugins/vanilla/v1_2_1/test_dfsadmin_parsing.py
 288  sahara/tests/unit/plugins/vanilla/v1_2_1/test_plugin.py
  44  sahara/tests/unit/plugins/vanilla/v1_2_1/test_run_scripts.py
   0  sahara/tests/unit/plugins/vanilla/v2_3_0/__init__.py
  62  sahara/tests/unit/plugins/vanilla/v2_3_0/resources/dfs-report.txt
   6  sahara/tests/unit/plugins/vanilla/v2_3_0/resources/yarn-report.txt
  71  sahara/tests/unit/plugins/vanilla/v2_3_0/test_configs.py
  66  sahara/tests/unit/plugins/vanilla/v2_3_0/test_utils.py
  58  sahara/tests/unit/plugins/vanilla/v2_3_0/test_validation.py
   0  sahara/utils/__init__.py
  67  sahara/utils/patches.py
  42  setup.cfg
   6  tox.ini

11  .gitignore

@@ -35,15 +35,18 @@ etc/local.cfg
-etc/savanna/*.conf
-etc/savanna/*.topology
-etc/savanna.conf
+etc/sahara/*.conf
+etc/sahara/*.topology
+etc/sahara.conf
 ChangeLog
-savanna/tests/integration/configs/itest.conf
+sahara/tests/integration/configs/itest.conf
 cscope.out
 tools/lintstack.head.py
 tools/pylint_exceptions
-savanna/tests/cover
-savanna/tests/coverage.xml
+sahara/tests/cover
+sahara/tests/coverage.xml
 cover
 htmlcov
-savanna/openstack/common/db/savanna.sqlite
+sahara/openstack/common/db/sahara.sqlite
 .testrepository
 AUTHORS

0  sahara/__init__.py

0  sahara/plugins/__init__.py

0  sahara/plugins/vanilla/__init__.py

62  sahara/plugins/vanilla/abstractversionhandler.py

@@ -0,0 +1,62 @@
# Copyright (c) 2014 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc

import six


@six.add_metaclass(abc.ABCMeta)
class AbstractVersionHandler():
    @abc.abstractmethod
    def get_node_processes(self):
        return

    @abc.abstractmethod
    def get_plugin_configs(self):
        return

    @abc.abstractmethod
    def configure_cluster(self, cluster):
        return

    @abc.abstractmethod
    def start_cluster(self, cluster):
        return

    @abc.abstractmethod
    def validate(self, cluster):
        return

    @abc.abstractmethod
    def scale_cluster(self, cluster, instances):
        return

    @abc.abstractmethod
    def decommission_nodes(self, cluster, instances):
        return

    @abc.abstractmethod
    def validate_scaling(self, cluster, existing, additional):
        return

    @abc.abstractmethod
    def get_resource_manager_uri(self, cluster):
        return

    @abc.abstractmethod
    def get_oozie_server(self, cluster):
        return
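
For reference, a minimal concrete handler satisfying this interface might
look as follows. This class and its return values are hypothetical, not part
of this change:

# Hypothetical sketch only: a minimal concrete handler for the
# AbstractVersionHandler contract above. All values are illustrative.
from sahara.plugins.vanilla import abstractversionhandler as avh


class DummyVersionHandler(avh.AbstractVersionHandler):
    def get_node_processes(self):
        # Map service name -> node processes it provides.
        return {"HDFS": ["namenode", "datanode"]}

    def get_plugin_configs(self):
        return []

    def configure_cluster(self, cluster):
        pass

    def start_cluster(self, cluster):
        pass

    def validate(self, cluster):
        pass

    def scale_cluster(self, cluster, instances):
        pass

    def decommission_nodes(self, cluster, instances):
        pass

    def validate_scaling(self, cluster, existing, additional):
        pass

    def get_resource_manager_uri(self, cluster):
        # Assumed URI shape; real handlers derive this from the cluster.
        return "http://example:8032"

    def get_oozie_server(self, cluster):
        return None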

79  sahara/plugins/vanilla/plugin.py

@@ -0,0 +1,79 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara.plugins import provisioning as p
from sahara.plugins.vanilla import versionfactory as vhf


class VanillaProvider(p.ProvisioningPluginBase):
    def __init__(self):
        self.version_factory = vhf.VersionFactory.get_instance()

    def get_description(self):
        return (
            "This plugin provides an ability to launch vanilla Apache Hadoop "
            "1.2.1 cluster without any management consoles. Also it can "
            "deploy Oozie 4.0.0 and Hive 0.11.0")

    def _get_version_handler(self, hadoop_version):
        return self.version_factory.get_version_handler(hadoop_version)

    def get_resource_manager_uri(self, cluster):
        return self._get_version_handler(
            cluster.hadoop_version).get_resource_manager_uri(cluster)

    def get_hdfs_user(self):
        return 'hadoop'

    def get_node_processes(self, hadoop_version):
        return self._get_version_handler(hadoop_version).get_node_processes()

    def get_versions(self):
        return self.version_factory.get_versions()

    def get_title(self):
        return "Vanilla Apache Hadoop"

    def get_configs(self, hadoop_version):
        return self._get_version_handler(hadoop_version).get_plugin_configs()

    def configure_cluster(self, cluster):
        return self._get_version_handler(
            cluster.hadoop_version).configure_cluster(cluster)

    def start_cluster(self, cluster):
        return self._get_version_handler(
            cluster.hadoop_version).start_cluster(cluster)

    def validate(self, cluster):
        return self._get_version_handler(
            cluster.hadoop_version).validate(cluster)

    def scale_cluster(self, cluster, instances):
        return self._get_version_handler(
            cluster.hadoop_version).scale_cluster(cluster, instances)

    def decommission_nodes(self, cluster, instances):
        return self._get_version_handler(
            cluster.hadoop_version).decommission_nodes(cluster, instances)

    def validate_scaling(self, cluster, existing, additional):
        return self._get_version_handler(
            cluster.hadoop_version).validate_scaling(cluster, existing,
                                                     additional)

    def get_oozie_server(self, cluster):
        return self._get_version_handler(
            cluster.hadoop_version).get_oozie_server(cluster)
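
VanillaProvider is a thin dispatcher: every operation is delegated to the
handler registered for the cluster's hadoop_version. A hedged usage sketch
(it assumes a '1.2.1' handler is registered with the VersionFactory):

# Illustrative only: how the provider delegates by version.
from sahara.plugins.vanilla import plugin

provider = plugin.VanillaProvider()
print(provider.get_title())                  # Vanilla Apache Hadoop
print(provider.get_node_processes('1.2.1'))  # answered by the 1.2.1 handler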

0  sahara/plugins/vanilla/v1_2_1/__init__.py

451  sahara/plugins/vanilla/v1_2_1/config_helper.py

@@ -0,0 +1,451 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo.config import cfg

from sahara import conductor as c
from sahara import context
from sahara.openstack.common import log as logging
from sahara.plugins.general import utils
from sahara.plugins import provisioning as p
from sahara.plugins.vanilla.v1_2_1 import mysql_helper as m_h
from sahara.plugins.vanilla.v1_2_1 import oozie_helper as o_h
from sahara.swift import swift_helper as swift
from sahara.topology import topology_helper as topology
from sahara.utils import crypto
from sahara.utils import types as types
from sahara.utils import xmlutils as x

conductor = c.API
LOG = logging.getLogger(__name__)
CONF = cfg.CONF

CORE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/core-default.xml')

HDFS_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/hdfs-default.xml')

MAPRED_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/mapred-default.xml')

HIVE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/hive-default.xml')

# Append Oozie configs for core-site.xml
CORE_DEFAULT += o_h.OOZIE_CORE_DEFAULT

XML_CONFS = {
    "HDFS": [CORE_DEFAULT, HDFS_DEFAULT],
    "MapReduce": [MAPRED_DEFAULT],
    "JobFlow": [o_h.OOZIE_DEFAULT],
    "Hive": [HIVE_DEFAULT]
}

# TODO(aignatov): Environmental configs could be more complex
ENV_CONFS = {
    "MapReduce": {
        'Job Tracker Heap Size': 'HADOOP_JOBTRACKER_OPTS=\\"-Xmx%sm\\"',
        'Task Tracker Heap Size': 'HADOOP_TASKTRACKER_OPTS=\\"-Xmx%sm\\"'
    },
    "HDFS": {
        'Name Node Heap Size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"',
        'Data Node Heap Size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"'
    },
    "JobFlow": {
        'Oozie Heap Size': 'CATALINA_OPTS -Xmx%sm'
    }
}

ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
                        config_type="bool", priority=1,
                        default_value=True, is_optional=True)

ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
                                config_type="bool", priority=1,
                                default_value=True, is_optional=True)

ENABLE_MYSQL = p.Config('Enable MySQL', 'general', 'cluster',
                        config_type="bool", priority=1,
                        default_value=True, is_optional=True)

# Default set to 1 day, which is the default Keystone token
# expiration time. After the token is expired we can't continue
# scaling anyway.
DECOMISSIONING_TIMEOUT = p.Config('Decomissioning Timeout', 'general',
                                  'cluster', config_type='int', priority=1,
                                  default_value=86400, is_optional=True,
                                  description='Timeout for datanode'
                                              ' decomissioning operation'
                                              ' during scaling, in seconds')

HIDDEN_CONFS = ['fs.default.name', 'dfs.name.dir', 'dfs.data.dir',
                'mapred.job.tracker', 'mapred.system.dir', 'mapred.local.dir',
                'hadoop.proxyuser.hadoop.hosts',
                'hadoop.proxyuser.hadoop.groups']

CLUSTER_WIDE_CONFS = ['dfs.block.size', 'dfs.permissions', 'dfs.replication',
                      'dfs.replication.min', 'dfs.replication.max',
                      'io.file.buffer.size', 'mapreduce.job.counters.max',
                      'mapred.output.compress', 'io.compression.codecs',
                      'mapred.output.compression.codec',
                      'mapred.output.compression.type',
                      'mapred.compress.map.output',
                      'mapred.map.output.compression.codec']

PRIORITY_1_CONFS = ['dfs.datanode.du.reserved',
                    'dfs.datanode.failed.volumes.tolerated',
                    'dfs.datanode.max.xcievers', 'dfs.datanode.handler.count',
                    'dfs.namenode.handler.count', 'mapred.child.java.opts',
                    'mapred.jobtracker.maxtasks.per.job',
                    'mapred.job.tracker.handler.count',
                    'mapred.map.child.java.opts',
                    'mapred.reduce.child.java.opts',
                    'io.sort.mb', 'mapred.tasktracker.map.tasks.maximum',
                    'mapred.tasktracker.reduce.tasks.maximum']

# for now we have not so many cluster-wide configs
# lets consider all of them having high priority
PRIORITY_1_CONFS += CLUSTER_WIDE_CONFS


def _initialise_configs():
    configs = []
    for service, config_lists in XML_CONFS.iteritems():
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif types.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    for service, config_items in ENV_CONFS.iteritems():
        for name, param_format_str in config_items.iteritems():
            configs.append(p.Config(name, service, "node",
                                    default_value=1024, priority=1,
                                    config_type="int"))

    configs.append(ENABLE_SWIFT)
    configs.append(ENABLE_MYSQL)
    configs.append(DECOMISSIONING_TIMEOUT)
    if CONF.enable_data_locality:
        configs.append(ENABLE_DATA_LOCALITY)

    return configs


# Initialise plugin Hadoop configurations
PLUGIN_CONFIGS = _initialise_configs()


def get_plugin_configs():
    return PLUGIN_CONFIGS


def get_general_configs(hive_hostname, passwd_hive_mysql):
    config = {
        ENABLE_SWIFT.name: {
            'default_value': ENABLE_SWIFT.default_value,
            'conf': extract_name_values(swift.get_swift_configs())
        },
        ENABLE_MYSQL.name: {
            'default_value': ENABLE_MYSQL.default_value,
            'conf': m_h.get_required_mysql_configs(
                hive_hostname, passwd_hive_mysql)
        }
    }
    if CONF.enable_data_locality:
        config.update({
            ENABLE_DATA_LOCALITY.name: {
                'default_value': ENABLE_DATA_LOCALITY.default_value,
                'conf': extract_name_values(
                    topology.vm_awareness_all_config())
            }
        })
    return config


def get_config_value(service, name, cluster=None):
    if cluster:
        savanna_configs = generate_savanna_configs(cluster)
        if savanna_configs.get(name):
            return savanna_configs[name]

        for ng in cluster.node_groups:
            if (ng.configuration().get(service) and
                    ng.configuration()[service].get(name)):
                return ng.configuration()[service][name]

    for c in PLUGIN_CONFIGS:
        if c.applicable_target == service and c.name == name:
            return c.default_value

    raise RuntimeError("Unable to get parameter '%s' from service %s"
                       % (name, service))


def generate_cfg_from_general(cfg, configs, general_config,
                              rest_excluded=False):
    if 'general' in configs:
        for nm in general_config:
            if nm not in configs['general'] and not rest_excluded:
                configs['general'][nm] = general_config[nm]['default_value']
        for name, value in configs['general'].items():
            if value:
                cfg = _set_config(cfg, general_config, name)
                LOG.info("Applying config: %s" % name)
    else:
        cfg = _set_config(cfg, general_config)
    return cfg


def _get_hostname(service):
    return service.hostname() if service else None


def get_hadoop_ssh_keys(cluster):
    extra = cluster.extra or {}
    private_key = extra.get('hadoop_private_ssh_key')
    public_key = extra.get('hadoop_public_ssh_key')
    if not private_key or not public_key:
        private_key, public_key = crypto.generate_key_pair()
        extra['hadoop_private_ssh_key'] = private_key
        extra['hadoop_public_ssh_key'] = public_key
        conductor.cluster_update(context.ctx(), cluster, {'extra': extra})

    return private_key, public_key


def generate_savanna_configs(cluster, node_group=None):
    nn_hostname = _get_hostname(utils.get_namenode(cluster))
    jt_hostname = _get_hostname(utils.get_jobtracker(cluster))
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg


def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)
        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs


def extract_environment_confs(configs):
    """Returns list of Hadoop parameters which should be passed via environment
    """
    lst = []
    for service, srv_confs in configs.items():
        if ENV_CONFS.get(service):
            for param_name, param_value in srv_confs.items():
                for cfg_name, cfg_format_str in ENV_CONFS[service].items():
                    if param_name == cfg_name and param_value is not None:
                        lst.append(cfg_format_str % param_value)
        else:
            LOG.warn("Plugin received wrong applicable target '%s' in "
                     "environmental configs" % service)
    return lst


def extract_xml_confs(configs):
    """Returns list of Hadoop parameters which should be passed into general
    configs like core-site.xml
    """
    lst = []
    for service, srv_confs in configs.items():
        if XML_CONFS.get(service):
            for param_name, param_value in srv_confs.items():
                for cfg_list in XML_CONFS[service]:
                    names = [cfg['name'] for cfg in cfg_list]
                    if param_name in names and param_value is not None:
                        lst.append((param_name, param_value))
        else:
            LOG.warn("Plugin received wrong applicable target '%s' for "
                     "xml configs" % service)
    return lst


def generate_setup_script(storage_paths, env_configs, append_oozie=False):
    script_lines = ["#!/bin/bash -x"]
    script_lines.append("echo -n > /tmp/hadoop-env.sh")
    for line in env_configs:
        if 'HADOOP' in line:
            script_lines.append('echo "%s" >> /tmp/hadoop-env.sh' % line)
    script_lines.append("cat /etc/hadoop/hadoop-env.sh >> /tmp/hadoop-env.sh")
    script_lines.append("cp /tmp/hadoop-env.sh /etc/hadoop/hadoop-env.sh")

    hadoop_log = storage_paths[0] + "/log/hadoop/\$USER/"
    script_lines.append('sed -i "s,export HADOOP_LOG_DIR=.*,'
                        'export HADOOP_LOG_DIR=%s," /etc/hadoop/hadoop-env.sh'
                        % hadoop_log)

    hadoop_log = storage_paths[0] + "/log/hadoop/hdfs"
    script_lines.append('sed -i "s,export HADOOP_SECURE_DN_LOG_DIR=.*,'
                        'export HADOOP_SECURE_DN_LOG_DIR=%s," '
                        '/etc/hadoop/hadoop-env.sh' % hadoop_log)

    if append_oozie:
        o_h.append_oozie_setup(script_lines, env_configs)

    for path in storage_paths:
        script_lines.append("chown -R hadoop:hadoop %s" % path)
        script_lines.append("chmod -R 755 %s" % path)
    return "\n".join(script_lines)


def extract_name_values(configs):
    return dict((cfg['name'], cfg['value']) for cfg in configs)


def extract_hadoop_path(lst, hadoop_dir):
    if lst:
        return ",".join([p + hadoop_dir for p in lst])


def _set_config(cfg, gen_cfg, name=None):
    if name in gen_cfg:
        cfg.update(gen_cfg[name]['conf'])
    if name is None:
        for name in gen_cfg:
            cfg.update(gen_cfg[name]['conf'])
    return cfg


def _get_general_cluster_config_value(cluster, option):
    conf = cluster.cluster_configs

    if 'general' in conf and option.name in conf['general']:
        return conf['general'][option.name]

    return option.default_value


def is_mysql_enable(cluster):
    return _get_general_cluster_config_value(cluster, ENABLE_MYSQL)


def is_data_locality_enabled(cluster):
    if not CONF.enable_data_locality:
        return False
    return _get_general_cluster_config_value(cluster, ENABLE_DATA_LOCALITY)


def get_decommissioning_timeout(cluster):
    return _get_general_cluster_config_value(cluster, DECOMISSIONING_TIMEOUT)


def get_port_from_config(service, name, cluster=None):
    address = get_config_value(service, name, cluster)
    return utils.get_port_from_address(address)
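
The ENV_CONFS entries above are plain %-format templates that turn a heap
size in megabytes into a hadoop-env.sh line; the doubled backslashes survive
formatting so that the shell echo in generate_setup_script writes literal
double quotes. A small illustration:

# Illustrative only: expanding one of the ENV_CONFS templates above.
template = 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"'
line = template % 1024
print(line)
# Prints: HADOOP_NAMENODE_OPTS=\"-Xmx1024m\"
# generate_setup_script() then emits
#   echo "HADOOP_NAMENODE_OPTS=\"-Xmx1024m\"" >> /tmp/hadoop-env.sh
# so the written hadoop-env.sh line carries real double quotes.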

45  sahara/plugins/vanilla/v1_2_1/mysql_helper.py

@@ -0,0 +1,45 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def get_hive_mysql_configs(metastore_host, passwd):
    return {
        'javax.jdo.option.ConnectionURL': 'jdbc:mysql://%s/metastore' %
                                          metastore_host,
        'javax.jdo.option.ConnectionDriverName': 'com.mysql.jdbc.Driver',
        'javax.jdo.option.ConnectionUserName': 'hive',
        'javax.jdo.option.ConnectionPassword': passwd,
        'datanucleus.autoCreateSchema': 'false',
        'datanucleus.fixedDatastore': 'true',
        'hive.metastore.uris': 'thrift://%s:9083' % metastore_host,
    }


def get_oozie_mysql_configs():
    return {
        'oozie.service.JPAService.jdbc.driver':
        'com.mysql.jdbc.Driver',
        'oozie.service.JPAService.jdbc.url':
        'jdbc:mysql://localhost:3306/oozie',
        'oozie.service.JPAService.jdbc.username': 'oozie',
        'oozie.service.JPAService.jdbc.password': 'oozie'
    }


def get_required_mysql_configs(hive_hostname, passwd_mysql):
    configs = get_oozie_mysql_configs()
    if hive_hostname:
        configs.update(get_hive_mysql_configs(hive_hostname, passwd_mysql))
    return configs
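
For reference, a sketch of what get_required_mysql_configs produces when a
Hive metastore host is present; 'hive-host' and 'secret' are made-up values:

# Illustrative only: combined Oozie + Hive MySQL settings.
from sahara.plugins.vanilla.v1_2_1 import mysql_helper as m_h

configs = m_h.get_required_mysql_configs('hive-host', 'secret')
print(configs['javax.jdo.option.ConnectionURL'])
# jdbc:mysql://hive-host/metastore
print(configs['oozie.service.JPAService.jdbc.url'])
# jdbc:mysql://localhost:3306/oozie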

62  sahara/plugins/vanilla/v1_2_1/oozie_helper.py

@@ -0,0 +1,62 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara.utils import xmlutils as x


OOZIE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/oozie-default.xml')

OOZIE_CORE_DEFAULT = [
    {
        'name': 'hadoop.proxyuser.hadoop.hosts',
        'value': "localhost"
    },
    {
        'name': 'hadoop.proxyuser.hadoop.groups',
        'value': 'hadoop'
    }]

OOZIE_HEAPSIZE_DEFAULT = "CATALINA_OPTS -Xmx1024m"


def get_oozie_required_xml_configs():
    """Following configs differ from default configs in oozie-default.xml."""
    return {
        'oozie.service.ActionService.executor.ext.classes':
        'org.apache.oozie.action.email.EmailActionExecutor,'
        'org.apache.oozie.action.hadoop.HiveActionExecutor,'
        'org.apache.oozie.action.hadoop.ShellActionExecutor,'
        'org.apache.oozie.action.hadoop.SqoopActionExecutor,'
        'org.apache.oozie.action.hadoop.DistcpActionExecutor',

        'oozie.service.SchemaService.wf.ext.schemas':
        'shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,'
        'email-action-0.1.xsd,hive-action-0.2.xsd,hive-action-0.3.xsd,'
        'hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,'
        'sqoop-action-0.3.xsd,sqoop-action-0.4.xsd,ssh-action-0.1.xsd,'
        'ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,'
        'oozie-sla-0.1.xsd,oozie-sla-0.2.xsd',

        'oozie.service.JPAService.create.db.schema': 'false',
    }


def append_oozie_setup(setup_script, env_configs):
    for line in env_configs:
        if 'CATALINA_OPT' in line:
            setup_script.append('sed -i "s,%s,%s," '
                                '/opt/oozie/conf/oozie-env.sh'
                                % (OOZIE_HEAPSIZE_DEFAULT, line))

26  sahara/plugins/vanilla/v1_2_1/resources/README.rst

@@ -0,0 +1,26 @@
Apache Hadoop Configurations for Savanna
========================================

This directory contains default XML configuration files:

* core-default.xml
* hdfs-default.xml
* mapred-default.xml
* oozie-default.xml
* hive-default.xml

These files are used by Savanna's plugin for Apache Hadoop 1.2.1,
Oozie 4.0.0, and Hive 0.11.0.

The files were taken from:

https://github.com/apache/hadoop-common/blob/release-1.2.1/src/hdfs/hdfs-default.xml
https://github.com/apache/hadoop-common/blob/release-1.2.1/src/mapred/mapred-default.xml
https://github.com/apache/hadoop-common/blob/release-1.2.1/src/core/core-default.xml
https://github.com/apache/oozie/blob/release-4.0.0/core/src/main/resources/oozie-default.xml
https://github.com/apache/hive/blob/release-0.11.0/conf/hive-default.xml.template

The XML configs are used to expose default Hadoop configurations to users
through Savanna's REST API. Users can override some config values, and the
overrides are pushed to the provisioned VMs running Hadoop services as part
of the appropriate XML config file.
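
config_helper.py above consumes these files through
x.load_hadoop_xml_defaults, which, judging by its call sites, yields a list
of {'name', 'value', 'description'} dicts. A hedged sketch of such a parser
using only the standard library; the function name mirrors the helper, but
this is not Sahara's actual implementation:

# A minimal sketch (not Sahara's code) of parsing a Hadoop-style
# *-default.xml file into [{'name': ..., 'value': ..., 'description': ...}].
import xml.etree.ElementTree as ET


def load_hadoop_xml_defaults(path):
    root = ET.parse(path).getroot()
    defaults = []
    for prop in root.findall('property'):
        defaults.append({
            'name': prop.findtext('name'),
            'value': prop.findtext('value') or '',
            'description': (prop.findtext('description') or '').strip(),
        })
    return defaults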

632  sahara/plugins/vanilla/v1_2_1/resources/core-default.xml

@@ -0,0 +1,632 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into core-site.xml and change them -->
<!-- there. If core-site.xml does not already exist, create it. -->
<configuration>
<!--- global properties -->
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadoop-${user.name}</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>hadoop.native.lib</name>
<value>true</value>
<description>Should native hadoop libraries, if present, be used.</description>
</property>
<property>
<name>hadoop.http.filter.initializers</name>
<value></value>
<description>A comma separated list of class names. Each class in the list
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
Filter will be initialized. Then, the Filter will be applied to all user
facing jsp and servlet web pages. The ordering of the list defines the
ordering of the filters.</description>
</property>
<property>
<name>hadoop.security.group.mapping</name>
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
<description>Class for user to group mapping (get groups for a given user)
</description>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
<description>Is service-level authorization enabled?</description>
</property>
<property>
<name>hadoop.security.instrumentation.requires.admin</name>
<value>false</value>
<description>
Indicates if administrator ACLs are required to access
instrumentation servlets (JMX, METRICS, CONF, STACKS).
</description>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
<description>Possible values are simple (no authentication), and kerberos
</description>
</property>
<property>
<name>hadoop.security.token.service.use_ip</name>
<value>true</value>
<description>Controls whether tokens always use IP addresses. DNS changes
will not be detected if this option is enabled. Existing client connections
that break will always reconnect to the IP of the original host. New clients
will connect to the host's new IP but fail to locate a token. Disabling
this option will allow existing and new clients to detect an IP change and
continue to locate the new host's token.
</description>
</property>
<property>
<name>hadoop.security.use-weak-http-crypto</name>
<value>false</value>
<description>If enabled, use KSSL to authenticate HTTP connections to the
NameNode. Due to a bug in JDK6, using KSSL requires one to configure
Kerberos tickets to use encryption types that are known to be
cryptographically weak. If disabled, SPNEGO will be used for HTTP
authentication, which supports stronger encryption types.
</description>
</property>
<!--
<property>
<name>hadoop.security.service.user.name.key</name>
<value></value>
<description>Name of the kerberos principal of the user that owns
a given service daemon
</description>
</property>
-->
<!--- logging properties -->
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>10</value>
<description>The max number of log files</description>
</property>
<!-- i/o properties -->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
<description>The size of buffer for use in sequence files.
The size of this buffer should probably be a multiple of hardware
page size (4096 on Intel x86), and it determines how much data is
buffered during read and write operations.</description>
</property>
<property>
<name>io.bytes.per.checksum</name>
<value>512</value>
<description>The number of bytes per checksum. Must not be larger than
io.file.buffer.size.</description>
</property>
<property>
<name>io.skip.checksum.errors</name>
<value>false</value>
<description>If true, when a checksum error is encountered while
reading a sequence file, entries are skipped, instead of throwing an
exception.</description>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
<description>A list of the compression codec classes that can be used
for compression/decompression.</description>
</property>
<property>
<name>io.serializations</name>
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
<description>A list of serialization classes that can be used for
obtaining serializers and deserializers.</description>
</property>
<!-- file system properties -->
<property>
<name>fs.default.name</name>
<value>file:///</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
<property>
<name>fs.trash.interval</name>
<value>0</value>
<description>Number of minutes between trash checkpoints.
If zero, the trash feature is disabled.
</description>
</property>
<property>
<name>fs.file.impl</name>
<value>org.apache.hadoop.fs.LocalFileSystem</value>
<description>The FileSystem for file: uris.</description>
</property>
<property>
<name>fs.hdfs.impl</name>
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
<description>The FileSystem for hdfs: uris.</description>
</property>
<property>
<name>fs.s3.impl</name>
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
<description>The FileSystem for s3: uris.</description>
</property>
<property>
<name>fs.s3n.impl</name>
<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
<description>The FileSystem for s3n: (Native S3) uris.</description>
</property>
<property>
<name>fs.kfs.impl</name>
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
<description>The FileSystem for kfs: uris.</description>
</property>
<property>
<name>fs.hftp.impl</name>
<value>org.apache.hadoop.hdfs.HftpFileSystem</value>
</property>
<property>
<name>fs.hsftp.impl</name>
<value>org.apache.hadoop.hdfs.HsftpFileSystem</value>
</property>
<property>
<name>fs.webhdfs.impl</name>
<value>org.apache.hadoop.hdfs.web.WebHdfsFileSystem</value>
</property>
<property>
<name>fs.ftp.impl</name>
<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
<description>The FileSystem for ftp: uris.</description>
</property>
<property>
<name>fs.ramfs.impl</name>
<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
<description>The FileSystem for ramfs: uris.</description>
</property>
<property>
<name>fs.har.impl</name>
<value>org.apache.hadoop.fs.HarFileSystem</value>
<description>The filesystem for Hadoop archives. </description>
</property>
<property>
<name>fs.har.impl.disable.cache</name>
<value>true</value>
<description>Don't cache 'har' filesystem instances.</description>
</property>
<property>
<name>fs.checkpoint.dir</name>
<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
<description>Determines where on the local filesystem the DFS secondary
name node should store the temporary images to merge.
If this is a comma-delimited list of directories then the image is
replicated in all of the directories for redundancy.
</description>
</property>
<property>
<name>fs.checkpoint.edits.dir</name>
<value>${fs.checkpoint.dir}</value>
<description>Determines where on the local filesystem the DFS secondary
name node should store the temporary edits to merge.
If this is a comma-delimited list of directories then the edits are
replicated in all of the directories for redundancy.
Default value is same as fs.checkpoint.dir
</description>
</property>
<property>
<name>fs.checkpoint.period</name>
<value>3600</value>
<description>The number of seconds between two periodic checkpoints.
</description>
</property>
<property>
<name>fs.checkpoint.size</name>
<value>67108864</value>
<description>The size of the current edit log (in bytes) that triggers
a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
</description>
</property>
<property>
<name>fs.s3.block.size</name>
<value>67108864</value>
<description>Block size to use when writing files to S3.</description>
</property>
<property>
<name>fs.s3.buffer.dir</name>
<value>${hadoop.tmp.dir}/s3</value>
<description>Determines where on the local filesystem the S3 filesystem
should store files before sending them to S3
(or after retrieving them from S3).
</description>
</property>
<property>
<name>fs.s3.maxRetries</name>
<value>4</value>
<description>The maximum number of retries for reading or writing files to S3,
before we signal failure to the application.
</description>
</property>
<property>
<name>fs.s3.sleepTimeSeconds</name>
<value>10</value>
<description>The number of seconds to sleep between each S3 retry.
</description>
</property>
<property>
<name>local.cache.size</name>
<value>10737418240</value>
<description>The limit on the size of cache you want to keep, set by default
to 10GB. This will act as a soft limit on the cache directory for out of band data.
</description>
</property>
<property>
<name>io.seqfile.compress.blocksize</name>
<value>1000000</value>
<description>The minimum block size for compression in block compressed
SequenceFiles.
</description>
</property>
<property>
<name>io.seqfile.lazydecompress</name>
<value>true</value>
<description>Should values of block-compressed SequenceFiles be decompressed
only when necessary.
</description>
</property>
<property>
<name>io.seqfile.sorter.recordlimit</name>
<value>1000000</value>
<description>The limit on number of records to be kept in memory in a spill
in SequenceFiles.Sorter
</description>
</property>
<property>
<name>io.mapfile.bloom.size</name>
<value>1048576</value>
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
Larger values minimize the number of filters, which slightly increases the performance,
but may waste too much space if the total number of keys is usually much smaller
than this number.
</description>
</property>
<property>
<name>io.mapfile.bloom.error.rate</name>
<value>0.005</value>
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
As this value decreases, the size of BloomFilter-s increases exponentially. This
value is the probability of encountering false positives (default is 0.5%).
</description>
</property>
<property>
<name>hadoop.util.hash.type</name>
<value>murmur</value>
<description>The default implementation of Hash. Currently this can take one of the
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
</description>
</property>
<!-- ipc properties -->
<property>
<name>ipc.client.idlethreshold</name>
<value>4000</value>
<description>Defines the threshold number of connections after which
connections will be inspected for idleness.
</description>
</property>
<property>
<name>ipc.client.kill.max</name>
<value>10</value>
<description>Defines the maximum number of clients to disconnect in one go.
</description>
</property>
<property>
<name>ipc.client.connection.maxidletime</name>
<value>10000</value>
<description>The maximum time in msec after which a client will bring down the
connection to the server.
</description>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>Indicates the number of retries a client will make to establish
a server connection.
</description>
</property>
<property>
<name>ipc.server.listen.queue.size</name>
<value>128</value>
<description>Indicates the length of the listen queue for servers accepting
client connections.
</description>
</property>
<property>
<name>ipc.server.tcpnodelay</name>
<value>false</value>
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
the server. Setting to true disables the algorithm and may decrease latency
with a cost of more/smaller packets.
</description>
</property>
<property>
<name>ipc.client.tcpnodelay</name>
<value>false</value>
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
the client. Setting to true disables the algorithm and may decrease latency
with a cost of more/smaller packets.
</description>
</property>
<!-- Web Interface Configuration -->
<property>
<name>webinterface.private.actions</name>
<value>false</value>
<description> If set to true, the web interfaces of JT and NN may contain
actions, such as kill job, delete file, etc., that should
not be exposed to public. Enable this option if the interfaces
are only reachable by those who have the right authorization.
</description>
</property>
<!-- Proxy Configuration -->
<property>
<name>hadoop.rpc.socket.factory.class.default</name>
<value>org.apache.hadoop.net.StandardSocketFactory</value>
<description> Default SocketFactory to use. This parameter is expected to be
formatted as "package.FactoryClassName".
</description>
</property>
<property>
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
<value></value>
<description> SocketFactory to use to connect to a DFS. If null or empty, use
hadoop.rpc.socket.class.default. This socket factory is also used by
DFSClient to create sockets to DataNodes.
</description>
</property>
<property>
<name>hadoop.socks.server</name>
<value></value>
<description> Address (host:port) of the SOCKS server to be used by the
SocksSocketFactory.
</description>
</property>
<!-- Topology Configuration -->
<property>
<name>topology.node.switch.mapping.impl</name>
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
<description> The default implementation of the DNSToSwitchMapping. It
invokes a script specified in topology.script.file.name to resolve
node names. If the value for topology.script.file.name is not set, the
default value of DEFAULT_RACK is returned for all node names.
</description>
</property>
<property>
<name>net.topology.impl</name>
<value>org.apache.hadoop.net.NetworkTopology</value>
<description> The default implementation of NetworkTopology which is classic three layer one.
</description>
</property>
<property>
<name>topology.script.file.name</name>
<value></value>
<description> The script name that should be invoked to resolve DNS names to
NetworkTopology names. Example: the script would take host.foo.bar as an
argument, and return /rack1 as the output.
</description>
</property>
<property>
<name>topology.script.number.args</name>
<value>100</value>
<description> The max number of args that the script configured with
topology.script.file.name should be run with. Each arg is an
IP address.
</description>
</property>
<property>
<name>hadoop.security.uid.cache.secs</name>
<value>14400</value>
<description> NativeIO maintains a cache from UID to UserName. This is
the timeout for an entry in that cache. </description>
</property>
<!-- HTTP web-consoles Authentication -->
<property>
<name>hadoop.http.authentication.type</name>
<value>simple</value>
<description>
Defines authentication used for Oozie HTTP endpoint.
Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
</description>
</property>
<property>
<name>hadoop.http.authentication.token.validity</name>
<value>36000</value>
<description>
Indicates how long (in seconds) an authentication token is valid before it has
to be renewed.
</description>
</property>
<property>
<name>hadoop.http.authentication.signature.secret.file</name>
<value>${user.home}/hadoop-http-auth-signature-secret</value>
<description>
The signature secret for signing the authentication tokens.
If not set a random secret is generated at startup time.
The same secret should be used for JT/NN/DN/TT configurations.
</description>
</property>
<property>
<name>hadoop.http.authentication.cookie.domain</name>
<value></value>
<description>
The domain to use for the HTTP cookie that stores the authentication token.
In order for authentication to work correctly across all Hadoop nodes' web-consoles
the domain must be correctly set.
IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
For this setting to work properly all nodes in the cluster must be configured
to generate URLs with hostname.domain names on it.
</description>
</property>
<property>
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
<value>true</value>
<description>
Indicates if anonymous requests are allowed when using 'simple' authentication.
</description>
</property>
<property>
<name>hadoop.http.authentication.kerberos.principal</name>
<value>HTTP/localhost@LOCALHOST</value>
<description>
Indicates the Kerberos principal to be used for HTTP endpoint.
The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO specification.
</description>
</property>
<property>
<name>hadoop.http.authentication.kerberos.keytab</name>
<value>${user.home}/hadoop.keytab</value>
<description>
Location of the keytab file with the credentials for the principal.
Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop.
</description>
</property>
<property>
<name>hadoop.relaxed.worker.version.check</name>
<value>false</value>
<description>
By default datanodes refuse to connect to namenodes if their build
revision (svn revision) do not match, and tasktrackers refuse to
connect to jobtrackers if their build version (version, revision,
user, and source checksum) do not match. This option changes the
behavior of hadoop workers to only check for a version match (eg
"1.0.2") but ignore the other build fields (revision, user, and
source checksum).
</description>
</property>
<property>
<name>hadoop.skip.worker.version.check</name>
<value>false</value>
<description>
By default datanodes refuse to connect to namenodes if their build
revision (svn revision) do not match, and tasktrackers refuse to
connect to jobtrackers if their build version (version, revision,
user, and source checksum) do not match. This option changes the
behavior of hadoop workers to skip doing a version check at all.
This option supersedes the 'hadoop.relaxed.worker.version.check'
option.
</description>
</property>
<property>
<name>hadoop.jetty.logs.serve.aliases</name>
<value>true</value>
<description>
Enable/Disable aliases serving from jetty
</description>
</property>
<property>
<name>ipc.client.fallback-to-simple-auth-allowed</name>
<value>false</value>
<description>
When a client is configured to attempt a secure connection, but attempts to
connect to an insecure server, that server may instruct the client to
switch to SASL SIMPLE (unsecure) authentication. This setting controls
whether or not the client will accept this instruction from the server.
When false (the default), the client will not allow the fallback to SIMPLE
authentication, and will abort the connection.
</description>
</property>
</configuration>

9  sahara/plugins/vanilla/v1_2_1/resources/create_hive_db.sql

@@ -0,0 +1,9 @@
CREATE DATABASE metastore;
USE metastore;
SOURCE /opt/hive/scripts/metastore/upgrade/mysql/hive-schema-0.10.0.mysql.sql;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'pass';
REVOKE ALL PRIVILEGES, GRANT OPTION FROM 'hive'@'localhost';
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'localhost' IDENTIFIED BY 'pass';
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'%' IDENTIFIED BY 'pass';
FLUSH PRIVILEGES;
exit
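
These bootstrap scripts are plain mysql-client input. A hedged example of
feeding one to a MySQL server from Python; the user and file path are
placeholders, and this is not how Sahara actually invokes them:

# Illustrative only: piping the bootstrap SQL above into the mysql client.
import subprocess

with open('create_hive_db.sql', 'rb') as sql:
    subprocess.check_call(['mysql', '-u', 'root'], stdin=sql)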

4  sahara/plugins/vanilla/v1_2_1/resources/create_oozie_db.sql

@@ -0,0 +1,4 @@
create database oozie;
grant all privileges on oozie.* to 'oozie'@'localhost' identified by 'oozie';
grant all privileges on oozie.* to 'oozie'@'%' identified by 'oozie';
exit

709  sahara/plugins/vanilla/v1_2_1/resources/hdfs-default.xml

@@ -0,0 +1,709 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into hdfs-site.xml and change them -->
<!-- there. If hdfs-site.xml does not already exist, create it. -->
<configuration>
<property>
<name>dfs.namenode.logging.level</name>
<value>info</value>
<description>The logging level for dfs namenode. Other values are "dir" (trace
namespace mutations), "block" (trace block under/over replications and block
creations/deletions), or "all".</description>
</property>
<property>
<name>dfs.namenode.rpc-address</name>
<value></value>
<description>
RPC address that handles all clients requests. If empty then we'll get the
value from fs.default.name.
The value of this property will take the form of hdfs://nn-host1:rpc-port.
</description>
</property>
<property>
<name>dfs.secondary.http.address</name>
<value>0.0.0.0:50090</value>
<description>
The secondary namenode http server address and port.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
<description>
The datanode server address and port for data transfer.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
<description>
The datanode http server address and port.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:50020</value>
<description>
The datanode ipc server address and port.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>3</value>
<description>The number of server threads for the datanode.</description>
</property>
<property>
<name>dfs.http.address</name>
<value>0.0.0.0:50070</value>
<description>
The address and the base port where the dfs namenode web ui will listen on.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.https.enable</name>
<value>false</value>
<description>Decide if HTTPS(SSL) is supported on HDFS
</description>
</property>
<property>
<name>dfs.https.need.client.auth</name>
<value>false</value>
<description>Whether SSL client certificate authentication is required
</description>
</property>
<property>
<name>dfs.https.server.keystore.resource</name>
<value>ssl-server.xml</value>
<description>Resource file from which ssl server keystore
information will be extracted
</description>
</property>
<property>
<name>dfs.https.client.keystore.resource</name>
<value>ssl-client.xml</value>
<description>Resource file from which ssl client keystore
information will be extracted
</description>
</property>
<property>
<name>dfs.datanode.https.address</name>
<value>0.0.0.0:50475</value>
</property>
<property>
<name>dfs.https.address</name>
<value>0.0.0.0:50470</value>
</property>
<property>
<name>dfs.datanode.dns.interface</name>
<value>default</value>
<description>The name of the Network Interface from which a data node should
report its IP address.
</description>
</property>
<property>
<name>dfs.datanode.dns.nameserver</name>
<value>default</value>
<description>The host name or IP address of the name server (DNS)
which a DataNode should use to determine the host name used by the
NameNode for communication and display purposes.
</description>
</property>
<property>
<name>dfs.replication.considerLoad</name>
<value>true</value>
<description>Decide if chooseTarget considers the target's load or not
</description>
</property>
<property>
<name>dfs.default.chunk.view.size</name>
<value>32768</value>
<description>The number of bytes to view for a file on the browser.
</description>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>0</value>
<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
</description>
</property>
<property>
<name>dfs.name.dir</name>
<value>${hadoop.tmp.dir}/dfs/name</value>
<description>Determines where on the local filesystem the DFS name node
should store the name table(fsimage). If this is a comma-delimited list
of directories then the name table is replicated in all of the
directories, for redundancy. </description>
</property>
<property>
<name>dfs.name.edits.dir</name>
<value>${dfs.name.dir}</value>
<description>Determines where on the local filesystem the DFS name node
should store the transaction (edits) file. If this is a comma-delimited list
of directories then the transaction file is replicated in all of the
directories, for redundancy. Default value is same as dfs.name.dir
</description>
</property>
<property>
<name>dfs.namenode.edits.toleration.length</name>
<value>0</value>
<description>
The length in bytes that namenode is willing to tolerate when the edit log
is corrupted. The edit log toleration feature checks the entire edit log.
It computes read length (the length of valid data), corruption length and
padding length. In case that corruption length is non-zero, the corruption
will be tolerated only if the corruption length is less than or equal to
the toleration length.
For disabling edit log toleration feature, set this property to -1. When
the feature is disabled, the end of edit log will not be checked. In this
case, namenode will startup normally even if the end of edit log is
corrupted.
</description>
</property>
<property>
<name>dfs.web.ugi</name>
<value>webuser,webgroup</value>
<description>The user account used by the web interface.
Syntax: USERNAME,GROUP1,GROUP2, ...
</description>
</property>
<property>
<name>dfs.permissions</name>
<value>true</value>
<description>
If "true", enable permission checking in HDFS.
If "false", permission checking is turned off,
but all other behavior is unchanged.
Switching from one parameter value to the other does not change the mode,
owner or group of files or directories.
</description>
</property>
<property>
<name>dfs.permissions.supergroup</name>
<value>supergroup</value>
<description>The name of the group of super-users.</description>
</property>
<property>
<name>dfs.block.access.token.enable</name>
<value>false</value>
<description>
If "true", access tokens are used as capabilities for accessing datanodes.
If "false", no access tokens are checked on accessing datanodes.
</description>
</property>
<property>
<name>dfs.block.access.key.update.interval</name>
<value>600</value>
<description>
Interval in minutes at which namenode updates its access keys.
</description>
</property>
<property>
<name>dfs.block.access.token.lifetime</name>
<value>600</value>
<description>The lifetime of access tokens in minutes.</description>
</property>
<property>
<name>dfs.data.dir</name>
<value>${hadoop.tmp.dir}/dfs/data</value>
<description>Determines where on the local filesystem a DFS data node
should store its blocks. If this is a comma-delimited
list of directories, then data will be stored in all named
directories, typically on different devices.
Directories that do not exist are ignored.
</description>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>755</value>