Move the savanna subdir to sahara
Rename the subdirectory and replace all instances of "import savanna" with "import sahara" and all instances of "from savanna" with "from sahara". * Replaced mock patches like mock.patch('savanna... * Updated config generator script * Renamed entry points in setup.cfg * Hacking checks also fixed * Manual renaming in alembic scripts to force work migrations * Fix doc building * Renamed itests directories * Some changes in gitignore * Removed locale dir after rebase Co-Authored-By: Alexander Ignatov <aignatov@mirantis.com> Change-Id: Ia77252c24046c3e7283c0a7b96d11636020b949c Partially implements: blueprint savanna-renaming-service
This commit is contained in:
parent
01be22a21e
commit
8578f2f19a
|
@ -35,15 +35,18 @@ etc/local.cfg
|
||||||
etc/savanna/*.conf
|
etc/savanna/*.conf
|
||||||
etc/savanna/*.topology
|
etc/savanna/*.topology
|
||||||
etc/savanna.conf
|
etc/savanna.conf
|
||||||
|
etc/sahara/*.conf
|
||||||
|
etc/sahara/*.topology
|
||||||
|
etc/sahara.conf
|
||||||
ChangeLog
|
ChangeLog
|
||||||
savanna/tests/integration/configs/itest.conf
|
sahara/tests/integration/configs/itest.conf
|
||||||
cscope.out
|
cscope.out
|
||||||
tools/lintstack.head.py
|
tools/lintstack.head.py
|
||||||
tools/pylint_exceptions
|
tools/pylint_exceptions
|
||||||
savanna/tests/cover
|
sahara/tests/cover
|
||||||
savanna/tests/coverage.xml
|
sahara/tests/coverage.xml
|
||||||
cover
|
cover
|
||||||
htmlcov
|
htmlcov
|
||||||
savanna/openstack/common/db/savanna.sqlite
|
sahara/openstack/common/db/sahara.sqlite
|
||||||
.testrepository
|
.testrepository
|
||||||
AUTHORS
|
AUTHORS
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright (c) 2014 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import abc
|
||||||
|
|
||||||
|
import six
|
||||||
|
|
||||||
|
|
||||||
|
@six.add_metaclass(abc.ABCMeta)
class AbstractVersionHandler():
    """Interface implemented by each vanilla-plugin Hadoop version handler.

    A concrete handler encapsulates all version-specific provisioning
    logic (node processes, configs, cluster lifecycle operations) for
    one Hadoop release.
    """

    @abc.abstractmethod
    def get_node_processes(self):
        """Return the node processes supported by this Hadoop version."""
        return

    @abc.abstractmethod
    def get_plugin_configs(self):
        """Return the user-visible plugin configs for this version."""
        return

    @abc.abstractmethod
    def configure_cluster(self, cluster):
        """Apply version-specific configuration to all cluster nodes."""
        return

    @abc.abstractmethod
    def start_cluster(self, cluster):
        """Start the Hadoop services on a configured cluster."""
        return

    @abc.abstractmethod
    def validate(self, cluster):
        """Validate the cluster layout before provisioning starts."""
        return

    @abc.abstractmethod
    def scale_cluster(self, cluster, instances):
        """Add the given instances to a running cluster."""
        return

    @abc.abstractmethod
    def decommission_nodes(self, cluster, instances):
        """Remove the given instances from a running cluster."""
        return

    @abc.abstractmethod
    def validate_scaling(self, cluster, existing, additional):
        """Validate a proposed scaling operation before executing it."""
        return

    @abc.abstractmethod
    def get_resource_manager_uri(self, cluster):
        """Return the URI of the resource manager (jobtracker)."""
        return

    @abc.abstractmethod
    def get_oozie_server(self, cluster):
        """Return the instance running the Oozie server, if any."""
        return
|
|
@ -0,0 +1,79 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.plugins import provisioning as p
|
||||||
|
from sahara.plugins.vanilla import versionfactory as vhf
|
||||||
|
|
||||||
|
|
||||||
|
class VanillaProvider(p.ProvisioningPluginBase):
    """Vanilla Apache Hadoop provisioning plugin.

    A thin facade: every version-dependent operation is delegated to the
    handler registered for the cluster's ``hadoop_version``.
    """

    def __init__(self):
        # Factory mapping a hadoop_version string to its concrete handler.
        self.version_factory = vhf.VersionFactory.get_instance()

    def get_description(self):
        return (
            "This plugin provides an ability to launch vanilla Apache Hadoop "
            "1.2.1 cluster without any management consoles. Also it can "
            "deploy Oozie 4.0.0 and Hive 0.11.0")

    def _get_version_handler(self, hadoop_version):
        # Resolve the concrete handler for the requested Hadoop release.
        return self.version_factory.get_version_handler(hadoop_version)

    def get_resource_manager_uri(self, cluster):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.get_resource_manager_uri(cluster)

    def get_hdfs_user(self):
        return 'hadoop'

    def get_node_processes(self, hadoop_version):
        return self._get_version_handler(hadoop_version).get_node_processes()

    def get_versions(self):
        return self.version_factory.get_versions()

    def get_title(self):
        return "Vanilla Apache Hadoop"

    def get_configs(self, hadoop_version):
        return self._get_version_handler(hadoop_version).get_plugin_configs()

    def configure_cluster(self, cluster):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.configure_cluster(cluster)

    def start_cluster(self, cluster):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.start_cluster(cluster)

    def validate(self, cluster):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.validate(cluster)

    def scale_cluster(self, cluster, instances):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.scale_cluster(cluster, instances)

    def decommission_nodes(self, cluster, instances):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.decommission_nodes(cluster, instances)

    def validate_scaling(self, cluster, existing, additional):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.validate_scaling(cluster, existing, additional)

    def get_oozie_server(self, cluster):
        handler = self._get_version_handler(cluster.hadoop_version)
        return handler.get_oozie_server(cluster)
|
|
@ -0,0 +1,451 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo.config import cfg
|
||||||
|
|
||||||
|
from sahara import conductor as c
|
||||||
|
from sahara import context
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
from sahara.plugins.general import utils
|
||||||
|
from sahara.plugins import provisioning as p
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import mysql_helper as m_h
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import oozie_helper as o_h
|
||||||
|
from sahara.swift import swift_helper as swift
|
||||||
|
from sahara.topology import topology_helper as topology
|
||||||
|
from sahara.utils import crypto
|
||||||
|
from sahara.utils import types as types
|
||||||
|
from sahara.utils import xmlutils as x
|
||||||
|
|
||||||
|
|
||||||
|
conductor = c.API
LOG = logging.getLogger(__name__)
CONF = cfg.CONF

# Default Hadoop XML configuration sets shipped with the plugin resources.
CORE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/core-default.xml')

HDFS_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/hdfs-default.xml')

MAPRED_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/mapred-default.xml')

HIVE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/hive-default.xml')

# Append Oozie configs for core-site.xml
CORE_DEFAULT += o_h.OOZIE_CORE_DEFAULT

# Service name -> lists of XML default configs that feed its *-site.xml.
XML_CONFS = {
    "HDFS": [CORE_DEFAULT, HDFS_DEFAULT],
    "MapReduce": [MAPRED_DEFAULT],
    "JobFlow": [o_h.OOZIE_DEFAULT],
    "Hive": [HIVE_DEFAULT]
}

# TODO(aignatov): Environmental configs could be more complex
# Heap-size knobs rendered into hadoop-env.sh / catalina options.
ENV_CONFS = {
    "MapReduce": {
        'Job Tracker Heap Size': 'HADOOP_JOBTRACKER_OPTS=\\"-Xmx%sm\\"',
        'Task Tracker Heap Size': 'HADOOP_TASKTRACKER_OPTS=\\"-Xmx%sm\\"'
    },
    "HDFS": {
        'Name Node Heap Size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"',
        'Data Node Heap Size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"'
    },
    "JobFlow": {
        'Oozie Heap Size': 'CATALINA_OPTS -Xmx%sm'
    }
}

# User-facing 'general' cluster toggles.
ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
                        config_type="bool", priority=1,
                        default_value=True, is_optional=True)

ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
                                config_type="bool", priority=1,
                                default_value=True, is_optional=True)

ENABLE_MYSQL = p.Config('Enable MySQL', 'general', 'cluster',
                        config_type="bool", priority=1,
                        default_value=True, is_optional=True)

# Default set to 1 day, which is the default Keystone token
# expiration time. After the token is expired we can't continue
# scaling anyway.
DECOMISSIONING_TIMEOUT = p.Config('Decomissioning Timeout', 'general',
                                  'cluster', config_type='int', priority=1,
                                  default_value=86400, is_optional=True,
                                  description='Timeout for datanode'
                                              ' decomissioning operation'
                                              ' during scaling, in seconds')

# Settings the plugin derives itself; never exposed to users.
HIDDEN_CONFS = ['fs.default.name', 'dfs.name.dir', 'dfs.data.dir',
                'mapred.job.tracker', 'mapred.system.dir', 'mapred.local.dir',
                'hadoop.proxyuser.hadoop.hosts',
                'hadoop.proxyuser.hadoop.groups']

# Settings that only make sense cluster-wide (not per node group).
CLUSTER_WIDE_CONFS = ['dfs.block.size', 'dfs.permissions', 'dfs.replication',
                      'dfs.replication.min', 'dfs.replication.max',
                      'io.file.buffer.size', 'mapreduce.job.counters.max',
                      'mapred.output.compress', 'io.compression.codecs',
                      'mapred.output.compression.codec',
                      'mapred.output.compression.type',
                      'mapred.compress.map.output',
                      'mapred.map.output.compression.codec']

# Settings surfaced with high priority in the UI.
PRIORITY_1_CONFS = ['dfs.datanode.du.reserved',
                    'dfs.datanode.failed.volumes.tolerated',
                    'dfs.datanode.max.xcievers', 'dfs.datanode.handler.count',
                    'dfs.namenode.handler.count', 'mapred.child.java.opts',
                    'mapred.jobtracker.maxtasks.per.job',
                    'mapred.job.tracker.handler.count',
                    'mapred.map.child.java.opts',
                    'mapred.reduce.child.java.opts',
                    'io.sort.mb', 'mapred.tasktracker.map.tasks.maximum',
                    'mapred.tasktracker.reduce.tasks.maximum']

# for now we have not so many cluster-wide configs
# lets consider all of them having high priority
PRIORITY_1_CONFS += CLUSTER_WIDE_CONFS
|
||||||
|
|
||||||
|
|
||||||
|
def _initialise_configs():
    """Build the full list of user-visible plugin Config objects.

    XML defaults are wrapped into typed Config objects (bool/int types
    inferred from the default value), the env heap-size knobs are added,
    and finally the 'general' toggles.  Executed once at import time.
    """
    configs = []
    for service, config_lists in XML_CONFS.iteritems():
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    # Infer a richer type than "string" from the default.
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif types.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    # Heap-size env configs are all integers with a 1024 MB default.
    for service, config_items in ENV_CONFS.iteritems():
        for name, param_format_str in config_items.iteritems():
            configs.append(p.Config(name, service, "node",
                                    default_value=1024, priority=1,
                                    config_type="int"))

    configs.append(ENABLE_SWIFT)
    configs.append(ENABLE_MYSQL)
    configs.append(DECOMISSIONING_TIMEOUT)
    # Data locality toggle is only offered when globally enabled.
    if CONF.enable_data_locality:
        configs.append(ENABLE_DATA_LOCALITY)

    return configs


# Initialise plugin Hadoop configurations
PLUGIN_CONFIGS = _initialise_configs()
|
||||||
|
|
||||||
|
|
||||||
|
def get_plugin_configs():
    """Return every Hadoop config exposed by the vanilla plugin."""
    return PLUGIN_CONFIGS
|
||||||
|
|
||||||
|
|
||||||
|
def get_general_configs(hive_hostname, passwd_hive_mysql):
    """Describe the 'general' toggle groups and their backing settings.

    Each entry maps the toggle name to its default plus the concrete
    Hadoop settings applied when the toggle is on.
    """
    config = {
        ENABLE_SWIFT.name: {
            'default_value': ENABLE_SWIFT.default_value,
            'conf': extract_name_values(swift.get_swift_configs())
        },
        ENABLE_MYSQL.name: {
            'default_value': ENABLE_MYSQL.default_value,
            'conf': m_h.get_required_mysql_configs(
                hive_hostname, passwd_hive_mysql)
        }
    }
    # Data locality is only offered when globally enabled in sahara.
    if CONF.enable_data_locality:
        config[ENABLE_DATA_LOCALITY.name] = {
            'default_value': ENABLE_DATA_LOCALITY.default_value,
            'conf': extract_name_values(topology.vm_awareness_all_config())
        }
    return config
|
||||||
|
|
||||||
|
|
||||||
|
def get_config_value(service, name, cluster=None):
    """Resolve the effective value of config ``name`` for ``service``.

    Lookup order: generated cluster-level configs, then node-group
    overrides, then the plugin-wide defaults.

    :raises RuntimeError: if the parameter is unknown to the plugin.
    """
    if cluster:
        savanna_configs = generate_savanna_configs(cluster)
        if savanna_configs.get(name):
            return savanna_configs[name]

        for ng in cluster.node_groups:
            if (ng.configuration().get(service) and
                    ng.configuration()[service].get(name)):
                return ng.configuration()[service][name]

    # Loop variable renamed from 'c' to avoid shadowing the module's
    # 'conductor as c' import alias.
    for conf in PLUGIN_CONFIGS:
        if conf.applicable_target == service and conf.name == name:
            return conf.default_value

    # Bug fix: RuntimeError does not %-interpolate extra arguments the
    # way logging calls do, so the message must be formatted explicitly.
    raise RuntimeError("Unable get parameter '%s' from service %s"
                       % (name, service))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cfg_from_general(cfg, configs, general_config,
                              rest_excluded=False):
    """Merge the 'general' config groups (swift, mysql, ...) into ``cfg``.

    If the user supplied a 'general' section, options missing from it
    fall back to their defaults (unless ``rest_excluded``) and only
    truthy options are applied.  Without a 'general' section every
    group is applied unconditionally.
    """
    if 'general' not in configs:
        # No user choices at all -- apply every general config group.
        return _set_config(cfg, general_config)

    user_general = configs['general']
    if not rest_excluded:
        # Fill in defaults for any toggle the user did not mention.
        for option in general_config:
            user_general.setdefault(
                option, general_config[option]['default_value'])
    for name, value in user_general.items():
        if value:
            cfg = _set_config(cfg, general_config, name)
            LOG.info("Applying config: %s" % name)
    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def _get_hostname(service):
|
||||||
|
return service.hostname() if service else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_hadoop_ssh_keys(cluster):
    """Return the (private, public) SSH key pair for the hadoop user.

    Keys are cached in ``cluster.extra``; on first use a fresh pair is
    generated and persisted through the conductor.
    """
    extra = cluster.extra or {}
    private_key = extra.get('hadoop_private_ssh_key')
    public_key = extra.get('hadoop_public_ssh_key')

    if private_key and public_key:
        return private_key, public_key

    # First call for this cluster: create a key pair and persist it.
    private_key, public_key = crypto.generate_key_pair()
    extra['hadoop_private_ssh_key'] = private_key
    extra['hadoop_public_ssh_key'] = public_key
    conductor.cluster_update(context.ctx(), cluster, {'extra': extra})

    return private_key, public_key
|
||||||
|
|
||||||
|
|
||||||
|
def generate_savanna_configs(cluster, node_group=None):
    """Derive topology-dependent Hadoop settings for the cluster.

    Builds core/HDFS settings from the provisioned instances (namenode,
    jobtracker, oozie, hive) and, when ``node_group`` is given, from its
    storage paths.  The returned dict feeds the *-site.xml files.
    """
    nn_hostname = _get_hostname(utils.get_namenode(cluster))
    jt_hostname = _get_hostname(utils.get_jobtracker(cluster))
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    # MapReduce settings only apply when a jobtracker is deployed.
    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    # Oozie needs hadoop proxyuser rights plus its oozie-site settings.
    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    # Hive metastore defaults to a local Derby database.
    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Render the final XML config payloads for one node group.

    Combines topology-derived settings, user overrides and the 'general'
    groups (swift / mysql / data locality) into core-site, mapred-site,
    hdfs-site and, where applicable, hive-site and oozie-site contents.
    """
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        # NOTE(review): '% s' in the format looks like a typo for '%s'
        # -- confirm before changing the emitted log text.
        LOG.debug('Generated hive-site.xml for hive % s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie % s', oozie_hostname)

    return xml_configs
|
||||||
|
|
||||||
|
|
||||||
|
def extract_environment_confs(configs):
    """Collect env-style Hadoop settings (hadoop-env.sh lines).

    Returns the formatted "VAR=..." strings for every known env config
    present in ``configs`` with a non-None value.  Unknown services are
    logged and skipped.
    """
    result = []
    for service, srv_confs in configs.items():
        service_formats = ENV_CONFS.get(service)
        if not service_formats:
            LOG.warn("Plugin received wrong applicable target '%s' in "
                     "environmental configs" % service)
            continue
        for param_name, param_value in srv_confs.items():
            if param_value is None:
                continue
            # Direct lookup replaces the original name-equality scan.
            fmt = service_formats.get(param_name)
            if fmt is not None:
                result.append(fmt % param_value)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def extract_xml_confs(configs):
    """Collect (name, value) pairs destined for the XML *-site files.

    A parameter is emitted once per default-config list that declares
    it; unknown services are logged and skipped.
    """
    result = []
    for service, srv_confs in configs.items():
        xml_lists = XML_CONFS.get(service)
        if not xml_lists:
            LOG.warn("Plugin received wrong applicable target '%s' for "
                     "xml configs" % service)
            continue
        for param_name, param_value in srv_confs.items():
            if param_value is None:
                continue
            for cfg_list in xml_lists:
                if param_name in (entry['name'] for entry in cfg_list):
                    result.append((param_name, param_value))
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def generate_setup_script(storage_paths, env_configs, append_oozie=False):
    """Render the bash script that prepares hadoop-env.sh and storage dirs.

    :param storage_paths: node storage mount points (the first hosts logs)
    :param env_configs: "VAR=value" lines from extract_environment_confs()
    :param append_oozie: also patch the Oozie catalina heap options
    """
    lines = ["#!/bin/bash -x"]

    # Rebuild hadoop-env.sh: our HADOOP_* overrides first, stock file after.
    lines.append("echo -n > /tmp/hadoop-env.sh")
    lines.extend('echo "%s" >> /tmp/hadoop-env.sh' % conf_line
                 for conf_line in env_configs if 'HADOOP' in conf_line)
    lines.append("cat /etc/hadoop/hadoop-env.sh >> /tmp/hadoop-env.sh")
    lines.append("cp /tmp/hadoop-env.sh /etc/hadoop/hadoop-env.sh")

    # Point the per-user log dir at the first storage volume.
    user_log_dir = storage_paths[0] + "/log/hadoop/\$USER/"
    lines.append('sed -i "s,export HADOOP_LOG_DIR=.*,'
                 'export HADOOP_LOG_DIR=%s," /etc/hadoop/hadoop-env.sh'
                 % user_log_dir)

    # Same for the secure-datanode log dir.
    dn_log_dir = storage_paths[0] + "/log/hadoop/hdfs"
    lines.append('sed -i "s,export HADOOP_SECURE_DN_LOG_DIR=.*,'
                 'export HADOOP_SECURE_DN_LOG_DIR=%s," '
                 '/etc/hadoop/hadoop-env.sh' % dn_log_dir)

    if append_oozie:
        o_h.append_oozie_setup(lines, env_configs)

    # Hand the storage mount points over to the hadoop user.
    for path in storage_paths:
        lines.append("chown -R hadoop:hadoop %s" % path)
        lines.append("chmod -R 755 %s" % path)
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_name_values(configs):
    """Flatten a list of {'name': ..., 'value': ...} dicts into one dict."""
    return {item['name']: item['value'] for item in configs}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_hadoop_path(lst, hadoop_dir):
    """Join each storage path with ``hadoop_dir`` into a comma list.

    Returns None for an empty/None ``lst`` (callers treat that as
    "no storage configured").
    """
    if not lst:
        return None
    return ",".join(path + hadoop_dir for path in lst)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_config(cfg, gen_cfg, name=None):
|
||||||
|
if name in gen_cfg:
|
||||||
|
cfg.update(gen_cfg[name]['conf'])
|
||||||
|
if name is None:
|
||||||
|
for name in gen_cfg:
|
||||||
|
cfg.update(gen_cfg[name]['conf'])
|
||||||
|
return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def _get_general_cluster_config_value(cluster, option):
|
||||||
|
conf = cluster.cluster_configs
|
||||||
|
|
||||||
|
if 'general' in conf and option.name in conf['general']:
|
||||||
|
return conf['general'][option.name]
|
||||||
|
|
||||||
|
return option.default_value
|
||||||
|
|
||||||
|
|
||||||
|
def is_mysql_enable(cluster):
    """Effective value of the 'Enable MySQL' toggle for this cluster."""
    return _get_general_cluster_config_value(cluster, ENABLE_MYSQL)
|
||||||
|
|
||||||
|
|
||||||
|
def is_data_locality_enabled(cluster):
    """Whether topology-aware (data locality) setup should be applied.

    The global sahara switch must be on before the per-cluster toggle
    is even consulted.
    """
    globally_on = CONF.enable_data_locality
    return (_get_general_cluster_config_value(cluster, ENABLE_DATA_LOCALITY)
            if globally_on else False)
|
||||||
|
|
||||||
|
|
||||||
|
def get_decommissioning_timeout(cluster):
    """Effective datanode decommissioning timeout (seconds) for scaling."""
    return _get_general_cluster_config_value(cluster, DECOMISSIONING_TIMEOUT)
|
||||||
|
|
||||||
|
|
||||||
|
def get_port_from_config(service, name, cluster=None):
    """Look up an address-typed config and return just its port part."""
    return utils.get_port_from_address(
        get_config_value(service, name, cluster))
|
|
@ -0,0 +1,45 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
def get_hive_mysql_configs(metastore_host, passwd):
    """hive-site.xml settings pointing Hive at a MySQL metastore."""
    jdbc_url = 'jdbc:mysql://%s/metastore' % metastore_host
    return {
        'javax.jdo.option.ConnectionURL': jdbc_url,
        'javax.jdo.option.ConnectionDriverName': 'com.mysql.jdbc.Driver',
        'javax.jdo.option.ConnectionUserName': 'hive',
        'javax.jdo.option.ConnectionPassword': passwd,
        # Schema is pre-created during image setup; forbid auto-creation.
        'datanucleus.autoCreateSchema': 'false',
        'datanucleus.fixedDatastore': 'true',
        'hive.metastore.uris': 'thrift://%s:9083' % metastore_host,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_oozie_mysql_configs():
    """oozie-site.xml JPA settings for a local MySQL database."""
    jpa = 'oozie.service.JPAService.jdbc.'
    return {
        jpa + 'driver': 'com.mysql.jdbc.Driver',
        jpa + 'url': 'jdbc:mysql://localhost:3306/oozie',
        jpa + 'username': 'oozie',
        jpa + 'password': 'oozie'
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_required_mysql_configs(hive_hostname, passwd_mysql):
    """All MySQL-backed configs: Oozie always, Hive when a server exists."""
    hive_part = (get_hive_mysql_configs(hive_hostname, passwd_mysql)
                 if hive_hostname else {})
    configs = get_oozie_mysql_configs()
    configs.update(hive_part)
    return configs
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.utils import xmlutils as x
|
||||||
|
|
||||||
|
|
||||||
|
# Default Oozie configuration shipped with the plugin resources.
OOZIE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v1_2_1/resources/oozie-default.xml')

# core-site.xml proxyuser entries required for Oozie impersonation.
OOZIE_CORE_DEFAULT = [
    {
        'name': 'hadoop.proxyuser.hadoop.hosts',
        'value': "localhost"
    },
    {
        'name': 'hadoop.proxyuser.hadoop.groups',
        'value': 'hadoop'
    }]

# Stock catalina heap line in oozie-env.sh that gets replaced by sed.
OOZIE_HEAPSIZE_DEFAULT = "CATALINA_OPTS -Xmx1024m"
|
||||||
|
|
||||||
|
|
||||||
|
def get_oozie_required_xml_configs():
    """Following configs differ from default configs in oozie-default.xml."""
    action_executors = (
        'org.apache.oozie.action.email.EmailActionExecutor,'
        'org.apache.oozie.action.hadoop.HiveActionExecutor,'
        'org.apache.oozie.action.hadoop.ShellActionExecutor,'
        'org.apache.oozie.action.hadoop.SqoopActionExecutor,'
        'org.apache.oozie.action.hadoop.DistcpActionExecutor')
    workflow_schemas = (
        'shell-action-0.1.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,'
        'email-action-0.1.xsd,hive-action-0.2.xsd,hive-action-0.3.xsd,'
        'hive-action-0.4.xsd,hive-action-0.5.xsd,sqoop-action-0.2.xsd,'
        'sqoop-action-0.3.xsd,sqoop-action-0.4.xsd,ssh-action-0.1.xsd,'
        'ssh-action-0.2.xsd,distcp-action-0.1.xsd,distcp-action-0.2.xsd,'
        'oozie-sla-0.1.xsd,oozie-sla-0.2.xsd')
    return {
        'oozie.service.ActionService.executor.ext.classes': action_executors,
        'oozie.service.SchemaService.wf.ext.schemas': workflow_schemas,
        # Schema creation is done out of band during image setup.
        'oozie.service.JPAService.create.db.schema': 'false',
    }
|
||||||
|
|
||||||
|
|
||||||
|
def append_oozie_setup(setup_script, env_configs):
    """Append sed commands swapping the Oozie catalina heap line.

    Mutates ``setup_script`` in place; only CATALINA_OPT* entries from
    ``env_configs`` are applied.
    """
    catalina_lines = [entry for entry in env_configs
                      if 'CATALINA_OPT' in entry]
    for new_opts in catalina_lines:
        setup_script.append('sed -i "s,%s,%s," '
                            '/opt/oozie/conf/oozie-env.sh'
                            % (OOZIE_HEAPSIZE_DEFAULT, new_opts))
|
|
@ -0,0 +1,26 @@
|
||||||
|
Apache Hadoop Configurations for Savanna
|
||||||
|
========================================
|
||||||
|
|
||||||
|
This directory contains default XML configuration files:
|
||||||
|
|
||||||
|
* core-default.xml,
|
||||||
|
* hdfs-default.xml,
|
||||||
|
* mapred-default.xml,
|
||||||
|
* oozie-default.xml,
|
||||||
|
* hive-default.xml
|
||||||
|
|
||||||
|
These files are used by Savanna's plugin for Apache Hadoop version 1.2.1,
|
||||||
|
Oozie 4.0.0, Hive version 0.11.0.
|
||||||
|
|
||||||
|
|
||||||
|
Files were taken from here:
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-1.2.1/src/hdfs/hdfs-default.xml
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-1.2.1/src/mapred/mapred-default.xml
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-1.2.1/src/core/core-default.xml
|
||||||
|
https://github.com/apache/oozie/blob/release-4.0.0/core/src/main/resources/oozie-default.xml
|
||||||
|
https://github.com/apache/hive/blob/release-0.11.0/conf/hive-default.xml.template
|
||||||
|
|
||||||
|
XML configs are used to expose default Hadoop configurations to users through
Savanna's REST API. It allows users to override some config values which will
|
||||||
|
be pushed to the provisioned VMs running Hadoop services as part of appropriate
|
||||||
|
xml config.
|
|
@ -0,0 +1,632 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
|
||||||
|
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||||
|
<!-- wish to modify from this file into core-site.xml and change them -->
|
||||||
|
<!-- there. If core-site.xml does not already exist, create it. -->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
|
||||||
|
<!--- global properties -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.tmp.dir</name>
|
||||||
|
<value>/tmp/hadoop-${user.name}</value>
|
||||||
|
<description>A base for other temporary directories.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.native.lib</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>Should native hadoop libraries, if present, be used.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.filter.initializers</name>
|
||||||
|
<value></value>
|
||||||
|
<description>A comma separated list of class names. Each class in the list
|
||||||
|
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
|
||||||
|
Filter will be initialized. Then, the Filter will be applied to all user
|
||||||
|
facing jsp and servlet web pages. The ordering of the list defines the
|
||||||
|
ordering of the filters.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.group.mapping</name>
|
||||||
|
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
|
||||||
|
<description>Class for user to group mapping (get groups for a given user)
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.authorization</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Is service-level authorization enabled?</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.instrumentation.requires.admin</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
Indicates if administrator ACLs are required to access
|
||||||
|
instrumentation servlets (JMX, METRICS, CONF, STACKS).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.authentication</name>
|
||||||
|
<value>simple</value>
|
||||||
|
<description>Possible values are simple (no authentication), and kerberos
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.token.service.use_ip</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>Controls whether tokens always use IP addresses. DNS changes
|
||||||
|
will not be detected if this option is enabled. Existing client connections
|
||||||
|
that break will always reconnect to the IP of the original host. New clients
|
||||||
|
will connect to the host's new IP but fail to locate a token. Disabling
|
||||||
|
this option will allow existing and new clients to detect an IP change and
|
||||||
|
continue to locate the new host's token.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.use-weak-http-crypto</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>If enabled, use KSSL to authenticate HTTP connections to the
|
||||||
|
NameNode. Due to a bug in JDK6, using KSSL requires one to configure
|
||||||
|
Kerberos tickets to use encryption types that are known to be
|
||||||
|
cryptographically weak. If disabled, SPNEGO will be used for HTTP
|
||||||
|
authentication, which supports stronger encryption types.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.service.user.name.key</name>
|
||||||
|
<value></value>
|
||||||
|
<description>Name of the kerberos principal of the user that owns
|
||||||
|
a given service daemon
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--- logging properties -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.logfile.size</name>
|
||||||
|
<value>10000000</value>
|
||||||
|
<description>The max size of each log file</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.logfile.count</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>The max number of log files</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<!-- i/o properties -->
|
||||||
|
<property>
|
||||||
|
<name>io.file.buffer.size</name>
|
||||||
|
<value>4096</value>
|
||||||
|
<description>The size of buffer for use in sequence files.
|
||||||
|
The size of this buffer should probably be a multiple of hardware
|
||||||
|
page size (4096 on Intel x86), and it determines how much data is
|
||||||
|
buffered during read and write operations.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.bytes.per.checksum</name>
|
||||||
|
<value>512</value>
|
||||||
|
<description>The number of bytes per checksum. Must not be larger than
|
||||||
|
io.file.buffer.size.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.skip.checksum.errors</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>If true, when a checksum error is encountered while
|
||||||
|
reading a sequence file, entries are skipped, instead of throwing an
|
||||||
|
exception.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.compression.codecs</name>
|
||||||
|
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
|
||||||
|
<description>A list of the compression codec classes that can be used
|
||||||
|
for compression/decompression.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.serializations</name>
|
||||||
|
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
||||||
|
<description>A list of serialization classes that can be used for
|
||||||
|
obtaining serializers and deserializers.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<!-- file system properties -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.default.name</name>
|
||||||
|
<value>file:///</value>
|
||||||
|
<description>The name of the default file system. A URI whose
|
||||||
|
scheme and authority determine the FileSystem implementation. The
|
||||||
|
uri's scheme determines the config property (fs.SCHEME.impl) naming
|
||||||
|
the FileSystem implementation class. The uri's authority is used to
|
||||||
|
determine the host, port, etc. for a filesystem.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.trash.interval</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>Number of minutes between trash checkpoints.
|
||||||
|
If zero, the trash feature is disabled.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.file.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.LocalFileSystem</value>
|
||||||
|
<description>The FileSystem for file: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.hdfs.impl</name>
|
||||||
|
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
|
||||||
|
<description>The FileSystem for hdfs: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
|
||||||
|
<description>The FileSystem for s3: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3n.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
|
||||||
|
<description>The FileSystem for s3n: (Native S3) uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.kfs.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
|
||||||
|
<description>The FileSystem for kfs: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.hftp.impl</name>
|
||||||
|
<value>org.apache.hadoop.hdfs.HftpFileSystem</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.hsftp.impl</name>
|
||||||
|
<value>org.apache.hadoop.hdfs.HsftpFileSystem</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.webhdfs.impl</name>
|
||||||
|
<value>org.apache.hadoop.hdfs.web.WebHdfsFileSystem</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.ftp.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
|
||||||
|
<description>The FileSystem for ftp: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.ramfs.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
|
||||||
|
<description>The FileSystem for ramfs: uris.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.har.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.HarFileSystem</value>
|
||||||
|
<description>The filesystem for Hadoop archives. </description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.har.impl.disable.cache</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>Don't cache 'har' filesystem instances.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.checkpoint.dir</name>
|
||||||
|
<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
|
||||||
|
<description>Determines where on the local filesystem the DFS secondary
|
||||||
|
name node should store the temporary images to merge.
|
||||||
|
If this is a comma-delimited list of directories then the image is
|
||||||
|
replicated in all of the directories for redundancy.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.checkpoint.edits.dir</name>
|
||||||
|
<value>${fs.checkpoint.dir}</value>
|
||||||
|
<description>Determines where on the local filesystem the DFS secondary
|
||||||
|
name node should store the temporary edits to merge.
|
||||||
|
If this is a comma-delimited list of directories then the edits are
|
||||||
|
replicated in all of the directories for redundancy.
|
||||||
|
Default value is same as fs.checkpoint.dir
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.checkpoint.period</name>
|
||||||
|
<value>3600</value>
|
||||||
|
<description>The number of seconds between two periodic checkpoints.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.checkpoint.size</name>
|
||||||
|
<value>67108864</value>
|
||||||
|
<description>The size of the current edit log (in bytes) that triggers
|
||||||
|
a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3.block.size</name>
|
||||||
|
<value>67108864</value>
|
||||||
|
<description>Block size to use when writing files to S3.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3.buffer.dir</name>
|
||||||
|
<value>${hadoop.tmp.dir}/s3</value>
|
||||||
|
<description>Determines where on the local filesystem the S3 filesystem
|
||||||
|
should store files before sending them to S3
|
||||||
|
(or after retrieving them from S3).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3.maxRetries</name>
|
||||||
|
<value>4</value>
|
||||||
|
<description>The maximum number of retries for reading or writing files to S3,
|
||||||
|
before we signal failure to the application.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3.sleepTimeSeconds</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>The number of seconds to sleep between each S3 retry.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>local.cache.size</name>
|
||||||
|
<value>10737418240</value>
|
||||||
|
<description>The limit on the size of cache you want to keep, set by default
|
||||||
|
to 10GB. This will act as a soft limit on the cache directory for out of band data.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.seqfile.compress.blocksize</name>
|
||||||
|
<value>1000000</value>
|
||||||
|
<description>The minimum block size for compression in block compressed
|
||||||
|
SequenceFiles.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.seqfile.lazydecompress</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>Should values of block-compressed SequenceFiles be decompressed
|
||||||
|
only when necessary.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.seqfile.sorter.recordlimit</name>
|
||||||
|
<value>1000000</value>
|
||||||
|
<description>The limit on number of records to be kept in memory in a spill
|
||||||
|
in SequenceFiles.Sorter
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.mapfile.bloom.size</name>
|
||||||
|
<value>1048576</value>
|
||||||
|
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
|
||||||
|
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
|
||||||
|
Larger values minimize the number of filters, which slightly increases the performance,
|
||||||
|
but may waste too much space if the total number of keys is usually much smaller
|
||||||
|
than this number.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>io.mapfile.bloom.error.rate</name>
|
||||||
|
<value>0.005</value>
|
||||||
|
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
|
||||||
|
As this value decreases, the size of BloomFilter-s increases exponentially. This
|
||||||
|
value is the probability of encountering false positives (default is 0.5%).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.util.hash.type</name>
|
||||||
|
<value>murmur</value>
|
||||||
|
<description>The default implementation of Hash. Currently this can take one of the
|
||||||
|
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- ipc properties -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.idlethreshold</name>
|
||||||
|
<value>4000</value>
|
||||||
|
<description>Defines the threshold number of connections after which
|
||||||
|
connections will be inspected for idleness.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.kill.max</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>Defines the maximum number of clients to disconnect in one go.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.connection.maxidletime</name>
|
||||||
|
<value>10000</value>
|
||||||
|
<description>The maximum time in msec after which a client will bring down the
|
||||||
|
connection to the server.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.connect.max.retries</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>Indicates the number of retries a client will make to establish
|
||||||
|
a server connection.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.server.listen.queue.size</name>
|
||||||
|
<value>128</value>
|
||||||
|
<description>Indicates the length of the listen queue for servers accepting
|
||||||
|
client connections.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.server.tcpnodelay</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||||
|
the server. Setting to true disables the algorithm and may decrease latency
|
||||||
|
with a cost of more/smaller packets.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.tcpnodelay</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||||
|
the client. Setting to true disables the algorithm and may decrease latency
|
||||||
|
with a cost of more/smaller packets.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Web Interface Configuration -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>webinterface.private.actions</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description> If set to true, the web interfaces of JT and NN may contain
|
||||||
|
actions, such as kill job, delete file, etc., that should
|
||||||
|
not be exposed to public. Enable this option if the interfaces
|
||||||
|
are only reachable by those who have the right authorization.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<!-- Proxy Configuration -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.rpc.socket.factory.class.default</name>
|
||||||
|
<value>org.apache.hadoop.net.StandardSocketFactory</value>
|
||||||
|
<description> Default SocketFactory to use. This parameter is expected to be
|
||||||
|
formatted as "package.FactoryClassName".
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
|
||||||
|
<value></value>
|
||||||
|
<description> SocketFactory to use to connect to a DFS. If null or empty, use
|
||||||
|
hadoop.rpc.socket.class.default. This socket factory is also used by
|
||||||
|
DFSClient to create sockets to DataNodes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.socks.server</name>
|
||||||
|
<value></value>
|
||||||
|
<description> Address (host:port) of the SOCKS server to be used by the
|
||||||
|
SocksSocketFactory.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Topology Configuration -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>topology.node.switch.mapping.impl</name>
|
||||||
|
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
|
||||||
|
<description> The default implementation of the DNSToSwitchMapping. It
|
||||||
|
invokes a script specified in topology.script.file.name to resolve
|
||||||
|
node names. If the value for topology.script.file.name is not set, the
|
||||||
|
default value of DEFAULT_RACK is returned for all node names.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>net.topology.impl</name>
|
||||||
|
<value>org.apache.hadoop.net.NetworkTopology</value>
|
||||||
|
<description> The default implementation of NetworkTopology which is classic three layer one.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>topology.script.file.name</name>
|
||||||
|
<value></value>
|
||||||
|
<description> The script name that should be invoked to resolve DNS names to
|
||||||
|
NetworkTopology names. Example: the script would take host.foo.bar as an
|
||||||
|
argument, and return /rack1 as the output.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>topology.script.number.args</name>
|
||||||
|
<value>100</value>
|
||||||
|
<description> The max number of args that the script configured with
|
||||||
|
topology.script.file.name should be run with. Each arg is an
|
||||||
|
IP address.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.uid.cache.secs</name>
|
||||||
|
<value>14400</value>
|
||||||
|
<description> NativeIO maintains a cache from UID to UserName. This is
|
||||||
|
the timeout for an entry in that cache. </description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<!-- HTTP web-consoles Authentication -->
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.type</name>
|
||||||
|
<value>simple</value>
|
||||||
|
<description>
|
||||||
|
Defines authentication used for Oozie HTTP endpoint.
|
||||||
|
Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.token.validity</name>
|
||||||
|
<value>36000</value>
|
||||||
|
<description>
|
||||||
|
Indicates how long (in seconds) an authentication token is valid before it has
|
||||||
|
to be renewed.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.signature.secret.file</name>
|
||||||
|
<value>${user.home}/hadoop-http-auth-signature-secret</value>
|
||||||
|
<description>
|
||||||
|
The signature secret for signing the authentication tokens.
|
||||||
|
If not set a random secret is generated at startup time.
|
||||||
|
The same secret should be used for JT/NN/DN/TT configurations.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.cookie.domain</name>
|
||||||
|
<value></value>
|
||||||
|
<description>
|
||||||
|
The domain to use for the HTTP cookie that stores the authentication token.
|
||||||
|
In order for authentication to work correctly across all Hadoop nodes' web-consoles
|
||||||
|
the domain must be correctly set.
|
||||||
|
IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
|
||||||
|
For this setting to work properly all nodes in the cluster must be configured
|
||||||
|
to generate URLs with hostname.domain names on it.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>
|
||||||
|
Indicates if anonymous requests are allowed when using 'simple' authentication.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.kerberos.principal</name>
|
||||||
|
<value>HTTP/localhost@LOCALHOST</value>
|
||||||
|
<description>
|
||||||
|
Indicates the Kerberos principal to be used for HTTP endpoint.
|
||||||
|
The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO specification.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.http.authentication.kerberos.keytab</name>
|
||||||
|
<value>${user.home}/hadoop.keytab</value>
|
||||||
|
<description>
|
||||||
|
Location of the keytab file with the credentials for the principal.
|
||||||
|
Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.relaxed.worker.version.check</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
By default datanodes refuse to connect to namenodes if their build
|
||||||
|
revision (svn revision) do not match, and tasktrackers refuse to
|
||||||
|
connect to jobtrackers if their build version (version, revision,
|
||||||
|
user, and source checksum) do not match. This option changes the
|
||||||
|
behavior of hadoop workers to only check for a version match (eg
|
||||||
|
"1.0.2") but ignore the other build fields (revision, user, and
|
||||||
|
source checksum).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.skip.worker.version.check</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
By default datanodes refuse to connect to namenodes if their build
|
||||||
|
revision (svn revision) do not match, and tasktrackers refuse to
|
||||||
|
connect to jobtrackers if their build version (version, revision,
|
||||||
|
user, and source checksum) do not match. This option changes the
|
||||||
|
behavior of hadoop workers to skip doing a version check at all.
|
||||||
|
This option supersedes the 'hadoop.relaxed.worker.version.check'
|
||||||
|
option.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>hadoop.jetty.logs.serve.aliases</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>
|
||||||
|
Enable/Disable aliases serving from jetty
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>ipc.client.fallback-to-simple-auth-allowed</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
When a client is configured to attempt a secure connection, but attempts to
|
||||||
|
connect to an insecure server, that server may instruct the client to
|
||||||
|
switch to SASL SIMPLE (unsecure) authentication. This setting controls
|
||||||
|
whether or not the client will accept this instruction from the server.
|
||||||
|
When false (the default), the client will not allow the fallback to SIMPLE
|
||||||
|
authentication, and will abort the connection.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
</configuration>
|
|
@ -0,0 +1,9 @@
|
||||||
|
-- Initialize the Hive metastore database in MySQL and create the 'hive'
-- user with full privileges on it. Intended to be fed to the mysql client.
CREATE DATABASE metastore;
USE metastore;
-- Load the Hive 0.10.0 metastore schema shipped with the Hive distribution.
SOURCE /opt/hive/scripts/metastore/upgrade/mysql/hive-schema-0.10.0.mysql.sql;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'pass';
-- Drop any pre-existing grants before re-granting, so the script is repeatable.
REVOKE ALL PRIVILEGES, GRANT OPTION FROM 'hive'@'localhost';
-- Grant access both locally and from any remote host ('%').
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'localhost' IDENTIFIED BY 'pass';
GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'%' IDENTIFIED BY 'pass';
FLUSH PRIVILEGES;
exit
|
|
@ -0,0 +1,4 @@
|
||||||
|
-- Create the Oozie database and grant the 'oozie' user full access to it,
-- both locally and from any remote host ('%'). Fed to the mysql client.
create database oozie;
grant all privileges on oozie.* to 'oozie'@'localhost' identified by 'oozie';
grant all privileges on oozie.* to 'oozie'@'%' identified by 'oozie';
exit
|
|
@ -0,0 +1,709 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
|
||||||
|
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||||
|
<!-- wish to modify from this file into hdfs-site.xml and change them -->
|
||||||
|
<!-- there. If hdfs-site.xml does not already exist, create it. -->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.logging.level</name>
|
||||||
|
<value>info</value>
|
||||||
|
<description>The logging level for dfs namenode. Other values are "dir"(trac
|
||||||
|
e namespace mutations), "block"(trace block under/over replications and block
|
||||||
|
creations/deletions), or "all".</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.rpc-address</name>
|
||||||
|
<value></value>
|
||||||
|
<description>
|
||||||
|
RPC address that handles all clients requests. If empty then we'll get the
|
||||||
|
value from fs.default.name.
|
||||||
|
The value of this property will take the form of hdfs://nn-host1:rpc-port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.secondary.http.address</name>
|
||||||
|
<value>0.0.0.0:50090</value>
|
||||||
|
<description>
|
||||||
|
The secondary namenode http server address and port.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.address</name>
|
||||||
|
<value>0.0.0.0:50010</value>
|
||||||
|
<description>
|
||||||
|
The datanode server address and port for data transfer.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.http.address</name>
|
||||||
|
<value>0.0.0.0:50075</value>
|
||||||
|
<description>
|
||||||
|
The datanode http server address and port.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.ipc.address</name>
|
||||||
|
<value>0.0.0.0:50020</value>
|
||||||
|
<description>
|
||||||
|
The datanode ipc server address and port.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.handler.count</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>The number of server threads for the datanode.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.http.address</name>
|
||||||
|
<value>0.0.0.0:50070</value>
|
||||||
|
<description>
|
||||||
|
The address and the base port where the dfs namenode web ui will listen on.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.https.enable</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Decide if HTTPS(SSL) is supported on HDFS
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.https.need.client.auth</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Whether SSL client certificate authentication is required
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.https.server.keystore.resource</name>
|
||||||
|
<value>ssl-server.xml</value>
|
||||||
|
<description>Resource file from which ssl server keystore
|
||||||
|
information will be extracted
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.https.client.keystore.resource</name>
|
||||||
|
<value>ssl-client.xml</value>
|
||||||
|
<description>Resource file from which ssl client keystore
|
||||||
|
information will be extracted
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.https.address</name>
|
||||||
|
<value>0.0.0.0:50475</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.https.address</name>
|
||||||
|
<value>0.0.0.0:50470</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.dns.interface</name>
|
||||||
|
<value>default</value>
|
||||||
|
<description>The name of the Network Interface from which a data node should
|
||||||
|
report its IP address.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.dns.nameserver</name>
|
||||||
|
<value>default</value>
|
||||||
|
<description>The host name or IP address of the name server (DNS)
|
||||||
|
which a DataNode should use to determine the host name used by the
|
||||||
|
NameNode for communication and display purposes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication.considerLoad</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>Decide if chooseTarget considers the target's load or not
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dfs.default.chunk.view.size</name>
|
||||||
|
<value>32768</value>
|
||||||
|
<description>The number of bytes to view for a file on the browser.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.du.reserved</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.name.dir</name>
|
||||||
|
<value>${hadoop.tmp.dir}/dfs/name</value>
|
||||||
|
<description>Determines where on the local filesystem the DFS name node
|
||||||
|
should store the name table(fsimage). If this is a comma-delimited list
|
||||||
|
of directories then the name table is replicated in all of the
|
||||||
|
directories, for redundancy. </description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.name.edits.dir</name>
|
||||||
|
<value>${dfs.name.dir}</value>
|
||||||
|
<description>Determines where on the local filesystem the DFS name node
|
||||||
|
should store the transaction (edits) file. If this is a comma-delimited list
|
||||||
|
of directories then the transaction file is replicated in all of the
|
||||||
|
directories, for redundancy. Default value is same as dfs.name.dir
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.edits.toleration.length</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>
|
||||||
|
The length in bytes that namenode is willing to tolerate when the edit log
|
||||||
|
is corrupted. The edit log toleration feature checks the entire edit log.
|
||||||
|
It computes read length (the length of valid data), corruption length and
|
||||||
|
padding length. In case that corruption length is non-zero, the corruption
|
||||||
|
will be tolerated only if the corruption length is less than or equal to
|
||||||
|
the toleration length.
|
||||||
|
|
||||||
|
For disabling edit log toleration feature, set this property to -1. When
|
||||||
|
the feature is disabled, the end of edit log will not be checked. In this
|
||||||
|
case, namenode will startup normally even if the end of edit log is
|
||||||
|
corrupted.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.web.ugi</name>
|
||||||
|
<value>webuser,webgroup</value>
|
||||||
|
<description>The user account used by the web interface.
|
||||||
|
Syntax: USERNAME,GROUP1,GROUP2, ...
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.permissions</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>
|
||||||
|
If "true", enable permission checking in HDFS.
|
||||||
|
If "false", permission checking is turned off,
|
||||||
|
but all other behavior is unchanged.
|
||||||
|
Switching from one parameter value to the other does not change the mode,
|
||||||
|
owner or group of files or directories.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.permissions.supergroup</name>
|
||||||
|
<value>supergroup</value>
|
||||||
|
<description>The name of the group of super-users.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.block.access.token.enable</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
If "true", access tokens are used as capabilities for accessing datanodes.
|
||||||
|
If "false", no access tokens are checked on accessing datanodes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.block.access.key.update.interval</name>
|
||||||
|
<value>600</value>
|
||||||
|
<description>
|
||||||
|
Interval in minutes at which namenode updates its access keys.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.block.access.token.lifetime</name>
|
||||||
|
<value>600</value>
|
||||||
|
<description>The lifetime of access tokens in minutes.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.data.dir</name>
|
||||||
|
<value>${hadoop.tmp.dir}/dfs/data</value>
|
||||||
|
<description>Determines where on the local filesystem an DFS data node
|
||||||
|
should store its blocks. If this is a comma-delimited
|
||||||
|
list of directories, then data will be stored in all named
|
||||||
|
directories, typically on different devices.
|
||||||
|
Directories that do not exist are ignored.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.data.dir.perm</name>
|
||||||
|
<value>755</value>
|
||||||
|
<description>Permissions for the directories on on the local filesystem where
|
||||||
|
the DFS data node store its blocks. The permissions can either be octal or
|
||||||
|
symbolic.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>Default block replication.
|
||||||
|
The actual number of replications can be specified when the file is created.
|
||||||
|
The default is used if replication is not specified in create time.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication.max</name>
|
||||||
|
<value>512</value>
|
||||||
|
<description>Maximal block replication.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication.min</name>
|
||||||
|
<value>1</value>
|
||||||
|
<description>Minimal block replication.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.block.size</name>
|
||||||
|
<value>67108864</value>
|
||||||
|
<description>The default block size for new files.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.df.interval</name>
|
||||||
|
<value>60000</value>
|
||||||
|
<description>Disk usage statistics refresh interval in msec.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.block.write.retries</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>The number of retries for writing blocks to the data nodes,
|
||||||
|
before we signal failure to the application.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.blockreport.intervalMsec</name>
|
||||||
|
<value>3600000</value>
|
||||||
|
<description>Determines block reporting interval in milliseconds.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.blockreport.initialDelay</name> <value>0</value>
|
||||||
|
<description>Delay for first block report in seconds.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.heartbeat.interval</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>Determines datanode heartbeat interval in seconds.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.handler.count</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>The number of server threads for the namenode.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.safemode.threshold.pct</name>
|
||||||
|
<value>0.999f</value>
|
||||||
|
<description>
|
||||||
|
Specifies the percentage of blocks that should satisfy
|
||||||
|
the minimal replication requirement defined by dfs.replication.min.
|
||||||
|
Values less than or equal to 0 mean not to wait for any particular
|
||||||
|
percentage of blocks before exiting safemode.
|
||||||
|
Values greater than 1 will make safe mode permanent.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.safemode.min.datanodes</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>
|
||||||
|
Specifies the number of datanodes that must be considered alive
|
||||||
|
before the name node exits safemode.
|
||||||
|
Values less than or equal to 0 mean not to take the number of live
|
||||||
|
datanodes into account when deciding whether to remain in safe mode
|
||||||
|
during startup.
|
||||||
|
Values greater than the number of datanodes in the cluster
|
||||||
|
will make safe mode permanent.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.safemode.extension</name>
|
||||||
|
<value>30000</value>
|
||||||
|
<description>
|
||||||
|
Determines extension of safe mode in milliseconds
|
||||||
|
after the threshold level is reached.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.balance.bandwidthPerSec</name>
|
||||||
|
<value>1048576</value>
|
||||||
|
<description>
|
||||||
|
Specifies the maximum amount of bandwidth that each datanode
|
||||||
|
can utilize for the balancing purpose in term of
|
||||||
|
the number of bytes per second.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.hosts</name>
|
||||||
|
<value></value>
|
||||||
|
<description>Names a file that contains a list of hosts that are
|
||||||
|
permitted to connect to the namenode. The full pathname of the file
|
||||||
|
must be specified. If the value is empty, all hosts are
|
||||||
|
permitted.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.hosts.exclude</name>
|
||||||
|
<value></value>
|
||||||
|
<description>Names a file that contains a list of hosts that are
|
||||||
|
not permitted to connect to the namenode. The full pathname of the
|
||||||
|
file must be specified. If the value is empty, no hosts are
|
||||||
|
excluded.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.max.objects</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>The maximum number of files, directories and blocks
|
||||||
|
dfs supports. A value of zero indicates no limit to the number
|
||||||
|
of objects that dfs supports.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.decommission.interval</name>
|
||||||
|
<value>30</value>
|
||||||
|
<description>Namenode periodicity in seconds to check if decommission is
|
||||||
|
complete.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.decommission.nodes.per.interval</name>
|
||||||
|
<value>5</value>
|
||||||
|
<description>The number of nodes namenode checks if decommission is complete
|
||||||
|
in each dfs.namenode.decommission.interval.</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication.interval</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>The periodicity in seconds with which the namenode computes
|
||||||
|
repliaction work for datanodes. </description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.access.time.precision</name>
|
||||||
|
<value>3600000</value>
|
||||||
|
<description>The access time for HDFS file is precise upto this value.
|
||||||
|
The default value is 1 hour. Setting a value of 0 disables
|
||||||
|
access times for HDFS.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.support.append</name>
|
||||||
|
<description>
|
||||||
|
This option is no longer supported. HBase no longer requires that
|
||||||
|
this option be enabled as sync is now enabled by default. See
|
||||||
|
HADOOP-8230 for additional information.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.delegation.key.update-interval</name>
|
||||||
|
<value>86400000</value>
|
||||||
|
<description>The update interval for master key for delegation tokens
|
||||||
|
in the namenode in milliseconds.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.delegation.token.max-lifetime</name>
|
||||||
|
<value>604800000</value>
|
||||||
|
<description>The maximum lifetime in milliseconds for which a delegation
|
||||||
|
token is valid.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.delegation.token.renew-interval</name>
|
||||||
|
<value>86400000</value>
|
||||||
|
<description>The renewal interval for delegation token in milliseconds.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.failed.volumes.tolerated</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>The number of volumes that are allowed to
|
||||||
|
fail before a datanode stops offering service. By default
|
||||||
|
any volume failure will cause a datanode to shutdown.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.max.xcievers</name>
|
||||||
|
<value>4096</value>
|
||||||
|
<description>Specifies the maximum number of threads to use for transferring data
|
||||||
|
in and out of the DN.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.readahead.bytes</name>
|
||||||
|
<value>4193404</value>
|
||||||
|
<description>
|
||||||
|
While reading block files, if the Hadoop native libraries are available,
|
||||||
|
the datanode can use the posix_fadvise system call to explicitly
|
||||||
|
page data into the operating system buffer cache ahead of the current
|
||||||
|
reader's position. This can improve performance especially when
|
||||||
|
disks are highly contended.
|
||||||
|
|
||||||
|
This configuration specifies the number of bytes ahead of the current
|
||||||
|
read position which the datanode will attempt to read ahead. This
|
||||||
|
feature may be disabled by configuring this property to 0.
|
||||||
|
|
||||||
|
If the native libraries are not available, this configuration has no
|
||||||
|
effect.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.drop.cache.behind.reads</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
In some workloads, the data read from HDFS is known to be significantly
|
||||||
|
large enough that it is unlikely to be useful to cache it in the
|
||||||
|
operating system buffer cache. In this case, the DataNode may be
|
||||||
|
configured to automatically purge all data from the buffer cache
|
||||||
|
after it is delivered to the client. This behavior is automatically
|
||||||
|
disabled for workloads which read only short sections of a block
|
||||||
|
(e.g HBase random-IO workloads).
|
||||||
|
|
||||||
|
This may improve performance for some workloads by freeing buffer
|
||||||
|
cache spage usage for more cacheable data.
|
||||||
|
|
||||||
|
If the Hadoop native libraries are not available, this configuration
|
||||||
|
has no effect.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.drop.cache.behind.writes</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
In some workloads, the data written to HDFS is known to be significantly
|
||||||
|
large enough that it is unlikely to be useful to cache it in the
|
||||||
|
operating system buffer cache. In this case, the DataNode may be
|
||||||
|
configured to automatically purge all data from the buffer cache
|
||||||
|
after it is written to disk.
|
||||||
|
|
||||||
|
This may improve performance for some workloads by freeing buffer
|
||||||
|
cache spage usage for more cacheable data.
|
||||||
|
|
||||||
|
If the Hadoop native libraries are not available, this configuration
|
||||||
|
has no effect.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.sync.behind.writes</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
If this configuration is enabled, the datanode will instruct the
|
||||||
|
operating system to enqueue all written data to the disk immediately
|
||||||
|
after it is written. This differs from the usual OS policy which
|
||||||
|
may wait for up to 30 seconds before triggering writeback.
|
||||||
|
|
||||||
|
This may improve performance for some workloads by smoothing the
|
||||||
|
IO profile for data written to disk.
|
||||||
|
|
||||||
|
If the Hadoop native libraries are not available, this configuration
|
||||||
|
has no effect.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.use.datanode.hostname</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Whether clients should use datanode hostnames when
|
||||||
|
connecting to datanodes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.use.datanode.hostname</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>Whether datanodes should use datanode hostnames when
|
||||||
|
connecting to other datanodes for data transfer.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.local.interfaces</name>
|
||||||
|
<value></value>
|
||||||
|
<description>A comma separated list of network interface names to use
|
||||||
|
for data transfer between the client and datanodes. When creating
|
||||||
|
a connection to read from or write to a datanode, the client
|
||||||
|
chooses one of the specified interfaces at random and binds its
|
||||||
|
socket to the IP of that interface. Individual names may be
|
||||||
|
specified as either an interface name (eg "eth0"), a subinterface
|
||||||
|
name (eg "eth0:0"), or an IP address (which may be specified using
|
||||||
|
CIDR notation to match a range of IPs).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.image.transfer.bandwidthPerSec</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>
|
||||||
|
Specifies the maximum amount of bandwidth that can be utilized
|
||||||
|
for image transfer in term of the number of bytes per second.
|
||||||
|
A default value of 0 indicates that throttling is disabled.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.webhdfs.enabled</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
Enable WebHDFS (REST API) in Namenodes and Datanodes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.kerberos.internal.spnego.principal</name>
|
||||||
|
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
|
||||||
|
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.invalidate.work.pct.per.iteration</name>
|
||||||
|
<value>0.32f</value>
|
||||||
|
<description>
|
||||||
|
*Note*: Advanced property. Change with caution.
|
||||||
|
This determines the percentage amount of block
|
||||||
|
invalidations (deletes) to do over a single DN heartbeat
|
||||||
|
deletion command. The final deletion count is determined by applying this
|
||||||
|
percentage to the number of live nodes in the system.
|
||||||
|
The resultant number is the number of blocks from the deletion list
|
||||||
|
chosen for proper invalidation over a single heartbeat of a single DN.
|
||||||
|
Value should be a positive, non-zero percentage in float notation (X.Yf),
|
||||||
|
with 1.0f meaning 100%.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.replication.work.multiplier.per.iteration</name>
|
||||||
|
<value>2</value>
|
||||||
|
<description>
|
||||||
|
*Note*: Advanced property. Change with caution.
|
||||||
|
This determines the total amount of block transfers to begin in
|
||||||
|
parallel at a DN, for replication, when such a command list is being
|
||||||
|
sent over a DN heartbeat by the NN. The actual number is obtained by
|
||||||
|
multiplying this multiplier with the total number of live nodes in the
|
||||||
|
cluster. The result number is the number of blocks to begin transfers
|
||||||
|
immediately for, per DN heartbeat. This number can be any positive,
|
||||||
|
non-zero integer.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.avoid.read.stale.datanode</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
Indicate whether or not to avoid reading from "stale" datanodes whose
|
||||||
|
heartbeat messages have not been received by the namenode
|
||||||
|
for more than a specified time interval. Stale datanodes will be
|
||||||
|
moved to the end of the node list returned for reading. See
|
||||||
|
dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.avoid.write.stale.datanode</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>
|
||||||
|
Indicate whether or not to avoid writing to "stale" datanodes whose
|
||||||
|
heartbeat messages have not been received by the namenode
|
||||||
|
for more than a specified time interval. Writes will avoid using
|
||||||
|
stale datanodes unless more than a configured ratio
|
||||||
|
(dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
|
||||||
|
stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
|
||||||
|
for reads.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.stale.datanode.interval</name>
|
||||||
|
<value>30000</value>
|
||||||
|
<description>
|
||||||
|
Default time interval for marking a datanode as "stale", i.e., if
|
||||||
|
the namenode has not received heartbeat msg from a datanode for
|
||||||
|
more than this time interval, the datanode will be marked and treated
|
||||||
|
as "stale" by default. The stale interval cannot be too small since
|
||||||
|
otherwise this may cause too frequent change of stale states.
|
||||||
|
We thus set a minimum stale interval value (the default value is 3 times
|
||||||
|
of heartbeat interval) and guarantee that the stale interval cannot be less
|
||||||
|
than the minimum value.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.write.stale.datanode.ratio</name>
|
||||||
|
<value>0.5f</value>
|
||||||
|
<description>
|
||||||
|
When the ratio of number stale datanodes to total datanodes marked
|
||||||
|
is greater than this ratio, stop avoiding writing to stale nodes so
|
||||||
|
as to prevent causing hotspots.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.plugins</name>
|
||||||
|
<value></value>
|
||||||
|
<description>Comma-separated list of datanode plug-ins to be activated.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.plugins</name>
|
||||||
|
<value></value>
|
||||||
|
<description>Comma-separated list of namenode plug-ins to be activated.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
</configuration>
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
|
||||||
|
HADOOP_CONF=/etc/hadoop
|
||||||
|
|
||||||
|
while [ $# -gt 0 ] ; do
|
||||||
|
nodeArg=$1
|
||||||
|
exec< ${HADOOP_CONF}/topology.data
|
||||||
|
result=""
|
||||||
|
while read line ; do
|
||||||
|
ar=( $line )
|
||||||
|
if [ "${ar[0]}" = "$nodeArg" ] ; then
|
||||||
|
result="${ar[1]}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
if [ -z "$result" ] ; then
|
||||||
|
echo -n "/default/rack "
|
||||||
|
else
|
||||||
|
echo -n "$result "
|
||||||
|
fi
|
||||||
|
done
|
|
@ -0,0 +1,107 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def start_processes(remote, *processes):
|
||||||
|
for proc in processes:
|
||||||
|
remote.execute_command('sudo su -c "/usr/sbin/hadoop-daemon.sh '
|
||||||
|
'start %s" hadoop' % proc)
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_nodes(remote, service):
|
||||||
|
remote.execute_command("sudo su -c 'hadoop %s -refreshNodes' hadoop"
|
||||||
|
% service)
|
||||||
|
|
||||||
|
|
||||||
|
def format_namenode(remote):
|
||||||
|
remote.execute_command("sudo su -c 'hadoop namenode -format' hadoop")
|
||||||
|
|
||||||
|
|
||||||
|
def hive_create_warehouse_dir(remote):
|
||||||
|
LOG.debug("Creating Hive warehouse dir")
|
||||||
|
remote.execute_command("sudo su - -c 'hadoop fs -mkdir "
|
||||||
|
"/user/hive/warehouse' hadoop")
|
||||||
|
|
||||||
|
|
||||||
|
def hive_copy_shared_conf(remote, dest):
|
||||||
|
LOG.debug("Copying shared Hive conf")
|
||||||
|
remote.execute_command(
|
||||||
|
"sudo su - -c 'hadoop fs -put /opt/hive/conf/hive-site.xml "
|
||||||
|
"%s' hadoop" % dest)
|
||||||
|
|
||||||
|
|
||||||
|
def oozie_share_lib(remote, nn_hostname):
|
||||||
|
LOG.debug("Sharing Oozie libs to hdfs://%s:8020" % nn_hostname)
|
||||||
|
#remote.execute_command('sudo su - -c "/opt/oozie/bin/oozie-setup.sh '
|
||||||
|
# 'sharelib create -fs hdfs://%s:8020" hadoop'
|
||||||
|
# % nn_hostname)
|
||||||
|
|
||||||
|
#TODO(alazarev) return 'oozie-setup.sh sharelib create' back
|
||||||
|
#when #1262023 is resolved
|
||||||
|
remote.execute_command(
|
||||||
|
'sudo su - -c "mkdir /tmp/oozielib && '
|
||||||
|
'tar zxf /opt/oozie/oozie-sharelib-4.0.0.tar.gz -C /tmp/oozielib && '
|
||||||
|
'hadoop fs -put /tmp/oozielib/share share && '
|
||||||
|
'rm -rf /tmp/oozielib" hadoop')
|
||||||
|
|
||||||
|
LOG.debug("Creating sqlfile for Oozie")
|
||||||
|
remote.execute_command('sudo su - -c "/opt/oozie/bin/ooziedb.sh '
|
||||||
|
'create -sqlfile oozie.sql '
|
||||||
|
'-run Validate DB Connection" hadoop')
|
||||||
|
|
||||||
|
|
||||||
|
def check_datanodes_count(remote, count):
|
||||||
|
if count < 1:
|
||||||
|
return True
|
||||||
|
|
||||||
|
LOG.debug("Checking datanode count")
|
||||||
|
exit_code, stdout = remote.execute_command(
|
||||||
|
'sudo su -c "hadoop dfsadmin -report | '
|
||||||
|
'grep \'Datanodes available:\' | '
|
||||||
|
'awk \'{print \\$3}\'" hadoop')
|
||||||
|
LOG.debug("Datanode count='%s'" % stdout.rstrip())
|
||||||
|
|
||||||
|
return exit_code == 0 and int(stdout) == count
|
||||||
|
|
||||||
|
|
||||||
|
def mysql_start(remote, mysql_instance):
|
||||||
|
LOG.debug("Starting mysql at %s" % mysql_instance.hostname())
|
||||||
|
remote.execute_command("/opt/start-mysql.sh")
|
||||||
|
|
||||||
|
|
||||||
|
def oozie_create_db(remote):
|
||||||
|
LOG.debug("Creating Oozie DB Schema...")
|
||||||
|
remote.execute_command("mysql -u root < /tmp/create_oozie_db.sql")
|
||||||
|
|
||||||
|
|
||||||
|
def start_oozie(remote):
|
||||||
|
remote.execute_command(
|
||||||
|
'sudo su - -c "/opt/oozie/bin/oozied.sh start" hadoop')
|
||||||
|
|
||||||
|
|
||||||
|
def hive_create_db(remote):
|
||||||
|
LOG.debug("Creating Hive metastore db...")
|
||||||
|
remote.execute_command("mysql -u root < /tmp/create_hive_db.sql")
|
||||||
|
|
||||||
|
|
||||||
|
def hive_metastore_start(remote):
|
||||||
|
LOG.debug("Starting Hive Metastore Server...")
|
||||||
|
remote.execute_command("sudo su - -c 'nohup /opt/hive/bin/hive"
|
||||||
|
" --service metastore > /dev/null &' hadoop")
|
|
@ -0,0 +1,101 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from sahara import context
|
||||||
|
from sahara.openstack.common import timeutils
|
||||||
|
from sahara.plugins.general import utils
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import config_helper
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import run_scripts as run
|
||||||
|
from sahara.utils import remote
|
||||||
|
|
||||||
|
|
||||||
|
def decommission_tt(jt, inst_to_be_deleted, survived_inst):
|
||||||
|
with remote.get_remote(jt) as r:
|
||||||
|
r.write_file_to('/etc/hadoop/tt.excl',
|
||||||
|
utils.generate_fqdn_host_names(
|
||||||
|
inst_to_be_deleted))
|
||||||
|
run.refresh_nodes(remote.get_remote(jt), "mradmin")
|
||||||
|
context.sleep(3)
|
||||||
|
r.write_files_to({'/etc/hadoop/tt.incl':
|
||||||
|
utils.generate_fqdn_host_names(survived_inst),
|
||||||
|
'/etc/hadoop/tt.excl': "",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
|
||||||
|
with remote.get_remote(nn) as r:
|
||||||
|
r.write_file_to('/etc/hadoop/dn.excl',
|
||||||
|
utils.generate_fqdn_host_names(
|
||||||
|
inst_to_be_deleted))
|
||||||
|
run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
|
||||||
|
context.sleep(3)
|
||||||
|
|
||||||
|
timeout = config_helper.get_decommissioning_timeout(
|
||||||
|
nn.node_group.cluster)
|
||||||
|
s_time = timeutils.utcnow()
|
||||||
|
all_found = False
|
||||||
|
|
||||||
|
while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
|
||||||
|
cmd = r.execute_command(
|
||||||
|
"sudo su -c 'hadoop dfsadmin -report' hadoop")
|
||||||
|
all_found = True
|
||||||
|
datanodes_info = parse_dfs_report(cmd[1])
|
||||||
|
for i in inst_to_be_deleted:
|
||||||
|
for dn in datanodes_info:
|
||||||
|
if (dn["Name"].startswith(i.internal_ip)) and (
|
||||||
|
dn["Decommission Status"] != "Decommissioned"):
|
||||||
|
all_found = False
|
||||||
|
break
|
||||||
|
|
||||||
|
if all_found:
|
||||||
|
r.write_files_to({'/etc/hadoop/dn.incl':
|
||||||
|
utils.
|
||||||
|
generate_fqdn_host_names(survived_inst),
|
||||||
|
'/etc/hadoop/dn.excl': "",
|
||||||
|
})
|
||||||
|
break
|
||||||
|
context.sleep(3)
|
||||||
|
|
||||||
|
if not all_found:
|
||||||
|
raise Exception("Cannot finish decommission in %s seconds" %
|
||||||
|
timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_dfs_report(cmd_output):
|
||||||
|
report = cmd_output.rstrip().split(os.linesep)
|
||||||
|
array = []
|
||||||
|
started = False
|
||||||
|
for line in report:
|
||||||
|
if started:
|
||||||
|
array.append(line)
|
||||||
|
if line.startswith("Datanodes available"):
|
||||||
|
started = True
|
||||||
|
|
||||||
|
res = []
|
||||||
|
datanode_info = {}
|
||||||
|
for i in xrange(0, len(array)):
|
||||||
|
if array[i]:
|
||||||
|
idx = str.find(array[i], ':')
|
||||||
|
name = array[i][0:idx]
|
||||||
|
value = array[i][idx + 2:]
|
||||||
|
datanode_info[name.strip()] = value.strip()
|
||||||
|
if not array[i] and datanode_info:
|
||||||
|
res.append(datanode_info)
|
||||||
|
datanode_info = {}
|
||||||
|
if datanode_info:
|
||||||
|
res.append(datanode_info)
|
||||||
|
return res
|
|
@ -0,0 +1,484 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from oslo.config import cfg
|
||||||
|
import six
|
||||||
|
|
||||||
|
from sahara import conductor
|
||||||
|
from sahara import context
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
from sahara.plugins.general import exceptions as ex
|
||||||
|
from sahara.plugins.general import utils
|
||||||
|
from sahara.plugins.vanilla import abstractversionhandler as avm
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import config_helper as c_helper
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import run_scripts as run
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import scaling as sc
|
||||||
|
from sahara.topology import topology_helper as th
|
||||||
|
from sahara.utils import edp
|
||||||
|
from sahara.utils import files as f
|
||||||
|
from sahara.utils import general as g
|
||||||
|
from sahara.utils import remote
|
||||||
|
|
||||||
|
|
||||||
|
conductor = conductor.API
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
CONF = cfg.CONF
|
||||||
|
|
||||||
|
|
||||||
|
class VersionHandler(avm.AbstractVersionHandler):
|
||||||
|
def get_plugin_configs(self):
|
||||||
|
return c_helper.get_plugin_configs()
|
||||||
|
|
||||||
|
def get_node_processes(self):
|
||||||
|
return {
|
||||||
|
"HDFS": ["namenode", "datanode", "secondarynamenode"],
|
||||||
|
"MapReduce": ["tasktracker", "jobtracker"],
|
||||||
|
"JobFlow": ["oozie"],
|
||||||
|
"Hive": ["hiveserver"]
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_resource_manager_uri(self, cluster):
|
||||||
|
return cluster['info']['MapReduce']['JobTracker']
|
||||||
|
|
||||||
|
def get_oozie_server(self, cluster):
|
||||||
|
return utils.get_oozie(cluster)
|
||||||
|
|
||||||
|
def validate(self, cluster):
|
||||||
|
nn_count = sum([ng.count for ng
|
||||||
|
in utils.get_node_groups(cluster, "namenode")])
|
||||||
|
if nn_count != 1:
|
||||||
|
raise ex.InvalidComponentCountException("namenode", 1, nn_count)
|
||||||
|
|
||||||
|
jt_count = sum([ng.count for ng
|
||||||
|
in utils.get_node_groups(cluster, "jobtracker")])
|
||||||
|
|
||||||
|
if jt_count not in [0, 1]:
|
||||||
|
raise ex.InvalidComponentCountException("jobtracker", '0 or 1',
|
||||||
|
jt_count)
|
||||||
|
|
||||||
|
oozie_count = sum([ng.count for ng
|
||||||
|
in utils.get_node_groups(cluster, "oozie")])
|
||||||
|
|
||||||
|
if oozie_count not in [0, 1]:
|
||||||
|
raise ex.InvalidComponentCountException("oozie", '0 or 1',
|
||||||
|
oozie_count)
|
||||||
|
|
||||||
|
hive_count = sum([ng.count for ng
|
||||||
|
in utils.get_node_groups(cluster, "hiveserver")])
|
||||||
|
if jt_count == 0:
|
||||||
|
|
||||||
|
tt_count = sum([ng.count for ng
|
||||||
|
in utils.get_node_groups(cluster, "tasktracker")])
|
||||||
|
if tt_count > 0:
|
||||||
|
raise ex.RequiredServiceMissingException(
|
||||||
|
"jobtracker", required_by="tasktracker")
|
||||||
|
|
||||||
|
if oozie_count > 0:
|
||||||
|
raise ex.RequiredServiceMissingException(
|
||||||
|
"jobtracker", required_by="oozie")
|
||||||
|
|
||||||
|
if hive_count > 0:
|
||||||
|
raise ex.RequiredServiceMissingException(
|
||||||
|
"jobtracker", required_by="hive")
|
||||||
|
|
||||||
|
if hive_count not in [0, 1]:
|
||||||
|
raise ex.InvalidComponentCountException("hive", '0 or 1',
|
||||||
|
hive_count)
|
||||||
|
|
||||||
|
def configure_cluster(self, cluster):
|
||||||
|
instances = utils.get_instances(cluster)
|
||||||
|
|
||||||
|
self._setup_instances(cluster, instances)
|
||||||
|
|
||||||
|
def start_cluster(self, cluster):
|
||||||
|
nn_instance = utils.get_namenode(cluster)
|
||||||
|
with remote.get_remote(nn_instance) as r:
|
||||||
|
run.format_namenode(r)
|
||||||
|
run.start_processes(r, "namenode")
|
||||||
|
|
||||||
|
for snn in utils.get_secondarynamenodes(cluster):
|
||||||
|
run.start_processes(remote.get_remote(snn), "secondarynamenode")
|
||||||
|
|
||||||
|
jt_instance = utils.get_jobtracker(cluster)
|
||||||
|
if jt_instance:
|
||||||
|
run.start_processes(remote.get_remote(jt_instance), "jobtracker")
|
||||||
|
|
||||||
|
self._start_tt_dn_processes(utils.get_instances(cluster))
|
||||||
|
|
||||||
|
self._await_datanodes(cluster)
|
||||||
|
|
||||||
|
LOG.info("Hadoop services in cluster %s have been started" %
|
||||||
|
cluster.name)
|
||||||
|
|
||||||
|
oozie = utils.get_oozie(cluster)
|
||||||
|
if oozie:
|
||||||
|
with remote.get_remote(oozie) as r:
|
||||||
|
if c_helper.is_mysql_enable(cluster):
|
||||||
|
run.mysql_start(r, oozie)
|
||||||
|
run.oozie_create_db(r)
|
||||||
|
run.oozie_share_lib(r, nn_instance.hostname())
|
||||||
|
run.start_oozie(r)
|
||||||
|
LOG.info("Oozie service at '%s' has been started",
|
||||||
|
nn_instance.hostname())
|
||||||
|
|
||||||
|
hive_server = utils.get_hiveserver(cluster)
|
||||||
|
if hive_server:
|
||||||
|
with remote.get_remote(hive_server) as r:
|
||||||
|
run.hive_create_warehouse_dir(r)
|
||||||
|
run.hive_copy_shared_conf(
|
||||||
|
r, edp.get_hive_shared_conf_path('hadoop'))
|
||||||
|
|
||||||
|
if c_helper.is_mysql_enable(cluster):
|
||||||
|
if not oozie or hive_server.hostname() != oozie.hostname():
|
||||||
|
run.mysql_start(r, hive_server)
|
||||||
|
run.hive_create_db(r)
|
||||||
|
run.hive_metastore_start(r)
|
||||||
|
LOG.info("Hive Metastore server at %s has been started",
|
||||||
|
hive_server.hostname())
|
||||||
|
|
||||||
|
LOG.info('Cluster %s has been started successfully' % cluster.name)
|
||||||
|
self._set_cluster_info(cluster)
|
||||||
|
|
||||||
|
def _await_datanodes(self, cluster):
|
||||||
|
datanodes_count = len(utils.get_datanodes(cluster))
|
||||||
|
if datanodes_count < 1:
|
||||||
|
return
|
||||||
|
|
||||||
|
LOG.info("Waiting %s datanodes to start up" % datanodes_count)
|
||||||
|
with remote.get_remote(utils.get_namenode(cluster)) as r:
|
||||||
|
while True:
|
||||||
|
if run.check_datanodes_count(r, datanodes_count):
|
||||||
|
LOG.info(
|
||||||
|
'Datanodes on cluster %s has been started' %
|
||||||
|
cluster.name)
|
||||||
|
return
|
||||||
|
|
||||||
|
context.sleep(1)
|
||||||
|
|
||||||
|
if not g.check_cluster_exists(cluster):
|
||||||
|
LOG.info(
|
||||||
|
'Stop waiting datanodes on cluster %s since it has '
|
||||||
|
'been deleted' % cluster.name)
|
||||||
|
return
|
||||||
|
|
||||||
|
def _extract_configs_to_extra(self, cluster):
|
||||||
|
oozie = utils.get_oozie(cluster)
|
||||||
|
hive = utils.get_hiveserver(cluster)
|
||||||
|
|
||||||
|
extra = dict()
|
||||||
|
|
||||||
|
if hive:
|
||||||
|
extra['hive_mysql_passwd'] = six.text_type(uuid.uuid4())
|
||||||
|
|
||||||
|
for ng in cluster.node_groups:
|
||||||
|
extra[ng.id] = {
|
||||||
|
'xml': c_helper.generate_xml_configs(
|
||||||
|
cluster, ng, extra['hive_mysql_passwd'] if hive else None),
|
||||||
|
'setup_script': c_helper.generate_setup_script(
|
||||||
|
ng.storage_paths(),
|
||||||
|
c_helper.extract_environment_confs(ng.configuration()),
|
||||||
|
append_oozie=(
|
||||||
|
oozie and oozie.node_group.id == ng.id)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if c_helper.is_data_locality_enabled(cluster):
|
||||||
|
topology_data = th.generate_topology_map(
|
||||||
|
cluster, CONF.enable_hypervisor_awareness)
|
||||||
|
extra['topology_data'] = "\n".join(
|
||||||
|
[k + " " + v for k, v in topology_data.items()]) + "\n"
|
||||||
|
|
||||||
|
return extra
|
||||||
|
|
||||||
|
def decommission_nodes(self, cluster, instances):
|
||||||
|
tts = utils.get_tasktrackers(cluster)
|
||||||
|
dns = utils.get_datanodes(cluster)
|
||||||
|
decommission_dns = False
|
||||||
|
decommission_tts = False
|
||||||
|
|
||||||
|
for i in instances:
|
||||||
|
if 'datanode' in i.node_group.node_processes:
|
||||||
|
dns.remove(i)
|
||||||
|
decommission_dns = True
|
||||||
|
if 'tasktracker' in i.node_group.node_processes:
|
||||||
|
tts.remove(i)
|
||||||
|
decommission_tts = True
|
||||||
|
|
||||||
|
nn = utils.get_namenode(cluster)
|
||||||
|
jt = utils.get_jobtracker(cluster)
|
||||||
|
|
||||||
|
if decommission_tts:
|
||||||
|
sc.decommission_tt(jt, instances, tts)
|
||||||
|
if decommission_dns:
|
||||||
|
sc.decommission_dn(nn, instances, dns)
|
||||||
|
|
||||||
|
def validate_scaling(self, cluster, existing, additional):
|
||||||
|
self._validate_existing_ng_scaling(cluster, existing)
|
||||||
|
self._validate_additional_ng_scaling(cluster, additional)
|
||||||
|
|
||||||
|
def scale_cluster(self, cluster, instances):
|
||||||
|
self._setup_instances(cluster, instances)
|
||||||
|
|
||||||
|
run.refresh_nodes(remote.get_remote(
|
||||||
|
utils.get_namenode(cluster)), "dfsadmin")
|
||||||
|
jt = utils.get_jobtracker(cluster)
|
||||||
|
if jt:
|
||||||
|
run.refresh_nodes(remote.get_remote(jt), "mradmin")
|
||||||
|
|
||||||
|
self._start_tt_dn_processes(instances)
|
||||||
|
|
||||||
|
def _start_tt_dn_processes(self, instances):
|
||||||
|
tt_dn_names = ["datanode", "tasktracker"]
|
||||||
|
|
||||||
|
with context.ThreadGroup() as tg:
|
||||||
|
for i in instances:
|
||||||
|
processes = set(i.node_group.node_processes)
|
||||||
|
tt_dn_procs = processes.intersection(tt_dn_names)
|
||||||
|
|
||||||
|
if tt_dn_procs:
|
||||||
|
tg.spawn('vanilla-start-tt-dn-%s' % i.instance_name,
|
||||||
|
self._start_tt_dn, i, list(tt_dn_procs))
|
||||||
|
|
||||||
|
def _start_tt_dn(self, instance, tt_dn_procs):
|
||||||
|
with instance.remote() as r:
|
||||||
|
run.start_processes(r, *tt_dn_procs)
|
||||||
|
|
||||||
|
def _setup_instances(self, cluster, instances):
|
||||||
|
extra = self._extract_configs_to_extra(cluster)
|
||||||
|
self._push_configs_to_nodes(cluster, extra, instances)
|
||||||
|
|
||||||
|
def _push_configs_to_nodes(self, cluster, extra, new_instances):
|
||||||
|
all_instances = utils.get_instances(cluster)
|
||||||
|
with context.ThreadGroup() as tg:
|
||||||
|
for instance in all_instances:
|
||||||
|
if instance in new_instances:
|
||||||
|
tg.spawn('vanilla-configure-%s' % instance.instance_name,
|
||||||
|
self._push_configs_to_new_node, cluster,
|
||||||
|
extra, instance)
|
||||||
|
else:
|
||||||
|
tg.spawn('vanilla-reconfigure-%s' % instance.instance_name,
|
||||||
|
self._push_configs_to_existing_node, cluster,
|
||||||
|
extra, instance)
|
||||||
|
|
||||||
|
def _push_configs_to_new_node(self, cluster, extra, instance):
|
||||||
|
ng_extra = extra[instance.node_group.id]
|
||||||
|
private_key, public_key = c_helper.get_hadoop_ssh_keys(cluster)
|
||||||
|
|
||||||
|
files = {
|
||||||
|
'/etc/hadoop/core-site.xml': ng_extra['xml']['core-site'],
|
||||||
|
'/etc/hadoop/mapred-site.xml': ng_extra['xml']['mapred-site'],
|
||||||
|
'/etc/hadoop/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
|
||||||
|
'/tmp/savanna-hadoop-init.sh': ng_extra['setup_script'],
|
||||||
|
'id_rsa': private_key,
|
||||||
|
'authorized_keys': public_key
|
||||||
|
}
|
||||||
|
|
||||||
|
key_cmd = 'sudo mkdir -p /home/hadoop/.ssh/ && ' \
|
||||||
|
'sudo mv id_rsa authorized_keys /home/hadoop/.ssh && ' \
|
||||||
|
'sudo chown -R hadoop:hadoop /home/hadoop/.ssh && ' \
|
||||||
|
'sudo chmod 600 /home/hadoop/.ssh/{id_rsa,authorized_keys}'
|
||||||
|
|
||||||
|
with remote.get_remote(instance) as r:
|
||||||
|
# TODO(aignatov): sudo chown is wrong solution. But it works.
|
||||||
|
r.execute_command(
|
||||||
|
'sudo chown -R $USER:$USER /etc/hadoop'
|
||||||
|
)
|
||||||
|
r.execute_command(
|
||||||
|
'sudo chown -R $USER:$USER /opt/oozie/conf'
|
||||||
|
)
|
||||||
|
r.write_files_to(files)
|
||||||
|
r.execute_command(
|
||||||
|
'sudo chmod 0500 /tmp/savanna-hadoop-init.sh'
|
||||||
|
)
|
||||||
|
r.execute_command(
|
||||||
|
'sudo /tmp/savanna-hadoop-init.sh '
|
||||||
|
'>> /tmp/savanna-hadoop-init.log 2>&1')
|
||||||
|
|
||||||
|
r.execute_command(key_cmd)
|
||||||
|
|
||||||
|
if c_helper.is_data_locality_enabled(cluster):
|
||||||
|
r.write_file_to(
|
||||||
|
'/etc/hadoop/topology.sh',
|
||||||
|
f.get_file_text(
|
||||||
|
'plugins/vanilla/v1_2_1/resources/topology.sh'))
|
||||||
|
r.execute_command(
|
||||||
|
'sudo chmod +x /etc/hadoop/topology.sh'
|
||||||
|
)
|
||||||
|
|
||||||
|
self._write_topology_data(r, cluster, extra)
|
||||||
|
self._push_master_configs(r, cluster, extra, instance)
|
||||||
|
|
||||||
|
def _push_configs_to_existing_node(self, cluster, extra, instance):
|
||||||
|
node_processes = instance.node_group.node_processes
|
||||||
|
need_update = (c_helper.is_data_locality_enabled(cluster) or
|
||||||
|
'namenode' in node_processes or
|
||||||
|
'jobtracker' in node_processes or
|
||||||
|
'oozie' in node_processes or
|
||||||
|
'hiveserver' in node_processes)
|
||||||
|
|
||||||
|
if not need_update:
|
||||||
|
return
|
||||||
|
|
||||||
|
with remote.get_remote(instance) as r:
|
||||||
|
self._write_topology_data(r, cluster, extra)
|
||||||
|
self._push_master_configs(r, cluster, extra, instance)
|
||||||
|
|
||||||
|
def _write_topology_data(self, r, cluster, extra):
|
||||||
|
if c_helper.is_data_locality_enabled(cluster):
|
||||||
|
topology_data = extra['topology_data']
|
||||||
|
r.write_file_to('/etc/hadoop/topology.data', topology_data)
|
||||||
|
|
||||||
|
def _push_master_configs(self, r, cluster, extra, instance):
|
||||||
|
ng_extra = extra[instance.node_group.id]
|
||||||
|
node_processes = instance.node_group.node_processes
|
||||||
|
|
||||||
|
if 'namenode' in node_processes:
|
||||||
|
self._push_namenode_configs(cluster, r)
|
||||||
|
|
||||||
|
if 'jobtracker' in node_processes:
|
||||||
|
self._push_jobtracker_configs(cluster, r)
|
||||||
|
|
||||||
|
if 'oozie' in node_processes:
|
||||||
|
self._push_oozie_configs(cluster, ng_extra, r)
|
||||||
|
|
||||||
|
if 'hiveserver' in node_processes:
|
||||||
|
self._push_hive_configs(cluster, ng_extra,
|
||||||
|
extra['hive_mysql_passwd'], r)
|
||||||
|
|
||||||
|
def _push_namenode_configs(self, cluster, r):
|
||||||
|
r.write_file_to('/etc/hadoop/dn.incl',
|
||||||
|
utils.generate_fqdn_host_names(
|
||||||
|
utils.get_datanodes(cluster)))
|
||||||
|
|
||||||
|
def _push_jobtracker_configs(self, cluster, r):
|
||||||
|
r.write_file_to('/etc/hadoop/tt.incl',
|
||||||
|
utils.generate_fqdn_host_names(
|
||||||
|
utils.get_tasktrackers(cluster)))
|
||||||
|
|
||||||
|
def _push_oozie_configs(self, cluster, ng_extra, r):
|
||||||
|
r.write_file_to('/opt/oozie/conf/oozie-site.xml',
|
||||||
|
ng_extra['xml']['oozie-site'])
|
||||||
|
|
||||||
|
if c_helper.is_mysql_enable(cluster):
|
||||||
|
sql_script = f.get_file_text(
|
||||||
|
'plugins/vanilla/v1_2_1/resources/create_oozie_db.sql')
|
||||||
|
files = {
|
||||||
|
'/tmp/create_oozie_db.sql': sql_script
|
||||||
|
}
|
||||||
|
r.write_files_to(files)
|
||||||
|
|
||||||
|
def _push_hive_configs(self, cluster, ng_extra, hive_mysql_passwd, r):
|
||||||
|
files = {
|
||||||
|
'/opt/hive/conf/hive-site.xml':
|
||||||
|
ng_extra['xml']['hive-site']
|
||||||
|
}
|
||||||
|
if c_helper.is_mysql_enable(cluster):
|
||||||
|
sql_script = f.get_file_text(
|
||||||
|
'plugins/vanilla/v1_2_1/resources/create_hive_db.sql'
|
||||||
|
)
|
||||||
|
sql_script = sql_script.replace('pass',
|
||||||
|
hive_mysql_passwd)
|
||||||
|
files.update({'/tmp/create_hive_db.sql': sql_script})
|
||||||
|
r.write_files_to(files)
|
||||||
|
|
||||||
|
def _set_cluster_info(self, cluster):
|
||||||
|
nn = utils.get_namenode(cluster)
|
||||||
|
jt = utils.get_jobtracker(cluster)
|
||||||
|
oozie = utils.get_oozie(cluster)
|
||||||
|
info = {}
|
||||||
|
|
||||||
|
if jt:
|
||||||
|
ui_port = c_helper.get_port_from_config(
|
||||||
|
'MapReduce', 'mapred.job.tracker.http.address', cluster)
|
||||||
|
jt_port = c_helper.get_port_from_config(
|
||||||
|
'MapReduce', 'mapred.job.tracker', cluster)
|
||||||
|
|
||||||
|
info['MapReduce'] = {
|
||||||
|
'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
|
||||||
|
'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
|
||||||
|
}
|
||||||
|
|
||||||
|
if nn:
|
||||||
|
ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
|
||||||
|
cluster)
|
||||||
|
nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
|
||||||
|
cluster)
|
||||||
|
|
||||||
|
info['HDFS'] = {
|
||||||
|
'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
|
||||||
|
'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
|
||||||
|
}
|
||||||
|
|
||||||
|
if oozie:
|
||||||
|
#TODO(yrunts) change from hardcode value
|
||||||
|
info['JobFlow'] = {
|
||||||
|
'Oozie': 'http://%s:11000' % oozie.management_ip
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx = context.ctx()
|
||||||
|
conductor.cluster_update(ctx, cluster, {'info': info})
|
||||||
|
|
||||||
|
def _get_scalable_processes(self):
|
||||||
|
return ["datanode", "tasktracker"]
|
||||||
|
|
||||||
|
def _get_by_id(self, lst, id):
|
||||||
|
for obj in lst:
|
||||||
|
if obj.id == id:
|
||||||
|
return obj
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _validate_additional_ng_scaling(self, cluster, additional):
|
||||||
|
jt = utils.get_jobtracker(cluster)
|
||||||
|
scalable_processes = self._get_scalable_processes()
|
||||||
|
|
||||||
|
for ng_id in additional:
|
||||||
|
ng = self._get_by_id(cluster.node_groups, ng_id)
|
||||||
|
if not set(ng.node_processes).issubset(scalable_processes):
|
||||||
|
raise ex.NodeGroupCannotBeScaled(
|
||||||
|
ng.name, "Vanilla plugin cannot scale nodegroup"
|
||||||
|
" with processes: " +
|
||||||
|
' '.join(ng.node_processes))
|
||||||
|
if not jt and 'tasktracker' in ng.node_processes:
|
||||||
|
raise ex.NodeGroupCannotBeScaled(
|
||||||
|
ng.name, "Vanilla plugin cannot scale node group with "
|
||||||
|
"processes which have no master-processes run "
|
||||||
|
"in cluster")
|
||||||
|
|
||||||
|
def _validate_existing_ng_scaling(self, cluster, existing):
|
||||||
|
scalable_processes = self._get_scalable_processes()
|
||||||
|
dn_to_delete = 0
|
||||||
|
for ng in cluster.node_groups:
|
||||||
|
if ng.id in existing:
|
||||||
|
if ng.count > existing[ng.id] and "datanode" in \
|
||||||
|
ng.node_processes:
|
||||||
|
dn_to_delete += ng.count - existing[ng.id]
|
||||||
|
if not set(ng.node_processes).issubset(scalable_processes):
|
||||||
|
raise ex.NodeGroupCannotBeScaled(
|
||||||
|
ng.name, "Vanilla plugin cannot scale nodegroup"
|
||||||
|
" with processes: " +
|
||||||
|
' '.join(ng.node_processes))
|
||||||
|
|
||||||
|
dn_amount = len(utils.get_datanodes(cluster))
|
||||||
|
rep_factor = c_helper.get_config_value('HDFS', 'dfs.replication',
|
||||||
|
cluster)
|
||||||
|
|
||||||
|
if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
|
||||||
|
raise ex.ClusterCannotBeScaled(
|
||||||
|
cluster.name, "Vanilla plugin cannot shrink cluster because "
|
||||||
|
"it would be not enough nodes for replicas "
|
||||||
|
"(replication factor is %s)" % rep_factor)
|
|
@ -0,0 +1,247 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import six
|
||||||
|
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
from sahara.plugins.general import utils
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config_helper as c_helper
|
||||||
|
from sahara.swift import swift_helper as swift
|
||||||
|
from sahara.utils import files as f
|
||||||
|
from sahara.utils import xmlutils as x
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
HADOOP_CONF_DIR = '/opt/hadoop/etc/hadoop'
|
||||||
|
HADOOP_USER = 'hadoop'
|
||||||
|
HADOOP_GROUP = 'hadoop'
|
||||||
|
|
||||||
|
|
||||||
|
def configure_cluster(cluster):
|
||||||
|
LOG.debug("Configuring cluster \"%s\"", cluster.name)
|
||||||
|
instances = []
|
||||||
|
for node_group in cluster.node_groups:
|
||||||
|
for instance in node_group.instances:
|
||||||
|
instances.append(instance)
|
||||||
|
|
||||||
|
configure_instances(instances)
|
||||||
|
|
||||||
|
|
||||||
|
def configure_instances(instances):
|
||||||
|
for instance in instances:
|
||||||
|
_provisioning_configs(instance)
|
||||||
|
_post_configuration(instance)
|
||||||
|
|
||||||
|
|
||||||
|
def _provisioning_configs(instance):
|
||||||
|
xmls, env = _generate_configs(instance.node_group)
|
||||||
|
_push_xml_configs(instance, xmls)
|
||||||
|
_push_env_configs(instance, env)
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_configs(node_group):
|
||||||
|
user_xml_confs, user_env_confs = _get_user_configs(node_group)
|
||||||
|
hadoop_xml_confs, default_env_confs = _get_hadoop_configs(node_group)
|
||||||
|
|
||||||
|
xml_confs = _merge_configs(user_xml_confs, hadoop_xml_confs)
|
||||||
|
env_confs = _merge_configs(default_env_confs, user_env_confs)
|
||||||
|
|
||||||
|
return xml_confs, env_confs
|
||||||
|
|
||||||
|
|
||||||
|
def _get_hadoop_configs(node_group):
|
||||||
|
cluster = node_group.cluster
|
||||||
|
nn_hostname = utils.get_namenode(cluster).hostname()
|
||||||
|
res_hostname = utils.get_resourcemanager(cluster).hostname()
|
||||||
|
dirs = _get_hadoop_dirs(node_group)
|
||||||
|
confs = {
|
||||||
|
'Hadoop': {
|
||||||
|
'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname,
|
||||||
|
},
|
||||||
|
'HDFS': {
|
||||||
|
'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
|
||||||
|
'dfs.namenode.data.dir': ','.join(dirs['hadoop_data_dirs']),
|
||||||
|
'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
|
||||||
|
'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
|
||||||
|
},
|
||||||
|
'YARN': {
|
||||||
|
'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
|
||||||
|
'yarn.resourcemanager.hostname': '%s' % res_hostname,
|
||||||
|
'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
|
||||||
|
HADOOP_CONF_DIR),
|
||||||
|
'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
|
||||||
|
HADOOP_CONF_DIR)
|
||||||
|
},
|
||||||
|
'MapReduce': {
|
||||||
|
'mapreduce.framework.name': 'yarn'
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if c_helper.get_config_value(c_helper.ENABLE_SWIFT.applicable_target,
|
||||||
|
c_helper.ENABLE_SWIFT.name, cluster):
|
||||||
|
swift_configs = {}
|
||||||
|
for config in swift.get_swift_configs():
|
||||||
|
swift_configs[config['name']] = config['value']
|
||||||
|
|
||||||
|
confs['HDFS'].update(swift_configs)
|
||||||
|
|
||||||
|
return confs, c_helper.get_env_configs()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_user_configs(node_group):
|
||||||
|
ng_xml_confs, ng_env_confs = _separate_configs(node_group.node_configs)
|
||||||
|
cl_xml_confs, cl_env_confs = _separate_configs(
|
||||||
|
node_group.cluster.cluster_configs)
|
||||||
|
|
||||||
|
xml_confs = _merge_configs(cl_xml_confs, ng_xml_confs)
|
||||||
|
env_confs = _merge_configs(cl_env_confs, ng_env_confs)
|
||||||
|
return xml_confs, env_confs
|
||||||
|
|
||||||
|
|
||||||
|
def _separate_configs(configs):
|
||||||
|
all_env_configs = c_helper.get_env_configs()
|
||||||
|
xml_configs = {}
|
||||||
|
env_configs = {}
|
||||||
|
for service, params in six.iteritems(configs):
|
||||||
|
xml_configs[service] = {}
|
||||||
|
env_configs[service] = {}
|
||||||
|
for param, value in six.iteritems(params):
|
||||||
|
if all_env_configs.get(service, {}).get(param):
|
||||||
|
if not env_configs.get(service):
|
||||||
|
env_configs[service] = {}
|
||||||
|
env_configs[service][param] = value
|
||||||
|
else:
|
||||||
|
if not xml_configs.get(service):
|
||||||
|
xml_configs[service] = {}
|
||||||
|
xml_configs[service][param] = value
|
||||||
|
|
||||||
|
return xml_configs, env_configs
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_xml(configs):
|
||||||
|
xml_confs = {}
|
||||||
|
for service, confs in six.iteritems(configs):
|
||||||
|
xml_confs[service] = x.create_hadoop_xml(confs)
|
||||||
|
|
||||||
|
return xml_confs
|
||||||
|
|
||||||
|
|
||||||
|
def _push_env_configs(instance, configs):
|
||||||
|
nn_heap = configs['HDFS']['NameNode Heap Size']
|
||||||
|
dn_heap = configs['HDFS']['DataNode Heap Size']
|
||||||
|
rm_heap = configs['YARN']['ResourceManager Heap Size']
|
||||||
|
nm_heap = configs['YARN']['NodeManager Heap Size']
|
||||||
|
|
||||||
|
with instance.remote() as r:
|
||||||
|
r.replace_remote_string(
|
||||||
|
'%s/hadoop-env.sh' % HADOOP_CONF_DIR,
|
||||||
|
'export HADOOP_NAMENODE_OPTS=.*',
|
||||||
|
'export HADOOP_NAMENODE_OPTS="-Xmx%dm"' % nn_heap)
|
||||||
|
r.replace_remote_string(
|
||||||
|
'%s/hadoop-env.sh' % HADOOP_CONF_DIR,
|
||||||
|
'export HADOOP_DATANODE_OPTS=.*',
|
||||||
|
'export HADOOP_DATANODE_OPTS="-Xmx%dm"' % dn_heap)
|
||||||
|
r.replace_remote_string(
|
||||||
|
'%s/yarn-env.sh' % HADOOP_CONF_DIR,
|
||||||
|
'\\#export YARN_RESOURCEMANAGER_HEAPSIZE=.*',
|
||||||
|
'export YARN_RESOURCEMANAGER_HEAPSIZE=%d' % rm_heap)
|
||||||
|
r.replace_remote_string(
|
||||||
|
'%s/yarn-env.sh' % HADOOP_CONF_DIR,
|
||||||
|
'\\#export YARN_NODEMANAGER_HEAPSIZE=.*',
|
||||||
|
'export YARN_NODEMANAGER_HEAPSIZE=%d' % nm_heap)
|
||||||
|
|
||||||
|
|
||||||
|
def _push_xml_configs(instance, configs):
    """Render XML configs and write the known *-site.xml files remotely.

    Services without a mapped target file are silently skipped.
    """
    xmls = _generate_xml(configs)
    service_to_conf_map = {
        'Hadoop': '%s/core-site.xml' % HADOOP_CONF_DIR,
        'HDFS': '%s/hdfs-site.xml' % HADOOP_CONF_DIR,
        'YARN': '%s/yarn-site.xml' % HADOOP_CONF_DIR,
        'MapReduce': '%s/mapred-site.xml' % HADOOP_CONF_DIR,
    }
    # membership test on the dict itself instead of `.keys()` -- same
    # semantics without building an intermediate key view/list
    xml_confs = dict(
        (service_to_conf_map[service], confs)
        for service, confs in six.iteritems(xmls)
        if service in service_to_conf_map)

    _push_configs_to_instance(instance, xml_confs)
|
||||||
|
|
||||||
|
|
||||||
|
def _push_configs_to_instance(instance, configs):
    """Write every file in ``configs`` (path -> content) to the instance."""
    LOG.debug("Push configs to instance \"%s\"", instance.instance_name)
    with instance.remote() as r:
        for path in configs:
            r.write_file_to(path, configs[path], run_as_root=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _post_configuration(instance):
    """Run the post-configuration shell script on the instance.

    Fills post_conf.template with the node group's directory layout and
    ownership settings, uploads the result and executes it via sudo.
    """
    node_group = instance.node_group
    dirs = _get_hadoop_dirs(node_group)
    # values substituted into the {placeholder}s of post_conf.template
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/v2_3_0/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    # upload, mark executable, then run as root
    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')
|
||||||
|
|
||||||
|
|
||||||
|
def _get_hadoop_dirs(node_group):
    """Build the hadoop/yarn directory layout over the group's volumes.

    Name/data dirs span every storage path; log dirs use the first one.
    """
    storage_paths = node_group.storage_paths()
    layout = {
        'hadoop_name_dirs': _make_hadoop_paths(storage_paths,
                                               '/hdfs/namenode'),
        'hadoop_data_dirs': _make_hadoop_paths(storage_paths,
                                               '/hdfs/datanode'),
        'hadoop_log_dir': _make_hadoop_paths(storage_paths,
                                             '/hadoop/logs')[0],
        'hadoop_secure_dn_log_dir': _make_hadoop_paths(
            storage_paths, '/hadoop/logs/secure')[0],
        'yarn_log_dir': _make_hadoop_paths(storage_paths, '/yarn/logs')[0],
    }
    return layout
|
||||||
|
|
||||||
|
|
||||||
|
def _make_hadoop_paths(paths, hadoop_dir):
|
||||||
|
return [path + hadoop_dir for path in paths]
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_configs(a, b):
    """Merge two {service: {param: value}} dicts; ``b`` wins on clashes."""
    merged = {}
    for source in (a, b):
        for service, params in six.iteritems(source):
            merged.setdefault(service, {}).update(params)
    return merged
|
|
@ -0,0 +1,173 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara import exceptions as ex
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
from sahara.plugins import provisioning as p
|
||||||
|
from sahara.utils import types as types
|
||||||
|
from sahara.utils import xmlutils as x
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)

# Default Hadoop parameters, loaded from the bundled *-default.xml
# resource files shipped with the Hadoop 2.3.0 plugin.
CORE_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v2_3_0/resources/core-default.xml')

HDFS_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v2_3_0/resources/hdfs-default.xml')

MAPRED_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v2_3_0/resources/mapred-default.xml')

YARN_DEFAULT = x.load_hadoop_xml_defaults(
    'plugins/vanilla/v2_3_0/resources/yarn-default.xml')

# service name -> list of default XML config sets exposed for that service
XML_CONFS = {
    "Hadoop": [CORE_DEFAULT],
    "HDFS": [HDFS_DEFAULT],
    "YARN": [YARN_DEFAULT],
    "MapReduce": [MAPRED_DEFAULT]
}

# service name -> environment settings (heap sizes) with default values
ENV_CONFS = {
    "YARN": {
        'ResourceManager Heap Size': 1024,
        'NodeManager Heap Size': 1024
    },
    "HDFS": {
        'NameNode Heap Size': 1024,
        'DataNode Heap Size': 1024
    }
}

# cluster-scope boolean toggle enabling Swift integration
ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
                        config_type="bool", priority=1,
                        default_value=True, is_optional=False)

# parameters managed by the plugin itself, hidden from users
HIDDEN_CONFS = [
    'dfs.namenode.data.dir', 'dfs.namenode.name.dir', 'fs.defaultFS',
    'hadoop.proxyuser.hadoop.groups', 'hadoop.proxyuser.hadoop.hosts',
    'yarn.resourcemanager.address',
    'yarn.resourcemanager.resource-tracker.address',
    'yarn.resourcemanager.scheduler.address',
]

# parameters that only make sense at cluster scope, not per node group
# NOTE(review): the 'mapredude.*' entries look like typos of
# 'mapreduce.*' -- confirm against the Hadoop 2.3.0 parameter names
CLUSTER_WIDE_CONFS = [
    'dfs.blocksize', 'dfs.namenode.replication.min', 'dfs.permissions.enabled',
    'dfs.replication', 'dfs.replication.max', 'io.compression.codecs',
    'io.file.buffer.size', 'mapreduce.job.counters.max',
    'mapreduce.map.output.compress.codec',
    'mapreduce.output.fileoutputformat.compress.codec',
    'mapreduce.output.fileoutputformat.compress.type',
    'mapredude.map.output.compress',
    'mapredude.output.fileoutputformat.compress'
]

# parameters surfaced with high (priority 1) visibility
PRIORITY_1_CONFS = [
    'dfs.datanode.du.reserved', 'dfs.datanode.failed.volumes.tolerated',
    'dfs.datanode.handler.count', 'dfs.datanode.max.transfer.threads',
    'dfs.namenode.handler.count', 'mapred.child.java.opts',
    'mapred.jobtracker.maxtasks.per.job', 'mapreduce.jobtracker.handler.count',
    'mapreduce.map.java.opts', 'mapreduce.reduce.java.opts',
    'mapreduce.task.io.sort.mb', 'mapreduce.tasktracker.map.tasks.maximum',
    'mapreduce.tasktracker.reduce.tasks.maximum'
]

# for now we have not so many cluster-wide configs
# lets consider all of them having high priority
PRIORITY_1_CONFS += CLUSTER_WIDE_CONFS
|
||||||
|
|
||||||
|
|
||||||
|
def _init_xml_configs():
    """Build Config objects for every user-visible XML parameter.

    Skips HIDDEN_CONFS, infers bool/int config types from the default
    value, and marks cluster-wide / priority-1 parameters.
    """
    configs = []
    # six.iteritems for py2/py3 compatibility, consistent with the rest
    # of this module (dict.iteritems() is py2-only)
    for service, config_lists in six.iteritems(XML_CONFS):
        for config_list in config_lists:
            for config in config_list:
                if config['name'] not in HIDDEN_CONFS:
                    cfg = p.Config(config['name'], service, "node",
                                   is_optional=True, config_type="string",
                                   default_value=str(config['value']),
                                   description=config['description'])
                    # promote string defaults to typed bool/int values
                    if cfg.default_value in ["true", "false"]:
                        cfg.config_type = "bool"
                        cfg.default_value = (cfg.default_value == 'true')
                    elif types.is_int(cfg.default_value):
                        cfg.config_type = "int"
                        cfg.default_value = int(cfg.default_value)
                    if config['name'] in CLUSTER_WIDE_CONFS:
                        cfg.scope = 'cluster'
                    if config['name'] in PRIORITY_1_CONFS:
                        cfg.priority = 1
                    configs.append(cfg)

    return configs
|
||||||
|
|
||||||
|
|
||||||
|
def _init_env_configs():
    """Build int-typed Config objects for the ENV_CONFS heap settings."""
    configs = []
    # six.iteritems instead of py2-only dict.iteritems(), consistent
    # with the rest of this module
    for service, config_items in six.iteritems(ENV_CONFS):
        for name, value in six.iteritems(config_items):
            configs.append(p.Config(name, service, "node",
                                    default_value=value, priority=1,
                                    config_type="int"))

    return configs
|
||||||
|
|
||||||
|
|
||||||
|
def _init_general_configs():
    """Return the general (cluster-level) plugin configs."""
    general = [ENABLE_SWIFT]
    return general
|
||||||
|
|
||||||
|
|
||||||
|
# Initialise plugin Hadoop configurations
PLUGIN_XML_CONFIGS = _init_xml_configs()  # per-parameter XML configs
PLUGIN_ENV_CONFIGS = _init_env_configs()  # heap-size env configs
PLUGIN_GENERAL_CONFIGS = _init_general_configs()  # cluster-level toggles
|
||||||
|
|
||||||
|
|
||||||
|
def _init_all_configs():
    """Concatenate XML, env and general configs into one fresh list."""
    return (list(PLUGIN_XML_CONFIGS) +
            list(PLUGIN_ENV_CONFIGS) +
            list(PLUGIN_GENERAL_CONFIGS))
|
||||||
|
|
||||||
|
|
||||||
|
# full config list served to the API, built once at import time
PLUGIN_CONFIGS = _init_all_configs()
|
||||||
|
|
||||||
|
|
||||||
|
def get_plugin_configs():
    """Return the full list of plugin Config objects."""
    return PLUGIN_CONFIGS
|
||||||
|
|
||||||
|
|
||||||
|
def get_xml_configs():
    """Return only the XML-derived Config objects."""
    return PLUGIN_XML_CONFIGS
|
||||||
|
|
||||||
|
|
||||||
|
def get_env_configs():
    """Return the raw ENV_CONFS mapping (service -> {name: default}).

    NOTE(review): unlike get_xml_configs() this returns the raw dict,
    not PLUGIN_ENV_CONFIGS -- confirm callers expect the mapping.
    """
    return ENV_CONFS
|
||||||
|
|
||||||
|
|
||||||
|
def get_config_value(service, name, cluster=None):
    """Look up a config value: node-group override first, then defaults.

    :param service: service name, e.g. 'HDFS' or 'YARN'
    :param name: parameter name within the service
    :param cluster: optional cluster whose node groups may override
    :raises SaharaException: if no plugin config defines the parameter
    """
    if cluster:
        for ng in cluster.node_groups:
            cl_param = ng.configuration().get(service, {}).get(name)
            if cl_param is not None:
                return cl_param

    for c in get_plugin_configs():
        if c.applicable_target == service and c.name == name:
            return c.default_value

    # bug fix: name/service were passed as an extra positional argument
    # instead of being %-formatted into the message, so they were never
    # interpolated into the exception text
    raise ex.SaharaException("Unable get parameter '%s' from service %s"
                             % (name, service))
|
|
@ -0,0 +1,23 @@
|
||||||
|
Apache Hadoop Configurations for Savanna
|
||||||
|
========================================
|
||||||
|
|
||||||
|
This directory contains default XML configuration files:
|
||||||
|
|
||||||
|
* core-default.xml,
|
||||||
|
* hdfs-default.xml,
|
||||||
|
* mapred-default.xml,
|
||||||
|
* yarn-default.xml
|
||||||
|
|
||||||
|
These files are applied to Savanna's plugin for Apache Hadoop version 2.3.0
|
||||||
|
|
||||||
|
|
||||||
|
Files were taken from here:
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-2.3.0/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-2.3.0/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-2.3.0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
|
||||||
|
https://github.com/apache/hadoop-common/blob/release-2.3.0/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
|
||||||
|
|
||||||
|
XML configs are used to expose default Hadoop configurations to users through
Savanna's REST API. This allows users to override some config values, which
will be pushed to the provisioned VMs running Hadoop services as part of the
appropriate xml config.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
||||||
|
#!/bin/bash
# Post-configuration template rendered through Python str.format, which is
# why every literal shell brace below is doubled. Substituted placeholders:
# hadoop_user, hadoop_group, hadoop_conf_dir, hadoop_name_dirs,
# hadoop_data_dirs, hadoop_log_dir, hadoop_secure_dn_log_dir, yarn_log_dir.

# change permission to config
hadoop_configs=('core-site.xml' 'hdfs-site.xml' 'mapred-site.xml' 'yarn-site.xml')
for conf in "${{hadoop_configs[@]}}"
do
    chown -R {hadoop_group}:{hadoop_user} {hadoop_conf_dir}/$conf
done

# create dirs for hdfs and mapreduce service
dirs=({hadoop_name_dirs} {hadoop_data_dirs} {hadoop_log_dir} {hadoop_secure_dn_log_dir} {yarn_log_dir})
for dir in "${{dirs[@]}}"
do
    mkdir -p $dir
    chown -R {hadoop_group}:{hadoop_user} $dir
done

# change hadoop log dir
sed -i "s,\#export HADOOP_LOG_DIR=.*,export HADOOP_LOG_DIR={hadoop_log_dir}," {hadoop_conf_dir}/hadoop-env.sh
sed -i "s,export HADOOP_SECURE_DN_LOG_DIR=.*,export HADOOP_SECURE_DN_LOG_DIR={hadoop_secure_dn_log_dir}," {hadoop_conf_dir}/hadoop-env.sh

# change yarn log dir
sed -i "s,YARN_LOG_DIR=.*,YARN_LOG_DIR={yarn_log_dir}," {hadoop_conf_dir}/yarn-env.sh

# prepare scaling files
sc_all_files=('dn-include' 'nm-include' 'dn-exclude' 'nm-exclude')
for file in "${{sc_all_files[@]}}"
do
    touch {hadoop_conf_dir}/$file
    chown {hadoop_group}:{hadoop_user} {hadoop_conf_dir}/$file
done
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,54 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.plugins.general import utils as u
|
||||||
|
|
||||||
|
|
||||||
|
def start_instance(instance):
    """Start every node process configured on the instance's node group."""
    for process in instance.node_group.node_processes:
        if process in ('namenode', 'datanode'):
            start_hadoop_process(instance, process)
        elif process in ('resourcemanager', 'nodemanager'):
            start_yarn_process(instance, process)
        else:
            raise RuntimeError("Process is not supported")
|
||||||
|
|
||||||
|
|
||||||
|
def start_hadoop_process(instance, process):
    """Start an HDFS daemon on the instance via hadoop-daemon.sh."""
    cmd = 'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process
    instance.remote().execute_command(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def start_yarn_process(instance, process):
    """Start a YARN daemon on the instance via yarn-daemon.sh."""
    cmd = 'sudo su - -c "yarn-daemon.sh start %s" hadoop' % process
    instance.remote().execute_command(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def format_namenode(instance):
    """Format HDFS on the given namenode instance."""
    cmd = 'sudo su - -c "hdfs namenode -format" hadoop'
    instance.remote().execute_command(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_hadoop_nodes(cluster):
    """Tell the namenode to re-read its include/exclude node files."""
    namenode = u.get_namenode(cluster)
    cmd = 'sudo su - -c "hdfs dfsadmin -refreshNodes" hadoop'
    namenode.remote().execute_command(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_yarn_nodes(cluster):
    """Tell the resourcemanager to re-read its node lists."""
    resourcemanager = u.get_resourcemanager(cluster)
    cmd = 'sudo su - -c "yarn rmadmin -refreshNodes" hadoop'
    resourcemanager.remote().execute_command(cmd)
|
|
@ -0,0 +1,124 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara import context
|
||||||
|
from sahara import exceptions as ex
|
||||||
|
from sahara.openstack.common import timeutils
|
||||||
|
from sahara.plugins.general import utils as u
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import run_scripts as run
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import utils as pu
|
||||||
|
|
||||||
|
HADOOP_CONF_DIR = config.HADOOP_CONF_DIR
|
||||||
|
|
||||||
|
|
||||||
|
def scale_cluster(cluster, instances):
    """Configure and start newly added instances.

    Order matters: configs are written and include files refreshed
    before HDFS/YARN are told to re-read node lists; only then are the
    new instances' processes started.
    """
    config.configure_instances(instances)
    _update_include_files(cluster)
    run.refresh_hadoop_nodes(cluster)
    run.refresh_yarn_nodes(cluster)
    for instance in instances:
        run.start_instance(instance)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_instances_with_service(instances, service):
|
||||||
|
ret = []
|
||||||
|
for instance in instances:
|
||||||
|
if service in instance.node_group.node_processes:
|
||||||
|
ret.append(instance)
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def _update_include_files(cluster):
    """Rewrite dn-include/nm-include on every instance with current FQDNs."""
    instances = u.get_instances(cluster)

    datanodes = u.get_datanodes(cluster)
    nodemanagers = u.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    # every instance gets an identical copy of both include files
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
def decommission_nodes(cluster, instances):
    """Gracefully remove ``instances`` from the cluster.

    Writes the exclude files, asks HDFS/YARN to refresh their node
    lists, waits until the nodes report decommissioned, then restores
    clean include/exclude files.
    """
    datanodes = _get_instances_with_service(instances, 'datanode')
    nodemanagers = _get_instances_with_service(instances, 'nodemanager')
    # NOTE(review): _update_exclude_files recomputes the same two lists
    # from ``instances`` -- minor duplication, behavior unaffected
    _update_exclude_files(cluster, instances)

    run.refresh_hadoop_nodes(cluster)
    run.refresh_yarn_nodes(cluster)

    _check_nodemanagers_decommission(cluster, nodemanagers)
    _check_datanodes_decommission(cluster, datanodes)

    _update_include_files(cluster)
    _clear_exclude_files(cluster)
|
||||||
|
|
||||||
|
|
||||||
|
def _update_exclude_files(cluster, instances):
    """Write the FQDNs of ``instances`` into dn-exclude/nm-exclude on
    every cluster instance, marking them for decommission."""
    datanodes = _get_instances_with_service(instances, 'datanode')
    nodemanagers = _get_instances_with_service(instances, 'nodemanager')
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-exclude" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-exclude" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
def _clear_exclude_files(cluster):
    """Truncate dn-exclude/nm-exclude on every cluster instance."""
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo > %s/dn-exclude" hadoop' % HADOOP_CONF_DIR)
            r.execute_command(
                'sudo su - -c "echo > %s/nm-exclude" hadoop' % HADOOP_CONF_DIR)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_decommission(cluster, instances, check_func, timeout):
    """Poll ``check_func`` until every instance reports decommissioned.

    :param check_func: callable(cluster) -> {fqdn: status string}
    :param timeout: overall limit in seconds; polls every 5 seconds
    :raises SaharaException: if the timeout is exceeded
    """
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        statuses = check_func(cluster)
        dec_ok = True
        for instance in instances:
            if statuses[instance.fqdn()] != 'decommissioned':
                dec_ok = False

        if dec_ok:
            return
        else:
            context.sleep(5)
    else:
        # bug fix: the exception was constructed but never raised, so a
        # timed-out decommission was silently ignored
        raise ex.SaharaException("Cannot finish decommission in %d seconds" %
                                 timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_nodemanagers_decommission(cluster, instances):
    """Wait up to 300 seconds for nodemanagers to decommission."""
    _check_decommission(cluster, instances, pu.get_nodemanagers_status, 300)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_datanodes_decommission(cluster, instances):
    """Wait up to 4 hours for datanodes to decommission.

    The much larger timeout presumably allows HDFS time to re-replicate
    blocks off the leaving nodes -- confirm.
    """
    _check_decommission(cluster, instances, pu.get_datanodes_status, 3600 * 4)
|
|
@ -0,0 +1,46 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from sahara.plugins.general import utils as u
|
||||||
|
|
||||||
|
|
||||||
|
def get_datanodes_status(cluster):
    """Parse 'hdfs dfsadmin -report' into {hostname: lowercased status}."""
    namenode = u.get_namenode(cluster)
    report = namenode.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -report" hadoop')[1]

    pattern = re.compile(r'^Hostname: (.*)\nDecommission Status : (.*)$',
                         re.MULTILINE)
    return dict((host, state.lower())
                for host, state in pattern.findall(report))
|
||||||
|
|
||||||
|
|
||||||
|
def get_nodemanagers_status(cluster):
    """Parse 'yarn node -all -list' into {hostname: lowercased status}."""
    resourcemanager = u.get_resourcemanager(cluster)
    report = resourcemanager.remote().execute_command(
        'sudo su - -c "yarn node -all -list" hadoop')[1]

    pattern = re.compile(r'^(\S+):\d+\s+(\w+)', re.MULTILINE)
    return dict((host, state.lower())
                for host, state in pattern.findall(report))
|
|
@ -0,0 +1,85 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.plugins.general import exceptions as ex
|
||||||
|
from sahara.plugins.general import utils as u
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config_helper as c_helper
|
||||||
|
from sahara.utils import general as gu
|
||||||
|
|
||||||
|
|
||||||
|
def validate_cluster_creating(cluster):
    """Check process counts before cluster creation.

    Requires exactly one namenode and at most one resourcemanager;
    nodemanagers are not allowed without a resourcemanager.
    """
    nn_count = _get_inst_count(cluster, 'namenode')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('namenode', 1, nn_count)

    rm_count = _get_inst_count(cluster, 'resourcemanager')
    if rm_count not in [0, 1]:
        raise ex.InvalidComponentCountException('resourcemanager', '0 or 1',
                                                rm_count)

    if rm_count == 0:
        nm_count = _get_inst_count(cluster, 'nodemanager')
        if nm_count > 0:
            raise ex.RequiredServiceMissingException('resourcemanager',
                                                     required_by='nodemanager')
|
||||||
|
|
||||||
|
|
||||||
|
def validate_additional_ng_scaling(cluster, additional):
    """Validate node groups being added during scaling.

    Only scalable processes may be added, and nodemanagers cannot be
    added to a cluster that has no resourcemanager.
    """
    rm = u.get_resourcemanager(cluster)
    scalable_processes = _get_scalable_processes()

    for ng_id in additional:
        ng = gu.get_by_id(cluster.node_groups, ng_id)
        if not set(ng.node_processes).issubset(scalable_processes):
            msg = "Vanilla plugin cannot scale nodegroup with processes: %s"
            raise ex.NodeGroupCannotBeScaled(ng.name,
                                             msg % ' '.join(ng.node_processes))

        if not rm and 'nodemanager' in ng.node_processes:
            msg = ("Vanilla plugin cannot scale node group with processes "
                   "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(ng.name, msg)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_existing_ng_scaling(cluster, existing):
    """Validate resizing of existing node groups.

    Rejects non-scalable processes and refuses to shrink HDFS below the
    configured replication factor.

    :param existing: {node_group_id: new_count}
    """
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            # count datanodes that would be removed by the shrink
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]

            if not set(ng.node_processes).issubset(scalable_processes):
                msg = ("Vanilla plugin cannot scale nodegroup "
                       "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(u.get_datanodes(cluster))
    rep_factor = c_helper.get_config_value('HDFS', 'dfs.replication', cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = ("Vanilla plugin cannot shrink cluster because it would be not "
               "enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(
            cluster.name, msg % rep_factor)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_scalable_processes():
|
||||||
|
return ['datanode', 'nodemanager']
|
||||||
|
|
||||||
|
|
||||||
|
def _get_inst_count(cluster, process):
    """Total instance count across node groups running ``process``."""
    # generator expression -- no throwaway list materialized inside sum()
    return sum(ng.count for ng in u.get_node_groups(cluster, process))
|
|
@ -0,0 +1,104 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo.config import cfg
|
||||||
|
|
||||||
|
from sahara import conductor
|
||||||
|
from sahara import context
|
||||||
|
from sahara.openstack.common import log as logging
|
||||||
|
from sahara.plugins.general import utils
|
||||||
|
from sahara.plugins.vanilla import abstractversionhandler as avm
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config as c
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config_helper as c_helper
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import run_scripts as run
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import scaling as sc
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import validation as vl
|
||||||
|
|
||||||
|
conductor = conductor.API
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
CONF = cfg.CONF
|
||||||
|
|
||||||
|
|
||||||
|
class VersionHandler(avm.AbstractVersionHandler):
    """Vanilla plugin version handler for Hadoop 2.3.0 clusters.

    Thin delegation layer: configuration, scaling and validation are
    implemented in the v2_3_0 config/scaling/validation modules.
    """

    def get_plugin_configs(self):
        # expose the configs assembled by config_helper
        return c_helper.get_plugin_configs()

    def get_node_processes(self):
        # service -> processes this version can provision
        return {
            "Hadoop": [],
            "MapReduce": [],
            "HDFS": ["namenode", "datanode"],
            "YARN": ["resourcemanager", "nodemanager"]
        }

    def validate(self, cluster):
        vl.validate_cluster_creating(cluster)

    def update_infra(self, cluster):
        pass

    def configure_cluster(self, cluster):
        c.configure_cluster(cluster)

    def start_cluster(self, cluster):
        # start order: format + start namenode, then resourcemanager,
        # then all datanodes and nodemanagers
        nn = utils.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        # NOTE(review): rm is used unconditionally, but validation allows
        # rm_count == 0 -- confirm rm cannot be None here
        rm = utils.get_resourcemanager(cluster)
        run.start_yarn_process(rm, 'resourcemanager')

        for dn in utils.get_datanodes(cluster):
            run.start_hadoop_process(dn, 'datanode')

        for nm in utils.get_nodemanagers(cluster):
            run.start_yarn_process(nm, 'nodemanager')

        self._set_cluster_info(cluster)

    def decommission_nodes(self, cluster, instances):
        sc.decommission_nodes(cluster, instances)

    def validate_scaling(self, cluster, existing, additional):
        vl.validate_additional_ng_scaling(cluster, additional)
        vl.validate_existing_ng_scaling(cluster, existing)

    def scale_cluster(self, cluster, instances):
        sc.scale_cluster(cluster, instances)

    def _set_cluster_info(self, cluster):
        # record the HDFS/YARN web UI endpoints in the cluster's info dict
        nn = utils.get_namenode(cluster)
        rm = utils.get_resourcemanager(cluster)

        info = {}

        if rm:
            info['YARN'] = {
                'Web UI': 'http://%s:%s' % (rm.management_ip, '8088'),
            }

        if nn:
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, '50070'),
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    def get_oozie_server(self, cluster):
        # returns None -- no Oozie server is provisioned by this version
        pass

    def get_resource_manager_uri(self, cluster):
        # returns None -- no resource manager URI exposed by this version
        pass
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Copyright (c) 2014 Mirantis, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class VersionFactory():
    """Registry of vanilla plugin version handlers.

    On first use, scans this package for vX_Y_Z sub-directories, imports
    each one's versionhandler module and maps 'X.Y.Z' -> handler instance.
    """
    versions = None       # discovered version strings, e.g. ['2.3.0']
    modules = None        # version string -> VersionHandler instance
    initialized = False   # guards the one-time directory scan

    @staticmethod
    def get_instance():
        """Return a factory, performing the version scan on first call."""
        if not VersionFactory.initialized:
            src_dir = os.path.join(os.path.dirname(__file__), '')
            VersionFactory.versions = (
                [name[1:].replace('_', '.')
                 for name in os.listdir(src_dir)
                 if (os.path.isdir(os.path.join(src_dir, name))
                     and re.match(r'^v\d+_\d+_\d+$', name))])
            VersionFactory.modules = {}
            for version in VersionFactory.versions:
                module_name = 'sahara.plugins.vanilla.v%s.versionhandler' % (
                    version.replace('.', '_'))
                module_class = getattr(
                    __import__(module_name, fromlist=['sahara']),
                    'VersionHandler')
                # fix: versions are already dot-separated (converted in the
                # listdir comprehension above), so the former
                # `version.replace('_', '.')` key computation was a no-op --
                # key by the version string directly
                VersionFactory.modules[version] = module_class()

            VersionFactory.initialized = True

        return VersionFactory()

    def get_versions(self):
        """Return the list of discovered version strings."""
        return VersionFactory.versions

    def get_version_handler(self, version):
        """Return the handler instance registered for ``version``."""
        return VersionFactory.modules[version]
|
|
@ -0,0 +1,64 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import unittest2
|
||||||
|
|
||||||
|
from sahara import context
|
||||||
|
from sahara.db import api as db_api
|
||||||
|
from sahara import main
|
||||||
|
from sahara.openstack.common.db.sqlalchemy import session
|
||||||
|
|
||||||
|
|
||||||
|
class SaharaTestCase(unittest2.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(SaharaTestCase, self).setUp()
|
||||||
|
|
||||||
|
self.maxDiff = None
|
||||||
|
self.setup_context()
|
||||||
|
|
||||||
|
def setup_context(self, username="test_user", tenant_id="tenant_1",
|
||||||
|
token="test_auth_token", tenant_name='test_tenant',
|
||||||
|
**kwargs):
|
||||||
|
self.addCleanup(context.set_ctx,
|
||||||
|
context.ctx() if context.has_ctx() else None)
|
||||||
|
context.set_ctx(context.Context(
|
||||||
|
username=username, tenant_id=tenant_id,
|
||||||
|
token=token, service_catalog={},
|
||||||
|
tenant_name=tenant_name, **kwargs))
|
||||||
|
|
||||||
|
def override_config(self, name, override, group=None):
|
||||||
|
main.CONF.set_override(name, override, group)
|
||||||
|
self.addCleanup(main.CONF.clear_override, name, group)
|
||||||
|
|
||||||
|
|
||||||
|
class SaharaWithDbTestCase(SaharaTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super(SaharaWithDbTestCase, self).setUp()
|
||||||
|
self.setup_db()
|
||||||
|
|
||||||
|
def setup_db(self):
|
||||||
|
self.db_fd, self.db_path = tempfile.mkstemp()
|
||||||
|
session.set_defaults('sqlite:///' + self.db_path, self.db_path)
|
||||||
|
db_api.setup_db()
|
||||||
|
self.addCleanup(self._drop_db)
|
||||||
|
|
||||||
|
def _drop_db(self):
|
||||||
|
db_api.drop_db()
|
||||||
|
os.close(self.db_fd)
|
||||||
|
os.unlink(self.db_path)
|
|
@ -0,0 +1,54 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import pkg_resources as pkg
|
||||||
|
import unittest2
|
||||||
|
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import scaling as sc
|
||||||
|
from sahara import version
|
||||||
|
|
||||||
|
|
||||||
|
class ProvisioningPluginBaseTest(unittest2.TestCase):
|
||||||
|
def test_result_for_3_nodes(self):
|
||||||
|
ins = open(pkg.resource_filename(
|
||||||
|
version.version_info.package, "tests/unit/resources/"
|
||||||
|
"dfs_admin_3_nodes.txt"), "r")
|
||||||
|
big_string = ins.read()
|
||||||
|
|
||||||
|
exp1 = {"Name": "10.155.0.94:50010", "Decommission Status": "Normal"}
|
||||||
|
exp2 = {"Name": "10.155.0.90:50010", "Last contact": "Tue Jul 16 12:"
|
||||||
|
"00:07 UTC 2013"}
|
||||||
|
exp3 = {"Configured Capacity": "10568916992 (9.84 GB)", "DFS "
|
||||||
|
"Remaining%": "93.42%"}
|
||||||
|
expected = [exp1, exp2, exp3]
|
||||||
|
res = sc.parse_dfs_report(big_string)
|
||||||
|
self.assertItemsEqual(expected, res)
|
||||||
|
|
||||||
|
def test_result_for_0_nodes(self):
|
||||||
|
ins = open(pkg.resource_filename(
|
||||||
|
version.version_info.package, "tests/unit/resources/"
|
||||||
|
"dfs_admin_0_nodes.txt"), "r")
|
||||||
|
big_string = ins.read()
|
||||||
|
res = sc.parse_dfs_report(big_string)
|
||||||
|
self.assertEqual(0, len(res))
|
||||||
|
|
||||||
|
def test_result_for_1_node(self):
|
||||||
|
ins = open(pkg.resource_filename(
|
||||||
|
version.version_info.package, "tests/unit/resources/"
|
||||||
|
"dfs_admin_1_nodes.txt"), "r")
|
||||||
|
big_string = ins.read()
|
||||||
|
exp = {"Name": "10.155.0.94:50010", "Decommission Status": "Normal"}
|
||||||
|
res = sc.parse_dfs_report(big_string)
|
||||||
|
self.assertIn(exp, res)
|
|
@ -0,0 +1,288 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
|
||||||
|
from sahara import conductor as cond
|
||||||
|
from sahara import context
|
||||||
|
from sahara.plugins.general import exceptions as ex
|
||||||
|
from sahara.plugins.vanilla import plugin as p
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import config_helper as c_h
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import mysql_helper as m_h
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import versionhandler as v_h
|
||||||
|
from sahara.tests.unit import base
|
||||||
|
from sahara.tests.unit import testutils as tu
|
||||||
|
|
||||||
|
|
||||||
|
conductor = cond.API
|
||||||
|
|
||||||
|
|
||||||
|
class VanillaPluginTest(base.SaharaWithDbTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super(VanillaPluginTest, self).setUp()
|
||||||
|
self.pl = p.VanillaProvider()
|
||||||
|
|
||||||
|
def test_validate(self):
|
||||||
|
self.ng = []
|
||||||
|
self.ng.append(tu.make_ng_dict("nn", "f1", ["namenode"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("jt", "f1", ["jobtracker"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("tt", "f1", ["tasktracker"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("oozie", "f1", ["oozie"], 0))
|
||||||
|
|
||||||
|
self._validate_case(1, 1, 10, 1)
|
||||||
|
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(0, 1, 10, 1)
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(2, 1, 10, 1)
|
||||||
|
|
||||||
|
with self.assertRaises(ex.RequiredServiceMissingException):
|
||||||
|
self._validate_case(1, 0, 10, 1)
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(1, 2, 10, 1)
|
||||||
|
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(1, 1, 0, 2)
|
||||||
|
with self.assertRaises(ex.RequiredServiceMissingException):
|
||||||
|
self._validate_case(1, 0, 0, 1)
|
||||||
|
|
||||||
|
def _validate_case(self, *args):
|
||||||
|
lst = []
|
||||||
|
for i in range(0, len(args)):
|
||||||
|
self.ng[i]['count'] = args[i]
|
||||||
|
lst.append(self.ng[i])
|
||||||
|
|
||||||
|
cl = tu.create_cluster("cluster1", "tenant1", "vanilla", "1.2.1", lst)
|
||||||
|
|
||||||
|
self.pl.validate(cl)
|
||||||
|
|
||||||
|
def test_get_configs(self):
|
||||||
|
cl_configs = self.pl.get_configs("1.2.1")
|
||||||
|
for cfg in cl_configs:
|
||||||
|
if cfg.config_type is "bool":
|
||||||
|
self.assertIsInstance(cfg.default_value, bool)
|
||||||
|
elif cfg.config_type is "int":
|
||||||
|
self.assertIsInstance(cfg.default_value, int)
|
||||||
|
else:
|
||||||
|
self.assertIsInstance(cfg.default_value, str)
|
||||||
|
self.assertNotIn(cfg.name, c_h.HIDDEN_CONFS)
|
||||||
|
|
||||||
|
def test_extract_environment_configs(self):
|
||||||
|
env_configs = {
|
||||||
|
"JobFlow": {
|
||||||
|
'Oozie Heap Size': 4000
|
||||||
|
},
|
||||||
|
"MapReduce": {
|
||||||
|
'Job Tracker Heap Size': 1000,
|
||||||
|
'Task Tracker Heap Size': "2000"
|
||||||
|
},
|
||||||
|
"HDFS": {
|
||||||
|
'Name Node Heap Size': 3000,
|
||||||
|
'Data Node Heap Size': "4000"
|
||||||
|
},
|
||||||
|
"Wrong-applicable-target": {
|
||||||
|
't1': 4
|
||||||
|
}}
|
||||||
|
self.assertListEqual(c_h.extract_environment_confs(env_configs),
|
||||||
|
['HADOOP_NAMENODE_OPTS=\\"-Xmx3000m\\"',
|
||||||
|
'HADOOP_DATANODE_OPTS=\\"-Xmx4000m\\"',
|
||||||
|
'CATALINA_OPTS -Xmx4000m',
|
||||||
|
'HADOOP_JOBTRACKER_OPTS=\\"-Xmx1000m\\"',
|
||||||
|
'HADOOP_TASKTRACKER_OPTS=\\"-Xmx2000m\\"'])
|
||||||
|
|
||||||
|
def test_extract_xml_configs(self):
|
||||||
|
xml_configs = {
|
||||||
|
"HDFS": {
|
||||||
|
'dfs.replication': 3,
|
||||||
|
'fs.default.name': 'hdfs://',
|
||||||
|
'key': 'value'
|
||||||
|
},
|
||||||
|
"MapReduce": {
|
||||||
|
'io.sort.factor': 10,
|
||||||
|
'mapred.reduce.tasks': 2
|
||||||
|
},
|
||||||
|
"Wrong-applicable-target": {
|
||||||
|
'key': 'value'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.assertListEqual(c_h.extract_xml_confs(xml_configs),
|
||||||
|
[('fs.default.name', 'hdfs://'),
|
||||||
|
('dfs.replication', 3),
|
||||||
|
('mapred.reduce.tasks', 2),
|
||||||
|
('io.sort.factor', 10)])
|
||||||
|
|
||||||
|
def test_general_configs(self):
|
||||||
|
gen_config = {
|
||||||
|
c_h.ENABLE_SWIFT.name: {
|
||||||
|
'default_value': c_h.ENABLE_SWIFT.default_value,
|
||||||
|
'conf': {
|
||||||
|
'fs.swift.enabled': True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
c_h.ENABLE_MYSQL.name: {
|
||||||
|
'default_value': c_h.ENABLE_MYSQL.default_value,
|
||||||
|
'conf': {
|
||||||
|
'oozie.service.JPAService.jdbc.username': 'oozie'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all_configured = {
|
||||||
|
'fs.swift.enabled': True,
|
||||||
|
'oozie.service.JPAService.jdbc.username': 'oozie'
|
||||||
|
}
|
||||||
|
configs = {
|
||||||
|
'general': {
|
||||||
|
'Enable Swift': True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cfg = c_h.generate_cfg_from_general({}, configs, gen_config)
|
||||||
|
self.assertDictEqual(cfg, all_configured)
|
||||||
|
configs['general'].update({'Enable MySQL': False})
|
||||||
|
cfg = c_h.generate_cfg_from_general({}, configs, gen_config)
|
||||||
|
self.assertDictEqual(cfg, {'fs.swift.enabled': True})
|
||||||
|
configs['general'].update({
|
||||||
|
'Enable Swift': False,
|
||||||
|
'Enable MySQL': False
|
||||||
|
})
|
||||||
|
cfg = c_h.generate_cfg_from_general({}, configs, gen_config)
|
||||||
|
self.assertDictEqual(cfg, {})
|
||||||
|
configs = {}
|
||||||
|
cfg = c_h.generate_cfg_from_general({}, configs, gen_config)
|
||||||
|
self.assertDictEqual(cfg, all_configured)
|
||||||
|
|
||||||
|
def test_get_mysql_configs(self):
|
||||||
|
cfg = m_h.get_required_mysql_configs(None, None)
|
||||||
|
self.assertDictEqual(cfg, m_h.get_oozie_mysql_configs())
|
||||||
|
cfg = m_h.get_required_mysql_configs("metastore_host", "passwd")
|
||||||
|
cfg_to_compare = m_h.get_oozie_mysql_configs()
|
||||||
|
cfg_to_compare.update(m_h.get_hive_mysql_configs(
|
||||||
|
"metastore_host", "passwd"))
|
||||||
|
self.assertDictEqual(cfg, cfg_to_compare)
|
||||||
|
|
||||||
|
@mock.patch('sahara.conductor.api.LocalApi.cluster_get')
|
||||||
|
def test_get_config_value(self, cond_get_cluster):
|
||||||
|
cluster = self._get_fake_cluster()
|
||||||
|
cond_get_cluster.return_value = cluster
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
c_h.get_config_value('HDFS', 'fs.default.name', cluster),
|
||||||
|
'hdfs://inst1:8020')
|
||||||
|
self.assertEqual(
|
||||||
|
c_h.get_config_value('HDFS', 'spam', cluster), 'eggs')
|
||||||
|
self.assertEqual(
|
||||||
|
c_h.get_config_value('HDFS', 'dfs.safemode.extension'), 30000)
|
||||||
|
self.assertRaises(RuntimeError,
|
||||||
|
c_h.get_config_value,
|
||||||
|
'MapReduce', 'spam', cluster)
|
||||||
|
|
||||||
|
@mock.patch('sahara.plugins.vanilla.v1_2_1.versionhandler.context')
|
||||||
|
@mock.patch('sahara.conductor.api.LocalApi.cluster_update')
|
||||||
|
def test_set_cluster_info(self, cond_cluster_update, context_mock):
|
||||||
|
cluster = self._get_fake_cluster()
|
||||||
|
v_h.VersionHandler()._set_cluster_info(cluster)
|
||||||
|
expected_info = {
|
||||||
|
'HDFS': {
|
||||||
|
'NameNode': 'hdfs://inst1:8020',
|
||||||
|
'Web UI': 'http://127.0.0.1:50070'
|
||||||
|
},
|
||||||
|
'MapReduce': {
|
||||||
|
'Web UI': 'http://127.0.0.1:50030',
|
||||||
|
'JobTracker': 'inst1:8021'
|
||||||
|
},
|
||||||
|
'JobFlow': {
|
||||||
|
'Oozie': 'http://127.0.0.1:11000'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cond_cluster_update.assert_called_with(context_mock.ctx(), cluster,
|
||||||
|
{'info': expected_info})
|
||||||
|
|
||||||
|
def _get_fake_cluster(self):
|
||||||
|
class FakeNG(object):
|
||||||
|
def __init__(self, name, flavor, processes, count, instances=None,
|
||||||
|
configuration=None, cluster_id=None):
|
||||||
|
self.name = name
|
||||||
|
self.flavor = flavor
|
||||||
|
self.node_processes = processes
|
||||||
|
self.count = count
|
||||||
|
self.instances = instances or []
|
||||||
|
self.ng_configuration = configuration
|
||||||
|
self.cluster_id = cluster_id
|
||||||
|
|
||||||
|
def configuration(self):
|
||||||
|
return self.ng_configuration
|
||||||
|
|
||||||
|
def storage_paths(self):
|
||||||
|
return ['/mnt']
|
||||||
|
|
||||||
|
class FakeCluster(object):
|
||||||
|
def __init__(self, name, tenant, plugin, version, node_groups):
|
||||||
|
self.name = name
|
||||||
|
self.tenant = tenant
|
||||||
|
self.plugin = plugin
|
||||||
|
self.version = version
|
||||||
|
self.node_groups = node_groups
|
||||||
|
|
||||||
|
class FakeInst(object):
|
||||||
|
def __init__(self, inst_name, inst_id, management_ip):
|
||||||
|
self.instance_name = inst_name
|
||||||
|
self.instance_id = inst_id
|
||||||
|
self.management_ip = management_ip
|
||||||
|
|
||||||
|
def hostname(self):
|
||||||
|
return self.instance_name
|
||||||
|
|
||||||
|
ms_inst = FakeInst('inst1', 'id1', '127.0.0.1')
|
||||||
|
wk_inst = FakeInst('inst2', 'id2', '127.0.0.1')
|
||||||
|
|
||||||
|
conf = {
|
||||||
|
"MapReduce": {},
|
||||||
|
"HDFS": {
|
||||||
|
"spam": "eggs"
|
||||||
|
},
|
||||||
|
"JobFlow": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
ng1 = FakeNG('master', 'fl1', ['namenode', 'jobtracker', 'oozie'], 1,
|
||||||
|
[ms_inst], conf, 'id1')
|
||||||
|
ng2 = FakeNG('worker', 'fl1', ['datanode', 'tasktracker'], 1,
|
||||||
|
[wk_inst], conf, 'id1')
|
||||||
|
return FakeCluster('cl1', 'ten1', 'vanilla', '1.2.1', [ng1, ng2])
|
||||||
|
|
||||||
|
def test_get_hadoop_ssh_keys(self):
|
||||||
|
cluster_dict = {
|
||||||
|
'name': 'cluster1',
|
||||||
|
'plugin_name': 'mock_plugin',
|
||||||
|
'hadoop_version': 'mock_version',
|
||||||
|
'default_image_id': 'initial',
|
||||||
|
'node_groups': [tu.make_ng_dict("ng1", "f1", ["s1"], 1)]}
|
||||||
|
|
||||||
|
cluster1 = conductor.cluster_create(context.ctx(), cluster_dict)
|
||||||
|
(private_key1, public_key1) = c_h.get_hadoop_ssh_keys(cluster1)
|
||||||
|
|
||||||
|
#should store keys for old cluster
|
||||||
|
cluster1 = conductor.cluster_get(context.ctx(), cluster1)
|
||||||
|
(private_key2, public_key2) = c_h.get_hadoop_ssh_keys(cluster1)
|
||||||
|
|
||||||
|
self.assertEqual(public_key1, public_key2)
|
||||||
|
self.assertEqual(private_key1, private_key2)
|
||||||
|
|
||||||
|
#should generate new keys for new cluster
|
||||||
|
cluster_dict.update({'name': 'cluster2'})
|
||||||
|
cluster2 = conductor.cluster_create(context.ctx(), cluster_dict)
|
||||||
|
(private_key3, public_key3) = c_h.get_hadoop_ssh_keys(cluster2)
|
||||||
|
|
||||||
|
self.assertNotEqual(public_key1, public_key3)
|
||||||
|
self.assertNotEqual(private_key1, private_key3)
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
import unittest2
|
||||||
|
|
||||||
|
from sahara.plugins.vanilla.v1_2_1 import run_scripts
|
||||||
|
|
||||||
|
|
||||||
|
class RunScriptsTest(unittest2.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_check_datanodes_count_positive(self):
|
||||||
|
remote = mock.Mock()
|
||||||
|
remote.execute_command.return_value = (0, "1")
|
||||||
|
self.assertTrue(run_scripts.check_datanodes_count(remote, 1))
|
||||||
|
|
||||||
|
def test_check_datanodes_count_negative(self):
|
||||||
|
remote = mock.Mock()
|
||||||
|
remote.execute_command.return_value = (0, "1")
|
||||||
|
self.assertFalse(run_scripts.check_datanodes_count(remote, 2))
|
||||||
|
|
||||||
|
def test_check_datanodes_count_nonzero_exitcode(self):
|
||||||
|
remote = mock.Mock()
|
||||||
|
remote.execute_command.return_value = (1, "1")
|
||||||
|
self.assertFalse(run_scripts.check_datanodes_count(remote, 1))
|
||||||
|
|
||||||
|
def test_check_datanodes_count_expects_zero(self):
|
||||||
|
remote = mock.Mock()
|
||||||
|
self.assertTrue(run_scripts.check_datanodes_count(remote, 0))
|
||||||
|
self.assertEqual(remote.execute_command.call_count, 0)
|
|
@ -0,0 +1,62 @@
|
||||||
|
Configured Capacity: 60249329664 (56.11 GB)
|
||||||
|
Present Capacity: 50438139904 (46.97 GB)
|
||||||
|
DFS Remaining: 50438041600 (46.97 GB)
|
||||||
|
DFS Used: 98304 (96 KB)
|
||||||
|
DFS Used%: 0.00%
|
||||||
|
Under replicated blocks: 0
|
||||||
|
Blocks with corrupt replicas: 0
|
||||||
|
Missing blocks: 0
|
||||||
|
|
||||||
|
-------------------------------------------------
|
||||||
|
Datanodes available: 4 (4 total, 0 dead)
|
||||||
|
|
||||||
|
Live datanodes:
|
||||||
|
Name: 10.50.0.22:50010 (cluster-worker-001.novalocal)
|
||||||
|
Hostname: cluster-worker-001.novalocal
|
||||||
|
Decommission Status : Normal
|
||||||
|
Configured Capacity: 20083101696 (18.70 GB)
|
||||||
|
DFS Used: 24576 (24 KB)
|
||||||
|
Non DFS Used: 3270406144 (3.05 GB)
|
||||||
|
DFS Remaining: 16812670976 (15.66 GB)
|
||||||
|
DFS Used%: 0.00%
|
||||||
|
DFS Remaining%: 83.72%
|
||||||
|
Last contact: Mon Feb 24 13:41:13 UTC 2014
|
||||||
|
|
||||||
|
|
||||||
|
Name: 10.50.0.36:50010 (cluster-worker-003.novalocal)
|
||||||
|
Hostname: cluster-worker-003.novalocal
|
||||||
|
Decommission Status : Normal
|
||||||
|
Configured Capacity: 20083101696 (18.70 GB)
|
||||||
|
DFS Used: 24576 (24 KB)
|
||||||
|
Non DFS Used: 3270393856 (3.05 GB)
|
||||||
|
DFS Remaining: 16812683264 (15.66 GB)
|
||||||
|
DFS Used%: 0.00%
|
||||||
|
DFS Remaining%: 83.72%
|
||||||
|
Last contact: Mon Feb 24 13:41:11 UTC 2014
|
||||||
|
|
||||||
|
|
||||||
|
Name: 10.50.0.25:50010 (cluster-worker-002.novalocal)
|
||||||
|
Hostname: cluster-worker-002.novalocal
|
||||||
|
Decommission Status : Normal
|
||||||
|
Configured Capacity: 20083101696 (18.70 GB)
|
||||||
|
DFS Used: 24576 (24 KB)
|
||||||
|
Non DFS Used: 3270389760 (3.05 GB)
|
||||||
|
DFS Remaining: 16812687360 (15.66 GB)
|
||||||
|
DFS Used%: 0.00%
|
||||||
|
DFS Remaining%: 83.72%
|
||||||
|
Last contact: Mon Feb 24 13:41:12 UTC 2014
|
||||||
|
|
||||||
|
|
||||||
|
Name: 10.50.0.60:50010 (cluster-worker-004.novalocal)
|
||||||
|
Hostname: cluster-worker-004.novalocal
|
||||||
|
Decommission Status : Decommissioned
|
||||||
|
Configured Capacity: 20083101696 (18.70 GB)
|
||||||
|
DFS Used: 24576 (24 KB)
|
||||||
|
Non DFS Used: 3270316032 (3.05 GB)
|
||||||
|
DFS Remaining: 16812761088 (15.66 GB)
|
||||||
|
DFS Used%: 0.00%
|
||||||
|
DFS Remaining%: 83.72%
|
||||||
|
Last contact: Mon Feb 24 13:33:33 UTC 2014
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
Total Nodes:4
|
||||||
|
Node-Id Node-State Node-Http-Address Number-of-Running-Containers
|
||||||
|
cluster-worker-001.novalocal:54746 RUNNING cluster-worker-001.novalocal:8042 0
|
||||||
|
cluster-worker-002.novalocal:53509 RUNNING cluster-worker-002.novalocal:8042 0
|
||||||
|
cluster-worker-003.novalocal:60418 RUNNING cluster-worker-003.novalocal:8042 0
|
||||||
|
cluster-worker-004.novalocal:33876 DECOMMISSIONED cluster-worker-004.novalocal:8042 0
|
|
@ -0,0 +1,71 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import config as c
|
||||||
|
from sahara.tests.unit import base
|
||||||
|
|
||||||
|
|
||||||
|
class VanillaTwoConfigTestCase(base.SaharaTestCase):
|
||||||
|
def test_get_hadoop_dirs(self):
|
||||||
|
ng = FakeNG(storage_paths=['/vol1', '/vol2'])
|
||||||
|
dirs = c._get_hadoop_dirs(ng)
|
||||||
|
expected = {
|
||||||
|
'hadoop_name_dirs': ['/vol1/hdfs/namenode',
|
||||||
|
'/vol2/hdfs/namenode'],
|
||||||
|
'hadoop_data_dirs': ['/vol1/hdfs/datanode',
|
||||||
|
'/vol2/hdfs/datanode'],
|
||||||
|
'hadoop_log_dir': '/vol1/hadoop/logs',
|
||||||
|
'hadoop_secure_dn_log_dir': '/vol1/hadoop/logs/secure',
|
||||||
|
'yarn_log_dir': '/vol1/yarn/logs'
|
||||||
|
}
|
||||||
|
self.assertDictEqual(dirs, expected)
|
||||||
|
|
||||||
|
def test_merge_configs(self):
|
||||||
|
a = {
|
||||||
|
'HDFS': {
|
||||||
|
'param1': 'value1',
|
||||||
|
'param2': 'value2'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b = {
|
||||||
|
'HDFS': {
|
||||||
|
'param1': 'value3',
|
||||||
|
'param3': 'value4'
|
||||||
|
},
|
||||||
|
'YARN': {
|
||||||
|
'param5': 'value5'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res = c._merge_configs(a, b)
|
||||||
|
expected = {
|
||||||
|
'HDFS': {
|
||||||
|
'param1': 'value3',
|
||||||
|
'param2': 'value2',
|
||||||
|
'param3': 'value4'
|
||||||
|
},
|
||||||
|
'YARN': {
|
||||||
|
'param5': 'value5'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.assertDictEqual(res, expected)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeNG():
|
||||||
|
def __init__(self, storage_paths=None):
|
||||||
|
self.paths = storage_paths
|
||||||
|
|
||||||
|
def storage_paths(self):
|
||||||
|
return self.paths
|
|
@ -0,0 +1,66 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
|
||||||
|
from sahara.plugins.vanilla.v2_3_0 import utils as u
|
||||||
|
from sahara.tests.unit import base
|
||||||
|
from sahara.utils import files
|
||||||
|
|
||||||
|
|
||||||
|
class UtilsTestCase(base.SaharaTestCase):
|
||||||
|
@mock.patch('sahara.plugins.general.utils.get_namenode')
|
||||||
|
def test_datanodes_status(self, nn):
|
||||||
|
report = files.get_file_text(
|
||||||
|
'tests/unit/plugins/vanilla/v2_3_0/resources/dfs-report.txt')
|
||||||
|
|
||||||
|
nn.return_value = self._get_instance(report)
|
||||||
|
statuses = u.get_datanodes_status(None)
|
||||||
|
|
||||||
|
expected = {
|
||||||
|
'cluster-worker-001.novalocal': 'normal',
|
||||||
|
'cluster-worker-002.novalocal': 'normal',
|
||||||
|
'cluster-worker-003.novalocal': 'normal',
|
||||||
|
'cluster-worker-004.novalocal': 'decommissioned'
|
||||||
|
}
|
||||||
|
|
||||||
|
self.assertDictEqual(statuses, expected)
|
||||||
|
|
||||||
|
@mock.patch('sahara.plugins.general.utils.get_resourcemanager')
|
||||||
|
def test_nodemanagers_status(self, rm):
|
||||||
|
report = files.get_file_text(
|
||||||
|
'tests/unit/plugins/vanilla/v2_3_0/resources/yarn-report.txt')
|
||||||
|
|
||||||
|
rm.return_value = self._get_instance(report)
|
||||||
|
statuses = u.get_nodemanagers_status(None)
|
||||||
|
|
||||||
|
expected = {
|
||||||
|
'cluster-worker-001.novalocal': 'running',
|
||||||
|
'cluster-worker-002.novalocal': 'running',
|
||||||
|
'cluster-worker-003.novalocal': 'running',
|
||||||
|
'cluster-worker-004.novalocal': 'decommissioned'
|
||||||
|
}
|
||||||
|
|
||||||
|
self.assertDictEqual(statuses, expected)
|
||||||
|
|
||||||
|
def _get_instance(self, out):
|
||||||
|
inst_remote = mock.MagicMock()
|
||||||
|
inst_remote.execute_command.return_value = 0, out
|
||||||
|
inst_remote.__enter__.return_value = inst_remote
|
||||||
|
|
||||||
|
inst = mock.MagicMock()
|
||||||
|
inst.remote.return_value = inst_remote
|
||||||
|
|
||||||
|
return inst
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Copyright (c) 2014 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from sahara.plugins.general import exceptions as ex
|
||||||
|
from sahara.plugins.vanilla import plugin as p
|
||||||
|
from sahara.tests.unit import base
|
||||||
|
from sahara.tests.unit import testutils as tu
|
||||||
|
|
||||||
|
|
||||||
|
class ValidationTest(base.SaharaTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super(ValidationTest, self).setUp()
|
||||||
|
self.pl = p.VanillaProvider()
|
||||||
|
|
||||||
|
def test_validate(self):
|
||||||
|
self.ng = []
|
||||||
|
self.ng.append(tu.make_ng_dict("nn", "f1", ["namenode"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("jt", "f1", ["resourcemanager"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("tt", "f1", ["nodemanager"], 0))
|
||||||
|
self.ng.append(tu.make_ng_dict("dn", "f1", ["datanode"], 0))
|
||||||
|
|
||||||
|
self._validate_case(1, 1, 10, 10)
|
||||||
|
self._validate_case(1, 1, 1, 0)
|
||||||
|
self._validate_case(1, 1, 0, 1)
|
||||||
|
self._validate_case(1, 1, 0, 0)
|
||||||
|
self._validate_case(1, 0, 0, 0)
|
||||||
|
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(0, 1, 10, 1)
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(2, 1, 10, 1)
|
||||||
|
|
||||||
|
with self.assertRaises(ex.RequiredServiceMissingException):
|
||||||
|
self._validate_case(1, 0, 10, 1)
|
||||||
|
with self.assertRaises(ex.InvalidComponentCountException):
|
||||||
|
self._validate_case(1, 2, 10, 1)
|
||||||
|
|
||||||
|
def _validate_case(self, *args):
|
||||||
|
lst = []
|
||||||
|
for i in range(0, len(args)):
|
||||||
|
self.ng[i]['count'] = args[i]
|
||||||
|
lst.append(self.ng[i])
|
||||||
|
|
||||||
|
cl = tu.create_cluster("cluster1", "tenant1", "vanilla", "2.3.0", lst)
|
||||||
|
|
||||||
|
self.pl.validate(cl)
|
|
@ -0,0 +1,67 @@
|
||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
def patch_minidom_writexml():
    """Patch for xml.dom.minidom toprettyxml bug with whitespaces around text

    (This patch will be applied for all Python versions < 2.7.3)

    Issue: http://bugs.python.org/issue4147
    Patch: http://hg.python.org/cpython/rev/cb6614e3438b/
    Description: http://ronrothman.com/public/leftbraned/xml-dom-minidom-\
    toprettyxml-and-silly-whitespace/#best-solution
    """

    import sys
    # Python 2.7.3 and newer already ship the upstream fix -- nothing to do.
    if sys.version_info >= (2, 7, 3):
        return

    import xml.dom.minidom as md

    # Replacement for md.Element.writexml taken from the upstream CPython
    # patch referenced in the docstring.  Kept token-for-token identical to
    # upstream on purpose -- do not restyle or "modernize" it.
    # NOTE: this is Python 2 code (attrs.keys() must be a sortable list);
    # the version guard above ensures it never runs on newer interpreters.
    def writexml(self, writer, indent="", addindent="", newl=""):
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent + "<" + self.tagName)

        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            md._write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            # Core of the fix: a single text child is written inline with no
            # indentation/newline added around it, so toprettyxml does not
            # inject whitespace into the text content.
            if (len(self.childNodes) == 1
                    and self.childNodes[0].nodeType == md.Node.TEXT_NODE):
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent + addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s" % (newl))

    # Monkey-patch the stdlib class so every minidom user in this process
    # picks up the fixed behaviour.
    md.Element.writexml = writexml

    # Text nodes: emit indent + data + newline in a single write.
    def writexml(self, writer, indent="", addindent="", newl=""):
        md._write_data(writer, "%s%s%s" % (indent, self.data, newl))

    md.Text.writexml = writexml
|
42
setup.cfg
42
setup.cfg
|
@ -1,7 +1,7 @@
|
||||||
[metadata]
|
[metadata]
|
||||||
name = savanna
|
name = sahara
|
||||||
version = 2014.1
|
version = 2014.1
|
||||||
summary = Savanna project
|
summary = Sahara project
|
||||||
description-file = README.rst
|
description-file = README.rst
|
||||||
license = Apache Software License
|
license = Apache Software License
|
||||||
classifiers =
|
classifiers =
|
||||||
|
@ -15,41 +15,41 @@ classifiers =
|
||||||
Operating System :: POSIX :: Linux
|
Operating System :: POSIX :: Linux
|
||||||
author = OpenStack
|
author = OpenStack
|
||||||
author-email = openstack-dev@lists.openstack.org
|
author-email = openstack-dev@lists.openstack.org
|
||||||
home-page = https://savanna.readthedocs.org
|
home-page = http://docs.openstack.org/developer/sahara/
|
||||||
|
|
||||||
[global]
|
[global]
|
||||||
setup-hooks = pbr.hooks.setup_hook
|
setup-hooks = pbr.hooks.setup_hook
|
||||||
|
|
||||||
[files]
|
[files]
|
||||||
packages =
|
packages =
|
||||||
savanna
|
sahara
|
||||||
|
|
||||||
data_files =
|
data_files =
|
||||||
share/savanna = etc/savanna/*
|
share/savanna = etc/savanna/*
|
||||||
|
|
||||||
[entry_points]
|
[entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
savanna-api = savanna.cli.savanna_api:main
|
savanna-api = sahara.cli.savanna_api:main
|
||||||
savanna-db-manage = savanna.db.migration.cli:main
|
savanna-db-manage = sahara.db.migration.cli:main
|
||||||
_savanna-subprocess = savanna.cli.savanna_subprocess:main
|
_savanna-subprocess = sahara.cli.savanna_subprocess:main
|
||||||
|
|
||||||
# TODO(slukjanov): remove this code (temp to migrate to the new name)
|
# TODO(slukjanov): remove this code (temp to migrate to the new name)
|
||||||
sahara-api = savanna.cli.savanna_api:main
|
sahara-api = sahara.cli.savanna_api:main
|
||||||
sahara-db-manage = savanna.db.migration.cli:main
|
sahara-db-manage = sahara.db.migration.cli:main
|
||||||
|
|
||||||
savanna.cluster.plugins =
|
savanna.cluster.plugins =
|
||||||
vanilla = savanna.plugins.vanilla.plugin:VanillaProvider
|
vanilla = sahara.plugins.vanilla.plugin:VanillaProvider
|
||||||
hdp = savanna.plugins.hdp.ambariplugin:AmbariPlugin
|
hdp = sahara.plugins.hdp.ambariplugin:AmbariPlugin
|
||||||
idh = savanna.plugins.intel.plugin:IDHProvider
|
idh = sahara.plugins.intel.plugin:IDHProvider
|
||||||
|
|
||||||
savanna.infrastructure.engine =
|
savanna.infrastructure.engine =
|
||||||
savanna = savanna.service.direct_engine:DirectEngine
|
savanna = sahara.service.direct_engine:DirectEngine
|
||||||
direct = savanna.service.direct_engine:DirectEngine
|
direct = sahara.service.direct_engine:DirectEngine
|
||||||
heat = savanna.service.heat_engine:HeatEngine
|
heat = sahara.service.heat_engine:HeatEngine
|
||||||
|
|
||||||
savanna.remote =
|
savanna.remote =
|
||||||
ssh = savanna.utils.ssh_remote:SshRemoteDriver
|
ssh = sahara.utils.ssh_remote:SshRemoteDriver
|
||||||
agent = savanna.utils.agent_remote:AgentRemoteDriver
|
agent = sahara.utils.agent_remote:AgentRemoteDriver
|
||||||
|
|
||||||
[build_sphinx]
|
[build_sphinx]
|
||||||
all_files = 1
|
all_files = 1
|
||||||
|
@ -59,13 +59,13 @@ source-dir = doc/source
|
||||||
[extract_messages]
|
[extract_messages]
|
||||||
keywords = _ gettext ngettext l_ lazy_gettext
|
keywords = _ gettext ngettext l_ lazy_gettext
|
||||||
mapping_file = babel.cfg
|
mapping_file = babel.cfg
|
||||||
output_file = savanna/locale/sahara.pot
|
output_file = sahara/locale/savanna.pot
|
||||||
|
|
||||||
[compile_catalog]
|
[compile_catalog]
|
||||||
directory = savanna/locale
|
directory = sahara/locale
|
||||||
domain = sahara
|
domain = sahara
|
||||||
|
|
||||||
[update_catalog]
|
[update_catalog]
|
||||||
domain = sahara
|
domain = sahara
|
||||||
output_dir = savanna/locale
|
output_dir = sahara/locale
|
||||||
input_file = savanna/locale/sahara.pot
|
input_file = sahara/locale/sahara.pot
|
||||||
|
|
6
tox.ini
6
tox.ini
|
@ -8,7 +8,7 @@ usedevelop = True
|
||||||
install_command = pip install -U {opts} {packages}
|
install_command = pip install -U {opts} {packages}
|
||||||
setenv =
|
setenv =
|
||||||
VIRTUAL_ENV={envdir}
|
VIRTUAL_ENV={envdir}
|
||||||
DISCOVER_DIRECTORY=savanna/tests/unit
|
DISCOVER_DIRECTORY=sahara/tests/unit
|
||||||
deps =
|
deps =
|
||||||
-r{toxinidir}/requirements.txt
|
-r{toxinidir}/requirements.txt
|
||||||
-r{toxinidir}/test-requirements.txt
|
-r{toxinidir}/test-requirements.txt
|
||||||
|
@ -17,7 +17,7 @@ commands = python setup.py test --slowest --testr-args="{posargs}"
|
||||||
[testenv:integration]
|
[testenv:integration]
|
||||||
setenv =
|
setenv =
|
||||||
VIRTUAL_ENV={envdir}
|
VIRTUAL_ENV={envdir}
|
||||||
DISCOVER_DIRECTORY=savanna/tests/integration
|
DISCOVER_DIRECTORY=sahara/tests/integration
|
||||||
commands = python setup.py test --slowest --testr-args="{posargs}"
|
commands = python setup.py test --slowest --testr-args="{posargs}"
|
||||||
|
|
||||||
[testenv:cover]
|
[testenv:cover]
|
||||||
|
@ -50,4 +50,4 @@ builtins = _
|
||||||
exclude=.venv,.git,.tox,dist,doc,*openstack/common*,*lib/python*,*egg,tools
|
exclude=.venv,.git,.tox,dist,doc,*openstack/common*,*lib/python*,*egg,tools
|
||||||
|
|
||||||
[hacking]
|
[hacking]
|
||||||
local-check-factory = savanna.utils.hacking.checks.factory
|
local-check-factory = sahara.utils.hacking.checks.factory
|
||||||
|
|
Loading…
Reference in New Issue