Merge "MapR plugin implementation"

Jenkins 2014-10-22 00:28:12 +00:00 committed by Gerrit Code Review
commit ceb074c5b4
93 changed files with 7199 additions and 0 deletions

View File

@@ -25,6 +25,15 @@ include sahara/plugins/hdp/versions/version_1_3_2/resources/*.sh
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.template
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.json
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.json
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.xml
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.json
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.xml
include sahara/plugins/mapr/versions/v3_1_1/resources/*.sh
include sahara/plugins/mapr/versions/v3_1_1/resources/*.json
include sahara/plugins/mapr/versions/v3_1_1/resources/*.xml
include sahara/plugins/spark/resources/*.xml
include sahara/plugins/spark/resources/*.sh
include sahara/plugins/spark/resources/*.template
@@ -32,6 +41,14 @@ include sahara/resources/*.heat
include sahara/service/edp/resources/*.xml
include sahara/swift/resources/*.xml
include sahara/tests/unit/plugins/vanilla/hadoop2/resources/*.txt
include sahara/tests/unit/plugins/mapr/utils/resources/*.topology
include sahara/tests/unit/plugins/mapr/utils/resources/*.json
include sahara/tests/unit/plugins/mapr/utils/resources/*.data
include sahara/tests/unit/plugins/mapr/utils/resources/*.properties
include sahara/tests/unit/plugins/mapr/utils/resources/*.xml
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.conf
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.data
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.xml
include sahara/tests/unit/resources/*.heat
include sahara/tests/unit/resources/*.xml
include sahara/tests/unit/resources/*.txt

View File

@@ -0,0 +1,67 @@
MapR Distribution Plugin
========================
The MapR Sahara plugin allows you to provision MapR clusters on
OpenStack quickly and conveniently.
Operation
---------
The MapR Plugin performs the following four primary functions during cluster creation:
1. MapR components deployment - the plugin manages the deployment of the required software to the target VMs
2. Services Installation - MapR services are installed according to the provided roles list
3. Services Configuration - the plugin combines default settings with user-provided settings
4. Services Start - the plugin starts the appropriate services according to the specified roles
Images
------
Prepared images should be used for cluster provisioning. They already have
MapR 3.1.1 (with Apache Hadoop 0.20.2) and MapR 4.0.1 (with Apache Hadoop 2.4.1) installed.
The MapR plugin requires the image to be tagged in the Sahara Image Registry with
two tags: 'MapR' and '<MapR version>' (e.g. '4.0.1').
Note that you should provide the username of the default cloud user used in the image:
+--------------+------------+
| OS | username |
+==============+============+
| Ubuntu 14.04 | ubuntu |
+--------------+------------+
| CentOS 6.5 | cloud-user |
+--------------+------------+
Hadoop Version Support
----------------------
The MapR plugin currently supports Hadoop 0.20.2 and Hadoop 2.4.1.
Cluster Validation
------------------
An MRv1 cluster is valid if and only if:
1. The cluster contains at least one ZooKeeper component, and the ZooKeeper service is up and running.
2. Each node has either the FileServer component (up and running on every such node) or the NFS server component (up and running).
3. If a node has the TaskTracker component, it must also have the FileServer component.
4. The cluster contains at most one Webserver component, and it is up and running if present.
A YARN cluster is valid if and only if:
1. The cluster contains at least one ZooKeeper component, and the ZooKeeper service is up and running.
2. The cluster contains at least one ResourceManager component, and it is up and running.
3. Each node has either the FileServer component (up and running on every such node) or the NFS server component (up and running).
4. The cluster contains at most one Webserver component, and it is up and running if present.
5. The cluster contains exactly one HistoryServer component, and it is up and running.
These rules can be expressed with the validation helpers added elsewhere in this patch, as sketched below.
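A minimal sketch (not the shipped validation code) of how the MRv1 rules could be built from the helpers in sahara.plugins.mapr.util.validation_utils, with component names taken from sahara.plugins.mapr.util.names::

    import sahara.plugins.mapr.util.validation_utils as vu

    mrv1_rules = [
        vu.not_less_than_count_component_vr('ZooKeeper', 1),
        vu.each_node_has_at_least_one_of_listed_components(
            ['FileServer', 'NFS']),
        vu.node_dependency_satisfied_vr('TaskTracker', 'FileServer'),
        vu.not_more_than_count_component_vr('Webserver', 1),
    ]
    # Each entry is a callable taking a Sahara cluster object; 'cluster' below
    # is assumed, and the exact wiring inside the version handlers is not
    # shown in this excerpt.
    for rule in mrv1_rules:
        rule(cluster)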
The MapR Plugin
---------------
For more information, please contact MapR.

View File

@@ -8,3 +8,4 @@ distribution in various topologies and with management/monitoring tools.
* :doc:`vanilla_plugin` - deploys Vanilla Apache Hadoop
* :doc:`hdp_plugin` - deploys Hortonworks Data Platform
* :doc:`spark_plugin` - deploys Apache Spark with Cloudera HDFS
* :doc:`mapr_plugin` - deploys MapR Distribution with MapR File System (MapR-FS)

View File

View File

@@ -0,0 +1,88 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.provisioning as p
class MapRPlugin(p.ProvisioningPluginBase):
title = 'MapR Hadoop Distribution'
description = ('The MapR Distribution provides a full Hadoop stack that'
' includes the MapR File System (MapR-FS), MapReduce,'
' a complete Hadoop ecosystem, and the MapR Control System'
' user interface')
hdfs_user = 'mapr'
def _get_handler(self, hadoop_version):
return vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
def get_title(self):
return MapRPlugin.title
def get_description(self):
return MapRPlugin.description
def get_hdfs_user(self):
return MapRPlugin.hdfs_user
def get_versions(self):
return vhf.VersionHandlerFactory.get().get_versions()
def get_node_processes(self, hadoop_version):
return self._get_handler(hadoop_version).get_node_processes()
def get_configs(self, hadoop_version):
return self._get_handler(hadoop_version).get_configs()
def configure_cluster(self, cluster):
self._get_handler(cluster.hadoop_version).configure_cluster(cluster)
def start_cluster(self, cluster):
self._get_handler(cluster.hadoop_version).start_cluster(cluster)
def validate(self, cluster):
self._get_handler(cluster.hadoop_version).validate(cluster)
def validate_scaling(self, cluster, existing, additional):
v_handler = self._get_handler(cluster.hadoop_version)
v_handler.validate_scaling(cluster, existing, additional)
def scale_cluster(self, cluster, instances):
v_handler = self._get_handler(cluster.hadoop_version)
v_handler.scale_cluster(cluster, instances)
def decommission_nodes(self, cluster, instances):
v_handler = self._get_handler(cluster.hadoop_version)
v_handler.decommission_nodes(cluster, instances)
def get_oozie_server(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_oozie_server(cluster)
def get_name_node_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_name_node_uri(cluster)
def get_oozie_server_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_oozie_server_uri(cluster)
def get_resource_manager_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_resource_manager_uri(cluster)
def get_edp_engine(self, cluster, job_type):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_edp_engine(cluster, job_type)
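MapRPlugin is a thin facade: every call above is delegated to the version handler matching cluster.hadoop_version. A minimal usage sketch (assumes a configured Sahara environment so the handlers can load their bundled resources):

plugin = MapRPlugin()
for version in plugin.get_versions():
    # Node processes and config objects are supplied per version by the
    # handler packages under sahara/plugins/mapr/versions/.
    processes = plugin.get_node_processes(version)
    configs = plugin.get_configs(version)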

View File

View File

@@ -0,0 +1,20 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self
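A quick usage sketch (illustrative values, not part of the patch): because the instance __dict__ is aliased to the mapping itself, keys and attributes stay in sync.

conf = AttrDict(host='10.0.0.5', port=5181)
assert conf.port == conf['port'] == 5181
conf.user = 'mapr'  # attribute writes show up as dict entries too
assert conf['user'] == 'mapr'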

View File

@@ -0,0 +1,78 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.plugins.mapr.util import names
import sahara.plugins.utils as u
ZOOKEEPER_CLIENT_PORT = 5181
def get_cldb_nodes_ip(cluster):
cldb_node_list = u.get_instances(cluster, names.CLDB)
return ','.join([i.management_ip for i in cldb_node_list])
def get_zookeeper_nodes_ip(cluster):
zkeeper_node_list = u.get_instances(cluster, names.ZOOKEEPER)
return ','.join([i.management_ip for i in zkeeper_node_list])
def get_zookeeper_nodes_ip_with_port(cluster):
zkeeper_node_list = u.get_instances(cluster, names.ZOOKEEPER)
return ','.join(['%s:%s' % (i.management_ip, ZOOKEEPER_CLIENT_PORT)
for i in zkeeper_node_list])
def get_resourcemanager_ip(cluster):
rm_instance = u.get_instance(cluster, names.RESOURCE_MANAGER)
return rm_instance.management_ip
def get_historyserver_ip(cluster):
hs_instance = u.get_instance(cluster, names.HISTORY_SERVER)
return hs_instance.management_ip
def get_jobtracker(cluster):
instance = u.get_instance(cluster, names.JOBTRACKER)
return instance
def get_resourcemanager(cluster):
return u.get_instance(cluster, names.RESOURCE_MANAGER)
def get_nodemanagers(cluster):
return u.get_instances(cluster, names.NODE_MANAGER)
def get_oozie(cluster):
return u.get_instance(cluster, names.OOZIE)
def get_datanodes(cluster):
return u.get_instances(cluster, names.DATANODE)
def get_tasktrackers(cluster):
return u.get_instances(cluster, names.TASK_TRACKER)
def get_secondarynamenodes(cluster):
return u.get_instances(cluster, names.SECONDARY_NAMENODE)
def get_historyserver(cluster):
return u.get_instance(cluster, names.HISTORY_SERVER)

View File

@@ -0,0 +1,76 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as c
import six
import sahara.plugins.utils as u
class ClusterInfo(object):
# TODO(aosadchiy): perform lookup for plugin_spec
def __init__(self, cluster, plugin_spec):
self.cluster = cluster
self.plugin_spec = plugin_spec
def get_default_configs(self, node_group=None):
services = self.get_services(node_group)
return self.plugin_spec.get_default_plugin_configs(services)
def get_services(self, node_group=None):
if not node_group:
return set(service for node_group in self.cluster.node_groups
for service in self.get_services(node_group))
else:
return (set(self.plugin_spec.get_node_process_service(node_process)
for node_process in node_group.node_processes)
| set(['general']))
def get_user_configs(self, node_group=None):
services = self.get_services(node_group)
predicate = lambda i: i[0] in services and i[1]
configs = dict(filter(
predicate, six.iteritems(self.cluster.cluster_configs)))
scope = 'node' if node_group else 'cluster'
result = c.defaultdict(lambda: c.defaultdict(dict))
for service, kv in six.iteritems(configs):
for key, value in six.iteritems(kv):
filename = self.plugin_spec.get_config_file(
scope, service, key)
result[service][filename][key] = value
return result
def get_node_group_files(self):
return
def get_node_groups(self, node_process=None):
return u.get_node_groups(self.cluster, node_process)
def get_instances_count(self, node_process=None):
return u.get_instances_count(self.cluster, node_process)
def get_instances(self, node_process=None):
return u.get_instances(self.cluster, node_process)
def get_instance(self, node_process):
return u.get_instance(self.cluster, node_process)
def get_instances_ip(self, node_process):
return [i.management_ip for i in self.get_instances(node_process)]
def get_instance_ip(self, node_process):
return self.get_instance(node_process).management_ip

View File

@@ -0,0 +1,110 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo.config import cfg
from sahara import exceptions as ex
from sahara.i18n import _
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.versions.version_handler_factory as vhf
from sahara.plugins import provisioning as p
import sahara.plugins.utils as u
from sahara.topology import topology_helper as th
from sahara.utils import files as f
MAPR_HOME = '/opt/mapr'
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper")
ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
config_type="bool", priority=1,
default_value=True, is_optional=True)
def post_configure_instance(instance):
LOG.info(_LI('START: Post configuration for instance.'))
with instance.remote() as r:
if is_data_locality_enabled(instance.node_group.cluster):
LOG.debug('Data locality is enabled.')
t_script = MAPR_HOME + '/topology.sh'
LOG.debug('Start writing file %s', t_script)
r.write_file_to(t_script, f.get_file_text(
'plugins/mapr/util/resources/topology.sh'), run_as_root=True)
LOG.debug('Done writing file %s', t_script)
LOG.debug('Start executing command: chmod +x %s', t_script)
r.execute_command('chmod +x ' + t_script, run_as_root=True)
LOG.debug('Done executing command.')
else:
LOG.debug('Data locality is disabled.')
LOG.info(_LI('END: Post configuration for instance.'))
def configure_instances(cluster, instances):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
p_spec = v_handler.get_plugin_spec()
configurer = v_handler.get_cluster_configurer(cluster, p_spec)
configurer.configure(instances)
def configure_topology_data(cluster, is_node_awareness):
LOG.info(_LI('START: configuring topology data.'))
if is_data_locality_enabled(cluster):
LOG.debug('Data locality is enabled.')
LOG.debug('Start generating topology map.')
topology_map = th.generate_topology_map(cluster, is_node_awareness)
LOG.debug('Done generating topology map.')
topology_data = cfu.to_file_content(topology_map, 'topology')
for i in u.get_instances(cluster):
LOG.debug(
'Start writing to file: %s/topology.data', MAPR_HOME)
i.remote().write_file_to(MAPR_HOME + "/topology.data",
topology_data, run_as_root=True)
LOG.debug('Done writing to file: %s/topology.data', MAPR_HOME)
else:
LOG.debug('Data locality is disabled.')
LOG.info(_LI('END: configuring topology data.'))
def get_plugin_configs():
configs = []
if CONF.enable_data_locality:
configs.append(ENABLE_DATA_LOCALITY)
return configs
def get_plugin_config_value(service, name, cluster):
if cluster:
for ng in cluster.node_groups:
cl_param = ng.configuration().get(service, {}).get(name)
if cl_param is not None:
return cl_param
for c in get_plugin_configs():
if c.applicable_target == service and c.name == name:
return c.default_value
raise ex.NotFoundException(
name, (_("Unable to get parameter '%(name)s' from service %(service)s")
% {'name': name, 'service': service}))
def is_data_locality_enabled(cluster):
if not CONF.enable_data_locality:
return False
return get_plugin_config_value(ENABLE_DATA_LOCALITY.applicable_target,
ENABLE_DATA_LOCALITY.name, cluster)
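For reference, topology_map roughly maps each node address to its rack path, so the topology.data file written above is a plain two-column listing; a sketch with assumed values:

topology_map = {'10.0.0.5': '/rack-01', '10.0.0.6': '/rack-02'}  # assumed
print(cfu.to_file_content(topology_map, 'topology'))
# 10.0.0.5 /rack-01
# 10.0.0.6 /rack-02
# (dictionary ordering, and therefore line ordering, is not guaranteed)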

View File

@@ -0,0 +1,81 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.util.func_utils as fu
import sahara.utils.files as f
import sahara.utils.xmlutils as x
def load_properties_file(path):
predicate = fu.and_predicate(lambda i: len(i) != 0,
lambda i: not i.isspace(),
lambda i: not i.startswith('#'))
mapper = fu.chain_function(lambda i: tuple(i.split('=')),
lambda i: (i[0].strip(), i[1].strip()))
lines = f.get_file_text(path).splitlines()
return dict(map(mapper, filter(predicate, lines)))
def load_xml_file(path):
kv_mapper = lambda i: (x._get_text_from_node(i, 'name'),
x._adjust_field(x._get_text_from_node(i, 'value')))
strip_mapper = lambda i: (i[0].strip(), i[1].strip())
props = x.load_xml_document(path).getElementsByTagName('property')
return dict(map(strip_mapper, map(kv_mapper, props)))
def load_raw_file(path):
return {'content': f.get_file_text(path)}
def to_properties_file_content(data):
mapper = lambda i: '%s=%s\n' % i
reducer = lambda p, c: p + c
return reduce(reducer, map(mapper, six.iteritems(data)), '')
def to_xml_file_content(data):
return x.create_hadoop_xml(data)
def to_topology_file_content(data):
mapper = lambda i: '%s %s\n' % i
reducer = lambda p, c: p + c
return reduce(reducer, map(mapper, six.iteritems(data)))
def to_raw_file_content(data, cfu=True, conv=str):
return data['content'] if cfu else conv(data)
def load_file(path, file_type):
if file_type == 'properties':
return load_properties_file(path)
elif file_type == 'xml':
return load_xml_file(path)
elif file_type == 'raw':
return load_raw_file(path)
def to_file_content(data, file_type, *args, **kargs):
if file_type == 'properties':
return to_properties_file_content(data, *args, **kargs)
elif file_type == 'xml':
return to_xml_file_content(data, *args, **kargs)
elif file_type == 'topology':
return to_topology_file_content(data, *args, **kargs)
elif file_type == 'raw':
return to_raw_file_content(data, *args, **kargs)
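A small round-trip sketch (illustrative keys and values only) of the serializers above:

to_properties_file_content({'cldb.port': '7222'})
# -> 'cldb.port=7222\n'
to_topology_file_content({'10.0.0.5': '/rack-01'})
# -> '10.0.0.5 /rack-01\n'
to_raw_file_content({'content': 'plain text'})
# -> 'plain text'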

View File

@@ -0,0 +1,77 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.util.func_utils as fu
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.utils.configs as c
def get_scope_default_configs(version_handler, scope, services=None):
configs = map(lambda i: i.to_dict(), version_handler.get_configs())
q_predicate = fu.field_equals_predicate('scope', scope)
if services:
at_predicate = fu.in_predicate('applicable_target', services)
q_predicate = fu.and_predicate(q_predicate, at_predicate)
q_fields = ['applicable_target', 'name', 'default_value']
q_result = du.select(q_fields, configs, q_predicate)
m_reducer = du.iterable_to_values_pair_dict_reducer(
'name', 'default_value')
return du.map_by_field_value(q_result, 'applicable_target',
dict, m_reducer)
def get_cluster_default_configs(version_handler, services=None):
return get_scope_default_configs(version_handler, 'cluster', services)
def get_node_default_configs(version_handler, services=None):
return get_scope_default_configs(version_handler, 'node', services)
def get_default_configs(version_handler, services=None):
cluster_configs = get_cluster_default_configs(version_handler, services)
node_configs = get_node_default_configs(version_handler, services)
return c.merge_configs(cluster_configs, node_configs)
def get_node_group_services(node_group):
h_version = node_group.cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
services = v_handler.get_node_processes()
node_processes = node_group.node_processes
return set(s for np in node_processes
for s in services if np in services[s])
def get_cluster_configs(cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
default_configs = get_cluster_default_configs(v_handler)
user_configs = cluster.cluster_configs
return c.merge_configs(default_configs, user_configs)
def get_configs(node_group):
services = get_node_group_services(node_group)
h_version = node_group.cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
default_configs = get_default_configs(v_handler, services)
user_configs = node_group.configuration()
return c.merge_configs(default_configs, user_configs)
def get_service(version_handler, node_process):
node_processes = version_handler.get_node_processes()
return du.get_keys_by_value_2(node_processes, node_process)

View File

@@ -0,0 +1,124 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as cl
import copy as cp
import functools as ft
import itertools as it
import six
import sahara.plugins.mapr.util.func_utils as fu
def append_to_key(dict_0, dict_1):
return dict((k0, dict((k1, dict_1[k1]) for k1 in keys_1 if k1 in dict_1))
for k0, keys_1 in six.iteritems(dict_0))
def iterable_to_values_pair_dict_reducer(key_0, key_1):
def reducer(previous, iterable, mapper):
previous.update(dict(map(mapper, iterable)))
return previous
return ft.partial(reducer, mapper=fu.get_values_pair_function(key_0,
key_1))
def flatten_to_list_reducer():
def reducer(previous, iterable):
previous.extend(list(iterable))
return previous
return reducer
def map_by_field_value(iterable, key, factory=list,
iterator_reducer=flatten_to_list_reducer()):
def reducer(mapping, current):
mapping[current[0]] = iterator_reducer(
mapping[current[0]], iter(current[1]))
return mapping
groups = it.groupby(iterable, fu.get_value_function(key))
return reduce(reducer, groups, cl.defaultdict(factory))
def map_by_fields_values(iterable, fields, factory=list,
reducer=flatten_to_list_reducer()):
if len(fields) == 1:
return map_by_field_value(iterable, fields[0], factory, reducer)
else:
return dict((k, map_by_fields_values(v, fields[1:], factory, reducer))
for k, v in six.iteritems(map_by_field_value(
iterable, fields[0])))
def get_keys_by_value_type(mapping, value_type):
return filter(lambda k: isinstance(mapping[k], value_type), mapping)
def deep_update(dict_0, dict_1, copy=True):
result = cp.deepcopy(dict_0) if copy else dict_0
dict_valued_keys_0 = set(get_keys_by_value_type(dict_0, dict))
dict_valued_keys_1 = set(get_keys_by_value_type(dict_1, dict))
common_keys = dict_valued_keys_0 & dict_valued_keys_1
if not common_keys:
result.update(dict_1)
else:
for k1, v1 in six.iteritems(dict_1):
result[k1] = deep_update(
dict_0[k1], v1) if k1 in common_keys else v1
return result
def get_keys_by_value(mapping, value):
return [k for k, v in six.iteritems(mapping) if v == value]
# TODO(aosadchiy): find more appropriate name
def get_keys_by_value_2(mapping, value):
return [k for k, v in six.iteritems(mapping) if value in v]
def iterable_to_values_list_reducer(key):
def reducer(previous, iterable, mapper):
previous.extend(map(mapper, iterable))
return previous
return ft.partial(reducer, mapper=fu.get_value_function(key))
def select(fields, iterable, predicate=fu.true_predicate):
return map(fu.extract_fields_function(fields), filter(predicate, iterable))
has_no_dict_values_predicate = lambda n: not get_keys_by_value_type(n, dict)
def list_of_vp_dicts_function(key_0, key_1):
def transformer(item, key_0, key_1):
return [fu.values_pair_to_dict_function(key_0, key_1)(i)
for i in six.iteritems(item)]
return ft.partial(transformer, key_0=key_0, key_1=key_1)
def flattened_dict(mapping, keys, is_terminal=has_no_dict_values_predicate,
transform=None):
if not transform:
transform = list_of_vp_dicts_function(*keys[-2:])
if is_terminal(mapping):
return list(transform(mapping))
else:
temp = [it.imap(fu.append_field_function(keys[0], key),
flattened_dict(value, keys[1:],
is_terminal, transform))
for key, value in six.iteritems(mapping)]
return list(it.chain(*temp))
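The most heavily used helper here is deep_update; a short sketch (made-up configuration keys) of its merge behaviour:

defaults = {'core-site.xml': {'fs.default.name': 'maprfs:///'}}
user = {'core-site.xml': {'io.file.buffer.size': '65536'}}
deep_update(defaults, user)
# -> {'core-site.xml': {'fs.default.name': 'maprfs:///',
#                       'io.file.buffer.size': '65536'}}
# defaults itself is left untouched because copy=True by default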

View File

@@ -0,0 +1,167 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import copy as c
import functools as ft
import itertools as it
import six
# predicates
true_predicate = lambda i: True
false_predicate = lambda i: False
def not_predicate(predicate):
return ft.partial(lambda i, p: not p(i), p=predicate)
def and_predicate(*predicates):
if len(predicates) == 1:
return predicates[0]
else:
def predicate(item, predicates):
for p in predicates:
if not p(item):
return False
return True
return ft.partial(predicate, predicates=predicates)
def or_predicate(*predicates):
if len(predicates) == 1:
return predicates[0]
else:
def predicate(item, predicates):
for p in predicates:
if p(item):
return True
return False
return ft.partial(predicate, predicates=predicates)
def impl_predicate(p0, p1):
return or_predicate(not_predicate(p0), p1)
def field_equals_predicate(key, value):
return ft.partial(lambda i, k, v: i[k] == v, k=key, v=value)
def like_predicate(template, ignored=[]):
if not template:
return true_predicate
elif len(template) == 1:
k, v = six.iteritems(template).next()
return true_predicate if k in ignored else field_equals_predicate(k, v)
else:
return and_predicate(*[field_equals_predicate(key, value)
for key, value in six.iteritems(template)
if key not in ignored])
def in_predicate(key, values):
if not values:
return false_predicate
else:
return or_predicate(*[field_equals_predicate(key, value)
for value in values])
# functions
def chain_function(*functions):
return reduce(lambda p, c: ft.partial(lambda i, p, c: c(p(i)), p=p, c=c),
functions)
def copy_function():
return lambda i: c.deepcopy(i)
def append_field_function(key, value):
def mapper(item, key, value):
item = c.deepcopy(item)
item[key] = value
return item
return ft.partial(mapper, key=key, value=value)
def append_fields_function(fields):
if not fields:
return copy_function()
elif len(fields) == 1:
key, value = six.iteritems(fields).next()
return append_field_function(key, value)
else:
return chain_function(*[append_field_function(key, value)
for key, value in six.iteritems(fields)])
def get_values_pair_function(key_0, key_1):
return ft.partial(lambda i, k0, k1: (i[k0], i[k1]), k0=key_0, k1=key_1)
def get_field_function(key):
return ft.partial(lambda i, k: (k, i[k]), k=key)
def get_fields_function(keys):
return ft.partial(lambda i, k: [f(i) for f in [get_field_function(key)
for key in k]], k=keys)
def extract_fields_function(keys):
return lambda i: dict(get_fields_function(keys)(i))
def get_value_function(key):
return ft.partial(lambda i, k: i[k], k=key)
def set_default_value_function(key, value):
def mapper(item, key, value):
item = c.deepcopy(item)
if key not in item:
item[key] = value
return item
return ft.partial(mapper, key=key, value=value)
def set_default_values_function(fields):
if not fields:
return copy_function()
elif len(fields) == 1:
key, value = six.iteritems(fields).next()
return set_default_value_function(key, value)
else:
return chain_function(*[set_default_value_function(key, value)
for key, value in six.iteritems(fields)])
def values_pair_to_dict_function(key_0, key_1):
return ft.partial(lambda vp, k0, k1: {k0: vp[0], k1: vp[1]},
k0=key_0, k1=key_1)
def flatten(iterable):
return it.chain.from_iterable(iterable)
def sync_execute_consumer(*consumers):
def consumer(argument, consumers):
for cn in consumers:
cn(argument)
return ft.partial(consumer, consumers=consumers)
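A short sketch (illustrative field names) of how these combinators compose:

is_cldb = field_equals_predicate('name', 'CLDB')
required = and_predicate(is_cldb, lambda i: not i.get('is_optional', False))
required({'name': 'CLDB'})
# -> True
tag = chain_function(copy_function(),
                     append_field_function('scope', 'cluster'))
tag({'name': 'CLDB'})
# -> {'name': 'CLDB', 'scope': 'cluster'}; the input dict is left untouched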

View File

@@ -0,0 +1,37 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import uuid
import six
MV_TO_MAPRFS_CMD = ('sudo -u %(user)s'
' hadoop fs -copyFromLocal %(source)s %(target)s'
' && sudo rm -f %(source)s')
MKDIR_CMD = 'sudo -u %(user)s hadoop fs -mkdir -p %(path)s'
def put_file_to_maprfs(r, content, file_name, path, hdfs_user):
tmp_file_name = '/tmp/%s.%s' % (file_name, six.text_type(uuid.uuid4()))
r.write_file_to(tmp_file_name, content)
move_from_local(r, tmp_file_name, path + '/' + file_name, hdfs_user)
def move_from_local(r, source, target, hdfs_user):
args = {'user': hdfs_user, 'source': source, 'target': target}
r.execute_command(MV_TO_MAPRFS_CMD % args)
def create_maprfs_dir(remote, dir_name, hdfs_user):
remote.execute_command(MKDIR_CMD % {'user': hdfs_user, 'path': dir_name})
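For clarity, the command templates above expand as follows (the user and the paths are illustrative):

args = {'user': 'mapr', 'source': '/tmp/wf.xml.1234', 'target': '/user/mapr/wf.xml'}
MV_TO_MAPRFS_CMD % args
# -> 'sudo -u mapr hadoop fs -copyFromLocal /tmp/wf.xml.1234 /user/mapr/wf.xml && sudo rm -f /tmp/wf.xml.1234'
MKDIR_CMD % {'user': 'mapr', 'path': '/user/mapr'}
# -> 'sudo -u mapr hadoop fs -mkdir -p /user/mapr'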

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
OOZIE = 'Oozie'
HIVE = 'Hive'
HIVE_METASTORE = 'HiveMetastore'
HIVE_SERVER2 = 'HiveServer2'
CLDB = 'CLDB'
FILE_SERVER = 'FileServer'
ZOOKEEPER = 'ZooKeeper'
RESOURCE_MANAGER = 'ResourceManager'
HISTORY_SERVER = 'HistoryServer'
IS_M7_ENABLED = 'Enable MapR-DB'
GENERAL = 'general'
JOBTRACKER = 'JobTracker'
NODE_MANAGER = 'NodeManager'
DATANODE = 'Datanode'
TASK_TRACKER = 'TaskTracker'
SECONDARY_NAMENODE = 'SecondaryNamenode'
NFS = 'NFS'
WEB_SERVER = 'Webserver'
WAIT_OOZIE_INTERVAL = 300
WAIT_NODE_ALARM_NO_HEARTBEAT = 360
ecosystem_components = ['Oozie',
'Hive-Metastore',
'HiveServer2',
'HBase-Master',
'HBase-RegionServer',
'HBase-Client',
'Pig']

View File

@@ -0,0 +1,198 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import os.path
import six
import sahara.openstack.common.log as logging
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.util.func_utils as fu
import sahara.plugins.provisioning as p
import sahara.utils.files as fm
LOG = logging.getLogger(__name__)
class PluginSpec(object):
def __init__(self, path):
self.base_dir = os.path.dirname(path)
self.plugin_spec_dict = self._load_plugin_spec_dict(path)
self.service_file_name_map = self._load_service_file_name_map()
self.default_configs = self._load_default_configs()
self.service_node_process_map = self._load_service_node_process_map()
self.plugin_config_objects = self._load_plugin_config_objects()
self.file_name_config_map = self._load_file_name_config_map()
self.plugin_config_items = self._load_plugin_config_items()
self.plugin_configs = self._load_plugin_configs()
self.default_plugin_configs = self._load_default_plugin_configs()
self.file_type_map = self._load_file_type_map()
def _load_plugin_spec_dict(self, path):
LOG.debug('Loading plugin spec from %s', path)
plugin_spec_dict = json.loads(fm.get_file_text(path))
return plugin_spec_dict
def _load_service_file_name_map(self):
LOG.debug('Loading service -> filename mapping')
return dict((s['name'], [fn for fn in s['files']])
for s in self.plugin_spec_dict['services']
if 'files' in s and s['files'])
def _load_default_configs(self):
LOG.debug('Loading defaults from local files')
file_name_data_map = {}
for f in self.plugin_spec_dict['files']:
if 'local' not in f:
LOG.debug('%s skipped. No "local" section', f['remote'])
continue
local_path = os.path.join(self.base_dir, f['local'])
LOG.debug('Loading %(local_path)s as default for %(remote)s',
{'local_path': local_path, 'remote': f['remote']})
data = cfu.load_file(local_path, f['type'])
file_name_data_map[f['remote']] = data
return du.append_to_key(self.service_file_name_map, file_name_data_map)
def _load_plugin_config_items(self):
LOG.debug('Loading full configs map for plugin')
items = map(lambda i: i.to_dict(), self.plugin_config_objects)
def mapper(item):
file_name = du.get_keys_by_value_2(
self.file_name_config_map, item['name'])[0]
append_f = fu.append_field_function('file', file_name)
return append_f(item)
return map(mapper, items)
def _load_plugin_configs(self):
LOG.debug('Loading plugin configs {service:{file:{name:value}}}')
m_fields = ['applicable_target', 'file']
vp_fields = ('name', 'default_value')
reducer = du.iterable_to_values_pair_dict_reducer(*vp_fields)
return du.map_by_fields_values(self.plugin_config_items,
m_fields, dict, reducer)
def _load_default_plugin_configs(self):
return du.deep_update(self.default_configs, self.plugin_configs)
def _load_service_node_process_map(self):
LOG.debug('Loading {service:[node process]} mapping')
return dict((s['name'], [np for np in s['node_processes']])
for s in self.plugin_spec_dict['services']
if 'node_processes' in s and s['node_processes'])
def _load_file_name_config_map(self):
LOG.debug('Loading {filename:[config_name]} names mapping')
r = {}
for fd in self.plugin_spec_dict['files']:
if 'configs' in fd:
r[fd['remote']] = [i['name']
for ir, sd in six.iteritems(fd['configs'])
for s, items in six.iteritems(sd)
for i in items]
return r
def _load_plugin_config_objects(self):
LOG.debug('Loading config objects for sahara-dashboard')
def mapper(item):
req = ['name', 'applicable_target', 'scope']
opt = ['description', 'config_type', 'config_values',
'default_value', 'is_optional', 'priority']
kargs = dict((k, item[k]) for k in req + opt if k in item)
return p.Config(**kargs)
result = []
for file_dict in self.plugin_spec_dict['files']:
if 'configs' not in file_dict:
LOG.debug('%s skipped. No "configs" section',
file_dict['remote'])
continue
remote_path = file_dict['remote']
applicable_target = du.get_keys_by_value_2(
self.service_file_name_map, remote_path)[0]
for is_required, scope_dict in six.iteritems(file_dict['configs']):
is_optional = is_required != 'required'
for scope, items in six.iteritems(scope_dict):
fields = {'file': remote_path, 'is_optional': is_optional,
'scope': scope,
'applicable_target': applicable_target}
append_f = fu.append_fields_function(fields)
result.extend([append_f(i) for i in items])
return map(mapper, result)
def _load_file_type_map(self):
LOG.debug('Loading {filename:type} mapping')
return dict((f['remote'], f['type'])
for f in self.plugin_spec_dict['files'])
def get_node_process_service(self, node_process):
return du.get_keys_by_value_2(self.service_node_process_map,
node_process)[0]
def get_default_plugin_configs(self, services):
return dict((k, self.default_plugin_configs[k])
for k in services if k in self.default_plugin_configs)
def get_config_file(self, scope, service, name):
p_template = {
'applicable_target': service, 'scope': scope, 'name': name}
q_fields = ['file']
q_predicate = fu.like_predicate(p_template)
q_source = self.plugin_config_items
q_result = du.select(q_fields, q_source, q_predicate)
if q_result and 'file' in q_result[0]:
return q_result[0]['file']
else:
return None
def get_file_type(self, file_name):
if file_name in self.file_type_map:
return self.file_type_map[file_name]
else:
return None
def get_service_for_file_name(self, file_name):
return du.get_keys_by_value_2(self.service_file_name_map, file_name)[0]
def get_version_config_objects(self):
common_fields = {'scope': 'cluster',
'config_type': 'dropdown',
'is_optional': False,
'priority': 1}
def has_version_field(service):
return 'versions' in service
def get_versions(service):
return {'name': '%s Version' % service['name'],
'applicable_target': service['name'],
'config_values': [(v, v) for v in service['versions']]}
def add_common_fields(item):
item.update(common_fields)
return item
def to_config(item):
return p.Config(**item)
mapper = fu.chain_function(get_versions, add_common_fields, to_config)
source = self.plugin_spec_dict['services']
return map(mapper, filter(has_version_field, source))
def get_configs(self):
return self.plugin_config_objects + self.get_version_config_objects()
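The loaders above imply the overall layout of plugin_spec.json; a heavily trimmed sketch of the parsed dictionary (the paths, service name and config entry are illustrative, not the shipped spec):

plugin_spec_dict = {
    'files': [
        {'remote': '/opt/mapr/conf/cldb.conf',   # target path on the node
         'local': 'default/cldb.conf',           # bundled defaults, optional
         'type': 'properties',
         'configs': {'optional': {'cluster': [
             {'name': 'cldb.port', 'default_value': '7222'}]}}},
    ],
    'services': [
        {'name': 'management',                   # illustrative service name
         'files': ['/opt/mapr/conf/cldb.conf'],
         'node_processes': ['CLDB', 'Webserver']},
    ],
}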

View File

@@ -0,0 +1,13 @@
#!/bin/bash
disk_list_file=/tmp/disk.list
if [ -f ${disk_list_file} ]; then
rm -f ${disk_list_file}
fi
for path in $*; do
device=`findmnt ${path} -cno SOURCE`
umount -f ${device}
echo ${device} >> ${disk_list_file}
done

View File

@@ -0,0 +1,3 @@
classic_version=0.20.2
yarn_version=2.4.1
default_mode=%(mode)s

View File

@@ -0,0 +1,20 @@
#!/bin/bash
MAPR_HOME=/opt/mapr
while [ $# -gt 0 ] ; do
nodeArg=$1
exec< ${MAPR_HOME}/topology.data
result=""
while read line ; do
ar=( $line )
if [ "${ar[0]}" = "$nodeArg" ]; then
result="${ar[1]}"
fi
done
shift
if [ -z "$result" ]; then
echo -n "/default/rack "
else
echo -n "$result "
fi
done

View File

@@ -0,0 +1,9 @@
#!/bin/sh
while true; do
if [ -f '/tmp/launching-mapr-mfs.lck' ]; then
sleep 5
else
break
fi
done

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
LOG = logging.getLogger(__name__)
def run_configure_sh(remote, script_string):
LOG.info(_LI("running configure.sh script"))
remote.execute_command(script_string, run_as_root=True)
def start_zookeeper(remote):
LOG.info(_LI("Starting mapr-zookeeper"))
remote.execute_command('service mapr-zookeeper start', run_as_root=True)
def start_oozie(remote):
LOG.info(_LI("Starting mapr-oozie"))
remote.execute_command('service mapr-oozie start',
run_as_root=True,
raise_when_error=False)
def start_hive_metastore(remote):
LOG.info(_LI("Starting mapr-hive-server2"))
remote.execute_command('service mapr-hivemetastore start',
run_as_root=True)
def start_hive_server2(remote):
LOG.info(_LI("Starting mapr-hive-server2"))
remote.execute_command('service mapr-hiveserver2 start', run_as_root=True)
def start_warden(remote):
LOG.info(_LI("Starting mapr-warden"))
remote.execute_command('service mapr-warden start', run_as_root=True)
def start_cldb(remote):
LOG.info(_LI("Starting mapr-cldb"))
remote.execute_command('service mapr-cldb start', run_as_root=True)
def start_node_manager(remote):
LOG.info(_LI("Starting nodemanager"))
remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0'
'/sbin/yarn-daemon.sh start nodemanager'),
run_as_root=True)
def start_resource_manager(remote):
LOG.info(_LI("Starting resourcemanager"))
remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0'
'/sbin/yarn-daemon.sh start resourcemanager'),
run_as_root=True)

View File

@@ -0,0 +1,144 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara import context
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
from sahara.plugins.mapr.util import config
from sahara.plugins.mapr.util import names
from sahara.plugins.mapr.util import run_scripts
from sahara.plugins.mapr.util import start_helper
from sahara.utils import general as gen
LOG = logging.getLogger(__name__)
STOP_WARDEN_CMD = 'service mapr-warden stop'
STOP_ZOOKEEPER_CMD = 'service mapr-zookeeper stop'
GET_SERVER_ID_CMD = ('maprcli node list -json -filter [ip==%s] -columns id'
' | grep id | grep -o \'[0-9]*\'')
MOVE_NODE_CMD = 'maprcli node move -serverids %s -topology /decommissioned'
GET_HOSTNAME_CMD = ('maprcli node list -json -filter [ip==%s]'
' -columns hostname | grep hostname'
' | grep -Po \'(?<=("hostname":")).*?(?=")\'')
REMOVE_NODE_CMD = 'maprcli node remove -filter [ip==%(ip)s] -nodes %(nodes)s'
REMOVE_MAPR_PACKAGES_CMD = ('python -mplatform | grep Ubuntu '
'&& apt-get remove mapr-\* -y'
' || yum remove mapr-\* -y')
REMOVE_MAPR_HOME_CMD = 'rm -rf /opt/mapr'
REMOVE_MAPR_CORES_CMD = 'rm -rf /opt/cores/*.core.*'
def scale_cluster(cluster, instances, disk_setup_script_path, waiting_script,
context, configure_sh_string, is_node_awareness):
LOG.info(_LI('START: Cluster scaling. Cluster = %s'), cluster.name)
for inst in instances:
start_helper.install_role_on_instance(inst, context)
config.configure_instances(cluster, instances)
start_services(cluster, instances, disk_setup_script_path,
waiting_script, configure_sh_string)
LOG.info(_LI('END: Cluster scaling. Cluster = %s'), cluster.name)
def decommission_nodes(cluster, instances, configure_sh_string):
LOG.info(_LI('Start decommission. Cluster = %s'), cluster.name)
move_node(cluster, instances)
stop_services(cluster, instances)
context.sleep(names.WAIT_NODE_ALARM_NO_HEARTBEAT)
remove_node(cluster, instances)
remove_services(cluster, instances)
if check_for_cldb_or_zookeeper_service(instances):
all_instances = gen.get_instances(cluster)
current_cluster_instances = [
x for x in all_instances if x not in instances]
for inst in current_cluster_instances:
start_helper.exec_configure_sh_on_instance(
cluster, inst, configure_sh_string)
LOG.info(_LI('End decommission. Cluster = %s'), cluster.name)
def start_services(cluster, instances, disk_setup_script_path,
waiting_script, configure_sh_string):
LOG.info(_LI('START: Starting services.'))
for inst in instances:
start_helper.exec_configure_sh_on_instance(
cluster, inst, configure_sh_string)
start_helper.wait_for_mfs_unlock(cluster, waiting_script)
start_helper.setup_maprfs_on_instance(inst, disk_setup_script_path)
if check_if_is_zookeeper_node(inst):
run_scripts.start_zookeeper(inst.remote())
run_scripts.start_warden(inst.remote())
if check_for_cldb_or_zookeeper_service(instances):
start_helper.exec_configure_sh_on_cluster(
cluster, configure_sh_string)
LOG.info(_LI('END: Starting services.'))
def stop_services(cluster, instances):
LOG.info(_LI("Stop warden and zookeeper"))
for instance in instances:
with instance.remote() as r:
r.execute_command(STOP_WARDEN_CMD, run_as_root=True)
if check_if_is_zookeeper_node(instance):
r.execute_command(STOP_ZOOKEEPER_CMD, run_as_root=True)
LOG.info(_LI("Warden and zookeeper stoped"))
def move_node(cluster, instances):
LOG.info(_LI("Start moving the node to the /decommissioned"))
for instance in instances:
with instance.remote() as r:
command = GET_SERVER_ID_CMD % instance.management_ip
ec, out = r.execute_command(command, run_as_root=True)
command = MOVE_NODE_CMD % out.strip()
r.execute_command(command, run_as_root=True)
LOG.info(_LI("Nodes moved to the /decommissioned"))
def remove_node(cluster, instances):
LOG.info("Start removing the nodes")
for instance in instances:
with instance.remote() as r:
command = GET_HOSTNAME_CMD % instance.management_ip
ec, out = r.execute_command(command, run_as_root=True)
command = REMOVE_NODE_CMD % {'ip': instance.management_ip,
'nodes': out.strip()}
r.execute_command(command, run_as_root=True)
LOG.info("Nodes removed")
def remove_services(cluster, instances):
LOG.info(_LI("Start remove all mapr services"))
for instance in instances:
with instance.remote() as r:
r.execute_command(REMOVE_MAPR_PACKAGES_CMD, run_as_root=True)
r.execute_command(REMOVE_MAPR_HOME_CMD, run_as_root=True)
r.execute_command(REMOVE_MAPR_CORES_CMD, run_as_root=True)
LOG.info(_LI("All mapr services removed"))
def check_if_is_zookeeper_node(instance):
processes_list = instance.node_group.node_processes
return names.ZOOKEEPER in processes_list
def check_for_cldb_or_zookeeper_service(instances):
for inst in instances:
np_list = inst.node_group.node_processes
if names.ZOOKEEPER in np_list or names.CLDB in np_list:
return True
return False

View File

@@ -0,0 +1,177 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara import context
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
from sahara.plugins.mapr.util import names
from sahara.plugins.mapr.util import run_scripts
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.utils as utils
from sahara.utils import files as files
LOG = logging.getLogger(__name__)
def exec_configure_sh_on_cluster(cluster, script_string):
inst_list = utils.get_instances(cluster)
for n in inst_list:
exec_configure_sh_on_instance(cluster, n, script_string)
def exec_configure_sh_on_instance(cluster, instance, script_string):
LOG.info(_LI('START: Executing configure.sh'))
if check_for_mapr_db(cluster):
script_string += ' -M7'
if not check_if_mapr_user_exist(instance):
script_string += ' --create-user'
LOG.debug('script_string = %s', script_string)
instance.remote().execute_command(script_string, run_as_root=True)
LOG.info(_LI('END: Executing configure.sh'))
def check_for_mapr_db(cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
return v_handler.get_context(cluster).is_m7_enabled()
def setup_maprfs_on_cluster(cluster, path_to_disk_setup_script):
mapr_node_list = utils.get_instances(cluster, 'FileServer')
for instance in mapr_node_list:
setup_maprfs_on_instance(instance, path_to_disk_setup_script)
def setup_maprfs_on_instance(instance, path_to_disk_setup_script):
LOG.info(_LI('START: Setup maprfs on instance %s'), instance.instance_name)
create_disk_list_file(instance, path_to_disk_setup_script)
execute_disksetup(instance)
LOG.info(_LI('END: Setup maprfs on instance.'))
def create_disk_list_file(instance, path_to_disk_setup_script):
LOG.info(_LI('START: Creating disk list file.'))
script_path = '/tmp/disk_setup_script.sh'
rmt = instance.remote()
LOG.debug('Writing /tmp/disk_setup_script.sh')
rmt.write_file_to(
script_path, files.get_file_text(path_to_disk_setup_script))
LOG.debug('Start executing command: chmod +x %s', script_path)
rmt.execute_command('chmod +x ' + script_path, run_as_root=True)
LOG.debug('Done executing command.')
args = ' '.join(instance.node_group.storage_paths())
cmd = '%s %s' % (script_path, args)
LOG.debug('Executing %s', cmd)
rmt.execute_command(cmd, run_as_root=True)
LOG.info(_LI('END: Creating disk list file.'))
def execute_disksetup(instance):
LOG.info(_LI('START: Executing disksetup.'))
rmt = instance.remote()
rmt.execute_command(
'/opt/mapr/server/disksetup -F /tmp/disk.list', run_as_root=True)
LOG.info(_LI('END: Executing disksetup.'))
def wait_for_mfs_unlock(cluster, path_to_waiting_script):
mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER)
for instance in mapr_node_list:
create_waiting_script_file(instance, path_to_waiting_script)
exec_waiting_script_on_instance(instance)
def start_zookeeper_nodes_on_cluster(cluster):
zkeeper_node_list = utils.get_instances(cluster, names.ZOOKEEPER)
for z_keeper_node in zkeeper_node_list:
run_scripts.start_zookeeper(z_keeper_node.remote())
def start_warden_on_cluster(cluster):
node_list = utils.get_instances(cluster)
for node in node_list:
run_scripts.start_warden(node.remote())
def start_warden_on_cldb_nodes(cluster):
node_list = utils.get_instances(cluster, names.CLDB)
for node in node_list:
run_scripts.start_warden(node.remote())
def start_warden_on_other_nodes(cluster):
node_list = utils.get_instances(cluster)
for node in node_list:
if names.CLDB not in node.node_group.node_processes:
run_scripts.start_warden(node.remote())
def create_waiting_script_file(instance, path_to_waiting_script):
LOG.info(_LI('START: Creating waiting script file.'))
script_path = '/tmp/waiting_script.sh'
rmt = instance.remote()
rmt.write_file_to(script_path, files.get_file_text(path_to_waiting_script))
LOG.info(_LI('END: Creating waiting script file.'))
def exec_waiting_script_on_instance(instance):
LOG.info(_LI('START: Waiting script'))
rmt = instance.remote()
rmt.execute_command('chmod +x /tmp/waiting_script.sh', run_as_root=True)
rmt.execute_command('/tmp/waiting_script.sh', run_as_root=True)
LOG.info(_LI('END: Waiting script'))
def check_if_mapr_user_exist(instance):
ec, out = instance.remote().execute_command('id -u mapr',
run_as_root=True,
raise_when_error=False)
return ec == 0
def check_for_mapr_component(instance, component_name):
component_list = instance.node_group.node_processes
return component_name in component_list
def install_role_on_instance(instance, cluster_context):
LOG.info(_LI('START: Installing roles on node '))
roles_list = instance.node_group.node_processes
exec_str = (cluster_context.get_install_manager()
+ cluster_context.get_roles_str(roles_list))
LOG.debug('Executing "%(command)s" on %(instance)s',
{'command': exec_str, 'instance': instance.instance_id})
instance.remote().execute_command(exec_str, run_as_root=True, timeout=900)
LOG.info(_LI('END: Installing roles on node '))
def install_roles(cluster, cluster_context):
LOG.info(_LI('START: Installing roles on cluster'))
instances = utils.get_instances(cluster)
with context.ThreadGroup(len(instances)) as tg:
for instance in instances:
tg.spawn('install_roles_%s' % instance.instance_id,
install_role_on_instance,
instance,
cluster_context)
LOG.info(_LI('END: Installing roles on cluster'))
def start_ecosystem(cluster_context):
oozie_inst = cluster_context.get_oozie_instance()
if oozie_inst is not None:
context.sleep(names.WAIT_OOZIE_INTERVAL)
run_scripts.start_oozie(oozie_inst.remote())

View File

@@ -0,0 +1,135 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import functools as ft
from sahara.i18n import _
import sahara.plugins.exceptions as e
import sahara.plugins.mapr.util.cluster_info as ci
import sahara.plugins.mapr.util.wrapper as w
class LessThanCountException(e.InvalidComponentCountException):
def __init__(self, component, expected_count, count):
super(LessThanCountException, self).__init__(
component, expected_count, count)
self.message = (_("Hadoop cluster should contain at least"
" %(expected_count)d %(component)s component(s)."
" Actual %(component)s count is %(count)d")
% {'expected_count': expected_count,
'component': component, 'count': count})
class MoreThanCountException(e.InvalidComponentCountException):
def __init__(self, component, expected_count, count):
super(MoreThanCountException, self).__init__(
component, expected_count, count)
self.message = (_("Hadoop cluster should contain not more than"
" %(expected_count)d %(component)s component(s)."
" Actual %(component)s count is %(count)d")
% {'expected_count': expected_count,
'component': component, 'count': count})
class NodeRequiredServiceMissingException(e.RequiredServiceMissingException):
def __init__(self, service_name, required_by=None):
super(NodeRequiredServiceMissingException, self).__init__(
service_name, required_by)
self.message = _('Node is missing a service: %s') % service_name
if required_by:
self.message = (_('%(message)s, required by service:'
' %(required_by)s')
% {'message': self.message,
'required_by': required_by})
def not_less_than_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
if not actual_count >= count:
raise LessThanCountException(component, count, actual_count)
return ft.partial(validate, component=component, count=count)
def not_more_than_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
if not actual_count <= count:
raise MoreThanCountException(component, count, actual_count)
return ft.partial(validate, component=component, count=count)
def equal_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
if not actual_count == count:
raise e.InvalidComponentCountException(
component, count, actual_count)
return ft.partial(validate, component=component, count=count)
def require_component_vr(component):
def validate(instance, component):
if component not in instance.node_group.node_processes:
raise NodeRequiredServiceMissingException(component)
return ft.partial(validate, component=component)
def require_of_listed_components(components):
def validate(instance, components):
        if not any(c in instance.node_group.node_processes
                   for c in components):
            raise NodeRequiredServiceMissingException(components)
return ft.partial(validate, components=components)
def each_node_has_component_vr(component):
def validate(cluster, component):
rc_vr = require_component_vr(component)
c_info = ci.ClusterInfo(cluster, None)
for i in c_info.get_instances():
rc_vr(i)
return ft.partial(validate, component=component)
def each_node_has_at_least_one_of_listed_components(components):
def validate(cluster, components):
rc_vr = require_of_listed_components(components)
c_info = ci.ClusterInfo(cluster, None)
for i in c_info.get_instances():
rc_vr(i)
return ft.partial(validate, components=components)
def node_dependency_satisfied_vr(component, dependency):
def validate(cluster, component, dependency):
c_info = ci.ClusterInfo(cluster, None)
for ng in c_info.get_node_groups(component):
if dependency not in ng.node_processes:
                raise NodeRequiredServiceMissingException(
                    dependency, component)
return ft.partial(validate, component=component, dependency=dependency)
def create_fake_cluster(cluster, existing, additional):
w_node_groups = [w.Wrapper(ng, count=existing[ng.id])
if ng.id in existing else ng
for ng in cluster.node_groups]
return w.Wrapper(cluster, node_groups=w_node_groups)
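
A standalone sketch of how these validator factories are meant to be consumed (the dict-based cluster stand-in, component names and counts are illustrative; the real callers pass sahara cluster objects, as the version handlers later in this commit do):

import functools as ft

def not_less_than_count_vr(component, count):
    # same factory-of-partials shape as above, checked against a plain dict
    def validate(cluster, component, count):
        actual = cluster.get(component, 0)
        if actual < count:
            raise ValueError('%s: expected at least %d, got %d'
                             % (component, count, actual))
    return ft.partial(validate, component=component, count=count)

rules = [not_less_than_count_vr('ZooKeeper', 1),
         not_less_than_count_vr('CLDB', 1)]
fake_cluster = {'ZooKeeper': 3, 'CLDB': 1, 'FileServer': 4}
for rule in rules:
    rule(fake_cluster)  # passes silently; a violated rule raises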

View File

@@ -0,0 +1,28 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class Wrapper(object):
WRAPPED = '__wrapped__'
def __init__(self, wrapped, **kargs):
object.__getattribute__(self, '__dict__').update(kargs)
object.__setattr__(self, Wrapper.WRAPPED, wrapped)
def __getattribute__(self, name):
wrapped = object.__getattribute__(self, Wrapper.WRAPPED)
try:
return object.__getattribute__(self, name)
except AttributeError:
return object.__getattribute__(wrapped, name)
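
A short usage sketch for the class above (FakeNodeGroup is illustrative only and the snippet assumes the Wrapper class defined here is in scope): overridden attributes shadow the wrapped object and everything else falls through, which is exactly how create_fake_cluster() builds a scaled copy of a cluster for validation.

class FakeNodeGroup(object):
    def __init__(self, id, count):
        self.id = id
        self.count = count

ng = FakeNodeGroup('workers', 3)
scaled = Wrapper(ng, count=5)   # pretend the group is scaled to 5 instances
print(scaled.count)             # 5 -- taken from the override
print(scaled.id)                # 'workers' -- falls through to the wrapped object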

View File

View File

@@ -0,0 +1,167 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import collections as c
import os
import six
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
import sahara.plugins.mapr.util.cluster_helper as ch
import sahara.plugins.mapr.util.cluster_info as ci
from sahara.plugins.mapr.util import config
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.utils as u
import sahara.swift.swift_helper as sh
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class BaseClusterConfigurer(object):
def get_topology_configs(self):
result = c.defaultdict(dict)
if config.is_data_locality_enabled(self.cluster):
if self.is_node_awareness_enabled():
LOG.debug('Node group awareness is set to True')
file_name = '%s/core-site.xml' % self.get_hadoop_conf_dir()
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['net.topology.impl'] = (
'org.apache.hadoop.net.NetworkTopologyWithNodeGroup')
data['net.topology.nodegroup.aware'] = True
data['dfs.block.replicator.classname'] = (
'org.apache.hadoop.hdfs.server.namenode'
'.BlockPlacementPolicyWithNodeGroup')
result[service][file_name] = data
file_name = '%s/mapred-site.xml' % self.get_hadoop_conf_dir()
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['mapred.jobtracker.nodegroup.aware'] = True
data['mapred.task.cache.levels'] = 3
result[service][file_name] = data
file_name = '/opt/mapr/conf/cldb.conf'
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['net.topology.script.file.name'] = '/opt/mapr/topology.sh'
result[service][file_name] = data
else:
                LOG.debug('Node group awareness is not implemented in YARN'
                          ' yet, so enable_hypervisor_awareness is set to'
                          ' False explicitly')
return result
def get_swift_configs(self):
mapper = lambda i: (i['name'], i['value'])
file_name = '%s/core-site.xml' % self.get_hadoop_conf_dir()
service = self.plugin_spec.get_service_for_file_name(file_name)
data = dict(map(mapper, sh.get_swift_configs()))
return {service: {file_name: data}}
def get_cluster_configs(self):
default_configs = self.cluster_info.get_default_configs()
user_configs = self.cluster_info.get_user_configs()
result = du.deep_update(default_configs, user_configs)
file_name = '/opt/mapr/conf/cldb.conf'
service = self.plugin_spec.get_service_for_file_name(file_name)
if file_name not in result[service]:
result[service][file_name] = {}
data = result[service][file_name]
data['cldb.zookeeper.servers'] = ch.get_zookeeper_nodes_ip_with_port(
self.cluster)
return result
def get_cluster_configs_template(self):
template = {}
du.deep_update(template, self.get_topology_configs(), False)
du.deep_update(template, self.get_swift_configs(), False)
du.deep_update(template, self.get_cluster_configs(), False)
return template
def get_node_group_configs(self, node_groups=None):
ng_configs = {}
if not node_groups:
node_groups = self.cluster.node_groups
cc_template = self.cluster_configs_template
p_spec = self.plugin_spec
for ng in node_groups:
ng_services = self.cluster_info.get_services(ng)
d_configs = dict(filter(lambda i: i[0] in ng_services,
six.iteritems(cc_template)))
u_configs = self.cluster_info.get_user_configs(ng)
nc_template = du.deep_update(d_configs, u_configs)
nc_data = {}
for files in nc_template.values():
for f_name, f_data in six.iteritems(files):
if f_name:
f_type = p_spec.get_file_type(f_name)
f_content = cfu.to_file_content(f_data, f_type)
if f_content:
nc_data[f_name] = f_content
ng_configs[ng.id] = nc_data
return ng_configs
def configure_instances(self, instances=None):
if not instances:
instances = u.get_instances(self.cluster)
for i in instances:
i_files = self.node_group_files[i.node_group_id]
LOG.info(_LI('Writing files %(f_names)s to node %(node)s'),
{'f_names': i_files.keys(), 'node': i.management_ip})
with i.remote() as r:
for f_name in i_files:
r.execute_command('mkdir -p ' + os.path.dirname(f_name),
run_as_root=True)
LOG.debug('Created dir: %s', os.path.dirname(f_name))
r.write_files_to(i_files, run_as_root=True)
config.post_configure_instance(i)
def __init__(self, cluster, plugin_spec):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
self.context = v_handler.get_context(cluster)
self.cluster = cluster
self.plugin_spec = plugin_spec
self.cluster_info = ci.ClusterInfo(self.cluster, self.plugin_spec)
self.cluster_configs_template = self.get_cluster_configs_template()
self.node_group_files = self.get_node_group_configs()
def configure(self, instances=None):
self.configure_topology_data(self.cluster)
self.configure_instances(instances)
@staticmethod
def _post_configure_instance(instance):
config.post_configure_instance(instance)
def configure_topology_data(self, cluster):
config.configure_topology_data(
cluster, self.is_node_awareness_enabled())
@abc.abstractmethod
def get_hadoop_conf_dir(self):
return
@abc.abstractmethod
def is_node_awareness_enabled(self):
return
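
For orientation, a minimal sketch of the nested service -> file -> settings mapping that get_cluster_configs_template() and get_node_group_configs() pass around; the service names, paths and values below are illustrative examples taken from the code above, not generated output:

cluster_configs_template = {
    'MapR FS': {
        '/opt/mapr/conf/cldb.conf': {
            'cldb.zookeeper.servers': '10.0.0.21:5181,10.0.0.22:5181',
        },
    },
    'MapReduce': {
        '/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml': {
            'mapred.jobtracker.nodegroup.aware': True,
            'mapred.task.cache.levels': 3,
        },
    },
}
# per node group this template is filtered to the services the group runs and
# rendered to file content, e.g. {'<ng id>': {'/opt/mapr/conf/cldb.conf': '<text>'}}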

View File

@@ -0,0 +1,173 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
import sahara.plugins.mapr.util.config_utils as cu
import sahara.plugins.mapr.util.names as n
import sahara.plugins.utils as u
@six.add_metaclass(abc.ABCMeta)
class BaseContext(object):
hive_version_config = 'Hive Version'
oozie_version_config = 'Oozie Version'
@abc.abstractmethod
def get_cluster(self):
return
@abc.abstractmethod
def is_m7_enabled(self):
return
@abc.abstractmethod
def get_hadoop_version(self):
return
def get_linux_distro_version(self):
return self.get_zk_instances()[0].remote().execute_command(
'lsb_release -is', run_as_root=True)[1].rstrip()
def get_install_manager(self):
install_manager_map = {'Ubuntu': 'apt-get install --force-yes -y ',
'CentOS': 'yum install -y ',
'RedHatEnterpriseServer': 'yum install -y ',
'Suse': 'zypper '}
return install_manager_map.get(self.get_linux_distro_version())
def get_install_manager_version_separator(self):
install_manager_map = {'Ubuntu': '=',
'CentOS': '-',
'RedHatEnterpriseServer': '-',
'Suse': ':'}
return install_manager_map.get(self.get_linux_distro_version())
def get_fs_instances(self):
return u.get_instances(self.get_cluster(), n.FILE_SERVER)
def get_zk_instances(self):
return u.get_instances(self.get_cluster(), n.ZOOKEEPER)
def get_zk_uris(self):
mapper = lambda i: '%s' % i.management_ip
return map(mapper, self.get_zk_instances())
def get_cldb_instances(self):
return u.get_instances(self.get_cluster(), n.CLDB)
def get_cldb_uris(self):
mapper = lambda i: '%s' % i.management_ip
return map(mapper, self.get_cldb_instances())
def get_cldb_uri(self):
return 'maprfs:///'
def get_rm_instance(self):
return u.get_instance(self.get_cluster(), n.RESOURCE_MANAGER)
def get_rm_port(self):
return '8032'
def get_rm_uri(self):
port = self.get_rm_port()
ip = self.get_rm_instance().management_ip
return '%s:%s' % (ip, port) if port else ip
def get_hs_instance(self):
return u.get_instance(self.get_cluster(), n.HISTORY_SERVER)
def get_hs_uri(self):
return self.get_hs_instance().management_ip
def get_oozie_instance(self):
return u.get_instance(self.get_cluster(), n.OOZIE)
def get_hive_metastore_instances(self):
return u.get_instances(self.get_cluster(), n.HIVE_METASTORE)
def get_hive_server2_instances(self):
return u.get_instances(self.get_cluster(), n.HIVE_SERVER2)
def get_oozie_uri(self):
ip = self.get_oozie_instance().management_ip
return 'http://%s:11000/oozie' % ip
def get_roles_str(self, comp_list):
component_list_str = 'mapr-core ' + ' '.join(['mapr-' + role + ' '
for role in comp_list])
if 'HBase-Client' in comp_list:
component_list_str = component_list_str.replace(
'HBase-Client', 'hbase')
if 'Oozie' in comp_list:
component_list_str = component_list_str.replace(
'Oozie', 'oozie' + self.get_oozie_version())
if 'HiveMetastore' in comp_list:
component_list_str = component_list_str.replace(
'HiveMetastore', 'HiveMetastore' + self.get_hive_version())
if 'HiveServer2' in comp_list:
component_list_str = component_list_str.replace(
'HiveServer2', 'HiveServer2' + self.get_hive_version())
return component_list_str.lower()
def user_exists(self):
return
def get_plain_instances(self):
fs = self.get_fs_instances()
zk = self.get_zk_instances()
cldb = self.get_cldb_instances()
zk_fs_cldb = zk + fs + cldb
instances = u.get_instances(self.get_cluster())
return [i for i in instances if i not in zk_fs_cldb]
def get_configure_command(self):
kargs = {'path': self.get_configure_sh_path(),
'cldb_nodes': ','.join(self.get_cldb_uris()),
                 'zk_nodes': ','.join(self.get_zk_uris()),
'rm_node': self.get_rm_uri(),
'hs_node': self.get_hs_uri()}
command = ('{path} -C {cldb_nodes} -Z {zk_nodes} -RM {rm_node}'
' -HS {hs_node} -f').format(**kargs)
if self.is_m7_enabled():
command += ' -M7'
if not self.user_exists():
command += ' --create-user'
return command
def get_fs_wait_command(self):
return '/tmp/waiting_script.sh'
def get_disk_setup_command(self):
return '/opt/mapr/server/disksetup -F /tmp/disk.list'
def get_configure_sh_path(self):
return '/opt/mapr/server/configure.sh'
def get_oozie_version(self):
configs = cu.get_cluster_configs(self.get_cluster())
return (self.get_install_manager_version_separator()
+ configs[n.OOZIE][BaseContext.oozie_version_config] + '*')
def get_hive_version(self):
configs = cu.get_cluster_configs(self.get_cluster())
return (self.get_install_manager_version_separator()
+ configs[n.HIVE][BaseContext.hive_version_config] + "*")
def get_scripts(self):
return
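
As a concrete illustration, the command assembled by get_configure_command() for an M7-enabled cluster without a pre-created mapr user looks roughly like the output below; the IP addresses are invented for the example and the snippet simply replays the format string used above:

command = ('{path} -C {cldb_nodes} -Z {zk_nodes} -RM {rm_node}'
           ' -HS {hs_node} -f').format(
    path='/opt/mapr/server/configure.sh',
    cldb_nodes='10.0.0.11,10.0.0.12',
    zk_nodes='10.0.0.21,10.0.0.22,10.0.0.23',
    rm_node='10.0.0.31:8032',
    hs_node='10.0.0.41') + ' -M7 --create-user'
print(command)
# /opt/mapr/server/configure.sh -C 10.0.0.11,10.0.0.12 -Z 10.0.0.21,10.0.0.22,10.0.0.23 -RM 10.0.0.31:8032 -HS 10.0.0.41 -f -M7 --create-user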

View File

@@ -0,0 +1,115 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
import sahara.plugins.mapr.util.plugin_spec as ps
import sahara.plugins.mapr.util.start_helper as sh
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.edp_engine as edp
@six.add_metaclass(abc.ABCMeta)
class BaseVersionHandler(object):
def __init__(self):
self.plugin_spec = ps.PluginSpec(self.get_plugin_spec_path())
def get_plugin_spec(self):
return self.plugin_spec
def get_configs(self):
return self.plugin_spec.get_configs()
def get_node_processes(self):
return self.plugin_spec.service_node_process_map
def get_disk_setup_script(self):
return 'plugins/mapr/util/resources/create_disk_list_file.sh'
def validate(self, cluster):
rules = self.get_cluster_validation_rules(cluster)
for rule in rules:
rule(cluster)
def validate_scaling(self, cluster, existing, additional):
fake_cluster = vu.create_fake_cluster(cluster, existing, additional)
self.validate(fake_cluster)
def validate_edp(self, cluster):
for rule in self.get_edp_validation_rules():
rule(cluster)
def configure_cluster(self, cluster):
sh.install_roles(cluster, self.get_context(cluster))
self.get_cluster_configurer(cluster, self.plugin_spec).configure()
def get_name_node_uri(self, cluster):
return self.get_context(cluster).get_cldb_uri()
def get_oozie_server(self, cluster):
return self.get_context(cluster).get_oozie_instance()
def get_oozie_server_uri(self, cluster):
return self.get_context(cluster).get_oozie_uri()
def get_resource_manager_uri(self, cluster):
return self.get_context(cluster).get_rm_uri()
def get_home_dir(self):
return ('plugins/mapr/versions/v%s'
% self.get_plugin_version().replace('.', '_').lower())
def get_plugin_spec_path(self):
return '%s/resources/plugin_spec.json' % self.get_home_dir()
def get_edp_engine(self, cluster, job_type):
if job_type in edp.MapROozieJobEngine.get_supported_job_types():
return edp.MapROozieJobEngine(cluster)
return None
    # Abstract methods
@abc.abstractmethod
def get_plugin_version(self):
return
@abc.abstractmethod
def get_cluster_validation_rules(self, cluster):
return
@abc.abstractmethod
def get_scaling_validation_rules(self):
return
def get_waiting_script(self):
return
@abc.abstractmethod
def get_edp_validation_rules(self):
return
@abc.abstractmethod
def get_cluster_configurer(self, cluster, plugin_spec):
return
@abc.abstractmethod
def get_configure_sh_string(self, cluster):
return
@abc.abstractmethod
def get_context(self, cluster):
return
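
A worked example of the version-to-path derivation above, matching the MRv1 handler added later in this commit:

version = '4.0.1.mrv1'
home_dir = 'plugins/mapr/versions/v%s' % version.replace('.', '_').lower()
spec_path = '%s/resources/plugin_spec.json' % home_dir
print(home_dir)    # plugins/mapr/versions/v4_0_1_mrv1
print(spec_path)   # plugins/mapr/versions/v4_0_1_mrv1/resources/plugin_spec.json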

View File

@@ -0,0 +1,76 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.maprfs_helper as mfs
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.service.edp.binary_retrievers.dispatch as d
import sahara.service.edp.oozie.engine as e
class MapROozieJobEngine(e.OozieJobEngine):
def get_hdfs_user(self):
return 'mapr'
def create_hdfs_dir(self, remote, dir_name):
mfs.create_maprfs_dir(remote, dir_name, self.get_hdfs_user())
def _upload_workflow_file(self, where, job_dir, wf_xml, hdfs_user):
f_name = 'workflow.xml'
with where.remote() as r:
mfs.put_file_to_maprfs(r, wf_xml, f_name, job_dir, hdfs_user)
return job_dir + '/' + f_name
def _upload_job_files_to_hdfs(self, where, job_dir, job):
mains = job.mains or []
libs = job.libs or []
uploaded_paths = []
hdfs_user = self.get_hdfs_user()
with where.remote() as r:
for m in mains:
raw_data = d.get_raw_binary(m)
mfs.put_file_to_maprfs(r, raw_data, m.name, job_dir, hdfs_user)
uploaded_paths.append(job_dir + '/' + m.name)
for l in libs:
raw_data = d.get_raw_binary(l)
lib_dir = job_dir + '/lib/'
self.create_hdfs_dir(r, lib_dir)
mfs.put_file_to_maprfs(r, raw_data, l.name, lib_dir,
hdfs_user)
uploaded_paths.append(lib_dir + l.name)
return uploaded_paths
def get_name_node_uri(self, cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
uri = v_handler.get_name_node_uri(cluster)
return uri
def get_oozie_server_uri(self, cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
uri = v_handler.get_oozie_server_uri(cluster)
return uri
def get_oozie_server(self, cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
server = v_handler.get_oozie_server(cluster)
return server
def get_resource_manager_uri(self, cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
uri = v_handler.get_resource_manager_uri(cluster)
return uri

View File

@@ -0,0 +1,24 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.versions.base_cluster_configurer as bcc
class ClusterConfigurer(bcc.BaseClusterConfigurer):
def get_hadoop_conf_dir(self):
return '/opt/mapr/hadoop/hadoop-0.20.2/conf'
def is_node_awareness_enabled(self):
return True

View File

@@ -0,0 +1,63 @@
#
# CLDB Config file.
# Properties defined in this file are loaded during startup
# and are valid for only CLDB which loaded the config.
# These parameters are not persisted anywhere else.
#
# Wait until minimum number of fileserver register with
# CLDB before creating Root Volume
cldb.min.fileservers=1
# CLDB listening port
cldb.port=7222
# Number of worker threads
cldb.numthreads=10
# CLDB webport
cldb.web.port=7221
# CLDB https port
cldb.web.https.port=7443
# Disable duplicate hostid detection
cldb.detect.dup.hostid.enabled=false
# Deprecated: This param is no longer supported. To configure
# the container cache, use the param cldb.containers.cache.percent
# Number of RW containers in cache
#cldb.containers.cache.entries=1000000
#
# Percentage (integer) of Xmx setting to be used for container cache
#cldb.containers.cache.percent=20
#
# Topology script to be used to determine
# Rack topology of node
# Script should take an IP address as input and print rack path
# on STDOUT. eg
# $>/home/mapr/topo.pl 10.10.10.10
# $>/mapr-rack1
# $>/home/mapr/topo.pl 10.10.10.20
# $>/mapr-rack2
#net.topology.script.file.name=/home/mapr/topo.pl
#
# Topology mapping file used to determine
# Rack topology of node
# File is of a 2 column format (space separated)
# 1st column is an IP address or hostname
# 2nd column is the rack path
# Line starting with '#' is a comment
# Example file contents
# 10.10.10.10 /mapr-rack1
# 10.10.10.20 /mapr-rack2
# host.foo.com /mapr-rack3
#net.topology.table.file.name=/home/mapr/topo.txt
#
# ZooKeeper address
#cldb.zookeeper.servers=10.250.1.91:5181
# Hadoop metrics jar version
#hadoop.version=0.20.2
# CLDB JMX remote port
cldb.jmxremote.port=7220
num.volmirror.threads=1
# Set this to set the default topology for all volumes and nodes
# The default for all volumes is /data by default
# UNCOMMENT the below to change the default topology.
# For e.g., set cldb.default.topology=/mydata to create volumes
# in /mydata topology and to place all nodes in /mydata topology
# by default
#cldb.default.topology=/mydata

View File

@@ -0,0 +1,57 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!--
Replace 'maprfs' by 'hdfs' to use HDFS.
Replace localhost by an ip address for namenode/cldb.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>maprfs:///</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
<property>
<name>fs.mapr.working.dir</name>
<value>/user/$USERNAME/</value>
<description>The default directory to be used with relative paths.
    Note that $USERNAME is NOT an environmental variable, but just a placeholder
to indicate that it will be expanded to the corresponding username.
Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
</description>
</property>
<property>
<name>fs.s3n.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3n.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.hosts</name>
<value>*</value>
</property>
</configuration>

View File

@@ -0,0 +1,30 @@
# Sample Exports file
# for /mapr exports
# <Path> <exports_control>
#access_control -> order is specific to default
# list the hosts before specifying a default for all
# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw
# special path to export clusters in mapr-clusters.conf. To disable exporting,
# comment it out. to restrict access use the exports_control
#
/mapr (rw)
#to export only certain clusters, comment out the /mapr & uncomment.
# Note: this will cause /mapr to be unexported
#/mapr/clustername (rw)
#to export /mapr only to certain hosts (using exports_control)
#/mapr a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster2 only to e.f.g.h (denied for others)
#/mapr/cluster2 e.f.g.h(rw)
# export /mapr/cluster3 rw to e.f.g.h & ro to others
#/mapr/cluster2 e.f.g.h(rw) (ro)

View File

@@ -0,0 +1,41 @@
#CLDB metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context
# Configuration of the "cldb" context for null
#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#cldb.period=10
# Configuration of the "cldb" context for file
#cldb.class=org.apache.hadoop.metrics.file.FileContext
#cldb.period=60
#cldb.fileName=/tmp/cldbmetrics.log
# Configuration of the "cldb" context for ganglia
cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
cldb.period=10
cldb.servers=localhost:8649
cldb.spoof=1
#FileServer metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context
# Configuration of the "fileserver" context for null
#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#fileserver.period=10
# Configuration of the "fileserver" context for file
#fileserver.class=org.apache.hadoop.metrics.file.FileContext
#fileserver.period=60
#fileserver.fileName=/tmp/fsmetrics.log
# Configuration of the "fileserver" context for ganglia
fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
fileserver.period=37
fileserver.servers=localhost:8649
fileserver.spoof=1
maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
maprmepredvariant.period=10
maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
maprmapred.period=10

View File

@@ -0,0 +1,16 @@
#mfs.num.compress.threads=1
#mfs.max.aio.events=5000
#mfs.disable.periodic.flush=0
#mfs.io.disk.timeout=60
#mfs.server.ip=127.0.0.1
#mfs.max.resync.count=16
#mfs.max.restore.count=16
#mfs.ignore.container.delete=0
#mfs.ignore.readdir.pattern=0
mfs.server.port=5660
#mfs.subnets.whitelist=127.0.0.1/8
#UNCOMMENT this line to disable bulk writes
#mfs.bulk.writes.enabled=0
#UNCOMMENT this to set the topology of this node
#For e.g., to set this node's topology to /compute-only uncomment the below line
#mfs.network.location=/compute-only

View File

@@ -0,0 +1,43 @@
# Configuration for nfsserver
#
# The system defaults are in the comments
#
# Default compression is true
#Compression = true
# chunksize is 64M
#ChunkSize = 67108864
# Number of threads for compression/decompression: default=2
#CompThreads = 2
#Mount point for the ramfs file for mmap
#RamfsMntDir = /ramfs/mapr
# Size of the ramfile to use (percent of total physical memory) default=0.25
# 0: disables the use of ramfs
#RamfsSize = 0.25
# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF
#Loglevel = INFO
#Duplicate Request cache size & timeout in seconds
#DrCacheSize = 20480
#DrCacheTimeout = 62
# To keep the drcache lean, we only cache the response if the
# time we took to populate is greater than 50% of DrCacheTimeout.
# Set it to 0 to disable this optimization, Note that the DrCacheSize or
# DrCacheTimeout will also need to be changed. Ex: if the nfsserver supports
# 10,000 ops/sec (modification ops): then DrCacheSize will need to change
# to: 10,000*DrCacheTimeout = 620,000
#DRCacheTimeOutOpt = 0.5
#NFS fileid, by default the fileid is of 32 bit size.
#Set Use32BitFileId=0 to use 64 bit fileid (inode number)
#Use32BitFileId=0
#Auto refresh exports time interval in mins.
#default is 0, means there is no auto refresh.
#AutoRefreshExportsTimeInterval = 5

View File

@@ -0,0 +1,203 @@
{
"files": [
{
"remote": null,
"type": null,
"configs": {
"required": {
"cluster": [
{
"name": "Enable MapR-DB",
"config_type": "bool",
"default_value": false,
"priority": 1
}
]
}
}
},
{
"remote": "/opt/mapr/conf/cldb.conf",
"local": "default/cldb.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/hadoop-metrics.properties",
"local": "default/hadoop-metrics.properties",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/mfs.conf",
"local": "default/mfs.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/nfsserver.conf",
"local": "default/nfsserver.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/exports",
"local": "default/exports",
"type": "raw"
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml",
"local": "default/core-site.xml",
"type": "xml",
"configs": {
"optional": {
"cluster": [
{
"name": "fs.swift.impl",
"default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name": "fs.swift.connect.timeout",
"config_type": "int",
"default_value": 15000
},
{
"name": "fs.swift.socket.timeout",
"config_type": "int",
"default_value": 60000
},
{
"name": "fs.swift.connect.retry.count",
"config_type": "int",
"default_value": 3
},
{
"name": "fs.swift.connect.throttle.delay",
"config_type": "int",
"default_value": 0
},
{
"name": "fs.swift.blocksize",
"config_type": "int",
"default_value": 32768
},
{
"name": "fs.swift.partsize",
"config_type": "int",
"default_value": 4718592
},
{
"name": "fs.swift.requestsize",
"config_type": "int",
"default_value": 64
},
{
"name": "fs.swift.service.sahara.public",
"config_type": "bool",
"default_value": true
},
{
"name": "fs.swift.service.sahara.http.port",
"config_type": "int",
"default_value": 8080
},
{
"name": "fs.swift.service.sahara.https.port",
"config_type": "int",
"default_value": 443
},
{
"name": "fs.swift.service.sahara.auth.endpoint.prefix",
"default_value": "/endpoints/AUTH_"
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml",
"type": "xml"
}
],
"services": [
{
"name": "general",
"files": [
null
]
},
{
"name": "Management",
"node_processes": [
"ZooKeeper",
"Webserver",
"MapR-Client",
"Metrics"
]
},
{
"name": "MapReduce",
"node_processes": [
"TaskTracker",
"JobTracker"
],
"files": [
"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml"
]
},
{
"name": "MapR FS",
"node_processes": [
"CLDB",
"FileServer",
"NFS"
],
"files": [
"/opt/mapr/conf/cldb.conf",
"/opt/mapr/conf/hadoop-metrics.properties",
"/opt/mapr/conf/mfs.conf",
"/opt/mapr/conf/nfsserver.conf",
"/opt/mapr/conf/exports",
"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml"
]
},
{
"name": "HBase",
"node_processes": [
"HBase-Master",
"HBase-RegionServer",
"HBase-Client"
]
},
{
"name": "Hive",
"node_processes": [
"HiveMetastore",
"HiveServer2"
],
"versions": [
"0.13",
"0.12"
]
},
{
"name": "Oozie",
"node_processes": [
"Oozie"
],
"versions": [
"4.0.1",
"4.0.0",
"3.3.2"
]
},
{
"name": "Pig",
"node_processes": [
"Pig"
]
},
{
"name": "Mahout",
"node_processes": [
"Mahout"
]
}
]
}
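
A standalone sketch of the lookup the base configurer performs against a spec like this one (simplified; the real implementation lives in sahara.plugins.mapr.util.plugin_spec, and the open() call assumes a local copy of the JSON above):

import json

with open('plugin_spec.json') as fp:     # assumes this JSON saved locally
    spec = json.load(fp)

def service_for_file(spec, file_name):
    # mirrors the file-to-service mapping the configurer asks the spec for
    for service in spec['services']:
        if file_name in (service.get('files') or []):
            return service['name']
    return None

print(service_for_file(spec, '/opt/mapr/conf/cldb.conf'))   # MapR FS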

View File

@@ -0,0 +1,34 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.plugins.mapr.util import cluster_helper as clh_utils
from sahara.plugins.mapr.util import start_helper as start_helper
import sahara.plugins.utils as utils
def exec_configure_sh_on_cluster(cluster):
inst_list = utils.get_instances(cluster)
for n in inst_list:
exec_configure_sh_on_instance(cluster, n)
def exec_configure_sh_on_instance(cluster, instance):
script_string = ('/opt/mapr/server/configure.sh'
+ ' -C ' + clh_utils.get_cldb_nodes_ip(cluster)
+ ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster)
+ ' -f')
if not start_helper.check_if_mapr_user_exist(instance):
script_string = script_string + ' --create-user'
    instance.remote().execute_command(script_string, run_as_root=True)

View File

@@ -0,0 +1,112 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara import context
from sahara.plugins.mapr.util import cluster_helper as clh_utils
import sahara.plugins.mapr.util.config_utils as cu
import sahara.plugins.mapr.util.names as n
from sahara.plugins.mapr.util import scaling
from sahara.plugins.mapr.util import start_helper as start_helper
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.base_context as bc
from sahara.plugins.mapr.versions import base_version_handler as bvh
import sahara.plugins.mapr.versions.v3_1_1.cluster_configurer as cc
import sahara.plugins.utils as u
version = '3.1.1'
SIXTY_SECONDS = 60
class VersionHandler(bvh.BaseVersionHandler):
def get_plugin_version(self):
return version
def start_cluster(self, cluster):
start_helper.exec_configure_sh_on_cluster(
cluster, self.get_configure_sh_string(cluster))
start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script())
start_helper.setup_maprfs_on_cluster(
cluster, self.get_disk_setup_script())
start_helper.start_zookeeper_nodes_on_cluster(cluster)
start_helper.start_warden_on_cldb_nodes(cluster)
context.sleep(SIXTY_SECONDS)
start_helper.start_warden_on_other_nodes(cluster)
start_helper.start_ecosystem(self.get_context(cluster))
def get_waiting_script(self):
return 'plugins/mapr/util/resources/waiting_script.sh'
def get_configure_sh_string(self, cluster):
return ('/opt/mapr/server/configure.sh'
+ ' -C ' + clh_utils.get_cldb_nodes_ip(cluster)
+ ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster) + ' -f')
def scale_cluster(self, cluster, instances):
scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(),
self.get_waiting_script(),
self.get_context(cluster),
self.get_configure_sh_string(cluster), True)
def decommission_nodes(self, cluster, instances):
scaling.decommission_nodes(
cluster, instances, self.get_configure_sh_string(cluster))
def get_cluster_validation_rules(self, cluster):
return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1),
vu.not_less_than_count_component_vr(n.CLDB, 1),
vu.not_less_than_count_component_vr(n.TASK_TRACKER, 1),
vu.not_less_than_count_component_vr(n.FILE_SERVER, 1),
vu.not_more_than_count_component_vr(n.OOZIE, 1),
vu.not_less_than_count_component_vr(n.JOBTRACKER, 1),
vu.node_dependency_satisfied_vr(n.TASK_TRACKER, n.FILE_SERVER),
vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)]
def get_scaling_validation_rules(self):
return []
def get_edp_validation_rules(self):
return []
def get_cluster_configurer(self, cluster, plugin_spec):
return cc.ClusterConfigurer(cluster, plugin_spec)
def get_context(self, cluster):
return Context(cluster)
class Context(bc.BaseContext):
m7_enabled_config = n.IS_M7_ENABLED
hive_version_config = 'Hive Version'
oozie_version_config = 'Oozie Version'
def __init__(self, cluster):
self.cluster = cluster
def get_cluster(self):
return self.cluster
def is_m7_enabled(self):
configs = cu.get_cluster_configs(self.get_cluster())
return configs[n.GENERAL][Context.m7_enabled_config]
def get_hadoop_version(self):
return '0.20.2'
def get_rm_instance(self):
return u.get_instance(self.get_cluster(), n.JOBTRACKER)
def get_rm_port(self):
return '9001'

View File

@@ -0,0 +1,51 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
import sahara.plugins.mapr.versions.base_cluster_configurer as bcc
import sahara.plugins.utils as u
from sahara.utils import files as f
LOG = logging.getLogger(__name__)
class ClusterConfigurer(bcc.BaseClusterConfigurer):
hadoop_version_path = '/opt/mapr/conf/hadoop_version'
hadoop_mode = 'classic'
hadoop_version_local = 'plugins/mapr/util/resources/hadoop_version'
def get_hadoop_conf_dir(self):
return '/opt/mapr/hadoop/hadoop-0.20.2/conf'
def is_node_awareness_enabled(self):
return True
def set_cluster_mode(self, instances):
if not instances:
instances = u.get_instances(self.cluster)
LOG.info(_LI('Setting cluster mode to classic'))
hv_template = f.get_file_text(self.hadoop_version_local)
hv = hv_template % {"mode": self.hadoop_mode}
for i in instances:
with i.remote() as r:
LOG.debug('Writing file %(f_name)s to node %(node)s',
{'f_name': self.hadoop_version_path,
'node': i.management_ip})
r.write_file_to(self.hadoop_version_path, hv,
run_as_root=True)
def configure_instances(self, instances=None):
super(ClusterConfigurer, self).configure_instances(instances)
self.set_cluster_mode(instances)

View File

@@ -0,0 +1,63 @@
#
# CLDB Config file.
# Properties defined in this file are loaded during startup
# and are valid for only CLDB which loaded the config.
# These parameters are not persisted anywhere else.
#
# Wait until minimum number of fileserver register with
# CLDB before creating Root Volume
cldb.min.fileservers=1
# CLDB listening port
cldb.port=7222
# Number of worker threads
cldb.numthreads=10
# CLDB webport
cldb.web.port=7221
# CLDB https port
cldb.web.https.port=7443
# Disable duplicate hostid detection
cldb.detect.dup.hostid.enabled=false
# Deprecated: This param is no longer supported. To configure
# the container cache, use the param cldb.containers.cache.percent
# Number of RW containers in cache
#cldb.containers.cache.entries=1000000
#
# Percentage (integer) of Xmx setting to be used for container cache
#cldb.containers.cache.percent=20
#
# Topology script to be used to determine
# Rack topology of node
# Script should take an IP address as input and print rack path
# on STDOUT. eg
# $>/home/mapr/topo.pl 10.10.10.10
# $>/mapr-rack1
# $>/home/mapr/topo.pl 10.10.10.20
# $>/mapr-rack2
#net.topology.script.file.name=/home/mapr/topo.pl
#
# Topology mapping file used to determine
# Rack topology of node
# File is of a 2 column format (space separated)
# 1st column is an IP address or hostname
# 2nd column is the rack path
# Line starting with '#' is a comment
# Example file contents
# 10.10.10.10 /mapr-rack1
# 10.10.10.20 /mapr-rack2
# host.foo.com /mapr-rack3
#net.topology.table.file.name=/home/mapr/topo.txt
#
# ZooKeeper address
#cldb.zookeeper.servers=10.250.1.91:5181
# Hadoop metrics jar version
#hadoop.version=0.20.2
# CLDB JMX remote port
cldb.jmxremote.port=7220
num.volmirror.threads=1
# Set this to set the default topology for all volumes and nodes
# The default for all volumes is /data by default
# UNCOMMENT the below to change the default topology.
# For e.g., set cldb.default.topology=/mydata to create volumes
# in /mydata topology and to place all nodes in /mydata topology
# by default
#cldb.default.topology=/mydata

View File

@@ -0,0 +1,57 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!--
Replace 'maprfs' by 'hdfs' to use HDFS.
Replace localhost by an ip address for namenode/cldb.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>maprfs:///</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
<property>
<name>fs.mapr.working.dir</name>
<value>/user/$USERNAME/</value>
<description>The default directory to be used with relative paths.
    Note that $USERNAME is NOT an environmental variable, but just a placeholder
to indicate that it will be expanded to the corresponding username.
Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
</description>
</property>
<property>
<name>fs.s3n.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3n.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.hosts</name>
<value>*</value>
</property>
</configuration>

View File

@@ -0,0 +1,30 @@
# Sample Exports file
# for /mapr exports
# <Path> <exports_control>
#access_control -> order is specific to default
# list the hosts before specifying a default for all
# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw
# special path to export clusters in mapr-clusters.conf. To disable exporting,
# comment it out. to restrict access use the exports_control
#
/mapr (rw)
#to export only certain clusters, comment out the /mapr & uncomment.
# Note: this will cause /mapr to be unexported
#/mapr/clustername (rw)
#to export /mapr only to certain hosts (using exports_control)
#/mapr a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster2 only to e.f.g.h (denied for others)
#/mapr/cluster2 e.f.g.h(rw)
# export /mapr/cluster3 rw to e.f.g.h & ro to others
#/mapr/cluster2 e.f.g.h(rw) (ro)

View File

@@ -0,0 +1,41 @@
#CLDB metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context
# Configuration of the "cldb" context for null
#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#cldb.period=10
# Configuration of the "cldb" context for file
#cldb.class=org.apache.hadoop.metrics.file.FileContext
#cldb.period=60
#cldb.fileName=/tmp/cldbmetrics.log
# Configuration of the "cldb" context for ganglia
cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
cldb.period=10
cldb.servers=localhost:8649
cldb.spoof=1
#FileServer metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context
# Configuration of the "fileserver" context for null
#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#fileserver.period=10
# Configuration of the "fileserver" context for file
#fileserver.class=org.apache.hadoop.metrics.file.FileContext
#fileserver.period=60
#fileserver.fileName=/tmp/fsmetrics.log
# Configuration of the "fileserver" context for ganglia
fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
fileserver.period=37
fileserver.servers=localhost:8649
fileserver.spoof=1
maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
maprmepredvariant.period=10
maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
maprmapred.period=10

View File

@@ -0,0 +1,16 @@
#mfs.num.compress.threads=1
#mfs.max.aio.events=5000
#mfs.disable.periodic.flush=0
#mfs.io.disk.timeout=60
#mfs.server.ip=127.0.0.1
#mfs.max.resync.count=16
#mfs.max.restore.count=16
#mfs.ignore.container.delete=0
#mfs.ignore.readdir.pattern=0
mfs.server.port=5660
#mfs.subnets.whitelist=127.0.0.1/8
#UNCOMMENT this line to disable bulk writes
#mfs.bulk.writes.enabled=0
#UNCOMMENT this to set the topology of this node
#For e.g., to set this node's topology to /compute-only uncomment the below line
#mfs.network.location=/compute-only

View File

@@ -0,0 +1,43 @@
# Configuration for nfsserver
#
# The system defaults are in the comments
#
# Default compression is true
#Compression = true
# chunksize is 64M
#ChunkSize = 67108864
# Number of threads for compression/decompression: default=2
#CompThreads = 2
#Mount point for the ramfs file for mmap
#RamfsMntDir = /ramfs/mapr
# Size of the ramfile to use (percent of total physical memory) default=0.25
# 0: disables the use of ramfs
#RamfsSize = 0.25
# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF
#Loglevel = INFO
#Duplicate Request cache size & timeout in seconds
#DrCacheSize = 20480
#DrCacheTimeout = 62
# To keep the drcache lean, we only cache the response if the
# time we took to populate is greater than 50% of DrCacheTimeout.
# Set it to 0 to disable this optimization, Note that the DrCacheSize or
# DrCacheTimeout will also need to be changed. Ex: if the nfsserver supports
# 10,000 ops/sec (modification ops): then DrCacheSize will need to change
# to: 10,000*DrCacheTimeout = 620,000
#DRCacheTimeOutOpt = 0.5
#NFS fileid, by default the fileid is of 32 bit size.
#Set Use32BitFileId=0 to use 64 bit fileid (inode number)
#Use32BitFileId=0
#Auto refresh exports time interval in mins.
#default is 0, means there is no auto refresh.
#AutoRefreshExportsTimeInterval = 5

View File

@@ -0,0 +1,203 @@
{
"files": [
{
"remote": null,
"type": null,
"configs": {
"required": {
"cluster": [
{
"name": "Enable MapR-DB",
"config_type": "bool",
"default_value": false,
"priority": 1
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml",
"local": "default/core-site.xml",
"type": "xml",
"configs": {
"optional": {
"cluster": [
{
"name": "fs.swift.impl",
"default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name": "fs.swift.connect.timeout",
"config_type": "int",
"default_value": 15000
},
{
"name": "fs.swift.socket.timeout",
"config_type": "int",
"default_value": 60000
},
{
"name": "fs.swift.connect.retry.count",
"config_type": "int",
"default_value": 3
},
{
"name": "fs.swift.connect.throttle.delay",
"config_type": "int",
"default_value": 0
},
{
"name": "fs.swift.blocksize",
"config_type": "int",
"default_value": 32768
},
{
"name": "fs.swift.partsize",
"config_type": "int",
"default_value": 4718592
},
{
"name": "fs.swift.requestsize",
"config_type": "int",
"default_value": 64
},
{
"name": "fs.swift.service.sahara.public",
"config_type": "bool",
"default_value": true
},
{
"name": "fs.swift.service.sahara.http.port",
"config_type": "int",
"default_value": 8080
},
{
"name": "fs.swift.service.sahara.https.port",
"config_type": "int",
"default_value": 443
},
{
"name": "fs.swift.service.sahara.auth.endpoint.prefix",
"default_value": "/endpoints/AUTH_"
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml",
"type": "xml"
},
{
"remote": "/opt/mapr/conf/cldb.conf",
"local": "default/cldb.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/hadoop-metrics.properties",
"local": "default/hadoop-metrics.properties",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/mfs.conf",
"local": "default/mfs.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/nfsserver.conf",
"local": "default/nfsserver.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/exports",
"local": "default/exports",
"type": "raw"
}
],
"services": [
{
"name": "general",
"files": [
null
]
},
{
"name": "Management",
"node_processes": [
"ZooKeeper",
"Webserver",
"MapR-Client",
"Metrics"
]
},
{
"name": "MapReduce",
"node_processes": [
"TaskTracker",
"JobTracker"
],
"files": [
"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml"
]
},
{
"name": "MapR FS",
"node_processes": [
"CLDB",
"FileServer",
"NFS"
],
"files": [
"/opt/mapr/conf/cldb.conf",
"/opt/mapr/conf/hadoop-metrics.properties",
"/opt/mapr/conf/mfs.conf",
"/opt/mapr/conf/nfsserver.conf",
"/opt/mapr/conf/exports",
"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml"
]
},
{
"name": "HBase",
"node_processes": [
"HBase-Master",
"HBase-RegionServer",
"HBase-Client"
]
},
{
"name": "Hive",
"node_processes": [
"HiveMetastore",
"HiveServer2"
],
"versions": [
"0.13",
"0.12"
]
},
{
"name": "Oozie",
"node_processes": [
"Oozie"
],
"versions": [
"4.0.1",
"4.0.0",
"3.3.2"
]
},
{
"name": "Pig",
"node_processes": [
"Pig"
]
},
{
"name": "Mahout",
"node_processes": [
"Mahout"
]
}
]
}

View File

@@ -0,0 +1,114 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara import context
from sahara.plugins.mapr.util import cluster_helper as clh_utils
import sahara.plugins.mapr.util.config_utils as cu
import sahara.plugins.mapr.util.names as n
from sahara.plugins.mapr.util import scaling
from sahara.plugins.mapr.util import start_helper as start_helper
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.base_context as bc
from sahara.plugins.mapr.versions import base_version_handler as bvh
import sahara.plugins.mapr.versions.v4_0_1_mrv1.cluster_configurer as cc
import sahara.plugins.utils as u
version = '4.0.1.mrv1'
SIXTY_SECONDS = 60
class VersionHandler(bvh.BaseVersionHandler):
def get_plugin_version(self):
return version
def start_cluster(self, cluster):
start_helper.exec_configure_sh_on_cluster(
cluster, self.get_configure_sh_string(cluster))
start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script())
start_helper.setup_maprfs_on_cluster(
cluster, self.get_disk_setup_script())
start_helper.start_zookeeper_nodes_on_cluster(cluster)
start_helper.start_warden_on_cldb_nodes(cluster)
context.sleep(SIXTY_SECONDS)
start_helper.start_warden_on_other_nodes(cluster)
start_helper.start_ecosystem(self.get_context(cluster))
def get_waiting_script(self):
return 'plugins/mapr/util/resources/waiting_script.sh'
def scale_cluster(self, cluster, instances):
scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(),
self.get_waiting_script(),
self.get_context(cluster),
self.get_configure_sh_string(cluster), True)
def decommission_nodes(self, cluster, instances):
scaling.decommission_nodes(
cluster, instances, self.get_configure_sh_string(cluster))
def get_cluster_configurer(self, cluster, plugin_spec):
return cc.ClusterConfigurer(cluster, plugin_spec)
def get_cluster_validation_rules(self, cluster):
return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1),
vu.not_less_than_count_component_vr(n.CLDB, 1),
vu.not_less_than_count_component_vr(n.TASK_TRACKER, 1),
vu.not_less_than_count_component_vr(n.FILE_SERVER, 1),
vu.not_more_than_count_component_vr(n.OOZIE, 1),
vu.not_more_than_count_component_vr(n.WEB_SERVER, 1),
vu.equal_count_component_vr(n.JOBTRACKER, 1),
vu.node_dependency_satisfied_vr(n.TASK_TRACKER, n.FILE_SERVER),
vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)]
def get_scaling_validation_rules(self):
return []
def get_edp_validation_rules(self):
return []
def get_configure_sh_string(self, cluster):
return ('/opt/mapr/server/configure.sh'
' -C ' + clh_utils.get_cldb_nodes_ip(cluster)
+ ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster)
+ ' -f')
def get_context(self, cluster):
return Context(cluster)
class Context(bc.BaseContext):
m7_enabled_config = n.IS_M7_ENABLED
hive_version_config = 'Hive Version'
oozie_version_config = 'Oozie Version'
def __init__(self, cluster):
self.cluster = cluster
def get_cluster(self):
return self.cluster
def is_m7_enabled(self):
configs = cu.get_cluster_configs(self.get_cluster())
return configs[n.GENERAL][Context.m7_enabled_config]
def get_hadoop_version(self):
return '0.20.2'
def get_rm_instance(self):
return u.get_instance(self.get_cluster(), n.JOBTRACKER)
def get_rm_port(self):
return '9001'

View File

@@ -0,0 +1,24 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.versions.base_cluster_configurer as bcc
class ClusterConfigurer(bcc.BaseClusterConfigurer):
def get_hadoop_conf_dir(self):
return '/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop'
def is_node_awareness_enabled(self):
return False

View File

@@ -0,0 +1,63 @@
#
# CLDB Config file.
# Properties defined in this file are loaded during startup
# and are valid for only CLDB which loaded the config.
# These parameters are not persisted anywhere else.
#
# Wait until minimum number of fileserver register with
# CLDB before creating Root Volume
cldb.min.fileservers=1
# CLDB listening port
cldb.port=7222
# Number of worker threads
cldb.numthreads=10
# CLDB webport
cldb.web.port=7221
# CLDB https port
cldb.web.https.port=7443
# Disable duplicate hostid detection
cldb.detect.dup.hostid.enabled=false
# Deprecated: This param is no longer supported. To configure
# the container cache, use the param cldb.containers.cache.percent
# Number of RW containers in cache
#cldb.containers.cache.entries=1000000
#
# Percentage (integer) of Xmx setting to be used for container cache
#cldb.containers.cache.percent=20
#
# Topology script to be used to determine
# Rack topology of node
# Script should take an IP address as input and print rack path
# on STDOUT. eg
# $>/home/mapr/topo.pl 10.10.10.10
# $>/mapr-rack1
# $>/home/mapr/topo.pl 10.10.10.20
# $>/mapr-rack2
#net.topology.script.file.name=/home/mapr/topo.pl
#
# Topology mapping file used to determine
# Rack topology of node
# File is of a 2 column format (space separated)
# 1st column is an IP address or hostname
# 2nd column is the rack path
# Line starting with '#' is a comment
# Example file contents
# 10.10.10.10 /mapr-rack1
# 10.10.10.20 /mapr-rack2
# host.foo.com /mapr-rack3
#net.topology.table.file.name=/home/mapr/topo.txt
#
# ZooKeeper address
#cldb.zookeeper.servers=10.250.1.91:5181
# Hadoop metrics jar version
#hadoop.version=0.20.2
# CLDB JMX remote port
cldb.jmxremote.port=7220
num.volmirror.threads=1
# Set this to set the default topology for all volumes and nodes
# The default for all volumes is /data by default
# UNCOMMENT the below to change the default topology.
# For e.g., set cldb.default.topology=/mydata to create volumes
# in /mydata topology and to place all nodes in /mydata topology
# by default
#cldb.default.topology=/mydata

View File

@@ -0,0 +1,57 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!--
Replace 'maprfs' by 'hdfs' to use HDFS.
Replace localhost by an ip address for namenode/cldb.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>maprfs:///</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
<property>
<name>fs.mapr.working.dir</name>
<value>/user/$USERNAME/</value>
<description>The default directory to be used with relative paths.
    Note that $USERNAME is NOT an environmental variable, but just a placeholder
to indicate that it will be expanded to the corresponding username.
Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
</description>
</property>
<property>
<name>fs.s3n.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3n.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.hosts</name>
<value>*</value>
</property>
</configuration>
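For illustration, the $USERNAME placeholder described in the fs.mapr.working.dir property above is expanded per user before relative paths are resolved; the user name and relative path below are assumed example values, not taken from the plugin.

# Illustrative only: how a relative path resolves against fs.mapr.working.dir.
working_dir_template = '/user/$USERNAME/'
username = 'alice'                      # assumed example user
working_dir = working_dir_template.replace('$USERNAME', username)
print(working_dir + 'input/data.txt')   # -> /user/alice/input/data.txt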

View File

@ -0,0 +1,30 @@
# Sample Exports file
# for /mapr exports
# <Path> <exports_control>
#access_control -> order matters: list specific hosts
# before specifying a default for all
# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
# enforces ro for a.b.c.d & 1.2.3.4 and rw for everybody else
# special path to export clusters in mapr-clusters.conf. To disable exporting,
# comment it out. To restrict access, use the exports_control
#
/mapr (rw)
#To export only certain clusters, comment out the /mapr line above and
# uncomment the line below.
# Note: this will cause /mapr to be unexported
#/mapr/clustername (rw)
#To export /mapr only to certain hosts (using exports_control)
#/mapr a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster2 only to e.f.g.h (denied for others)
#/mapr/cluster2 e.f.g.h(rw)
# export /mapr/cluster3 rw to e.f.g.h & ro to others
#/mapr/cluster3 e.f.g.h(rw) (ro)
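As a rough illustration of the export line layout documented above (a path, then host groups of the form hosts(mode), then an optional bare (mode) default), here is a minimal parsing sketch. It is an assumption-laden example only, not part of MapR or of this plugin, and it handles just the space-separated form shown in the header comment, not the comma-joined host(mode) variant.

# Illustrative sketch: split one exports line into path, per-host modes and
# a default mode.
def parse_export(line):
    tokens = line.split()
    path, controls = tokens[0], tokens[1:]
    acls, default = {}, None
    for token in controls:
        if token.startswith('('):            # bare "(rw)" / "(ro)" is the default
            default = token.strip('()')
        else:                                 # e.g. "a.b.c.d,1.2.3.4(ro)"
            hosts, mode = token[:-1].split('(')
            for host in hosts.split(','):
                acls[host] = mode
    return path, acls, default

print(parse_export('/mapr a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)'))
# ('/mapr', {'a.b.c.d': 'ro', '1.2.3.4': 'ro', 'd.e.f.g': 'ro'}, 'rw')
# (dict key order may vary)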

View File

@ -0,0 +1,41 @@
#CLDB metrics config - Pick one out of null, file or ganglia.
#Uncomment all properties in the null, file or ganglia context to send cldb metrics to that context
# Configuration of the "cldb" context for null
#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#cldb.period=10
# Configuration of the "cldb" context for file
#cldb.class=org.apache.hadoop.metrics.file.FileContext
#cldb.period=60
#cldb.fileName=/tmp/cldbmetrics.log
# Configuration of the "cldb" context for ganglia
cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
cldb.period=10
cldb.servers=localhost:8649
cldb.spoof=1
#FileServer metrics config - Pick one out of null, file or ganglia.
#Uncomment all properties in the null, file or ganglia context to send fileserver metrics to that context
# Configuration of the "fileserver" context for null
#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#fileserver.period=10
# Configuration of the "fileserver" context for file
#fileserver.class=org.apache.hadoop.metrics.file.FileContext
#fileserver.period=60
#fileserver.fileName=/tmp/fsmetrics.log
# Configuration of the "fileserver" context for ganglia
fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
fileserver.period=37
fileserver.servers=localhost:8649
fileserver.spoof=1
maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
maprmepredvariant.period=10
maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
maprmapred.period=10

View File

@ -0,0 +1,16 @@
#mfs.num.compress.threads=1
#mfs.max.aio.events=5000
#mfs.disable.periodic.flush=0
#mfs.io.disk.timeout=60
#mfs.server.ip=127.0.0.1
#mfs.max.resync.count=16
#mfs.max.restore.count=16
#mfs.ignore.container.delete=0
#mfs.ignore.readdir.pattern=0
mfs.server.port=5660
#mfs.subnets.whitelist=127.0.0.1/8
#UNCOMMENT this line to disable bulk writes
#mfs.bulk.writes.enabled=0
#UNCOMMENT this to set the topology of this node
#For example, to set this node's topology to /compute-only, uncomment the line below
#mfs.network.location=/compute-only

View File

@ -0,0 +1,43 @@
# Configuration for nfsserver
#
# The system defaults are in the comments
#
# Default compression is true
#Compression = true
# chunksize is 64M
#ChunkSize = 67108864
# Number of threads for compression/decompression: default=2
#CompThreads = 2
#Mount point for the ramfs file for mmap
#RamfsMntDir = /ramfs/mapr
# Size of the ramfile to use (percent of total physical memory) default=0.25
# 0: disables the use of ramfs
#RamfsSize = 0.25
# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF
#Loglevel = INFO
#Duplicate Request cache size & timeout in seconds
#DrCacheSize = 20480
#DrCacheTimeout = 62
# To keep the drcache lean, we only cache the response if the
# time we took to populate is greater than 50% of DrCacheTimeout.
# Set it to 0 to disable this optimization. Note that DrCacheSize or
# DrCacheTimeout will also need to be changed. For example, if the nfsserver
# supports 10,000 ops/sec (modification ops), then DrCacheSize needs to grow
# to 10,000 * DrCacheTimeout = 620,000
#DRCacheTimeOutOpt = 0.5
#NFS fileid, by default the fileid is of 32 bit size.
#Set Use32BitFileId=0 to use 64 bit fileid (inode number)
#Use32BitFileId=0
#Auto refresh exports time interval in minutes.
#The default is 0, which means there is no auto refresh.
#AutoRefreshExportsTimeInterval = 5
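The DrCache sizing rule spelled out in the comments above is simple arithmetic; the following throwaway snippet just reproduces the worked example with assumed numbers.

# Illustrative only: DrCacheSize sizing from the comment above.
ops_per_sec = 10000        # assumed modification ops/sec handled by the nfsserver
dr_cache_timeout = 62      # seconds, the DrCacheTimeout default above
print(ops_per_sec * dr_cache_timeout)   # 620000, matching the example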

View File

@ -0,0 +1,203 @@
{
"files": [
{
"remote": null,
"type": null,
"configs": {
"required": {
"cluster": [
{
"name": "Enable MapR-DB",
"config_type": "bool",
"default_value": false,
"priority": 1
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/core-site.xml",
"local": "default/core-site.xml",
"type": "xml",
"configs": {
"optional": {
"cluster": [
{
"name": "fs.swift.impl",
"default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name": "fs.swift.connect.timeout",
"config_type": "int",
"default_value": 15000
},
{
"name": "fs.swift.socket.timeout",
"config_type": "int",
"default_value": 60000
},
{
"name": "fs.swift.connect.retry.count",
"config_type": "int",
"default_value": 3
},
{
"name": "fs.swift.connect.throttle.delay",
"config_type": "int",
"default_value": 0
},
{
"name": "fs.swift.blocksize",
"config_type": "int",
"default_value": 32768
},
{
"name": "fs.swift.partsize",
"config_type": "int",
"default_value": 4718592
},
{
"name": "fs.swift.requestsize",
"config_type": "int",
"default_value": 64
},
{
"name": "fs.swift.service.sahara.public",
"config_type": "bool",
"default_value": true
},
{
"name": "fs.swift.service.sahara.http.port",
"config_type": "int",
"default_value": 8080
},
{
"name": "fs.swift.service.sahara.https.port",
"config_type": "int",
"default_value": 443
},
{
"name": "fs.swift.service.sahara.auth.endpoint.prefix",
"default_value": "/endpoints/AUTH_"
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/mapred-site.xml",
"type": "xml"
},
{
"remote": "/opt/mapr/conf/hadoop-metrics.properties",
"local": "default/hadoop-metrics.properties",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/cldb.conf",
"local": "default/cldb.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/mfs.conf",
"local": "default/mfs.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/nfsserver.conf",
"local": "default/nfsserver.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/exports",
"local": "default/exports",
"type": "raw"
}
],
"services": [
{
"name": "general",
"files": [
null
]
},
{
"name": "Management",
"node_processes": [
"ZooKeeper",
"Webserver",
"MapR-Client",
"Metrics"
]
},
{
"name": "YARN",
"node_processes": [
"HistoryServer",
"ResourceManager",
"NodeManager"
],
"files": [
"/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/mapred-site.xml"
]
},
{
"name": "MapR FS",
"node_processes": [
"CLDB",
"FileServer",
"NFS"
],
"files": [
"/opt/mapr/conf/cldb.conf",
"/opt/mapr/conf/hadoop-metrics.properties",
"/opt/mapr/conf/mfs.conf",
"/opt/mapr/conf/nfsserver.conf",
"/opt/mapr/conf/exports",
"/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/core-site.xml"
]
},
{
"name": "HBase",
"node_processes": [
"HBase-Master",
"HBase-RegionServer",
"HBase-Client"
]
},
{
"name": "Hive",
"node_processes": [
"HiveMetastore",
"HiveServer2"
],
"versions": [
"0.13",
"0.12"
]
},
{
"name": "Oozie",
"node_processes": [
"Oozie"
],
"versions": [
"4.0.1",
"4.0.0"
]
},
{
"name": "Pig",
"node_processes": [
"Pig"
]
},
{
"name": "Mahout",
"node_processes": [
"Mahout"
]
}
]
}
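A hedged sketch of how a specification shaped like the JSON above could be inspected. The file name plugin_spec.json and the traversal are assumptions for illustration; they are not the plugin's actual loading code.

# Illustrative only: walk a plugin spec like the JSON above and report which
# node processes and config files each service declares. Assumes the spec is
# saved locally as 'plugin_spec.json'.
import json

with open('plugin_spec.json') as fp:
    spec = json.load(fp)

for service in spec['services']:
    processes = service.get('node_processes') or []
    files = [f for f in (service.get('files') or []) if f]   # drop null entries
    print('%s: processes=%s, files=%s' % (service['name'], processes, files))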

View File

@ -0,0 +1,112 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara import context
from sahara.plugins.mapr.util import cluster_helper as clh_utils
import sahara.plugins.mapr.util.config_utils as cu
import sahara.plugins.mapr.util.names as n
from sahara.plugins.mapr.util import scaling
from sahara.plugins.mapr.util import start_helper as start_helper
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.base_context as bc
from sahara.plugins.mapr.versions import base_version_handler as bvh
import sahara.plugins.mapr.versions.v4_0_1_mrv2.cluster_configurer as cc
SIXTY_SECONDS = 60
WAIT_OOZIE_INTERVAL = 300
version = '4.0.1.mrv2'
class VersionHandler(bvh.BaseVersionHandler):
def get_plugin_version(self):
return version
def start_cluster(self, cluster):
start_helper.exec_configure_sh_on_cluster(
cluster, self.get_configure_sh_string(cluster))
start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script())
start_helper.setup_maprfs_on_cluster(
cluster, self.get_disk_setup_script())
start_helper.start_zookeeper_nodes_on_cluster(cluster)
start_helper.start_warden_on_cldb_nodes(cluster)
context.sleep(SIXTY_SECONDS)
start_helper.start_warden_on_other_nodes(cluster)
start_helper.start_ecosystem(self.get_context(cluster))
def get_cluster_configurer(self, cluster, plugin_spec):
return cc.ClusterConfigurer(cluster, plugin_spec)
def get_configure_sh_string(self, cluster):
return ('/opt/mapr/server/configure.sh'
+ ' -C ' + clh_utils.get_cldb_nodes_ip(cluster)
+ ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster)
+ ' -RM ' + clh_utils.get_resourcemanager_ip(cluster)
+ ' -HS ' + clh_utils.get_historyserver_ip(cluster) + ' -f')
def scale_cluster(self, cluster, instances):
scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(),
self.get_waiting_script(),
self.get_context(cluster),
self.get_configure_sh_string(cluster), False)
def decommission_nodes(self, cluster, instances):
scaling.decommission_nodes(
cluster, instances, self.get_configure_sh_string(cluster))
def get_waiting_script(self):
return 'plugins/mapr/util/resources/waiting_script.sh'
def get_cluster_validation_rules(self, cluster):
return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1),
vu.not_less_than_count_component_vr(n.CLDB, 1),
vu.not_less_than_count_component_vr(n.NODE_MANAGER, 1),
vu.not_less_than_count_component_vr(n.FILE_SERVER, 1),
vu.not_more_than_count_component_vr(n.OOZIE, 1),
vu.not_more_than_count_component_vr(n.WEB_SERVER, 1),
vu.equal_count_component_vr(n.RESOURCE_MANAGER, 1),
vu.equal_count_component_vr(n.HISTORY_SERVER, 1),
vu.node_dependency_satisfied_vr(n.NODE_MANAGER, n.FILE_SERVER),
vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)]
def get_scaling_validation_rules(self):
return []
def get_edp_validation_rules(self):
return []
def get_context(self, cluster):
return Context(cluster)
class Context(bc.BaseContext):
m7_enabled_config = n.IS_M7_ENABLED
hive_version_config = 'Hive Version'
oozie_version_config = 'Oozie Version'
def __init__(self, cluster):
self.cluster = cluster
def get_cluster(self):
return self.cluster
def is_m7_enabled(self):
configs = cu.get_cluster_configs(self.get_cluster())
return configs[n.GENERAL][Context.m7_enabled_config]
def get_hadoop_version(self):
return '2.4.1'
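For illustration only: assuming the cluster_helper functions used above return comma-separated IP strings, the command built by get_configure_sh_string() would look roughly like the following, where every address is a made-up placeholder:

    /opt/mapr/server/configure.sh -C 10.0.0.11 -Z 10.0.0.12,10.0.0.13,10.0.0.14 -RM 10.0.0.15 -HS 10.0.0.15 -f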

View File

@ -0,0 +1,53 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
def _load_versions():
d_name = os.path.dirname(__file__)
m_template = 'sahara.plugins.mapr.versions.%s.version_handler'
def predicate(v_dir):
return os.path.isdir(os.path.join(d_name, v_dir))
def mapper(v_dir):
return m_template % v_dir
def reducer(versions, m_name):
m = __import__(m_name, fromlist=['sahara'])
versions[m.version] = getattr(m, 'VersionHandler')()
return versions
v_dirs = filter(predicate, os.listdir(d_name))
m_names = map(mapper, v_dirs)
return reduce(reducer, m_names, {})
class VersionHandlerFactory(object):
instance = None
versions = None
@staticmethod
def get():
if not VersionHandlerFactory.instance:
VersionHandlerFactory.versions = _load_versions()
VersionHandlerFactory.instance = VersionHandlerFactory()
return VersionHandlerFactory.instance
def get_versions(self):
return VersionHandlerFactory.versions.keys()
def get_handler(self, version):
return VersionHandlerFactory.versions[version]
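A brief usage sketch for the factory above, assuming the module is importable as sahara.plugins.mapr.versions.version_handler_factory (the module path is an assumption here). Note that _load_versions relies on filter, map and reduce behaving as Python 2 builtins, which matches the runtime targeted by this change.

# Illustrative usage only; the import path below is assumed.
from sahara.plugins.mapr.versions import version_handler_factory as vhf

factory = vhf.VersionHandlerFactory.get()
print(factory.get_versions())        # version strings registered by each handler
handler = factory.get_handler('4.0.1.mrv2')
print(handler.get_plugin_version())  # -> '4.0.1.mrv2'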

View File

@ -545,6 +545,259 @@ HDP2_CONFIG_OPTS = [
]
MAPR_CONFIG_GROUP = cfg.OptGroup(name='MAPR')
MAPR_CONFIG_OPTS = [
cfg.StrOpt('PLUGIN_NAME',
default='mapr',
help='Name of plugin.'),
cfg.StrOpt('IMAGE_ID',
help='ID for image which is used for cluster creation. Also '
'you can specify image name or tag of image instead of '
'image ID. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_NAME',
help='Name for image which is used for cluster creation. Also '
'you can specify image ID or tag of image instead of '
'image name. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_TAG',
help='Tag for image which is used for cluster creation. Also '
'you can specify image ID or image name instead of tag of '
'image. If you do not specify image related parameters, '
'then image for cluster creation will be chosen by '
'tag "sahara_i_tests".'),
cfg.StrOpt('SSH_USERNAME',
help='Username to get cluster node with SSH.'),
cfg.ListOpt('MASTER_NODE_PROCESSES',
default=['CLDB', 'FileServer', 'ZooKeeper',
'TaskTracker', 'JobTracker', 'Oozie'],
help='A list of processes that will be launched '
'on master node'),
cfg.ListOpt('WORKER_NODE_PROCESSES',
default=['FileServer', 'TaskTracker', 'Pig'],
help='A list of processes that will be launched '
'on worker nodes'),
cfg.StrOpt('HADOOP_VERSION',
default='1.0.3',
help='Version of Hadoop.'),
cfg.StrOpt('HADOOP_USER',
default='mapr',
help='Username which is used for access to Hadoop services.'),
cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH',
default=('/opt/mapr/hadoop/hadoop-0.20.2'
'/hadoop-0.20.2-dev-examples.jar'),
help='Path to hadoop examples jar file.'),
cfg.StrOpt('HADOOP_LOG_DIRECTORY',
default='/opt/mapr/hadoop/hadoop-0.20.2/logs/userlogs',
help='Directory where logs of completed jobs are located.'),
cfg.StrOpt('HADOOP_LOG_DIRECTORY_ON_VOLUME',
default=('/volumes/disk1/mapr/hadoop/'
'hadoop-0.20.2/logs/userlogs'),
help='Directory where logs of completed jobs on volume mounted '
'to node are located.'),
cfg.IntOpt('SCALE_EXISTING_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'an existing node group.'),
cfg.IntOpt('SCALE_NEW_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'a new node group.'),
cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS',
default={
'JobTracker': 50030,
'CLDB': 7222,
'TaskTracker': 50060,
'OOZIE': 11000
},
help='Hadoop process map with ports for MAPR plugin.'
),
cfg.DictOpt('PROCESS_NAMES',
default={
'nn': 'CLDB',
'tt': 'TaskTracker',
'dn': 'FileServer'
},
help='Names for namenode, tasktracker and datanode '
'processes.'),
cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN',
default=True,
help='If this flag is True, then all tests for MAPR plugin '
'will be skipped.'),
cfg.BoolOpt('SKIP_CINDER_TEST', default=False),
cfg.BoolOpt('SKIP_EDP_TEST', default=False),
cfg.BoolOpt('SKIP_MAP_REDUCE_TEST', default=False),
cfg.BoolOpt('SKIP_SWIFT_TEST', default=False),
cfg.BoolOpt('SKIP_SCALING_TEST', default=False)
]
MAPR4_1_CONFIG_GROUP = cfg.OptGroup(name='MAPR4_1')
MAPR4_1_CONFIG_OPTS = [
cfg.StrOpt('PLUGIN_NAME',
default='mapr4_1',
help='Name of plugin.'),
cfg.StrOpt('IMAGE_ID',
default=None,
help='ID for image which is used for cluster creation. Also '
'you can specify image name or tag of image instead of '
'image ID. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_NAME',
default=None,
help='Name for image which is used for cluster creation. Also '
'you can specify image ID or tag of image instead of '
'image name. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_TAG',
default=None,
help='Tag for image which is used for cluster creation. Also '
'you can specify image ID or image name instead of tag of '
'image. If you do not specify image related parameters, '
'then image for cluster creation will be chosen by '
'tag "sahara_i_tests".'),
cfg.StrOpt('SSH_USERNAME',
default=None,
help='Username to get cluster node with SSH.'),
cfg.ListOpt('MASTER_NODE_PROCESSES',
default=['CLDB', 'FileServer', 'ZooKeeper',
'TaskTracker', 'JobTracker', 'Oozie'],
help='A list of processes that will be launched '
'on master node'),
cfg.ListOpt('WORKER_NODE_PROCESSES',
default=['FileServer', 'TaskTracker', 'Pig'],
help='A list of processes that will be launched '
'on worker nodes'),
cfg.StrOpt('HADOOP_VERSION',
default='2.4.0',
help='Version of Hadoop.'),
cfg.StrOpt('HADOOP_USER',
default='mapr',
help='Username which is used for access to Hadoop services.'),
cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH',
default=('/opt/mapr/hadoop/hadoop-2.3.0/share/hadoop'
'/hadoop-mapreduce-examples-2.3.0-mapr-4.0.0-FCS.jar'),
help='Path to hadoop examples jar file.'),
cfg.IntOpt('SCALE_EXISTING_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'an existing node group.'),
cfg.IntOpt('SCALE_NEW_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'a new node group.'),
cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS',
default={
'JobTracker': 50030,
'CLDB': 7222,
'TaskTracker': 50060,
'OOZIE': 11000
},
help='Hadoop process map with ports for MAPR plugin.'
),
cfg.DictOpt('PROCESS_NAMES',
default={
'nn': 'CLDB',
'tt': 'TaskTracker',
'dn': 'FileServer'
},
help='Names for namenode, tasktracker and datanode '
'processes.'),
cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN',
default=True,
help='If this flag is True, then all tests for MAPR plugin '
'will be skipped.'),
cfg.BoolOpt('SKIP_EDP_TEST', default=False),
cfg.BoolOpt('SKIP_SWIFT_TEST', default=False),
cfg.BoolOpt('SKIP_SCALING_TEST', default=False)
]
MAPR4_2_CONFIG_GROUP = cfg.OptGroup(name='MAPR4_2')
MAPR4_2_CONFIG_OPTS = [
cfg.StrOpt('PLUGIN_NAME',
default='mapr4_2',
help='Name of plugin.'),
cfg.StrOpt('IMAGE_ID',
default=None,
help='ID for image which is used for cluster creation. Also '
'you can specify image name or tag of image instead of '
'image ID. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_NAME',
default=None,
help='Name for image which is used for cluster creation. Also '
'you can specify image ID or tag of image instead of '
'image name. If you do not specify image related '
'parameters, then image for cluster creation will be '
'chosen by tag "sahara_i_tests".'),
cfg.StrOpt('IMAGE_TAG',
default=None,
help='Tag for image which is used for cluster creation. Also '
'you can specify image ID or image name instead of tag of '
'image. If you do not specify image related parameters, '
'then image for cluster creation will be chosen by '
'tag "sahara_i_tests".'),
cfg.StrOpt('SSH_USERNAME',
default=None,
help='Username to get cluster node with SSH.'),
cfg.ListOpt('MASTER_NODE_PROCESSES',
default=['CLDB', 'FileServer', 'ZooKeeper', 'NodeManager',
'ResourceManager', 'HistoryServer', 'Oozie'],
help='A list of processes that will be launched '
'on master node'),
cfg.ListOpt('WORKER_NODE_PROCESSES',
default=['FileServer', 'NodeManager', 'Pig'],
help='A list of processes that will be launched '
'on worker nodes'),
cfg.StrOpt('HADOOP_VERSION',
default='2.4.0',
help='Version of Hadoop.'),
cfg.StrOpt('HADOOP_USER',
default='mapr',
help='Username which is used for access to Hadoop services.'),
cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH',
default=('/opt/mapr/hadoop/hadoop-2.3.0/share/hadoop'
'/hadoop-mapreduce-examples-2.3.0-mapr-4.0.0-FCS.jar'),
help='Path to hadoop examples jar file.'),
cfg.IntOpt('SCALE_EXISTING_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'an existing node group.'),
cfg.IntOpt('SCALE_NEW_NG_COUNT',
default=1,
help='The number of hosts to add while scaling '
'a new node group.'),
cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS',
default={
'ResourceManager': 8032,
'CLDB': 7222,
'HistoryServer': 19888,
'OOZIE': 11000
},
help='Hadoop process map with ports for MAPR plugin.'
),
cfg.DictOpt('PROCESS_NAMES',
default={
'nn': 'CLDB',
'tt': 'NodeManager',
'dn': 'FileServer'
},
help='Names for namenode, tasktracker and datanode '
'processes.'),
cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN',
default=True,
help='If this flag is True, then all tests for MAPR plugin '
'will be skipped.'),
cfg.BoolOpt('SKIP_EDP_TEST', default=False),
cfg.BoolOpt('SKIP_SWIFT_TEST', default=False),
cfg.BoolOpt('SKIP_SCALING_TEST', default=False)
]
SPARK_CONFIG_GROUP = cfg.OptGroup(name='SPARK')
SPARK_CONFIG_OPTS = [
cfg.StrOpt('PLUGIN_NAME',
@ -641,6 +894,9 @@ class ITConfig(object):
register_config(cfg.CONF, HDP2_CONFIG_GROUP, HDP2_CONFIG_OPTS)
register_config(
cfg.CONF, VANILLA_TWO_CONFIG_GROUP, VANILLA_TWO_CONFIG_OPTS)
register_config(cfg.CONF, MAPR_CONFIG_GROUP, MAPR_CONFIG_OPTS)
register_config(cfg.CONF, MAPR4_1_CONFIG_GROUP, MAPR4_1_CONFIG_OPTS)
register_config(cfg.CONF, MAPR4_2_CONFIG_GROUP, MAPR4_2_CONFIG_OPTS)
register_config(cfg.CONF, SPARK_CONFIG_GROUP, SPARK_CONFIG_OPTS)
cfg.CONF(
@ -654,4 +910,7 @@ class ITConfig(object):
self.cdh_config = cfg.CONF.CDH
self.hdp_config = cfg.CONF.HDP
self.hdp2_config = cfg.CONF.HDP2
self.mapr_config = cfg.CONF.MAPR
self.mapr4_1_config = cfg.CONF.MAPR4_1
self.mapr4_2_config = cfg.CONF.MAPR4_2
self.spark_config = cfg.CONF.SPARK

View File

@ -22,3 +22,7 @@ SKIP_CLUSTER_CONFIG_TEST = True
IMAGE_ID = 'f7de0ea9-eb4d-4b63-8ed0-abcf11cfaff8'
SKIP_ALL_TESTS_FOR_PLUGIN = False
[MAPR]
IMAGE_ID = 'sahara-mapr-image'
SKIP_ALL_TESTS_FOR_PLUGIN = False

View File

@ -279,6 +279,188 @@
#PROCESS_NAMES = nn: NAMENODE, tt: NODEMANAGER, dn: DATANODE
#SKIP_ALL_TESTS_FOR_PLUGIN = False
#SKIP_EDP_TEST = False
#SKIP_SWIFT_TEST = False
#SKIP_SCALING_TEST = False
[MAPR]
# Name of plugin (string value)
#PLUGIN_NAME = 'mapr'
# ID for image which is used for cluster creation. Also you can specify image
# name or tag of image instead of image ID. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_ID = <None>
# Name for image which is used for cluster creation. Also you can specify image
# ID or tag of image instead of image name. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_NAME = <None>
# Tag for image which is used for cluster creation. Also you can specify image
# ID or image name instead of tag of image. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_TAG = <None>
# Username to get cluster node with SSH (string value)
#SSH_USERNAME = <None>
# A list of processes that will be launched on master node (list value)
#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, TaskTracker, JobTracker, Oozie
# A list of processes that will be launched on worker nodes (list value)
#WORKER_NODE_PROCESSES = FileServer, TaskTracker, Pig
# Version of Hadoop (string value)
#HADOOP_VERSION = '1.0.3'
# Username which is used for access to Hadoop services (string value)
#HADOOP_USER = 'mapr'
# Directory where logs of completed jobs are located (string value)
#HADOOP_LOG_DIRECTORY = '/opt/mapr/hadoop/hadoop-0.20.2/logs/userlogs'
# Directory where logs of completed jobs on volume mounted to node are located
# (string value)
#HADOOP_LOG_DIRECTORY_ON_VOLUME = '/volumes/disk1/mapr/hadoop/hadoop-0.20.2/logs/userlogs'
# The number of hosts to add while scaling an existing node group
#SCALE_EXISTING_NG_COUNT = 1
# The number of hosts to add while scaling a new node group
#SCALE_NEW_NG_COUNT = 1
# (dictionary value)
#HADOOP_PROCESSES_WITH_PORTS = JobTracker: 50030, CLDB: 7222, TaskTracker: 50060
# (dictionary value)
#PROCESS_NAMES = nn: CLDB, tt: TaskTracker, dn: FileServer
#SKIP_ALL_TESTS_FOR_PLUGIN = False
#SKIP_CINDER_TEST = False
#SKIP_MAP_REDUCE_TEST = False
#SKIP_SWIFT_TEST = False
#SKIP_SCALING_TEST = False
[MAPR4_1]
# Name of plugin (string value)
#PLUGIN_NAME = 'mapr4_1'
# ID for image which is used for cluster creation. Also you can specify image
# name or tag of image instead of image ID. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_ID = <None>
# Name for image which is used for cluster creation. Also you can specify image
# ID or tag of image instead of image name. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_NAME = <None>
# Tag for image which is used for cluster creation. Also you can specify image
# ID or image name instead of tag of image. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_TAG = <None>
# Username to get cluster node with SSH (string value)
#SSH_USERNAME = <None>
# A list of processes that will be launched on master node (list value)
#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, TaskTracker, JobTracker, Oozie
# A list of processes that will be launched on worker nodes (list value)
#WORKER_NODE_PROCESSES = FileServer, TaskTracker, Pig
# Version of Hadoop (string value)
#HADOOP_VERSION = '2.4.0'
# Username which is used for access to Hadoop services (string value)
#HADOOP_USER = 'mapr'
# The number of hosts to add while scaling an existing node group
#SCALE_EXISTING_NG_COUNT = 1
# The number of hosts to add while scaling a new node group
#SCALE_NEW_NG_COUNT = 1
# (dictionary value)
#HADOOP_PROCESSES_WITH_PORTS = JobTracker: 50030, CLDB: 7222, TaskTracker: 50060
# (dictionary value)
#PROCESS_NAMES = nn: CLDB, tt: TaskTracker, dn: FileServer
#SKIP_ALL_TESTS_FOR_PLUGIN = False
#SKIP_EDP_TEST = False
#SKIP_SWIFT_TEST = False
#SKIP_SCALING_TEST = False
[MAPR4_2]
# Name of plugin (string value)
#PLUGIN_NAME = 'mapr4_2'
# ID for image which is used for cluster creation. Also you can specify image
# name or tag of image instead of image ID. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_ID = <None>
# Name for image which is used for cluster creation. Also you can specify image
# ID or tag of image instead of image name. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_NAME = <None>
# Tag for image which is used for cluster creation. Also you can specify image
# ID or image name instead of tag of image. If you do not specify image related
# parameters then image for cluster creation will be chosen by tag
# "sahara_i_tests" (string value)
#IMAGE_TAG = <None>
# Username to get cluster node with SSH (string value)
#SSH_USERNAME = <None>
# A list of processes that will be launched on master node (list value)
#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, NodeManager, ResourceManager, HistoryServer, Oozie
# A list of processes that will be launched on worker nodes (list value)
#WORKER_NODE_PROCESSES = FileServer, NodeManager, Pig
# Version of Hadoop (string value)
#HADOOP_VERSION = '2.4.0'
# Username which is used for access to Hadoop services (string value)
#HADOOP_USER = 'mapr'
# The number of hosts to add while scaling an existing node group
#SCALE_EXISTING_NG_COUNT = 1
# The number of hosts to add while scaling a new node group
#SCALE_NEW_NG_COUNT = 1
# (dictionary value)
#HADOOP_PROCESSES_WITH_PORTS = ResourceManager: 8032, CLDB: 7222, HistoryServer: 19888
# (dictionary value)
#PROCESS_NAMES = nn: CLDB, tt: NodeManager, dn: FileServer
#SKIP_ALL_TESTS_FOR_PLUGIN = False
#SKIP_EDP_TEST = False
#SKIP_SWIFT_TEST = False

View File

@ -77,6 +77,9 @@ class ITestCase(testcase.WithAttributes, base.BaseTestCase):
self.vanilla_config = cfg.ITConfig().vanilla_config
self.vanilla_two_config = cfg.ITConfig().vanilla_two_config
self.hdp_config = cfg.ITConfig().hdp_config
self.mapr_config = cfg.ITConfig().mapr_config
self.mapr4_1_config = cfg.ITConfig().mapr4_1_config
self.mapr4_2_config = cfg.ITConfig().mapr4_2_config
telnetlib.Telnet(
self.common_config.SAHARA_HOST, self.common_config.SAHARA_PORT
@ -366,6 +369,25 @@ class ITestCase(testcase.WithAttributes, base.BaseTestCase):
finally:
self.close_ssh_connection()
def await_active_tasktracker(self, node_info, plugin_config):
self.open_ssh_connection(
node_info['namenode_ip'], plugin_config.SSH_USERNAME)
for i in range(self.common_config.HDFS_INITIALIZATION_TIMEOUT * 6):
time.sleep(10)
active_tasktracker_count = self.execute_command(
'sudo -u %s bash -lc "hadoop job -list-active-trackers" '
'| grep "^tracker_" | wc -l'
% plugin_config.HADOOP_USER)[1]
active_tasktracker_count = int(active_tasktracker_count)
if (active_tasktracker_count == node_info['tasktracker_count']):
break
else:
self.fail(
'Tasktracker or datanode cannot be started within '
'%s minute(s) for namenode.'
% self.common_config.HDFS_INITIALIZATION_TIMEOUT)
self.close_ssh_connection()
# --------------------------------Remote---------------------------------------
def connect_to_swift(self):

View File

@ -0,0 +1,233 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from testtools import testcase
from sahara.tests.integration.configs import config as cfg
from sahara.tests.integration.tests import base as b
from sahara.tests.integration.tests import edp
from sahara.tests.integration.tests import scaling
from sahara.tests.integration.tests import swift
from sahara.utils import edp as utils_edp
class Mapr4_1GatingTest(swift.SwiftTest, scaling.ScalingTest,
edp.EDPTest):
config = cfg.ITConfig().mapr4_1_config
SKIP_EDP_TEST = config.SKIP_EDP_TEST
SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST
SKIP_SCALING_TEST = config.SKIP_SCALING_TEST
def setUp(self):
super(Mapr4_1GatingTest, self).setUp()
self.cluster_id = None
self.cluster_template_id = None
def _prepare_test(self):
self.mapr4_1_config = cfg.ITConfig().mapr4_1_config
self.floating_ip_pool = self.common_config.FLOATING_IP_POOL
self.internal_neutron_net = None
if self.common_config.NEUTRON_ENABLED:
self.internal_neutron_net = self.get_internal_neutron_net_id()
self.floating_ip_pool = (
self.get_floating_ip_pool_id_for_neutron_net())
self.mapr4_1_config.IMAGE_ID, self.mapr4_1_config.SSH_USERNAME = (
(self.get_image_id_and_ssh_username(self.mapr4_1_config)))
@b.errormsg("Failure while 'jt-nn' node group template creation: ")
def _create_jt_nn_ng_template(self):
template = {
'name': 'test-node-group-template-mapr4_1-jt-nn',
'plugin_config': self.mapr4_1_config,
'description': 'test node group template for MAPR plugin',
# NEED CHANGES MASTER_NODE
'node_processes': self.mapr4_1_config.MASTER_NODE_PROCESSES,
'floating_ip_pool': self.floating_ip_pool,
'node_configs': {}
}
self.ng_tmpl_jt_nn_id = self.create_node_group_template(**template)
self.addCleanup(self.delete_objects,
node_group_template_id_list=[self.ng_tmpl_jt_nn_id])
@b.errormsg("Failure while 'nm-dn' node group template creation: ")
def _create_nm_dn_ng_template(self):
template = {
'name': 'test-node-group-template-mapr4_1-nm-dn',
'plugin_config': self.mapr4_1_config,
'description': 'test node group template for MAPR plugin',
# NEED CHANGES WORKER
'node_processes': self.mapr4_1_config.WORKER_NODE_PROCESSES,
'floating_ip_pool': self.floating_ip_pool,
'node_configs': {}
}
self.ng_tmpl_nm_dn_id = self.create_node_group_template(**template)
self.addCleanup(self.delete_objects,
node_group_template_id_list=[self.ng_tmpl_nm_dn_id])
@b.errormsg("Failure while cluster template creation: ")
def _create_cluster_template(self):
template = {
'name': 'test-cluster-template-mapr4_1',
'plugin_config': self.mapr4_1_config,
'description': 'test cluster template for MAPR plugin',
'cluster_configs': {
'YARN': {
'yarn.log-aggregation-enable': False
}
},
'node_groups': [
{
'name': 'master-node-dn',
'node_group_template_id': self.ng_tmpl_jt_nn_id,
'count': 1
},
{
'name': 'worker-node-nm',
'node_group_template_id': self.ng_tmpl_nm_dn_id,
'count': 3
}
],
'net_id': self.internal_neutron_net
}
self.cluster_template_id = self.create_cluster_template(**template)
self.addCleanup(self.delete_objects,
cluster_template_id=self.cluster_template_id)
@b.errormsg("Failure while cluster creation: ")
def _create_cluster(self):
cluster_name = '%s-%s-v2' % (self.common_config.CLUSTER_NAME,
self.mapr4_1_config.PLUGIN_NAME)
cluster = {
'name': cluster_name,
'plugin_config': self.mapr4_1_config,
'cluster_template_id': self.cluster_template_id,
'description': 'test cluster',
'cluster_configs': {}
}
cluster_id = self.create_cluster(**cluster)
self.addCleanup(self.delete_objects, cluster_id=cluster_id)
self.poll_cluster_state(cluster_id)
self.cluster_info = self.get_cluster_info(self.mapr4_1_config)
self.await_active_tasktracker(
self.cluster_info['node_info'], self.mapr4_1_config)
@b.errormsg("Failure during check of Swift availability: ")
def _check_swift(self):
self.check_swift_availability(self.cluster_info)
@b.errormsg("Failure while EDP testing: ")
def _check_edp(self):
self.poll_jobs_status(list(self._run_edp_tests()))
def _run_edp_tests(self):
skipped_edp_job_types = self.mapr4_1_config.SKIP_EDP_JOB_TYPES
if utils_edp.JOB_TYPE_PIG not in skipped_edp_job_types:
yield self._edp_pig_test()
if utils_edp.JOB_TYPE_MAPREDUCE not in skipped_edp_job_types:
yield self._edp_mapreduce_test()
if utils_edp.JOB_TYPE_MAPREDUCE_STREAMING not in skipped_edp_job_types:
yield self._edp_mapreduce_streaming_test()
if utils_edp.JOB_TYPE_JAVA not in skipped_edp_job_types:
yield self._edp_java_test()
def _edp_pig_test(self):
pig_job = self.edp_info.read_pig_example_script()
pig_lib = self.edp_info.read_pig_example_jar()
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_PIG,
job_data_list=[{'pig': pig_job}],
lib_data_list=[{'jar': pig_lib}],
swift_binaries=True,
hdfs_local_output=True)
def _edp_mapreduce_test(self):
mapreduce_jar = self.edp_info.read_mapreduce_example_jar()
mapreduce_configs = self.edp_info.mapreduce_example_configs()
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_MAPREDUCE,
job_data_list=[],
lib_data_list=[{'jar': mapreduce_jar}],
configs=mapreduce_configs,
swift_binaries=True,
hdfs_local_output=True)
def _edp_mapreduce_streaming_test(self):
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
job_data_list=[],
lib_data_list=[],
configs=self.edp_info.mapreduce_streaming_configs())
def _edp_java_test(self):
java_jar = self.edp_info.read_java_example_lib(1)
java_configs = self.edp_info.java_example_configs(1)
return self.edp_testing(
utils_edp.JOB_TYPE_JAVA,
job_data_list=[],
lib_data_list=[{'jar': java_jar}],
configs=java_configs)
@b.errormsg("Failure while cluster scaling: ")
def _check_scaling(self):
datanode_count_after_resizing = (
self.cluster_info['node_info']['datanode_count']
+ self.mapr4_1_config.SCALE_EXISTING_NG_COUNT)
change_list = [
{
'operation': 'resize',
'info': ['worker-node-nm',
datanode_count_after_resizing]
},
{
'operation': 'add',
'info': ['new-worker-node-tt-dn',
self.mapr4_1_config.SCALE_NEW_NG_COUNT,
'%s' % self.ng_tmpl_nm_dn_id]
}
]
self.cluster_info = self.cluster_scaling(self.cluster_info,
change_list)
self.await_active_tasktracker(
self.cluster_info['node_info'], self.mapr4_1_config)
@b.errormsg(
"Failure during check of Swift availability after cluster scaling: ")
def _check_swift_after_scaling(self):
self.check_swift_availability(self.cluster_info)
@b.errormsg("Failure while EDP testing after cluster scaling: ")
def _check_edp_after_scaling(self):
self._check_edp()
@testcase.attr('mapr4_1')
def test_mapr4_1_plugin_gating(self):
self._prepare_test()
self._create_jt_nn_ng_template()
self._create_nm_dn_ng_template()
self._create_cluster_template()
self._create_cluster()
self._check_swift()
self._check_edp()
if not self.mapr4_1_config.SKIP_SCALING_TEST:
self._check_scaling()
self._check_swift_after_scaling()
self._check_edp_after_scaling()

View File

@ -0,0 +1,233 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from testtools import testcase
from sahara.tests.integration.configs import config as cfg
from sahara.tests.integration.tests import base as b
from sahara.tests.integration.tests import edp
from sahara.tests.integration.tests import scaling
from sahara.tests.integration.tests import swift
from sahara.utils import edp as utils_edp
class Mapr4_2GatingTest(swift.SwiftTest, scaling.ScalingTest,
edp.EDPTest):
config = cfg.ITConfig().mapr4_2_config
SKIP_EDP_TEST = config.SKIP_EDP_TEST
SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST
SKIP_SCALING_TEST = config.SKIP_SCALING_TEST
def setUp(self):
super(Mapr4_2GatingTest, self).setUp()
self.cluster_id = None
self.cluster_template_id = None
def _prepare_test(self):
self.mapr4_2_config = cfg.ITConfig().mapr4_2_config
self.floating_ip_pool = self.common_config.FLOATING_IP_POOL
self.internal_neutron_net = None
if self.common_config.NEUTRON_ENABLED:
self.internal_neutron_net = self.get_internal_neutron_net_id()
self.floating_ip_pool = (
self.get_floating_ip_pool_id_for_neutron_net())
self.mapr4_2_config.IMAGE_ID, self.mapr4_2_config.SSH_USERNAME = (
(self.get_image_id_and_ssh_username(self.mapr4_2_config)))
@b.errormsg("Failure while 'rm-nn' node group template creation: ")
def _create_rm_nn_ng_template(self):
template = {
'name': 'test-node-group-template-mapr4_2-rm-nn',
'plugin_config': self.mapr4_2_config,
'description': 'test node group template for MAPR plugin',
# NEED CHANGES MASTER_NODE
'node_processes': self.mapr4_2_config.MASTER_NODE_PROCESSES,
'floating_ip_pool': self.floating_ip_pool,
'node_configs': {}
}
self.ng_tmpl_rm_nn_id = self.create_node_group_template(**template)
self.addCleanup(self.delete_objects,
node_group_template_id_list=[self.ng_tmpl_rm_nn_id])
@b.errormsg("Failure while 'nm-dn' node group template creation: ")
def _create_nm_dn_ng_template(self):
template = {
'name': 'test-node-group-template-mapr4_2-nm-dn',
'plugin_config': self.mapr4_2_config,
'description': 'test node group template for MAPR plugin',
# NEED CHANGES WORKER
'node_processes': self.mapr4_2_config.WORKER_NODE_PROCESSES,
'floating_ip_pool': self.floating_ip_pool,
'node_configs': {}
}
self.ng_tmpl_nm_dn_id = self.create_node_group_template(**template)
self.addCleanup(self.delete_objects,
node_group_template_id_list=[self.ng_tmpl_nm_dn_id])
@b.errormsg("Failure while cluster template creation: ")
def _create_cluster_template(self):
template = {
'name': 'test-cluster-template-mapr4_2',
'plugin_config': self.mapr4_2_config,
'description': 'test cluster template for MAPR plugin',
'cluster_configs': {
'YARN': {
'yarn.log-aggregation-enable': False
}
},
'node_groups': [
{
'name': 'master-node-dn',
'node_group_template_id': self.ng_tmpl_rm_nn_id,
'count': 1
},
{
'name': 'worker-node-nm',
'node_group_template_id': self.ng_tmpl_nm_dn_id,
'count': 3
}
],
'net_id': self.internal_neutron_net
}
self.cluster_template_id = self.create_cluster_template(**template)
self.addCleanup(self.delete_objects,
cluster_template_id=self.cluster_template_id)
@b.errormsg("Failure while cluster creation: ")
def _create_cluster(self):
cluster_name = '%s-%s-v2' % (self.common_config.CLUSTER_NAME,
self.mapr4_2_config.PLUGIN_NAME)
cluster = {
'name': cluster_name,
'plugin_config': self.mapr4_2_config,
'cluster_template_id': self.cluster_template_id,
'description': 'test cluster',
'cluster_configs': {}
}
cluster_id = self.create_cluster(**cluster)
self.addCleanup(self.delete_objects, cluster_id=cluster_id)
self.poll_cluster_state(cluster_id)
self.cluster_info = self.get_cluster_info(self.mapr4_2_config)
self.await_active_tasktracker(
self.cluster_info['node_info'], self.mapr4_2_config)
@b.errormsg("Failure during check of Swift availability: ")
def _check_swift(self):
self.check_swift_availability(self.cluster_info)
@b.errormsg("Failure while EDP testing: ")
def _check_edp(self):
self.poll_jobs_status(list(self._run_edp_tests()))
def _run_edp_tests(self):
skipped_edp_job_types = self.mapr4_2_config.SKIP_EDP_JOB_TYPES
if utils_edp.JOB_TYPE_PIG not in skipped_edp_job_types:
yield self._edp_pig_test()
if utils_edp.JOB_TYPE_MAPREDUCE not in skipped_edp_job_types:
yield self._edp_mapreduce_test()
if utils_edp.JOB_TYPE_MAPREDUCE_STREAMING not in skipped_edp_job_types:
yield self._edp_mapreduce_streaming_test()
if utils_edp.JOB_TYPE_JAVA not in skipped_edp_job_types:
yield self._edp_java_test()
def _edp_pig_test(self):
pig_job = self.edp_info.read_pig_example_script()
pig_lib = self.edp_info.read_pig_example_jar()
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_PIG,
job_data_list=[{'pig': pig_job}],
lib_data_list=[{'jar': pig_lib}],
swift_binaries=True,
hdfs_local_output=True)
def _edp_mapreduce_test(self):
mapreduce_jar = self.edp_info.read_mapreduce_example_jar()
mapreduce_configs = self.edp_info.mapreduce_example_configs()
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_MAPREDUCE,
job_data_list=[],
lib_data_list=[{'jar': mapreduce_jar}],
configs=mapreduce_configs,
swift_binaries=True,
hdfs_local_output=True)
def _edp_mapreduce_streaming_test(self):
return self.edp_testing(
job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
job_data_list=[],
lib_data_list=[],
configs=self.edp_info.mapreduce_streaming_configs())
def _edp_java_test(self):
java_jar = self.edp_info.read_java_example_lib(2)
java_configs = self.edp_info.java_example_configs(2)
return self.edp_testing(
utils_edp.JOB_TYPE_JAVA,
job_data_list=[],
lib_data_list=[{'jar': java_jar}],
configs=java_configs)
@b.errormsg("Failure while cluster scaling: ")
def _check_scaling(self):
datanode_count_after_resizing = (
self.cluster_info['node_info']['datanode_count']
+ self.mapr4_2_config.SCALE_EXISTING_NG_COUNT)
change_list = [
{
'operation': 'resize',
'info': ['worker-node-nm',
datanode_count_after_resizing]
},
{
'operation': 'add',
'info': ['new-worker-node-tt-dn',
self.mapr4_2_config.SCALE_NEW_NG_COUNT,
'%s' % self.ng_tmpl_nm_dn_id]
}
]
self.cluster_info = self.cluster_scaling(self.cluster_info,
change_list)
self.await_active_tasktracker(
self.cluster_info['node_info'], self.mapr4_2_config)
@b.errormsg(
"Failure during check of Swift availability after cluster scaling: ")
def _check_swift_after_scaling(self):
self.check_swift_availability(self.cluster_info)
@b.errormsg("Failure while EDP testing after cluster scaling: ")
def _check_edp_after_scaling(self):
self._check_edp()
@testcase.attr('mapr4_2')
def test_mapr4_2_plugin_gating(self):
self._prepare_test()
self._create_rm_nn_ng_template()
self._create_nm_dn_ng_template()
self._create_cluster_template()
self._create_cluster()
self._check_swift()
self._check_edp()
if not self.mapr4_2_config.SKIP_SCALING_TEST:
self._check_scaling()
self._check_swift_after_scaling()
self._check_edp_after_scaling()

View File

@ -0,0 +1,330 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo.utils import excutils
from testtools import testcase
from sahara.tests.integration.configs import config as cfg
from sahara.tests.integration.tests import cinder
from sahara.tests.integration.tests import edp
from sahara.tests.integration.tests import map_reduce
from sahara.tests.integration.tests import scaling
from sahara.tests.integration.tests import swift
from sahara.utils import edp as utils_edp
class MaprGatingTest(cinder.CinderVolumeTest, edp.EDPTest,
map_reduce.MapReduceTest, swift.SwiftTest,
scaling.ScalingTest):
config = cfg.ITConfig().mapr_config
SKIP_CINDER_TEST = config.SKIP_CINDER_TEST
SKIP_EDP_TEST = config.SKIP_EDP_TEST
SKIP_MAP_REDUCE_TEST = config.SKIP_MAP_REDUCE_TEST
SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST
SKIP_SCALING_TEST = config.SKIP_SCALING_TEST
@testcase.skipIf(config.SKIP_ALL_TESTS_FOR_PLUGIN,
'All tests for MAPR plugin were skipped')
@testcase.attr('mapr1')
def test_mapr_plugin_gating(self):
self.mapr_config.IMAGE_ID, self.mapr_config.SSH_USERNAME = (
self.get_image_id_and_ssh_username(self.mapr_config))
# Default value of self.common_config.FLOATING_IP_POOL is None
floating_ip_pool = self.common_config.FLOATING_IP_POOL
internal_neutron_net = None
# If Neutron enabled then get ID of floating IP pool and ID of internal
# Neutron network
if self.common_config.NEUTRON_ENABLED:
floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net()
internal_neutron_net = self.get_internal_neutron_net_id()
if not self.mapr_config.SKIP_CINDER_TEST:
volumes_per_node = 2
else:
volumes_per_node = 0
node_group_template_id_list = []
# ------------------------------CLUSTER CREATION-------------------------------
# ----------------------"tt-dn" node group template creation-------------------
try:
node_group_template_tt_dn_id = self.create_node_group_template(
name='test-node-group-template-mapr-tt-dn',
plugin_config=self.mapr_config,
description='test node group template for MAPR plugin',
volumes_per_node=volumes_per_node,
node_processes=self.mapr_config.WORKER_NODE_PROCESSES,
# NEED CREATE WORKER_NODE_PROCESSES
node_configs={},
floating_ip_pool=floating_ip_pool
)
node_group_template_id_list.append(node_group_template_tt_dn_id)
except Exception as e:
with excutils.save_and_reraise_exception():
message = ('Failure while \'tt-dn\' node group '
'template creation: ')
self.print_error_log(message, e)
# --------------------------Cluster template creation--------------------------
try:
cluster_template_id = self.create_cluster_template(
name='test-cluster-template-mapr',
plugin_config=self.mapr_config,
description='test cluster template for MAPR plugin',
cluster_configs={},
node_groups=[
dict(
name='master-node-jt-nn',
flavor_id=self.flavor_id,
node_processes=self.mapr_config.MASTER_NODE_PROCESSES,
# NEED CREATE MASTER_NODE_PROCESSES
node_configs={},
floating_ip_pool=floating_ip_pool,
count=1),
dict(
name='worker-node-tt-dn',
node_group_template_id=node_group_template_tt_dn_id,
count=3)
],
net_id=internal_neutron_net
)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
node_group_template_id_list=node_group_template_id_list
)
message = 'Failure while cluster template creation: '
self.print_error_log(message, e)
# ------------------------------Cluster creation-------------------------------
cluster_name = (self.common_config.CLUSTER_NAME + '-' +
self.mapr_config.PLUGIN_NAME)
try:
self.create_cluster(
name=cluster_name,
plugin_config=self.mapr_config,
cluster_template_id=cluster_template_id,
description='test cluster',
cluster_configs={}
)
cluster_info = self.get_cluster_info(self.mapr_config)
self.await_active_tasktracker(
cluster_info['node_info'], self.mapr_config)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
self.cluster_id, cluster_template_id,
node_group_template_id_list
)
message = 'Failure while cluster creation: '
self.print_error_log(message, e)
# --------------------------------CINDER TESTING-------------------------------
try:
self.cinder_volume_testing(cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = 'Failure while Cinder testing: '
self.print_error_log(message, e)
# ---------------------------------EDP TESTING---------------------------------
path = 'sahara/tests/integration/tests/resources/'
pig_job_data = open(path + 'edp-job.pig').read()
pig_lib_data = open(path + 'edp-lib.jar').read()
mapreduce_jar_data = open(path + 'edp-mapreduce.jar').read()
# This is a modified version of WordCount that takes swift configs
java_lib_data = open(path + 'edp-java/edp-java.jar').read()
java_configs = {
"configs": {
"edp.java.main_class": ("org.openstack.sahara.examples"
".WordCount")
}
}
mapreduce_configs = {
"configs": {
"mapred.mapper.class": "org.apache.oozie.example.SampleMapper",
"mapred.reducer.class": ("org.apache.oozie.example"
".SampleReducer")
}
}
mapreduce_streaming_configs = {
"configs": {
"edp.streaming.mapper": "/bin/cat",
"edp.streaming.reducer": "/usr/bin/wc"
}
}
try:
self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
job_data_list=[{'pig': pig_job_data}],
lib_data_list=[{'jar': pig_lib_data}],
swift_binaries=True,
hdfs_local_output=True)
self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE,
job_data_list=[],
lib_data_list=[{'jar': mapreduce_jar_data}],
configs=mapreduce_configs,
swift_binaries=True,
hdfs_local_output=True)
self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
job_data_list=[],
lib_data_list=[],
configs=mapreduce_streaming_configs)
self.edp_testing(job_type=utils_edp.JOB_TYPE_JAVA,
job_data_list=[],
lib_data_list=[{'jar': java_lib_data}],
configs=java_configs,
pass_input_output_args=True)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = 'Failure while EDP testing: '
self.print_error_log(message, e)
# -----------------------------MAP REDUCE TESTING------------------------------
try:
self.map_reduce_testing(cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = 'Failure while Map Reduce testing: '
self.print_error_log(message, e)
# --------------------------CHECK SWIFT AVAILABILITY---------------------------
try:
self.check_swift_availability(cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = 'Failure during check of Swift availability: '
self.print_error_log(message, e)
# -------------------------------CLUSTER SCALING-------------------------------
if not self.mapr_config.SKIP_SCALING_TEST:
datanode_count_after_resizing = (
cluster_info['node_info']['datanode_count']
+ self.mapr_config.SCALE_EXISTING_NG_COUNT)
change_list = [
{
'operation': 'resize',
'info': ['worker-node-tt-dn',
datanode_count_after_resizing]
},
{
'operation': 'add',
'info': [
'new-worker-node-tt-dn',
self.mapr_config.SCALE_NEW_NG_COUNT,
'%s' % node_group_template_tt_dn_id
]
}
]
try:
new_cluster_info = self.cluster_scaling(cluster_info,
change_list)
self.await_active_tasktracker(
new_cluster_info['node_info'], self.mapr_config)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = 'Failure while cluster scaling: '
self.print_error_log(message, e)
# -------------------------CINDER TESTING AFTER SCALING------------------------
try:
self.cinder_volume_testing(new_cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
new_cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = ('Failure while Cinder testing after cluster '
'scaling: ')
self.print_error_log(message, e)
# ----------------------MAP REDUCE TESTING AFTER SCALING-----------------------
try:
self.map_reduce_testing(new_cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
new_cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = ('Failure while Map Reduce testing after '
'cluster scaling: ')
self.print_error_log(message, e)
# -------------------CHECK SWIFT AVAILABILITY AFTER SCALING--------------------
try:
self.check_swift_availability(new_cluster_info)
except Exception as e:
with excutils.save_and_reraise_exception():
self.delete_objects(
new_cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)
message = ('Failure during check of Swift availability '
'after cluster scaling: ')
self.print_error_log(message, e)
# ---------------------------DELETE CREATED OBJECTS----------------------------
self.delete_objects(
cluster_info['cluster_id'], cluster_template_id,
node_group_template_id_list
)

View File

@ -0,0 +1,185 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.utils.configs as c
import six
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self
node_domain = None
class Cluster(AttrDict):
fields = ['id', 'name', 'description', 'tenant_id', 'trust_id',
'is_transient', 'plugin_name', 'hadoop_version',
'cluster_configs', 'default_image_id', 'anti_affinity',
'management_private_key', 'management_public_key',
'user_keypair_id', 'status', 'status_description', 'info',
'extra', 'node_groups', 'cluster_template_id',
'cluster_template']
def __init__(self, mapping=None, **kwargs):
self.id = None
self.cluster_template_id = None
self.cluster_template = None
self.node_groups = []
d = dict((f, None) for f in Cluster.fields)
if mapping:
d.update(mapping)
if kwargs:
d.update(kwargs)
AttrDict.__init__(self, d)
if self.node_groups:
for ng in self.node_groups:
ng.cluster_id = self.id
ng.cluster = self
ng.cluster_template_id = self.cluster_template_id
ng.cluster_template = self.cluster_template
class NodeGroup(AttrDict):
fields = ['id', 'name', 'flavor_id', 'image_id', 'image_username',
'node_processes', 'node_configs', 'volumes_per_node',
'volumes_size', 'volume_mount_prefix', 'floating_ip_pool',
'count', 'instances', 'node_group_template_id',
'node_group_template', 'cluster_id', 'cluster',
'cluster_template_id', 'cluster_template']
def __init__(self, mapping=None, **kwargs):
self.id = None
self.instances = []
d = dict((f, None) for f in NodeGroup.fields)
if mapping:
d.update(mapping)
if kwargs:
d.update(kwargs)
AttrDict.__init__(self, d)
if self.instances:
for i in self.instances:
i.node_group_id = self.id
i.node_group = self
def configuration(self):
return c.merge_configs(self.cluster.cluster_configs, self.node_configs)
def storage_paths(self):
mp = [self.volume_mount_prefix + str(idx)
for idx in range(1, self.volumes_per_node + 1)]
if not mp:
mp = ['/mnt']
return mp
def get_image_id(self):
return self.image_id or self.cluster.default_image_id
class Instance(AttrDict):
fields = ['id', 'node_group_id', 'node_group', 'instance_id',
'instance_name', 'internal_ip', 'management_ip', 'volumes']
def __init__(self, mapping=None, **kwargs):
d = dict((f, None) for f in Instance.fields)
p = lambda i: i[0] in Instance.fields
if mapping:
d.update(dict(filter(p, six.iteritems(mapping))))
if kwargs:
d.update(dict(filter(p, six.iteritems(kwargs))))
AttrDict.__init__(self, d)
results = kwargs['results'] if 'results' in kwargs else []
default_result = (kwargs['default_result']
if 'default_result' in kwargs
else Remote.DEFAULT_RESULT)
self._remote = Remote(results, default_result)
def hostname(self):
return self.instance_name
def fqdn(self):
return self.instance_name + '.' + node_domain
def remote(self):
return self._remote
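# Remote is an in-memory fake of a Sahara remote: it records files written
# via write_file_to()/write_files_to() in `fs`, keeps every executed command
# in `history`, and answers execute_command() with the result registered for
# that exact command dict, falling back to `default_result`.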
class Remote(object):
DEFAULT_RESULT = (0, '', '')
def __init__(self, results=None, default_result=None):
self.fs = []
self.history = []
self.results = results if results is not None else []
self.default_result = (default_result
if default_result
else Remote.DEFAULT_RESULT)
def register_result(self, command, result):
# Remember the canned result for this command so get_result() can replay it.
self.results += [(command, result)]
def get_result(self, command):
for r_command, result in self.results:
if r_command == command:
return result
return (self.default_result
if command['get_stderr']
else self.default_result[:-1])
def __exit__(self, *args):
pass
def __enter__(self):
return self
def write_file_to(self, remote_file, data, run_as_root=False, timeout=120):
self.fs += [{'file': remote_file, 'data': data, 'root': run_as_root,
'timeout': timeout}]
def write_files_to(self, files, run_as_root=False, timeout=120):
self.fs += [{'file': f, 'data': d, 'root': run_as_root,
'timeout': timeout}
for f, d in six.iteritems(files)]
def read_file_from(self, remote_file, run_as_root=False, timeout=120):
for f in self.fs:
if f['file'] == remote_file:
return f['data']
return None
def replace_remote_string(self, remote_file, old_str,
new_str, timeout=120):
pass
def get_neutron_info(self):
return
def get_http_client(self, port, info=None):
return
def close_http_sessions(self):
pass
def execute_command(self, cmd, run_as_root=False, get_stderr=False,
raise_when_error=True, timeout=300):
command = {'cmd': cmd,
'run_as_root': run_as_root,
'get_stderr': get_stderr,
'raise_when_error': raise_when_error,
'timeout': timeout}
self.history += [command]
return self.get_result(command)
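# Usage sketch (hypothetical test code): pre-register a result for one exact
# command dict, then inspect what the code under test wrote or executed.
#
#     cmd = {'cmd': 'ls /', 'run_as_root': False, 'get_stderr': True,
#            'raise_when_error': True, 'timeout': 300}
#     inst = Instance(instance_name='i0', results=[(cmd, (0, 'out', ''))])
#     with inst.remote() as r:
#         r.write_file_to('/tmp/f', 'data')
#         code, out, err = r.execute_command('ls /', get_stderr=True)
#     assert inst.remote().fs[0]['file'] == '/tmp/f'
#     assert inst.remote().history[0]['cmd'] == 'ls /'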

View File

@ -0,0 +1,2 @@
net.topology.script.file.name=/opt/mapr/topology.sh
cldb.zookeeper.servers=192.168.1.10:5181,192.168.1.11:5181,192.168.1.12:5181

View File

@ -0,0 +1,69 @@
<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.swift.impl</name>
<value>org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem
</value>
</property>
<property>
<name>fs.swift.socket.timeout</name>
<value>60000</value>
</property>
<property>
<name>fs.swift.blocksize</name>
<value>32768</value>
</property>
<property>
<name>fs.swift.connect.retry.count</name>
<value>3</value>
</property>
<property>
<name>fs.swift.service.sahara.auth.endpoint.prefix</name>
<value>/endpoints/AUTH_</value>
</property>
<property>
<name>fs.swift.connect.timeout</name>
<value>15000</value>
</property>
<property>
<name>fs.swift.requestsize</name>
<value>64</value>
</property>
<property>
<name>fs.swift.connect.throttle.delay</name>
<value>0</value>
</property>
<property>
<name>k1</name>
<value>v1</value>
</property>
<property>
<name>k0</name>
<value>v0</value>
</property>
<property>
<name>fs.swift.service.sahara.https.port</name>
<value>443</value>
</property>
<property>
<name>fs.swift.partsize</name>
<value>4718592</value>
</property>
<property>
<name>fs.swift.service.sahara.auth.url</name>
<value>http://auth:None/v2.0/tokens/</value>
</property>
<property>
<name>fs.swift.service.sahara.public</name>
<value>True</value>
</property>
<property>
<name>fs.swift.service.sahara.http.port</name>
<value>8080</value>
</property>
<property>
<name>fs.swift.service.sahara.tenant</name>
<value>tenant_0</value>
</property>
</configuration>

View File

@ -0,0 +1,82 @@
<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.swift.requestsize</name>
<value>64</value>
</property>
<property>
<name>fs.swift.impl</name>
<value>org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem
</value>
</property>
<property>
<name>fs.swift.socket.timeout</name>
<value>60000</value>
</property>
<property>
<name>fs.swift.blocksize</name>
<value>32768</value>
</property>
<property>
<name>net.topology.impl</name>
<value>org.apache.hadoop.net.NetworkTopologyWithNodeGroup</value>
</property>
<property>
<name>fs.swift.connect.retry.count</name>
<value>3</value>
</property>
<property>
<name>fs.swift.service.sahara.auth.endpoint.prefix</name>
<value>/endpoints/AUTH_</value>
</property>
<property>
<name>fs.swift.connect.timeout</name>
<value>15000</value>
</property>
<property>
<name>dfs.block.replicator.classname</name>
<value>org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyWithNodeGroup
</value>
</property>
<property>
<name>fs.swift.connect.throttle.delay</name>
<value>0</value>
</property>
<property>
<name>k1</name>
<value>v1</value>
</property>
<property>
<name>k0</name>
<value>v0</value>
</property>
<property>
<name>net.topology.nodegroup.aware</name>
<value>True</value>
</property>
<property>
<name>fs.swift.service.sahara.https.port</name>
<value>443</value>
</property>
<property>
<name>fs.swift.partsize</name>
<value>4718592</value>
</property>
<property>
<name>fs.swift.service.sahara.auth.url</name>
<value>http://auth:None/v2.0/tokens/</value>
</property>
<property>
<name>fs.swift.service.sahara.public</name>
<value>True</value>
</property>
<property>
<name>fs.swift.service.sahara.http.port</name>
<value>8080</value>
</property>
<property>
<name>fs.swift.service.sahara.tenant</name>
<value>tenant_0</value>
</property>
</configuration>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>node_config_0</name>
<value>False</value>
</property>
</configuration>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>node_config_0</name>
<value>False</value>
</property>
<property>
<name>mapred.task.cache.levels</name>
<value>3</value>
</property>
<property>
<name>mapred.jobtracker.nodegroup.aware</name>
<value>True</value>
</property>
</configuration>

View File

@ -0,0 +1,9 @@
10.10.1.12 r
10.10.1.10 r
10.10.1.11 r
192.168.1.12 r
i1 r
i0 r
192.168.1.11 r
192.168.1.10 r
i2 r

View File

@ -0,0 +1,6 @@
edp-master-0001 /rack1
10.50.0.8 /rack1
edp-slave-0002 /rack1
10.50.0.5 /rack1
edp-slave-0001 /rack2
10.50.0.6 /rack2

View File

@ -0,0 +1,106 @@
{
"files": [
{
"remote": null,
"type": null,
"configs": {
"required": {
"cluster": [
{
"name": "k4"
}
]
}
}
},
{
"remote": "file_0",
"type": "properties"
},
{
"remote": "file_1",
"local": "test.properties",
"type": "properties",
"configs": {
"required": {
"cluster": [
{
"name": "k0",
"default_value": "default_value_0",
"description": "description_0",
"priority": 2
}
],
"node": [
{
"name": "k1",
"config_type": "int",
"default_value": 3,
"priority": 1
}
]
},
"optional": {
"cluster": [
{
"name": "k2",
"config_type": "bool"
}
],
"node": [
{
"name": "k3"
}
]
}
}
},
{
"remote": "file_2",
"local": "test.xml",
"type": "xml"
},
{
"remote": "file_3",
"local": "raw.data",
"type": "raw"
}
],
"services": [
{
"name": "general",
"files": [
null,
"file_3"
]
},
{
"name": "service_0"
},
{
"name": "service_1",
"files": [
],
"node_processes": [
]
},
{
"name": "service_2",
"files": [
"file_0",
"file_1",
"file_2"
],
"node_processes": [
"node_process_0",
"node_process_1"
],
"versions": [
"v1",
"v2"
]
}
]
}

View File

@ -0,0 +1,168 @@
{
"files":[
{
"remote":null,
"type":null,
"configs":{
"required":{
"cluster":[
{
"name":"Enable Data Locality",
"config_type":"bool",
"default_value":false,
"priority":1
},
{
"name":"Enable MapR-DB",
"config_type":"bool",
"default_value":false,
"priority":1
}
]
}
}
},
{
"remote":"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml",
"type":"xml",
"configs":{
"required":{
"node":[
{
"name":"node_config_0",
"config_type":"bool",
"default_value":false,
"priority":1
}
]
}
}
},
{
"remote":"/opt/mapr/conf/cldb.conf",
"type":"properties"
},
{
"remote":"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml",
"local":"test.xml",
"type":"xml",
"configs":{
"optional":{
"cluster":[
{
"name":"fs.swift.impl",
"default_value":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name":"fs.swift.connect.timeout",
"config_type":"int",
"default_value":15000
},
{
"name":"fs.swift.socket.timeout",
"config_type":"int",
"default_value":60000
},
{
"name":"fs.swift.connect.retry.count",
"config_type":"int",
"default_value":3
},
{
"name":"fs.swift.connect.throttle.delay",
"config_type":"int",
"default_value":0
},
{
"name":"fs.swift.blocksize",
"config_type":"int",
"default_value":32768
},
{
"name":"fs.swift.partsize",
"config_type":"int",
"default_value":4718592
},
{
"name":"fs.swift.requestsize",
"config_type":"int",
"default_value":64
},
{
"name":"fs.swift.service.sahara.public",
"config_type":"bool",
"default_value":true
},
{
"name":"fs.swift.service.sahara.http.port",
"config_type":"int",
"default_value":8080
},
{
"name":"fs.swift.service.sahara.https.port",
"config_type":"int",
"default_value":443
},
{
"name":"fs.swift.service.sahara.auth.endpoint.prefix",
"default_value":"/endpoints/AUTH_"
}
]
}
}
}
],
"services":[
{
"name":"general",
"files":[
null
]
},
{
"name":"Management",
"node_processes":[
"ZooKeeper",
"Webserver",
"MapR Client",
"Metrics"
]
},
{
"name":"MapReduce",
"node_processes":[
"TaskTracker",
"JobTracker"
],
"files":[
"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml"
]
},
{
"name":"MapR FS",
"node_processes":[
"CLDB",
"FileServer",
"NFS"
],
"files":[
"/opt/mapr/conf/cldb.conf",
"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml"
]
},
{
"name":"HBase",
"node_processes":[
"HBase Master",
"HBase RegionServer",
"HBase Client"
]
},
{
"name":"Oozie",
"node_processes":[
"Oozie"
]
}
]
}

View File

@ -0,0 +1 @@
Some unparsable data

View File

@ -0,0 +1,4 @@
k0 = v0
# Comment
k1 = v1

View File

@ -0,0 +1,10 @@
<configuration>
<property>
<name>k0</name>
<value>v0</value>
</property>
<property>
<name>k1</name>
<value>v1</value>
</property>
</configuration>

View File

@ -0,0 +1,51 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.cluster_info as ci
import sahara.plugins.mapr.util.plugin_spec as ps
import sahara.tests.unit.base as b
import sahara.tests.unit.plugins.mapr.stubs as s
class ClusterInfoTest(b.SaharaTestCase):
def assertItemsEqual(self, expected, actual):
for e in expected:
self.assertIn(e, actual)
for a in actual:
self.assertIn(a, expected)
def setUp(self):
b.SaharaTestCase.setUp(self)
path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json'
self.plugin_spec = ps.PluginSpec(path)
def test_get_node_group_services(self):
node_processes = ['ZooKeeper', 'Webserver', 'CLDB']
node_group = s.NodeGroup(None, node_processes=node_processes)
cluster_info = ci.ClusterInfo(None, self.plugin_spec)
actual = cluster_info.get_services(node_group)
expected = ['Management', 'MapR FS', 'general']
self.assertItemsEqual(expected, actual)
def test_get_cluster_services(self):
np0 = ['ZooKeeper', 'Webserver', 'CLDB']
ng0 = s.NodeGroup(node_processes=np0)
np1 = ['ZooKeeper', 'TaskTracker', 'FileServer']
ng1 = s.NodeGroup(node_processes=np1)
cluster = s.Cluster(node_groups=[ng0, ng1])
cluster_info = ci.ClusterInfo(cluster, self.plugin_spec)
actual = cluster_info.get_services()
expected = ['Management', 'MapR FS', 'general', 'MapReduce']
self.assertItemsEqual(expected, actual)

View File

@ -0,0 +1,96 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import StringIO as sio
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.tests.unit.base as b
import mock as m
dirname = os.path.dirname(__file__)
class ConfigFileUtilsTest(b.SaharaTestCase):
def assertItemsEqual(self, expected, actual):
for e in expected:
self.assertIn(e, actual)
for a in actual:
self.assertIn(a, expected)
def test_load_properties_file(self):
path = 'tests/unit/plugins/mapr/utils/resources/test.properties'
actual = cfu.load_properties_file(path)
expected = {'k0': 'v0', 'k1': 'v1'}
self.assertEqual(expected, actual)
def test_load_xml_file(self):
path = 'tests/unit/plugins/mapr/utils/resources/test.xml'
actual = cfu.load_xml_file(path)
expected = {'k0': 'v0', 'k1': 'v1'}
self.assertEqual(expected, actual)
def test_load_raw_file(self):
path = 'tests/unit/plugins/mapr/utils/resources/raw.data'
actual = cfu.load_raw_file(path)
expected = {'content': 'Some unparsable data'}
self.assertEqual(expected, actual)
@m.patch('__builtin__.open')
def test_to_properties_file_content(self, o_mock):
data = {'k0': 'v0', 'k1': 'v1'}
s = sio.StringIO(cfu.to_properties_file_content(data))
s.flush()
o_mock.return_value = s
actual = cfu.load_properties_file('')
self.assertEqual(data, actual)
data = {}
actual = cfu.to_properties_file_content(data)
expected = ''
self.assertEqual(expected, actual)
@m.patch('__builtin__.open')
def test_to_xml_file_content(self, o_mock):
data = {'k0': 'v0', 'k1': 'v1'}
s = sio.StringIO(cfu.to_xml_file_content(data))
s.flush()
o_mock.return_value = s
actual = cfu.load_xml_file('')
self.assertEqual(data, actual)
def test_to_raw_file_content(self):
data = {'content': 'Some unparsable data'}
actual = cfu.to_raw_file_content(data)
expected = 'Some unparsable data'
self.assertEqual(expected, actual)
def test_load_file(self):
path = 'tests/unit/plugins/mapr/utils/resources/test.properties'
actual = cfu.load_file(path, 'properties')
expected = {'k0': 'v0', 'k1': 'v1'}
self.assertEqual(expected, actual)
path = 'tests/unit/plugins/mapr/utils/resources/test.xml'
actual = cfu.load_file(path, 'xml')
expected = {'k0': 'v0', 'k1': 'v1'}
self.assertEqual(expected, actual)
path = 'tests/unit/plugins/mapr/utils/resources/raw.data'
actual = cfu.load_file(path, 'raw')
expected = {'content': 'Some unparsable data'}
self.assertEqual(expected, actual)
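# Rough shape of the round-trip exercised above (a sketch, not asserted
# verbatim by these tests): load_properties_file()/load_xml_file() parse a
# file into a plain dict such as {'k0': 'v0', 'k1': 'v1'}, and the matching
# to_*_file_content() helpers serialize such a dict back; the exact key
# ordering and separators are whatever config_file_utils itself emits.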

View File

@ -0,0 +1,196 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.util.func_utils as fu
import sahara.tests.unit.base as b
class DictUtilsTest(b.SaharaTestCase):
def assertItemsEqual(self, expected, actual):
for e in expected:
self.assertIn(e, actual)
for a in actual:
self.assertIn(a, expected)
def assertDictValueItemsEqual(self, expected, actual):
self.assertItemsEqual(expected.keys(), actual.keys())
for k in actual:
self.assertItemsEqual(expected[k], actual[k])
def test_append_to_key(self):
arg_0 = {'k0': ['v0', 'v1'], 'k1': ['v1', 'v2'], 'k3': ['v3']}
arg_1 = {'v0': {'a': 'a'}, 'v1': {'b': 'b'},
'v2': {'c': 'c'}, 'v4': {'d': 'd'}}
actual = du.append_to_key(arg_0, arg_1)
expected = {'k0': {'v0': {'a': 'a'}, 'v1': {'b': 'b'}},
'k1': {'v1': {'b': 'b'}, 'v2': {'c': 'c'}},
'k3': {}}
self.assertEqual(expected, actual)
def test_iterable_to_values_pair_dict_reducer(self):
vp_dict_r = du.iterable_to_values_pair_dict_reducer
arg = [[{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a1', 'b': 'b1', 'c': 'c1'}],
[{'a': 'a2', 'b': 'b2', 'c': 'c2'}]]
reducer = vp_dict_r('a', 'b')
actual = reduce(reducer, arg, {})
expected = {'a0': 'b0', 'a1': 'b1', 'a2': 'b2'}
self.assertEqual(expected, actual)
def test_flatten_to_list_reducer(self):
arg = [[{'a': 'a0'}, {'a': 'a1'}], [{'a': 'a2'}]]
reducer = du.flatten_to_list_reducer()
actual = reduce(reducer, arg, [])
expected = [{'a': 'a0'}, {'a': 'a1'}, {'a': 'a2'}]
self.assertItemsEqual(expected, actual)
def test_map_by_field_value(self):
arg = [{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a0', 'b': 'b2', 'c': 'c1'},
{'a': 'a2', 'b': 'b2', 'c': 'c2'}]
actual = du.map_by_field_value(arg, 'a')
expected = {'a0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a0', 'b': 'b2', 'c': 'c1'}],
'a2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]}
self.assertDictValueItemsEqual(expected, actual)
actual = du.map_by_field_value(arg, 'c')
expected = {'c0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'}],
'c1': [{'a': 'a0', 'b': 'b2', 'c': 'c1'}],
'c2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]}
self.assertDictValueItemsEqual(expected, actual)
def test_map_by_fields_values(self):
arg = [{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a0', 'b': 'b2', 'c': 'c1'},
{'a': 'a2', 'b': 'b2', 'c': 'c2'}]
actual = du.map_by_fields_values(arg, ['a', 'b', 'c'])
expected = {'a0': {'b0': {'c0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'}]},
'b2': {'c1': [{'a': 'a0', 'b': 'b2', 'c': 'c1'}]}},
'a2': {'b2': {'c2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]}}}
self.assertItemsEqual(expected.keys(), actual.keys())
for k0 in actual:
self.assertItemsEqual(expected[k0].keys(), actual[k0].keys())
for k1 in actual[k0]:
self.assertDictValueItemsEqual(
expected[k0][k1], actual[k0][k1])
def test_get_keys_by_value_type(self):
arg = {'dict_0': {}, 'list': [], 'set': set(['elem']),
'str': 'str', 'dict_1': {}}
actual = du.get_keys_by_value_type(arg, dict)
expected = ['dict_0', 'dict_1']
self.assertItemsEqual(expected, actual)
actual = du.get_keys_by_value_type(arg, list)
expected = ['list']
self.assertItemsEqual(expected, actual)
def test_deep_update(self):
arg_0 = {'a0': {'b0': {'c0': 'v0', 'c1': 'v1'}},
'a1': {'b1': 'v2'}, 'a3': 'v3'}
arg_1 = {'a0': {'b0': {'c0': 'v1', 'c2': 'v2'}, 'b1': 'v4'},
'a1': 'v5', 'a3': {'v1': 'v2'}}
actual = du.deep_update(arg_0, arg_1)
expected = {'a0': {'b0': {'c0': 'v1', 'c1': 'v1', 'c2': 'v2'},
'b1': 'v4'},
'a1': 'v5', 'a3': {'v1': 'v2'}}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg_0)
def test_get_keys_by_value(self):
arg = {'k0': 'v0', 'k1': 'v0', 'k2': 'v2'}
actual = du.get_keys_by_value(arg, 'v0')
expected = ['k0', 'k1']
self.assertItemsEqual(expected, actual)
actual = du.get_keys_by_value(arg, 'v2')
expected = ['k2']
self.assertItemsEqual(expected, actual)
actual = du.get_keys_by_value(arg, 'v')
expected = []
self.assertItemsEqual(expected, actual)
def test_get_keys_by_value_2(self):
arg = {'k0': ['v0', 'v1'], 'k1': ['v1', 'v2'], 'k2': ['v2', 'v3']}
actual = du.get_keys_by_value_2(arg, 'v1')
expected = ['k0', 'k1']
self.assertItemsEqual(expected, actual)
actual = du.get_keys_by_value_2(arg, 'v3')
expected = ['k2']
self.assertItemsEqual(expected, actual)
actual = du.get_keys_by_value_2(arg, 'v')
expected = []
self.assertItemsEqual(expected, actual)
def test_iterable_to_values_list_reducer(self):
arg = [[{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b0'}], [{'a': 'a2'}]]
reducer = du.iterable_to_values_list_reducer('a')
actual = reduce(reducer, arg, [])
expected = ['a0', 'a1', 'a2']
self.assertTrue(isinstance(actual, list))
self.assertItemsEqual(expected, actual)
def test_select(self):
source = [{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a1', 'b': 'b1', 'c': 'c0'},
{'a': 'a2', 'b': 'b2', 'c': 'c0'}]
predicate = fu.like_predicate({'c': 'c0'})
actual = du.select(['a', 'b', 'c'], source, predicate)
expected = [{'a': 'a0', 'b': 'b0', 'c': 'c0'},
{'a': 'a1', 'b': 'b1', 'c': 'c0'},
{'a': 'a2', 'b': 'b2', 'c': 'c0'}]
self.assertItemsEqual(expected, actual)
predicate = fu.in_predicate('b', ['b0', 'b1'])
actual = du.select(['a'], source, predicate)
expected = [{'a': 'a0'}, {'a': 'a1'}]
self.assertItemsEqual(expected, actual)
def test_list_of_vp_dicts_function(self):
arg = {'a0': 'b0', 'a1': 'b1'}
actual = du.list_of_vp_dicts_function('a', 'b')(arg)
expected = [{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b1'}]
self.assertTrue(isinstance(actual, list))
for a in actual:
self.assertTrue(isinstance(a, dict))
self.assertItemsEqual(expected, actual)
def test_flattened_dict(self):
arg = {'a0': {'b0': {'c0': 'd0'}},
'a1': {'b0': {'c1': 'd1',
'c2': 'd2'},
'b1': {'c0': 'd0'}}}
actual = du.flattened_dict(arg, ['a', 'b', 'c', 'd'])
expected = [{'a': 'a0', 'b': 'b0', 'c': 'c0', 'd': 'd0'},
{'a': 'a1', 'b': 'b0', 'c': 'c1', 'd': 'd1'},
{'a': 'a1', 'b': 'b0', 'c': 'c2', 'd': 'd2'},
{'a': 'a1', 'b': 'b1', 'c': 'c0', 'd': 'd0'}]
self.assertItemsEqual(expected, actual)
arg = {'a0': 'b0', 'a1': 'b1'}
actual = du.flattened_dict(arg, ['a', 'b'])
expected = [{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b1'}]
self.assertItemsEqual(expected, actual)

View File

@ -0,0 +1,202 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.func_utils as fu
import sahara.tests.unit.base as b
class PredicatesTest(b.SaharaTestCase):
def test_true_predicate(self):
self.assertTrue(fu.true_predicate(None))
def test_false_predicate(self):
self.assertFalse(fu.false_predicate(None))
def test_not_predicate(self):
self.assertFalse(fu.not_predicate(fu.true_predicate)(None))
self.assertTrue(fu.not_predicate(fu.false_predicate)(None))
def test_and_predicate(self):
true_p = fu.true_predicate
false_p = fu.false_predicate
and_p = fu.and_predicate
self.assertTrue(and_p(true_p, true_p)(None))
self.assertFalse(and_p(false_p, true_p)(None))
self.assertFalse(and_p(true_p, false_p)(None))
self.assertFalse(and_p(false_p, false_p)(None))
def test_or_predicate(self):
true_p = fu.true_predicate
false_p = fu.false_predicate
or_p = fu.or_predicate
self.assertTrue(or_p(true_p, true_p)(None))
self.assertTrue(or_p(false_p, true_p)(None))
self.assertTrue(or_p(true_p, false_p)(None))
self.assertFalse(or_p(false_p, false_p)(None))
def test_field_equals_predicate(self):
field_equals_p = fu.field_equals_predicate
arg = {'a': 'a', 'b': 'b'}
self.assertTrue(field_equals_p('a', 'a')(arg))
self.assertFalse(field_equals_p('b', 'a')(arg))
def test_like_predicate(self):
like_p = fu.like_predicate
arg = {'a': 'a', 'b': 'b', 'c': 'c'}
self.assertTrue(like_p({'a': 'a', 'b': 'b', 'c': 'c'})(arg))
self.assertTrue(like_p({'a': 'a', 'b': 'b'})(arg))
self.assertTrue(like_p({'a': 'a'})(arg))
self.assertTrue(like_p({'a': 'a'}, ['a'])(arg))
self.assertTrue(like_p({})(arg))
self.assertTrue(like_p({'a': 'a', 'b': 'b', 'c': 'a'}, ['c'])(arg))
self.assertFalse(like_p({'a': 'a', 'b': 'b', 'c': 'a'})(arg))
self.assertFalse(like_p({'a': 'a', 'c': 'a'})(arg))
self.assertFalse(like_p({'c': 'a'}, ['a'])(arg))
def test_in_predicate(self):
in_p = fu.in_predicate
arg = {'a': 'a', 'b': 'b'}
self.assertTrue(in_p('a', ['a', 'b'])(arg))
self.assertFalse(in_p('a', ['c', 'b'])(arg))
self.assertFalse(in_p('a', [])(arg))
class FunctionsTest(b.SaharaTestCase):
def test_copy_function(self):
copy_f = fu.copy_function
arg = {'a': 'a'}
actual = copy_f()(arg)
expected = {'a': 'a'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
def test_append_field_function(self):
append_field_f = fu.append_field_function
arg = {'a': 'a'}
actual = append_field_f('b', 'b')(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
def test_append_fields_function(self):
append_fields_f = fu.append_fields_function
arg = {'a': 'a'}
actual = append_fields_f({'b': 'b', 'c': 'c'})(arg)
expected = {'a': 'a', 'b': 'b', 'c': 'c'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
actual = append_fields_f({'b': 'b'})(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
actual = append_fields_f({})(arg)
expected = {'a': 'a'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
def test_get_values_pair_function(self):
get_values_pair_f = fu.get_values_pair_function
arg = {'a': 'a', 'b': 'b'}
actual = get_values_pair_f('a', 'b')(arg)
expected = ('a', 'b')
self.assertEqual(expected, actual)
def test_get_field_function(self):
get_field_f = fu.get_field_function
arg = {'a': 'a', 'b': 'b'}
actual = get_field_f('a')(arg)
expected = ('a', 'a')
self.assertEqual(expected, actual)
def test_get_fields_function(self):
get_fields_f = fu.get_fields_function
arg = {'a': 'a', 'b': 'b'}
actual = get_fields_f(['a', 'b'])(arg)
expected = [('a', 'a'), ('b', 'b')]
self.assertEqual(expected, actual)
actual = get_fields_f(['a'])(arg)
expected = [('a', 'a')]
self.assertEqual(expected, actual)
def test_extract_fields_function(self):
extract_fields_f = fu.extract_fields_function
arg = {'a': 'a', 'b': 'b'}
actual = extract_fields_f(['a', 'b'])(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)
actual = extract_fields_f(['a'])(arg)
expected = {'a': 'a'}
self.assertEqual(expected, actual)
def test_get_value_function(self):
get_value_f = fu.get_value_function
arg = {'a': 'a', 'b': 'b'}
actual = get_value_f('a')(arg)
expected = 'a'
self.assertEqual(expected, actual)
def test_set_default_value_function(self):
set_default_value_f = fu.set_default_value_function
arg = {'a': 'a'}
actual = set_default_value_f('b', 'b')(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
actual = set_default_value_f('a', 'b')(arg)
expected = {'a': 'a'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
def test_set_default_values_function(self):
set_default_values_f = fu.set_default_values_function
arg = {'a': 'a'}
actual = set_default_values_f({'a': 'b', 'c': 'c'})(arg)
expected = {'a': 'a', 'c': 'c'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
actual = set_default_values_f({'b': 'b'})(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
actual = set_default_values_f({})(arg)
expected = {'a': 'a'}
self.assertEqual(expected, actual)
self.assertIsNot(actual, arg)
def test_values_pair_to_dict_function(self):
values_pair_to_dict_f = fu.values_pair_to_dict_function
arg = ('a', 'b')
actual = values_pair_to_dict_f('a', 'b')(arg)
expected = {'a': 'a', 'b': 'b'}
self.assertEqual(expected, actual)

View File

@ -0,0 +1,324 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import sahara.plugins.mapr.util.plugin_spec as ps
import sahara.plugins.provisioning as p
import sahara.tests.unit.base as b
import mock as m
class PluginSpecTest(b.SaharaTestCase):
def assertItemsEqual(self, expected, actual):
for e in expected:
self.assertIn(e, actual)
for a in actual:
self.assertIn(a, expected)
def assertDictValueItemsEqual(self, expected, actual):
self.assertItemsEqual(expected.keys(), actual.keys())
for k in actual:
self.assertItemsEqual(expected[k], actual[k])
@m.patch.object(ps.PluginSpec, '__init__', new=lambda i: None)
def setUp(self):
super(PluginSpecTest, self).setUp()
path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json'
plugin_spec = ps.PluginSpec()
plugin_spec.base_dir = os.path.dirname(path)
plugin_spec.plugin_spec_dict = plugin_spec._load_plugin_spec_dict(path)
self.plugin_spec = plugin_spec
def test_load_service_file_name_map(self):
plugin_spec = self.plugin_spec
actual = plugin_spec._load_service_file_name_map()
expected = {'service_2': ['file_0', 'file_1', 'file_2'],
'general': ['file_3', None]}
self.assertDictValueItemsEqual(expected, actual)
def test_load_file_name_config_map(self):
plugin_spec = self.plugin_spec
actual = plugin_spec._load_file_name_config_map()
expected = {'file_1': ['k1', 'k0', 'k3', 'k2'], None: ['k4']}
self.assertDictValueItemsEqual(expected, actual)
def test_load_default_configs(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
actual = pls._load_default_configs()
expected = {'service_2': {'file_1': {'k0': 'v0', 'k1': 'v1'},
'file_2': {'k0': 'v0', 'k1': 'v1'}},
'general': {'file_3': {'content': 'Some unparsable data'}}}
self.assertEqual(expected, actual)
def test_load_service_node_process_map(self):
pls = self.plugin_spec
actual = pls._load_service_node_process_map()
expected = {'service_2': ['node_process_0', 'node_process_1']}
self.assertDictValueItemsEqual(expected, actual)
def test_load_plugin_config_items(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.default_configs = pls._load_default_configs()
pls.plugin_config_objects = pls._load_plugin_config_objects()
pls.file_name_config_map = pls._load_file_name_config_map()
actual = pls._load_plugin_config_items()
expected = [{'default_value': 3, 'name': 'k1', 'config_values': None,
'priority': 1, 'config_type': 'int', 'file': 'file_1',
'applicable_target': 'service_2', 'is_optional': False,
'scope': 'node', 'description': None},
{'default_value': None, 'name': 'k2',
'config_values': None, 'priority': 2,
'config_type': 'bool', 'file': 'file_1',
'applicable_target': 'service_2', 'is_optional': True,
'scope': 'cluster', 'description': None},
{'default_value': 'default_value_0', 'name': 'k0',
'config_values': None, 'priority': 2, 'file': 'file_1',
'config_type': 'string', 'applicable_target': 'service_2',
'is_optional': False, 'scope': 'cluster',
'description': 'description_0'},
{'default_value': None, 'name': 'k3',
'config_values': None, 'priority': 2,
'config_type': 'string', 'file': 'file_1',
'applicable_target': 'service_2', 'is_optional': True,
'scope': 'node', 'description': None},
{'default_value': None, 'name': 'k4',
'config_values': None, 'priority': 2,
'config_type': 'string', 'file': None,
'applicable_target': 'general', 'is_optional': False,
'scope': 'cluster', 'description': None}]
self.assertItemsEqual(expected, actual)
def test_load_plugin_configs(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.plugin_config_objects = pls._load_plugin_config_objects()
pls.file_name_config_map = pls._load_file_name_config_map()
pls.plugin_config_items = pls._load_plugin_config_items()
actual = pls._load_plugin_configs()
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None}},
'general': {None: {'k4': None}}}
self.assertEqual(expected, actual)
def test_load_default_plugin_configs(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.default_configs = pls._load_default_configs()
pls.plugin_config_objects = pls._load_plugin_config_objects()
pls.file_name_config_map = pls._load_file_name_config_map()
pls.plugin_config_items = pls._load_plugin_config_items()
pls.plugin_configs = pls._load_plugin_configs()
actual = pls._load_default_plugin_configs()
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None},
'file_2': {'k0': 'v0', 'k1': 'v1'}},
'general': {None: {'k4': None},
'file_3': {'content': 'Some unparsable data'}}}
self.assertEqual(expected, actual)
def test_load_plugin_config_objects(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.default_configs = pls._load_default_configs()
actual = pls._load_plugin_config_objects()
expected = [p.Config('k0', 'service_2', 'cluster',
default_value='default_value_0',
description='description_0'),
p.Config('k1', 'service_2', 'node',
config_type='int', default_value=3, priority=1),
p.Config('k2', 'service_2', 'cluster',
config_type='bool', is_optional=True),
p.Config('k3', 'service_2', 'node', is_optional=True),
p.Config('k4', 'general', 'cluster', is_optional=False)]
m_actual = map(lambda i: i.to_dict(), actual)
m_expected = map(lambda i: i.to_dict(), expected)
self.assertItemsEqual(m_expected, m_actual)
def test_get_node_process_service(self):
pls = self.plugin_spec
pls.service_node_process_map = pls._load_service_node_process_map()
actual = pls.get_node_process_service('node_process_0')
expected = 'service_2'
self.assertEqual(expected, actual)
def test_get_default_plugin_configs(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.default_configs = pls._load_default_configs()
pls.plugin_config_objects = pls._load_plugin_config_objects()
pls.file_name_config_map = pls._load_file_name_config_map()
pls.plugin_config_items = pls._load_plugin_config_items()
pls.plugin_configs = pls._load_plugin_configs()
pls.default_plugin_configs = pls._load_default_plugin_configs()
actual = pls.get_default_plugin_configs(['service_2'])
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None},
'file_2': {'k0': 'v0', 'k1': 'v1'}}}
self.assertEqual(expected, actual)
def test_get_config_file(self):
path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json'
plugin_spec = ps.PluginSpec(path)
arg = {'service': 'service_2', 'scope': 'node', 'name': 'k1'}
actual = plugin_spec.get_config_file(**arg)
expected = 'file_1'
self.assertEqual(expected, actual)
arg = {'service': 'service_1', 'scope': 'node', 'name': 'k1'}
actual = plugin_spec.get_config_file(**arg)
expected = None
self.assertEqual(expected, actual)
def test_get_version_config_objects(self):
actual = self.plugin_spec.get_version_config_objects()
expected = [p.Config(name='service_2 Version',
applicable_target='service_2',
scope='cluster',
config_type='dropdown',
config_values=[('v1', 'v1'), ('v2', 'v2')],
is_optional=False,
priority=1)]
m_actual = map(lambda i: i.to_dict(), actual)
m_expected = map(lambda i: i.to_dict(), expected)
self.assertItemsEqual(m_expected, m_actual)
def test_get_configs(self):
pls = self.plugin_spec
pls.service_file_name_map = pls._load_service_file_name_map()
pls.default_configs = pls._load_default_configs()
pls.plugin_config_objects = pls._load_plugin_config_objects()
actual = pls.get_configs()
expected = [p.Config('k0', 'service_2', 'cluster',
default_value='default_value_0',
description='description_0'),
p.Config('k1', 'service_2', 'node',
config_type='int', default_value=3, priority=1),
p.Config('k2', 'service_2', 'cluster',
config_type='bool', is_optional=True),
p.Config('k3', 'service_2', 'node', is_optional=True),
p.Config('k4', 'general', 'cluster', is_optional=False),
p.Config('service_2 Version', 'service_2', 'cluster',
config_type='dropdown',
config_values=[('v1', 'v1'), ('v2', 'v2')],
is_optional=False, priority=1)]
m_actual = map(lambda i: i.to_dict(), actual)
m_expected = map(lambda i: i.to_dict(), expected)
self.assertItemsEqual(m_expected, m_actual)
def test_init(self):
path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json'
plugin_spec = ps.PluginSpec(path)
actual = plugin_spec.service_file_name_map
expected = {'service_2': ['file_0', 'file_1', 'file_2'],
'general': [None, 'file_3']}
self.assertDictValueItemsEqual(expected, actual)
actual = plugin_spec.file_name_config_map
expected = {'file_1': ['k1', 'k0', 'k3', 'k2'], None: ['k4']}
self.assertDictValueItemsEqual(expected, actual)
actual = plugin_spec.default_configs
expected = {'service_2': {'file_1': {'k0': 'v0', 'k1': 'v1'},
'file_2': {'k0': 'v0', 'k1': 'v1'}},
'general': {'file_3': {'content': 'Some unparsable data'}}}
self.assertEqual(expected, actual)
actual = plugin_spec.service_node_process_map
expected = {'service_2': ['node_process_0', 'node_process_1']}
self.assertDictValueItemsEqual(expected, actual)
actual = plugin_spec.plugin_config_items
expected = [{'default_value': 3, 'name': 'k1', 'config_values': None,
'priority': 1, 'config_type': 'int', 'file': 'file_1',
'applicable_target': 'service_2', 'is_optional': False,
'scope': 'node', 'description': None},
{'default_value': None, 'name': 'k2',
'config_values': None, 'priority': 2,
'config_type': 'bool', 'file': 'file_1',
'applicable_target': 'service_2', 'is_optional': True,
'scope': 'cluster', 'description': None},
{'default_value': 'default_value_0', 'name': 'k0',
'config_values': None, 'priority': 2, 'file': u'file_1',
'config_type': 'string',
'applicable_target': u'service_2',
'is_optional': False, 'scope': u'cluster',
'description': 'description_0'},
{'default_value': None, 'name': 'k3',
'config_values': None, 'priority': 2,
'config_type': 'string', 'file': u'file_1',
'applicable_target': u'service_2', 'is_optional': True,
'scope': u'node', 'description': None},
{'default_value': None, 'name': 'k4',
'config_values': None, 'priority': 2,
'config_type': 'string', 'file': None,
'applicable_target': 'general', 'is_optional': False,
'scope': 'cluster', 'description': None}]
self.assertItemsEqual(expected, actual)
actual = plugin_spec.plugin_configs
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None}},
'general': {None: {'k4': None}}}
self.assertEqual(expected, actual)
actual = plugin_spec.default_plugin_configs
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None},
'file_2': {'k0': 'v0', 'k1': 'v1'}},
'general': {None: {'k4': None},
'file_3': {'content': 'Some unparsable data'}}}
self.assertEqual(expected, actual)
actual = plugin_spec._load_plugin_config_objects()
expected = [p.Config('k0', 'service_2', 'cluster',
default_value='default_value_0',
description='description_0'),
p.Config('k1', 'service_2', 'node',
config_type='int', default_value=3, priority=1),
p.Config('k2', 'service_2', 'cluster',
config_type='bool', is_optional=True),
p.Config('k3', 'service_2', 'node', is_optional=True),
p.Config('k4', 'general', 'cluster', is_optional=False)]
m_actual = map(lambda i: i.to_dict(), actual)
m_expected = map(lambda i: i.to_dict(), expected)
self.assertItemsEqual(m_expected, m_actual)
actual = plugin_spec.get_node_process_service('node_process_0')
expected = 'service_2'
self.assertEqual(expected, actual)
actual = plugin_spec.get_default_plugin_configs(['service_2'])
expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3,
'k2': None, 'k3': None},
'file_2': {'k0': 'v0', 'k1': 'v1'}}}
self.assertEqual(expected, actual)

View File

@ -0,0 +1,204 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import mock as m
import six
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.util.plugin_spec as ps
import sahara.plugins.mapr.versions.v4_0_1_mrv1.cluster_configurer as bcc
import sahara.swift.swift_helper as sh
import sahara.tests.unit.base as b
import sahara.tests.unit.plugins.mapr.stubs as s
import sahara.utils.files as f
__dirname__ = os.path.dirname(__file__)
class BaseClusterConfigurerTest(b.SaharaTestCase):
def assertItemsEqual(self, expected, actual):
for e in expected:
self.assertIn(e, actual)
for a in actual:
self.assertIn(a, expected)
@m.patch('sahara.context.ctx')
@m.patch('sahara.plugins.mapr.util.config.is_data_locality_enabled')
@m.patch('sahara.plugins.mapr.util.config_file_utils.to_file_content')
def test_configure_wo_generals(self, tfc_mock, gtm_mock, cc_mock):
def to_file_content(*args, **kargs):
data = args[0]
if isinstance(data, dict):
return dict(map(lambda i: (str(i[0]), str(i[1])),
six.iteritems(args[0])))
elif isinstance(data, str):
return {None: data}
tfc_mock.side_effect = to_file_content
gtm_mock.return_value = False
cc_mock.return_value = s.AttrDict(auth_uri='http://auth',
tenant_name='tenant_0',
tenant_id='tenant_id')
sh.CONF.os_region_name = None
i0 = s.Instance(instance_name='i0',
management_ip='192.168.1.10',
internal_ip='10.10.1.10')
i1 = s.Instance(instance_name='i1',
management_ip='192.168.1.11',
internal_ip='10.10.1.11')
i2 = s.Instance(instance_name='i2',
management_ip='192.168.1.12',
internal_ip='10.10.1.12')
np0 = ['ZooKeeper', 'FileServer', 'TaskTracker']
np1 = ['ZooKeeper', 'NFS', 'Oozie']
ng0 = s.NodeGroup(id='ng0', instances=[i0, i1], node_processes=np0)
ng1 = s.NodeGroup(id='ng1', instances=[i2], node_processes=np1)
cc = {'general': {}}
cluster = s.Cluster(node_groups=[ng0, ng1], cluster_configs=cc,
hadoop_version='4.0.1.mrv1')
plugin_spec = ps.PluginSpec(
'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json')
configurer = bcc.ClusterConfigurer(cluster, plugin_spec)
cu_mock = m.MagicMock()
configurer.conductor = m.MagicMock()
configurer.conductor.cluster_update = cu_mock
configurer.configure()
bcc_expected_path = (
'tests/unit/plugins/mapr/utils/resources/bcc_expected')
core_site = {'data': cfu.load_xml_file(('%s/core-site-0.xml'
% bcc_expected_path)),
'file': ('/opt/mapr/hadoop/hadoop-0.20.2'
'/conf/core-site.xml'),
'root': True,
'timeout': 120}
mapred_site = {'data': cfu.load_xml_file(('%s/mapred-site-0.xml'
% bcc_expected_path)),
'root': True,
'file': ('/opt/mapr/hadoop/hadoop-0.20.2'
'/conf/mapred-site.xml'),
'timeout': 120}
cldb = {'root': True,
'data': {'cldb.zookeeper.servers': ('192.168.1.10:5181,'
'192.168.1.11:5181,'
'192.168.1.12:5181')},
'timeout': 120,
'file': '/opt/mapr/conf/cldb.conf'}
hadoop_v = {'root': True,
'data': f.get_file_text('plugins/mapr/util'
'/resources/'
'hadoop_version') %
{"mode": 'classic'},
'timeout': 120,
'file': '/opt/mapr/conf/hadoop_version'}
self.assertItemsEqual(i0.remote().fs, [core_site, cldb, mapred_site,
hadoop_v])
self.assertItemsEqual(i1.remote().fs, [core_site, mapred_site, cldb,
hadoop_v])
self.assertItemsEqual(i2.remote().fs, [core_site, cldb,
hadoop_v])
@m.patch('sahara.context.ctx')
@m.patch('sahara.plugins.mapr.util.config.is_data_locality_enabled')
@m.patch('sahara.topology.topology_helper.generate_topology_map')
@m.patch('sahara.plugins.mapr.util.config_file_utils.to_file_content')
def test_configure_with_topology(self, tfc_mock, gtm_mock,
dle_mock, cc_mock):
def to_file_content(*args, **kargs):
data = args[0]
if isinstance(data, dict):
return dict(map(lambda i: (str(i[0]), str(i[1])),
six.iteritems(args[0])))
elif isinstance(data, str):
return {None: data}
tfc_mock.side_effect = to_file_content
dle_mock.return_value = True
gtm_mock.return_value = {'i0': 'r', '192.168.1.10': 'r',
'10.10.1.10': 'r',
'i1': 'r', '192.168.1.11': 'r',
'10.10.1.11': 'r',
'i2': 'r', '192.168.1.12': 'r',
'10.10.1.12': 'r'}
cc_mock.return_value = s.AttrDict(auth_uri='http://auth',
tenant_name='tenant_0',
tenant_id='tenant_id')
sh.CONF.os_region_name = None
i0 = s.Instance(instance_name='i0',
management_ip='192.168.1.10',
internal_ip='10.10.1.10')
i1 = s.Instance(instance_name='i1',
management_ip='192.168.1.11',
internal_ip='10.10.1.11')
i2 = s.Instance(instance_name='i2',
management_ip='192.168.1.12',
internal_ip='10.10.1.12')
np0 = ['ZooKeeper', 'FileServer', 'TaskTracker']
np1 = ['ZooKeeper', 'NFS', 'HBase RegionServer']
ng0 = s.NodeGroup(id='ng0', instances=[i0, i1], node_processes=np0)
ng1 = s.NodeGroup(id='ng1', instances=[i2], node_processes=np1)
cc = {'general': {}}
cluster = s.Cluster(node_groups=[ng0, ng1], cluster_configs=cc,
hadoop_version='4.0.1.mrv1')
plugin_spec = ps.PluginSpec(
'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json')
configurer = bcc.ClusterConfigurer(cluster, plugin_spec)
cu_mock = m.MagicMock()
configurer.conductor = m.MagicMock()
configurer.conductor.cluster_update = cu_mock
configurer.configure()
self.assertEqual(1, gtm_mock.call_count)
bcc_expected_path = (
'tests/unit/plugins/mapr/utils/resources/bcc_expected')
core_site = {'data': cfu.load_xml_file(('%s/core-site-1.xml'
% bcc_expected_path)),
'file': ('/opt/mapr/hadoop/hadoop-0.20.2'
'/conf/core-site.xml'),
'root': True,
'timeout': 120}
mapred_site = {'data': cfu.load_xml_file('%s/mapred-site-1.xml'
% bcc_expected_path),
'root': True,
'file': ('/opt/mapr/hadoop/hadoop-0.20.2'
'/conf/mapred-site.xml'),
'timeout': 120}
topology_data = {'data': gtm_mock.return_value,
'file': '/opt/mapr/topology.data',
'root': True, 'timeout': 120}
cldb = {'data': cfu.load_properties_file(('%s/cldb-1.conf'
% bcc_expected_path)),
'file': '/opt/mapr/conf/cldb.conf',
'root': True, 'timeout': 120}
t_sh = {'data': f.get_file_text('plugins/mapr/util'
'/resources/topology.sh'),
'file': '/opt/mapr/topology.sh',
'root': True, 'timeout': 120}
hadoop_v = {'root': True,
'data': f.get_file_text('plugins/mapr/util'
'/resources/hadoop_version') %
{'mode': 'classic'},
'timeout': 120,
'file': '/opt/mapr/conf/hadoop_version'}
self.assertItemsEqual(i0.remote().fs,
[core_site, mapred_site,
topology_data, cldb, t_sh, hadoop_v])
self.assertItemsEqual(i1.remote().fs,
[core_site, mapred_site,
topology_data, cldb, t_sh, hadoop_v])
self.assertItemsEqual(i2.remote().fs,
[core_site, topology_data, cldb, t_sh,
hadoop_v])

View File

@ -39,6 +39,7 @@ console_scripts =
sahara.cluster.plugins =
vanilla = sahara.plugins.vanilla.plugin:VanillaProvider
hdp = sahara.plugins.hdp.ambariplugin:AmbariPlugin
mapr = sahara.plugins.mapr.plugin:MapRPlugin
cdh = sahara.plugins.cdh.plugin:CDHPluginProvider
fake = sahara.plugins.fake.plugin:FakePluginProvider
spark = sahara.plugins.spark.plugin:SparkProvider
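
The sahara.cluster.plugins entry point added above is what makes the new provider discoverable by name. A minimal sketch of resolving it, assuming stevedore (which Sahara relies on for plugin loading) and an installed sahara package; get_title() is assumed here as part of the provider interface:

    from stevedore import driver

    # Look up the 'mapr' entry point in the sahara.cluster.plugins namespace
    # and instantiate the class it points to (MapRPlugin).
    mgr = driver.DriverManager(namespace='sahara.cluster.plugins',
                               name='mapr',
                               invoke_on_load=True)
    plugin = mgr.driver
    print(plugin.get_title())  # assumed provider method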