From e46cbb7f36b1801a2d909fc317d9c98445e75996 Mon Sep 17 00:00:00 2001 From: Artem Osadchiy Date: Tue, 12 Aug 2014 01:28:41 +0300 Subject: [PATCH] MapR plugin implementation Change-Id: Id08a3c7a8a40fd826cd902cedbc2590ca284e548 Implements: blueprint mapr-plugin --- MANIFEST.in | 17 + doc/source/userdoc/mapr_plugin.rst | 67 ++++ doc/source/userdoc/plugins.rst | 1 + sahara/plugins/mapr/__init__.py | 0 sahara/plugins/mapr/plugin.py | 88 +++++ sahara/plugins/mapr/util/__init__.py | 0 sahara/plugins/mapr/util/attr_dict.py | 20 ++ sahara/plugins/mapr/util/cluster_helper.py | 78 +++++ sahara/plugins/mapr/util/cluster_info.py | 76 ++++ sahara/plugins/mapr/util/config.py | 110 ++++++ sahara/plugins/mapr/util/config_file_utils.py | 81 +++++ sahara/plugins/mapr/util/config_utils.py | 77 ++++ sahara/plugins/mapr/util/dict_utils.py | 124 +++++++ sahara/plugins/mapr/util/func_utils.py | 167 +++++++++ sahara/plugins/mapr/util/maprfs_helper.py | 37 ++ sahara/plugins/mapr/util/names.py | 41 +++ sahara/plugins/mapr/util/plugin_spec.py | 198 +++++++++++ .../util/resources/create_disk_list_file.sh | 13 + .../mapr/util/resources/hadoop_version | 3 + .../plugins/mapr/util/resources/topology.sh | 20 ++ .../mapr/util/resources/waiting_script.sh | 9 + sahara/plugins/mapr/util/run_scripts.py | 71 ++++ sahara/plugins/mapr/util/scaling.py | 144 ++++++++ sahara/plugins/mapr/util/start_helper.py | 177 ++++++++++ sahara/plugins/mapr/util/validation_utils.py | 135 +++++++ sahara/plugins/mapr/util/wrapper.py | 28 ++ sahara/plugins/mapr/versions/__init__.py | 0 .../mapr/versions/base_cluster_configurer.py | 167 +++++++++ sahara/plugins/mapr/versions/base_context.py | 173 +++++++++ .../mapr/versions/base_version_handler.py | 115 ++++++ sahara/plugins/mapr/versions/edp_engine.py | 76 ++++ .../plugins/mapr/versions/v3_1_1/__init__.py | 0 .../versions/v3_1_1/cluster_configurer.py | 24 ++ .../v3_1_1/resources/default/cldb.conf | 63 ++++ .../v3_1_1/resources/default/core-site.xml | 57 +++ .../versions/v3_1_1/resources/default/exports | 30 ++ .../default/hadoop-metrics.properties | 41 +++ .../v3_1_1/resources/default/mfs.conf | 16 + .../v3_1_1/resources/default/nfsserver.conf | 43 +++ .../v3_1_1/resources/plugin_spec.json | 203 +++++++++++ .../mapr/versions/v3_1_1/start_utils.py | 34 ++ .../mapr/versions/v3_1_1/version_handler.py | 112 ++++++ .../mapr/versions/v4_0_1_mrv1/__init__.py | 0 .../v4_0_1_mrv1/cluster_configurer.py | 51 +++ .../v4_0_1_mrv1/resources/default/cldb.conf | 63 ++++ .../resources/default/core-site.xml | 57 +++ .../v4_0_1_mrv1/resources/default/exports | 30 ++ .../default/hadoop-metrics.properties | 41 +++ .../v4_0_1_mrv1/resources/default/mfs.conf | 16 + .../resources/default/nfsserver.conf | 43 +++ .../v4_0_1_mrv1/resources/plugin_spec.json | 203 +++++++++++ .../versions/v4_0_1_mrv1/version_handler.py | 114 ++++++ .../mapr/versions/v4_0_1_mrv2/__init__.py | 0 .../v4_0_1_mrv2/cluster_configurer.py | 24 ++ .../v4_0_1_mrv2/resources/default/cldb.conf | 63 ++++ .../resources/default/core-site.xml | 57 +++ .../v4_0_1_mrv2/resources/default/exports | 30 ++ .../default/hadoop-metrics.properties | 41 +++ .../v4_0_1_mrv2/resources/default/mfs.conf | 16 + .../resources/default/nfsserver.conf | 43 +++ .../v4_0_1_mrv2/resources/plugin_spec.json | 203 +++++++++++ .../versions/v4_0_1_mrv2/version_handler.py | 112 ++++++ .../mapr/versions/version_handler_factory.py | 53 +++ sahara/tests/integration/configs/config.py | 259 ++++++++++++++ .../integration/configs/itest.conf.sample | 4 + .../configs/itest.conf.sample-full | 182 
++++++++++ sahara/tests/integration/tests/base.py | 22 ++ .../tests/gating/test_mapr4_1_gating.py | 233 +++++++++++++ .../tests/gating/test_mapr4_2_gating.py | 233 +++++++++++++ .../tests/gating/test_mapr_gating.py | 330 ++++++++++++++++++ sahara/tests/unit/plugins/mapr/__init__.py | 0 sahara/tests/unit/plugins/mapr/stubs.py | 185 ++++++++++ .../tests/unit/plugins/mapr/utils/__init__.py | 0 .../utils/resources/bcc_expected/cldb-1.conf | 2 + .../resources/bcc_expected/core-site-0.xml | 69 ++++ .../resources/bcc_expected/core-site-1.xml | 82 +++++ .../resources/bcc_expected/mapred-site-0.xml | 8 + .../resources/bcc_expected/mapred-site-1.xml | 16 + .../resources/bcc_expected/topology-1.data | 9 + .../mapr/utils/resources/compute.topology | 6 + .../mapr/utils/resources/plugin_spec.json | 106 ++++++ .../mapr/utils/resources/plugin_spec_ci.json | 168 +++++++++ .../plugins/mapr/utils/resources/raw.data | 1 + .../mapr/utils/resources/test.properties | 4 + .../plugins/mapr/utils/resources/test.xml | 10 + .../plugins/mapr/utils/test_cluster_info.py | 51 +++ .../mapr/utils/test_config_file_utils.py | 96 +++++ .../plugins/mapr/utils/test_dict_utils.py | 196 +++++++++++ .../plugins/mapr/utils/test_func_utils.py | 202 +++++++++++ .../plugins/mapr/utils/test_plugin_spec.py | 324 +++++++++++++++++ .../unit/plugins/mapr/versions/__init__.py | 0 .../versions/test_base_cluster_configurer.py | 204 +++++++++++ setup.cfg | 1 + 93 files changed, 7199 insertions(+) create mode 100644 doc/source/userdoc/mapr_plugin.rst create mode 100644 sahara/plugins/mapr/__init__.py create mode 100644 sahara/plugins/mapr/plugin.py create mode 100644 sahara/plugins/mapr/util/__init__.py create mode 100644 sahara/plugins/mapr/util/attr_dict.py create mode 100644 sahara/plugins/mapr/util/cluster_helper.py create mode 100644 sahara/plugins/mapr/util/cluster_info.py create mode 100644 sahara/plugins/mapr/util/config.py create mode 100644 sahara/plugins/mapr/util/config_file_utils.py create mode 100644 sahara/plugins/mapr/util/config_utils.py create mode 100644 sahara/plugins/mapr/util/dict_utils.py create mode 100644 sahara/plugins/mapr/util/func_utils.py create mode 100644 sahara/plugins/mapr/util/maprfs_helper.py create mode 100644 sahara/plugins/mapr/util/names.py create mode 100644 sahara/plugins/mapr/util/plugin_spec.py create mode 100644 sahara/plugins/mapr/util/resources/create_disk_list_file.sh create mode 100644 sahara/plugins/mapr/util/resources/hadoop_version create mode 100755 sahara/plugins/mapr/util/resources/topology.sh create mode 100644 sahara/plugins/mapr/util/resources/waiting_script.sh create mode 100644 sahara/plugins/mapr/util/run_scripts.py create mode 100644 sahara/plugins/mapr/util/scaling.py create mode 100644 sahara/plugins/mapr/util/start_helper.py create mode 100644 sahara/plugins/mapr/util/validation_utils.py create mode 100644 sahara/plugins/mapr/util/wrapper.py create mode 100644 sahara/plugins/mapr/versions/__init__.py create mode 100644 sahara/plugins/mapr/versions/base_cluster_configurer.py create mode 100644 sahara/plugins/mapr/versions/base_context.py create mode 100644 sahara/plugins/mapr/versions/base_version_handler.py create mode 100644 sahara/plugins/mapr/versions/edp_engine.py create mode 100644 sahara/plugins/mapr/versions/v3_1_1/__init__.py create mode 100644 sahara/plugins/mapr/versions/v3_1_1/cluster_configurer.py create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/cldb.conf create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/core-site.xml 
create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/exports create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/hadoop-metrics.properties create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/mfs.conf create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/default/nfsserver.conf create mode 100644 sahara/plugins/mapr/versions/v3_1_1/resources/plugin_spec.json create mode 100644 sahara/plugins/mapr/versions/v3_1_1/start_utils.py create mode 100644 sahara/plugins/mapr/versions/v3_1_1/version_handler.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/__init__.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/cluster_configurer.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/cldb.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/core-site.xml create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/exports create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/hadoop-metrics.properties create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/mfs.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/nfsserver.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/plugin_spec.json create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv1/version_handler.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/__init__.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/cluster_configurer.py create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/cldb.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/core-site.xml create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/exports create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/hadoop-metrics.properties create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/mfs.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/nfsserver.conf create mode 100644 sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/plugin_spec.json create mode 100755 sahara/plugins/mapr/versions/v4_0_1_mrv2/version_handler.py create mode 100644 sahara/plugins/mapr/versions/version_handler_factory.py create mode 100644 sahara/tests/integration/tests/gating/test_mapr4_1_gating.py create mode 100644 sahara/tests/integration/tests/gating/test_mapr4_2_gating.py create mode 100644 sahara/tests/integration/tests/gating/test_mapr_gating.py create mode 100644 sahara/tests/unit/plugins/mapr/__init__.py create mode 100644 sahara/tests/unit/plugins/mapr/stubs.py create mode 100644 sahara/tests/unit/plugins/mapr/utils/__init__.py create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/cldb-1.conf create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-0.xml create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-1.xml create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-0.xml create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-1.xml create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/topology-1.data create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/compute.topology create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec.json 
create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json
create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/raw.data
create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/test.properties
create mode 100644 sahara/tests/unit/plugins/mapr/utils/resources/test.xml
create mode 100644 sahara/tests/unit/plugins/mapr/utils/test_cluster_info.py
create mode 100644 sahara/tests/unit/plugins/mapr/utils/test_config_file_utils.py
create mode 100644 sahara/tests/unit/plugins/mapr/utils/test_dict_utils.py
create mode 100644 sahara/tests/unit/plugins/mapr/utils/test_func_utils.py
create mode 100644 sahara/tests/unit/plugins/mapr/utils/test_plugin_spec.py
create mode 100644 sahara/tests/unit/plugins/mapr/versions/__init__.py
create mode 100644 sahara/tests/unit/plugins/mapr/versions/test_base_cluster_configurer.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 294b5173..f2fd1116 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -25,6 +25,15 @@ include sahara/plugins/hdp/versions/version_1_3_2/resources/*.sh
 include sahara/plugins/hdp/versions/version_2_0_6/resources/*.template
 include sahara/plugins/hdp/versions/version_2_0_6/resources/*.json
 include sahara/plugins/hdp/versions/version_2_0_6/resources/*.sh
+include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.sh
+include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.json
+include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.xml
+include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.sh
+include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.json
+include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.xml
+include sahara/plugins/mapr/versions/v3_1_1/resources/*.sh
+include sahara/plugins/mapr/versions/v3_1_1/resources/*.json
+include sahara/plugins/mapr/versions/v3_1_1/resources/*.xml
 include sahara/plugins/spark/resources/*.xml
 include sahara/plugins/spark/resources/*.sh
 include sahara/plugins/spark/resources/*.template
@@ -32,6 +41,14 @@ include sahara/resources/*.heat
 include sahara/service/edp/resources/*.xml
 include sahara/swift/resources/*.xml
 include sahara/tests/unit/plugins/vanilla/hadoop2/resources/*.txt
+include sahara/tests/unit/plugins/mapr/utils/resources/*.topology
+include sahara/tests/unit/plugins/mapr/utils/resources/*.json
+include sahara/tests/unit/plugins/mapr/utils/resources/*.data
+include sahara/tests/unit/plugins/mapr/utils/resources/*.properties
+include sahara/tests/unit/plugins/mapr/utils/resources/*.xml
+include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.conf
+include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.data
+include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.xml
 include sahara/tests/unit/resources/*.heat
 include sahara/tests/unit/resources/*.xml
 include sahara/tests/unit/resources/*.txt
diff --git a/doc/source/userdoc/mapr_plugin.rst b/doc/source/userdoc/mapr_plugin.rst
new file mode 100644
index 00000000..4c746be8
--- /dev/null
+++ b/doc/source/userdoc/mapr_plugin.rst
@@ -0,0 +1,67 @@
+MapR Distribution Plugin
+========================
+
+The MapR Sahara plugin allows you to provision MapR clusters on
+OpenStack quickly and conveniently.
+
+
+Operation
+---------
+
+The MapR plugin performs the following four primary functions during
+cluster creation:
+
+1. MapR Components Deployment - the plugin manages the deployment of the
+   required software to the target VMs
+2. Services Installation - MapR services are installed according to the
+   provided list of roles
+3. Services Configuration - the plugin combines default settings with
+   user-provided settings
+4. Services Start - the plugin starts the appropriate services according
+   to the specified roles
+
+Images
+------
+
+Prepared images should be used for cluster provisioning. They already
+have MapR 3.1.1 (with Apache Hadoop 0.20.2) and MapR 4.0.1 (with Apache
+Hadoop 2.4.1) installed.
+
+The MapR plugin requires the image to be tagged in the Sahara Image
+Registry with two tags: 'MapR' and '<MapR version>' (e.g. '4.0.1').
+
+Note that you should provide the username of the default cloud user
+used in the image:
+
++--------------+------------+
+| OS           | username   |
++==============+============+
+| Ubuntu 14.04 | ubuntu     |
++--------------+------------+
+| CentOS 6.5   | cloud-user |
++--------------+------------+
+
+
+Hadoop Version Support
+----------------------
+The MapR plugin currently supports Hadoop 0.20.2 and Hadoop 2.4.1.
+
+Cluster Validation
+------------------
+
+An MRv1 cluster is valid if and only if:
+
+1. The ZooKeeper component count per cluster is 1 or greater, and the
+   ZooKeeper service is up and running.
+2. Every node has either the FileServer component (up and running on
+   each node) or the NFS server component (up and running).
+3. If a node has the TaskTracker component, it must also have the
+   FileServer component.
+4. The Webserver component count per cluster is 0 or 1, and the
+   Webserver is up and running.
+
+A YARN cluster is valid if and only if:
+
+1. The ZooKeeper component count per cluster is 1 or greater, and the
+   ZooKeeper service is up and running.
+2. The ResourceManager component count per cluster is 1 or greater, and
+   the ResourceManager is up and running.
+3. Every node has either the FileServer component (up and running on
+   each node) or the NFS server component (up and running).
+4. The Webserver component count per cluster is 0 or 1, and the
+   Webserver is up and running.
+5. The HistoryServer component count per cluster equals 1, and the
+   HistoryServer is up and running.
+
+
+The MapR Plugin
+---------------
+For more information, please contact MapR.
diff --git a/doc/source/userdoc/plugins.rst b/doc/source/userdoc/plugins.rst
index eb4924b2..fb8c9bd7 100644
--- a/doc/source/userdoc/plugins.rst
+++ b/doc/source/userdoc/plugins.rst
@@ -8,3 +8,4 @@ distribution in various topologies and with management/monitoring tools.
 * :doc:`vanilla_plugin` - deploys Vanilla Apache Hadoop
 * :doc:`hdp_plugin` - deploys Hortonworks Data Platform
 * :doc:`spark_plugin` - deploys Apache Spark with Cloudera HDFS
+* :doc:`mapr_plugin` - deploys the MapR Distribution with the MapR File System
diff --git a/sahara/plugins/mapr/__init__.py b/sahara/plugins/mapr/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sahara/plugins/mapr/plugin.py b/sahara/plugins/mapr/plugin.py
new file mode 100644
index 00000000..4f3216f2
--- /dev/null
+++ b/sahara/plugins/mapr/plugin.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2014, MapR Technologies
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +# License for the specific language governing permissions and limitations +# under the License. + + +import sahara.plugins.mapr.versions.version_handler_factory as vhf +import sahara.plugins.provisioning as p + + +class MapRPlugin(p.ProvisioningPluginBase): + title = 'MapR Hadoop Distribution' + description = ('The MapR Distribution provides a full Hadoop stack that' + ' includes the MapR File System (MapR-FS), MapReduce,' + ' a complete Hadoop ecosystem, and the MapR Control System' + ' user interface') + hdfs_user = 'mapr' + + def _get_handler(self, hadoop_version): + return vhf.VersionHandlerFactory.get().get_handler(hadoop_version) + + def get_title(self): + return MapRPlugin.title + + def get_description(self): + return MapRPlugin.description + + def get_hdfs_user(self): + return MapRPlugin.hdfs_user + + def get_versions(self): + return vhf.VersionHandlerFactory.get().get_versions() + + def get_node_processes(self, hadoop_version): + return self._get_handler(hadoop_version).get_node_processes() + + def get_configs(self, hadoop_version): + return self._get_handler(hadoop_version).get_configs() + + def configure_cluster(self, cluster): + self._get_handler(cluster.hadoop_version).configure_cluster(cluster) + + def start_cluster(self, cluster): + self._get_handler(cluster.hadoop_version).start_cluster(cluster) + + def validate(self, cluster): + self._get_handler(cluster.hadoop_version).validate(cluster) + + def validate_scaling(self, cluster, existing, additional): + v_handler = self._get_handler(cluster.hadoop_version) + v_handler.validate_scaling(cluster, existing, additional) + + def scale_cluster(self, cluster, instances): + v_handler = self._get_handler(cluster.hadoop_version) + v_handler.scale_cluster(cluster, instances) + + def decommission_nodes(self, cluster, instances): + v_handler = self._get_handler(cluster.hadoop_version) + v_handler.decommission_nodes(cluster, instances) + + def get_oozie_server(self, cluster): + v_handler = self._get_handler(cluster.hadoop_version) + return v_handler.get_oozie_server(cluster) + + def get_name_node_uri(self, cluster): + v_handler = self._get_handler(cluster.hadoop_version) + return v_handler.get_name_node_uri(cluster) + + def get_oozie_server_uri(self, cluster): + v_handler = self._get_handler(cluster.hadoop_version) + return v_handler.get_oozie_server_uri(cluster) + + def get_resource_manager_uri(self, cluster): + v_handler = self._get_handler(cluster.hadoop_version) + return v_handler.get_resource_manager_uri(cluster) + + def get_edp_engine(self, cluster, job_type): + v_handler = self._get_handler(cluster.hadoop_version) + return v_handler.get_edp_engine(cluster, job_type) diff --git a/sahara/plugins/mapr/util/__init__.py b/sahara/plugins/mapr/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sahara/plugins/mapr/util/attr_dict.py b/sahara/plugins/mapr/util/attr_dict.py new file mode 100644 index 00000000..c85b2e48 --- /dev/null +++ b/sahara/plugins/mapr/util/attr_dict.py @@ -0,0 +1,20 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +class AttrDict(dict): + + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self diff --git a/sahara/plugins/mapr/util/cluster_helper.py b/sahara/plugins/mapr/util/cluster_helper.py new file mode 100644 index 00000000..a2dd9475 --- /dev/null +++ b/sahara/plugins/mapr/util/cluster_helper.py @@ -0,0 +1,78 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara.plugins.mapr.util import names +import sahara.plugins.utils as u + + +ZOOKEEPER_CLIENT_PORT = 5181 + + +def get_cldb_nodes_ip(cluster): + cldb_node_list = u.get_instances(cluster, names.CLDB) + return ','.join([i.management_ip for i in cldb_node_list]) + + +def get_zookeeper_nodes_ip(cluster): + zkeeper_node_list = u.get_instances(cluster, names.ZOOKEEPER) + return ','.join([i.management_ip for i in zkeeper_node_list]) + + +def get_zookeeper_nodes_ip_with_port(cluster): + zkeeper_node_list = u.get_instances(cluster, names.ZOOKEEPER) + return ','.join(['%s:%s' % (i.management_ip, ZOOKEEPER_CLIENT_PORT) + for i in zkeeper_node_list]) + + +def get_resourcemanager_ip(cluster): + rm_instance = u.get_instance(cluster, names.RESOURCE_MANAGER) + return rm_instance.management_ip + + +def get_historyserver_ip(cluster): + hs_instance = u.get_instance(cluster, names.HISTORY_SERVER) + return hs_instance.management_ip + + +def get_jobtracker(cluster): + instance = u.get_instance(cluster, names.JOBTRACKER) + return instance + + +def get_resourcemanager(cluster): + return u.get_instance(cluster, names.RESOURCE_MANAGER) + + +def get_nodemanagers(cluster): + return u.get_instances(cluster, names.NODE_MANAGER) + + +def get_oozie(cluster): + return u.get_instance(cluster, names.OOZIE) + + +def get_datanodes(cluster): + return u.get_instances(cluster, names.DATANODE) + + +def get_tasktrackers(cluster): + return u.get_instances(cluster, names.TASK_TRACKER) + + +def get_secondarynamenodes(cluster): + return u.get_instances(cluster, names.SECONDARY_NAMENODE) + + +def get_historyserver(cluster): + return u.get_instance(cluster, names.HISTORY_SERVER) diff --git a/sahara/plugins/mapr/util/cluster_info.py b/sahara/plugins/mapr/util/cluster_info.py new file mode 100644 index 00000000..40085179 --- /dev/null +++ b/sahara/plugins/mapr/util/cluster_info.py @@ -0,0 +1,76 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
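The ClusterInfo class defined below gives the version handlers one object for
querying a cluster's services, instances, and merged configurations. A minimal
usage sketch, assuming `cluster` and `plugin_spec` objects are already obtained
from Sahara's conductor and the plugin spec loader (both names are placeholders
here):

    import sahara.plugins.mapr.util.cluster_info as ci

    # 'cluster' and 'plugin_spec' are assumed to be in scope
    info = ci.ClusterInfo(cluster, plugin_spec)
    services = info.get_services()          # service names across all node groups
    defaults = info.get_default_configs()   # {service: {file: {key: value}}}
    zk_ips = info.get_instances_ip('ZooKeeper')  # management IPs of ZooKeeper nodes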
+ +import collections as c + +import six + +import sahara.plugins.utils as u + + +class ClusterInfo(object): + + # TODO(aosadchiy): perform lookup for plugin_spec + + def __init__(self, cluster, plugin_spec): + self.cluster = cluster + self.plugin_spec = plugin_spec + + def get_default_configs(self, node_group=None): + services = self.get_services(node_group) + return self.plugin_spec.get_default_plugin_configs(services) + + def get_services(self, node_group=None): + if not node_group: + return set(service for node_group in self.cluster.node_groups + for service in self.get_services(node_group)) + else: + return (set(self.plugin_spec.get_node_process_service(node_process) + for node_process in node_group.node_processes) + | set(['general'])) + + def get_user_configs(self, node_group=None): + services = self.get_services(node_group) + predicate = lambda i: i[0] in services and i[1] + configs = dict(filter( + predicate, six.iteritems(self.cluster.cluster_configs))) + scope = 'node' if node_group else 'cluster' + result = c.defaultdict(lambda: c.defaultdict(dict)) + for service, kv in six.iteritems(configs): + for key, value in six.iteritems(kv): + filename = self.plugin_spec.get_config_file( + scope, service, key) + result[service][filename][key] = value + return result + + def get_node_group_files(self): + return + + def get_node_groups(self, node_process=None): + return u.get_node_groups(self.cluster, node_process) + + def get_instances_count(self, node_process=None): + return u.get_instances_count(self.cluster, node_process) + + def get_instances(self, node_process=None): + return u.get_instances(self.cluster, node_process) + + def get_instance(self, node_process): + return u.get_instance(self.cluster, node_process) + + def get_instances_ip(self, node_process): + return [i.management_ip for i in self.get_instances(node_process)] + + def get_instance_ip(self, node_process): + return self.get_instance(node_process).management_ip diff --git a/sahara/plugins/mapr/util/config.py b/sahara/plugins/mapr/util/config.py new file mode 100644 index 00000000..eebb19a2 --- /dev/null +++ b/sahara/plugins/mapr/util/config.py @@ -0,0 +1,110 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
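The configure_topology_data() helper below serializes the cluster topology map
into MapR's /opt/mapr/topology.data format, one "host rack" pair per line, via
the config_file_utils converters. A short sketch of what that file content
looks like for an assumed two-node map (line order may vary, since it iterates
a dict):

    import sahara.plugins.mapr.util.config_file_utils as cfu

    topology_map = {'10.50.0.8': '/rack1', '10.50.0.9': '/rack2'}  # assumed values
    print(cfu.to_file_content(topology_map, 'topology'))
    # 10.50.0.8 /rack1
    # 10.50.0.9 /rack2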
+ +from oslo.config import cfg + +from sahara import exceptions as ex +from sahara.i18n import _ +from sahara.i18n import _LI +from sahara.openstack.common import log as logging +import sahara.plugins.mapr.util.config_file_utils as cfu +import sahara.plugins.mapr.versions.version_handler_factory as vhf +from sahara.plugins import provisioning as p +import sahara.plugins.utils as u +from sahara.topology import topology_helper as th +from sahara.utils import files as f + + +MAPR_HOME = '/opt/mapr' +LOG = logging.getLogger(__name__) +CONF = cfg.CONF +CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper") +ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster', + config_type="bool", priority=1, + default_value=True, is_optional=True) + + +def post_configure_instance(instance): + LOG.info(_LI('START: Post configuration for instance.')) + with instance.remote() as r: + if is_data_locality_enabled(instance.node_group.cluster): + LOG.debug('Data locality is enabled.') + t_script = MAPR_HOME + '/topology.sh' + LOG.debug('Start writing file %s', t_script) + r.write_file_to(t_script, f.get_file_text( + 'plugins/mapr/util/resources/topology.sh'), run_as_root=True) + LOG.debug('Done for writing file %s', t_script) + LOG.debug('Start executing command: chmod +x %s', t_script) + r.execute_command('chmod +x ' + t_script, run_as_root=True) + LOG.debug('Done for executing command.') + else: + LOG.debug('Data locality is disabled.') + LOG.info(_LI('END: Post configuration for instance.')) + + +def configure_instances(cluster, instances): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + p_spec = v_handler.get_plugin_spec() + configurer = v_handler.get_cluster_configurer(cluster, p_spec) + configurer.configure(instances) + + +def configure_topology_data(cluster, is_node_awareness): + LOG.info(_LI('START: configuring topology data.')) + if is_data_locality_enabled(cluster): + LOG.debug('Data locality is enabled.') + LOG.debug('Start generating topology map.') + topology_map = th.generate_topology_map(cluster, is_node_awareness) + LOG.debug('Done for generating topology map.') + topology_data = cfu.to_file_content(topology_map, 'topology') + for i in u.get_instances(cluster): + LOG.debug( + 'Start writing to file: %s/topology.data', MAPR_HOME) + i.remote().write_file_to(MAPR_HOME + "/topology.data", + topology_data, run_as_root=True) + LOG.debug('Done writing to file: %s/topology.data', MAPR_HOME) + else: + LOG.debug('Data locality is disabled.') + LOG.info(_LI('END: configuring topology data.')) + + +def get_plugin_configs(): + configs = [] + if CONF.enable_data_locality: + configs.append(ENABLE_DATA_LOCALITY) + return configs + + +def get_plugin_config_value(service, name, cluster): + if cluster: + for ng in cluster.node_groups: + cl_param = ng.configuration().get(service, {}).get(name) + if cl_param is not None: + return cl_param + + for c in get_plugin_configs(): + if c.applicable_target == service and c.name == name: + return c.default_value + + raise ex.NotFoundException( + name, (_("Unable to get parameter '%(name)s' from service %(service)s") + % {'name': name, 'service': service})) + + +def is_data_locality_enabled(cluster): + if not CONF.enable_data_locality: + return False + return get_plugin_config_value(ENABLE_DATA_LOCALITY.applicable_target, + ENABLE_DATA_LOCALITY.name, cluster) diff --git a/sahara/plugins/mapr/util/config_file_utils.py b/sahara/plugins/mapr/util/config_file_utils.py new file mode 
100644 index 00000000..53d86c19 --- /dev/null +++ b/sahara/plugins/mapr/util/config_file_utils.py @@ -0,0 +1,81 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import six + +import sahara.plugins.mapr.util.func_utils as fu +import sahara.utils.files as f +import sahara.utils.xmlutils as x + + +def load_properties_file(path): + predicate = fu.and_predicate(lambda i: len(i) != 0, + lambda i: not i.isspace(), + lambda i: not i.startswith('#')) + mapper = fu.chain_function(lambda i: tuple(i.split('=')), + lambda i: (i[0].strip(), i[1].strip())) + lines = f.get_file_text(path).splitlines() + return dict(map(mapper, filter(predicate, lines))) + + +def load_xml_file(path): + kv_mapper = lambda i: (x._get_text_from_node(i, 'name'), + x._adjust_field(x._get_text_from_node(i, 'value'))) + strip_mapper = lambda i: (i[0].strip(), i[1].strip()) + props = x.load_xml_document(path).getElementsByTagName('property') + return dict(map(strip_mapper, map(kv_mapper, props))) + + +def load_raw_file(path): + return {'content': f.get_file_text(path)} + + +def to_properties_file_content(data): + mapper = lambda i: '%s=%s\n' % i + reducer = lambda p, c: p + c + return reduce(reducer, map(mapper, six.iteritems(data)), '') + + +def to_xml_file_content(data): + return x.create_hadoop_xml(data) + + +def to_topology_file_content(data): + mapper = lambda i: '%s %s\n' % i + reducer = lambda p, c: p + c + return reduce(reducer, map(mapper, six.iteritems(data))) + + +def to_raw_file_content(data, cfu=True, conv=str): + return data['content'] if cfu else conv(data) + + +def load_file(path, file_type): + if file_type == 'properties': + return load_properties_file(path) + elif file_type == 'xml': + return load_xml_file(path) + elif file_type == 'raw': + return load_raw_file(path) + + +def to_file_content(data, file_type, *args, **kargs): + if file_type == 'properties': + return to_properties_file_content(data, *args, **kargs) + elif file_type == 'xml': + return to_xml_file_content(data, *args, **kargs) + elif file_type == 'topology': + return to_topology_file_content(data, *args, **kargs) + elif file_type == 'raw': + return to_raw_file_content(data, *args, **kargs) diff --git a/sahara/plugins/mapr/util/config_utils.py b/sahara/plugins/mapr/util/config_utils.py new file mode 100644 index 00000000..07f7639d --- /dev/null +++ b/sahara/plugins/mapr/util/config_utils.py @@ -0,0 +1,77 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
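The config_file_utils module above converts between parsed dicts and the
on-disk formats the plugin manages (properties, XML, raw, topology). For
instance, a properties dict renders like this, while load_properties_file()
applies the inverse mapping, skipping blank lines and '#' comments:

    import sahara.plugins.mapr.util.config_file_utils as cfu

    content = cfu.to_properties_file_content(
        {'cldb.zookeeper.servers': '10.50.0.5:5181'})  # assumed key/value
    # content == 'cldb.zookeeper.servers=10.50.0.5:5181\n'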
+ +import sahara.plugins.mapr.util.dict_utils as du +import sahara.plugins.mapr.util.func_utils as fu +import sahara.plugins.mapr.versions.version_handler_factory as vhf +import sahara.utils.configs as c + + +def get_scope_default_configs(version_handler, scope, services=None): + configs = map(lambda i: i.to_dict(), version_handler.get_configs()) + q_predicate = fu.field_equals_predicate('scope', scope) + if services: + at_predicate = fu.in_predicate('applicable_target', services) + q_predicate = fu.and_predicate(q_predicate, at_predicate) + q_fields = ['applicable_target', 'name', 'default_value'] + q_result = du.select(q_fields, configs, q_predicate) + m_reducer = du.iterable_to_values_pair_dict_reducer( + 'name', 'default_value') + return du.map_by_field_value(q_result, 'applicable_target', + dict, m_reducer) + + +def get_cluster_default_configs(version_handler, services=None): + return get_scope_default_configs(version_handler, 'cluster', services) + + +def get_node_default_configs(version_handler, services=None): + return get_scope_default_configs(version_handler, 'node', services) + + +def get_default_configs(version_handler, services=None): + cluster_configs = get_cluster_default_configs(version_handler, services) + node_configs = get_node_default_configs(version_handler, services) + return c.merge_configs(cluster_configs, node_configs) + + +def get_node_group_services(node_group): + h_version = node_group.cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + services = v_handler.get_node_processes() + node_processes = node_group.node_processes + return set(s for np in node_processes + for s in services if np in services[s]) + + +def get_cluster_configs(cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + default_configs = get_cluster_default_configs(v_handler) + user_configs = cluster.cluster_configs + return c.merge_configs(default_configs, user_configs) + + +def get_configs(node_group): + services = get_node_group_services(node_group) + h_version = node_group.cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + default_configs = get_default_configs(v_handler, services) + user_configs = node_group.configuration() + return c.merge_configs(default_configs, user_configs) + + +def get_service(version_handler, node_process): + node_processes = version_handler.get_node_processes() + return du.get_keys_by_value_2(node_processes, node_process) diff --git a/sahara/plugins/mapr/util/dict_utils.py b/sahara/plugins/mapr/util/dict_utils.py new file mode 100644 index 00000000..94ed65a6 --- /dev/null +++ b/sahara/plugins/mapr/util/dict_utils.py @@ -0,0 +1,124 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
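The dict_utils module below supplies the generic dictionary helpers used
throughout the plugin. deep_update(), for example, merges nested
{service: {file: {key: value}}} structures without discarding sibling keys,
which is how user settings are layered over defaults. An illustration with
assumed values:

    import sahara.plugins.mapr.util.dict_utils as du

    defaults = {'CLDB': {'cldb.conf': {'cldb.port': '7222'}}}
    user = {'CLDB': {'cldb.conf': {'cldb.jmx.port': '7220'}}}  # assumed override
    merged = du.deep_update(defaults, user)
    # {'CLDB': {'cldb.conf': {'cldb.port': '7222', 'cldb.jmx.port': '7220'}}}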
+ +import collections as cl +import copy as cp +import functools as ft +import itertools as it + +import six + +import sahara.plugins.mapr.util.func_utils as fu + + +def append_to_key(dict_0, dict_1): + return dict((k0, dict((k1, dict_1[k1]) for k1 in keys_1 if k1 in dict_1)) + for k0, keys_1 in six.iteritems(dict_0)) + + +def iterable_to_values_pair_dict_reducer(key_0, key_1): + def reducer(previous, iterable, mapper): + previous.update(dict(map(mapper, iterable))) + return previous + return ft.partial(reducer, mapper=fu.get_values_pair_function(key_0, + key_1)) + + +def flatten_to_list_reducer(): + def reducer(previous, iterable): + previous.extend(list(iterable)) + return previous + return reducer + + +def map_by_field_value(iterable, key, factory=list, + iterator_reducer=flatten_to_list_reducer()): + def reducer(mapping, current): + mapping[current[0]] = iterator_reducer( + mapping[current[0]], iter(current[1])) + return mapping + groups = it.groupby(iterable, fu.get_value_function(key)) + return reduce(reducer, groups, cl.defaultdict(factory)) + + +def map_by_fields_values(iterable, fields, factory=list, + reducer=flatten_to_list_reducer()): + if len(fields) == 1: + return map_by_field_value(iterable, fields[0], factory, reducer) + else: + return dict((k, map_by_fields_values(v, fields[1:], factory, reducer)) + for k, v in six.iteritems(map_by_field_value( + iterable, fields[0]))) + + +def get_keys_by_value_type(mapping, value_type): + return filter(lambda k: isinstance(mapping[k], value_type), mapping) + + +def deep_update(dict_0, dict_1, copy=True): + result = cp.deepcopy(dict_0) if copy else dict_0 + dict_valued_keys_0 = set(get_keys_by_value_type(dict_0, dict)) + dict_valued_keys_1 = set(get_keys_by_value_type(dict_1, dict)) + common_keys = dict_valued_keys_0 & dict_valued_keys_1 + if not common_keys: + result.update(dict_1) + else: + for k1, v1 in six.iteritems(dict_1): + result[k1] = deep_update( + dict_0[k1], v1) if k1 in common_keys else v1 + return result + + +def get_keys_by_value(mapping, value): + return [k for k, v in six.iteritems(mapping) if v == value] + +# TODO(aosadchiy): find more appropriate name + + +def get_keys_by_value_2(mapping, value): + return [k for k, v in six.iteritems(mapping) if value in v] + + +def iterable_to_values_list_reducer(key): + def reducer(previous, iterable, mapper): + previous.extend(map(mapper, iterable)) + return previous + return ft.partial(reducer, mapper=fu.get_value_function(key)) + + +def select(fields, iterable, predicate=fu.true_predicate): + return map(fu.extract_fields_function(fields), filter(predicate, iterable)) + +has_no_dict_values_predicate = lambda n: not get_keys_by_value_type(n, dict) + + +def list_of_vp_dicts_function(key_0, key_1): + def transformer(item, key_0, key_1): + return [fu.values_pair_to_dict_function(key_0, key_1)(i) + for i in six.iteritems(item)] + return ft.partial(transformer, key_0=key_0, key_1=key_1) + + +def flattened_dict(mapping, keys, is_terminal=has_no_dict_values_predicate, + transform=None): + if not transform: + transform = list_of_vp_dicts_function(*keys[-2:]) + if is_terminal(mapping): + return list(transform(mapping)) + else: + temp = [it.imap(fu.append_field_function(keys[0], key), + flattened_dict(value, keys[1:], + is_terminal, transform)) + for key, value in six.iteritems(mapping)] + return list(it.chain(*temp)) diff --git a/sahara/plugins/mapr/util/func_utils.py b/sahara/plugins/mapr/util/func_utils.py new file mode 100644 index 00000000..52c3a4d0 --- /dev/null +++ 
b/sahara/plugins/mapr/util/func_utils.py @@ -0,0 +1,167 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import copy as c +import functools as ft +import itertools as it + +import six + + +# predicates +true_predicate = lambda i: True +false_predicate = lambda i: False + + +def not_predicate(predicate): + return ft.partial(lambda i, p: not p(i), p=predicate) + + +def and_predicate(*predicates): + if len(predicates) == 1: + return predicates[0] + else: + def predicate(item, predicates): + for p in predicates: + if not p(item): + return False + return True + return ft.partial(predicate, predicates=predicates) + + +def or_predicate(*predicates): + if len(predicates) == 1: + return predicates[0] + else: + def predicate(item, predicates): + for p in predicates: + if p(item): + return True + return False + return ft.partial(predicate, predicates=predicates) + + +def impl_predicate(p0, p1): + return or_predicate(not_predicate(p0), p1) + + +def field_equals_predicate(key, value): + return ft.partial(lambda i, k, v: i[k] == v, k=key, v=value) + + +def like_predicate(template, ignored=[]): + if not template: + return true_predicate + elif len(template) == 1: + k, v = six.iteritems(template).next() + return true_predicate if k in ignored else field_equals_predicate(k, v) + else: + return and_predicate(*[field_equals_predicate(key, value) + for key, value in six.iteritems(template) + if key not in ignored]) + + +def in_predicate(key, values): + if not values: + return false_predicate + else: + return or_predicate(*[field_equals_predicate(key, value) + for value in values]) + +# functions + + +def chain_function(*functions): + return reduce(lambda p, c: ft.partial(lambda i, p, c: c(p(i)), p=p, c=c), + functions) + + +def copy_function(): + return lambda i: c.deepcopy(i) + + +def append_field_function(key, value): + def mapper(item, key, value): + item = c.deepcopy(item) + item[key] = value + return item + return ft.partial(mapper, key=key, value=value) + + +def append_fields_function(fields): + if not fields: + return copy_function() + elif len(fields) == 1: + key, value = six.iteritems(fields).next() + return append_field_function(key, value) + else: + return chain_function(*[append_field_function(key, value) + for key, value in six.iteritems(fields)]) + + +def get_values_pair_function(key_0, key_1): + return ft.partial(lambda i, k0, k1: (i[k0], i[k1]), k0=key_0, k1=key_1) + + +def get_field_function(key): + return ft.partial(lambda i, k: (k, i[k]), k=key) + + +def get_fields_function(keys): + return ft.partial(lambda i, k: [f(i) for f in [get_field_function(key) + for key in k]], k=keys) + + +def extract_fields_function(keys): + return lambda i: dict(get_fields_function(keys)(i)) + + +def get_value_function(key): + return ft.partial(lambda i, k: i[k], k=key) + + +def set_default_value_function(key, value): + def mapper(item, key, value): + item = c.deepcopy(item) + if key not in item: + item[key] = value + return item + return ft.partial(mapper, key=key, 
value=value) + + +def set_default_values_function(fields): + if not fields: + return copy_function() + elif len(fields) == 1: + key, value = six.iteritems(fields).next() + return set_default_value_function(key, value) + else: + return chain_function(*[set_default_value_function(key, value) + for key, value in six.iteritems(fields)]) + + +def values_pair_to_dict_function(key_0, key_1): + return ft.partial(lambda vp, k0, k1: {k0: vp[0], k1: vp[1]}, + k0=key_0, k1=key_1) + + +def flatten(iterable): + return it.chain.from_iterable(iterable) + + +def sync_execute_consumer(*consumers): + def consumer(argument, consumers): + for cn in consumers: + cn(argument) + return ft.partial(consumer, consumers=consumers) diff --git a/sahara/plugins/mapr/util/maprfs_helper.py b/sahara/plugins/mapr/util/maprfs_helper.py new file mode 100644 index 00000000..2d7fec37 --- /dev/null +++ b/sahara/plugins/mapr/util/maprfs_helper.py @@ -0,0 +1,37 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import uuid + +import six + +MV_TO_MAPRFS_CMD = ('sudo -u %(user)s' + ' hadoop fs -copyFromLocal %(source)s %(target)s' + ' && sudo rm -f %(source)s') +MKDIR_CMD = 'sudo -u %(user)s hadoop fs -mkdir -p %(path)s' + + +def put_file_to_maprfs(r, content, file_name, path, hdfs_user): + tmp_file_name = '/tmp/%s.%s' % (file_name, six.text_type(uuid.uuid4())) + r.write_file_to(tmp_file_name, content) + move_from_local(r, tmp_file_name, path + '/' + file_name, hdfs_user) + + +def move_from_local(r, source, target, hdfs_user): + args = {'user': hdfs_user, 'source': source, 'target': target} + r.execute_command(MV_TO_MAPRFS_CMD % args) + + +def create_maprfs_dir(remote, dir_name, hdfs_user): + remote.execute_command(MKDIR_CMD % {'user': hdfs_user, 'path': dir_name}) diff --git a/sahara/plugins/mapr/util/names.py b/sahara/plugins/mapr/util/names.py new file mode 100644 index 00000000..62490195 --- /dev/null +++ b/sahara/plugins/mapr/util/names.py @@ -0,0 +1,41 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
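The func_utils combinators above act as a small query language over lists of
config dicts; dict_utils and plugin_spec compose them to select and transform
config items. They compose as expected, for example:

    import sahara.plugins.mapr.util.func_utils as fu

    is_cluster = fu.field_equals_predicate('scope', 'cluster')
    named = fu.in_predicate('name', ['Enable Data Locality'])
    both = fu.and_predicate(is_cluster, named)
    both({'scope': 'cluster', 'name': 'Enable Data Locality'})  # True
    both({'scope': 'node', 'name': 'Enable Data Locality'})     # False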
+ +OOZIE = 'Oozie' +HIVE = 'Hive' +HIVE_METASTORE = 'HiveMetastore' +HIVE_SERVER2 = 'HiveServer2' +CLDB = 'CLDB' +FILE_SERVER = 'FileServer' +ZOOKEEPER = 'ZooKeeper' +RESOURCE_MANAGER = 'ResourceManager' +HISTORY_SERVER = 'HistoryServer' +IS_M7_ENABLED = 'Enable MapR-DB' +GENERAL = 'general' +JOBTRACKER = 'JobTracker' +NODE_MANAGER = 'NodeManager' +DATANODE = 'Datanode' +TASK_TRACKER = 'TaskTracker' +SECONDARY_NAMENODE = 'SecondaryNamenode' +NFS = 'NFS' +WEB_SERVER = 'Webserver' +WAIT_OOZIE_INTERVAL = 300 +WAIT_NODE_ALARM_NO_HEARTBEAT = 360 +ecosystem_components = ['Oozie', + 'Hive-Metastore', + 'HiveServer2', + 'HBase-Master', + 'HBase-RegionServer', + 'HBase-Client', + 'Pig'] diff --git a/sahara/plugins/mapr/util/plugin_spec.py b/sahara/plugins/mapr/util/plugin_spec.py new file mode 100644 index 00000000..4bdfce73 --- /dev/null +++ b/sahara/plugins/mapr/util/plugin_spec.py @@ -0,0 +1,198 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import json +import os.path + +import six + +import sahara.openstack.common.log as logging +import sahara.plugins.mapr.util.config_file_utils as cfu +import sahara.plugins.mapr.util.dict_utils as du +import sahara.plugins.mapr.util.func_utils as fu +import sahara.plugins.provisioning as p +import sahara.utils.files as fm + + +LOG = logging.getLogger(__name__) + + +class PluginSpec(object): + + def __init__(self, path): + self.base_dir = os.path.dirname(path) + self.plugin_spec_dict = self._load_plugin_spec_dict(path) + self.service_file_name_map = self._load_service_file_name_map() + self.default_configs = self._load_default_configs() + self.service_node_process_map = self._load_service_node_process_map() + self.plugin_config_objects = self._load_plugin_config_objects() + self.file_name_config_map = self._load_file_name_config_map() + self.plugin_config_items = self._load_plugin_config_items() + self.plugin_configs = self._load_plugin_configs() + self.default_plugin_configs = self._load_default_plugin_configs() + self.file_type_map = self._load_file_type_map() + + def _load_plugin_spec_dict(self, path): + LOG.debug('Loading plugin spec from %s', path) + plugin_spec_dict = json.loads(fm.get_file_text(path)) + return plugin_spec_dict + + def _load_service_file_name_map(self): + LOG.debug('Loading service -> filename mapping') + return dict((s['name'], [fn for fn in s['files']]) + for s in self.plugin_spec_dict['services'] + if 'files' in s and s['files']) + + def _load_default_configs(self): + LOG.debug('Loading defaults from local files') + file_name_data_map = {} + for f in self.plugin_spec_dict['files']: + if 'local' not in f: + LOG.debug('%s skipped. 
No "local" section', f['remote']) + continue + local_path = os.path.join(self.base_dir, f['local']) + LOG.debug('Loading %(local_path)s as default for %(remote)s', + {'local_path': local_path, 'remote': f['remote']}) + data = cfu.load_file(local_path, f['type']) + file_name_data_map[f['remote']] = data + return du.append_to_key(self.service_file_name_map, file_name_data_map) + + def _load_plugin_config_items(self): + LOG.debug('Loading full configs map for plugin') + items = map(lambda i: i.to_dict(), self.plugin_config_objects) + + def mapper(item): + file_name = du.get_keys_by_value_2( + self.file_name_config_map, item['name'])[0] + append_f = fu.append_field_function('file', file_name) + return append_f(item) + return map(mapper, items) + + def _load_plugin_configs(self): + LOG.debug('Loading plugin configs {service:{file:{name:value}}}') + m_fields = ['applicable_target', 'file'] + vp_fields = ('name', 'default_value') + reducer = du.iterable_to_values_pair_dict_reducer(*vp_fields) + return du.map_by_fields_values(self.plugin_config_items, + m_fields, dict, reducer) + + def _load_default_plugin_configs(self): + return du.deep_update(self.default_configs, self.plugin_configs) + + def _load_service_node_process_map(self): + LOG.debug('Loading {service:[node process]} mapping') + return dict((s['name'], [np for np in s['node_processes']]) + for s in self.plugin_spec_dict['services'] + if 'node_processes' in s and s['node_processes']) + + def _load_file_name_config_map(self): + LOG.debug('Loading {filename:[config_name]} names mapping') + r = {} + for fd in self.plugin_spec_dict['files']: + if 'configs' in fd: + r[fd['remote']] = [i['name'] + for ir, sd in six.iteritems(fd['configs']) + for s, items in six.iteritems(sd) + for i in items] + return r + + def _load_plugin_config_objects(self): + LOG.debug('Loading config objects for sahara-dashboard') + + def mapper(item): + req = ['name', 'applicable_target', 'scope'] + opt = ['description', 'config_type', 'config_values', + 'default_value', 'is_optional', 'priority'] + kargs = dict((k, item[k]) for k in req + opt if k in item) + return p.Config(**kargs) + result = [] + for file_dict in self.plugin_spec_dict['files']: + if 'configs' not in file_dict: + LOG.debug('%s skipped. 
No "configs" section', + file_dict['remote']) + continue + remote_path = file_dict['remote'] + applicable_target = du.get_keys_by_value_2( + self.service_file_name_map, remote_path)[0] + for is_required, scope_dict in six.iteritems(file_dict['configs']): + is_optional = is_required != 'required' + for scope, items in six.iteritems(scope_dict): + fields = {'file': remote_path, 'is_optional': is_optional, + 'scope': scope, + 'applicable_target': applicable_target} + append_f = fu.append_fields_function(fields) + result.extend([append_f(i) for i in items]) + return map(mapper, result) + + def _load_file_type_map(self): + LOG.debug('Loading {filename:type} mapping') + return dict((f['remote'], f['type']) + for f in self.plugin_spec_dict['files']) + + def get_node_process_service(self, node_process): + return du.get_keys_by_value_2(self.service_node_process_map, + node_process)[0] + + def get_default_plugin_configs(self, services): + return dict((k, self.default_plugin_configs[k]) + for k in services if k in self.default_plugin_configs) + + def get_config_file(self, scope, service, name): + p_template = { + 'applicable_target': service, 'scope': scope, 'name': name} + q_fields = ['file'] + q_predicate = fu.like_predicate(p_template) + q_source = self.plugin_config_items + q_result = du.select(q_fields, q_source, q_predicate) + if q_result and 'file' in q_result[0]: + return q_result[0]['file'] + else: + return None + + def get_file_type(self, file_name): + if file_name in self.file_type_map: + return self.file_type_map[file_name] + else: + return None + + def get_service_for_file_name(self, file_name): + return du.get_keys_by_value_2(self.service_file_name_map, file_name)[0] + + def get_version_config_objects(self): + common_fields = {'scope': 'cluster', + 'config_type': 'dropdown', + 'is_optional': False, + 'priority': 1} + + def has_version_field(service): + return 'versions' in service + + def get_versions(service): + return {'name': '%s Version' % service['name'], + 'applicable_target': service['name'], + 'config_values': [(v, v) for v in service['versions']]} + + def add_common_fields(item): + item.update(common_fields) + return item + + def to_config(item): + return p.Config(**item) + + mapper = fu.chain_function(get_versions, add_common_fields, to_config) + source = self.plugin_spec_dict['services'] + return map(mapper, filter(has_version_field, source)) + + def get_configs(self): + return self.plugin_config_objects + self.get_version_config_objects() diff --git a/sahara/plugins/mapr/util/resources/create_disk_list_file.sh b/sahara/plugins/mapr/util/resources/create_disk_list_file.sh new file mode 100644 index 00000000..b9805744 --- /dev/null +++ b/sahara/plugins/mapr/util/resources/create_disk_list_file.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +disk_list_file=/tmp/disk.list + +if [ -f ${disk_list_file} ]; then + rm -f ${disk_list_file} +fi + +for path in $*; do + device=`findmnt ${path} -cno SOURCE` + umount -f ${device} + echo ${device} >> ${disk_list_file} +done diff --git a/sahara/plugins/mapr/util/resources/hadoop_version b/sahara/plugins/mapr/util/resources/hadoop_version new file mode 100644 index 00000000..861e8b09 --- /dev/null +++ b/sahara/plugins/mapr/util/resources/hadoop_version @@ -0,0 +1,3 @@ +classic_version=0.20.2 +yarn_version=2.4.1 +default_mode=%(mode)s \ No newline at end of file diff --git a/sahara/plugins/mapr/util/resources/topology.sh b/sahara/plugins/mapr/util/resources/topology.sh new file mode 100755 index 00000000..3c8a8189 --- /dev/null +++ 
b/sahara/plugins/mapr/util/resources/topology.sh @@ -0,0 +1,20 @@ +#!/bin/bash +MAPR_HOME=/opt/mapr + +while [ $# -gt 0 ] ; do + nodeArg=$1 + exec< ${MAPR_HOME}/topology.data + result="" + while read line ; do + ar=( $line ) + if [ "${ar[0]}" = "$nodeArg" ]; then + result="${ar[1]}" + fi + done + shift + if [ -z "$result" ]; then + echo -n "/default/rack " + else + echo -n "$result " + fi +done diff --git a/sahara/plugins/mapr/util/resources/waiting_script.sh b/sahara/plugins/mapr/util/resources/waiting_script.sh new file mode 100644 index 00000000..6c14452d --- /dev/null +++ b/sahara/plugins/mapr/util/resources/waiting_script.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +while true; do + if [ -f '/tmp/launching-mapr-mfs.lck' ]; then + sleep 5 + else + break + fi +done diff --git a/sahara/plugins/mapr/util/run_scripts.py b/sahara/plugins/mapr/util/run_scripts.py new file mode 100644 index 00000000..13ed8ed8 --- /dev/null +++ b/sahara/plugins/mapr/util/run_scripts.py @@ -0,0 +1,71 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara.i18n import _LI +from sahara.openstack.common import log as logging + + +LOG = logging.getLogger(__name__) + + +def run_configure_sh(remote, script_string): + LOG.info(_LI("Running configure.sh script")) + remote.execute_command(script_string, run_as_root=True) + + +def start_zookeeper(remote): + LOG.info(_LI("Starting mapr-zookeeper")) + remote.execute_command('service mapr-zookeeper start', run_as_root=True) + + +def start_oozie(remote): + LOG.info(_LI("Starting mapr-oozie")) + remote.execute_command('service mapr-oozie start', + run_as_root=True, + raise_when_error=False) + + +def start_hive_metastore(remote): + LOG.info(_LI("Starting mapr-hivemetastore")) + remote.execute_command('service mapr-hivemetastore start', + run_as_root=True) + + +def start_hive_server2(remote): + LOG.info(_LI("Starting mapr-hiveserver2")) + remote.execute_command('service mapr-hiveserver2 start', run_as_root=True) + + +def start_warden(remote): + LOG.info(_LI("Starting mapr-warden")) + remote.execute_command('service mapr-warden start', run_as_root=True) + + +def start_cldb(remote): + LOG.info(_LI("Starting mapr-cldb")) + remote.execute_command('service mapr-cldb start', run_as_root=True) + + +def start_node_manager(remote): + LOG.info(_LI("Starting nodemanager")) + remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0' + '/sbin/yarn-daemon.sh start nodemanager'), + run_as_root=True) + + +def start_resource_manager(remote): + LOG.info(_LI("Starting resourcemanager")) + remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0' + '/sbin/yarn-daemon.sh start resourcemanager'), + run_as_root=True) diff --git a/sahara/plugins/mapr/util/scaling.py b/sahara/plugins/mapr/util/scaling.py new file mode 100644 index 00000000..b9eddb54 --- /dev/null +++ b/sahara/plugins/mapr/util/scaling.py @@ -0,0 +1,144 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in
compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara import context +from sahara.i18n import _LI +from sahara.openstack.common import log as logging +from sahara.plugins.mapr.util import config +from sahara.plugins.mapr.util import names +from sahara.plugins.mapr.util import run_scripts +from sahara.plugins.mapr.util import start_helper +from sahara.utils import general as gen + + +LOG = logging.getLogger(__name__) + +STOP_WARDEN_CMD = 'service mapr-warden stop' +STOP_ZOOKEEPER_CMD = 'service mapr-zookeeper stop' +GET_SERVER_ID_CMD = ('maprcli node list -json -filter [ip==%s] -columns id' + ' | grep id | grep -o \'[0-9]*\'') +MOVE_NODE_CMD = 'maprcli node move -serverids %s -topology /decommissioned' +GET_HOSTNAME_CMD = ('maprcli node list -json -filter [ip==%s]' + ' -columns hostname | grep hostname' + ' | grep -Po \'(?<=("hostname":")).*?(?=")\'') +REMOVE_NODE_CMD = 'maprcli node remove -filter [ip==%(ip)s] -nodes %(nodes)s' +REMOVE_MAPR_PACKAGES_CMD = ('python -mplatform | grep Ubuntu ' + '&& apt-get remove mapr-\* -y' + ' || yum remove mapr-\* -y') +REMOVE_MAPR_HOME_CMD = 'rm -rf /opt/mapr' +REMOVE_MAPR_CORES_CMD = 'rm -rf /opt/cores/*.core.*' + + +def scale_cluster(cluster, instances, disk_setup_script_path, waiting_script, + context, configure_sh_string, is_node_awareness): + LOG.info(_LI('START: Cluster scaling. Cluster = %s'), cluster.name) + for inst in instances: + start_helper.install_role_on_instance(inst, context) + config.configure_instances(cluster, instances) + start_services(cluster, instances, disk_setup_script_path, + waiting_script, configure_sh_string) + LOG.info(_LI('END: Cluster scaling. Cluster = %s'), cluster.name) + + +def decommission_nodes(cluster, instances, configure_sh_string): + LOG.info(_LI('Start decommission. Cluster = %s'), cluster.name) + move_node(cluster, instances) + stop_services(cluster, instances) + context.sleep(names.WAIT_NODE_ALARM_NO_HEARTBEAT) + remove_node(cluster, instances) + remove_services(cluster, instances) + if check_for_cldb_or_zookeeper_service(instances): + all_instances = gen.get_instances(cluster) + current_cluster_instances = [ + x for x in all_instances if x not in instances] + for inst in current_cluster_instances: + start_helper.exec_configure_sh_on_instance( + cluster, inst, configure_sh_string) + LOG.info(_LI('End decommission. 
Cluster = %s'), cluster.name) + + +def start_services(cluster, instances, disk_setup_script_path, + waiting_script, configure_sh_string): + LOG.info(_LI('START: Starting services.')) + for inst in instances: + start_helper.exec_configure_sh_on_instance( + cluster, inst, configure_sh_string) + start_helper.wait_for_mfs_unlock(cluster, waiting_script) + start_helper.setup_maprfs_on_instance(inst, disk_setup_script_path) + + if check_if_is_zookeeper_node(inst): + run_scripts.start_zookeeper(inst.remote()) + + run_scripts.start_warden(inst.remote()) + + if check_for_cldb_or_zookeeper_service(instances): + start_helper.exec_configure_sh_on_cluster( + cluster, configure_sh_string) + LOG.info(_LI('END: Starting services.')) + + +def stop_services(cluster, instances): + LOG.info(_LI("Stop warden and zookeeper")) + for instance in instances: + with instance.remote() as r: + r.execute_command(STOP_WARDEN_CMD, run_as_root=True) + if check_if_is_zookeeper_node(instance): + r.execute_command(STOP_ZOOKEEPER_CMD, run_as_root=True) + LOG.info(_LI("Warden and zookeeper stopped")) + + +def move_node(cluster, instances): + LOG.info(_LI("Start moving the node to the /decommissioned")) + for instance in instances: + with instance.remote() as r: + command = GET_SERVER_ID_CMD % instance.management_ip + ec, out = r.execute_command(command, run_as_root=True) + command = MOVE_NODE_CMD % out.strip() + r.execute_command(command, run_as_root=True) + LOG.info(_LI("Nodes moved to the /decommissioned")) + + +def remove_node(cluster, instances): + LOG.info(_LI("Start removing the nodes")) + for instance in instances: + with instance.remote() as r: + command = GET_HOSTNAME_CMD % instance.management_ip + ec, out = r.execute_command(command, run_as_root=True) + command = REMOVE_NODE_CMD % {'ip': instance.management_ip, + 'nodes': out.strip()} + r.execute_command(command, run_as_root=True) + LOG.info(_LI("Nodes removed")) + + +def remove_services(cluster, instances): + LOG.info(_LI("Start removing all mapr services")) + for instance in instances: + with instance.remote() as r: + r.execute_command(REMOVE_MAPR_PACKAGES_CMD, run_as_root=True) + r.execute_command(REMOVE_MAPR_HOME_CMD, run_as_root=True) + r.execute_command(REMOVE_MAPR_CORES_CMD, run_as_root=True) + LOG.info(_LI("All mapr services removed")) + + +def check_if_is_zookeeper_node(instance): + processes_list = instance.node_group.node_processes + return names.ZOOKEEPER in processes_list + + +def check_for_cldb_or_zookeeper_service(instances): + for inst in instances: + np_list = inst.node_group.node_processes + if names.ZOOKEEPER in np_list or names.CLDB in np_list: + return True + return False diff --git a/sahara/plugins/mapr/util/start_helper.py b/sahara/plugins/mapr/util/start_helper.py new file mode 100644 index 00000000..fdbe25d7 --- /dev/null +++ b/sahara/plugins/mapr/util/start_helper.py @@ -0,0 +1,177 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
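The decommission path above is deliberately ordered: a node is first moved to the /decommissioned topology, its services are stopped, and only after CLDB has had WAIT_NODE_ALARM_NO_HEARTBEAT seconds to register the lost heartbeat are the node entry and the packages removed. A rough illustration of how the maprcli command templates above expand (the IP address and server id are made-up values, not taken from this patch):

    # Python sketch, illustrative values only.
    GET_SERVER_ID_CMD = ('maprcli node list -json -filter [ip==%s] -columns id'
                         " | grep id | grep -o '[0-9]*'")
    MOVE_NODE_CMD = 'maprcli node move -serverids %s -topology /decommissioned'

    print(GET_SERVER_ID_CMD % '10.0.0.5')
    # maprcli node list -json -filter [ip==10.0.0.5] -columns id | grep id | grep -o '[0-9]*'
    print(MOVE_NODE_CMD % '4056817231904420981')
    # maprcli node move -serverids 4056817231904420981 -topology /decommissioned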
+ +from sahara import context +from sahara.i18n import _LI +from sahara.openstack.common import log as logging +from sahara.plugins.mapr.util import names +from sahara.plugins.mapr.util import run_scripts +import sahara.plugins.mapr.versions.version_handler_factory as vhf +import sahara.plugins.utils as utils +from sahara.utils import files + + +LOG = logging.getLogger(__name__) + + +def exec_configure_sh_on_cluster(cluster, script_string): + inst_list = utils.get_instances(cluster) + for n in inst_list: + exec_configure_sh_on_instance(cluster, n, script_string) + + +def exec_configure_sh_on_instance(cluster, instance, script_string): + LOG.info(_LI('START: Executing configure.sh')) + if check_for_mapr_db(cluster): + script_string += ' -M7' + if not check_if_mapr_user_exist(instance): + script_string += ' --create-user' + LOG.debug('script_string = %s', script_string) + instance.remote().execute_command(script_string, run_as_root=True) + LOG.info(_LI('END: Executing configure.sh')) + + +def check_for_mapr_db(cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + return v_handler.get_context(cluster).is_m7_enabled() + + +def setup_maprfs_on_cluster(cluster, path_to_disk_setup_script): + mapr_node_list = utils.get_instances(cluster, 'FileServer') + for instance in mapr_node_list: + setup_maprfs_on_instance(instance, path_to_disk_setup_script) + + +def setup_maprfs_on_instance(instance, path_to_disk_setup_script): + LOG.info(_LI('START: Setup maprfs on instance %s'), instance.instance_name) + create_disk_list_file(instance, path_to_disk_setup_script) + execute_disksetup(instance) + LOG.info(_LI('END: Setup maprfs on instance.')) + + +def create_disk_list_file(instance, path_to_disk_setup_script): + LOG.info(_LI('START: Creating disk list file.')) + script_path = '/tmp/disk_setup_script.sh' + rmt = instance.remote() + LOG.debug('Writing /tmp/disk_setup_script.sh') + rmt.write_file_to( + script_path, files.get_file_text(path_to_disk_setup_script)) + LOG.debug('Start executing command: chmod +x %s', script_path) + rmt.execute_command('chmod +x ' + script_path, run_as_root=True) + LOG.debug('Finished executing command.') + args = ' '.join(instance.node_group.storage_paths()) + cmd = '%s %s' % (script_path, args) + LOG.debug('Executing %s', cmd) + rmt.execute_command(cmd, run_as_root=True) + LOG.info(_LI('END: Creating disk list file.')) + + +def execute_disksetup(instance): + LOG.info(_LI('START: Executing disksetup.')) + rmt = instance.remote() + rmt.execute_command( + '/opt/mapr/server/disksetup -F /tmp/disk.list', run_as_root=True) + LOG.info(_LI('END: Executing disksetup.')) + + +def wait_for_mfs_unlock(cluster, path_to_waiting_script): + mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER) + for instance in mapr_node_list: + create_waiting_script_file(instance, path_to_waiting_script) + exec_waiting_script_on_instance(instance) + + +def start_zookeeper_nodes_on_cluster(cluster): + zkeeper_node_list = utils.get_instances(cluster, names.ZOOKEEPER) + for z_keeper_node in zkeeper_node_list: + run_scripts.start_zookeeper(z_keeper_node.remote()) + + +def start_warden_on_cluster(cluster): + node_list = utils.get_instances(cluster) + for node in node_list: + run_scripts.start_warden(node.remote()) + + +def start_warden_on_cldb_nodes(cluster): + node_list = utils.get_instances(cluster, names.CLDB) + for node in node_list: + run_scripts.start_warden(node.remote()) + + +def start_warden_on_other_nodes(cluster): + 
node_list = utils.get_instances(cluster) + for node in node_list: + if names.CLDB not in node.node_group.node_processes: + run_scripts.start_warden(node.remote()) + + +def create_waiting_script_file(instance, path_to_waiting_script): + LOG.info(_LI('START: Creating waiting script file.')) + script_path = '/tmp/waiting_script.sh' + rmt = instance.remote() + rmt.write_file_to(script_path, files.get_file_text(path_to_waiting_script)) + LOG.info(_LI('END: Creating waiting script file.')) + + +def exec_waiting_script_on_instance(instance): + LOG.info(_LI('START: Waiting script')) + rmt = instance.remote() + rmt.execute_command('chmod +x /tmp/waiting_script.sh', run_as_root=True) + rmt.execute_command('/tmp/waiting_script.sh', run_as_root=True) + LOG.info(_LI('END: Waiting script')) + + +def check_if_mapr_user_exist(instance): + ec, out = instance.remote().execute_command('id -u mapr', + run_as_root=True, + raise_when_error=False) + return ec == 0 + + +def check_for_mapr_component(instance, component_name): + component_list = instance.node_group.node_processes + return component_name in component_list + + +def install_role_on_instance(instance, cluster_context): + LOG.info(_LI('START: Installing roles on node')) + roles_list = instance.node_group.node_processes + exec_str = (cluster_context.get_install_manager() + + cluster_context.get_roles_str(roles_list)) + LOG.debug('Executing "%(command)s" on %(instance)s', + {'command': exec_str, 'instance': instance.instance_id}) + + instance.remote().execute_command(exec_str, run_as_root=True, timeout=900) + LOG.info(_LI('END: Installing roles on node')) + + +def install_roles(cluster, cluster_context): + LOG.info(_LI('START: Installing roles on cluster')) + instances = utils.get_instances(cluster) + with context.ThreadGroup(len(instances)) as tg: + for instance in instances: + tg.spawn('install_roles_%s' % instance.instance_id, + install_role_on_instance, + instance, + cluster_context) + LOG.info(_LI('END: Installing roles on cluster')) + + +def start_ecosystem(cluster_context): + oozie_inst = cluster_context.get_oozie_instance() + if oozie_inst is not None: + context.sleep(names.WAIT_OOZIE_INTERVAL) + run_scripts.start_oozie(oozie_inst.remote()) diff --git a/sahara/plugins/mapr/util/validation_utils.py b/sahara/plugins/mapr/util/validation_utils.py new file mode 100644 index 00000000..74836500 --- /dev/null +++ b/sahara/plugins/mapr/util/validation_utils.py @@ -0,0 +1,135 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
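install_roles above spawns one thread per instance through Sahara's context.ThreadGroup, so installation time is bounded by the slowest node rather than by the node count. A rough standard-library equivalent of that fan-out (concurrent.futures stands in for ThreadGroup here; ThreadGroup's own thread naming and error aggregation are not reproduced):

    import concurrent.futures


    def install_roles_parallel(instances, cluster_context):
        # One worker per instance, mirroring tg.spawn() in install_roles().
        with concurrent.futures.ThreadPoolExecutor(len(instances)) as pool:
            futures = [pool.submit(install_role_on_instance, inst,
                                   cluster_context)
                       for inst in instances]
            for f in futures:
                f.result()  # re-raise the first failure, if any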
+ +import functools as ft + +from sahara.i18n import _ +import sahara.plugins.exceptions as e +import sahara.plugins.mapr.util.cluster_info as ci +import sahara.plugins.mapr.util.wrapper as w + + +class LessThanCountException(e.InvalidComponentCountException): + + def __init__(self, component, expected_count, count): + super(LessThanCountException, self).__init__( + component, expected_count, count) + self.message = (_("Hadoop cluster should contain at least" + " %(expected_count)d %(component)s component(s)." + " Actual %(component)s count is %(count)d") + % {'expected_count': expected_count, + 'component': component, 'count': count}) + + +class MoreThanCountException(e.InvalidComponentCountException): + + def __init__(self, component, expected_count, count): + super(MoreThanCountException, self).__init__( + component, expected_count, count) + self.message = (_("Hadoop cluster should contain not more than" + " %(expected_count)d %(component)s component(s)." + " Actual %(component)s count is %(count)d") + % {'expected_count': expected_count, + 'component': component, 'count': count}) + + +class NodeRequiredServiceMissingException(e.RequiredServiceMissingException): + + def __init__(self, service_name, required_by=None): + super(NodeRequiredServiceMissingException, self).__init__( + service_name, required_by) + self.message = _('Node is missing a service: %s') % service_name + if required_by: + self.message = (_('%(message)s, required by service:' + ' %(required_by)s') + % {'message': self.message, + 'required_by': required_by}) + + +def not_less_than_count_component_vr(component, count): + def validate(cluster, component, count): + c_info = ci.ClusterInfo(cluster, None) + actual_count = c_info.get_instances_count(component) + if actual_count < count: + raise LessThanCountException(component, count, actual_count) + return ft.partial(validate, component=component, count=count) + + +def not_more_than_count_component_vr(component, count): + def validate(cluster, component, count): + c_info = ci.ClusterInfo(cluster, None) + actual_count = c_info.get_instances_count(component) + if actual_count > count: + raise MoreThanCountException(component, count, actual_count) + return ft.partial(validate, component=component, count=count) + + +def equal_count_component_vr(component, count): + def validate(cluster, component, count): + c_info = ci.ClusterInfo(cluster, None) + actual_count = c_info.get_instances_count(component) + if actual_count != count: + raise e.InvalidComponentCountException( + component, count, actual_count) + return ft.partial(validate, component=component, count=count) + + +def require_component_vr(component): + def validate(instance, component): + if component not in instance.node_group.node_processes: + raise NodeRequiredServiceMissingException(component) + return ft.partial(validate, component=component) + + +def require_of_listed_components(components): + def validate(instance, components): + if not any(c in instance.node_group.node_processes + for c in components): + raise NodeRequiredServiceMissingException( + ', '.join(components)) + return ft.partial(validate, components=components) + + +def each_node_has_component_vr(component): + def validate(cluster, component): + rc_vr = require_component_vr(component) + c_info = ci.ClusterInfo(cluster, None) + for i in c_info.get_instances(): + rc_vr(i) + return ft.partial(validate, component=component) + + +def each_node_has_at_least_one_of_listed_components(components): + def validate(cluster, components): + rc_vr = 
require_of_listed_components(components) + c_info = ci.ClusterInfo(cluster, None) + for i in c_info.get_instances(): + rc_vr(i) + return ft.partial(validate, components=components) + + +def node_dependency_satisfied_vr(component, dependency): + def validate(cluster, component, dependency): + c_info = ci.ClusterInfo(cluster, None) + for ng in c_info.get_node_groups(component): + if dependency not in ng.node_processes: + raise NodeRequiredServiceMissingException( + dependency, component) + return ft.partial(validate, component=component, dependency=dependency) + + +def create_fake_cluster(cluster, existing, additional): + w_node_groups = [w.Wrapper(ng, count=existing[ng.id]) + if ng.id in existing else ng + for ng in cluster.node_groups] + return w.Wrapper(cluster, node_groups=w_node_groups) diff --git a/sahara/plugins/mapr/util/wrapper.py b/sahara/plugins/mapr/util/wrapper.py new file mode 100644 index 00000000..757db4bc --- /dev/null +++ b/sahara/plugins/mapr/util/wrapper.py @@ -0,0 +1,28 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +class Wrapper(object): + WRAPPED = '__wrapped__' + + def __init__(self, wrapped, **kargs): + object.__getattribute__(self, '__dict__').update(kargs) + object.__setattr__(self, Wrapper.WRAPPED, wrapped) + + def __getattribute__(self, name): + wrapped = object.__getattribute__(self, Wrapper.WRAPPED) + try: + return object.__getattribute__(self, name) + except AttributeError: + return object.__getattribute__(wrapped, name) diff --git a/sahara/plugins/mapr/versions/__init__.py b/sahara/plugins/mapr/versions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sahara/plugins/mapr/versions/base_cluster_configurer.py b/sahara/plugins/mapr/versions/base_cluster_configurer.py new file mode 100644 index 00000000..87e35ab0 --- /dev/null +++ b/sahara/plugins/mapr/versions/base_cluster_configurer.py @@ -0,0 +1,167 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
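The Wrapper class above returns an overridden attribute when one was supplied as a keyword argument and otherwise delegates the lookup to the wrapped object; create_fake_cluster relies on this to make a node group report its post-scaling count without mutating the real object. A small usage sketch (FakeNodeGroup is an illustrative stand-in for a Sahara node group, not part of this patch):

    class FakeNodeGroup(object):
        def __init__(self):
            self.id = 'ng-1'
            self.count = 2
            self.node_processes = ['FileServer', 'TaskTracker']


    ng = FakeNodeGroup()
    wrapped = Wrapper(ng, count=5)
    assert wrapped.count == 5     # overridden via **kargs
    assert wrapped.id == 'ng-1'   # delegated to the wrapped object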
+ +import abc +import collections as c +import os + +import six + +from sahara.i18n import _LI +from sahara.openstack.common import log as logging +import sahara.plugins.mapr.util.cluster_helper as ch +import sahara.plugins.mapr.util.cluster_info as ci +from sahara.plugins.mapr.util import config +import sahara.plugins.mapr.util.config_file_utils as cfu +import sahara.plugins.mapr.util.dict_utils as du +import sahara.plugins.mapr.versions.version_handler_factory as vhf +import sahara.plugins.utils as u +import sahara.swift.swift_helper as sh + + +LOG = logging.getLogger(__name__) + + +@six.add_metaclass(abc.ABCMeta) +class BaseClusterConfigurer(object): + + def get_topology_configs(self): + result = c.defaultdict(dict) + if config.is_data_locality_enabled(self.cluster): + if self.is_node_awareness_enabled(): + LOG.debug('Node group awareness is set to True') + + file_name = '%s/core-site.xml' % self.get_hadoop_conf_dir() + service = self.plugin_spec.get_service_for_file_name(file_name) + data = {} + data['net.topology.impl'] = ( + 'org.apache.hadoop.net.NetworkTopologyWithNodeGroup') + data['net.topology.nodegroup.aware'] = True + data['dfs.block.replicator.classname'] = ( + 'org.apache.hadoop.hdfs.server.namenode' + '.BlockPlacementPolicyWithNodeGroup') + result[service][file_name] = data + + file_name = '%s/mapred-site.xml' % self.get_hadoop_conf_dir() + service = self.plugin_spec.get_service_for_file_name(file_name) + data = {} + data['mapred.jobtracker.nodegroup.aware'] = True + data['mapred.task.cache.levels'] = 3 + result[service][file_name] = data + + file_name = '/opt/mapr/conf/cldb.conf' + service = self.plugin_spec.get_service_for_file_name(file_name) + data = {} + data['net.topology.script.file.name'] = '/opt/mapr/topology.sh' + result[service][file_name] = data + else: + LOG.debug('Node group awareness is not implemented in YARN' + ' yet so enable_hypervisor_awareness set to' + ' False explicitly') + return result + + def get_swift_configs(self): + mapper = lambda i: (i['name'], i['value']) + file_name = '%s/core-site.xml' % self.get_hadoop_conf_dir() + service = self.plugin_spec.get_service_for_file_name(file_name) + data = dict(map(mapper, sh.get_swift_configs())) + return {service: {file_name: data}} + + def get_cluster_configs(self): + default_configs = self.cluster_info.get_default_configs() + user_configs = self.cluster_info.get_user_configs() + result = du.deep_update(default_configs, user_configs) + file_name = '/opt/mapr/conf/cldb.conf' + service = self.plugin_spec.get_service_for_file_name(file_name) + if file_name not in result[service]: + result[service][file_name] = {} + data = result[service][file_name] + data['cldb.zookeeper.servers'] = ch.get_zookeeper_nodes_ip_with_port( + self.cluster) + return result + + def get_cluster_configs_template(self): + template = {} + du.deep_update(template, self.get_topology_configs(), False) + du.deep_update(template, self.get_swift_configs(), False) + du.deep_update(template, self.get_cluster_configs(), False) + return template + + def get_node_group_configs(self, node_groups=None): + ng_configs = {} + if not node_groups: + node_groups = self.cluster.node_groups + cc_template = self.cluster_configs_template + p_spec = self.plugin_spec + for ng in node_groups: + ng_services = self.cluster_info.get_services(ng) + d_configs = dict(filter(lambda i: i[0] in ng_services, + six.iteritems(cc_template))) + u_configs = self.cluster_info.get_user_configs(ng) + nc_template = du.deep_update(d_configs, u_configs) + nc_data = {} + for files in 
nc_template.values(): + for f_name, f_data in six.iteritems(files): + if f_name: + f_type = p_spec.get_file_type(f_name) + f_content = cfu.to_file_content(f_data, f_type) + if f_content: + nc_data[f_name] = f_content + ng_configs[ng.id] = nc_data + return ng_configs + + def configure_instances(self, instances=None): + if not instances: + instances = u.get_instances(self.cluster) + for i in instances: + i_files = self.node_group_files[i.node_group_id] + LOG.info(_LI('Writing files %(f_names)s to node %(node)s'), + {'f_names': i_files.keys(), 'node': i.management_ip}) + with i.remote() as r: + for f_name in i_files: + r.execute_command('mkdir -p ' + os.path.dirname(f_name), + run_as_root=True) + LOG.debug('Created dir: %s', os.path.dirname(f_name)) + r.write_files_to(i_files, run_as_root=True) + config.post_configure_instance(i) + + def __init__(self, cluster, plugin_spec): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + self.context = v_handler.get_context(cluster) + self.cluster = cluster + self.plugin_spec = plugin_spec + self.cluster_info = ci.ClusterInfo(self.cluster, self.plugin_spec) + self.cluster_configs_template = self.get_cluster_configs_template() + self.node_group_files = self.get_node_group_configs() + + def configure(self, instances=None): + self.configure_topology_data(self.cluster) + self.configure_instances(instances) + + @staticmethod + def _post_configure_instance(instance): + config.post_configure_instance(instance) + + def configure_topology_data(self, cluster): + config.configure_topology_data( + cluster, self.is_node_awareness_enabled()) + + @abc.abstractmethod + def get_hadoop_conf_dir(self): + return + + @abc.abstractmethod + def is_node_awareness_enabled(self): + return diff --git a/sahara/plugins/mapr/versions/base_context.py b/sahara/plugins/mapr/versions/base_context.py new file mode 100644 index 00000000..4ebbf3d2 --- /dev/null +++ b/sahara/plugins/mapr/versions/base_context.py @@ -0,0 +1,173 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
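get_cluster_configs_template above layers topology, Swift, and cluster-level settings into a single {service: {file: {name: value}}} tree. Assuming dict_utils.deep_update (defined elsewhere in this patch) performs a recursive merge whose third argument controls whether existing leaves are overwritten, the layering behaves roughly like:

    def deep_update(dst, src, override=True):
        # Assumed semantics of sahara.plugins.mapr.util.dict_utils.deep_update.
        for key, value in src.items():
            if isinstance(value, dict) and isinstance(dst.get(key), dict):
                deep_update(dst[key], value, override)
            elif override or key not in dst:
                dst[key] = value
        return dst


    base = {'MapR FS': {'/opt/mapr/conf/cldb.conf': {'cldb.port': 7222}}}
    extra = {'MapR FS': {'/opt/mapr/conf/cldb.conf': {'cldb.numthreads': 10}}}
    deep_update(base, extra)
    # {'MapR FS': {'/opt/mapr/conf/cldb.conf':
    #              {'cldb.port': 7222, 'cldb.numthreads': 10}}}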
+ +import abc + +import six + +import sahara.plugins.mapr.util.config_utils as cu +import sahara.plugins.mapr.util.names as n +import sahara.plugins.utils as u + + +@six.add_metaclass(abc.ABCMeta) +class BaseContext(object): + + hive_version_config = 'Hive Version' + oozie_version_config = 'Oozie Version' + + @abc.abstractmethod + def get_cluster(self): + return + + @abc.abstractmethod + def is_m7_enabled(self): + return + + @abc.abstractmethod + def get_hadoop_version(self): + return + + def get_linux_distro_version(self): + return self.get_zk_instances()[0].remote().execute_command( + 'lsb_release -is', run_as_root=True)[1].rstrip() + + def get_install_manager(self): + install_manager_map = {'Ubuntu': 'apt-get install --force-yes -y ', + 'CentOS': 'yum install -y ', + 'RedHatEnterpriseServer': 'yum install -y ', + 'Suse': 'zypper '} + return install_manager_map.get(self.get_linux_distro_version()) + + def get_install_manager_version_separator(self): + install_manager_map = {'Ubuntu': '=', + 'CentOS': '-', + 'RedHatEnterpriseServer': '-', + 'Suse': ':'} + return install_manager_map.get(self.get_linux_distro_version()) + + def get_fs_instances(self): + return u.get_instances(self.get_cluster(), n.FILE_SERVER) + + def get_zk_instances(self): + return u.get_instances(self.get_cluster(), n.ZOOKEEPER) + + def get_zk_uris(self): + mapper = lambda i: '%s' % i.management_ip + return map(mapper, self.get_zk_instances()) + + def get_cldb_instances(self): + return u.get_instances(self.get_cluster(), n.CLDB) + + def get_cldb_uris(self): + mapper = lambda i: '%s' % i.management_ip + return map(mapper, self.get_cldb_instances()) + + def get_cldb_uri(self): + return 'maprfs:///' + + def get_rm_instance(self): + return u.get_instance(self.get_cluster(), n.RESOURCE_MANAGER) + + def get_rm_port(self): + return '8032' + + def get_rm_uri(self): + port = self.get_rm_port() + ip = self.get_rm_instance().management_ip + return '%s:%s' % (ip, port) if port else ip + + def get_hs_instance(self): + return u.get_instance(self.get_cluster(), n.HISTORY_SERVER) + + def get_hs_uri(self): + return self.get_hs_instance().management_ip + + def get_oozie_instance(self): + return u.get_instance(self.get_cluster(), n.OOZIE) + + def get_hive_metastore_instances(self): + return u.get_instances(self.get_cluster(), n.HIVE_METASTORE) + + def get_hive_server2_instances(self): + return u.get_instances(self.get_cluster(), n.HIVE_SERVER2) + + def get_oozie_uri(self): + ip = self.get_oozie_instance().management_ip + return 'http://%s:11000/oozie' % ip + + def get_roles_str(self, comp_list): + component_list_str = 'mapr-core ' + ' '.join(['mapr-' + role + ' ' + for role in comp_list]) + if 'HBase-Client' in comp_list: + component_list_str = component_list_str.replace( + 'HBase-Client', 'hbase') + if 'Oozie' in comp_list: + component_list_str = component_list_str.replace( + 'Oozie', 'oozie' + self.get_oozie_version()) + if 'HiveMetastore' in comp_list: + component_list_str = component_list_str.replace( + 'HiveMetastore', 'HiveMetastore' + self.get_hive_version()) + if 'HiveServer2' in comp_list: + component_list_str = component_list_str.replace( + 'HiveServer2', 'HiveServer2' + self.get_hive_version()) + + return component_list_str.lower() + + def user_exists(self): + return + + def get_plain_instances(self): + fs = self.get_fs_instances() + zk = self.get_zk_instances() + cldb = self.get_cldb_instances() + zk_fs_cldb = zk + fs + cldb + instances = u.get_instances(self.get_cluster()) + return [i for i in instances if i not in zk_fs_cldb] 
+ + def get_configure_command(self): + kargs = {'path': self.get_configure_sh_path(), + 'cldb_nodes': ','.join(self.get_cldb_uris()), + 'zk_nodes': ','.join(self.get_zk_uris()), + 'rm_node': self.get_rm_uri(), + 'hs_node': self.get_hs_uri()} + command = ('{path} -C {cldb_nodes} -Z {zk_nodes} -RM {rm_node}' + ' -HS {hs_node} -f').format(**kargs) + if self.is_m7_enabled(): + command += ' -M7' + if not self.user_exists(): + command += ' --create-user' + return command + + def get_fs_wait_command(self): + return '/tmp/waiting_script.sh' + + def get_disk_setup_command(self): + return '/opt/mapr/server/disksetup -F /tmp/disk.list' + + def get_configure_sh_path(self): + return '/opt/mapr/server/configure.sh' + + def get_oozie_version(self): + configs = cu.get_cluster_configs(self.get_cluster()) + return (self.get_install_manager_version_separator() + + configs[n.OOZIE][BaseContext.oozie_version_config] + '*') + + def get_hive_version(self): + configs = cu.get_cluster_configs(self.get_cluster()) + return (self.get_install_manager_version_separator() + + configs[n.HIVE][BaseContext.hive_version_config] + '*') + + def get_scripts(self): + return diff --git a/sahara/plugins/mapr/versions/base_version_handler.py b/sahara/plugins/mapr/versions/base_version_handler.py new file mode 100644 index 00000000..a6335e39 --- /dev/null +++ b/sahara/plugins/mapr/versions/base_version_handler.py @@ -0,0 +1,115 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
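With the node addresses resolved, get_configure_command above reduces to a single configure.sh invocation. A runnable sketch of the same string formatting (all addresses are illustrative):

    kargs = {'path': '/opt/mapr/server/configure.sh',
             'cldb_nodes': '10.0.0.2', 'zk_nodes': '10.0.0.3',
             'rm_node': '10.0.0.4:8032', 'hs_node': '10.0.0.5'}
    print(('{path} -C {cldb_nodes} -Z {zk_nodes} -RM {rm_node}'
           ' -HS {hs_node} -f').format(**kargs))
    # /opt/mapr/server/configure.sh -C 10.0.0.2 -Z 10.0.0.3
    #     -RM 10.0.0.4:8032 -HS 10.0.0.5 -f
    # ' -M7' is appended when MapR-DB is enabled, and ' --create-user'
    # when the mapr user does not exist yet.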
+ +import abc + +import six + +import sahara.plugins.mapr.util.plugin_spec as ps +import sahara.plugins.mapr.util.start_helper as sh +import sahara.plugins.mapr.util.validation_utils as vu +import sahara.plugins.mapr.versions.edp_engine as edp + + +@six.add_metaclass(abc.ABCMeta) +class BaseVersionHandler(object): + + def __init__(self): + self.plugin_spec = ps.PluginSpec(self.get_plugin_spec_path()) + + def get_plugin_spec(self): + return self.plugin_spec + + def get_configs(self): + return self.plugin_spec.get_configs() + + def get_node_processes(self): + return self.plugin_spec.service_node_process_map + + def get_disk_setup_script(self): + return 'plugins/mapr/util/resources/create_disk_list_file.sh' + + def validate(self, cluster): + rules = self.get_cluster_validation_rules(cluster) + for rule in rules: + rule(cluster) + + def validate_scaling(self, cluster, existing, additional): + fake_cluster = vu.create_fake_cluster(cluster, existing, additional) + self.validate(fake_cluster) + + def validate_edp(self, cluster): + for rule in self.get_edp_validation_rules(): + rule(cluster) + + def configure_cluster(self, cluster): + sh.install_roles(cluster, self.get_context(cluster)) + self.get_cluster_configurer(cluster, self.plugin_spec).configure() + + def get_name_node_uri(self, cluster): + return self.get_context(cluster).get_cldb_uri() + + def get_oozie_server(self, cluster): + return self.get_context(cluster).get_oozie_instance() + + def get_oozie_server_uri(self, cluster): + return self.get_context(cluster).get_oozie_uri() + + def get_resource_manager_uri(self, cluster): + return self.get_context(cluster).get_rm_uri() + + def get_home_dir(self): + return ('plugins/mapr/versions/v%s' + % self.get_plugin_version().replace('.', '_').lower()) + + def get_plugin_spec_path(self): + return '%s/resources/plugin_spec.json' % self.get_home_dir() + + def get_edp_engine(self, cluster, job_type): + if job_type in edp.MapROozieJobEngine.get_supported_job_types(): + return edp.MapROozieJobEngine(cluster) + return None + + # Abstract methods + + @abc.abstractmethod + def get_plugin_version(self): + return + + @abc.abstractmethod + def get_cluster_validation_rules(self, cluster): + return + + @abc.abstractmethod + def get_scaling_validation_rules(self): + return + + def get_waiting_script(self): + return + + @abc.abstractmethod + def get_edp_validation_rules(self): + return + + @abc.abstractmethod + def get_cluster_configurer(self, cluster, plugin_spec): + return + + @abc.abstractmethod + def get_configure_sh_string(self, cluster): + return + + @abc.abstractmethod + def get_context(self, cluster): + return diff --git a/sahara/plugins/mapr/versions/edp_engine.py b/sahara/plugins/mapr/versions/edp_engine.py new file mode 100644 index 00000000..5d2e3adb --- /dev/null +++ b/sahara/plugins/mapr/versions/edp_engine.py @@ -0,0 +1,76 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
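Each concrete handler only has to report its version string; the base class above derives every resource path from it, so the directory layout under versions/ is effectively a naming contract. For the 3.1.1 handler defined later in this patch:

    version = '3.1.1'
    home = 'plugins/mapr/versions/v%s' % version.replace('.', '_').lower()
    # home == 'plugins/mapr/versions/v3_1_1'
    # spec path == 'plugins/mapr/versions/v3_1_1/resources/plugin_spec.json'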
+ +import sahara.plugins.mapr.util.maprfs_helper as mfs +import sahara.plugins.mapr.versions.version_handler_factory as vhf +import sahara.service.edp.binary_retrievers.dispatch as d +import sahara.service.edp.oozie.engine as e + + +class MapROozieJobEngine(e.OozieJobEngine): + + def get_hdfs_user(self): + return 'mapr' + + def create_hdfs_dir(self, remote, dir_name): + mfs.create_maprfs_dir(remote, dir_name, self.get_hdfs_user()) + + def _upload_workflow_file(self, where, job_dir, wf_xml, hdfs_user): + f_name = 'workflow.xml' + with where.remote() as r: + mfs.put_file_to_maprfs(r, wf_xml, f_name, job_dir, hdfs_user) + return job_dir + '/' + f_name + + def _upload_job_files_to_hdfs(self, where, job_dir, job): + mains = job.mains or [] + libs = job.libs or [] + uploaded_paths = [] + hdfs_user = self.get_hdfs_user() + with where.remote() as r: + for m in mains: + raw_data = d.get_raw_binary(m) + mfs.put_file_to_maprfs(r, raw_data, m.name, job_dir, hdfs_user) + uploaded_paths.append(job_dir + '/' + m.name) + for l in libs: + raw_data = d.get_raw_binary(l) + lib_dir = job_dir + '/lib/' + self.create_hdfs_dir(r, lib_dir) + mfs.put_file_to_maprfs(r, raw_data, l.name, lib_dir, + hdfs_user) + uploaded_paths.append(lib_dir + l.name) + return uploaded_paths + + def get_name_node_uri(self, cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + uri = v_handler.get_name_node_uri(cluster) + return uri + + def get_oozie_server_uri(self, cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + uri = v_handler.get_oozie_server_uri(cluster) + return uri + + def get_oozie_server(self, cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + server = v_handler.get_oozie_server(cluster) + return server + + def get_resource_manager_uri(self, cluster): + h_version = cluster.hadoop_version + v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version) + uri = v_handler.get_resource_manager_uri(cluster) + return uri diff --git a/sahara/plugins/mapr/versions/v3_1_1/__init__.py b/sahara/plugins/mapr/versions/v3_1_1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sahara/plugins/mapr/versions/v3_1_1/cluster_configurer.py b/sahara/plugins/mapr/versions/v3_1_1/cluster_configurer.py new file mode 100644 index 00000000..6c749cff --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/cluster_configurer.py @@ -0,0 +1,24 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import sahara.plugins.mapr.versions.base_cluster_configurer as bcc + + +class ClusterConfigurer(bcc.BaseClusterConfigurer): + + def get_hadoop_conf_dir(self): + return '/opt/mapr/hadoop/hadoop-0.20.2/conf' + + def is_node_awareness_enabled(self): + return True diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/cldb.conf b/sahara/plugins/mapr/versions/v3_1_1/resources/default/cldb.conf new file mode 100644 index 00000000..7f501d8c --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/cldb.conf @@ -0,0 +1,63 @@ +# +# CLDB Config file. +# Properties defined in this file are loaded during startup +# and are valid for only CLDB which loaded the config. +# These parameters are not persisted anywhere else. +# +# Wait until minimum number of fileserver register with +# CLDB before creating Root Volume +cldb.min.fileservers=1 +# CLDB listening port +cldb.port=7222 +# Number of worker threads +cldb.numthreads=10 +# CLDB webport +cldb.web.port=7221 +# CLDB https port +cldb.web.https.port=7443 +# Disable duplicate hostid detection +cldb.detect.dup.hostid.enabled=false +# Deprecated: This param is no longer supported. To configure +# the container cache, use the param cldb.containers.cache.percent +# Number of RW containers in cache +#cldb.containers.cache.entries=1000000 +# +# Percentage (integer) of Xmx setting to be used for container cache +#cldb.containers.cache.percent=20 +# +# Topology script to be used to determine +# Rack topology of node +# Script should take an IP address as input and print rack path +# on STDOUT. eg +# $>/home/mapr/topo.pl 10.10.10.10 +# $>/mapr-rack1 +# $>/home/mapr/topo.pl 10.10.10.20 +# $>/mapr-rack2 +#net.topology.script.file.name=/home/mapr/topo.pl +# +# Topology mapping file used to determine +# Rack topology of node +# File is of a 2 column format (space separated) +# 1st column is an IP address or hostname +# 2nd column is the rack path +# Line starting with '#' is a comment +# Example file contents +# 10.10.10.10 /mapr-rack1 +# 10.10.10.20 /mapr-rack2 +# host.foo.com /mapr-rack3 +#net.topology.table.file.name=/home/mapr/topo.txt +# +# ZooKeeper address +#cldb.zookeeper.servers=10.250.1.91:5181 +# Hadoop metrics jar version +#hadoop.version=0.20.2 +# CLDB JMX remote port +cldb.jmxremote.port=7220 +num.volmirror.threads=1 +# Set this to set the default topology for all volumes and nodes +# The default for all volumes is /data by default +# UNCOMMENT the below to change the default topology. +# For e.g., set cldb.default.topology=/mydata to create volumes +# in /mydata topology and to place all nodes in /mydata topology +# by default +#cldb.default.topology=/mydata diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/core-site.xml b/sahara/plugins/mapr/versions/v3_1_1/resources/default/core-site.xml new file mode 100644 index 00000000..506080a6 --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/core-site.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + fs.default.name + maprfs:/// + The name of the default file system. A URI whose + scheme and authority determine the FileSystem implementation. The + uri's scheme determines the config property (fs.SCHEME.impl) naming + the FileSystem implementation class. The uri's authority is used to + determine the host, port, etc. for a filesystem. + + + + fs.mapr.working.dir + /user/$USERNAME/ + The default directory to be used with relative paths. 
+ Note that $USERNAME is NOT an enviromental variable, but just a placeholder + to indicate that it will be expanded to the corresponding username. + Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc. + + + + + fs.s3n.block.size + 33554432 + + + fs.s3n.blockSize + 33554432 + + + fs.s3.block.size + 33554432 + + + fs.s3.blockSize + 33554432 + + + hadoop.proxyuser.mapr.groups + * + + + hadoop.proxyuser.mapr.hosts + * + + diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/exports b/sahara/plugins/mapr/versions/v3_1_1/resources/default/exports new file mode 100644 index 00000000..96111856 --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/exports @@ -0,0 +1,30 @@ +# Sample Exports file + +# for /mapr exports +# + +#access_control -> order is specific to default +# list the hosts before specifying a default for all +# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw) +# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw + +# special path to export clusters in mapr-clusters.conf. To disable exporting, +# comment it out. to restrict access use the exports_control +# +/mapr (rw) + +#to export only certain clusters, comment out the /mapr & uncomment. +# Note: this will cause /mapr to be unexported +#/mapr/clustername (rw) + +#to export /mapr only to certain hosts (using exports_control) +#/mapr a.b.c.d(rw),e.f.g.h(ro) + +# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others) +#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro) + +# export /mapr/cluster2 only to e.f.g.h (denied for others) +#/mapr/cluster2 e.f.g.h(rw) + +# export /mapr/cluster3 rw to e.f.g.h & ro to others +#/mapr/cluster2 e.f.g.h(rw) (ro) diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/hadoop-metrics.properties b/sahara/plugins/mapr/versions/v3_1_1/resources/default/hadoop-metrics.properties new file mode 100644 index 00000000..b65260ce --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/hadoop-metrics.properties @@ -0,0 +1,41 @@ +#CLDB metrics config - Pick one out of null,file or ganglia. +#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context + +# Configuration of the "cldb" context for null +#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread +#cldb.period=10 + +# Configuration of the "cldb" context for file +#cldb.class=org.apache.hadoop.metrics.file.FileContext +#cldb.period=60 +#cldb.fileName=/tmp/cldbmetrics.log + +# Configuration of the "cldb" context for ganglia +cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31 +cldb.period=10 +cldb.servers=localhost:8649 +cldb.spoof=1 + +#FileServer metrics config - Pick one out of null,file or ganglia. 
+#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context + +# Configuration of the "fileserver" context for null +#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread +#fileserver.period=10 + +# Configuration of the "fileserver" context for file +#fileserver.class=org.apache.hadoop.metrics.file.FileContext +#fileserver.period=60 +#fileserver.fileName=/tmp/fsmetrics.log + +# Configuration of the "fileserver" context for ganglia +fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31 +fileserver.period=37 +fileserver.servers=localhost:8649 +fileserver.spoof=1 + +maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext +maprmepredvariant.period=10 +maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal +maprmapred.period=10 + diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/mfs.conf b/sahara/plugins/mapr/versions/v3_1_1/resources/default/mfs.conf new file mode 100644 index 00000000..5288dffd --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/mfs.conf @@ -0,0 +1,16 @@ +#mfs.num.compress.threads=1 +#mfs.max.aio.events=5000 +#mfs.disable.periodic.flush=0 +#mfs.io.disk.timeout=60 +#mfs.server.ip=127.0.0.1 +#mfs.max.resync.count=16 +#mfs.max.restore.count=16 +#mfs.ignore.container.delete=0 +#mfs.ignore.readdir.pattern=0 +mfs.server.port=5660 +#mfs.subnets.whitelist=127.0.0.1/8 +#UNCOMMENT this line to disable bulk writes +#mfs.bulk.writes.enabled=0 +#UNCOMMENT this to set the topology of this node +#For e.g., to set this node's topology to /compute-only uncomment the below line +#mfs.network.location=/compute-only diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/default/nfsserver.conf b/sahara/plugins/mapr/versions/v3_1_1/resources/default/nfsserver.conf new file mode 100644 index 00000000..fa28d369 --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/default/nfsserver.conf @@ -0,0 +1,43 @@ +# Configuration for nfsserver + +# +# The system defaults are in the comments +# + +# Default compression is true +#Compression = true + +# chunksize is 64M +#ChunkSize = 67108864 + +# Number of threads for compression/decompression: default=2 +#CompThreads = 2 + +#Mount point for the ramfs file for mmap +#RamfsMntDir = /ramfs/mapr + +# Size of the ramfile to use (percent of total physical memory) default=0.25 +# 0: disables the use of ramfs +#RamfsSize = 0.25 + +# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF +#Loglevel = INFO + +#Duplicate Request cache size & timeout in seconds +#DrCacheSize = 20480 +#DrCacheTimeout = 62 +# To keep the drcache lean, we only cache the response if the +# time we took to populate is greater than 50% of DrCacheTimeout. +# Set it to 0 to disable this optimization, Note that the DrCacheSize or +# DrCacheTimeout will also need to be changed. Ex: if the nfsserver supports +# 10,000 ops/sec (modification ops): then DrCacheSize will need to change +# to: 10,000*DrCacheTimeout = 620,000 +#DRCacheTimeOutOpt = 0.5 + +#NFS fileid, by default the fileid is of 32 bit size. +#Set Use32BitFileId=0 to use 64 bit fileid (inode number) +#Use32BitFileId=0 + +#Auto refresh exports time interval in mins. +#default is 0, means there is no auto refresh. 
+#AutoRefreshExportsTimeInterval = 5 diff --git a/sahara/plugins/mapr/versions/v3_1_1/resources/plugin_spec.json b/sahara/plugins/mapr/versions/v3_1_1/resources/plugin_spec.json new file mode 100644 index 00000000..8f06a5e2 --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/resources/plugin_spec.json @@ -0,0 +1,203 @@ +{ + "files": [ + { + "remote": null, + "type": null, + "configs": { + "required": { + "cluster": [ + { + "name": "Enable MapR-DB", + "config_type": "bool", + "default_value": false, + "priority": 1 + } + ] + } + } + }, + { + "remote": "/opt/mapr/conf/cldb.conf", + "local": "default/cldb.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/hadoop-metrics.properties", + "local": "default/hadoop-metrics.properties", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/mfs.conf", + "local": "default/mfs.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/nfsserver.conf", + "local": "default/nfsserver.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/exports", + "local": "default/exports", + "type": "raw" + }, + { + "remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml", + "local": "default/core-site.xml", + "type": "xml", + "configs": { + "optional": { + "cluster": [ + { + "name": "fs.swift.impl", + "default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem" + }, + { + "name": "fs.swift.connect.timeout", + "config_type": "int", + "default_value": 15000 + }, + { + "name": "fs.swift.socket.timeout", + "config_type": "int", + "default_value": 60000 + }, + { + "name": "fs.swift.connect.retry.count", + "config_type": "int", + "default_value": 3 + }, + { + "name": "fs.swift.connect.throttle.delay", + "config_type": "int", + "default_value": 0 + }, + { + "name": "fs.swift.blocksize", + "config_type": "int", + "default_value": 32768 + }, + { + "name": "fs.swift.partsize", + "config_type": "int", + "default_value": 4718592 + }, + { + "name": "fs.swift.requestsize", + "config_type": "int", + "default_value": 64 + }, + { + "name": "fs.swift.service.sahara.public", + "config_type": "bool", + "default_value": true + }, + { + "name": "fs.swift.service.sahara.http.port", + "config_type": "int", + "default_value": 8080 + }, + { + "name": "fs.swift.service.sahara.https.port", + "config_type": "int", + "default_value": 443 + }, + { + "name": "fs.swift.service.sahara.auth.endpoint.prefix", + "default_value": "/endpoints/AUTH_" + } + ] + } + } + }, + { + "remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml", + "type": "xml" + } + ], + "services": [ + { + "name": "general", + "files": [ + null + ] + }, + { + "name": "Management", + "node_processes": [ + "ZooKeeper", + "Webserver", + "MapR-Client", + "Metrics" + ] + }, + { + "name": "MapReduce", + "node_processes": [ + "TaskTracker", + "JobTracker" + ], + "files": [ + "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml" + ] + }, + { + "name": "MapR FS", + "node_processes": [ + "CLDB", + "FileServer", + "NFS" + ], + "files": [ + "/opt/mapr/conf/cldb.conf", + "/opt/mapr/conf/hadoop-metrics.properties", + "/opt/mapr/conf/mfs.conf", + "/opt/mapr/conf/nfsserver.conf", + "/opt/mapr/conf/exports", + "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml" + ] + }, + { + "name": "HBase", + "node_processes": [ + "HBase-Master", + "HBase-RegionServer", + "HBase-Client" + ] + }, + { + "name": "Hive", + "node_processes": [ + "HiveMetastore", + "HiveServer2" + ], + "versions": [ + "0.13", + "0.12" + ] + }, + { + "name": "Oozie", + "node_processes": [ + "Oozie" + ], + 
"versions": [ + "4.0.1", + "4.0.0", + "3.3.2" + ] + }, + { + "name": "Pig", + "node_processes": [ + "Pig" + ] + }, + { + "name": "Mahout", + "node_processes": [ + "Mahout" + ] + } + ] +} \ No newline at end of file diff --git a/sahara/plugins/mapr/versions/v3_1_1/start_utils.py b/sahara/plugins/mapr/versions/v3_1_1/start_utils.py new file mode 100644 index 00000000..63992613 --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/start_utils.py @@ -0,0 +1,34 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara.plugins.mapr.util import cluster_helper as clh_utils +from sahara.plugins.mapr.util import start_helper as start_helper +import sahara.plugins.utils as utils + + +def exec_configure_sh_on_cluster(cluster): + inst_list = utils.get_instances(cluster) + for n in inst_list: + exec_configure_sh_on_instance(cluster, n) + + +def exec_configure_sh_on_instance(cluster, instance): + script_string = ('/opt/mapr/server/configure.sh' + + ' -C ' + clh_utils.get_cldb_nodes_ip(cluster) + + ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster) + + ' -f') + if not start_helper.check_if_mapr_user_exist(instance): + script_string = script_string + ' --create-user' + + instance.remote().execute_command(script_string, True) diff --git a/sahara/plugins/mapr/versions/v3_1_1/version_handler.py b/sahara/plugins/mapr/versions/v3_1_1/version_handler.py new file mode 100644 index 00000000..c4a28d7d --- /dev/null +++ b/sahara/plugins/mapr/versions/v3_1_1/version_handler.py @@ -0,0 +1,112 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from sahara import context +from sahara.plugins.mapr.util import cluster_helper as clh_utils +import sahara.plugins.mapr.util.config_utils as cu +import sahara.plugins.mapr.util.names as n +from sahara.plugins.mapr.util import scaling +from sahara.plugins.mapr.util import start_helper as start_helper +import sahara.plugins.mapr.util.validation_utils as vu +import sahara.plugins.mapr.versions.base_context as bc +from sahara.plugins.mapr.versions import base_version_handler as bvh +import sahara.plugins.mapr.versions.v3_1_1.cluster_configurer as cc +import sahara.plugins.utils as u + + +version = '3.1.1' +SIXTY_SECONDS = 60 + + +class VersionHandler(bvh.BaseVersionHandler): + + def get_plugin_version(self): + return version + + def start_cluster(self, cluster): + start_helper.exec_configure_sh_on_cluster( + cluster, self.get_configure_sh_string(cluster)) + start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script()) + start_helper.setup_maprfs_on_cluster( + cluster, self.get_disk_setup_script()) + start_helper.start_zookeeper_nodes_on_cluster(cluster) + start_helper.start_warden_on_cldb_nodes(cluster) + context.sleep(SIXTY_SECONDS) + start_helper.start_warden_on_other_nodes(cluster) + start_helper.start_ecosystem(self.get_context(cluster)) + + def get_waiting_script(self): + return 'plugins/mapr/util/resources/waiting_script.sh' + + def get_configure_sh_string(self, cluster): + return ('/opt/mapr/server/configure.sh' + + ' -C ' + clh_utils.get_cldb_nodes_ip(cluster) + + ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster) + ' -f') + + def scale_cluster(self, cluster, instances): + scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(), + self.get_waiting_script(), + self.get_context(cluster), + self.get_configure_sh_string(cluster), True) + + def decommission_nodes(self, cluster, instances): + scaling.decommission_nodes( + cluster, instances, self.get_configure_sh_string(cluster)) + + def get_cluster_validation_rules(self, cluster): + return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1), + vu.not_less_than_count_component_vr(n.CLDB, 1), + vu.not_less_than_count_component_vr(n.TASK_TRACKER, 1), + vu.not_less_than_count_component_vr(n.FILE_SERVER, 1), + vu.not_more_than_count_component_vr(n.OOZIE, 1), + vu.not_less_than_count_component_vr(n.JOBTRACKER, 1), + vu.node_dependency_satisfied_vr(n.TASK_TRACKER, n.FILE_SERVER), + vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)] + + def get_scaling_validation_rules(self): + return [] + + def get_edp_validation_rules(self): + return [] + + def get_cluster_configurer(self, cluster, plugin_spec): + return cc.ClusterConfigurer(cluster, plugin_spec) + + def get_context(self, cluster): + return Context(cluster) + + +class Context(bc.BaseContext): + m7_enabled_config = n.IS_M7_ENABLED + hive_version_config = 'Hive Version' + oozie_version_config = 'Oozie Version' + + def __init__(self, cluster): + self.cluster = cluster + + def get_cluster(self): + return self.cluster + + def is_m7_enabled(self): + configs = cu.get_cluster_configs(self.get_cluster()) + return configs[n.GENERAL][Context.m7_enabled_config] + + def get_hadoop_version(self): + return '0.20.2' + + def get_rm_instance(self): + return u.get_instance(self.get_cluster(), n.JOBTRACKER) + + def get_rm_port(self): + return '9001' diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/__init__.py b/sahara/plugins/mapr/versions/v4_0_1_mrv1/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/sahara/plugins/mapr/versions/v4_0_1_mrv1/cluster_configurer.py b/sahara/plugins/mapr/versions/v4_0_1_mrv1/cluster_configurer.py new file mode 100644 index 00000000..64c10982 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/cluster_configurer.py @@ -0,0 +1,51 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara.i18n import _LI +from sahara.openstack.common import log as logging +import sahara.plugins.mapr.versions.base_cluster_configurer as bcc +import sahara.plugins.utils as u +from sahara.utils import files as f + +LOG = logging.getLogger(__name__) + + +class ClusterConfigurer(bcc.BaseClusterConfigurer): + hadoop_version_path = '/opt/mapr/conf/hadoop_version' + hadoop_mode = 'classic' + hadoop_version_local = 'plugins/mapr/util/resources/hadoop_version' + + def get_hadoop_conf_dir(self): + return '/opt/mapr/hadoop/hadoop-0.20.2/conf' + + def is_node_awareness_enabled(self): + return True + + def set_cluster_mode(self, instances): + if not instances: + instances = u.get_instances(self.cluster) + LOG.info(_LI('Setting cluster mode to classic')) + hv_template = f.get_file_text(self.hadoop_version_local) + hv = hv_template % {"mode": self.hadoop_mode} + for i in instances: + with i.remote() as r: + LOG.debug('Writing file %(f_name)s to node %(node)s', + {'f_name': self.hadoop_version_path, + 'node': i.management_ip}) + r.write_file_to(self.hadoop_version_path, hv, + run_as_root=True) + + def configure_instances(self, instances=None): + super(ClusterConfigurer, self).configure_instances(instances) + self.set_cluster_mode(instances) \ No newline at end of file diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/cldb.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/cldb.conf new file mode 100644 index 00000000..7f501d8c --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/cldb.conf @@ -0,0 +1,63 @@ +# +# CLDB Config file. +# Properties defined in this file are loaded during startup +# and are valid for only CLDB which loaded the config. +# These parameters are not persisted anywhere else. +# +# Wait until minimum number of fileserver register with +# CLDB before creating Root Volume +cldb.min.fileservers=1 +# CLDB listening port +cldb.port=7222 +# Number of worker threads +cldb.numthreads=10 +# CLDB webport +cldb.web.port=7221 +# CLDB https port +cldb.web.https.port=7443 +# Disable duplicate hostid detection +cldb.detect.dup.hostid.enabled=false +# Deprecated: This param is no longer supported. To configure +# the container cache, use the param cldb.containers.cache.percent +# Number of RW containers in cache +#cldb.containers.cache.entries=1000000 +# +# Percentage (integer) of Xmx setting to be used for container cache +#cldb.containers.cache.percent=20 +# +# Topology script to be used to determine +# Rack topology of node +# Script should take an IP address as input and print rack path +# on STDOUT. 
eg
+# $>/home/mapr/topo.pl 10.10.10.10
+# $>/mapr-rack1
+# $>/home/mapr/topo.pl 10.10.10.20
+# $>/mapr-rack2
+#net.topology.script.file.name=/home/mapr/topo.pl
+#
+# Topology mapping file used to determine
+# Rack topology of node
+# File is of a 2 column format (space separated)
+# 1st column is an IP address or hostname
+# 2nd column is the rack path
+# Line starting with '#' is a comment
+# Example file contents
+# 10.10.10.10 /mapr-rack1
+# 10.10.10.20 /mapr-rack2
+# host.foo.com /mapr-rack3
+#net.topology.table.file.name=/home/mapr/topo.txt
+#
+# ZooKeeper address
+#cldb.zookeeper.servers=10.250.1.91:5181
+# Hadoop metrics jar version
+#hadoop.version=0.20.2
+# CLDB JMX remote port
+cldb.jmxremote.port=7220
+num.volmirror.threads=1
+# Set this to set the default topology for all volumes and nodes
+# The default for all volumes is /data by default
+# UNCOMMENT the below to change the default topology.
+# For e.g., set cldb.default.topology=/mydata to create volumes
+# in /mydata topology and to place all nodes in /mydata topology
+# by default
+#cldb.default.topology=/mydata
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/core-site.xml b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/core-site.xml
new file mode 100644
index 00000000..506080a6
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/core-site.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>fs.default.name</name>
+  <value>maprfs:///</value>
+  <description>The name of the default file system. A URI whose
+  scheme and authority determine the FileSystem implementation. The
+  uri's scheme determines the config property (fs.SCHEME.impl) naming
+  the FileSystem implementation class. The uri's authority is used to
+  determine the host, port, etc. for a filesystem.
+  </description>
+</property>
+
+<property>
+  <name>fs.mapr.working.dir</name>
+  <value>/user/$USERNAME/</value>
+  <description>The default directory to be used with relative paths.
+  Note that $USERNAME is NOT an environmental variable, but just a placeholder
+  to indicate that it will be expanded to the corresponding username.
+  Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3n.block.size</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3n.blockSize</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3.block.size</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3.blockSize</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.mapr.groups</name>
+  <value>*</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.mapr.hosts</name>
+  <value>*</value>
+</property>
+
+</configuration>
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/exports b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/exports
new file mode 100644
index 00000000..96111856
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/exports
@@ -0,0 +1,30 @@
+# Sample Exports file
+
+# for /mapr exports
+#
+
+#access_control -> order is specific to default
+# list the hosts before specifying a default for all
+# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
+# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw
+
+# special path to export clusters in mapr-clusters.conf. To disable exporting,
+# comment it out. To restrict access, use the exports_control
+#
+/mapr (rw)
+
+#to export only certain clusters, comment out the /mapr & uncomment.
+# Note: this will cause /mapr to be unexported
+#/mapr/clustername (rw)
+
+#to export /mapr only to certain hosts (using exports_control)
+#/mapr a.b.c.d(rw),e.f.g.h(ro)
+
+# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
+#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
+
+# export /mapr/cluster2 only to e.f.g.h (denied for others)
+#/mapr/cluster2 e.f.g.h(rw)
+
+# export /mapr/cluster3 rw to e.f.g.h & ro to others
+#/mapr/cluster3 e.f.g.h(rw) (ro)
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/hadoop-metrics.properties b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/hadoop-metrics.properties
new file mode 100644
index 00000000..b65260ce
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/hadoop-metrics.properties
@@ -0,0 +1,41 @@
+#CLDB metrics config - Pick one out of null,file or ganglia.
+#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context
+
+# Configuration of the "cldb" context for null
+#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+#cldb.period=10
+
+# Configuration of the "cldb" context for file
+#cldb.class=org.apache.hadoop.metrics.file.FileContext
+#cldb.period=60
+#cldb.fileName=/tmp/cldbmetrics.log
+
+# Configuration of the "cldb" context for ganglia
+cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
+cldb.period=10
+cldb.servers=localhost:8649
+cldb.spoof=1
+
+#FileServer metrics config - Pick one out of null,file or ganglia.
+#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context
+
+# Configuration of the "fileserver" context for null
+#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+#fileserver.period=10
+
+# Configuration of the "fileserver" context for file
+#fileserver.class=org.apache.hadoop.metrics.file.FileContext
+#fileserver.period=60
+#fileserver.fileName=/tmp/fsmetrics.log
+
+# Configuration of the "fileserver" context for ganglia
+fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
+fileserver.period=37
+fileserver.servers=localhost:8649
+fileserver.spoof=1
+
+maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
+maprmepredvariant.period=10
+maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
+maprmapred.period=10
+
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/mfs.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/mfs.conf
new file mode 100644
index 00000000..5288dffd
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/mfs.conf
@@ -0,0 +1,16 @@
+#mfs.num.compress.threads=1
+#mfs.max.aio.events=5000
+#mfs.disable.periodic.flush=0
+#mfs.io.disk.timeout=60
+#mfs.server.ip=127.0.0.1
+#mfs.max.resync.count=16
+#mfs.max.restore.count=16
+#mfs.ignore.container.delete=0
+#mfs.ignore.readdir.pattern=0
+mfs.server.port=5660
+#mfs.subnets.whitelist=127.0.0.1/8
+#UNCOMMENT this line to disable bulk writes
+#mfs.bulk.writes.enabled=0
+#UNCOMMENT this to set the topology of this node
+#For e.g., to set this node's topology to /compute-only uncomment the below line
+#mfs.network.location=/compute-only
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/nfsserver.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/nfsserver.conf
new file mode 100644
index 00000000..fa28d369
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/default/nfsserver.conf
@@ -0,0 +1,43 @@
+#
Configuration for nfsserver + +# +# The system defaults are in the comments +# + +# Default compression is true +#Compression = true + +# chunksize is 64M +#ChunkSize = 67108864 + +# Number of threads for compression/decompression: default=2 +#CompThreads = 2 + +#Mount point for the ramfs file for mmap +#RamfsMntDir = /ramfs/mapr + +# Size of the ramfile to use (percent of total physical memory) default=0.25 +# 0: disables the use of ramfs +#RamfsSize = 0.25 + +# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF +#Loglevel = INFO + +#Duplicate Request cache size & timeout in seconds +#DrCacheSize = 20480 +#DrCacheTimeout = 62 +# To keep the drcache lean, we only cache the response if the +# time we took to populate is greater than 50% of DrCacheTimeout. +# Set it to 0 to disable this optimization, Note that the DrCacheSize or +# DrCacheTimeout will also need to be changed. Ex: if the nfsserver supports +# 10,000 ops/sec (modification ops): then DrCacheSize will need to change +# to: 10,000*DrCacheTimeout = 620,000 +#DRCacheTimeOutOpt = 0.5 + +#NFS fileid, by default the fileid is of 32 bit size. +#Set Use32BitFileId=0 to use 64 bit fileid (inode number) +#Use32BitFileId=0 + +#Auto refresh exports time interval in mins. +#default is 0, means there is no auto refresh. +#AutoRefreshExportsTimeInterval = 5 diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/plugin_spec.json b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/plugin_spec.json new file mode 100644 index 00000000..0b8806f6 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/plugin_spec.json @@ -0,0 +1,203 @@ +{ + "files": [ + { + "remote": null, + "type": null, + "configs": { + "required": { + "cluster": [ + { + "name": "Enable MapR-DB", + "config_type": "bool", + "default_value": false, + "priority": 1 + } + ] + } + } + }, + { + "remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml", + "local": "default/core-site.xml", + "type": "xml", + "configs": { + "optional": { + "cluster": [ + { + "name": "fs.swift.impl", + "default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem" + }, + { + "name": "fs.swift.connect.timeout", + "config_type": "int", + "default_value": 15000 + }, + { + "name": "fs.swift.socket.timeout", + "config_type": "int", + "default_value": 60000 + }, + { + "name": "fs.swift.connect.retry.count", + "config_type": "int", + "default_value": 3 + }, + { + "name": "fs.swift.connect.throttle.delay", + "config_type": "int", + "default_value": 0 + }, + { + "name": "fs.swift.blocksize", + "config_type": "int", + "default_value": 32768 + }, + { + "name": "fs.swift.partsize", + "config_type": "int", + "default_value": 4718592 + }, + { + "name": "fs.swift.requestsize", + "config_type": "int", + "default_value": 64 + }, + { + "name": "fs.swift.service.sahara.public", + "config_type": "bool", + "default_value": true + }, + { + "name": "fs.swift.service.sahara.http.port", + "config_type": "int", + "default_value": 8080 + }, + { + "name": "fs.swift.service.sahara.https.port", + "config_type": "int", + "default_value": 443 + }, + { + "name": "fs.swift.service.sahara.auth.endpoint.prefix", + "default_value": "/endpoints/AUTH_" + } + ] + } + } + }, + { + "remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml", + "type": "xml" + }, + { + "remote": "/opt/mapr/conf/cldb.conf", + "local": "default/cldb.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/hadoop-metrics.properties", + "local": "default/hadoop-metrics.properties", + "type": "properties" + }, 
+ { + "remote": "/opt/mapr/conf/mfs.conf", + "local": "default/mfs.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/nfsserver.conf", + "local": "default/nfsserver.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/exports", + "local": "default/exports", + "type": "raw" + } + ], + "services": [ + { + "name": "general", + "files": [ + null + ] + }, + { + "name": "Management", + "node_processes": [ + "ZooKeeper", + "Webserver", + "MapR-Client", + "Metrics" + ] + }, + { + "name": "MapReduce", + "node_processes": [ + "TaskTracker", + "JobTracker" + ], + "files": [ + "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml" + ] + }, + { + "name": "MapR FS", + "node_processes": [ + "CLDB", + "FileServer", + "NFS" + ], + "files": [ + "/opt/mapr/conf/cldb.conf", + "/opt/mapr/conf/hadoop-metrics.properties", + "/opt/mapr/conf/mfs.conf", + "/opt/mapr/conf/nfsserver.conf", + "/opt/mapr/conf/exports", + "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml" + ] + }, + { + "name": "HBase", + "node_processes": [ + "HBase-Master", + "HBase-RegionServer", + "HBase-Client" + ] + }, + { + "name": "Hive", + "node_processes": [ + "HiveMetastore", + "HiveServer2" + ], + "versions": [ + "0.13", + "0.12" + ] + }, + { + "name": "Oozie", + "node_processes": [ + "Oozie" + ], + "versions": [ + "4.0.1", + "4.0.0", + "3.3.2" + ] + }, + { + "name": "Pig", + "node_processes": [ + "Pig" + ] + }, + { + "name": "Mahout", + "node_processes": [ + "Mahout" + ] + } + ] +} \ No newline at end of file diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv1/version_handler.py b/sahara/plugins/mapr/versions/v4_0_1_mrv1/version_handler.py new file mode 100644 index 00000000..c4bb9f52 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv1/version_handler.py @@ -0,0 +1,114 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
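+
+# Version handler for MapR 4.0.1 in classic (MRv1) mode.  The node layout
+# is validated below; configure.sh is invoked with the CLDB (-C) and
+# ZooKeeper (-Z) node IP lists, for example (illustrative addresses only):
+#
+#   /opt/mapr/server/configure.sh -C 10.0.0.2 -Z 10.0.0.3,10.0.0.4 -f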
+ +from sahara import context +from sahara.plugins.mapr.util import cluster_helper as clh_utils +import sahara.plugins.mapr.util.config_utils as cu +import sahara.plugins.mapr.util.names as n +from sahara.plugins.mapr.util import scaling +from sahara.plugins.mapr.util import start_helper as start_helper +import sahara.plugins.mapr.util.validation_utils as vu +import sahara.plugins.mapr.versions.base_context as bc +from sahara.plugins.mapr.versions import base_version_handler as bvh +import sahara.plugins.mapr.versions.v4_0_1_mrv1.cluster_configurer as cc +import sahara.plugins.utils as u + + +version = '4.0.1.mrv1' +SIXTY_SECONDS = 60 + + +class VersionHandler(bvh.BaseVersionHandler): + + def get_plugin_version(self): + return version + + def start_cluster(self, cluster): + start_helper.exec_configure_sh_on_cluster( + cluster, self.get_configure_sh_string(cluster)) + start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script()) + start_helper.setup_maprfs_on_cluster( + cluster, self.get_disk_setup_script()) + start_helper.start_zookeeper_nodes_on_cluster(cluster) + start_helper.start_warden_on_cldb_nodes(cluster) + context.sleep(SIXTY_SECONDS) + start_helper.start_warden_on_other_nodes(cluster) + start_helper.start_ecosystem(self.get_context(cluster)) + + def get_waiting_script(self): + return 'plugins/mapr/util/resources/waiting_script.sh' + + def scale_cluster(self, cluster, instances): + scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(), + self.get_waiting_script(), + self.get_context(cluster), + self.get_configure_sh_string(cluster), True) + + def decommission_nodes(self, cluster, instances): + scaling.decommission_nodes( + cluster, instances, self.get_configure_sh_string(cluster)) + + def get_cluster_configurer(self, cluster, plugin_spec): + return cc.ClusterConfigurer(cluster, plugin_spec) + + def get_cluster_validation_rules(self, cluster): + return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1), + vu.not_less_than_count_component_vr(n.CLDB, 1), + vu.not_less_than_count_component_vr(n.TASK_TRACKER, 1), + vu.not_less_than_count_component_vr(n.FILE_SERVER, 1), + vu.not_more_than_count_component_vr(n.OOZIE, 1), + vu.not_more_than_count_component_vr(n.WEB_SERVER, 1), + vu.equal_count_component_vr(n.JOBTRACKER, 1), + vu.node_dependency_satisfied_vr(n.TASK_TRACKER, n.FILE_SERVER), + vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)] + + def get_scaling_validation_rules(self): + return [] + + def get_edp_validation_rules(self): + return [] + + def get_configure_sh_string(self, cluster): + return ('/opt/mapr/server/configure.sh' + ' -C ' + clh_utils.get_cldb_nodes_ip(cluster) + + ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster) + + ' -f') + + def get_context(self, cluster): + return Context(cluster) + + +class Context(bc.BaseContext): + m7_enabled_config = n.IS_M7_ENABLED + hive_version_config = 'Hive Version' + oozie_version_config = 'Oozie Version' + + def __init__(self, cluster): + self.cluster = cluster + + def get_cluster(self): + return self.cluster + + def is_m7_enabled(self): + configs = cu.get_cluster_configs(self.get_cluster()) + return configs[n.GENERAL][Context.m7_enabled_config] + + def get_hadoop_version(self): + return '0.20.2' + + def get_rm_instance(self): + return u.get_instance(self.get_cluster(), n.JOBTRACKER) + + def get_rm_port(self): + return '9001' diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/__init__.py b/sahara/plugins/mapr/versions/v4_0_1_mrv2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/sahara/plugins/mapr/versions/v4_0_1_mrv2/cluster_configurer.py b/sahara/plugins/mapr/versions/v4_0_1_mrv2/cluster_configurer.py new file mode 100644 index 00000000..723986b6 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/cluster_configurer.py @@ -0,0 +1,24 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sahara.plugins.mapr.versions.base_cluster_configurer as bcc + + +class ClusterConfigurer(bcc.BaseClusterConfigurer): + + def get_hadoop_conf_dir(self): + return '/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop' + + def is_node_awareness_enabled(self): + return False diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/cldb.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/cldb.conf new file mode 100644 index 00000000..7f501d8c --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/cldb.conf @@ -0,0 +1,63 @@ +# +# CLDB Config file. +# Properties defined in this file are loaded during startup +# and are valid for only CLDB which loaded the config. +# These parameters are not persisted anywhere else. +# +# Wait until minimum number of fileserver register with +# CLDB before creating Root Volume +cldb.min.fileservers=1 +# CLDB listening port +cldb.port=7222 +# Number of worker threads +cldb.numthreads=10 +# CLDB webport +cldb.web.port=7221 +# CLDB https port +cldb.web.https.port=7443 +# Disable duplicate hostid detection +cldb.detect.dup.hostid.enabled=false +# Deprecated: This param is no longer supported. To configure +# the container cache, use the param cldb.containers.cache.percent +# Number of RW containers in cache +#cldb.containers.cache.entries=1000000 +# +# Percentage (integer) of Xmx setting to be used for container cache +#cldb.containers.cache.percent=20 +# +# Topology script to be used to determine +# Rack topology of node +# Script should take an IP address as input and print rack path +# on STDOUT. eg +# $>/home/mapr/topo.pl 10.10.10.10 +# $>/mapr-rack1 +# $>/home/mapr/topo.pl 10.10.10.20 +# $>/mapr-rack2 +#net.topology.script.file.name=/home/mapr/topo.pl +# +# Topology mapping file used to determine +# Rack topology of node +# File is of a 2 column format (space separated) +# 1st column is an IP address or hostname +# 2nd column is the rack path +# Line starting with '#' is a comment +# Example file contents +# 10.10.10.10 /mapr-rack1 +# 10.10.10.20 /mapr-rack2 +# host.foo.com /mapr-rack3 +#net.topology.table.file.name=/home/mapr/topo.txt +# +# ZooKeeper address +#cldb.zookeeper.servers=10.250.1.91:5181 +# Hadoop metrics jar version +#hadoop.version=0.20.2 +# CLDB JMX remote port +cldb.jmxremote.port=7220 +num.volmirror.threads=1 +# Set this to set the default topology for all volumes and nodes +# The default for all volumes is /data by default +# UNCOMMENT the below to change the default topology. 
+# For e.g., set cldb.default.topology=/mydata to create volumes
+# in /mydata topology and to place all nodes in /mydata topology
+# by default
+#cldb.default.topology=/mydata
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/core-site.xml b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/core-site.xml
new file mode 100644
index 00000000..506080a6
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/core-site.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>fs.default.name</name>
+  <value>maprfs:///</value>
+  <description>The name of the default file system. A URI whose
+  scheme and authority determine the FileSystem implementation. The
+  uri's scheme determines the config property (fs.SCHEME.impl) naming
+  the FileSystem implementation class. The uri's authority is used to
+  determine the host, port, etc. for a filesystem.
+  </description>
+</property>
+
+<property>
+  <name>fs.mapr.working.dir</name>
+  <value>/user/$USERNAME/</value>
+  <description>The default directory to be used with relative paths.
+  Note that $USERNAME is NOT an environmental variable, but just a placeholder
+  to indicate that it will be expanded to the corresponding username.
+  Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3n.block.size</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3n.blockSize</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3.block.size</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>fs.s3.blockSize</name>
+  <value>33554432</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.mapr.groups</name>
+  <value>*</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.mapr.hosts</name>
+  <value>*</value>
+</property>
+
+</configuration>
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/exports b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/exports
new file mode 100644
index 00000000..96111856
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/exports
@@ -0,0 +1,30 @@
+# Sample Exports file
+
+# for /mapr exports
+#
+
+#access_control -> order is specific to default
+# list the hosts before specifying a default for all
+# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
+# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw
+
+# special path to export clusters in mapr-clusters.conf. To disable exporting,
+# comment it out. To restrict access, use the exports_control
+#
+/mapr (rw)
+
+#to export only certain clusters, comment out the /mapr & uncomment.
+# Note: this will cause /mapr to be unexported
+#/mapr/clustername (rw)
+
+#to export /mapr only to certain hosts (using exports_control)
+#/mapr a.b.c.d(rw),e.f.g.h(ro)
+
+# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
+#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
+
+# export /mapr/cluster2 only to e.f.g.h (denied for others)
+#/mapr/cluster2 e.f.g.h(rw)
+
+# export /mapr/cluster3 rw to e.f.g.h & ro to others
+#/mapr/cluster3 e.f.g.h(rw) (ro)
diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/hadoop-metrics.properties b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/hadoop-metrics.properties
new file mode 100644
index 00000000..b65260ce
--- /dev/null
+++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/hadoop-metrics.properties
@@ -0,0 +1,41 @@
+#CLDB metrics config - Pick one out of null,file or ganglia.
+#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context + +# Configuration of the "cldb" context for null +#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread +#cldb.period=10 + +# Configuration of the "cldb" context for file +#cldb.class=org.apache.hadoop.metrics.file.FileContext +#cldb.period=60 +#cldb.fileName=/tmp/cldbmetrics.log + +# Configuration of the "cldb" context for ganglia +cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31 +cldb.period=10 +cldb.servers=localhost:8649 +cldb.spoof=1 + +#FileServer metrics config - Pick one out of null,file or ganglia. +#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context + +# Configuration of the "fileserver" context for null +#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread +#fileserver.period=10 + +# Configuration of the "fileserver" context for file +#fileserver.class=org.apache.hadoop.metrics.file.FileContext +#fileserver.period=60 +#fileserver.fileName=/tmp/fsmetrics.log + +# Configuration of the "fileserver" context for ganglia +fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31 +fileserver.period=37 +fileserver.servers=localhost:8649 +fileserver.spoof=1 + +maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext +maprmepredvariant.period=10 +maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal +maprmapred.period=10 + diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/mfs.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/mfs.conf new file mode 100644 index 00000000..5288dffd --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/mfs.conf @@ -0,0 +1,16 @@ +#mfs.num.compress.threads=1 +#mfs.max.aio.events=5000 +#mfs.disable.periodic.flush=0 +#mfs.io.disk.timeout=60 +#mfs.server.ip=127.0.0.1 +#mfs.max.resync.count=16 +#mfs.max.restore.count=16 +#mfs.ignore.container.delete=0 +#mfs.ignore.readdir.pattern=0 +mfs.server.port=5660 +#mfs.subnets.whitelist=127.0.0.1/8 +#UNCOMMENT this line to disable bulk writes +#mfs.bulk.writes.enabled=0 +#UNCOMMENT this to set the topology of this node +#For e.g., to set this node's topology to /compute-only uncomment the below line +#mfs.network.location=/compute-only diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/nfsserver.conf b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/nfsserver.conf new file mode 100644 index 00000000..fa28d369 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/default/nfsserver.conf @@ -0,0 +1,43 @@ +# Configuration for nfsserver + +# +# The system defaults are in the comments +# + +# Default compression is true +#Compression = true + +# chunksize is 64M +#ChunkSize = 67108864 + +# Number of threads for compression/decompression: default=2 +#CompThreads = 2 + +#Mount point for the ramfs file for mmap +#RamfsMntDir = /ramfs/mapr + +# Size of the ramfile to use (percent of total physical memory) default=0.25 +# 0: disables the use of ramfs +#RamfsSize = 0.25 + +# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF +#Loglevel = INFO + +#Duplicate Request cache size & timeout in seconds +#DrCacheSize = 20480 +#DrCacheTimeout = 62 +# To keep the drcache lean, we only cache the response if the +# time we took to populate is greater than 50% of DrCacheTimeout. +# Set it to 0 to disable this optimization, Note that the DrCacheSize or +# DrCacheTimeout will also need to be changed. 
Ex: if the nfsserver supports +# 10,000 ops/sec (modification ops): then DrCacheSize will need to change +# to: 10,000*DrCacheTimeout = 620,000 +#DRCacheTimeOutOpt = 0.5 + +#NFS fileid, by default the fileid is of 32 bit size. +#Set Use32BitFileId=0 to use 64 bit fileid (inode number) +#Use32BitFileId=0 + +#Auto refresh exports time interval in mins. +#default is 0, means there is no auto refresh. +#AutoRefreshExportsTimeInterval = 5 diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/plugin_spec.json b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/plugin_spec.json new file mode 100644 index 00000000..2022f403 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/plugin_spec.json @@ -0,0 +1,203 @@ +{ + "files": [ + { + "remote": null, + "type": null, + "configs": { + "required": { + "cluster": [ + { + "name": "Enable MapR-DB", + "config_type": "bool", + "default_value": false, + "priority": 1 + } + ] + } + } + }, + { + "remote": "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/core-site.xml", + "local": "default/core-site.xml", + "type": "xml", + "configs": { + "optional": { + "cluster": [ + { + "name": "fs.swift.impl", + "default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem" + }, + { + "name": "fs.swift.connect.timeout", + "config_type": "int", + "default_value": 15000 + }, + { + "name": "fs.swift.socket.timeout", + "config_type": "int", + "default_value": 60000 + }, + { + "name": "fs.swift.connect.retry.count", + "config_type": "int", + "default_value": 3 + }, + { + "name": "fs.swift.connect.throttle.delay", + "config_type": "int", + "default_value": 0 + }, + { + "name": "fs.swift.blocksize", + "config_type": "int", + "default_value": 32768 + }, + { + "name": "fs.swift.partsize", + "config_type": "int", + "default_value": 4718592 + }, + { + "name": "fs.swift.requestsize", + "config_type": "int", + "default_value": 64 + }, + { + "name": "fs.swift.service.sahara.public", + "config_type": "bool", + "default_value": true + }, + { + "name": "fs.swift.service.sahara.http.port", + "config_type": "int", + "default_value": 8080 + }, + { + "name": "fs.swift.service.sahara.https.port", + "config_type": "int", + "default_value": 443 + }, + { + "name": "fs.swift.service.sahara.auth.endpoint.prefix", + "default_value": "/endpoints/AUTH_" + } + ] + } + } + }, + { + "remote": "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/mapred-site.xml", + "type": "xml" + }, + { + "remote": "/opt/mapr/conf/hadoop-metrics.properties", + "local": "default/hadoop-metrics.properties", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/cldb.conf", + "local": "default/cldb.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/mfs.conf", + "local": "default/mfs.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/nfsserver.conf", + "local": "default/nfsserver.conf", + "type": "properties" + }, + { + "remote": "/opt/mapr/conf/exports", + "local": "default/exports", + "type": "raw" + } + ], + "services": [ + { + "name": "general", + "files": [ + null + ] + }, + { + "name": "Management", + "node_processes": [ + "ZooKeeper", + "Webserver", + "MapR-Client", + "Metrics" + ] + }, + { + "name": "YARN", + "node_processes": [ + "HistoryServer", + "ResourceManager", + "NodeManager" + ], + "files": [ + "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/mapred-site.xml" + ] + }, + { + "name": "MapR FS", + "node_processes": [ + "CLDB", + "FileServer", + "NFS" + ], + "files": [ + "/opt/mapr/conf/cldb.conf", + "/opt/mapr/conf/hadoop-metrics.properties", + 
"/opt/mapr/conf/mfs.conf", + "/opt/mapr/conf/nfsserver.conf", + "/opt/mapr/conf/exports", + "/opt/mapr/hadoop/hadoop-2.4.1/etc/hadoop/core-site.xml" + ] + }, + { + "name": "HBase", + "node_processes": [ + "HBase-Master", + "HBase-RegionServer", + "HBase-Client" + ] + }, + { + "name": "Hive", + "node_processes": [ + "HiveMetastore", + "HiveServer2" + ], + "versions": [ + "0.13", + "0.12" + ] + }, + { + "name": "Oozie", + "node_processes": [ + "Oozie" + ], + "versions": [ + "4.0.1", + "4.0.0" + ] + }, + { + "name": "Pig", + "node_processes": [ + "Pig" + ] + }, + { + "name": "Mahout", + "node_processes": [ + "Mahout" + ] + } + ] +} \ No newline at end of file diff --git a/sahara/plugins/mapr/versions/v4_0_1_mrv2/version_handler.py b/sahara/plugins/mapr/versions/v4_0_1_mrv2/version_handler.py new file mode 100755 index 00000000..1b15f2f0 --- /dev/null +++ b/sahara/plugins/mapr/versions/v4_0_1_mrv2/version_handler.py @@ -0,0 +1,112 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sahara import context +from sahara.plugins.mapr.util import cluster_helper as clh_utils +import sahara.plugins.mapr.util.config_utils as cu +import sahara.plugins.mapr.util.names as n +from sahara.plugins.mapr.util import scaling +from sahara.plugins.mapr.util import start_helper as start_helper +import sahara.plugins.mapr.util.validation_utils as vu +import sahara.plugins.mapr.versions.base_context as bc +from sahara.plugins.mapr.versions import base_version_handler as bvh +import sahara.plugins.mapr.versions.v4_0_1_mrv2.cluster_configurer as cc + + +SIXTY_SECONDS = 60 +WAIT_OOZIE_INTERVAL = 300 + + +version = '4.0.1.mrv2' + + +class VersionHandler(bvh.BaseVersionHandler): + + def get_plugin_version(self): + return version + + def start_cluster(self, cluster): + start_helper.exec_configure_sh_on_cluster( + cluster, self.get_configure_sh_string(cluster)) + start_helper.wait_for_mfs_unlock(cluster, self.get_waiting_script()) + start_helper.setup_maprfs_on_cluster( + cluster, self.get_disk_setup_script()) + start_helper.start_zookeeper_nodes_on_cluster(cluster) + start_helper.start_warden_on_cldb_nodes(cluster) + context.sleep(SIXTY_SECONDS) + start_helper.start_warden_on_other_nodes(cluster) + start_helper.start_ecosystem(self.get_context(cluster)) + + def get_cluster_configurer(self, cluster, plugin_spec): + return cc.ClusterConfigurer(cluster, plugin_spec) + + def get_configure_sh_string(self, cluster): + return ('/opt/mapr/server/configure.sh' + + ' -C ' + clh_utils.get_cldb_nodes_ip(cluster) + + ' -Z ' + clh_utils.get_zookeeper_nodes_ip(cluster) + + ' -RM ' + clh_utils.get_resourcemanager_ip(cluster) + + ' -HS ' + clh_utils.get_historyserver_ip(cluster) + ' -f') + + def scale_cluster(self, cluster, instances): + scaling.scale_cluster(cluster, instances, self.get_disk_setup_script(), + self.get_waiting_script(), + self.get_context(cluster), + self.get_configure_sh_string(cluster), False) + + def decommission_nodes(self, cluster, instances): + scaling.decommission_nodes( 
+ cluster, instances, self.get_configure_sh_string(cluster)) + + def get_waiting_script(self): + return 'plugins/mapr/util/resources/waiting_script.sh' + + def get_cluster_validation_rules(self, cluster): + return [vu.not_less_than_count_component_vr(n.ZOOKEEPER, 1), + vu.not_less_than_count_component_vr(n.CLDB, 1), + vu.not_less_than_count_component_vr(n.NODE_MANAGER, 1), + vu.not_less_than_count_component_vr(n.FILE_SERVER, 1), + vu.not_more_than_count_component_vr(n.OOZIE, 1), + vu.not_more_than_count_component_vr(n.WEB_SERVER, 1), + vu.equal_count_component_vr(n.RESOURCE_MANAGER, 1), + vu.equal_count_component_vr(n.HISTORY_SERVER, 1), + vu.node_dependency_satisfied_vr(n.NODE_MANAGER, n.FILE_SERVER), + vu.node_dependency_satisfied_vr(n.CLDB, n.FILE_SERVER)] + + def get_scaling_validation_rules(self): + return [] + + def get_edp_validation_rules(self): + return [] + + def get_context(self, cluster): + return Context(cluster) + + +class Context(bc.BaseContext): + m7_enabled_config = n.IS_M7_ENABLED + hive_version_config = 'Hive Version' + oozie_version_config = 'Oozie Version' + + def __init__(self, cluster): + self.cluster = cluster + + def get_cluster(self): + return self.cluster + + def is_m7_enabled(self): + configs = cu.get_cluster_configs(self.get_cluster()) + return configs[n.GENERAL][Context.m7_enabled_config] + + def get_hadoop_version(self): + return '2.4.1' diff --git a/sahara/plugins/mapr/versions/version_handler_factory.py b/sahara/plugins/mapr/versions/version_handler_factory.py new file mode 100644 index 00000000..8156d2fa --- /dev/null +++ b/sahara/plugins/mapr/versions/version_handler_factory.py @@ -0,0 +1,53 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
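+
+# Discovers version handlers at import time by scanning this package for
+# version sub-directories (v3_1_1, v4_0_1_mrv1, v4_0_1_mrv2) and importing
+# each one's version_handler module.  Illustrative use (the order returned
+# by get_versions() may vary):
+#
+#   factory = VersionHandlerFactory.get()
+#   factory.get_versions()            # ['3.1.1', '4.0.1.mrv1', '4.0.1.mrv2']
+#   handler = factory.get_handler('4.0.1.mrv2')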
+ +import os + + +def _load_versions(): + d_name = os.path.dirname(__file__) + m_template = 'sahara.plugins.mapr.versions.%s.version_handler' + + def predicate(v_dir): + return os.path.isdir(os.path.join(d_name, v_dir)) + + def mapper(v_dir): + return m_template % v_dir + + def reducer(versions, m_name): + m = __import__(m_name, fromlist=['sahara']) + versions[m.version] = getattr(m, 'VersionHandler')() + return versions + + v_dirs = filter(predicate, os.listdir(d_name)) + m_names = map(mapper, v_dirs) + return reduce(reducer, m_names, {}) + + +class VersionHandlerFactory(object): + instance = None + versions = None + + @staticmethod + def get(): + if not VersionHandlerFactory.instance: + VersionHandlerFactory.versions = _load_versions() + VersionHandlerFactory.instance = VersionHandlerFactory() + return VersionHandlerFactory.instance + + def get_versions(self): + return VersionHandlerFactory.versions.keys() + + def get_handler(self, version): + return VersionHandlerFactory.versions[version] diff --git a/sahara/tests/integration/configs/config.py b/sahara/tests/integration/configs/config.py index 4c0d7c8d..2bc46b5b 100644 --- a/sahara/tests/integration/configs/config.py +++ b/sahara/tests/integration/configs/config.py @@ -545,6 +545,259 @@ HDP2_CONFIG_OPTS = [ ] +MAPR_CONFIG_GROUP = cfg.OptGroup(name='MAPR') +MAPR_CONFIG_OPTS = [ + cfg.StrOpt('PLUGIN_NAME', + default='mapr', + help='Name of plugin.'), + cfg.StrOpt('IMAGE_ID', + help='ID for image which is used for cluster creation. Also ' + 'you can specify image name or tag of image instead of ' + 'image ID. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_NAME', + help='Name for image which is used for cluster creation. Also ' + 'you can specify image ID or tag of image instead of ' + 'image name. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_TAG', + help='Tag for image which is used for cluster creation. Also ' + 'you can specify image ID or image name instead of tag of ' + 'image. 
If you do not specify image related parameters, ' + 'then image for cluster creation will be chosen by ' + 'tag "sahara_i_tests".'), + cfg.StrOpt('SSH_USERNAME', + help='Username to get cluster node with SSH.'), + cfg.ListOpt('MASTER_NODE_PROCESSES', + default=['CLDB', 'FileServer', 'ZooKeeper', + 'TaskTracker', 'JobTracker', 'Oozie'], + help='A list of processes that will be launched ' + 'on master node'), + cfg.ListOpt('WORKER_NODE_PROCESSES', + default=['FileServer', 'TaskTracker', 'Pig'], + help='A list of processes that will be launched ' + 'on worker nodes'), + cfg.StrOpt('HADOOP_VERSION', + default='1.0.3', + help='Version of Hadoop.'), + cfg.StrOpt('HADOOP_USER', + default='mapr', + help='Username which is used for access to Hadoop services.'), + cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH', + default=('/opt/mapr/hadoop/hadoop-0.20.2' + '/hadoop-0.20.2-dev-examples.jar'), + help='Path to hadoop examples jar file.'), + cfg.StrOpt('HADOOP_LOG_DIRECTORY', + default='/opt/mapr/hadoop/hadoop-0.20.2/logs/userlogs', + help='Directory where logs of completed jobs are located.'), + cfg.StrOpt('HADOOP_LOG_DIRECTORY_ON_VOLUME', + default=('/volumes/disk1/mapr/hadoop/' + 'hadoop-0.20.2/logs/userlogs'), + help='Directory where logs of completed jobs on volume mounted ' + 'to node are located.'), + cfg.IntOpt('SCALE_EXISTING_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'an existing node group.'), + cfg.IntOpt('SCALE_NEW_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'a new node group.'), + cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS', + default={ + 'JobTracker': 50030, + 'CLDB': 7222, + 'TaskTracker': 50060, + 'OOZIE': 11000 + }, + help='Hadoop process map with ports for MAPR plugin.' + ), + cfg.DictOpt('PROCESS_NAMES', + default={ + 'nn': 'CLDB', + 'tt': 'TaskTracker', + 'dn': 'FileServer' + }, + help='Names for namenode, tasktracker and datanode ' + 'processes.'), + cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN', + default=True, + help='If this flag is True, then all tests for MAPR plugin ' + 'will be skipped.'), + cfg.BoolOpt('SKIP_CINDER_TEST', default=False), + cfg.BoolOpt('SKIP_EDP_TEST', default=False), + cfg.BoolOpt('SKIP_MAP_REDUCE_TEST', default=False), + cfg.BoolOpt('SKIP_SWIFT_TEST', default=False), + cfg.BoolOpt('SKIP_SCALING_TEST', default=False) +] + +MAPR4_1_CONFIG_GROUP = cfg.OptGroup(name='MAPR4_1') +MAPR4_1_CONFIG_OPTS = [ + cfg.StrOpt('PLUGIN_NAME', + default='mapr4_1', + help='Name of plugin.'), + cfg.StrOpt('IMAGE_ID', + default=None, + help='ID for image which is used for cluster creation. Also ' + 'you can specify image name or tag of image instead of ' + 'image ID. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_NAME', + default=None, + help='Name for image which is used for cluster creation. Also ' + 'you can specify image ID or tag of image instead of ' + 'image name. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_TAG', + default=None, + help='Tag for image which is used for cluster creation. Also ' + 'you can specify image ID or image name instead of tag of ' + 'image. 
If you do not specify image related parameters, ' + 'then image for cluster creation will be chosen by ' + 'tag "sahara_i_tests".'), + cfg.StrOpt('SSH_USERNAME', + default=None, + help='Username to get cluster node with SSH.'), + cfg.ListOpt('MASTER_NODE_PROCESSES', + default=['CLDB', 'FileServer', 'ZooKeeper', + 'TaskTracker', 'JobTracker', 'Oozie'], + help='A list of processes that will be launched ' + 'on master node'), + cfg.ListOpt('WORKER_NODE_PROCESSES', + default=['FileServer', 'TaskTracker', 'Pig'], + help='A list of processes that will be launched ' + 'on worker nodes'), + cfg.StrOpt('HADOOP_VERSION', + default='2.4.0', + help='Version of Hadoop.'), + cfg.StrOpt('HADOOP_USER', + default='mapr', + help='Username which is used for access to Hadoop services.'), + cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH', + default=('/opt/mapr/hadoop/hadoop-2.3.0/share/hadoop' + '/hadoop-mapreduce-examples-2.3.0-mapr-4.0.0-FCS.jar'), + help='Path to hadoop examples jar file.'), + cfg.IntOpt('SCALE_EXISTING_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'an existing node group.'), + cfg.IntOpt('SCALE_NEW_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'a new node group.'), + cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS', + default={ + 'JobTracker': 50030, + 'CLDB': 7222, + 'TaskTracker': 50060, + 'OOZIE': 11000 + }, + help='Hadoop process map with ports for MAPR plugin.' + ), + cfg.DictOpt('PROCESS_NAMES', + default={ + 'nn': 'CLDB', + 'tt': 'TaskTracker', + 'dn': 'FileServer' + }, + help='Names for namenode, tasktracker and datanode ' + 'processes.'), + cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN', + default=True, + help='If this flag is True, then all tests for MAPR plugin ' + 'will be skipped.'), + cfg.BoolOpt('SKIP_EDP_TEST', default=False), + cfg.BoolOpt('SKIP_SWIFT_TEST', default=False), + cfg.BoolOpt('SKIP_SCALING_TEST', default=False) +] + +MAPR4_2_CONFIG_GROUP = cfg.OptGroup(name='MAPR4_2') +MAPR4_2_CONFIG_OPTS = [ + cfg.StrOpt('PLUGIN_NAME', + default='mapr4_2', + help='Name of plugin.'), + cfg.StrOpt('IMAGE_ID', + default=None, + help='ID for image which is used for cluster creation. Also ' + 'you can specify image name or tag of image instead of ' + 'image ID. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_NAME', + default=None, + help='Name for image which is used for cluster creation. Also ' + 'you can specify image ID or tag of image instead of ' + 'image name. If you do not specify image related ' + 'parameters, then image for cluster creation will be ' + 'chosen by tag "sahara_i_tests".'), + cfg.StrOpt('IMAGE_TAG', + default=None, + help='Tag for image which is used for cluster creation. Also ' + 'you can specify image ID or image name instead of tag of ' + 'image. 
If you do not specify image related parameters, ' + 'then image for cluster creation will be chosen by ' + 'tag "sahara_i_tests".'), + cfg.StrOpt('SSH_USERNAME', + default=None, + help='Username to get cluster node with SSH.'), + cfg.ListOpt('MASTER_NODE_PROCESSES', + default=['CLDB', 'FileServer', 'ZooKeeper', 'NodeManager', + 'ResourceManager', 'HistoryServer', 'Oozie'], + help='A list of processes that will be launched ' + 'on master node'), + cfg.ListOpt('WORKER_NODE_PROCESSES', + default=['FileServer', 'NodeManager', 'Pig'], + help='A list of processes that will be launched ' + 'on worker nodes'), + cfg.StrOpt('HADOOP_VERSION', + default='2.4.0', + help='Version of Hadoop.'), + cfg.StrOpt('HADOOP_USER', + default='mapr', + help='Username which is used for access to Hadoop services.'), + cfg.StrOpt('HADOOP_EXAMPLES_JAR_PATH', + default=('/opt/mapr/hadoop/hadoop-2.3.0/share/hadoop' + '/hadoop-mapreduce-examples-2.3.0-mapr-4.0.0-FCS.jar'), + help='Path to hadoop examples jar file.'), + cfg.IntOpt('SCALE_EXISTING_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'an existing node group.'), + cfg.IntOpt('SCALE_NEW_NG_COUNT', + default=1, + help='The number of hosts to add while scaling ' + 'a new node group.'), + cfg.DictOpt('HADOOP_PROCESSES_WITH_PORTS', + default={ + 'ResourceManager': 8032, + 'CLDB': 7222, + 'HistoryServer': 19888, + 'OOZIE': 11000 + }, + help='Hadoop process map with ports for MAPR plugin.' + ), + cfg.DictOpt('PROCESS_NAMES', + default={ + 'nn': 'CLDB', + 'tt': 'NodeManager', + 'dn': 'FileServer' + }, + help='Names for namenode, tasktracker and datanode ' + 'processes.'), + cfg.BoolOpt('SKIP_ALL_TESTS_FOR_PLUGIN', + default=True, + help='If this flag is True, then all tests for MAPR plugin ' + 'will be skipped.'), + cfg.BoolOpt('SKIP_EDP_TEST', default=False), + cfg.BoolOpt('SKIP_SWIFT_TEST', default=False), + cfg.BoolOpt('SKIP_SCALING_TEST', default=False) +] + + SPARK_CONFIG_GROUP = cfg.OptGroup(name='SPARK') SPARK_CONFIG_OPTS = [ cfg.StrOpt('PLUGIN_NAME', @@ -641,6 +894,9 @@ class ITConfig(object): register_config(cfg.CONF, HDP2_CONFIG_GROUP, HDP2_CONFIG_OPTS) register_config( cfg.CONF, VANILLA_TWO_CONFIG_GROUP, VANILLA_TWO_CONFIG_OPTS) + register_config(cfg.CONF, MAPR_CONFIG_GROUP, MAPR_CONFIG_OPTS) + register_config(cfg.CONF, MAPR4_1_CONFIG_GROUP, MAPR4_1_CONFIG_OPTS) + register_config(cfg.CONF, MAPR4_2_CONFIG_GROUP, MAPR4_2_CONFIG_OPTS) register_config(cfg.CONF, SPARK_CONFIG_GROUP, SPARK_CONFIG_OPTS) cfg.CONF( @@ -654,4 +910,7 @@ class ITConfig(object): self.cdh_config = cfg.CONF.CDH self.hdp_config = cfg.CONF.HDP self.hdp2_config = cfg.CONF.HDP2 + self.mapr_config = cfg.CONF.MAPR + self.mapr4_1_config = cfg.CONF.MAPR4_1 + self.mapr4_2_config = cfg.CONF.MAPR4_2 self.spark_config = cfg.CONF.SPARK diff --git a/sahara/tests/integration/configs/itest.conf.sample b/sahara/tests/integration/configs/itest.conf.sample index e2fccc59..80757773 100644 --- a/sahara/tests/integration/configs/itest.conf.sample +++ b/sahara/tests/integration/configs/itest.conf.sample @@ -22,3 +22,7 @@ SKIP_CLUSTER_CONFIG_TEST = True IMAGE_ID = 'f7de0ea9-eb4d-4b63-8ed0-abcf11cfaff8' SKIP_ALL_TESTS_FOR_PLUGIN = False + +[MAPR] +IMAGE_ID = 'sahara-mapr-image' +SKIP_ALL_TESTS_FOR_PLUGIN = False \ No newline at end of file diff --git a/sahara/tests/integration/configs/itest.conf.sample-full b/sahara/tests/integration/configs/itest.conf.sample-full index f68b449a..74cf4e70 100644 --- a/sahara/tests/integration/configs/itest.conf.sample-full +++ 
b/sahara/tests/integration/configs/itest.conf.sample-full @@ -279,6 +279,188 @@ #PROCESS_NAMES = nn: NAMENODE, tt: NODEMANAGER, dn: DATANODE +#SKIP_ALL_TESTS_FOR_PLUGIN = False +#SKIP_EDP_TEST = False +#SKIP_SWIFT_TEST = False +#SKIP_SCALING_TEST = False + + +[MAPR] + +# Name of plugin (string value) +#PLUGIN_NAME = 'mapr' + + +# ID for image which is used for cluster creation. Also you can specify image +# name or tag of image instead of image ID. If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_ID = + +# Name for image which is used for cluster creation. Also you can specify image +# ID or tag of image instead of image name. If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_NAME = + +# Tag for image which is used for cluster creation. Also you can specify image +# ID or image name instead of tag of image. If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_TAG = + + +# Username to get cluster node with SSH (string value) +#SSH_USERNAME = + +# A list of processes that will be launched on master node (list value) +#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, TaskTracker, JobTracker, Oozie + +# A list of processes that will be launched on worker nodes (list value) +#WORKER_NODE_PROCESSES = FileServer, TaskTracker, Pig + +# Version of Hadoop (string value) +#HADOOP_VERSION = '1.0.3' + +# Username which is used for access to Hadoop services (string value) +#HADOOP_USER = 'mapr' + +# Directory where logs of completed jobs are located (string value) +#HADOOP_LOG_DIRECTORY = '/opt/mapr/hadoop/hadoop-0.20.2/logs/userlogs' + +# Directory where logs of completed jobs on volume mounted to node are located +# (string value) +#HADOOP_LOG_DIRECTORY_ON_VOLUME = '/volumes/disk1/mapr/hadoop/hadoop-0.20.2/logs/userlogs' + +# The number of hosts to add while scaling an existing node group +#SCALE_EXISTING_NG_COUNT = 1 + +# The number of hosts to add while scaling a new node group +#SCALE_NEW_NG_COUNT = 1 + +# (dictionary value) +#HADOOP_PROCESSES_WITH_PORTS = JobTracker: 50030, CLDB: 7222, TaskTracker: 50060 + + +# (dictionary value) +#PROCESS_NAMES = nn: CLDB, tt: TaskTracker, dn: FileServer + + +#SKIP_ALL_TESTS_FOR_PLUGIN = False +#SKIP_CINDER_TEST = False +#SKIP_MAP_REDUCE_TEST = False +#SKIP_SWIFT_TEST = False +#SKIP_SCALING_TEST = False + +[MAPR4_1] + +# Name of plugin (string value) +#PLUGIN_NAME = 'mapr4_1' + +# ID for image which is used for cluster creation. Also you can specify image +# name or tag of image instead of image ID. If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_ID = + +# Name for image which is used for cluster creation. Also you can specify image +# ID or tag of image instead of image name. If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_NAME = + +# Tag for image which is used for cluster creation. Also you can specify image +# ID or image name instead of tag of image. 
If you do not specify image related
+# parameters then image for cluster creation will be chosen by tag
+# "sahara_i_tests" (string value)
+#IMAGE_TAG =
+
+# Username to get cluster node with SSH (string value)
+#SSH_USERNAME =
+
+# A list of processes that will be launched on master node (list value)
+#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, TaskTracker, JobTracker, Oozie
+
+# A list of processes that will be launched on worker nodes (list value)
+#WORKER_NODE_PROCESSES = FileServer, TaskTracker, Pig
+
+# Version of Hadoop (string value)
+#HADOOP_VERSION = '2.4.0'
+
+# Username which is used for access to Hadoop services (string value)
+#HADOOP_USER = 'mapr'
+
+# The number of hosts to add while scaling an existing node group
+#SCALE_EXISTING_NG_COUNT = 1
+
+# The number of hosts to add while scaling a new node group
+#SCALE_NEW_NG_COUNT = 1
+
+# (dictionary value)
+#HADOOP_PROCESSES_WITH_PORTS = JobTracker: 50030, CLDB: 7222, TaskTracker: 50060
+
+
+# (dictionary value)
+#PROCESS_NAMES = nn: CLDB, tt: TaskTracker, dn: FileServer
+
+
+#SKIP_ALL_TESTS_FOR_PLUGIN = False
+#SKIP_EDP_TEST = False
+#SKIP_SWIFT_TEST = False
+#SKIP_SCALING_TEST = False
+
+[MAPR4_2]
+
+# Name of plugin (string value)
+#PLUGIN_NAME = 'mapr4_2'
+
+# ID for image which is used for cluster creation. Also you can specify image
+# name or tag of image instead of image ID. If you do not specify image related
+# parameters then image for cluster creation will be chosen by tag
+# "sahara_i_tests" (string value)
+#IMAGE_ID =
+
+# Name for image which is used for cluster creation. Also you can specify image
+# ID or tag of image instead of image name. If you do not specify image related
+# parameters then image for cluster creation will be chosen by tag
+# "sahara_i_tests" (string value)
+#IMAGE_NAME =
+
+# Tag for image which is used for cluster creation. Also you can specify image
+# ID or image name instead of tag of image.
If you do not specify image related +# parameters then image for cluster creation will be chosen by tag +# "sahara_i_tests" (string value) +#IMAGE_TAG = + +# Username to get cluster node with SSH (string value) +#SSH_USERNAME = + +# A list of processes that will be launched on master node (list value) +#MASTER_NODE_PROCESSES = CLDB, FileServer, ZooKeeper, NodeManager, ResourceManager, HistoryServer, Oozie + +# A list of processes that will be launched on worker nodes (list value) +#WORKER_NODE_PROCESSES = FileServer, NodeManager, Pig + +# Version of Hadoop (string value) +#HADOOP_VERSION = '2.4.0' + +# Username which is used for access to Hadoop services (string value) +#HADOOP_USER = 'mapr' + +# The number of hosts to add while scaling an existing node group +#SCALE_EXISTING_NG_COUNT = 1 + +# The number of hosts to add while scaling a new node group +#SCALE_NEW_NG_COUNT = 1 + +# (dictionary value) +#HADOOP_PROCESSES_WITH_PORTS = ResourceManager: 8032, CLDB: 7222, HistoryServer: 19888 + + +# (dictionary value) +#PROCESS_NAMES = nn: CLDB, tt: NodeManager, dn: FileServer + + #SKIP_ALL_TESTS_FOR_PLUGIN = False #SKIP_EDP_TEST = False #SKIP_SWIFT_TEST = False diff --git a/sahara/tests/integration/tests/base.py b/sahara/tests/integration/tests/base.py index 953877e2..a926655c 100644 --- a/sahara/tests/integration/tests/base.py +++ b/sahara/tests/integration/tests/base.py @@ -77,6 +77,9 @@ class ITestCase(testcase.WithAttributes, base.BaseTestCase): self.vanilla_config = cfg.ITConfig().vanilla_config self.vanilla_two_config = cfg.ITConfig().vanilla_two_config self.hdp_config = cfg.ITConfig().hdp_config + self.mapr_config = cfg.ITConfig().mapr_config + self.mapr4_1_config = cfg.ITConfig().mapr4_1_config + self.mapr4_2_config = cfg.ITConfig().mapr4_2_config telnetlib.Telnet( self.common_config.SAHARA_HOST, self.common_config.SAHARA_PORT @@ -366,6 +369,25 @@ class ITestCase(testcase.WithAttributes, base.BaseTestCase): finally: self.close_ssh_connection() + def await_active_tasktracker(self, node_info, plugin_config): + self.open_ssh_connection( + node_info['namenode_ip'], plugin_config.SSH_USERNAME) + for i in range(self.common_config.HDFS_INITIALIZATION_TIMEOUT * 6): + time.sleep(10) + active_tasktracker_count = self.execute_command( + 'sudo -u %s bash -lc "hadoop job -list-active-trackers" ' + '| grep "^tracker_" | wc -l' + % plugin_config.HADOOP_USER)[1] + active_tasktracker_count = int(active_tasktracker_count) + if (active_tasktracker_count == node_info['tasktracker_count']): + break + else: + self.fail( + 'Tasktracker or datanode cannot be started within ' + '%s minute(s) for namenode.' + % self.common_config.HDFS_INITIALIZATION_TIMEOUT) + self.close_ssh_connection() + # --------------------------------Remote--------------------------------------- def connect_to_swift(self): diff --git a/sahara/tests/integration/tests/gating/test_mapr4_1_gating.py b/sahara/tests/integration/tests/gating/test_mapr4_1_gating.py new file mode 100644 index 00000000..4c95b7f3 --- /dev/null +++ b/sahara/tests/integration/tests/gating/test_mapr4_1_gating.py @@ -0,0 +1,233 @@ +# Copyright (c) 2014 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from testtools import testcase + +from sahara.tests.integration.configs import config as cfg +from sahara.tests.integration.tests import base as b +from sahara.tests.integration.tests import edp +from sahara.tests.integration.tests import scaling +from sahara.tests.integration.tests import swift +from sahara.utils import edp as utils_edp + + +class Mapr4_1GatingTest(swift.SwiftTest, scaling.ScalingTest, + edp.EDPTest): + + config = cfg.ITConfig().mapr4_1_config + SKIP_EDP_TEST = config.SKIP_EDP_TEST + SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST + SKIP_SCALING_TEST = config.SKIP_SCALING_TEST + + def setUp(self): + super(Mapr4_1GatingTest, self).setUp() + self.cluster_id = None + self.cluster_template_id = None + + def _prepare_test(self): + self.mapr4_1_config = cfg.ITConfig().mapr4_1_config + self.floating_ip_pool = self.common_config.FLOATING_IP_POOL + self.internal_neutron_net = None + if self.common_config.NEUTRON_ENABLED: + self.internal_neutron_net = self.get_internal_neutron_net_id() + self.floating_ip_pool = ( + self.get_floating_ip_pool_id_for_neutron_net()) + + self.mapr4_1_config.IMAGE_ID, self.mapr4_1_config.SSH_USERNAME = ( + (self.get_image_id_and_ssh_username(self.mapr4_1_config))) + + @b.errormsg("Failure while 'jt-nn' node group template creation: ") + def _create_jt_nn_ng_template(self): + template = { + 'name': 'test-node-group-template-mapr4_1-jt-nn', + 'plugin_config': self.mapr4_1_config, + 'description': 'test node group template for MAPR plugin', + # NEED CHANGES MASTER_NODE + 'node_processes': self.mapr4_1_config.MASTER_NODE_PROCESSES, + 'floating_ip_pool': self.floating_ip_pool, + 'node_configs': {} + } + self.ng_tmpl_jt_nn_id = self.create_node_group_template(**template) + self.addCleanup(self.delete_objects, + node_group_template_id_list=[self.ng_tmpl_jt_nn_id]) + + @b.errormsg("Failure while 'nm-dn' node group template creation: ") + def _create_nm_dn_ng_template(self): + template = { + 'name': 'test-node-group-template-mapr4_1-nm-dn', + 'plugin_config': self.mapr4_1_config, + 'description': 'test node group template for MAPR plugin', + # NEED CHANGES WORKER + 'node_processes': self.mapr4_1_config.WORKER_NODE_PROCESSES, + 'floating_ip_pool': self.floating_ip_pool, + 'node_configs': {} + } + self.ng_tmpl_nm_dn_id = self.create_node_group_template(**template) + self.addCleanup(self.delete_objects, + node_group_template_id_list=[self.ng_tmpl_nm_dn_id]) + + @b.errormsg("Failure while cluster template creation: ") + def _create_cluster_template(self): + template = { + 'name': 'test-cluster-template-mapr4_1', + 'plugin_config': self.mapr4_1_config, + 'description': 'test cluster template for MAPR plugin', + 'cluster_configs': { + 'YARN': { + 'yarn.log-aggregation-enable': False + } + }, + 'node_groups': [ + { + 'name': 'master-node-dn', + 'node_group_template_id': self.ng_tmpl_jt_nn_id, + 'count': 1 + }, + { + 'name': 'worker-node-nm', + 'node_group_template_id': self.ng_tmpl_nm_dn_id, + 'count': 3 + } + ], + 'net_id': self.internal_neutron_net + } + self.cluster_template_id = self.create_cluster_template(**template) + self.addCleanup(self.delete_objects, + 
cluster_template_id=self.cluster_template_id) + + @b.errormsg("Failure while cluster creation: ") + def _create_cluster(self): + cluster_name = '%s-%s-v2' % (self.common_config.CLUSTER_NAME, + self.mapr4_1_config.PLUGIN_NAME) + cluster = { + 'name': cluster_name, + 'plugin_config': self.mapr4_1_config, + 'cluster_template_id': self.cluster_template_id, + 'description': 'test cluster', + 'cluster_configs': {} + } + cluster_id = self.create_cluster(**cluster) + self.addCleanup(self.delete_objects, cluster_id=cluster_id) + self.poll_cluster_state(cluster_id) + self.cluster_info = self.get_cluster_info(self.mapr4_1_config) + self.await_active_tasktracker( + self.cluster_info['node_info'], self.mapr4_1_config) + + @b.errormsg("Failure during check of Swift availability: ") + def _check_swift(self): + self.check_swift_availability(self.cluster_info) + + @b.errormsg("Failure while EDP testing: ") + def _check_edp(self): + self.poll_jobs_status(list(self._run_edp_tests())) + + def _run_edp_tests(self): + skipped_edp_job_types = self.mapr4_1_config.SKIP_EDP_JOB_TYPES + + if utils_edp.JOB_TYPE_PIG not in skipped_edp_job_types: + yield self._edp_pig_test() + if utils_edp.JOB_TYPE_MAPREDUCE not in skipped_edp_job_types: + yield self._edp_mapreduce_test() + if utils_edp.JOB_TYPE_MAPREDUCE_STREAMING not in skipped_edp_job_types: + yield self._edp_mapreduce_streaming_test() + if utils_edp.JOB_TYPE_JAVA not in skipped_edp_job_types: + yield self._edp_java_test() + + def _edp_pig_test(self): + pig_job = self.edp_info.read_pig_example_script() + pig_lib = self.edp_info.read_pig_example_jar() + + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_PIG, + job_data_list=[{'pig': pig_job}], + lib_data_list=[{'jar': pig_lib}], + swift_binaries=True, + hdfs_local_output=True) + + def _edp_mapreduce_test(self): + mapreduce_jar = self.edp_info.read_mapreduce_example_jar() + mapreduce_configs = self.edp_info.mapreduce_example_configs() + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_MAPREDUCE, + job_data_list=[], + lib_data_list=[{'jar': mapreduce_jar}], + configs=mapreduce_configs, + swift_binaries=True, + hdfs_local_output=True) + + def _edp_mapreduce_streaming_test(self): + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING, + job_data_list=[], + lib_data_list=[], + configs=self.edp_info.mapreduce_streaming_configs()) + + def _edp_java_test(self): + java_jar = self.edp_info.read_java_example_lib(1) + java_configs = self.edp_info.java_example_configs(1) + return self.edp_testing( + utils_edp.JOB_TYPE_JAVA, + job_data_list=[], + lib_data_list=[{'jar': java_jar}], + configs=java_configs) + + @b.errormsg("Failure while cluster scaling: ") + def _check_scaling(self): + datanode_count_after_resizing = ( + self.cluster_info['node_info']['datanode_count'] + + self.mapr4_1_config.SCALE_EXISTING_NG_COUNT) + change_list = [ + { + 'operation': 'resize', + 'info': ['worker-node-nm', + datanode_count_after_resizing] + }, + { + 'operation': 'add', + 'info': ['new-worker-node-tt-dn', + self.mapr4_1_config.SCALE_NEW_NG_COUNT, + '%s' % self.ng_tmpl_nm_dn_id] + } + ] + + self.cluster_info = self.cluster_scaling(self.cluster_info, + change_list) + self.await_active_tasktracker( + self.cluster_info['node_info'], self.mapr4_1_config) + + @b.errormsg( + "Failure during check of Swift availability after cluster scaling: ") + def _check_swift_after_scaling(self): + self.check_swift_availability(self.cluster_info) + + @b.errormsg("Failure while EDP testing after cluster scaling: ") + def 
_check_edp_after_scaling(self): + self._check_edp() + + @testcase.attr('mapr4_1') + def test_mapr4_1_plugin_gating(self): + self._prepare_test() + self._create_jt_nn_ng_template() + self._create_nm_dn_ng_template() + self._create_cluster_template() + self._create_cluster() + + self._check_swift() + self._check_edp() + + if not self.mapr4_1_config.SKIP_SCALING_TEST: + self._check_scaling() + self._check_swift_after_scaling() + self._check_edp_after_scaling() diff --git a/sahara/tests/integration/tests/gating/test_mapr4_2_gating.py b/sahara/tests/integration/tests/gating/test_mapr4_2_gating.py new file mode 100644 index 00000000..a41c1bd9 --- /dev/null +++ b/sahara/tests/integration/tests/gating/test_mapr4_2_gating.py @@ -0,0 +1,233 @@ +# Copyright (c) 2014 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from testtools import testcase + +from sahara.tests.integration.configs import config as cfg +from sahara.tests.integration.tests import base as b +from sahara.tests.integration.tests import edp +from sahara.tests.integration.tests import scaling +from sahara.tests.integration.tests import swift +from sahara.utils import edp as utils_edp + + +class Mapr4_2GatingTest(swift.SwiftTest, scaling.ScalingTest, + edp.EDPTest): + + config = cfg.ITConfig().mapr4_2_config + SKIP_EDP_TEST = config.SKIP_EDP_TEST + SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST + SKIP_SCALING_TEST = config.SKIP_SCALING_TEST + + def setUp(self): + super(Mapr4_2GatingTest, self).setUp() + self.cluster_id = None + self.cluster_template_id = None + + def _prepare_test(self): + self.mapr4_2_config = cfg.ITConfig().mapr4_2_config + self.floating_ip_pool = self.common_config.FLOATING_IP_POOL + self.internal_neutron_net = None + if self.common_config.NEUTRON_ENABLED: + self.internal_neutron_net = self.get_internal_neutron_net_id() + self.floating_ip_pool = ( + self.get_floating_ip_pool_id_for_neutron_net()) + + self.mapr4_2_config.IMAGE_ID, self.mapr4_2_config.SSH_USERNAME = ( + (self.get_image_id_and_ssh_username(self.mapr4_2_config))) + + @b.errormsg("Failure while 'rm-nn' node group template creation: ") + def _create_rm_nn_ng_template(self): + template = { + 'name': 'test-node-group-template-mapr4_2-rm-nn', + 'plugin_config': self.mapr4_2_config, + 'description': 'test node group template for MAPR plugin', + # NEED CHANGES MASTER_NODE + 'node_processes': self.mapr4_2_config.MASTER_NODE_PROCESSES, + 'floating_ip_pool': self.floating_ip_pool, + 'node_configs': {} + } + self.ng_tmpl_rm_nn_id = self.create_node_group_template(**template) + self.addCleanup(self.delete_objects, + node_group_template_id_list=[self.ng_tmpl_rm_nn_id]) + + @b.errormsg("Failure while 'nm-dn' node group template creation: ") + def _create_nm_dn_ng_template(self): + template = { + 'name': 'test-node-group-template-mapr4_2-nm-dn', + 'plugin_config': self.mapr4_2_config, + 'description': 'test node group template for MAPR plugin', + # NEED CHANGES WORKER + 'node_processes': self.mapr4_2_config.WORKER_NODE_PROCESSES, + 'floating_ip_pool': 
self.floating_ip_pool, + 'node_configs': {} + } + self.ng_tmpl_nm_dn_id = self.create_node_group_template(**template) + self.addCleanup(self.delete_objects, + node_group_template_id_list=[self.ng_tmpl_nm_dn_id]) + + @b.errormsg("Failure while cluster template creation: ") + def _create_cluster_template(self): + template = { + 'name': 'test-cluster-template-mapr4_2', + 'plugin_config': self.mapr4_2_config, + 'description': 'test cluster template for MAPR plugin', + 'cluster_configs': { + 'YARN': { + 'yarn.log-aggregation-enable': False + } + }, + 'node_groups': [ + { + 'name': 'master-node-dn', + 'node_group_template_id': self.ng_tmpl_rm_nn_id, + 'count': 1 + }, + { + 'name': 'worker-node-nm', + 'node_group_template_id': self.ng_tmpl_nm_dn_id, + 'count': 3 + } + ], + 'net_id': self.internal_neutron_net + } + self.cluster_template_id = self.create_cluster_template(**template) + self.addCleanup(self.delete_objects, + cluster_template_id=self.cluster_template_id) + + @b.errormsg("Failure while cluster creation: ") + def _create_cluster(self): + cluster_name = '%s-%s-v2' % (self.common_config.CLUSTER_NAME, + self.mapr4_2_config.PLUGIN_NAME) + cluster = { + 'name': cluster_name, + 'plugin_config': self.mapr4_2_config, + 'cluster_template_id': self.cluster_template_id, + 'description': 'test cluster', + 'cluster_configs': {} + } + cluster_id = self.create_cluster(**cluster) + self.addCleanup(self.delete_objects, cluster_id=cluster_id) + self.poll_cluster_state(cluster_id) + self.cluster_info = self.get_cluster_info(self.mapr4_2_config) + self.await_active_tasktracker( + self.cluster_info['node_info'], self.mapr4_2_config) + + @b.errormsg("Failure during check of Swift availability: ") + def _check_swift(self): + self.check_swift_availability(self.cluster_info) + + @b.errormsg("Failure while EDP testing: ") + def _check_edp(self): + self.poll_jobs_status(list(self._run_edp_tests())) + + def _run_edp_tests(self): + skipped_edp_job_types = self.mapr4_2_config.SKIP_EDP_JOB_TYPES + + if utils_edp.JOB_TYPE_PIG not in skipped_edp_job_types: + yield self._edp_pig_test() + if utils_edp.JOB_TYPE_MAPREDUCE not in skipped_edp_job_types: + yield self._edp_mapreduce_test() + if utils_edp.JOB_TYPE_MAPREDUCE_STREAMING not in skipped_edp_job_types: + yield self._edp_mapreduce_streaming_test() + if utils_edp.JOB_TYPE_JAVA not in skipped_edp_job_types: + yield self._edp_java_test() + + def _edp_pig_test(self): + pig_job = self.edp_info.read_pig_example_script() + pig_lib = self.edp_info.read_pig_example_jar() + + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_PIG, + job_data_list=[{'pig': pig_job}], + lib_data_list=[{'jar': pig_lib}], + swift_binaries=True, + hdfs_local_output=True) + + def _edp_mapreduce_test(self): + mapreduce_jar = self.edp_info.read_mapreduce_example_jar() + mapreduce_configs = self.edp_info.mapreduce_example_configs() + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_MAPREDUCE, + job_data_list=[], + lib_data_list=[{'jar': mapreduce_jar}], + configs=mapreduce_configs, + swift_binaries=True, + hdfs_local_output=True) + + def _edp_mapreduce_streaming_test(self): + return self.edp_testing( + job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING, + job_data_list=[], + lib_data_list=[], + configs=self.edp_info.mapreduce_streaming_configs()) + + def _edp_java_test(self): + java_jar = self.edp_info.read_java_example_lib(2) + java_configs = self.edp_info.java_example_configs(2) + return self.edp_testing( + utils_edp.JOB_TYPE_JAVA, + job_data_list=[], + lib_data_list=[{'jar': java_jar}], + 
configs=java_configs) + + @b.errormsg("Failure while cluster scaling: ") + def _check_scaling(self): + datanode_count_after_resizing = ( + self.cluster_info['node_info']['datanode_count'] + + self.mapr4_2_config.SCALE_EXISTING_NG_COUNT) + change_list = [ + { + 'operation': 'resize', + 'info': ['worker-node-nm', + datanode_count_after_resizing] + }, + { + 'operation': 'add', + 'info': ['new-worker-node-tt-dn', + self.mapr4_2_config.SCALE_NEW_NG_COUNT, + '%s' % self.ng_tmpl_nm_dn_id] + } + ] + + self.cluster_info = self.cluster_scaling(self.cluster_info, + change_list) + self.await_active_tasktracker( + self.cluster_info['node_info'], self.mapr4_2_config) + + @b.errormsg( + "Failure during check of Swift availability after cluster scaling: ") + def _check_swift_after_scaling(self): + self.check_swift_availability(self.cluster_info) + + @b.errormsg("Failure while EDP testing after cluster scaling: ") + def _check_edp_after_scaling(self): + self._check_edp() + + @testcase.attr('mapr4_2') + def test_mapr4_2_plugin_gating(self): + self._prepare_test() + self._create_rm_nn_ng_template() + self._create_nm_dn_ng_template() + self._create_cluster_template() + self._create_cluster() + + self._check_swift() + self._check_edp() + + if not self.mapr4_2_config.SKIP_SCALING_TEST: + self._check_scaling() + self._check_swift_after_scaling() + self._check_edp_after_scaling() diff --git a/sahara/tests/integration/tests/gating/test_mapr_gating.py b/sahara/tests/integration/tests/gating/test_mapr_gating.py new file mode 100644 index 00000000..f4c4069a --- /dev/null +++ b/sahara/tests/integration/tests/gating/test_mapr_gating.py @@ -0,0 +1,330 @@ +# Copyright (c) 2013 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
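+
+# Minimal usage sketch (assumes a populated itest.conf): the class below
+# reads its settings through ITConfig, whose attributes are backed by the
+# [MAPR] option group registered in the integration configs:
+#
+#     from sahara.tests.integration.configs import config as cfg
+#     mapr = cfg.ITConfig().mapr_config
+#     mapr.SKIP_ALL_TESTS_FOR_PLUGIN  # True by default, so the gating
+#                                     # test is skipped until the [MAPR]
+#                                     # section enables it explicitly.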
+ +from oslo.utils import excutils +from testtools import testcase + +from sahara.tests.integration.configs import config as cfg +from sahara.tests.integration.tests import cinder +from sahara.tests.integration.tests import edp +from sahara.tests.integration.tests import map_reduce +from sahara.tests.integration.tests import scaling +from sahara.tests.integration.tests import swift +from sahara.utils import edp as utils_edp + + +class MaprGatingTest(cinder.CinderVolumeTest, edp.EDPTest, + map_reduce.MapReduceTest, swift.SwiftTest, + scaling.ScalingTest): + config = cfg.ITConfig().mapr_config + SKIP_CINDER_TEST = config.SKIP_CINDER_TEST + SKIP_EDP_TEST = config.SKIP_EDP_TEST + SKIP_MAP_REDUCE_TEST = config.SKIP_MAP_REDUCE_TEST + SKIP_SWIFT_TEST = config.SKIP_SWIFT_TEST + SKIP_SCALING_TEST = config.SKIP_SCALING_TEST + + @testcase.skipIf(config.SKIP_ALL_TESTS_FOR_PLUGIN, + 'All tests for MAPR plugin were skipped') + @testcase.attr('mapr1') + def test_mapr_plugin_gating(self): + self.mapr_config.IMAGE_ID, self.mapr_config.SSH_USERNAME = ( + self.get_image_id_and_ssh_username(self.mapr_config)) + + # Default value of self.common_config.FLOATING_IP_POOL is None + floating_ip_pool = self.common_config.FLOATING_IP_POOL + internal_neutron_net = None + # If Neutron enabled then get ID of floating IP pool and ID of internal + # Neutron network + if self.common_config.NEUTRON_ENABLED: + floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net() + internal_neutron_net = self.get_internal_neutron_net_id() + + if not self.mapr_config.SKIP_CINDER_TEST: + volumes_per_node = 2 + else: + volumes_per_node = 0 + + node_group_template_id_list = [] + +# ------------------------------CLUSTER CREATION------------------------------- + +# ----------------------"tt-dn" node group template creation------------------- + + try: + node_group_template_tt_dn_id = self.create_node_group_template( + name='test-node-group-template-mapr-tt-dn', + plugin_config=self.mapr_config, + description='test node group template for MAPR plugin', + volumes_per_node=volumes_per_node, + node_processes=self.mapr_config.WORKER_NODE_PROCESSES, + # NEED CREATE WORKER_NODE_PROCESSES + node_configs={}, + floating_ip_pool=floating_ip_pool + ) + node_group_template_id_list.append(node_group_template_tt_dn_id) + + except Exception as e: + with excutils.save_and_reraise_exception(): + message = ('Failure while \'tt-dn\' node group ' + 'template creation: ') + self.print_error_log(message, e) + +# --------------------------Cluster template creation-------------------------- + + try: + cluster_template_id = self.create_cluster_template( + name='test-cluster-template-mapr', + plugin_config=self.mapr_config, + description='test cluster template for MAPR plugin', + cluster_configs={}, + node_groups=[ + dict( + name='master-node-jt-nn', + flavor_id=self.flavor_id, + node_processes=self.mapr_config.MASTER_NODE_PROCESSES, + # NEED CREATE MASTER_NODE_PROCESSES + node_configs={}, + floating_ip_pool=floating_ip_pool, + count=1), + dict( + name='worker-node-tt-dn', + node_group_template_id=node_group_template_tt_dn_id, + count=3) + ], + net_id=internal_neutron_net + ) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + node_group_template_id_list=node_group_template_id_list + ) + message = 'Failure while cluster template creation: ' + self.print_error_log(message, e) + +# ------------------------------Cluster creation------------------------------- + + cluster_name = (self.common_config.CLUSTER_NAME + '-' + + 
self.mapr_config.PLUGIN_NAME) + try: + self.create_cluster( + name=cluster_name, + plugin_config=self.mapr_config, + cluster_template_id=cluster_template_id, + description='test cluster', + cluster_configs={} + ) + + cluster_info = self.get_cluster_info(self.mapr_config) + self.await_active_tasktracker( + cluster_info['node_info'], self.mapr_config) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + self.cluster_id, cluster_template_id, + node_group_template_id_list + ) + message = 'Failure while cluster creation: ' + self.print_error_log(message, e) + +# --------------------------------CINDER TESTING------------------------------- + + try: + self.cinder_volume_testing(cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = 'Failure while Cinder testing: ' + self.print_error_log(message, e) + +# ---------------------------------EDP TESTING--------------------------------- + + path = 'sahara/tests/integration/tests/resources/' + pig_job_data = open(path + 'edp-job.pig').read() + pig_lib_data = open(path + 'edp-lib.jar').read() + mapreduce_jar_data = open(path + 'edp-mapreduce.jar').read() + + # This is a modified version of WordCount that takes swift configs + java_lib_data = open(path + 'edp-java/edp-java.jar').read() + java_configs = { + "configs": { + "edp.java.main_class": ("org.openstack.sahara.examples" + ".WordCount") + } + } + + mapreduce_configs = { + "configs": { + "mapred.mapper.class": "org.apache.oozie.example.SampleMapper", + "mapred.reducer.class": ("org.apache.oozie.example" + ".SampleReducer") + } + } + mapreduce_streaming_configs = { + "configs": { + "edp.streaming.mapper": "/bin/cat", + "edp.streaming.reducer": "/usr/bin/wc" + } + } + try: + self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG, + job_data_list=[{'pig': pig_job_data}], + lib_data_list=[{'jar': pig_lib_data}], + swift_binaries=True, + hdfs_local_output=True) + self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE, + job_data_list=[], + lib_data_list=[{'jar': mapreduce_jar_data}], + configs=mapreduce_configs, + swift_binaries=True, + hdfs_local_output=True) + self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING, + job_data_list=[], + lib_data_list=[], + configs=mapreduce_streaming_configs) + self.edp_testing(job_type=utils_edp.JOB_TYPE_JAVA, + job_data_list=[], + lib_data_list=[{'jar': java_lib_data}], + configs=java_configs, + pass_input_output_args=True) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = 'Failure while EDP testing: ' + self.print_error_log(message, e) + +# -----------------------------MAP REDUCE TESTING------------------------------ + + try: + self.map_reduce_testing(cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = 'Failure while Map Reduce testing: ' + self.print_error_log(message, e) + +# --------------------------CHECK SWIFT AVAILABILITY--------------------------- + + try: + self.check_swift_availability(cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + 
node_group_template_id_list + ) + message = 'Failure during check of Swift availability: ' + self.print_error_log(message, e) + +# -------------------------------CLUSTER SCALING------------------------------- + + if not self.mapr_config.SKIP_SCALING_TEST: + datanode_count_after_resizing = ( + cluster_info['node_info']['datanode_count'] + + self.mapr_config.SCALE_EXISTING_NG_COUNT) + change_list = [ + { + 'operation': 'resize', + 'info': ['worker-node-tt-dn', + datanode_count_after_resizing] + }, + { + 'operation': 'add', + 'info': [ + 'new-worker-node-tt-dn', + self.mapr_config.SCALE_NEW_NG_COUNT, + '%s' % node_group_template_tt_dn_id + ] + } + ] + try: + new_cluster_info = self.cluster_scaling(cluster_info, + change_list) + self.await_active_tasktracker( + new_cluster_info['node_info'], self.mapr_config) + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = 'Failure while cluster scaling: ' + self.print_error_log(message, e) + +# -------------------------CINDER TESTING AFTER SCALING------------------------ + + try: + self.cinder_volume_testing(new_cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + new_cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = ('Failure while Cinder testing after cluster ' + 'scaling: ') + self.print_error_log(message, e) + +# ----------------------MAP REDUCE TESTING AFTER SCALING----------------------- + + try: + self.map_reduce_testing(new_cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + new_cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = ('Failure while Map Reduce testing after ' + 'cluster scaling: ') + self.print_error_log(message, e) + +# -------------------CHECK SWIFT AVAILABILITY AFTER SCALING-------------------- + + try: + self.check_swift_availability(new_cluster_info) + + except Exception as e: + with excutils.save_and_reraise_exception(): + self.delete_objects( + new_cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) + message = ('Failure during check of Swift availability ' + 'after cluster scaling: ') + self.print_error_log(message, e) + +# ---------------------------DELETE CREATED OBJECTS---------------------------- + + self.delete_objects( + cluster_info['cluster_id'], cluster_template_id, + node_group_template_id_list + ) diff --git a/sahara/tests/unit/plugins/mapr/__init__.py b/sahara/tests/unit/plugins/mapr/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sahara/tests/unit/plugins/mapr/stubs.py b/sahara/tests/unit/plugins/mapr/stubs.py new file mode 100644 index 00000000..5f8daf23 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/stubs.py @@ -0,0 +1,185 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
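+
+# Minimal usage sketch for the stubs defined below (only this module):
+#
+#     import sahara.tests.unit.plugins.mapr.stubs as s
+#     ng = s.NodeGroup(node_processes=['CLDB', 'FileServer'], count=3)
+#     cluster = s.Cluster(name='test-cluster', node_groups=[ng])
+#     assert ng.cluster is cluster            # back-references are wired up
+#     assert cluster['name'] == cluster.name  # AttrDict: key or attribute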
+ +import sahara.utils.configs as c + +import six + + +class AttrDict(dict): + + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + +node_domain = None + + +class Cluster(AttrDict): + fields = ['id', 'name', 'description', 'tenant_id', 'trust_id', + 'is_transient', 'plugin_name', 'hadoop_version', + 'cluster_configs', 'default_image_id', 'anti_affinity', + 'management_private_key', 'management_public_key', + 'user_keypair_id', 'status', 'status_description', 'info', + 'extra', 'node_groups', 'cluster_template_id', + 'cluster_template'] + + def __init__(self, mapping=None, **kwargs): + self.id = None + self.cluster_template_id = None + self.cluster_template = None + self.node_groups = [] + d = dict((f, None) for f in Cluster.fields) + if mapping: + d.update(mapping) + if kwargs: + d.update(kwargs) + AttrDict.__init__(self, d) + if self.node_groups: + for ng in self.node_groups: + ng.cluster_id = self.id + ng.cluster = self + ng.cluster_template_id = self.cluster_template_id + ng.cluster_template = self.cluster_template + + +class NodeGroup(AttrDict): + fields = ['id', 'name', 'flavor_id', 'image_id', 'image_username', + 'node_processes', 'node_configs', 'volumes_per_node', + 'volumes_size', 'volume_mount_prefix', 'floating_ip_pool', + 'count', 'instances', 'node_group_template_id', + 'node_group_template', 'cluster_id', 'cluster', + 'cluster_template_id', 'cluster_template'] + + def __init__(self, mapping=None, **kwargs): + self.id = None + self.instances = [] + d = dict((f, None) for f in NodeGroup.fields) + if mapping: + d.update(mapping) + if kwargs: + d.update(kwargs) + AttrDict.__init__(self, d) + if self.instances: + for i in self.instances: + i.node_group_id = self.id + i.node_group = self + + def configuration(self): + return c.merge_configs(self.cluster.cluster_configs, self.node_configs) + + def storage_paths(self): + mp = [self.volume_mount_prefix + str(idx) + for idx in range(1, self.volumes_per_node + 1)] + if not mp: + mp = ['/mnt'] + return mp + + def get_image_id(self): + return self.image_id or self.cluster.default_image_id + + +class Instance(AttrDict): + fields = ['id', 'node_group_id', 'node_group', 'instance_id', + 'instance_name', 'internal_ip', 'management_ip', 'volumes'] + + def __init__(self, mapping=None, **kwargs): + d = dict((f, None) for f in Instance.fields) + p = lambda i: i[0] in Instance.fields + if mapping: + d.update(dict(filter(p, six.iteritems(mapping)))) + if kwargs: + d.update(dict(filter(p, six.iteritems(kwargs)))) + AttrDict.__init__(self, d) + results = kwargs['results'] if 'results' in kwargs else [] + default_result = (kwargs['default_result'] + if 'default_result' in kwargs + else Remote.DEFAULT_RESULT) + self._remote = Remote(results, default_result) + + def hostname(self): + return self.instance_name + + def fqdn(self): + return self.instance_name + '.' 
+ node_domain
+
+    def remote(self):
+        return self._remote
+
+
+class Remote(object):
+    DEFAULT_RESULT = (0, '', '')
+
+    def __init__(self, results=None, default_result=None):
+        self.fs = []
+        self.history = []
+        # Avoid sharing a mutable default argument between instances
+        self.results = results if results is not None else []
+        self.default_result = (default_result
+                               if default_result
+                               else Remote.DEFAULT_RESULT)
+
+    def register_result(self, command, result):
+        # Record the canned result so get_result() can return it later
+        self.results += [(command, result)]
+
+    def get_result(self, command):
+        for r_command, result in self.results:
+            if r_command == command:
+                return result
+        return (self.default_result
+                if command['get_stderr']
+                else self.default_result[:-1])
+
+    def __exit__(self, *args):
+        pass
+
+    def __enter__(self):
+        return self
+
+    def write_file_to(self, remote_file, data, run_as_root=False,
+                      timeout=120):
+        self.fs += [{'file': remote_file, 'data': data, 'root': run_as_root,
+                     'timeout': timeout}]
+
+    def write_files_to(self, files, run_as_root=False, timeout=120):
+        self.fs += [{'file': f, 'data': d, 'root': run_as_root,
+                     'timeout': timeout}
+                    for f, d in six.iteritems(files)]
+
+    def read_file_from(self, remote_file, run_as_root=False, timeout=120):
+        for f in self.fs:
+            if f['file'] == remote_file:
+                return f['data']
+        return None
+
+    def replace_remote_string(self, remote_file, old_str,
+                              new_str, timeout=120):
+        pass
+
+    def get_neutron_info(self):
+        return
+
+    def get_http_client(self, port, info=None):
+        return
+
+    def close_http_sessions(self):
+        pass
+
+    def execute_command(self, cmd, run_as_root=False, get_stderr=False,
+                        raise_when_error=True, timeout=300):
+        command = {'cmd': cmd,
+                   'run_as_root': run_as_root,
+                   'get_stderr': get_stderr,
+                   'raise_when_error': raise_when_error,
+                   'timeout': timeout}
+        self.history += [command]
+        return self.get_result(command)
diff --git a/sahara/tests/unit/plugins/mapr/utils/__init__.py b/sahara/tests/unit/plugins/mapr/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/cldb-1.conf b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/cldb-1.conf
new file mode 100644
index 00000000..680f62a5
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/cldb-1.conf
@@ -0,0 +1,2 @@
+net.topology.script.file.name=/opt/mapr/topology.sh
+cldb.zookeeper.servers=192.168.1.10:5181,192.168.1.11:5181,192.168.1.12:5181
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-0.xml b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-0.xml
new file mode 100644
index 00000000..34b8300e
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-0.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.swift.impl</name>
+    <value>org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.swift.socket.timeout</name>
+    <value>60000</value>
+  </property>
+  <property>
+    <name>fs.swift.blocksize</name>
+    <value>32768</value>
+  </property>
+  <property>
+    <name>fs.swift.connect.retry.count</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.auth.endpoint.prefix</name>
+    <value>/endpoints/AUTH_</value>
+  </property>
+  <property>
+    <name>fs.swift.connect.timeout</name>
+    <value>15000</value>
+  </property>
+  <property>
+    <name>fs.swift.requestsize</name>
+    <value>64</value>
+  </property>
+  <property>
+    <name>fs.swift.connect.throttle.delay</name>
+    <value>0</value>
+  </property>
+  <property>
+    <name>k1</name>
+    <value>v1</value>
+  </property>
+  <property>
+    <name>k0</name>
+    <value>v0</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.https.port</name>
+    <value>443</value>
+  </property>
+  <property>
+    <name>fs.swift.partsize</name>
+    <value>4718592</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.auth.url</name>
+    <value>http://auth:None/v2.0/tokens/</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.public</name>
+    <value>True</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.http.port</name>
+    <value>8080</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.tenant</name>
+    <value>tenant_0</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-1.xml b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-1.xml
new file mode 100644
index 00000000..e9c85a21
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/core-site-1.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.swift.requestsize</name>
+    <value>64</value>
+  </property>
+  <property>
+    <name>fs.swift.impl</name>
+    <value>org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.swift.socket.timeout</name>
+    <value>60000</value>
+  </property>
+  <property>
+    <name>fs.swift.blocksize</name>
+    <value>32768</value>
+  </property>
+  <property>
+    <name>net.topology.impl</name>
+    <value>org.apache.hadoop.net.NetworkTopologyWithNodeGroup</value>
+  </property>
+  <property>
+    <name>fs.swift.connect.retry.count</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.auth.endpoint.prefix</name>
+    <value>/endpoints/AUTH_</value>
+  </property>
+  <property>
+    <name>fs.swift.connect.timeout</name>
+    <value>15000</value>
+  </property>
+  <property>
+    <name>dfs.block.replicator.classname</name>
+    <value>org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyWithNodeGroup</value>
+  </property>
+
+  <property>
+    <name>fs.swift.connect.throttle.delay</name>
+    <value>0</value>
+  </property>
+  <property>
+    <name>k1</name>
+    <value>v1</value>
+  </property>
+  <property>
+    <name>k0</name>
+    <value>v0</value>
+  </property>
+  <property>
+    <name>net.topology.nodegroup.aware</name>
+    <value>True</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.https.port</name>
+    <value>443</value>
+  </property>
+  <property>
+    <name>fs.swift.partsize</name>
+    <value>4718592</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.auth.url</name>
+    <value>http://auth:None/v2.0/tokens/</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.public</name>
+    <value>True</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.http.port</name>
+    <value>8080</value>
+  </property>
+  <property>
+    <name>fs.swift.service.sahara.tenant</name>
+    <value>tenant_0</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-0.xml b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-0.xml
new file mode 100644
index 00000000..e861a8fa
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-0.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>node_config_0</name>
+    <value>False</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-1.xml b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-1.xml
new file mode 100644
index 00000000..38cad9ef
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/mapred-site-1.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>node_config_0</name>
+    <value>False</value>
+  </property>
+  <property>
+    <name>mapred.task.cache.levels</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>mapred.jobtracker.nodegroup.aware</name>
+    <value>True</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/topology-1.data b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/topology-1.data
new file mode 100644
index 00000000..1154eb4a
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/topology-1.data
@@ -0,0 +1,9 @@
+10.10.1.12 r
+10.10.1.10 r
+10.10.1.11 r
+192.168.1.12 r
+i1 r
+i0 r
+192.168.1.11 r
+192.168.1.10 r
+i2 r
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/compute.topology b/sahara/tests/unit/plugins/mapr/utils/resources/compute.topology
new file mode 100644
index 00000000..e65d9ab4
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/compute.topology
@@ -0,0 +1,6 @@
+edp-master-0001 /rack1
+10.50.0.8 /rack1
+edp-slave-0002 /rack1
+10.50.0.5 /rack1
+edp-slave-0001 /rack2
+10.50.0.6 /rack2
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec.json b/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec.json
new file mode 100644
index 00000000..b36534aa
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec.json
@@ -0,0 +1,106 @@
+{
+  "files": [
+    {
+      "remote": null,
+      "type": null,
+      "configs": {
+        "required": {
+          "cluster": [
+            {
+              "name": "k4"
+            }
+          ]
+        }
+      }
+    },
+    {
+      "remote": "file_0",
+      "type": "properties"
+    },
+    {
+      "remote": "file_1",
+      "local": "test.properties",
+      "type": "properties",
+      "configs": {
+        "required": {
+          "cluster": [
+            {
+              "name": "k0",
+              "default_value": "default_value_0",
+              "description": "description_0",
+              "priority": 2
+            }
+          ],
+          "node": [
+            {
+              "name": "k1",
+              "config_type": "int",
+              "default_value": 3,
+              "priority": 1
+            }
+          ]
+        },
+        "optional": {
+          "cluster": [
+            {
+              "name": "k2",
+              "config_type": "bool"
+            }
+          ],
+          "node": [
+            {
+              "name": "k3"
+            }
+          ]
+        }
+      }
+    },
+    {
+      "remote": "file_2",
+      "local": "test.xml",
+      "type": "xml"
+    },
+    {
+      "remote": "file_3",
+      "local": "raw.data",
+      "type": "raw"
+    }
+  ],
+  "services": [
+    {
+      "name": "general",
+      "files": [
+        null,
+        "file_3"
+      ]
+    },
+    {
+      "name": "service_0"
+    },
+    {
+      "name": "service_1",
+      "files": [
+
+      ],
+      "node_processes": [
+
+      ]
+    },
+    {
+      "name": "service_2",
+      "files": [
+        "file_0",
+        "file_1",
+        "file_2"
+      ],
+      "node_processes": [
+        "node_process_0",
+        "node_process_1"
+      ],
+      "versions": [
+        "v1",
+        "v2"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json b/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json
new file mode 100644
index 00000000..c7b36ade
--- /dev/null
+++ b/sahara/tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json
@@ -0,0 +1,168 @@
+{
+  "files":[
+    {
+      "remote":null,
+      "type":null,
+      "configs":{
+        "required":{
+          "cluster":[
+            {
+              "name":"Enable Data Locality",
+              "config_type":"bool",
+              "default_value":false,
+              "priority":1
+            },
+            {
+              "name":"Enable MapR-DB",
+              "config_type":"bool",
+              "default_value":false,
+              "priority":1
+            }
+          ]
+        }
+      }
+    },
+    {
+      "remote":"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml",
+      "type":"xml",
+      "configs":{
+        "required":{
+          "node":[
+            {
+              "name":"node_config_0",
+              "config_type":"bool",
+              "default_value":false,
+              "priority":1
+            }
+          ]
+        }
+      }
+    },
+    {
+      "remote":"/opt/mapr/conf/cldb.conf",
+      "type":"properties"
+    },
+    {
+      "remote":"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml",
+      "local":"test.xml",
+      "type":"xml",
+      "configs":{
+        "optional":{
+          "cluster":[
+            {
+              "name":"fs.swift.impl",
+              "default_value":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
+            },
+            {
+              "name":"fs.swift.connect.timeout",
+              "config_type":"int",
+              "default_value":15000
+            },
+            {
+              "name":"fs.swift.socket.timeout",
+              "config_type":"int",
+              "default_value":60000
+            },
+            {
+              "name":"fs.swift.connect.retry.count",
+              "config_type":"int",
+              "default_value":3
+            },
+            {
+              "name":"fs.swift.connect.throttle.delay",
+              "config_type":"int",
+              "default_value":0
+            },
+            {
+              "name":"fs.swift.blocksize",
+              "config_type":"int",
+              "default_value":32768
+            },
+            {
+              "name":"fs.swift.partsize",
+              "config_type":"int",
+              "default_value":4718592
+            },
+            {
+              "name":"fs.swift.requestsize",
+              "config_type":"int",
+              "default_value":64
+            },
+            {
+              "name":"fs.swift.service.sahara.public",
+              "config_type":"bool",
+              "default_value":true
+            },
+            {
+              "name":"fs.swift.service.sahara.http.port",
+              "config_type":"int",
+              "default_value":8080
+            },
+            {
+              "name":"fs.swift.service.sahara.https.port",
+              "config_type":"int",
+              "default_value":443
+            },
+            {
+              "name":"fs.swift.service.sahara.auth.endpoint.prefix",
+              "default_value":"/endpoints/AUTH_"
+            }
+          ]
+        }
+      }
+    }
+  ],
+  "services":[
+    {
+      "name":"general",
+      "files":[
+        null
+      ]
+    },
+    {
+      "name":"Management",
+      "node_processes":[
+        "ZooKeeper",
+        "Webserver",
+        "MapR Client",
+        "Metrics"
+      ]
+    },
+    {
+      "name":"MapReduce",
+      "node_processes":[
+        "TaskTracker",
+        "JobTracker"
+      ],
+      "files":[
+        "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml"
+      ]
+    },
+    {
+      "name":"MapR FS",
+      "node_processes":[
+ "CLDB", + "FileServer", + "NFS" + ], + "files":[ + "/opt/mapr/conf/cldb.conf", + "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml" + ] + }, + { + "name":"HBase", + "node_processes":[ + "HBase Master", + "HBase RegionServer", + "HBase Client" + ] + }, + { + "name":"Oozie", + "node_processes":[ + "Oozie" + ] + } + ] +} \ No newline at end of file diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/raw.data b/sahara/tests/unit/plugins/mapr/utils/resources/raw.data new file mode 100644 index 00000000..d1553b51 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/resources/raw.data @@ -0,0 +1 @@ +Some unparsable data \ No newline at end of file diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/test.properties b/sahara/tests/unit/plugins/mapr/utils/resources/test.properties new file mode 100644 index 00000000..03d36e41 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/resources/test.properties @@ -0,0 +1,4 @@ +k0 = v0 + +# Comment +k1 = v1 \ No newline at end of file diff --git a/sahara/tests/unit/plugins/mapr/utils/resources/test.xml b/sahara/tests/unit/plugins/mapr/utils/resources/test.xml new file mode 100644 index 00000000..5058ad94 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/resources/test.xml @@ -0,0 +1,10 @@ + + + k0 + v0 + + + k1 + v1 + + \ No newline at end of file diff --git a/sahara/tests/unit/plugins/mapr/utils/test_cluster_info.py b/sahara/tests/unit/plugins/mapr/utils/test_cluster_info.py new file mode 100644 index 00000000..638d4e46 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/test_cluster_info.py @@ -0,0 +1,51 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import sahara.plugins.mapr.util.cluster_info as ci +import sahara.plugins.mapr.util.plugin_spec as ps +import sahara.tests.unit.base as b +import sahara.tests.unit.plugins.mapr.stubs as s + + +class ClusterInfoTest(b.SaharaTestCase): + + def assertItemsEqual(self, expected, actual): + for e in expected: + self.assertIn(e, actual) + for a in actual: + self.assertIn(a, expected) + + def setUp(self): + b.SaharaTestCase.setUp(self) + path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json' + self.plugin_spec = ps.PluginSpec(path) + + def test_get_node_group_services(self): + node_processes = ['ZooKeeper', 'Webserver', 'CLDB'] + node_group = s.NodeGroup(None, node_processes=node_processes) + cluster_info = ci.ClusterInfo(None, self.plugin_spec) + actual = cluster_info.get_services(node_group) + expected = ['Management', 'MapR FS', 'general'] + self.assertItemsEqual(expected, actual) + + def test_get_cluster_services(self): + np0 = ['ZooKeeper', 'Webserver', 'CLDB'] + ng0 = s.NodeGroup(node_processes=np0) + np1 = ['ZooKeeper', 'TaskTracker', 'FileServer'] + ng1 = s.NodeGroup(node_processes=np1) + cluster = s.Cluster(node_groups=[ng0, ng1]) + cluster_info = ci.ClusterInfo(cluster, self.plugin_spec) + actual = cluster_info.get_services() + expected = ['Management', 'MapR FS', 'general', 'MapReduce'] + self.assertItemsEqual(expected, actual) diff --git a/sahara/tests/unit/plugins/mapr/utils/test_config_file_utils.py b/sahara/tests/unit/plugins/mapr/utils/test_config_file_utils.py new file mode 100644 index 00000000..c3891553 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/test_config_file_utils.py @@ -0,0 +1,96 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
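+
+# Round-trip sketch of the helpers exercised below (path as in the tests):
+#
+#     import sahara.plugins.mapr.util.config_file_utils as cfu
+#     path = 'tests/unit/plugins/mapr/utils/resources/test.properties'
+#     data = cfu.load_file(path, 'properties')   # {'k0': 'v0', 'k1': 'v1'}
+#     text = cfu.to_properties_file_content(data)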
+ +import os +import StringIO as sio + +import sahara.plugins.mapr.util.config_file_utils as cfu +import sahara.tests.unit.base as b + +import mock as m + + +dirname = os.path.dirname(__file__) + + +class ConfigFileUtilsTest(b.SaharaTestCase): + + def assertItemsEqual(self, expected, actual): + for e in expected: + self.assertIn(e, actual) + for a in actual: + self.assertIn(a, expected) + + def test_load_properties_file(self): + path = 'tests/unit/plugins/mapr/utils/resources/test.properties' + actual = cfu.load_properties_file(path) + expected = {'k0': 'v0', 'k1': 'v1'} + self.assertEqual(expected, actual) + + def test_load_xml_file(self): + path = 'tests/unit/plugins/mapr/utils/resources/test.xml' + actual = cfu.load_xml_file(path) + expected = {'k0': 'v0', 'k1': 'v1'} + self.assertEqual(expected, actual) + + def test_load_raw_file(self): + path = 'tests/unit/plugins/mapr/utils/resources/raw.data' + actual = cfu.load_raw_file(path) + expected = {'content': 'Some unparsable data'} + self.assertEqual(expected, actual) + + @m.patch('__builtin__.open') + def test_to_properties_file_content(self, o_mock): + data = {'k0': 'v0', 'k1': 'v1'} + s = sio.StringIO(cfu.to_properties_file_content(data)) + s.flush() + o_mock.return_value = s + actual = cfu.load_properties_file('') + self.assertEqual(data, actual) + + data = {} + actual = cfu.to_properties_file_content(data) + expected = '' + self.assertEqual(expected, actual) + + @m.patch('__builtin__.open') + def test_to_xml_file_content(self, o_mock): + data = {'k0': 'v0', 'k1': 'v1'} + s = sio.StringIO(cfu.to_xml_file_content(data)) + s.flush() + o_mock.return_value = s + actual = cfu.load_xml_file('') + self.assertEqual(data, actual) + + def test_to_raw_file_content(self): + data = {'content': 'Some unparsable data'} + actual = cfu.to_raw_file_content(data) + expected = 'Some unparsable data' + self.assertEqual(expected, actual) + + def test_load_file(self): + path = 'tests/unit/plugins/mapr/utils/resources/test.properties' + actual = cfu.load_file(path, 'properties') + expected = {'k0': 'v0', 'k1': 'v1'} + self.assertEqual(expected, actual) + + path = 'tests/unit/plugins/mapr/utils/resources/test.xml' + actual = cfu.load_file(path, 'xml') + expected = {'k0': 'v0', 'k1': 'v1'} + self.assertEqual(expected, actual) + + path = 'tests/unit/plugins/mapr/utils/resources/raw.data' + actual = cfu.load_file(path, 'raw') + expected = {'content': 'Some unparsable data'} + self.assertEqual(expected, actual) diff --git a/sahara/tests/unit/plugins/mapr/utils/test_dict_utils.py b/sahara/tests/unit/plugins/mapr/utils/test_dict_utils.py new file mode 100644 index 00000000..35c0f1f3 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/test_dict_utils.py @@ -0,0 +1,196 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
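+
+# deep_update (tested below) merges nested dicts recursively; values from
+# the second argument win on conflict, and a new dict is returned rather
+# than the first argument being mutated, e.g.:
+#
+#     du.deep_update({'a': {'b': 'v0', 'c': 'v1'}}, {'a': {'b': 'v2'}})
+#     # => {'a': {'b': 'v2', 'c': 'v1'}}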
+ +import sahara.plugins.mapr.util.dict_utils as du +import sahara.plugins.mapr.util.func_utils as fu +import sahara.tests.unit.base as b + + +class DictUtilsTest(b.SaharaTestCase): + + def assertItemsEqual(self, expected, actual): + for e in expected: + self.assertIn(e, actual) + for a in actual: + self.assertIn(a, expected) + + def assertDictValueItemsEqual(self, expected, actual): + self.assertItemsEqual(expected.keys(), actual.keys()) + for k in actual: + self.assertItemsEqual(expected[k], actual[k]) + + def test_append_to_key(self): + arg_0 = {'k0': ['v0', 'v1'], 'k1': ['v1', 'v2'], 'k3': ['v3']} + arg_1 = {'v0': {'a': 'a'}, 'v1': {'b': 'b'}, + 'v2': {'c': 'c'}, 'v4': {'d': 'd'}} + actual = du.append_to_key(arg_0, arg_1) + expected = {'k0': {'v0': {'a': 'a'}, 'v1': {'b': 'b'}}, + 'k1': {'v1': {'b': 'b'}, 'v2': {'c': 'c'}}, + 'k3': {}} + self.assertEqual(expected, actual) + + def test_iterable_to_values_pair_dict_reducer(self): + vp_dict_r = du.iterable_to_values_pair_dict_reducer + arg = [[{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a1', 'b': 'b1', 'c': 'c1'}], + [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]] + reducer = vp_dict_r('a', 'b') + actual = reduce(reducer, arg, {}) + expected = {'a0': 'b0', 'a1': 'b1', 'a2': 'b2'} + self.assertEqual(expected, actual) + + def test_flatten_to_list_reducer(self): + arg = [[{'a': 'a0'}, {'a': 'a1'}], [{'a': 'a2'}]] + reducer = du.flatten_to_list_reducer() + actual = reduce(reducer, arg, []) + expected = [{'a': 'a0'}, {'a': 'a1'}, {'a': 'a2'}] + self.assertItemsEqual(expected, actual) + + def test_map_by_field_value(self): + arg = [{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a0', 'b': 'b2', 'c': 'c1'}, + {'a': 'a2', 'b': 'b2', 'c': 'c2'}] + + actual = du.map_by_field_value(arg, 'a') + expected = {'a0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a0', 'b': 'b2', 'c': 'c1'}], + 'a2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]} + self.assertDictValueItemsEqual(expected, actual) + + actual = du.map_by_field_value(arg, 'c') + expected = {'c0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'}], + 'c1': [{'a': 'a0', 'b': 'b2', 'c': 'c1'}], + 'c2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]} + self.assertDictValueItemsEqual(expected, actual) + + def test_map_by_fields_values(self): + arg = [{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a0', 'b': 'b2', 'c': 'c1'}, + {'a': 'a2', 'b': 'b2', 'c': 'c2'}] + actual = du.map_by_fields_values(arg, ['a', 'b', 'c']) + expected = {'a0': {'b0': {'c0': [{'a': 'a0', 'b': 'b0', 'c': 'c0'}]}, + 'b2': {'c1': [{'a': 'a0', 'b': 'b2', 'c': 'c1'}]}}, + 'a2': {'b2': {'c2': [{'a': 'a2', 'b': 'b2', 'c': 'c2'}]}}} + self.assertItemsEqual(expected.keys(), actual.keys()) + for k0 in actual: + self.assertItemsEqual(expected[k0].keys(), actual[k0].keys()) + for k1 in actual[k0]: + self.assertDictValueItemsEqual( + expected[k0][k1], actual[k0][k1]) + + def test_get_keys_by_value_type(self): + arg = {'dict_0': {}, 'list': [], 'set': set(['elem']), + 'str': 'str', 'dict_1': {}} + + actual = du.get_keys_by_value_type(arg, dict) + expected = ['dict_0', 'dict_1'] + self.assertItemsEqual(expected, actual) + + actual = du.get_keys_by_value_type(arg, list) + expected = ['list'] + self.assertItemsEqual(expected, actual) + + def test_deep_update(self): + arg_0 = {'a0': {'b0': {'c0': 'v0', 'c1': 'v1'}}, + 'a1': {'b1': 'v2'}, 'a3': 'v3'} + arg_1 = {'a0': {'b0': {'c0': 'v1', 'c2': 'v2'}, 'b1': 'v4'}, + 'a1': 'v5', 'a3': {'v1': 'v2'}} + actual = du.deep_update(arg_0, arg_1) + expected = {'a0': {'b0': {'c0': 'v1', 'c1': 'v1', 'c2': 'v2'}, + 'b1': 'v4'}, + 'a1': 'v5', 'a3': {'v1': 'v2'}} 
+ self.assertEqual(expected, actual) + self.assertIsNot(actual, arg_0) + + def test_get_keys_by_value(self): + arg = {'k0': 'v0', 'k1': 'v0', 'k2': 'v2'} + + actual = du.get_keys_by_value(arg, 'v0') + expected = ['k0', 'k1'] + self.assertItemsEqual(expected, actual) + + actual = du.get_keys_by_value(arg, 'v2') + expected = ['k2'] + self.assertItemsEqual(expected, actual) + + actual = du.get_keys_by_value(arg, 'v') + expected = [] + self.assertItemsEqual(expected, actual) + + def test_get_keys_by_value_2(self): + arg = {'k0': ['v0', 'v1'], 'k1': ['v1', 'v2'], 'k2': ['v2', 'v3']} + + actual = du.get_keys_by_value_2(arg, 'v1') + expected = ['k0', 'k1'] + self.assertItemsEqual(expected, actual) + + actual = du.get_keys_by_value_2(arg, 'v3') + expected = ['k2'] + self.assertItemsEqual(expected, actual) + + actual = du.get_keys_by_value_2(arg, 'v') + expected = [] + self.assertItemsEqual(expected, actual) + + def test_iterable_to_values_list_reducer(self): + arg = [[{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b0'}], [{'a': 'a2'}]] + reducer = du.iterable_to_values_list_reducer('a') + actual = reduce(reducer, arg, []) + expected = ['a0', 'a1', 'a2'] + self.assertTrue(isinstance(actual, list)) + self.assertItemsEqual(expected, actual) + + def test_select(self): + source = [{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a1', 'b': 'b1', 'c': 'c0'}, + {'a': 'a2', 'b': 'b2', 'c': 'c0'}] + + predicate = fu.like_predicate({'c': 'c0'}) + actual = du.select(['a', 'b', 'c'], source, predicate) + expected = [{'a': 'a0', 'b': 'b0', 'c': 'c0'}, + {'a': 'a1', 'b': 'b1', 'c': 'c0'}, + {'a': 'a2', 'b': 'b2', 'c': 'c0'}] + self.assertItemsEqual(expected, actual) + + predicate = fu.in_predicate('b', ['b0', 'b1']) + actual = du.select(['a'], source, predicate) + expected = [{'a': 'a0'}, {'a': 'a1'}] + self.assertItemsEqual(expected, actual) + + def test_list_of_vp_dicts_function(self): + arg = {'a0': 'b0', 'a1': 'b1'} + actual = du.list_of_vp_dicts_function('a', 'b')(arg) + expected = [{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b1'}] + self.assertTrue(isinstance(actual, list)) + for a in actual: + self.assertTrue(isinstance(a, dict)) + self.assertItemsEqual(expected, actual) + + def test_flattened_dict(self): + arg = {'a0': {'b0': {'c0': 'd0'}}, + 'a1': {'b0': {'c1': 'd1', + 'c2': 'd2'}, + 'b1': {'c0': 'd0'}}} + + actual = du.flattened_dict(arg, ['a', 'b', 'c', 'd']) + expected = [{'a': 'a0', 'b': 'b0', 'c': 'c0', 'd': 'd0'}, + {'a': 'a1', 'b': 'b0', 'c': 'c1', 'd': 'd1'}, + {'a': 'a1', 'b': 'b0', 'c': 'c2', 'd': 'd2'}, + {'a': 'a1', 'b': 'b1', 'c': 'c0', 'd': 'd0'}] + self.assertItemsEqual(expected, actual) + + arg = {'a0': 'b0', 'a1': 'b1'} + actual = du.flattened_dict(arg, ['a', 'b']) + expected = [{'a': 'a0', 'b': 'b0'}, {'a': 'a1', 'b': 'b1'}] + self.assertItemsEqual(expected, actual) diff --git a/sahara/tests/unit/plugins/mapr/utils/test_func_utils.py b/sahara/tests/unit/plugins/mapr/utils/test_func_utils.py new file mode 100644 index 00000000..2e8d37de --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/test_func_utils.py @@ -0,0 +1,202 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sahara.plugins.mapr.util.func_utils as fu +import sahara.tests.unit.base as b + + +class PredicatesTest(b.SaharaTestCase): + + def test_true_predicate(self): + self.assertTrue(fu.true_predicate(None)) + + def test_false_predicate(self): + self.assertFalse(fu.false_predicate(None)) + + def test_not_predicate(self): + self.assertFalse(fu.not_predicate(fu.true_predicate)(None)) + self.assertTrue(fu.not_predicate(fu.false_predicate)(None)) + + def test_and_predicate(self): + true_p = fu.true_predicate + false_p = fu.false_predicate + and_p = fu.and_predicate + self.assertTrue(and_p(true_p, true_p)(None)) + self.assertFalse(and_p(false_p, true_p)(None)) + self.assertFalse(and_p(true_p, false_p)(None)) + self.assertFalse(and_p(false_p, false_p)(None)) + + def test_or_predicate(self): + true_p = fu.true_predicate + false_p = fu.false_predicate + or_p = fu.or_predicate + self.assertTrue(or_p(true_p, true_p)(None)) + self.assertTrue(or_p(false_p, true_p)(None)) + self.assertTrue(or_p(true_p, false_p)(None)) + self.assertFalse(or_p(false_p, false_p)(None)) + + def test_field_equals_predicate(self): + field_equals_p = fu.field_equals_predicate + arg = {'a': 'a', 'b': 'b'} + self.assertTrue(field_equals_p('a', 'a')(arg)) + self.assertFalse(field_equals_p('b', 'a')(arg)) + + def test_like_predicate(self): + like_p = fu.like_predicate + arg = {'a': 'a', 'b': 'b', 'c': 'c'} + self.assertTrue(like_p({'a': 'a', 'b': 'b', 'c': 'c'})(arg)) + self.assertTrue(like_p({'a': 'a', 'b': 'b'})(arg)) + self.assertTrue(like_p({'a': 'a'})(arg)) + self.assertTrue(like_p({'a': 'a'}, ['a'])(arg)) + self.assertTrue(like_p({})(arg)) + self.assertTrue(like_p({'a': 'a', 'b': 'b', 'c': 'a'}, ['c'])(arg)) + self.assertFalse(like_p({'a': 'a', 'b': 'b', 'c': 'a'})(arg)) + self.assertFalse(like_p({'a': 'a', 'c': 'a'})(arg)) + self.assertFalse(like_p({'c': 'a'}, ['a'])(arg)) + + def test_in_predicate(self): + in_p = fu.in_predicate + arg = {'a': 'a', 'b': 'b'} + self.assertTrue(in_p('a', ['a', 'b'])(arg)) + self.assertFalse(in_p('a', ['c', 'b'])(arg)) + self.assertFalse(in_p('a', [])(arg)) + + +class FunctionsTest(b.SaharaTestCase): + + def test_copy_function(self): + copy_f = fu.copy_function + arg = {'a': 'a'} + + actual = copy_f()(arg) + expected = {'a': 'a'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + def test_append_field_function(self): + append_field_f = fu.append_field_function + arg = {'a': 'a'} + + actual = append_field_f('b', 'b')(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + def test_append_fields_function(self): + append_fields_f = fu.append_fields_function + arg = {'a': 'a'} + + actual = append_fields_f({'b': 'b', 'c': 'c'})(arg) + expected = {'a': 'a', 'b': 'b', 'c': 'c'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + actual = append_fields_f({'b': 'b'})(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + actual = append_fields_f({})(arg) + expected = {'a': 'a'} + self.assertEqual(expected, actual) + 
self.assertIsNot(actual, arg) + + def test_get_values_pair_function(self): + get_values_pair_f = fu.get_values_pair_function + arg = {'a': 'a', 'b': 'b'} + + actual = get_values_pair_f('a', 'b')(arg) + expected = ('a', 'b') + self.assertEqual(expected, actual) + + def test_get_field_function(self): + get_field_f = fu.get_field_function + arg = {'a': 'a', 'b': 'b'} + + actual = get_field_f('a')(arg) + expected = ('a', 'a') + self.assertEqual(expected, actual) + + def test_get_fields_function(self): + get_fields_f = fu.get_fields_function + arg = {'a': 'a', 'b': 'b'} + + actual = get_fields_f(['a', 'b'])(arg) + expected = [('a', 'a'), ('b', 'b')] + self.assertEqual(expected, actual) + + actual = get_fields_f(['a'])(arg) + expected = [('a', 'a')] + self.assertEqual(expected, actual) + + def test_extract_fields_function(self): + extract_fields_f = fu.extract_fields_function + arg = {'a': 'a', 'b': 'b'} + + actual = extract_fields_f(['a', 'b'])(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) + + actual = extract_fields_f(['a'])(arg) + expected = {'a': 'a'} + self.assertEqual(expected, actual) + + def test_get_value_function(self): + get_value_f = fu.get_value_function + arg = {'a': 'a', 'b': 'b'} + + actual = get_value_f('a')(arg) + expected = 'a' + self.assertEqual(expected, actual) + + def test_set_default_value_function(self): + set_default_value_f = fu.set_default_value_function + arg = {'a': 'a'} + + actual = set_default_value_f('b', 'b')(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + actual = set_default_value_f('a', 'b')(arg) + expected = {'a': 'a'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + def test_set_default_values_function(self): + set_default_values_f = fu.set_default_values_function + arg = {'a': 'a'} + + actual = set_default_values_f({'a': 'b', 'c': 'c'})(arg) + expected = {'a': 'a', 'c': 'c'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + actual = set_default_values_f({'b': 'b'})(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + actual = set_default_values_f({})(arg) + expected = {'a': 'a'} + self.assertEqual(expected, actual) + self.assertIsNot(actual, arg) + + def test_values_pair_to_dict_function(self): + values_pair_to_dict_f = fu.values_pair_to_dict_function + arg = ('a', 'b') + + actual = values_pair_to_dict_f('a', 'b')(arg) + expected = {'a': 'a', 'b': 'b'} + self.assertEqual(expected, actual) diff --git a/sahara/tests/unit/plugins/mapr/utils/test_plugin_spec.py b/sahara/tests/unit/plugins/mapr/utils/test_plugin_spec.py new file mode 100644 index 00000000..3854d876 --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/utils/test_plugin_spec.py @@ -0,0 +1,324 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
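+
+# These tests exercise PluginSpec, which parses a plugin_spec.json file
+# into the service/file/config mappings used by the MapR plugin. Most
+# cases invoke a single private _load_* method in isolation, seeding only
+# the PluginSpec attributes that method depends on.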
+ + +import os + +import sahara.plugins.mapr.util.plugin_spec as ps +import sahara.plugins.provisioning as p +import sahara.tests.unit.base as b + +import mock as m + + +class PluginSpecTest(b.SaharaTestCase): + + def assertItemsEqual(self, expected, actual): + for e in expected: + self.assertIn(e, actual) + for a in actual: + self.assertIn(a, expected) + + def assertDictValueItemsEqual(self, expected, actual): + self.assertItemsEqual(expected.keys(), actual.keys()) + for k in actual: + self.assertItemsEqual(expected[k], actual[k]) + + @m.patch.object(ps.PluginSpec, '__init__', new=lambda i: None) + def setUp(self): + super(PluginSpecTest, self).setUp() + path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json' + plugin_spec = ps.PluginSpec() + plugin_spec.base_dir = os.path.dirname(path) + plugin_spec.plugin_spec_dict = plugin_spec._load_plugin_spec_dict(path) + self.plugin_spec = plugin_spec + + def test_load_service_file_name_map(self): + plugin_spec = self.plugin_spec + + actual = plugin_spec._load_service_file_name_map() + expected = {'service_2': ['file_0', 'file_1', 'file_2'], + 'general': ['file_3', None]} + self.assertDictValueItemsEqual(expected, actual) + + def test_load_file_name_config_map(self): + plugin_spec = self.plugin_spec + + actual = plugin_spec._load_file_name_config_map() + expected = {'file_1': ['k1', 'k0', 'k3', 'k2'], None: ['k4']} + self.assertDictValueItemsEqual(expected, actual) + + def test_load_default_configs(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + + actual = pls._load_default_configs() + expected = {'service_2': {'file_1': {'k0': 'v0', 'k1': 'v1'}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}, + 'general': {'file_3': {'content': 'Some unparsable data'}}} + self.assertEqual(expected, actual) + + def test_load_service_node_process_map(self): + pls = self.plugin_spec + + actual = pls._load_service_node_process_map() + expected = {'service_2': ['node_process_0', 'node_process_1']} + self.assertDictValueItemsEqual(expected, actual) + + def test_load_plugin_config_items(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.default_configs = pls._load_default_configs() + pls.plugin_config_objects = pls._load_plugin_config_objects() + pls.file_name_config_map = pls._load_file_name_config_map() + + actual = pls._load_plugin_config_items() + expected = [{'default_value': 3, 'name': 'k1', 'config_values': None, + 'priority': 1, 'config_type': 'int', 'file': 'file_1', + 'applicable_target': 'service_2', 'is_optional': False, + 'scope': 'node', 'description': None}, + {'default_value': None, 'name': 'k2', + 'config_values': None, 'priority': 2, + 'config_type': 'bool', 'file': 'file_1', + 'applicable_target': 'service_2', 'is_optional': True, + 'scope': 'cluster', 'description': None}, + {'default_value': 'default_value_0', 'name': 'k0', + 'config_values': None, 'priority': 2, 'file': 'file_1', + 'config_type': 'string', 'applicable_target': 'service_2', + 'is_optional': False, 'scope': 'cluster', + 'description': 'description_0'}, + {'default_value': None, 'name': 'k3', + 'config_values': None, 'priority': 2, + 'config_type': 'string', 'file': 'file_1', + 'applicable_target': 'service_2', 'is_optional': True, + 'scope': 'node', 'description': None}, + {'default_value': None, 'name': 'k4', + 'config_values': None, 'priority': 2, + 'config_type': 'string', 'file': None, + 'applicable_target': 'general', 'is_optional': False, + 'scope': 'cluster', 'description': 
None}] + self.assertItemsEqual(expected, actual) + + def test_load_plugin_configs(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.plugin_config_objects = pls._load_plugin_config_objects() + pls.file_name_config_map = pls._load_file_name_config_map() + pls.plugin_config_items = pls._load_plugin_config_items() + + actual = pls._load_plugin_configs() + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}}, + 'general': {None: {'k4': None}}} + self.assertEqual(expected, actual) + + def test_load_default_plugin_configs(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.default_configs = pls._load_default_configs() + pls.plugin_config_objects = pls._load_plugin_config_objects() + pls.file_name_config_map = pls._load_file_name_config_map() + pls.plugin_config_items = pls._load_plugin_config_items() + pls.plugin_configs = pls._load_plugin_configs() + + actual = pls._load_default_plugin_configs() + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}, + 'general': {None: {'k4': None}, + 'file_3': {'content': 'Some unparsable data'}}} + self.assertEqual(expected, actual) + + def test_load_plugin_config_objects(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.default_configs = pls._load_default_configs() + + actual = pls._load_plugin_config_objects() + expected = [p.Config('k0', 'service_2', 'cluster', + default_value='default_value_0', + description='description_0'), + p.Config('k1', 'service_2', 'node', + config_type='int', default_value=3, priority=1), + p.Config('k2', 'service_2', 'cluster', + config_type='bool', is_optional=True), + p.Config('k3', 'service_2', 'node', is_optional=True), + p.Config('k4', 'general', 'cluster', is_optional=False)] + m_actual = map(lambda i: i.to_dict(), actual) + m_expected = map(lambda i: i.to_dict(), expected) + self.assertItemsEqual(m_expected, m_actual) + + def test_get_node_process_service(self): + pls = self.plugin_spec + pls.service_node_process_map = pls._load_service_node_process_map() + + actual = pls.get_node_process_service('node_process_0') + expected = 'service_2' + self.assertEqual(expected, actual) + + def test_get_default_plugin_configs(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.default_configs = pls._load_default_configs() + pls.plugin_config_objects = pls._load_plugin_config_objects() + pls.file_name_config_map = pls._load_file_name_config_map() + pls.plugin_config_items = pls._load_plugin_config_items() + pls.plugin_configs = pls._load_plugin_configs() + pls.default_plugin_configs = pls._load_default_plugin_configs() + + actual = pls.get_default_plugin_configs(['service_2']) + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}} + self.assertEqual(expected, actual) + + def test_get_config_file(self): + path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json' + plugin_spec = ps.PluginSpec(path) + + arg = {'service': 'service_2', 'scope': 'node', 'name': 'k1'} + actual = plugin_spec.get_config_file(**arg) + expected = 'file_1' + self.assertEqual(expected, actual) + + arg = {'service': 'service_1', 'scope': 'node', 'name': 'k1'} + actual = plugin_spec.get_config_file(**arg) + expected = None + self.assertEqual(expected, 
actual) + + def test_get_version_config_objects(self): + actual = self.plugin_spec.get_version_config_objects() + expected = [p.Config(name='service_2 Version', + applicable_target='service_2', + scope='cluster', + config_type='dropdown', + config_values=[('v1', 'v1'), ('v2', 'v2')], + is_optional=False, + priority=1)] + m_actual = map(lambda i: i.to_dict(), actual) + m_expected = map(lambda i: i.to_dict(), expected) + self.assertItemsEqual(m_expected, m_actual) + + def test_get_configs(self): + pls = self.plugin_spec + pls.service_file_name_map = pls._load_service_file_name_map() + pls.default_configs = pls._load_default_configs() + pls.plugin_config_objects = pls._load_plugin_config_objects() + + actual = pls.get_configs() + expected = [p.Config('k0', 'service_2', 'cluster', + default_value='default_value_0', + description='description_0'), + p.Config('k1', 'service_2', 'node', + config_type='int', default_value=3, priority=1), + p.Config('k2', 'service_2', 'cluster', + config_type='bool', is_optional=True), + p.Config('k3', 'service_2', 'node', is_optional=True), + p.Config('k4', 'general', 'cluster', is_optional=False), + p.Config('service_2 Version', 'service_2', 'cluster', + config_type='dropdown', + config_values=[('v1', 'v1'), ('v2', 'v2')], + is_optional=False, priority=1)] + m_actual = map(lambda i: i.to_dict(), actual) + m_expected = map(lambda i: i.to_dict(), expected) + self.assertItemsEqual(m_expected, m_actual) + + def test_init(self): + path = 'tests/unit/plugins/mapr/utils/resources/plugin_spec.json' + plugin_spec = ps.PluginSpec(path) + + actual = plugin_spec.service_file_name_map + expected = {'service_2': ['file_0', 'file_1', 'file_2'], + 'general': [None, 'file_3']} + self.assertDictValueItemsEqual(expected, actual) + + actual = plugin_spec.file_name_config_map + expected = {'file_1': ['k1', 'k0', 'k3', 'k2'], None: ['k4']} + self.assertDictValueItemsEqual(expected, actual) + + actual = plugin_spec.default_configs + expected = {'service_2': {'file_1': {'k0': 'v0', 'k1': 'v1'}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}, + 'general': {'file_3': {'content': 'Some unparsable data'}}} + self.assertEqual(expected, actual) + + actual = plugin_spec.service_node_process_map + expected = {'service_2': ['node_process_0', 'node_process_1']} + self.assertDictValueItemsEqual(expected, actual) + + actual = plugin_spec.plugin_config_items + expected = [{'default_value': 3, 'name': 'k1', 'config_values': None, + 'priority': 1, 'config_type': 'int', 'file': 'file_1', + 'applicable_target': 'service_2', 'is_optional': False, + 'scope': 'node', 'description': None}, + {'default_value': None, 'name': 'k2', + 'config_values': None, 'priority': 2, + 'config_type': 'bool', 'file': 'file_1', + 'applicable_target': 'service_2', 'is_optional': True, + 'scope': 'cluster', 'description': None}, + {'default_value': 'default_value_0', 'name': 'k0', + 'config_values': None, 'priority': 2, 'file': u'file_1', + 'config_type': 'string', + 'applicable_target': u'service_2', + 'is_optional': False, 'scope': u'cluster', + 'description': 'description_0'}, + {'default_value': None, 'name': 'k3', + 'config_values': None, 'priority': 2, + 'config_type': 'string', 'file': u'file_1', + 'applicable_target': u'service_2', 'is_optional': True, + 'scope': u'node', 'description': None}, + {'default_value': None, 'name': 'k4', + 'config_values': None, 'priority': 2, + 'config_type': 'string', 'file': None, + 'applicable_target': 'general', 'is_optional': False, + 'scope': 'cluster', 'description': None}] + 
self.assertItemsEqual(expected, actual) + + actual = plugin_spec.plugin_configs + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}}, + 'general': {None: {'k4': None}}} + self.assertEqual(expected, actual) + + actual = plugin_spec.default_plugin_configs + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}, + 'general': {None: {'k4': None}, + 'file_3': {'content': 'Some unparsable data'}}} + self.assertEqual(expected, actual) + + actual = plugin_spec._load_plugin_config_objects() + expected = [p.Config('k0', 'service_2', 'cluster', + default_value='default_value_0', + description='description_0'), + p.Config('k1', 'service_2', 'node', + config_type='int', default_value=3, priority=1), + p.Config('k2', 'service_2', 'cluster', + config_type='bool', is_optional=True), + p.Config('k3', 'service_2', 'node', is_optional=True), + p.Config('k4', 'general', 'cluster', is_optional=False)] + m_actual = map(lambda i: i.to_dict(), actual) + m_expected = map(lambda i: i.to_dict(), expected) + self.assertItemsEqual(m_expected, m_actual) + + actual = plugin_spec.get_node_process_service('node_process_0') + expected = 'service_2' + self.assertEqual(expected, actual) + + actual = plugin_spec.get_default_plugin_configs(['service_2']) + expected = {'service_2': {'file_1': {'k0': 'default_value_0', 'k1': 3, + 'k2': None, 'k3': None}, + 'file_2': {'k0': 'v0', 'k1': 'v1'}}} + self.assertEqual(expected, actual) diff --git a/sahara/tests/unit/plugins/mapr/versions/__init__.py b/sahara/tests/unit/plugins/mapr/versions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sahara/tests/unit/plugins/mapr/versions/test_base_cluster_configurer.py b/sahara/tests/unit/plugins/mapr/versions/test_base_cluster_configurer.py new file mode 100644 index 00000000..1822e6bc --- /dev/null +++ b/sahara/tests/unit/plugins/mapr/versions/test_base_cluster_configurer.py @@ -0,0 +1,204 @@ +# Copyright (c) 2014, MapR Technologies +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
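+
+# These tests run ClusterConfigurer.configure() against stub clusters and
+# then inspect the fake remote filesystem (instance.remote().fs) to verify
+# which configuration files would be written to each node. Note that
+# mock.patch decorators apply bottom-up, so the first mock argument of
+# each test corresponds to the innermost (last-listed) patch.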
+ +import os + +import mock as m +import six + +import sahara.plugins.mapr.util.config_file_utils as cfu +import sahara.plugins.mapr.util.plugin_spec as ps +import sahara.plugins.mapr.versions.v4_0_1_mrv1.cluster_configurer as bcc +import sahara.swift.swift_helper as sh +import sahara.tests.unit.base as b +import sahara.tests.unit.plugins.mapr.stubs as s +import sahara.utils.files as f + + +__dirname__ = os.path.dirname(__file__) + + +class BaseClusterConfigurerTest(b.SaharaTestCase): + + def assertItemsEqual(self, expected, actual): + for e in expected: + self.assertIn(e, actual) + for a in actual: + self.assertIn(a, expected) + + @m.patch('sahara.context.ctx') + @m.patch('sahara.plugins.mapr.util.config.is_data_locality_enabled') + @m.patch('sahara.plugins.mapr.util.config_file_utils.to_file_content') + def test_configure_wo_generals(self, tfc_mock, gtm_mock, cc_mock): + def to_file_content(*args, **kargs): + data = args[0] + if isinstance(data, dict): + return dict(map(lambda i: (str(i[0]), str(i[1])), + six.iteritems(args[0]))) + elif isinstance(data, str): + return {None: data} + tfc_mock.side_effect = to_file_content + gtm_mock.return_value = False + cc_mock.return_value = s.AttrDict(auth_uri='http://auth', + tenant_name='tenant_0', + tenant_id='tenant_id') + sh.CONF.os_region_name = None + + i0 = s.Instance(instance_name='i0', + management_ip='192.168.1.10', + internal_ip='10.10.1.10') + i1 = s.Instance(instance_name='i1', + management_ip='192.168.1.11', + internal_ip='10.10.1.11') + i2 = s.Instance(instance_name='i2', + management_ip='192.168.1.12', + internal_ip='10.10.1.12') + np0 = ['ZooKeeper', 'FileServer', 'TaskTracker'] + np1 = ['ZooKeeper', 'NFS', 'Oozie'] + ng0 = s.NodeGroup(id='ng0', instances=[i0, i1], node_processes=np0) + ng1 = s.NodeGroup(id='ng1', instances=[i2], node_processes=np1) + cc = {'general': {}} + cluster = s.Cluster(node_groups=[ng0, ng1], cluster_configs=cc, + hadoop_version='4.0.1.mrv1') + + plugin_spec = ps.PluginSpec( + 'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json') + configurer = bcc.ClusterConfigurer(cluster, plugin_spec) + cu_mock = m.MagicMock() + configurer.conductor = m.MagicMock() + configurer.conductor.cluster_update = cu_mock + configurer.configure() + bcc_expected_path = ( + 'tests/unit/plugins/mapr/utils/resources/bcc_expected') + core_site = {'data': cfu.load_xml_file(('%s/core-site-0.xml' + % bcc_expected_path)), + 'file': ('/opt/mapr/hadoop/hadoop-0.20.2' + '/conf/core-site.xml'), + 'root': True, + 'timeout': 120} + mapred_site = {'data': cfu.load_xml_file(('%s/mapred-site-0.xml' + % bcc_expected_path)), + 'root': True, + 'file': ('/opt/mapr/hadoop/hadoop-0.20.2' + '/conf/mapred-site.xml'), + 'timeout': 120} + cldb = {'root': True, + 'data': {'cldb.zookeeper.servers': ('192.168.1.10:5181,' + '192.168.1.11:5181,' + '192.168.1.12:5181')}, + 'timeout': 120, + 'file': '/opt/mapr/conf/cldb.conf'} + hadoop_v = {'root': True, + 'data': f.get_file_text('plugins/mapr/util' + '/resources/' + 'hadoop_version') % + {"mode": 'classic'}, + 'timeout': 120, + 'file': '/opt/mapr/conf/hadoop_version'} + self.assertItemsEqual(i0.remote().fs, [core_site, cldb, mapred_site, + hadoop_v]) + self.assertItemsEqual(i1.remote().fs, [core_site, mapred_site, cldb, + hadoop_v]) + self.assertItemsEqual(i2.remote().fs, [core_site, cldb, + hadoop_v]) + + @m.patch('sahara.context.ctx') + @m.patch('sahara.plugins.mapr.util.config.is_data_locality_enabled') + @m.patch('sahara.topology.topology_helper.generate_topology_map') + 
@m.patch('sahara.plugins.mapr.util.config_file_utils.to_file_content') + def test_configure_with_topology(self, tfc_mock, gtm_mock, + dle_mock, cc_mock): + def to_file_content(*args, **kargs): + data = args[0] + if isinstance(data, dict): + return dict(map(lambda i: (str(i[0]), str(i[1])), + six.iteritems(args[0]))) + elif isinstance(data, str): + return {None: data} + tfc_mock.side_effect = to_file_content + dle_mock.return_value = True + gtm_mock.return_value = {'i0': 'r', '192.168.1.10': 'r', + '10.10.1.10': 'r', + 'i1': 'r', '192.168.1.11': 'r', + '10.10.1.11': 'r', + 'i2': 'r', '192.168.1.12': 'r', + '10.10.1.12': 'r'} + cc_mock.return_value = s.AttrDict(auth_uri='http://auth', + tenant_name='tenant_0', + tenant_id='tenant_id') + sh.CONF.os_region_name = None + i0 = s.Instance(instance_name='i0', + management_ip='192.168.1.10', + internal_ip='10.10.1.10') + i1 = s.Instance(instance_name='i1', + management_ip='192.168.1.11', + internal_ip='10.10.1.11') + i2 = s.Instance(instance_name='i2', + management_ip='192.168.1.12', + internal_ip='10.10.1.12') + np0 = ['ZooKeeper', 'FileServer', 'TaskTracker'] + np1 = ['ZooKeeper', 'NFS', 'HBase RegionServer'] + ng0 = s.NodeGroup(id='ng0', instances=[i0, i1], node_processes=np0) + ng1 = s.NodeGroup(id='ng1', instances=[i2], node_processes=np1) + cc = {'general': {}} + cluster = s.Cluster(node_groups=[ng0, ng1], cluster_configs=cc, + hadoop_version='4.0.1.mrv1') + plugin_spec = ps.PluginSpec( + 'tests/unit/plugins/mapr/utils/resources/plugin_spec_ci.json') + configurer = bcc.ClusterConfigurer(cluster, plugin_spec) + cu_mock = m.MagicMock() + configurer.conductor = m.MagicMock() + configurer.conductor.cluster_update = cu_mock + configurer.configure() + self.assertEqual(1, gtm_mock.call_count) + bcc_expected_path = ( + 'tests/unit/plugins/mapr/utils/resources/bcc_expected') + core_site = {'data': cfu.load_xml_file(('%s/core-site-1.xml' + % bcc_expected_path)), + 'file': ('/opt/mapr/hadoop/hadoop-0.20.2' + '/conf/core-site.xml'), + 'root': True, + 'timeout': 120} + mapred_site = {'data': cfu.load_xml_file('%s/mapred-site-1.xml' + % bcc_expected_path), + 'root': True, + 'file': ('/opt/mapr/hadoop/hadoop-0.20.2' + '/conf/mapred-site.xml'), + 'timeout': 120} + topology_data = {'data': gtm_mock.return_value, + 'file': '/opt/mapr/topology.data', + 'root': True, 'timeout': 120} + cldb = {'data': cfu.load_properties_file(('%s/cldb-1.conf' + % bcc_expected_path)), + 'file': '/opt/mapr/conf/cldb.conf', + 'root': True, 'timeout': 120} + t_sh = {'data': f.get_file_text('plugins/mapr/util' + '/resources/topology.sh'), + 'file': '/opt/mapr/topology.sh', + 'root': True, 'timeout': 120} + hadoop_v = {'root': True, + 'data': f.get_file_text('plugins/mapr/util' + '/resources/hadoop_version') % + {'mode': 'classic'}, + 'timeout': 120, + 'file': '/opt/mapr/conf/hadoop_version'} + self.assertItemsEqual(i0.remote().fs, + [core_site, mapred_site, + topology_data, cldb, t_sh, hadoop_v]) + self.assertItemsEqual(i1.remote().fs, + [core_site, mapred_site, + topology_data, cldb, t_sh, hadoop_v]) + self.assertItemsEqual(i2.remote().fs, + [core_site, topology_data, cldb, t_sh, + hadoop_v]) diff --git a/setup.cfg b/setup.cfg index f538bf4c..1ef8cd29 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,6 +39,7 @@ console_scripts = sahara.cluster.plugins = vanilla = sahara.plugins.vanilla.plugin:VanillaProvider hdp = sahara.plugins.hdp.ambariplugin:AmbariPlugin + mapr = sahara.plugins.mapr.plugin:MapRPlugin cdh = sahara.plugins.cdh.plugin:CDHPluginProvider fake = 
sahara.plugins.fake.plugin:FakePluginProvider
    spark = sahara.plugins.spark.plugin:SparkProvider