Refactor MapR plugin for Sahara

Implements: blueprint mapr-refactor
Change-Id: Ib4385845a00aab2506dddccb9e6a6f3297a5622d
This commit is contained in:
artemosadchiy 2015-01-09 15:27:41 +02:00 committed by Artem Osadchyi
parent c0cc1fe808
commit a6df0c1dfc
158 changed files with 6044 additions and 6575 deletions

View File

@ -31,15 +31,17 @@ include sahara/plugins/hdp/versions/version_1_3_2/resources/*.sh
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.template
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.json
include sahara/plugins/hdp/versions/version_2_0_6/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.json
include sahara/plugins/mapr/versions/v4_0_1_mrv2/resources/*.xml
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.sh
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.json
include sahara/plugins/mapr/versions/v4_0_1_mrv1/resources/*.xml
include sahara/plugins/mapr/versions/v3_1_1/resources/*.sh
include sahara/plugins/mapr/versions/v3_1_1/resources/*.json
include sahara/plugins/mapr/versions/v3_1_1/resources/*.xml
include sahara/plugins/mapr/resources/*.sh
include sahara/plugins/mapr/services/hbase/resources/*.json
include sahara/plugins/mapr/services/hive/resources/*.xml
include sahara/plugins/mapr/services/httpfs/resources/*.json
include sahara/plugins/mapr/services/mapreduce/resources/*.json
include sahara/plugins/mapr/services/maprfs/resources/*.conf
include sahara/plugins/mapr/services/mysql/resources/*.sql
include sahara/plugins/mapr/services/oozie/resources/*.json
include sahara/plugins/mapr/services/swift/resources/*.jar
include sahara/plugins/mapr/services/swift/resources/*.json
include sahara/plugins/mapr/services/yarn/resources/*.json
include sahara/plugins/spark/resources/*.xml
include sahara/plugins/spark/resources/*.sh
include sahara/plugins/spark/resources/*.template
@ -49,14 +51,6 @@ include sahara/service/edp/resources/*.jar
include sahara/service/edp/resources/launch_command.py
include sahara/swift/resources/*.xml
include sahara/tests/unit/plugins/vanilla/hadoop2/resources/*.txt
include sahara/tests/unit/plugins/mapr/utils/resources/*.topology
include sahara/tests/unit/plugins/mapr/utils/resources/*.json
include sahara/tests/unit/plugins/mapr/utils/resources/*.data
include sahara/tests/unit/plugins/mapr/utils/resources/*.properties
include sahara/tests/unit/plugins/mapr/utils/resources/*.xml
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.conf
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.data
include sahara/tests/unit/plugins/mapr/utils/resources/bcc_expected/*.xml
include sahara/tests/unit/resources/*.heat
include sahara/tests/unit/resources/*.xml
include sahara/tests/unit/resources/*.txt

View File

@ -9,6 +9,7 @@ Babel>=1.3
eventlet>=0.16.1
Flask>=0.10,<1.0
iso8601>=0.1.9
Jinja2>=2.6 # BSD License (3 clause)
jsonschema>=2.0.0,<3.0.0
keystonemiddleware>=1.0.0
oslo.config>=1.6.0 # Apache-2.0

View File

@ -0,0 +1,109 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class AbstractClusterContext(object):
    """Interface of the object that describes a MapR cluster to the plugin.

    Concrete contexts expose cluster-wide facts as read-only properties and
    offer helpers to select instances by node process or service.
    """

    @abc.abstractproperty
    def mapr_home(self):
        """MapR home directory on the cluster instances."""

    @abc.abstractproperty
    def hadoop_version(self):
        """Hadoop version used by the cluster."""

    @abc.abstractproperty
    def hadoop_home(self):
        """Hadoop home directory."""

    @abc.abstractproperty
    def hadoop_lib(self):
        """Hadoop lib location (exact form is up to the implementation)."""

    @abc.abstractproperty
    def hadoop_conf(self):
        """Hadoop conf location (exact form is up to the implementation)."""

    @abc.abstractproperty
    def cluster(self):
        """The cluster object this context wraps."""

    @abc.abstractproperty
    def name_node_uri(self):
        """URI of the cluster file system."""

    @abc.abstractproperty
    def resource_manager_uri(self):
        """URI of the resource manager."""

    @abc.abstractproperty
    def oozie_server_uri(self):
        """URI of the Oozie server."""

    @abc.abstractproperty
    def oozie_server(self):
        """Instance that runs the Oozie server."""

    @abc.abstractproperty
    def oozie_http(self):
        """Oozie HTTP endpoint."""

    @abc.abstractproperty
    def cluster_mode(self):
        """MapReduce mode of the cluster."""

    @abc.abstractproperty
    def is_node_aware(self):
        """Whether node awareness (data locality) is in effect."""

    @abc.abstractproperty
    def some_instance(self):
        """An arbitrary instance of the cluster."""

    @abc.abstractproperty
    def distro(self):
        """Distribution detected for the cluster instances."""

    @abc.abstractproperty
    def mapr_db(self):
        """MapR-DB related setting of the cluster."""

    @abc.abstractmethod
    def filter_instances(self, instances, node_process=None, service=None):
        """Select the instances matching a node process or a service."""

    @abc.abstractmethod
    def removed_instances(self, node_process=None, service=None):
        """Instances being removed, optionally filtered."""

    @abc.abstractmethod
    def added_instances(self, node_process=None, service=None):
        """Instances being added, optionally filtered."""

    @abc.abstractmethod
    def changed_instances(self, node_process=None, service=None):
        """Added and removed instances, optionally filtered."""

    @abc.abstractmethod
    def existing_instances(self, node_process=None, service=None):
        """Instances that are neither added nor removed, optionally filtered."""

    @abc.abstractproperty
    def should_be_restarted(self):
        """Services (with their instances) scheduled for a restart."""

View File

@ -0,0 +1,29 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class AbstractValidator(object):
    """Interface of cluster validators."""

    @abc.abstractmethod
    def validate(self, cluster_context):
        """Validate the cluster described by ``cluster_context``."""

    @abc.abstractmethod
    def validate_scaling(self, cluster_context, existing, additional):
        """Validate a scaling request.

        :param cluster_context: context of the cluster being scaled
        :param existing: description of changes to existing node groups
        :param additional: description of node groups being added
        """

View File

@ -0,0 +1,29 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class AbstractConfigurer(object):
    """Interface of cluster configurers."""

    @abc.abstractmethod
    def configure(self, cluster_context, instances=None):
        """Configure ``instances`` of the cluster (all of them by default)."""

    @abc.abstractmethod
    def update(self, cluster_context, instances=None):
        """Bring the configuration up to date after the cluster changed."""

View File

@ -0,0 +1,37 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class AbstractNodeManager(object):
    """Interface of objects that control the life cycle of cluster nodes."""

    @abc.abstractmethod
    def start(self, cluster_context, instances=None):
        """Start ``instances`` (optional argument, defaults to None)."""

    @abc.abstractmethod
    def stop(self, cluster_context, instances=None):
        """Stop ``instances`` (optional argument, defaults to None)."""

    @abc.abstractmethod
    def move_nodes(self, cluster_context, instances):
        """Move the nodes backed by ``instances``."""

    @abc.abstractmethod
    def remove_nodes(self, cluster_context, instances):
        """Remove the nodes backed by ``instances`` from the cluster."""

View File

@ -0,0 +1,73 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class AbstractVersionHandler(object):
    """Interface every MapR version handler has to implement."""

    @abc.abstractmethod
    def get_node_processes(self):
        """Return the node processes supported by this version."""

    @abc.abstractmethod
    def get_configs(self):
        """Return the configuration options supported by this version."""

    @abc.abstractmethod
    def configure_cluster(self, cluster):
        """Configure ``cluster``."""

    @abc.abstractmethod
    def start_cluster(self, cluster):
        """Start ``cluster``."""

    @abc.abstractmethod
    def validate(self, cluster):
        """Validate ``cluster``."""

    @abc.abstractmethod
    def validate_scaling(self, cluster, existing, additional):
        """Validate a scaling request for ``cluster``."""

    @abc.abstractmethod
    def scale_cluster(self, cluster, instances):
        """Scale ``cluster`` with the given ``instances``."""

    @abc.abstractmethod
    def decommission_nodes(self, cluster, instances):
        """Decommission ``instances`` of ``cluster``."""

    @abc.abstractmethod
    def get_edp_engine(self, cluster, job_type):
        """Return the EDP engine for ``job_type`` on ``cluster``."""

    @abc.abstractmethod
    def get_context(self, cluster, added=None, removed=None):
        """Return a cluster context for ``cluster``.

        ``added`` and ``removed`` optionally list the instances that are
        being added to or removed from the cluster.
        """

    @abc.abstractmethod
    def get_services(self):
        """Return all services known to this version."""

    @abc.abstractmethod
    def get_required_services(self):
        """Return the services a cluster must contain."""

    @abc.abstractmethod
    def get_open_ports(self, node_group):
        """Return the ports to open for ``node_group``."""

View File

@ -0,0 +1,324 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import os
from oslo_log import log as logging
import six
from sahara import conductor
from sahara import context
import sahara.plugins.mapr.abstract.configurer as ac
import sahara.plugins.mapr.services.management.management as mng
import sahara.plugins.mapr.services.mapreduce.mapreduce as mr
from sahara.plugins.mapr.services.maprfs import maprfs
from sahara.plugins.mapr.services.mysql import mysql
import sahara.plugins.mapr.services.yarn.yarn as yarn
import sahara.plugins.mapr.util.general as util
from sahara.topology import topology_helper as th
import sahara.utils.configs as sahara_configs
from sahara.utils import files as f
# Module-level logger.
LOG = logging.getLogger(__name__)
# Rebinds the imported ``conductor`` module name to its API object.
conductor = conductor.API
MAPR_REPO_DIR = '/opt/mapr-repository'
_MAPR_HOME = '/opt/mapr'
_JAVA_HOME = '/usr/java/jdk1.7.0_51'
# Timeout passed to execute_command when running configure.sh
# (presumably seconds -- confirm against the remote API).
_CONFIGURE_SH_TIMEOUT = 600
# Command prefix; the cluster mode is expected to be appended to it.
_SET_MODE_CMD = 'maprcli cluster mapreduce set -mode '
# Resource path of the topology script, loaded with files.get_file_text.
_TOPO_SCRIPT = 'plugins/mapr/resources/topology.sh'
# BaseConfigurer._service_install_sequence sorts services by their index in
# this list in descending order: later entries are installed first and
# services that are not listed (key -1) are installed last.
SERVICE_INSTALL_PRIORITY = [
mng.Management(),
yarn.YARNv251(),
yarn.YARNv241(),
mr.MapReduce(),
maprfs.MapRFS(),
]
@six.add_metaclass(abc.ABCMeta)
class BaseConfigurer(ac.AbstractConfigurer):
    """Shared configuration workflow for MapR clusters.

    ``configure`` drives the initial set-up of instances and ``update``
    refreshes the instances that already existed when the cluster changed.
    """

    def configure(self, cluster_context, instances=None):
        """Run the whole configuration sequence, step by step.

        :param cluster_context: context object describing the cluster
        :param instances: instances to configure; every instance returned
            by ``cluster_context.get_instances()`` when empty or None
        """
        instances = instances or cluster_context.get_instances()
        self._configure_ssh_connection(cluster_context, instances)
        self._install_mapr_repo(cluster_context, instances)
        self._install_services(cluster_context, instances)
        self._configure_topology(cluster_context, instances)
        self._configure_database(cluster_context, instances)
        self._configure_services(cluster_context, instances)
        self._configure_sh_cluster(cluster_context, instances)
        self._set_cluster_mode(cluster_context)
        self._write_config_files(cluster_context, instances)
        self._configure_environment(cluster_context, instances)
        self._update_cluster_info(cluster_context)

    def update(self, cluster_context, instances=None):
        """Re-configure the instances that already exist in the cluster.

        configure.sh is re-run and the config files are rewritten on the
        existing instances only when ``instances`` include control nodes
        (see ``cluster_context.has_control_nodes``).

        :param cluster_context: context object describing the cluster
        :param instances: instances inspected for control nodes; all
            cluster instances when empty or None
        """
        LOG.debug('Configuring existing instances')
        instances = instances or cluster_context.get_instances()
        existing = cluster_context.existing_instances()
        self._configure_topology(cluster_context, existing)
        if cluster_context.has_control_nodes(instances):
            self._configure_sh_cluster(cluster_context, existing)
            self._write_config_files(cluster_context, existing)
        self._update_services(cluster_context, existing)
        self._restart_services(cluster_context)
        LOG.debug('Existing instances successfully configured')

    def _configure_services(self, cluster_context, instances):
        """Call ``configure`` on every service of the cluster."""
        for service in cluster_context.cluster_services:
            service.configure(cluster_context, instances)

    def _install_services(self, cluster_context, instances):
        """Install the cluster services in priority order."""
        for service in self._service_install_sequence(cluster_context):
            service.install(cluster_context, instances)

    def _service_install_sequence(self, cluster_context):
        """Return the cluster services ordered for installation.

        Services are sorted by their index in SERVICE_INSTALL_PRIORITY in
        descending order; services missing from that list get key -1 and
        therefore come last.
        """
        def key(service):
            if service in SERVICE_INSTALL_PRIORITY:
                return SERVICE_INSTALL_PRIORITY.index(service)
            return -1

        return sorted(cluster_context.cluster_services, key=key, reverse=True)

    def _configure_topology(self, context, instances):
        """Upload topology data and the topology script to ``instances``.

        Nothing is uploaded when ``context.is_node_aware`` is false.
        Note that the ``context`` parameter shadows the ``sahara.context``
        module inside this method.
        """
        LOG.debug('Configuring cluster topology')
        is_node_aware = context.is_node_aware
        if is_node_aware:
            topo = th.generate_topology_map(context.cluster, is_node_aware)
            # One "<key> <value>" pair of the topology map per line.
            topo = '\n'.join(['%s %s' % i for i in six.iteritems(topo)])
            data_path = '%s/topology.data' % context.mapr_home
            script_path = '%s/topology.sh' % context.mapr_home
            files = {
                data_path: topo,
                script_path: f.get_file_text(_TOPO_SCRIPT),
            }
            chmod_cmd = 'chmod +x %s' % script_path
            for instance in instances:
                with instance.remote() as r:
                    r.write_files_to(files, run_as_root=True)
                    r.execute_command(chmod_cmd, run_as_root=True)
        else:
            LOG.debug('Data locality is disabled.')
        LOG.debug('Cluster topology successfully configured')

    def _execute_on_instances(self, function, cluster_context, instances,
                              **kwargs):
        """Run ``function(instance, **kwargs)`` per instance in a thread group.

        ``cluster_context`` is not used by this helper.
        """
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('%s-execution' % function.__name__,
                         function, instance, **kwargs)

    def _write_config_files(self, cluster_context, instances):
        """Render and upload the service config files, node group by group.

        For every node group of ``instances`` the default service configs
        are merged with the node group configuration, the config files of
        each service are rendered and then written to those instances of
        the group that are part of ``instances``.
        """
        LOG.debug('Writing config files')

        node_groups = util.unique_list(instances, lambda i: i.node_group)
        for ng in node_groups:
            ng_services = cluster_context.get_cluster_services(ng)
            ng_user_configs = ng.configuration()
            ng_default_configs = cluster_context.get_services_configs_dict(
                ng_services)
            ng_configs = sahara_configs.merge_configs(
                ng_default_configs, ng_user_configs)
            ng_config_files = dict()
            for service in ng_services:
                service_conf_files = service.get_config_files(
                    cluster_context=cluster_context,
                    configs=ng_configs[service.ui_name],
                    instance=ng.instances[0]
                )
                LOG.debug('Rendering %s config files', service.ui_name)
                for conf_file in service_conf_files:
                    ng_config_files[conf_file.remote_path] = conf_file.render()

            # A real list (not a lazy ``filter`` object) on every Python.
            ng_instances = [i for i in ng.instances if i in instances]
            self._write_ng_config_files(ng_instances, ng_config_files)
        LOG.debug('Config files successfully written')

    def _write_ng_config_files(self, instances, conf_files):
        """Write ``conf_files`` to each instance, one thread per instance."""
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('write-config-files-%s' % instance.id,
                         self._write_config_files_instance, instance,
                         conf_files)

    def _configure_environment(self, cluster_context, instances):
        """Prepare the general environment, then run post install hooks."""
        self.configure_general_environment(cluster_context, instances)
        self._post_install_services(cluster_context, instances)

    def _configure_database(self, cluster_context, instances):
        """Install and start MySQL, then create the databases."""
        mysql_instance = mysql.MySQL.get_db_instance(cluster_context)
        distro_name = cluster_context.distro.name
        mysql.MySQL.install_mysql(mysql_instance, distro_name)
        mysql.MySQL.start_mysql_server(cluster_context)
        mysql.MySQL.create_databases(cluster_context, instances)

    @staticmethod
    def _write_config_files_instance(instance, config_files):
        """Create the target directories and write the files to ``instance``.

        :param instance: instance to write to
        :param config_files: dict mapping remote path to file content
        """
        # Several files usually share a directory; issue one remote
        # ``mkdir -p`` per distinct directory instead of one per file.
        directories = sorted({os.path.dirname(path) for path in config_files})
        with instance.remote() as r:
            for directory in directories:
                r.execute_command('mkdir -p ' + directory,
                                  run_as_root=True)
            r.write_files_to(config_files, run_as_root=True)

    def _post_install_services(self, cluster_context, instances):
        """Call ``post_install`` on every service of the cluster."""
        LOG.debug('Executing service post install hooks')
        for s in cluster_context.cluster_services:
            s.post_install(cluster_context, instances)
        LOG.debug('Post install hooks execution successfully executed')

    def _update_cluster_info(self, cluster_context):
        """Store the web UI links of the services in the cluster info.

        Each ``ui_info`` entry is a ``(title, process, url)`` triple where
        ``url`` is a template filled with the IP of an instance running
        ``process``.
        """
        LOG.debug('Updating UI information.')
        info = dict()
        for service in cluster_context.cluster_services:
            for title, process, url in service.ui_info:
                ip = cluster_context.get_instance_ip(process)
                if ip is None:
                    # No instance runs this process: formatting the
                    # template with None would publish a broken link.
                    continue
                info[title] = {'WebUI': url % ip}

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster_context.cluster, {'info': info})

    def configure_general_environment(self, cluster_context, instances=None):
        """Set the password and create the home directory of user "mapr".

        Both steps are skipped (with a debug message) on instances where
        the user does not exist.

        :param cluster_context: context object describing the cluster
        :param instances: instances to prepare; all cluster instances when
            empty or None
        """
        LOG.debug('Executing post configure hooks')

        if not instances:
            instances = cluster_context.get_instances()

        def set_user_password(instance):
            LOG.debug('Setting password for user "mapr"')
            if self.mapr_user_exists(instance):
                with instance.remote() as r:
                    # NOTE(review): the password is hard-coded.
                    r.execute_command(
                        'echo "%s:%s"|chpasswd' % ('mapr', 'mapr'),
                        run_as_root=True)
            else:
                LOG.debug('user "mapr" does not exists')

        def create_home_mapr(instance):
            target_path = '/home/mapr'
            LOG.debug("Creating home directory for user 'mapr'")
            args = {'path': target_path}
            cmd = 'mkdir -p %(path)s && chown mapr:mapr %(path)s' % args
            if self.mapr_user_exists(instance):
                with instance.remote() as r:
                    r.execute_command(cmd, run_as_root=True)
            else:
                LOG.debug('user "mapr" does not exists')

        self._execute_on_instances(set_user_password, cluster_context,
                                   instances)
        self._execute_on_instances(create_home_mapr, cluster_context,
                                   instances)

    def _configure_sh_cluster(self, cluster_context, instances):
        """Run configure.sh on ``instances``, one thread per instance."""
        LOG.debug('Executing configure.sh')

        if not instances:
            instances = cluster_context.get_instances()
        script = cluster_context.configure_sh

        # Metrics database parameters, used on nodes running Metrics.
        db_specs = dict(mysql.MySQL.METRICS_SPECS._asdict())
        db_specs.update({
            'host': mysql.MySQL.get_db_instance(cluster_context).fqdn(),
            'port': mysql.MySQL.MYSQL_SERVER_PORT,
        })

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('configure-sh-%s' % instance.id,
                         self._configure_sh_instance, cluster_context,
                         instance, script, db_specs)
        LOG.debug('Executing configure.sh successfully completed')

    def _configure_sh_instance(self, context, instance, command, specs):
        """Run the configure.sh ``command`` on a single instance.

        ``--create-user`` is appended when user "mapr" is missing and the
        database options are appended when the instance runs Metrics.
        The ``context`` parameter is the cluster context (it shadows the
        ``sahara.context`` module inside this method).
        """
        if not self.mapr_user_exists(instance):
            command += ' --create-user'
        if context.check_for_process(instance, mng.METRICS):
            command += (' -d %(host)s:%(port)s -du %(user)s -dp %(password)s '
                        '-ds %(db_name)s') % specs
        with instance.remote() as r:
            r.execute_command('sudo -i ' + command,
                              timeout=_CONFIGURE_SH_TIMEOUT)

    def _configure_ssh_connection(self, cluster_context, instances):
        """Append keep-alive options to ~/.ssh/config on every instance.

        The options are appended unconditionally on each call.
        """
        def keep_alive_connection(instance):
            echo_param = 'echo "KeepAlive yes" >> ~/.ssh/config'
            echo_timeout = 'echo "ServerAliveInterval 60" >> ~/.ssh/config'
            with instance.remote() as r:
                r.execute_command(echo_param)
                r.execute_command(echo_timeout)

        self._execute_on_instances(keep_alive_connection,
                                   cluster_context, instances)

    def mapr_user_exists(self, instance):
        """Return True when ``id -u mapr`` succeeds on ``instance``."""
        with instance.remote() as r:
            ec, out = r.execute_command(
                'id -u mapr', run_as_root=True, raise_when_error=False)
        return ec == 0

    def post_start(self, c_context, instances=None):
        """Call ``post_start`` on every service with its own instances.

        :param c_context: context object describing the cluster
        :param instances: instances to consider; all cluster instances
            when empty or None
        """
        instances = instances or c_context.get_instances()
        LOG.debug('Executing service post start hooks')
        for service in c_context.cluster_services:
            updated = c_context.filter_instances(instances, service=service)
            service.post_start(c_context, updated)
        LOG.debug('Post start hooks execution successfully executed')

    def _set_cluster_mode(self, cluster_context):
        """Set the MapReduce mode on a CLDB node, if a mode is defined."""
        cluster_mode = cluster_context.cluster_mode
        if not cluster_mode:
            return
        cldb = cluster_context.get_instance(maprfs.CLDB)
        with cldb.remote() as r:
            # Reuses the module constant; the resulting command is
            # 'sudo -u mapr maprcli cluster mapreduce set -mode <mode>'.
            cmd = 'sudo -u mapr %s%s' % (_SET_MODE_CMD, cluster_mode)
            r.execute_command(cmd)

    def _install_mapr_repo(self, cluster_context, instances):
        """Upload and run the repository install script on ``instances``.

        A failure of the script itself is not raised
        (``raise_when_error=False``).
        """
        def add_repo(instance, **kwargs):
            with instance.remote() as r:
                script = '/tmp/repo_install.sh'
                data = cluster_context.get_install_repo_script_data()
                r.write_file_to(script, data, run_as_root=True)
                r.execute_command('chmod +x %s' % script, run_as_root=True)
                r.execute_command('%s %s' % (script, kwargs.get('distro')),
                                  run_as_root=True, raise_when_error=False)

        d_name = cluster_context.distro.name
        self._execute_on_instances(
            add_repo, cluster_context, instances, distro=d_name)

    def _update_services(self, c_context, instances):
        """Call ``update`` on every service with its own instances."""
        for service in c_context.cluster_services:
            updated = c_context.filter_instances(instances, service=service)
            service.update(c_context, updated)

    def _restart_services(self, cluster_context):
        """Restart every service listed in ``should_be_restarted``."""
        restart = cluster_context.should_be_restarted
        for service, instances in six.iteritems(restart):
            service.restart(util.unique_list(instances))

View File

@ -0,0 +1,332 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
from oslo_config import cfg
import sahara.exceptions as e
from sahara.i18n import _
import sahara.plugins.mapr.abstract.cluster_context as cc
import sahara.plugins.mapr.domain.distro as distro
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.services.management.management as mng
import sahara.plugins.mapr.services.maprfs.maprfs as mfs
import sahara.plugins.mapr.services.oozie.oozie as oozie
from sahara.plugins.mapr.services.swift import swift
import sahara.plugins.mapr.services.yarn.yarn as yarn
import sahara.plugins.mapr.util.general as g
import sahara.plugins.utils as u
# Global oslo.config object; ``enable_data_locality`` is declared in
# sahara.topology.topology_helper and imported here so that it can be read
# as CONF.enable_data_locality.
CONF = cfg.CONF
CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper")
def _get_node_process_name(node_process):
    """Return the name of ``node_process``.

    :param node_process: a ``NodeProcess`` object, a string or anything
        else (e.g. None)
    :returns: ``ui_name`` of a ``NodeProcess``, the string itself for a
        string, None otherwise
    """
    if isinstance(node_process, np.NodeProcess):
        return node_process.ui_name
    try:
        # Python 2: matches both ``str`` and ``unicode``.
        string_types = basestring
    except NameError:
        # Python 3 has no ``basestring``; without this fallback every
        # non-NodeProcess argument (including None) raised NameError.
        string_types = str
    if isinstance(node_process, string_types):
        return node_process
    return None
class BaseClusterContext(cc.AbstractClusterContext):
    """Base cluster context with lazily computed, cached properties.

    ``hadoop_lib``, ``hadoop_conf`` and ``resource_manager_uri`` of the
    abstract interface are not defined here.
    """

    def __init__(self, cluster, version_handler, added=None, removed=None):
        """Create a context for ``cluster``.

        :param cluster: cluster object to describe
        :param version_handler: handler providing ``get_services()`` and
            ``get_required_services()``
        :param added: instances being added to the cluster, if any
        :param removed: instances being removed from the cluster, if any
        """
        self._cluster = cluster
        self._distro = None
        self.all_services_list = version_handler.get_services()
        self._required_services = version_handler.get_required_services()
        self._cluster_services = None
        self._mapr_home = '/opt/mapr'
        self._name_node_uri = 'maprfs:///'
        self._cluster_mode = None
        self._node_aware = None
        self._oozie_server_uri = None
        self._oozie_server = None
        self._oozie_http = None
        self._some_instance = None
        self._configure_sh = None
        self._mapr_db = None
        self._hadoop_home = None
        self._hadoop_version = None
        self._added_instances = added or []
        self._removed_instances = removed or []
        self._changed_instances = (
            self._added_instances + self._removed_instances)
        # Everything that is neither being added nor removed.
        self._existing_instances = [i for i in self.get_instances()
                                    if i not in self._changed_instances]
        # Maps a service to the instances it has to be restarted on.
        self._restart = collections.defaultdict(list)

    @property
    def cluster(self):
        """The wrapped cluster object."""
        return self._cluster

    @property
    def cluster_services(self):
        """Services present in the cluster (computed on first access)."""
        if not self._cluster_services:
            self._cluster_services = self.get_cluster_services()
        return self._cluster_services

    @property
    def required_services(self):
        """Required services as reported by the version handler."""
        return self._required_services

    @property
    def mapr_home(self):
        """MapR home directory."""
        return self._mapr_home

    @property
    def hadoop_version(self):
        """Hadoop version; None unless a subclass sets it."""
        return self._hadoop_version

    @property
    def hadoop_home(self):
        """``<mapr_home>/hadoop/hadoop-<hadoop_version>``, cached."""
        if not self._hadoop_home:
            f = '%(mapr_home)s/hadoop/hadoop-%(hadoop_version)s'
            args = {
                'mapr_home': self.mapr_home,
                'hadoop_version': self.hadoop_version,
            }
            self._hadoop_home = f % args
        return self._hadoop_home

    @property
    def name_node_uri(self):
        """URI of the cluster file system."""
        return self._name_node_uri

    @property
    def oozie_server_uri(self):
        """``http://<oozie_http>/oozie`` or None without an Oozie server."""
        if not self._oozie_server_uri:
            oozie_http = self.oozie_http
            url = 'http://%s/oozie' % oozie_http if oozie_http else None
            self._oozie_server_uri = url
        return self._oozie_server_uri

    @property
    def oozie_server(self):
        """First instance running Oozie, or None."""
        if not self._oozie_server:
            self._oozie_server = self.get_instance(oozie.OOZIE)
        return self._oozie_server

    @property
    def oozie_http(self):
        """``<management ip>:11000`` of the Oozie server, or None."""
        if not self._oozie_http:
            oozie_server = self.oozie_server
            ip = oozie_server.management_ip if oozie_server else None
            self._oozie_http = '%s:11000' % ip if ip else None
        return self._oozie_http

    @property
    def cluster_mode(self):
        """MapReduce mode; None unless a subclass sets it."""
        return self._cluster_mode

    @property
    def is_node_aware(self):
        """Truthy when node awareness and data locality are both enabled."""
        return self._node_aware and CONF.enable_data_locality

    @property
    def some_instance(self):
        """First instance of the first node group that has instances.

        :raises IndexError: when the cluster has no instances at all
        """
        if not self._some_instance:
            # A node group may be empty; do not insist on the first one.
            populated = [ng for ng in self.cluster.node_groups
                         if ng.instances]
            self._some_instance = populated[0].instances[0]
        return self._some_instance

    @property
    def distro(self):
        """Distribution detected on ``some_instance``, cached."""
        if not self._distro:
            self._distro = distro.get(self.some_instance)
        return self._distro

    @property
    def mapr_db(self):
        """``'-noDB'`` when MapR-DB is disabled in the config, else ``''``."""
        if self._mapr_db is None:
            mapr_db = mfs.MapRFS.ENABLE_MAPR_DB_CONFIG
            mapr_db = self._get_cluster_config_value(mapr_db)
            self._mapr_db = '-noDB' if not mapr_db else ''
        return self._mapr_db

    @property
    def configure_sh(self):
        """The configure.sh command line for this cluster, cached."""
        if not self._configure_sh:
            f = ('%(script_path)s'
                 ' -N %(cluster_name)s'
                 ' -C %(cldbs)s'
                 ' -Z %(zookeepers)s'
                 ' -no-autostart -f %(m7)s')
            args = {
                'script_path': '/opt/mapr/server/configure.sh',
                'cluster_name': self.cluster.name,
                'cldbs': self.get_cldb_nodes_ip(),
                'zookeepers': self.get_zookeeper_nodes_ip(),
                'm7': self.mapr_db
            }
            self._configure_sh = f % args
        return self._configure_sh

    def _get_cluster_config_value(self, config):
        """Return the user value of ``config`` or its default value."""
        cluster_configs = self.cluster.cluster_configs
        service = config.applicable_target
        name = config.name
        if service in cluster_configs and name in cluster_configs[service]:
            return cluster_configs[service][name]
        return config.default_value

    def get_instances(self, node_process=None):
        """Return the cluster instances, optionally for one node process."""
        name = _get_node_process_name(node_process)
        return u.get_instances(self.cluster, name)

    def get_instance(self, node_process):
        """Return the first instance running ``node_process`` or None."""
        name = _get_node_process_name(node_process)
        i = u.get_instances(self.cluster, name)
        return i[0] if i else None

    def get_instances_ip(self, node_process):
        """Return the management IPs of instances running ``node_process``."""
        return [i.management_ip for i in self.get_instances(node_process)]

    def get_instance_ip(self, node_process):
        """Return the management IP of the first matching instance or None."""
        i = self.get_instance(node_process)
        return i.management_ip if i else None

    def get_zookeeper_nodes_ip_with_port(self, separator=','):
        """Return the ``ip:port`` pairs of the ZooKeeper nodes, joined."""
        return separator.join(['%s:%s' % (ip, mng.ZK_CLIENT_PORT)
                               for ip in self.get_instances_ip(mng.ZOOKEEPER)])

    def check_for_process(self, instance, process):
        """Tell whether the node group of ``instance`` runs ``process``."""
        processes = instance.node_group.node_processes
        name = _get_node_process_name(process)
        return name in processes

    def get_services_configs_dict(self, services=None):
        """Merge ``get_configs_dict()`` of ``services`` into one dict.

        All cluster services are used when ``services`` is empty or None.
        """
        if not services:
            services = self.cluster_services
        result = dict()
        for service in services:
            result.update(service.get_configs_dict())
        return result

    def get_configure_sh_path(self):
        """Return the path of configure.sh."""
        return '/opt/mapr/server/configure.sh'

    def get_chosen_service_version(self, service_name):
        """Return the configured '<service_name> Version' value or None."""
        service_configs = self.cluster.cluster_configs.get(service_name, None)
        if not service_configs:
            return None
        return service_configs.get('%s Version' % service_name, None)

    def get_cluster_services(self, node_group=None):
        """Return the services of ``node_group`` or of the whole cluster.

        A ``swift.Swift()`` service is always appended to the result.
        """
        if node_group:
            node_processes = node_group.node_processes
        else:
            # ``process`` rather than ``np``: the latter would shadow the
            # node_process module alias.
            node_processes = [process for ng in self.cluster.node_groups
                              for process in ng.node_processes]
            node_processes = g.unique_list(node_processes)
        services = g.unique_list(node_processes, self.get_service)
        return services + [swift.Swift()]

    def get_service(self, node_process):
        """Return the service object that owns ``node_process``.

        :raises InvalidDataException: when no known service lists the
            process or no service matches the chosen version
        """
        ui_name = self.get_service_name_by_node_process(node_process)
        if ui_name is None:
            raise e.InvalidDataException(
                _('Service not found in services list'))
        version = self.get_chosen_service_version(ui_name)
        service = self._find_service_instance(ui_name, version)
        if service is None:
            raise e.InvalidDataException(_('Can not map service'))
        return service

    def _find_service_instance(self, ui_name, version):
        """Return the first known service with this name and version.

        Any version matches when ``version`` is None; None is returned
        when nothing matches.
        """
        for service in self.all_services_list:
            if service.ui_name == ui_name:
                if version is not None and service.version != version:
                    continue
                return service
        return None

    def get_service_name_by_node_process(self, node_process):
        """Return the ``ui_name`` of the service owning ``node_process``.

        None is returned when no known service lists the process.
        """
        node_process = _get_node_process_name(node_process)
        for service in self.all_services_list:
            node_processes = [np.ui_name for np in service.node_processes]
            if node_process in node_processes:
                return service.ui_name
        return None

    def get_instances_count(self, node_process=None):
        """Return the number of instances, optionally for one process."""
        name = _get_node_process_name(node_process)
        return u.get_instances_count(self.cluster, name)

    def get_node_groups(self, node_process=None):
        """Return the node groups, optionally for one process."""
        name = _get_node_process_name(node_process)
        return u.get_node_groups(self.cluster, name)

    def get_cldb_nodes_ip(self, separator=','):
        """Return the management IPs of the CLDB nodes, joined."""
        return separator.join(self.get_instances_ip(mfs.CLDB))

    def get_zookeeper_nodes_ip(self, separator=','):
        """Return the management IPs of the ZooKeeper nodes, joined."""
        return separator.join(
            self.get_instances_ip(mng.ZOOKEEPER))

    def get_resourcemanager_ip(self):
        """Return the management IP of the resource manager or None."""
        return self.get_instance_ip(yarn.RESOURCE_MANAGER)

    def get_historyserver_ip(self):
        """Return the management IP of the history server or None."""
        return self.get_instance_ip(yarn.HISTORY_SERVER)

    def has_control_nodes(self, instances):
        """Tell whether any instance runs ZooKeeper or CLDB."""
        for inst in instances:
            zookeepers = self.check_for_process(inst, mng.ZOOKEEPER)
            cldbs = self.check_for_process(inst, mfs.CLDB)
            if zookeepers or cldbs:
                return True
        return False

    def is_present(self, service):
        """Tell whether ``service`` is one of the cluster services."""
        return service in self.cluster_services

    def filter_instances(self, instances, node_process=None, service=None):
        """Return the list of ``instances`` matching the given criterion.

        ``node_process`` takes precedence over ``service``; without any
        criterion a copy of ``instances`` is returned. The result is a
        list in every case.

        :param instances: instances to filter
        :param node_process: keep instances running this node process
        :param service: keep instances running any process of the service
        """
        if node_process:
            # A comprehension instead of ``filter``: the latter yields a
            # lazy iterator on Python 3, unlike the other branches.
            return [i for i in instances
                    if self.check_for_process(i, node_process)]

        if service:
            return [i for i in instances
                    if any(self.check_for_process(i, process)
                           for process in service.node_processes)]
        return list(instances)

    def removed_instances(self, node_process=None, service=None):
        """Return the removed instances, optionally filtered."""
        instances = self._removed_instances
        return self.filter_instances(instances, node_process, service)

    def added_instances(self, node_process=None, service=None):
        """Return the added instances, optionally filtered."""
        instances = self._added_instances
        return self.filter_instances(instances, node_process, service)

    def changed_instances(self, node_process=None, service=None):
        """Return the added plus removed instances, optionally filtered."""
        instances = self._changed_instances
        return self.filter_instances(instances, node_process, service)

    def existing_instances(self, node_process=None, service=None):
        """Return the unchanged instances, optionally filtered."""
        instances = self._existing_instances
        return self.filter_instances(instances, node_process, service)

    @property
    def should_be_restarted(self):
        """Mapping of service to the instances it must be restarted on."""
        return self._restart

View File

@ -0,0 +1,35 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.abstract.cluster_validator as v
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.version_handler_factory as vhf
class BaseValidator(v.AbstractValidator):
    """Validator driven by required services and per-service rules."""

    def validate(self, cluster_context):
        """Check required services, then run every service rule.

        :param cluster_context: context of the cluster to validate
        """
        for required in cluster_context.required_services:
            vu.assert_present(required, cluster_context)

        for present in cluster_context.cluster_services:
            for check in present.validation_rules:
                check(cluster_context)

    def validate_scaling(self, cluster_context, existing, additional):
        """Validate the cluster as it would look after scaling.

        A fake cluster is built from the current one plus the requested
        changes and validated through a context obtained from the version
        handler of the cluster.
        """
        current = cluster_context.cluster
        handler = vhf.VersionHandlerFactory.get().get_handler(
            current.hadoop_version)
        scaled = vu.create_fake_cluster(current, existing, additional)
        self.validate(handler.get_context(scaled))

View File

@ -0,0 +1,86 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import sahara.plugins.mapr.util.maprfs_helper as mfs
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.service.edp.binary_retrievers.dispatch as d
import sahara.service.edp.oozie.engine as e
from sahara.utils import edp
class MapROozieJobEngine(e.OozieJobEngine):
    """Oozie job engine that stages job files through the MapR-FS helpers.

    URIs and the Oozie server are read from the cluster context produced by
    the version handler matching the cluster's ``hadoop_version``.
    """

    # User name handed to the MapR-FS helper calls below.
    hdfs_user = 'mapr'

    def __init__(self, cluster):
        super(MapROozieJobEngine, self).__init__(cluster)
        # NOTE(review): relies on the parent constructor setting
        # self.cluster - confirm against OozieJobEngine.
        self.ctx = self._get_cluster_context(self.cluster)

    def get_hdfs_user(self):
        """Return the class-level ``hdfs_user`` value."""
        return MapROozieJobEngine.hdfs_user

    def create_hdfs_dir(self, remote, dir_name):
        """Create ``dir_name`` with ``mfs.create_maprfs4_dir``."""
        mfs.create_maprfs4_dir(remote, dir_name, self.get_hdfs_user())

    def _upload_workflow_file(self, where, job_dir, wf_xml, hdfs_user):
        """Upload ``wf_xml`` as ``workflow.xml`` and return its path."""
        f_name = 'workflow.xml'
        with where.remote() as r:
            mfs.put_file_to_maprfs(r, wf_xml, f_name, job_dir, hdfs_user)
        return os.path.join(job_dir, f_name)

    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload mains, libs and builtin libs; return the uploaded paths.

        Mains go directly into ``job_dir``; libs and builtin libs go into
        ``job_dir + '/lib'``.
        """
        mains = job.mains or []
        libs = job.libs or []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        lib_dir = job_dir + '/lib'

        with where.remote() as r:
            for main in mains:
                raw_data = d.get_raw_binary(main, proxy_configs)
                mfs.put_file_to_maprfs(r, raw_data, main.name, job_dir,
                                       hdfs_user)
                uploaded_paths.append(os.path.join(job_dir, main.name))

            # Builtin libs are written to lib_dir as well, so the directory
            # is needed whenever either list is non-empty.
            if libs or builtin_libs:
                self.create_hdfs_dir(r, lib_dir)

            for lib in libs:
                raw_data = d.get_raw_binary(lib, proxy_configs)
                mfs.put_file_to_maprfs(r, raw_data, lib.name, lib_dir,
                                       hdfs_user)
                uploaded_paths.append(os.path.join(lib_dir, lib.name))

            for builtin in builtin_libs:
                mfs.put_file_to_maprfs(r, builtin['raw'], builtin['name'],
                                       lib_dir, hdfs_user)
                uploaded_paths.append(lib_dir + '/' + builtin['name'])

        return uploaded_paths

    def get_name_node_uri(self, cluster):
        """Return the name node URI from the cached cluster context."""
        return self.ctx.name_node_uri

    def get_oozie_server_uri(self, cluster):
        """Return the Oozie server URI from the cached cluster context."""
        return self.ctx.oozie_server_uri

    def get_oozie_server(self, cluster):
        """Return the Oozie server from the cached cluster context."""
        return self.ctx.oozie_server

    def get_resource_manager_uri(self, cluster):
        """Return the resource manager URI from the cached context."""
        return self.ctx.resource_manager_uri

    def _get_cluster_context(self, cluster):
        """Build a cluster context via the cluster's version handler."""
        h_version = cluster.hadoop_version
        v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
        return v_handler.get_context(cluster)

View File

@ -0,0 +1,184 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import random
from oslo_log import log as logging
from oslo_utils import timeutils
from sahara import context
from sahara.i18n import _
import sahara.plugins.exceptions as ex
import sahara.plugins.mapr.abstract.node_manager as s
import sahara.plugins.mapr.services.management.management as mng
import sahara.plugins.mapr.services.maprfs.maprfs as mfs
LOG = logging.getLogger(__name__)

# Shell pipeline run on a node; the %s placeholder is filled with the node's
# management ip and the trailing greps keep only the digits of the id field.
GET_SERVER_ID_CMD = ('maprcli node list -json -filter [ip==%s] -columns id'
                     ' | grep id | grep -o \'[0-9]*\'')
# Command polled by _await_cldb; its output is parsed as JSON.
NODE_LIST_CMD = 'maprcli node list -json'
# %s is filled with the output of GET_SERVER_ID_CMD.
MOVE_NODE_CMD = 'maprcli node move -serverids %s -topology /decommissioned'
# Filled with a dict providing the 'ip', 'nodes' and 'zookeepers' keys.
REMOVE_NODE_CMD = ('maprcli node remove -filter [ip==%(ip)s] -nodes %(nodes)s'
                   ' -zkconnect %(zookeepers)s')
# Value passed to context.sleep by await_no_heartbeat (presumably seconds).
WAIT_NODE_ALARM_NO_HEARTBEAT = 360
# Service name used when starting/stopping warden ("service mapr-warden ...").
WARDEN_SERVICE = 'warden'
# Actions appended to the "service mapr-<name>" command line.
START = 'start'
STOP = 'stop'
# Value passed to context.sleep between CLDB polls (presumably seconds).
DELAY = 5
# Threshold compared against the retry counter in _await_cldb.
DEFAULT_RETRY_COUNT = 10
class BaseNodeManager(s.AbstractNodeManager):
    """Starts, stops, moves and removes cluster nodes with remote commands."""

    def move_nodes(self, cluster_context, instances):
        """Move ``instances`` to the /decommissioned topology.

        The server id is looked up on each instance itself; the move command
        is executed on one randomly chosen CLDB instance that is not among
        ``instances``.
        """
        LOG.debug("Moving the nodes to /decommissioned topology")
        cldb_instances = self._get_cldb_instances(cluster_context, instances)
        with random.choice(cldb_instances).remote() as cldb_remote:
            for instance in instances:
                with instance.remote() as r:
                    command = GET_SERVER_ID_CMD % instance.management_ip
                    ec, out = r.execute_command(command, run_as_root=True)
                    command = MOVE_NODE_CMD % out.strip()
                    cldb_remote.execute_command(command, run_as_root=True)
        LOG.debug("Nodes successfully moved")

    def remove_nodes(self, c_context, instances):
        """Run the node-remove command for each instance on a CLDB node."""
        LOG.debug("Removing nodes from cluster")
        cldb_instances = self._get_cldb_instances(c_context, instances)
        with random.choice(cldb_instances).remote() as cldb_remote:
            for instance in instances:
                args = {
                    'ip': instance.management_ip,
                    'nodes': instance.fqdn(),
                    'zookeepers': c_context.get_zookeeper_nodes_ip_with_port(),
                }
                command = REMOVE_NODE_CMD % args
                cldb_remote.execute_command(command, run_as_root=True)
        LOG.debug("Nodes successfully removed")

    def start(self, cluster_context, instances=None):
        """Start ZooKeeper, then CLDB nodes, then the rest; await CLDB.

        When ``instances`` is empty or None, all cluster instances are used.
        """
        instances = instances or cluster_context.get_instances()
        zookeepers = cluster_context.filter_instances(instances, mng.ZOOKEEPER)
        cldbs = cluster_context.filter_instances(instances, mfs.CLDB)
        # Built as a list (not a lazy filter object) so it can be iterated
        # safely regardless of the Python version.
        others = [
            i for i in instances
            if not cluster_context.check_for_process(i, mfs.CLDB)
        ]
        self._start_zk_nodes(zookeepers)
        self._start_cldb_nodes(cldbs)
        self._start_non_cldb_nodes(others)
        self._await_cldb(cluster_context, instances)

    def stop(self, cluster_context, instances=None):
        """Stop ZooKeeper on its nodes, then warden on all ``instances``."""
        instances = instances or cluster_context.get_instances()
        zookeepers = cluster_context.filter_instances(instances, mng.ZOOKEEPER)
        self._stop_zk_nodes(zookeepers)
        self._stop_warden_on_nodes(instances)

    def _await_cldb(self, cluster_context, instances=None, timeout=600):
        """Poll the node list on a CLDB node until it reports status ok.

        Raises ``HadoopProvisionError`` when ``timeout`` elapses without an
        ok status.
        """
        instances = instances or cluster_context.get_instances()
        cldb_node = cluster_context.get_instance(mfs.CLDB)
        start_time = timeutils.utcnow()
        retry_count = 0
        with cldb_node.remote() as r:
            LOG.debug("Waiting %s seconds for CLDB initialization", timeout)
            while timeutils.delta_seconds(start_time,
                                          timeutils.utcnow()) < timeout:
                ec, out = r.execute_command(NODE_LIST_CMD,
                                            raise_when_error=False)
                try:
                    resp = json.loads(out)
                    status = resp['status']
                except (TypeError, ValueError, KeyError):
                    # The command is run with raise_when_error=False, so a
                    # failed call may yield output that is not the expected
                    # JSON document; treat that as "not ready" and retry
                    # instead of aborting the wait loop.
                    context.sleep(DELAY)
                    continue
                if str(status).lower() == 'ok':
                    ips = [n['ip'] for n in resp['data']]
                    retry_count += 1
                    # NOTE(review): the loop is left right after this check,
                    # so retry_count never exceeds 1 and the error below
                    # cannot currently fire - confirm the intended behavior.
                    for i in instances:
                        if (i.management_ip not in ips
                                and retry_count > DEFAULT_RETRY_COUNT):
                            raise ex.HadoopProvisionError(_(
                                "Node failed to connect to CLDB: %s") %
                                i.management_ip)
                    break
                else:
                    context.sleep(DELAY)
            else:
                raise ex.HadoopProvisionError(_("CLDB failed to start"))

    def _start_nodes(self, instances, sys_service):
        """Spawn one start task per instance in a thread group."""
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('start-%s-%s' % (sys_service, instance.id),
                         self._start_service, instance, sys_service)

    def _stop_nodes(self, instances, sys_service):
        """Spawn one stop task per instance in a thread group."""
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('stop-%s-%s' % (sys_service, instance.id),
                         self._stop_service, instance, sys_service)

    def _start_zk_nodes(self, instances):
        LOG.debug('Starting ZooKeeper nodes')
        self._start_nodes(instances, mng.ZOOKEEPER.ui_name)
        LOG.debug('ZooKeeper nodes successfully started')

    def _start_cldb_nodes(self, instances):
        LOG.debug('Starting CLDB nodes')
        self._start_nodes(instances, WARDEN_SERVICE)
        LOG.debug('CLDB nodes successfully started')

    def _start_non_cldb_nodes(self, instances):
        LOG.debug('Starting non-control nodes')
        self._start_nodes(instances, WARDEN_SERVICE)
        LOG.debug('Non-control nodes successfully started')

    def _stop_zk_nodes(self, instances):
        self._stop_nodes(instances, mng.ZOOKEEPER.ui_name)

    def _stop_warden_on_nodes(self, instances):
        self._stop_nodes(instances, WARDEN_SERVICE)

    @staticmethod
    def _do_service_action(instance, service, action):
        """Run ``service mapr-<service> <action>`` as root on ``instance``."""
        with instance.remote() as r:
            cmd = "service mapr-%(service)s %(action)s"
            args = {'service': service.lower(), 'action': action}
            cmd = cmd % args
            LOG.debug(
                'Executing "%(command)s" on node=%(ip)s',
                {'command': cmd, 'ip': instance.management_ip}
            )
            r.execute_command(cmd, run_as_root=True)

    def _start_service(self, instance, service):
        return self._do_service_action(instance, service, START)

    def _stop_service(self, instance, service):
        return self._do_service_action(instance, service, STOP)

    def _get_cldb_instances(self, c_context, instances):
        """Return CLDB instances of the cluster that are not in ``instances``."""
        current = self._get_current_cluster_instances(c_context, instances)
        return c_context.filter_instances(current, mfs.CLDB)

    @staticmethod
    def await_no_heartbeat():
        """Sleep for WAIT_NODE_ALARM_NO_HEARTBEAT before nodes are removed."""
        delay = WAIT_NODE_ALARM_NO_HEARTBEAT
        LOG.debug('Waiting for "NO_HEARTBEAT" alarm')
        context.sleep(delay)

    def _get_current_cluster_instances(self, cluster_context, instances):
        """Return all cluster instances except those in ``instances``."""
        all_instances = cluster_context.get_instances()
        return [x for x in all_instances if x not in instances]

View File

@ -0,0 +1,126 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as c
import sahara.plugins.mapr.abstract.version_handler as vh
import sahara.plugins.mapr.base.base_cluster_configurer as base_conf
import sahara.plugins.mapr.base.base_cluster_validator as bv
import sahara.plugins.mapr.base.base_edp_engine as edp
import sahara.plugins.mapr.base.base_node_manager as bs
import sahara.plugins.mapr.util.general as util
import sahara.plugins.utils as u
class BaseVersionHandler(vh.AbstractVersionHandler):
    """Version handler that delegates to a validator, configurer and node
    manager, and aggregates data from its list of services.
    """

    def __init__(self):
        self._validator = bv.BaseValidator()
        self._configurer = base_conf.BaseConfigurer()
        self._node_manager = bs.BaseNodeManager()
        self._version = None
        self._required_services = []
        self._services = []
        self._node_processes = {}
        self._configs = []

    def get_edp_engine(self, cluster, job_type):
        """Return a MapR Oozie engine for supported job types, else None."""
        supported = edp.MapROozieJobEngine.get_supported_job_types()
        return edp.MapROozieJobEngine(cluster) if job_type in supported \
            else None

    def get_services(self):
        return self._services

    def get_required_services(self):
        return self._required_services

    def get_node_processes(self):
        """Return (and cache) service ui name -> node process ui names."""
        if self._node_processes:
            return self._node_processes
        mapping = {}
        for service in self.get_services():
            if service.node_processes:
                mapping[service.ui_name] = [
                    proc.ui_name for proc in service.node_processes]
        self._node_processes = mapping
        return self._node_processes

    def get_configs(self):
        """Return (and cache) the de-duplicated configs of all services."""
        if self._configs:
            return self._configs
        collected = []
        for service in self.get_services():
            collected.extend(service.get_configs())
        collected += self._get_version_configs()
        self._configs = util.unique_list(collected)
        return self._configs

    def _get_version_configs(self):
        """Build a version config for services offered in several versions."""
        services = self.get_services()
        versions_by_name = {}
        for service in services:
            versions_by_name.setdefault(service.ui_name, []).append(
                service.version)
        return [
            service.get_version_config(versions_by_name[service.ui_name])
            for service in services
            if len(versions_by_name[service.ui_name]) > 1
        ]

    def get_configs_dict(self):
        """Merge the config dictionaries of all services into one dict."""
        merged = {}
        for service in self.get_services():
            merged.update(service.get_configs_dict())
        return merged

    def configure_cluster(self, cluster):
        all_instances = u.get_instances(cluster)
        ctx = self.get_context(cluster, added=all_instances)
        self._configurer.configure(ctx)

    def start_cluster(self, cluster):
        all_instances = u.get_instances(cluster)
        ctx = self.get_context(cluster, added=all_instances)
        self._node_manager.start(ctx)
        self._configurer.post_start(ctx)

    def validate(self, cluster):
        self._validator.validate(self.get_context(cluster))

    def validate_scaling(self, cluster, existing, additional):
        ctx = self.get_context(cluster)
        self._validator.validate_scaling(ctx, existing, additional)

    def scale_cluster(self, cluster, instances):
        """Configure, update and start the newly added ``instances``."""
        ctx = self.get_context(cluster, added=instances)
        # Reset the context's cached service list before it is used.
        ctx._cluster_services = None
        self._configurer.configure(ctx, instances)
        self._configurer.update(ctx, instances)
        self._node_manager.start(ctx, instances)

    def decommission_nodes(self, cluster, instances):
        """Move, stop and remove ``instances``, then update the configs."""
        ctx = self.get_context(cluster, removed=instances)
        # Reset the context's cached service list before it is used.
        ctx._cluster_services = None
        manager = self._node_manager
        manager.move_nodes(ctx, instances)
        manager.stop(ctx, instances)
        manager.await_no_heartbeat()
        manager.remove_nodes(ctx, instances)
        self._configurer.update(ctx, instances)

    def get_open_ports(self, node_group):
        """Return the unique open ports of the group's node processes."""
        ports = []
        for service in self.get_services():
            for proc in service.node_processes:
                if proc.ui_name in node_group.node_processes:
                    ports.extend(proc.open_ports)
        return util.unique_list(ports)

View File

View File

@ -0,0 +1,132 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import os
import jinja2 as j2
import six
import sahara.exceptions as e
from sahara.i18n import _
import sahara.utils.xmlutils as xml
@six.add_metaclass(abc.ABCMeta)
class BaseConfigurationFile(object):
    """Abstract configuration file backed by a name -> value dictionary.

    Subclasses define how the dictionary is rendered to text and how text is
    parsed back into it.
    """

    def __init__(self, file_name):
        self.f_name = file_name
        self._config_dict = {}
        self._local_path = None
        self._remote_path = None

    @property
    def remote_path(self):
        return self._remote_path

    @remote_path.setter
    def remote_path(self, path):
        # The stored value is the given directory joined with the file name.
        self._remote_path = os.path.join(path, self.f_name)

    @abc.abstractmethod
    def render(self):
        """Return the file content built from the stored properties."""

    @abc.abstractmethod
    def parse(self, content):
        """Load the given file content into this object."""

    def fetch(self, instance):
        """Read the file at ``remote_path`` from ``instance`` and parse it."""
        with instance.remote() as remote:
            raw = remote.read_file_from(self.remote_path, run_as_root=True)
            self.parse(raw)

    def load_properties(self, config_dict):
        """Add every key/value pair of ``config_dict`` as a property."""
        for pair in six.iteritems(config_dict):
            self.add_property(*pair)

    def add_property(self, name, value):
        self._config_dict[name] = value

    def add_properties(self, properties):
        """Add every key/value pair of ``properties`` as a property."""
        for key, val in six.iteritems(properties):
            self.add_property(key, val)

    def _get_config_value(self, name):
        """Return the stored value for ``name`` or None when it is absent."""
        return self._config_dict.get(name)

    def __repr__(self):
        return '<Configuration file %s>' % self.f_name
class HadoopXML(BaseConfigurationFile):
    """Configuration file stored in the Hadoop XML format."""

    def __init__(self, file_name):
        super(HadoopXML, self).__init__(file_name)

    def parse(self, content):
        """Add every name/value pair found in the Hadoop XML ``content``."""
        configs = xml.parse_hadoop_xml_with_name_and_value(content)
        # An explicit loop is required: map() is lazy on Python 3, so using
        # it only for its side effect would add nothing.
        for item in configs:
            self.add_property(item['name'], item['value'])

    def render(self):
        """Return the stored properties serialized with create_hadoop_xml."""
        return xml.create_hadoop_xml(self._config_dict)
class RawFile(BaseConfigurationFile):
    """Configuration file kept verbatim under the 'content' key."""

    def __init__(self, file_name):
        super(RawFile, self).__init__(file_name)

    def render(self):
        """Return the stored content, or an empty string if none was set."""
        store = self._config_dict
        if 'content' in store:
            return store['content']
        return ''

    def parse(self, content):
        """Store ``content`` unchanged."""
        self._config_dict['content'] = content
class PropertiesFile(BaseConfigurationFile):
    """Configuration file made of ``name=value`` lines."""

    def __init__(self, file_name):
        super(PropertiesFile, self).__init__(file_name)

    def parse(self, content):
        """Add a property for every non-blank, non-comment line.

        Lines starting with ``#`` or ``!`` are skipped. Only the first ``=``
        separates name from value, so values may themselves contain ``=``;
        a line without ``=`` yields a property with an empty value.
        """
        for line in content.splitlines():
            prop = line.strip()
            if not prop or prop[0] in ('#', '!'):
                continue
            name, sep, value = prop.partition("=")
            self.add_property(name.strip(), value.strip())

    def render(self):
        """Return the stored properties as newline-joined name=value lines."""
        lines = ['%s=%s' % (k, v) for k, v in six.iteritems(self._config_dict)]
        return "\n".join(lines)
class TemplateFile(BaseConfigurationFile):
    """Configuration file rendered from a Jinja2 template."""

    def __init__(self, file_name):
        super(TemplateFile, self).__init__(file_name)
        self._template = None

    @staticmethod
    def _j2_render(template, arg_dict):
        """Render ``template`` with ``arg_dict``; a falsy template raises."""
        if not template:
            raise e.InvalidDataException(_('Template object must be defined'))
        return template.render(arg_dict)

    def render(self):
        """Render the parsed template with the stored properties."""
        return self._j2_render(self._template, self._config_dict)

    def parse(self, content):
        """Compile ``content`` into the template used by ``render``."""
        self._template = j2.Template(content)

View File

@ -0,0 +1,81 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class Distro(object):
    """Read-only description of a Linux distribution's package tooling."""

    def __init__(self, name, install_cmd, version_separator):
        self._name = name
        self._install_command = install_cmd
        self._version_separator = version_separator

    @property
    def name(self):
        return self._name

    @property
    def install_command(self):
        return self._install_command

    @property
    def version_separator(self):
        return self._version_separator

    def create_install_cmd(self, packages):
        """Return the install command line for ``packages``.

        Each item is a sequence holding a package name and, optionally, a
        version. Items with a truthy version are written as
        ``<name><separator><version>*``; all others as the bare name.
        """
        separator = self.version_separator
        specs = []
        for item in packages:
            if len(item) > 1:
                pkg, ver = item
            else:
                pkg, ver = item[0], None
            if ver:
                specs.append(pkg + separator + ver + '*')
            else:
                specs.append(pkg)

        command = '%(install_cmd)s %(packages)s'
        return command % {
            'install_cmd': self.install_command,
            'packages': ' '.join(specs),
        }
# Supported distributions. ``install_cmd`` is used verbatim as the prefix of
# the command line built by Distro.create_install_cmd, so it has to contain
# the package manager's install subcommand and its non-interactive switch.
UBUNTU = Distro(
    name='Ubuntu',
    install_cmd='apt-get install --force-yes -y',
    version_separator='=',
)
CENTOS = Distro(
    name='CentOS',
    install_cmd='yum install -y',
    version_separator='-',
)
RHEL = Distro(
    name='RedHatEnterpriseServer',
    install_cmd='yum install -y',
    version_separator='-',
)
SUSE = Distro(
    name='Suse',
    # A bare "zypper <packages>" is not a valid invocation; the install
    # subcommand is required, as with the apt-get/yum entries above.
    install_cmd='zypper --non-interactive install',
    version_separator=':',
)
def get_all():
    """Return a new list with every supported distribution."""
    return list((UBUNTU, CENTOS, RHEL, SUSE))
def get(instance):
    """Return the supported distribution detected on ``instance``.

    The output of ``cat /etc/*-release`` is searched for each distribution
    name in turn; the first match wins. None is returned when no name is
    found.
    """
    with instance.remote() as remote:
        release_text = remote.execute_command(
            'cat /etc/*-release', run_as_root=True)[1]
        return next(
            (distro for distro in get_all() if distro.name in release_text),
            None)

View File

@ -0,0 +1,62 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Command template used by NodeProcess.execute_action; it is filled with a
# dict providing the 'service', 'action' and 'nodes' keys.
WARDEN_MANAGED_CMD = ('sudo -u mapr maprcli node services'
                      ' -name %(service)s'
                      ' -action %(action)s'
                      ' -nodes %(nodes)s')
# Values substituted for the 'action' key of WARDEN_MANAGED_CMD.
START_ACTION = 'start'
STOP_ACTION = 'stop'
RESTART_ACTION = 'restart'
class NodeProcess(object):
    """A process of a service, controlled through ``maprcli node services``."""

    def __init__(self, name, ui_name, package, open_ports=None):
        self._name = name
        self._ui_name = ui_name
        self._package = package
        self._open_ports = open_ports or []

    @property
    def name(self):
        return self._name

    @property
    def ui_name(self):
        return self._ui_name

    @property
    def package(self):
        return self._package

    @property
    def open_ports(self):
        return self._open_ports

    def start(self, instances):
        self.execute_action(instances, START_ACTION)

    def restart(self, instances):
        self.execute_action(instances, RESTART_ACTION)

    def stop(self, instances):
        self.execute_action(instances, STOP_ACTION)

    def execute_action(self, instances, action):
        """Run ``action`` for this process on all ``instances``.

        A single command listing the management ips of all instances is
        executed on the first instance. Nothing is done when ``instances``
        is empty, since there is neither a target nor a node to run on.
        """
        if not instances:
            return
        nodes = ','.join(i.management_ip for i in instances)
        args = {'service': self.name, 'action': action, 'nodes': nodes}
        command = WARDEN_MANAGED_CMD % args
        with instances[0].remote() as r:
            r.execute_command(command)

View File

@ -0,0 +1,224 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
from sahara import context
import sahara.exceptions as e
from sahara.i18n import _
import sahara.plugins.exceptions as ex
import sahara.plugins.provisioning as p
from sahara.utils import files as files
# Passed as the ``timeout`` argument of the remote package install command
# in Service._install_packages_on_instance (presumably seconds).
_INSTALL_PACKAGES_TIMEOUT = 3600
class Service(object):
    """Base description of a MapR service.

    Holds the service's names, version, node processes, package
    dependencies, default config files and validation rules, and provides
    package installation plus no-op lifecycle hooks for subclasses.

    Two services compare equal when they are of a compatible class and share
    ``ui_name`` and ``version``; the hash is derived from the same pair.
    """

    def __init__(self):
        self._name = None
        self._ui_name = None
        self._node_processes = []
        self._version = None
        self._dependencies = []
        self._ui_info = []
        self._cluster_defaults = []
        self._node_defaults = []
        self._validation_rules = []

    @property
    def name(self):
        return self._name

    @property
    def ui_name(self):
        return self._ui_name

    @property
    def version(self):
        return self._version

    @property
    def node_processes(self):
        return self._node_processes

    @property
    def dependencies(self):
        return self._dependencies

    @property
    def ui_info(self):
        return self._ui_info

    @property
    def cluster_defaults(self):
        return self._cluster_defaults

    @property
    def node_defaults(self):
        return self._node_defaults

    @property
    def validation_rules(self):
        return self._validation_rules

    def install(self, cluster_context, instances):
        """Install this service's packages on ``instances`` in parallel."""
        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn('install-packages-%s' % instance.id,
                         self._install_packages_on_instance, cluster_context,
                         instance)

    def _install_packages_on_instance(self, cluster_context, instance):
        """Install the packages of the processes assigned to ``instance``.

        Nothing is run when none of this service's node processes is listed
        in the instance's node group.
        """
        # The loop variable is deliberately not called ``p``: that name is
        # the module alias for sahara.plugins.provisioning.
        processes = [proc for proc in self.node_processes
                     if proc.ui_name in instance.node_group.node_processes]
        if not processes:
            return
        packages = self._get_packages(processes)
        cmd = cluster_context.distro.create_install_cmd(packages)
        with instance.remote() as r:
            # raise_when_error=False: a failing install command is not
            # raised from here.
            r.execute_command(cmd, run_as_root=True,
                              timeout=_INSTALL_PACKAGES_TIMEOUT,
                              raise_when_error=False)

    def _get_packages(self, node_processes):
        """Return dependencies followed by (package, version) pairs."""
        result = []
        result += self.dependencies
        result += [(np.package, self.version) for np in node_processes]
        return result

    def post_install(self, cluster_context, instances):
        """Hook for subclasses; does nothing here."""
        pass

    def post_start(self, cluster_context, instances):
        """Hook for subclasses; does nothing here."""
        pass

    def configure(self, cluster_context, instances=None):
        """Hook for subclasses; does nothing here."""
        pass

    def update(self, cluster_context, instances=None):
        """Hook for subclasses; does nothing here."""
        pass

    def get_file_path(self, file_name):
        """Return the resource path of ``file_name`` for this service."""
        template = 'plugins/mapr/services/%(service)s/resources/%(file_name)s'
        args = {'service': self.name, 'file_name': file_name}
        return template % args

    def get_configs(self):
        """Return config objects loaded from the default config files.

        Files listed in ``cluster_defaults`` produce cluster-scoped configs,
        files listed in ``node_defaults`` produce node-scoped ones.
        """
        result = []
        for d_file in self.cluster_defaults:
            data = self._load_config_file(self.get_file_path(d_file))
            result += [self._create_config_obj(item, self.ui_name)
                       for item in data]
        for d_file in self.node_defaults:
            data = self._load_config_file(self.get_file_path(d_file))
            result += [self._create_config_obj(item, self.ui_name,
                                               scope='node')
                       for item in data]
        return result

    def get_configs_dict(self):
        """Return ``{ui_name: {config name: default value}}``."""
        result = dict()
        for conf_obj in self.get_configs():
            result.update({conf_obj.name: conf_obj.default_value})
        return {self.ui_name: result}

    def _load_config_file(self, file_path=None):
        """Return the parsed JSON content of ``file_path``."""
        return json.loads(files.get_file_text(file_path))

    def get_config_files(self, cluster_context, configs, instance=None):
        """Return the service's configuration files; none by default."""
        return []

    def _create_config_obj(self, item, target='general', scope='cluster',
                           high_priority=False):
        """Build a provisioning Config from the dictionary ``item``.

        Raises HadoopProvisionError when the name is missing or empty and
        InvalidDataException when the value is missing.
        """
        def _prepare_value(value):
            # NOTE(review): only ``str`` instances are stripped/lowercased,
            # so the effect differs for unicode strings on Python 2 -
            # confirm which behavior is intended.
            if isinstance(value, str):
                return value.strip().lower()
            return value

        conf_name = _prepare_value(item.get('name', None))
        conf_value = _prepare_value(item.get('value', None))

        if not conf_name:
            raise ex.HadoopProvisionError(_("Config missing 'name'"))

        if conf_value is None:
            raise e.InvalidDataException(
                _("Config '%s' missing 'value'") % conf_name)

        if high_priority or item.get('priority', 2) == 1:
            priority = 1
        else:
            priority = 2

        return p.Config(
            name=conf_name,
            applicable_target=target,
            scope=scope,
            config_type=item.get('config_type', "string"),
            config_values=item.get('config_values', None),
            default_value=conf_value,
            is_optional=item.get('is_optional', True),
            description=item.get('description', None),
            priority=priority)

    def get_version_config(self, versions):
        """Return a dropdown Config offering ``versions``, newest first."""
        return p.Config(
            name='%s Version' % self.ui_name,
            applicable_target=self.ui_name,
            scope='cluster',
            config_type='dropdown',
            config_values=[(v, v) for v in sorted(versions, reverse=True)],
            is_optional=False,
            description=_('Specify the version of the service'),
            priority=1)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            version_eq = self.version == other.version
            ui_name_eq = self.ui_name == other.ui_name
            return version_eq and ui_name_eq
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Kept consistent with __eq__; defining __eq__ alone would also make
        # instances unhashable on Python 3.
        return hash((self.ui_name, self.version))

    def restart(self, instances):
        """Restart every node process of this service on ``instances``."""
        for node_process in self.node_processes:
            node_process.restart(instances)

    def service_dir(self, cluster_context):
        """Return ``<mapr_home>/<name>``."""
        args = {'mapr_home': cluster_context.mapr_home, 'name': self.name}
        return '%(mapr_home)s/%(name)s' % args

    def home_dir(self, cluster_context):
        """Return ``<service_dir>/<name>-<version>``."""
        args = {
            'service_dir': self.service_dir(cluster_context),
            'name': self.name,
            'version': self.version,
        }
        return '%(service_dir)s/%(name)s-%(version)s' % args

    def conf_dir(self, cluster_context):
        """Return ``<home_dir>/conf``."""
        return '%s/conf' % self.home_dir(cluster_context)
class Single(type):
    """Metaclass that creates at most one instance per class.

    The first call constructs the instance with the given arguments; every
    later call returns that same instance and ignores its arguments.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        try:
            return registry[cls]
        except KeyError:
            pass
        created = super(Single, cls).__call__(*args, **kwargs)
        registry[cls] = created
        return created

View File

@ -1,29 +1,29 @@
# Copyright (c) 2014, MapR Technologies
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.i18n import _
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.provisioning as p
class MapRPlugin(p.ProvisioningPluginBase):
title = 'MapR Hadoop Distribution'
description = ('The MapR Distribution provides a full Hadoop stack that'
' includes the MapR File System (MapR-FS), MapReduce,'
' a complete Hadoop ecosystem, and the MapR Control System'
' user interface')
hdfs_user = 'mapr'
description = _('The MapR Distribution provides a full Hadoop stack that'
' includes the MapR File System (MapR-FS), MapReduce,'
' a complete Hadoop ecosystem, and the MapR Control System'
' user interface')
def _get_handler(self, hadoop_version):
return vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
@ -34,9 +34,6 @@ class MapRPlugin(p.ProvisioningPluginBase):
def get_description(self):
return MapRPlugin.description
def get_hdfs_user(self):
return MapRPlugin.hdfs_user
def get_versions(self):
return vhf.VersionHandlerFactory.get().get_versions()
@ -67,22 +64,10 @@ class MapRPlugin(p.ProvisioningPluginBase):
v_handler = self._get_handler(cluster.hadoop_version)
v_handler.decommission_nodes(cluster, instances)
def get_oozie_server(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_oozie_server(cluster)
def get_name_node_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_name_node_uri(cluster)
def get_oozie_server_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_oozie_server_uri(cluster)
def get_resource_manager_uri(self, cluster):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_resource_manager_uri(cluster)
def get_edp_engine(self, cluster, job_type):
v_handler = self._get_handler(cluster.hadoop_version)
return v_handler.get_edp_engine(cluster, job_type)
def get_open_ports(self, node_group):
v_handler = self._get_handler(node_group.cluster.hadoop_version)
return v_handler.get_open_ports(node_group)

View File

@ -0,0 +1,51 @@
#!/bin/bash
# Registers the MapR package repositories for the distribution named in $1
# (Ubuntu, CentOS or RedHatEnterpriseServer) and exits with status 1 for
# anything else.
#
# The shebang is bash because the CentOS/RHEL branch uses [[ ... =~ ... ]],
# which plain POSIX sh does not provide.
#
# NOTE(review): the parenthesised placeholders in the repository lines look
# like Python percent-format fields, presumably substituted before the script
# runs; keep literal percent signs out of this file unless that is confirmed
# not to be the case.

if [ "$1" = "Ubuntu" ]; then
    cat >> /etc/apt/sources.list.d/maprtech.list << EOF
deb %(ubuntu_mapr_base_repo)s
deb %(ubuntu_mapr_ecosystem_repo)s
EOF
    cat >> /etc/apt/sources.list.d/security_repo.list << EOF
deb http://security.ubuntu.com/ubuntu precise-security main
deb http://security.ubuntu.com/ubuntu lucid-security main
EOF
    sudo apt-get install -y --force-yes wget
    wget -O - http://package.mapr.com/releases/pub/maprgpg.key | sudo apt-key add -
    sudo apt-get update
elif [ "$1" = 'CentOS' -o "$1" = 'RedHatEnterpriseServer' ]; then
    cat >> /etc/yum.repos.d/maprtech.repo << EOF
[maprtech]
name=MapR Technologies
baseurl=%(centos_mapr_repo)s
enabled=1
gpgcheck=0
protect=1
[maprecosystem]
name=MapR Technologies
baseurl=%(centos_mapr_ecosystem_repo)s
enabled=1
gpgcheck=0
protect=1
EOF
    rpm --import http://package.mapr.com/releases/pub/maprgpg.key
    yum install -y wget
    # Pick the EPEL release package matching the major OS version.
    release=`cat /etc/*-release`
    if [[ $release =~ 6\.[0-9] ]]; then
        cd /tmp
        wget http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
        rpm -Uvh epel-release-6*.rpm
    elif [[ $release =~ 7\.[0-9] ]]; then
        cd /tmp
        wget http://download.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-5.noarch.rpm
        rpm -Uvh epel-release-7*.rpm
    else
        echo "Unsupported distribution version"
        exit 1
    fi
    # NOTE(review): this el6 libevent package is installed for both the 6.x
    # and the 7.x branch above - confirm that is intended.
    rpm -Uvh ftp://rpmfind.net/linux/centos/6.6/os/x86_64/Packages/libevent-1.4.13-4.el6.x86_64.rpm
else
    echo "Unknown distribution"
    exit 1
fi

View File

@ -0,0 +1,58 @@
#!/bin/bash
# Downloads a JDK archive into /usr/java (below TARGET_ROOT when that is
# set), unpacks it and registers it as the system Java for the distribution
# named in $1. JAVA_DOWNLOAD_URL may be overridden from the environment.

set -e

JAVA_TARGET_LOCATION="/usr/java"

export JAVA_DOWNLOAD_URL=${JAVA_DOWNLOAD_URL:-"http://download.oracle.com/otn-pub/java/jdk/7u51-b13/jdk-7u51-linux-x64.tar.gz"}

JAVA_HOME=$TARGET_ROOT$JAVA_TARGET_LOCATION

mkdir -p $JAVA_HOME

JAVA_FILE=$(basename $JAVA_DOWNLOAD_URL)

# The download is the condition of the "if" itself: with "set -e" a failing
# stand-alone wget would abort the script before a separate exit-status check
# could print the error message.
if wget --no-check-certificate --no-cookies -c \
    --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" \
    -O $JAVA_HOME/$JAVA_FILE $JAVA_DOWNLOAD_URL; then
    echo "Java download successful"
else
    echo "Error downloading $JAVA_DOWNLOAD_URL, exiting"
    exit 1
fi

cd $JAVA_HOME
if [[ $JAVA_FILE == *.tar.gz ]]; then
    echo -e "\n" | tar -zxf $JAVA_FILE
    # The unpacked directory is whatever entry is not the archive itself.
    JAVA_NAME=`ls -1 $JAVA_TARGET_LOCATION | grep -v tar.gz`
    chown -R root:root $JAVA_HOME
    cat >> /etc/profile.d/java.sh <<EOF
# Custom Java install
export JAVA_HOME=$JAVA_TARGET_LOCATION/$JAVA_NAME
export PATH=\$PATH:$JAVA_TARGET_LOCATION/$JAVA_NAME/bin
EOF
    case "$1" in
        Ubuntu )
            update-alternatives --install "/usr/bin/java" "java" "$JAVA_TARGET_LOCATION/$JAVA_NAME/bin/java" 1
            update-alternatives --install "/usr/bin/javac" "javac" "$JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javac" 1
            update-alternatives --install "/usr/bin/javaws" "javaws" "$JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javaws" 1

            update-alternatives --set java $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/java
            update-alternatives --set javac $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javac
            update-alternatives --set javaws $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javaws
        ;;
        Fedora | RedHatEnterpriseServer | CentOS )
            alternatives --install /usr/bin/java java $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/java 200000
            alternatives --install /usr/bin/javaws javaws $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javaws 200000
            alternatives --install /usr/bin/javac javac $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/javac 200000
            alternatives --install /usr/bin/jar jar $JAVA_TARGET_LOCATION/$JAVA_NAME/bin/jar 200000
        ;;
    esac
elif [[ $JAVA_FILE == *.bin ]]; then
    echo -e "\n" | sh $JAVA_FILE
else
    echo "Unknown file type: $JAVA_FILE, exiting"
    exit 1
fi
# Remove the downloaded archive/installer (the current directory is JAVA_HOME).
rm $JAVA_FILE

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Install and minimally configure a MySQL server for the given distribution.
#
# Usage: install_mysql.sh <distribution name>
#
# The distribution name is matched by substring. All positional arguments are
# joined, so an unquoted multi-word name such as "Red Hat Enterprise Linux"
# is still recognised instead of being truncated to its first word.
DISTRO="$*"

# Any /etc/init.d/mysql* entry is taken as proof of an existing installation.
# "ls" copes with the glob matching several files, which "[ -f <glob> ]"
# does not (it fails with "too many arguments").
if ls /etc/init.d/mysql* > /dev/null 2>&1; then
    echo "Mysql server already installed"
    exit 0
fi

if [[ $DISTRO == *"Ubuntu"* ]]; then
    # Preseed the root password so the package installs without prompting.
    sudo debconf-set-selections <<< 'mysql-server mysql-server/root_password password root'
    sudo debconf-set-selections <<< 'mysql-server mysql-server/root_password_again password root'
    sudo apt-get install --force-yes -y mysql-server
    sudo apt-get install --force-yes -y libmysqlclient16
    # Reset the root password to the empty string once the server is installed.
    mysql -uroot -proot mysql -e "UPDATE user SET Password=PASSWORD('') WHERE User='root'; FLUSH PRIVILEGES;"
    # Rewrite bind-address to 0.0.0.0 so the server listens on all interfaces.
    sudo sed -i "s/^\(bind-address\s*=\s*\).*\$/\10.0.0.0/" /etc/mysql/my.cnf
    sudo service mysql restart
elif [[ $DISTRO == *"CentOS"* ]] \
        || [[ $DISTRO == *"Red Hat Enterprise Linux"* ]] \
        || [[ $DISTRO == *"RedHatEnterpriseServer"* ]]; then
    # Both RHEL spellings are accepted: the long product name and the
    # lsb_release-style "RedHatEnterpriseServer" identifier.
    sudo yum install -y mysql-server
    sudo yum install -y mysql-connector-java
elif [[ $DISTRO == *"SUSE"* ]]; then
    # zypper needs an explicit "install" sub-command; --non-interactive keeps
    # it from waiting on a prompt during unattended provisioning.
    sudo zypper --non-interactive install mysql-server
else
    echo "Unknown distribution"
    exit 1
fi

View File

View File

@ -0,0 +1,82 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
# Node processes that make up the HBase service.  Each descriptor carries the
# internal process name, its UI label, the MapR package name and the list of
# ports declared for that process.
HBASE_MASTER = np.NodeProcess(name='hbmaster',
                              ui_name='HBase-Master',
                              package='mapr-hbase-master',
                              open_ports=[60000, 60010])

HBASE_REGION_SERVER = np.NodeProcess(name='hbregionserver',
                                     ui_name='HBase-RegionServer',
                                     package='mapr-hbase-regionserver',
                                     open_ports=[60020])

HBASE_THRIFT = np.NodeProcess(name='hbasethrift',
                              ui_name='HBase-Thrift',
                              package='mapr-hbasethrift',
                              open_ports=[9090])
class HBase(s.Service):
    """Base definition of the HBase service shared by its versioned classes.

    Registers the master, region server and thrift node processes, points
    at the ``hbase-default.json`` cluster defaults and requires at least one
    master and one region server.
    """

    def __init__(self):
        super(HBase, self).__init__()
        self._name = 'hbase'
        self._ui_name = 'HBase'
        self._node_processes = [
            HBASE_MASTER, HBASE_REGION_SERVER, HBASE_THRIFT,
        ]
        self._cluster_defaults = ['hbase-default.json']
        self._validation_rules = [
            vu.at_least(1, HBASE_MASTER),
            vu.at_least(1, HBASE_REGION_SERVER),
        ]

    def get_config_files(self, cluster_context, configs, instance=None):
        """Return a one-element list holding the hbase-site.xml file object.

        :param cluster_context: context used to resolve the service conf dir
        :param configs: properties loaded into the file
        :param instance: optional instance the current file is fetched from
            before ``configs`` are loaded
        """
        site_xml = bcf.HadoopXML("hbase-site.xml")
        site_xml.remote_path = self.conf_dir(cluster_context)
        if instance:
            site_xml.fetch(instance)
        site_xml.load_properties(configs)
        return [site_xml]
@six.add_metaclass(s.Single)
class HBaseV094(HBase):
    """HBase service pinned to version 0.94.24."""

    def __init__(self):
        super(HBaseV094, self).__init__()
        self._version = '0.94.24'
        # The version must be set before it is read for the dependency tuple.
        self._dependencies = [
            ('mapr-hbase', self.version),
        ]
@six.add_metaclass(s.Single)
class HBaseV098(HBase):
    """HBase service pinned to version 0.98.7."""

    def __init__(self):
        super(HBaseV098, self).__init__()
        self._version = '0.98.7'
        # The version must be set before it is read for the dependency tuple.
        self._dependencies = [
            ('mapr-hbase', self.version),
        ]

View File

@ -0,0 +1,26 @@
[
{
"value": "maprfs:///hbase",
"name": "hbase.rootdir",
"description": "The directory shared by RegionServers."
},
{
"value": true,
"name": "dfs.support.append",
"config_type": "bool"
},
{
"name": "hbase.fsutil.maprfs.impl",
"value": "org.apache.hadoop.hbase.util.FSMapRUtils"
},
{
"value": 30,
"name": "hbase.regionserver.handler.count",
"config_type": "int"
},
{
"value": 64,
"name": "fs.mapr.threads",
"config_type": "int"
}
]

View File

@ -0,0 +1,128 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
import six
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.utils.files as files
# Module-level logger.
LOG = logging.getLogger(__name__)

# Node processes of the Hive service: internal name, UI label, MapR package
# name and the ports declared for the process.
HIVE_METASTORE = np.NodeProcess(name='hivemeta',
                                ui_name='HiveMetastore',
                                package='mapr-hivemetastore',
                                open_ports=[9083])

HIVE_SERVER_2 = np.NodeProcess(name='hs2',
                               ui_name='HiveServer2',
                               package='mapr-hiveserver2',
                               open_ports=[10000])
class Hive(s.Service):
    """Base definition of the Hive service shared by its versioned classes.

    Registers the HiveMetastore and HiveServer2 node processes, builds
    hive-site.xml and creates the Hive warehouse directory after start.
    """

    def __init__(self):
        super(Hive, self).__init__()
        self._name = 'hive'
        self._ui_name = 'Hive'
        self._node_processes = [HIVE_METASTORE, HIVE_SERVER_2]
        self._validation_rules = [
            vu.at_least(1, HIVE_METASTORE),
            vu.at_least(1, HIVE_SERVER_2),
        ]

    # hive-site.xml
    def get_config_files(self, cluster_context, configs, instance=None):
        """Return a one-element list holding the hive-site.xml file object.

        The file is optionally fetched from ``instance``, then the bundled
        hive-default.xml is parsed into it and the cluster-specific
        properties are added last.  ``configs`` is not used here.
        """
        hive_default = 'plugins/mapr/services/hive/resources/hive-default.xml'
        hive_site = bcf.HadoopXML("hive-site.xml")
        hive_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            hive_site.fetch(instance)
        hive_site.parse(files.get_file_text(hive_default))
        hive_site.add_properties(self._get_hive_site_props(cluster_context))
        return [hive_site]

    def _get_hive_site_props(self, context):
        """Return the cluster-specific hive-site.xml properties as a dict."""
        # Import here to resolve circular dependency
        from sahara.plugins.mapr.services.mysql import mysql

        zookeepers = context.get_zookeeper_nodes_ip()
        metastore_specs = mysql.MySQL.METASTORE_SPECS

        return {
            'javax.jdo.option.ConnectionDriverName': mysql.MySQL.DRIVER_CLASS,
            'javax.jdo.option.ConnectionURL': self._get_jdbc_uri(context),
            'javax.jdo.option.ConnectionUserName': metastore_specs.user,
            'javax.jdo.option.ConnectionPassword': metastore_specs.password,
            'hive.metastore.uris': self._get_metastore_uri(context),
            'hive.zookeeper.quorum': zookeepers,
            'hbase.zookeeper.quorum': zookeepers,
        }

    def _get_jdbc_uri(self, context):
        """Return the JDBC URI of the metastore database."""
        # Import here to resolve circular dependency
        from sahara.plugins.mapr.services.mysql import mysql

        jdbc_uri = ('jdbc:mysql://%(db_host)s:%(db_port)s/%(db_name)s?'
                    'createDatabaseIfNotExist=true')
        jdbc_args = {
            'db_host': mysql.MySQL.get_db_instance(context).fqdn(),
            'db_port': mysql.MySQL.MYSQL_SERVER_PORT,
            'db_name': mysql.MySQL.METASTORE_SPECS.db_name,
        }
        return jdbc_uri % jdbc_args

    def _get_metastore_uri(self, context):
        """Return the thrift URI of the HiveMetastore instance."""
        return 'thrift://%s:9083' % context.get_instance_ip(HIVE_METASTORE)

    def post_start(self, cluster_context, instances):
        """Create /user/hive/warehouse/ from the CLDB node if it is missing.

        The command is best effort: a non-zero exit status is not raised.
        ``instances`` is not used here.
        """
        # Import here to resolve circular dependency
        import sahara.plugins.mapr.services.maprfs.maprfs as mfs

        create_path = 'sudo -u mapr hadoop fs -mkdir %s'
        check_path = 'sudo -u mapr hadoop fs -ls %s'
        # The two mkdir calls are joined with ';' rather than '&&': when
        # /user/hive/ already exists its mkdir fails, and '&&' would then
        # skip creating the warehouse directory altogether.
        cmd = "%(check)s || ( %(parent)s ; %(target)s )"
        args = {
            'check': check_path % '/user/hive/warehouse/',
            'parent': create_path % '/user/hive/',
            'target': create_path % '/user/hive/warehouse/'
        }
        cldb_node = cluster_context.get_instance(mfs.CLDB)
        with cldb_node.remote() as r:
            LOG.debug("Creating Hive warehouse dir")
            r.execute_command(cmd % args, raise_when_error=False)
@six.add_metaclass(s.Single)
class HiveV012(Hive):
    """Hive service pinned to version 0.12."""

    def __init__(self):
        super(HiveV012, self).__init__()
        self._version = '0.12'
        # The version must be set before it is read for the dependency tuple.
        self._dependencies = [
            ('mapr-hive', self.version),
        ]
@six.add_metaclass(s.Single)
class HiveV013(Hive):
    """Hive service pinned to version 0.13."""

    def __init__(self):
        super(HiveV013, self).__init__()
        self._version = '0.13'
        # The version must be set before it is read for the dependency tuple.
        self._dependencies = [
            ('mapr-hive', self.version),
        ]

View File

@ -0,0 +1,56 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/metastore
?createDatabaseIfNotExist=true
</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value/>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
<property>
<name>hive.zookeeper.property.clientPort</name>
<value>5181</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>5181</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,45 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.commands as cmd
import sahara.plugins.mapr.util.validation_utils as vu
# The single node process of the HttpFS service: internal name, UI label,
# MapR package name and the port declared for the process.
HTTP_FS = np.NodeProcess(name='httpfs',
                         ui_name='HTTPFS',
                         package='mapr-httpfs',
                         open_ports=[14000])
@six.add_metaclass(s.Single)
class HttpFS(s.Service):
    """HttpFS service definition (version 1.0)."""

    def __init__(self):
        super(HttpFS, self).__init__()
        self._name = 'httpfs'
        self._ui_name = 'HttpFS'
        self._version = '1.0'
        self._node_processes = [HTTP_FS]
        self._cluster_defaults = ['httpfs-default.json']
        self._validation_rules = [
            vu.at_least(1, HTTP_FS),
        ]

    def post_install(self, cluster_context, instances):
        """Give the service directory on the HttpFS instance to mapr:mapr.

        ``instances`` is not used; the instance is looked up through
        ``cluster_context``.
        """
        httpfs_node = cluster_context.get_instance(HTTP_FS)
        service_dir = self.service_dir(cluster_context)
        cmd.chown(httpfs_node, 'mapr:mapr', service_dir)

View File

@ -0,0 +1,12 @@
[
{
"value": "*",
"priority": 1,
"name": "httpfs.proxyuser.mapr.hosts"
},
{
"value": "*",
"priority": 1,
"name": "httpfs.proxyuser.mapr.groups"
}
]

View File

@ -0,0 +1,39 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
# The single node process of the Mahout service; it declares no ports.
MAHOUT = np.NodeProcess(name='mahout',
                        ui_name='Mahout',
                        package='mapr-mahout',
                        open_ports=[])
@six.add_metaclass(s.Single)
class Mahout(s.Service):
    """Mahout service definition (version 0.9)."""

    def __init__(self):
        super(Mahout, self).__init__()
        self._name = 'mahout'
        self._ui_name = 'Mahout'
        self._version = '0.9'
        self._node_processes = [MAHOUT]
        self._validation_rules = [
            vu.at_least(1, MAHOUT),
        ]

View File

@ -0,0 +1,60 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
# Port ZooKeeper clients connect to; also the only port declared for the
# ZooKeeper node process below.
ZK_CLIENT_PORT = 5181

# Node processes of the management service: internal name, UI label, MapR
# package name and the ports declared for the process.
ZOOKEEPER = np.NodeProcess(name='mapr-zookeeper',
                           ui_name='ZooKeeper',
                           package='mapr-zookeeper',
                           open_ports=[ZK_CLIENT_PORT])

WEB_SERVER = np.NodeProcess(name='webserver',
                            ui_name='Webserver',
                            package='mapr-webserver',
                            open_ports=[8443])

METRICS = np.NodeProcess(name='metrics',
                         ui_name='Metrics',
                         package='mapr-metrics',
                         open_ports=[1111])
@six.add_metaclass(s.Single)
class Management(s.Service):
    """Management service: ZooKeeper, web server and metrics processes.

    Validation requires at least one ZooKeeper, an odd number of ZooKeepers
    and exactly one web server.
    """

    def __init__(self):
        super(Management, self).__init__()
        self._ui_name = 'Management'
        self._node_processes = [ZOOKEEPER, WEB_SERVER, METRICS]

        # (title, node process, URL template) entry for the web UI link.
        mcs_link = ('MapR Control System (MCS)', WEB_SERVER, 'https://%s:8443')
        self._ui_info = [mcs_link]

        rules = []
        rules.append(vu.at_least(1, ZOOKEEPER))
        rules.append(vu.exactly(1, WEB_SERVER))
        rules.append(vu.odd_count_of(ZOOKEEPER))
        self._validation_rules = rules

View File

@ -0,0 +1,138 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
from sahara.plugins.mapr.versions import version_handler_factory as vhf
from sahara.swift import swift_helper
from sahara.topology import topology_helper as topo
from sahara.utils import files as f
# Node processes of the classic MapReduce service: internal name, UI label,
# MapR package name and the ports declared for the process.
JOB_TRACKER = np.NodeProcess(name='jobtracker',
                             ui_name='JobTracker',
                             package='mapr-jobtracker',
                             open_ports=[9001, 50030])

TASK_TRACKER = np.NodeProcess(name='tasktracker',
                              ui_name='TaskTracker',
                              package='mapr-tasktracker',
                              open_ports=[50060])

# Resource paths of the Jackson 1.9.13 jars bundled with the plugin.
JACKSON_CORE_ASL = (
    'plugins/mapr/services/swift/resources/'
    'jackson-core-asl-1.9.13.jar'
)
JACKSON_MAPPER_ASL = (
    'plugins/mapr/services/swift/resources/'
    'jackson-mapper-asl-1.9.13.jar'
)
@six.add_metaclass(s.Single)
class MapReduce(s.Service):
    """Classic-mode MapReduce service (JobTracker / TaskTracker).

    Builds core-site.xml and mapred-site.xml and, for the 3.1.1 version
    handler, replaces the Jackson jars in the Hadoop lib directory.
    """

    cluster_mode = 'classic'

    def __init__(self):
        super(MapReduce, self).__init__()
        self._ui_name = 'MapReduce'
        self._name = 'hadoop'
        self._version = '0.20.2'
        self._node_processes = [JOB_TRACKER, TASK_TRACKER]
        self._ui_info = [
            ('JobTracker', JOB_TRACKER, 'http://%s:50030'),
            ('TaskTracker', TASK_TRACKER, 'http://%s:50060'),
        ]
        self._validation_rules = [
            vu.at_least(1, JOB_TRACKER),
            vu.at_least(1, TASK_TRACKER),
        ]

    def _get_packages(self, node_processes):
        """Return (package, version) pairs: dependencies, then processes.

        Node-process packages are listed with ``None`` as their version.
        """
        result = []
        result += self.dependencies
        # 'proc' rather than 'np', which would shadow the node_process
        # module alias used elsewhere in this file.
        result += [(proc.package, None) for proc in node_processes]
        return result

    # mapred-site.xml
    def get_config_files(self, cluster_context, configs, instance=None):
        """Return the [core-site.xml, mapred-site.xml] file objects.

        Each file is optionally fetched from ``instance`` first; ``configs``
        is loaded into mapred-site.xml only.
        """
        core_site = bcf.HadoopXML("core-site.xml")
        core_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            core_site.fetch(instance)
        core_site.add_properties(self._get_core_site_props(cluster_context))

        mapred_site = bcf.HadoopXML("mapred-site.xml")
        mapred_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            mapred_site.fetch(instance)
        mapred_site.load_properties(configs)
        mapred_site.add_properties(
            self._get_mapred_site_props(cluster_context))

        return [core_site, mapred_site]

    def _get_core_site_props(self, context):
        """Return core-site.xml properties as a name -> value dict.

        Later sources override earlier ones: topology (node-aware clusters
        only), then Swift, then impersonation properties.
        """
        result = {}
        if context.is_node_aware:
            for conf in topo.vm_awareness_core_config():
                result[conf['name']] = conf['value']
        for conf in swift_helper.get_swift_configs():
            result[conf['name']] = conf['value']
        for conf in self._get_impersonation_props():
            result[conf['name']] = conf['value']
        return result

    def _get_mapred_site_props(self, context):
        """Return topology properties for mapred-site.xml (may be empty)."""
        result = {}
        if context.is_node_aware:
            for conf in topo.vm_awareness_mapred_config():
                result[conf['name']] = conf['value']
        return result

    def _get_impersonation_props(self):
        """Return the proxy-user properties for the 'mapr' user."""
        return [
            {'name': 'hadoop.proxyuser.mapr.groups', 'value': '*'},
            {'name': 'hadoop.proxyuser.mapr.hosts', 'value': '*'}
        ]

    def configure(self, cluster_context, instances=None):
        """Replace the Jackson libs when the 3.1.1 version handler is used.

        :param cluster_context: context of the cluster being configured
        :param instances: instances to update; when omitted or empty, all
            instances reported by ``cluster_context`` are used
        """
        version = cluster_context.cluster.hadoop_version
        handler = vhf.VersionHandlerFactory.get().get_handler(version)
        if handler._version == '3.1.1':
            # The default of None must not reach the loop in
            # _update_jackson_libs, which would fail iterating over it.
            instances = instances or cluster_context.get_instances()
            self._update_jackson_libs(cluster_context, instances)

    def _update_jackson_libs(self, context, instances):
        """Swap the jackson-*.jar files in the Hadoop lib dir on instances."""
        hadoop_lib = context.hadoop_lib
        core_asl = f.get_file_text(JACKSON_CORE_ASL)
        mapper_asl = f.get_file_text(JACKSON_MAPPER_ASL)
        core_asl_path = '%s/%s' % (hadoop_lib, 'jackson-core-asl-1.9.13.jar')
        mapper_path = '%s/%s' % (hadoop_lib, 'jackson-mapper-asl-1.9.13.jar')
        libs = {
            core_asl_path: core_asl,
            mapper_path: mapper_asl
        }
        for instance in instances:
            with instance.remote() as r:
                # -f: do not fail when no jackson jar is present to remove.
                r.execute_command('rm -f %s/jackson-*.jar' % hadoop_lib,
                                  run_as_root=True)
                r.write_files_to(libs, run_as_root=True)

    def get_file_path(self, file_name):
        """Return the resource path of ``file_name`` for this service."""
        template = 'plugins/mapr/services/mapreduce/resources/%s'
        return template % file_name

View File

@ -0,0 +1,91 @@
[
{
"name": "mapred.fairscheduler.assignmultiple",
"value": true,
"config_type": "bool",
"description": "CoreDefaultProperties"
},
{
"name": "mapred.fairscheduler.eventlog.enabled",
"value": false,
"config_type": "bool",
"description": "Enable scheduler logging in ${HADOOP_LOG_DIR}/fairscheduler/"
},
{
"name": "mapred.fairscheduler.smalljob.schedule.enable",
"value": true,
"config_type": "bool",
"description": "Enable small job fast scheduling inside fair scheduler. TaskTrackers should reserve a slot called ephemeral slot which is used for smalljob if cluster is busy."
},
{
"name": "mapred.fairscheduler.smalljob.max.maps",
"value": 10,
"config_type": "int",
"description": "Small job definition. Max number of maps allowed in small job."
},
{
"name": "mapred.fairscheduler.smalljob.max.reducers",
"value": 10,
"config_type": "int",
"description": "Small job definition. Max number of reducers allowed in small job."
},
{
"name": "mapred.fairscheduler.smalljob.max.inputsize",
"value": 10737418240,
"config_type": "int",
"description": "Small job definition. Max input size in bytes allowed for a small job. Default is 10 GB"
},
{
"name": "mapred.fairscheduler.smalljob.max.reducer.inputsize",
"value": 1073741824,
"config_type": "int",
"description": "Small job definition. Max estimated input size for a reducer allowed in small job. Default is 1 GB per reducer."
},
{
"name": "mapred.cluster.ephemeral.tasks.memory.limit.mb",
"value": 200,
"config_type": "int",
"description": "Small job definition. Max memory in mbytes reserved for an ephemeral slot. Default is 200 mb. This value must be the same on JobTracker and TaskTracker nodes."
},
{
"name": "mapreduce.jobtracker.node.labels.file",
"value": "/",
"description": "File on maprfs that has mapping of nodes and labels."
},
{
"name": "mapred.tasktracker.ephemeral.tasks.maximum",
"value": 1,
"config_type": "int",
"description": "Reserved slot for small job scheduling"
},
{
"name": "mapred.tasktracker.ephemeral.tasks.timeout",
"value": 10000,
"config_type": "int",
"description": "Maximum time in ms a task is allowed to occupy ephemeral slot"
},
{
"name": "mapred.tasktracker.ephemeral.tasks.ulimit",
"value": 4294967296,
"config_type": "int",
"description": "Ulimit (bytes) on all tasks scheduled on an ephemeral slot"
},
{
"name": "mapreduce.tasktracker.group",
"value": "root",
"description": "Group to which TaskTracker belongs."
},
{
"name": "mapred.local.dir",
"value": "/tmp/mapr-hadoop/mapred/local",
"description": "The local directory where MapReduce stores job jar, xml files and creates work dirs for tasks. MapR Hadoop uses a local volume for map outputs."
},
{
"name": "mapred.map.child.java.opts",
"value": "-Xmx1024m"
},
{
"name": "mapred.reduce.child.java.opts",
"value": "-Xmx3072m"
}
]

View File

@ -0,0 +1,140 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
import six
from sahara import context
from sahara.i18n import _
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.provisioning as p
from sahara.utils import files
# Module-level logger.
LOG = logging.getLogger(__name__)

# Node processes of the MapR-FS service: internal name, UI label, MapR
# package name and the ports declared for the process.
CLDB = np.NodeProcess(name='cldb',
                      ui_name='CLDB',
                      package='mapr-cldb',
                      open_ports=[7222, 7220, 7221])

FILE_SERVER = np.NodeProcess(name='fileserver',
                             ui_name='FileServer',
                             package='mapr-fileserver',
                             open_ports=[])

NFS = np.NodeProcess(name='nfs',
                     ui_name='NFS',
                     package='mapr-nfs',
                     open_ports=[2049, 9997, 9998])
@six.add_metaclass(s.Single)
class MapRFS(s.Service):
    """MapR-FS service: CLDB, FileServer and NFS node processes.

    After installation every FileServer instance gets a disk list file
    generated and ``disksetup`` run against it.  The service also builds
    cldb.conf and exposes the 'Enable MapR-DB' cluster option.
    """

    _CREATE_DISK_LIST = 'plugins/mapr/resources/create_disk_list_file.sh'
    _DISK_SETUP_CMD = '/opt/mapr/server/disksetup -F /tmp/disk.list'
    _DISK_SETUP_TIMEOUT = 600

    ENABLE_MAPR_DB_NAME = 'Enable MapR-DB'
    ENABLE_MAPR_DB_CONFIG = p.Config(
        name=ENABLE_MAPR_DB_NAME,
        applicable_target='general',
        scope='cluster',
        config_type="bool",
        priority=1,
        default_value=True,
        description=_('Specifies that MapR-DB is in use.'),
    )

    def __init__(self):
        super(MapRFS, self).__init__()
        self._ui_name = 'MapRFS'
        self._node_processes = [CLDB, FILE_SERVER, NFS]

        # (title, node process, URL template) entry for the web UI link.
        cldb_link = ('Container Location Database (CLDB)', CLDB,
                     'http://%s:7221')
        self._ui_info = [cldb_link]

        self._validation_rules = [
            vu.at_least(1, CLDB),
            vu.each_node_has(FILE_SERVER),
            vu.on_same_node(CLDB, FILE_SERVER),
        ]

    def service_dir(self, cluster_context):
        """Return None: no service directory is reported for MapR-FS."""
        return None

    def home_dir(self, cluster_context):
        """Return None: no home directory is reported for MapR-FS."""
        return None

    def conf_dir(self, cluster_context):
        """Return the ``conf`` directory under the MapR home directory."""
        return '%s/conf' % cluster_context.mapr_home

    def post_install(self, cluster_context, instances):
        """Initialise the disks of every FileServer instance in parallel.

        Falls back to all cluster instances when ``instances`` is empty.
        """
        LOG.debug('Initializing MapR FS')
        candidates = instances or cluster_context.get_instances()
        file_servers = cluster_context.filter_instances(candidates,
                                                        FILE_SERVER)
        with context.ThreadGroup() as tg:
            for node in file_servers:
                thread_name = 'init-mfs-%s' % node.id
                tg.spawn(thread_name, self._init_mfs_instance, node)
        LOG.debug('MapR FS successfully initialized')

    def _init_mfs_instance(self, instance):
        """Generate the disk list on ``instance`` and then run disksetup."""
        self._generate_disk_list_file(instance, self._CREATE_DISK_LIST)
        self._execute_disksetup(instance)

    def _generate_disk_list_file(self, instance, path_to_disk_setup_script):
        """Upload the disk-list script and run it with the storage paths."""
        LOG.debug('Creating disk list file')
        script_path = '/tmp/disk_setup_script.sh'
        with instance.remote() as remote:
            script_text = files.get_file_text(path_to_disk_setup_script)
            remote.write_file_to(script_path, script_text)
            remote.execute_command('chmod +x ' + script_path,
                                   run_as_root=True)
            storage_paths = ' '.join(instance.node_group.storage_paths())
            remote.execute_command('%s %s' % (script_path, storage_paths),
                                   run_as_root=True)

    def _execute_disksetup(self, instance):
        """Run the disksetup command on ``instance`` with a fixed timeout."""
        with instance.remote() as remote:
            remote.execute_command(self._DISK_SETUP_CMD,
                                   run_as_root=True,
                                   timeout=self._DISK_SETUP_TIMEOUT)

    def get_configs(self):
        """Return the user-visible configs of this service."""
        return [MapRFS.ENABLE_MAPR_DB_CONFIG]

    def get_config_files(self, cluster_context, configs, instance=None):
        """Return a one-element list holding the cldb.conf file object.

        The file is optionally fetched from ``instance``, then the bundled
        default cldb.conf is parsed into it and the cluster-specific
        properties are added last.  ``configs`` is not used here.
        """
        default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'
        cldb_conf = bcf.PropertiesFile("cldb.conf")
        cldb_conf.remote_path = self.conf_dir(cluster_context)
        if instance:
            cldb_conf.fetch(instance)
        cldb_conf.parse(files.get_file_text(default_path))
        cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))
        return [cldb_conf]

    def _get_cldb_conf_props(self, context):
        """Return cluster-specific cldb.conf properties as a dict."""
        props = {
            'cldb.zookeeper.servers':
                context.get_zookeeper_nodes_ip_with_port(),
        }
        if context.is_node_aware:
            props['net.topology.script.file.name'] = '/opt/mapr/topology.sh'
        return props

View File

@ -0,0 +1,208 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as c
from oslo_log import log as logging
import six
import sahara.plugins.mapr.domain.configuration_file as cf
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.services.hive.hive as hive
import sahara.utils.files as f
# Module-level logger.
LOG = logging.getLogger(__name__)

# Immutable record describing one service database: its name plus the
# credentials of the account that owns it.
db_spec = c.namedtuple('DatabaseSpec', 'db_name user password')
@six.add_metaclass(s.Single)
class MySQL(s.Service):
    """Helpers that install MySQL and provision the service databases.

    All helpers are static.  The database instance is the cluster's Oozie
    server (see ``get_db_instance``).  Databases are created for metrics,
    the RDBMS service, Oozie and, when a HiveMetastore instance exists,
    the Hive metastore.
    """

    METRICS_SPECS = db_spec('metrics', 'maprmetrics', 'mapr')
    METASTORE_SPECS = db_spec('metastore', 'maprmetastore', 'mapr')
    RDBMS_SPECS = db_spec('rdbms', 'maprrdbms', 'mapr')
    OOZIE_SPECS = db_spec('oozie', 'maproozie', 'mapr')
    # Raw string: '\w' belongs to the grep pattern and is not a valid Python
    # escape sequence.  grep drops output lines without a word character.
    SELECT_DATA = r'mysql -uroot --skip-column-names -e "%s"| grep -E "\w+"'
    GET_DBS_LIST = SELECT_DATA % 'SHOW DATABASES'
    # The inner '%s' survives the formatting above and is filled with the
    # user name in get_user_hosts().
    GET_USERS_HOSTS = (
        SELECT_DATA % "SELECT Host FROM mysql.user WHERE mysql.user.User='%s'"
    )
    SCHEMA_PATH = (
        '/opt/mapr/hive/hive-{0}/scripts/metastore/upgrade/mysql/'
        'hive-schema-{0}.0.mysql.sql')
    DRIVER_CLASS = 'com.mysql.jdbc.Driver'
    MYSQL_SERVER_PORT = 3306
    MYSQL_INSTALL_SCRIPT = 'plugins/mapr/resources/install_mysql.sh'
    INSTALL_PACKAGES_TIMEOUT = 1800

    def __init__(self):
        super(MySQL, self).__init__()
        self._ui_name = 'MySQL'

    @staticmethod
    def _get_db_daemon_name(distro):
        """Return the MySQL service name for ``distro`` or None if unknown.

        The comparison is case-insensitive.
        """
        name = distro.lower()
        if name == 'ubuntu':
            return 'mysql'
        if name in ('centos', 'redhatenterpriseserver', 'suse'):
            return 'mysqld'
        return None

    @staticmethod
    def _execute_script(instance, script_path, script_text=None,
                        user='root', password=None):
        """Feed the SQL script at ``script_path`` to mysql on ``instance``.

        When ``script_text`` is given it is written to ``script_path``
        first.  The ``-u`` / ``-p`` options are omitted for empty values.
        """
        with instance.remote() as r:
            if script_text:
                r.write_file_to(script_path, script_text, run_as_root=True)
            LOG.debug('Executing SQL script %s', script_path)
            r.execute_command(("mysql %s %s < %s" %
                               ('-u' + user if user else '',
                                '-p' + password if password else '',
                                script_path)),
                              run_as_root=True)

    @staticmethod
    def _create_service_db(instance, specs):
        """Render create_database.sql for ``specs`` and execute it."""
        f_name = 'create_db_%s.sql' % specs.db_name
        script = MySQL._create_script_obj(f_name, 'create_database.sql',
                                          db_name=specs.db_name,
                                          user=specs.user,
                                          password=specs.password)
        MySQL._execute_script(instance, script.remote_path, script.render())

    @staticmethod
    def _create_metrics_db(instance, databases, instances):
        """Create the metrics database if missing, then grant access."""
        if MySQL.METRICS_SPECS.db_name not in databases:
            MySQL._create_service_db(instance, MySQL.METRICS_SPECS)
            MySQL._execute_script(instance=instance,
                                  script_path='/opt/mapr/bin/setup.sql')
        MySQL._grant_access(instance, MySQL.METRICS_SPECS, instances)

    @staticmethod
    def _create_rdbms_db(instance, databases, instances):
        """Create the rdbms database if missing, then grant access."""
        if MySQL.RDBMS_SPECS.db_name not in databases:
            MySQL._create_service_db(instance, MySQL.RDBMS_SPECS)
        MySQL._grant_access(instance, MySQL.RDBMS_SPECS, instances)

    @staticmethod
    def _create_metastore_db(instance, cluster_context, databases, instances):
        """Create the Hive metastore database and load its schema.

        Does nothing when the cluster has no HiveMetastore instance.  For
        an existing database only access is granted.  For a new one the
        schema script is run from the HiveMetastore instance against the
        database host.
        """
        hive_meta = cluster_context.get_instance(hive.HIVE_METASTORE)

        if not hive_meta:
            return

        db_name = MySQL.METASTORE_SPECS.db_name
        if db_name not in databases:
            MySQL._create_service_db(instance, MySQL.METASTORE_SPECS)
            MySQL._grant_access(instance, MySQL.METASTORE_SPECS, instances)
            with hive_meta.remote() as r:
                hive_serv = cluster_context.get_service(hive.HIVE_METASTORE)
                schema_path = MySQL.SCHEMA_PATH.format(hive_serv.version)
                script = MySQL._create_script_obj('hive_schema.sql',
                                                  'hive_schema.sql',
                                                  db_name=db_name,
                                                  path=schema_path)
                r.write_file_to(script.remote_path, script.render())
                args = {
                    'user': MySQL.METASTORE_SPECS.user,
                    'password': MySQL.METASTORE_SPECS.password,
                    'host': instance.management_ip,
                    'path': script.remote_path
                }
                cmd = 'mysql -h{host} -u{user} -p{password} < {path}'
                r.execute_command(cmd.format(**args), run_as_root=True)
        else:
            MySQL._grant_access(instance, MySQL.METASTORE_SPECS, instances)

    @staticmethod
    def _create_oozie_db(instance, databases, instances):
        """Create the oozie database if missing, then grant access."""
        if MySQL.OOZIE_SPECS.db_name not in databases:
            MySQL._create_service_db(instance, MySQL.OOZIE_SPECS)
        MySQL._grant_access(instance, MySQL.OOZIE_SPECS, instances)

    @staticmethod
    def start_mysql_server(cluster_context):
        """Restart the MySQL service on the database instance."""
        LOG.debug('Starting MySQL Server')
        instance = MySQL.get_db_instance(cluster_context)
        distro = cluster_context.distro
        with instance.remote() as r:
            r.execute_command(('service %s restart' %
                               MySQL._get_db_daemon_name(distro.name)),
                              run_as_root=True)
        LOG.debug('MySQL Server successfully started')

    @staticmethod
    def get_databases_list(db_instance):
        """Return the database names on ``db_instance`` as a list."""
        with db_instance.remote() as r:
            _, out = r.execute_command(MySQL.GET_DBS_LIST)
        if out:
            return out.splitlines()
        return []

    @staticmethod
    def get_user_hosts(db_instance, username):
        """Return the hosts ``username`` has accounts for, as a list."""
        with db_instance.remote() as r:
            _, out = r.execute_command(MySQL.GET_USERS_HOSTS % username)
        if out:
            return out.splitlines()
        return []

    @staticmethod
    def get_db_instance(context):
        """Return the instance hosting MySQL: the cluster's Oozie server."""
        return context.oozie_server

    @staticmethod
    def create_databases(cluster_context, instances):
        """Create all service databases and grant ``instances`` access."""
        db_instance = MySQL.get_db_instance(cluster_context)
        databases = MySQL.get_databases_list(db_instance)
        MySQL._create_metrics_db(db_instance, databases, instances)
        MySQL._create_rdbms_db(db_instance, databases, instances)
        MySQL._create_oozie_db(db_instance, databases, instances)
        MySQL._create_metastore_db(
            db_instance, cluster_context, databases, instances)

    @staticmethod
    def _create_script_obj(filename, template, **kwargs):
        """Build a template file in /tmp/ with ``kwargs`` as properties.

        ``template`` is the name of a file in the mysql resources directory.
        """
        script = cf.TemplateFile(filename)
        script.remote_path = '/tmp/'
        script.parse(f.get_file_text(
            'plugins/mapr/services/mysql/resources/%s' % template))
        for k, v in six.iteritems(kwargs):
            script.add_property(k, v)
        return script

    @staticmethod
    def _grant_access(instance, specs, instances):
        """Grant the ``specs`` user access from hosts not yet registered.

        Only management IPs of ``instances`` that the user has no account
        for are passed to the grant_access.sql template.
        """
        f_name = 'grant_access_%s.sql' % specs.db_name
        ips = [i.management_ip for i in instances]
        user_hosts = MySQL.get_user_hosts(instance, specs.user)
        script = MySQL._create_script_obj(f_name, 'grant_access.sql',
                                          hosts=set(ips)-set(user_hosts),
                                          db_name=specs.db_name,
                                          user=specs.user,
                                          password=specs.password)
        MySQL._execute_script(instance, script.remote_path, script.render())

    @staticmethod
    def install_mysql(instance, distro_name):
        """Upload the install script to ``instance`` and run it as root.

        ``distro_name`` is passed to the script as its argument.
        """
        with instance.remote() as r:
            script = '/tmp/install_mysql.sh'
            data = f.get_file_text(MySQL.MYSQL_INSTALL_SCRIPT)
            r.write_file_to(script, data, run_as_root=True)
            r.execute_command('chmod +x %s' % script, run_as_root=True)
            r.execute_command('%s %s' % (script, distro_name),
                              run_as_root=True,
                              timeout=MySQL.INSTALL_PACKAGES_TIMEOUT)

View File

@ -0,0 +1,7 @@
-- Template: create a service database and a local account for it.
-- The db_name, user and password placeholders are filled in at render time.
CREATE DATABASE {{ db_name }};
-- The account is created for both the socket host and the loopback address.
CREATE USER {{ user }}@'localhost' IDENTIFIED BY '{{ password }}';
CREATE USER {{ user }}@'127.0.0.1' IDENTIFIED BY '{{ password }}';
GRANT ALL PRIVILEGES ON {{ db_name }}.* TO {{ user }}@'localhost' WITH GRANT OPTION;
GRANT ALL PRIVILEGES ON {{ db_name }}.* TO {{ user }}@'127.0.0.1' WITH GRANT OPTION;
FLUSH PRIVILEGES;
FLUSH HOSTS;

View File

@ -0,0 +1,6 @@
-- Template: for every entry of the hosts placeholder, create the account at
-- that host and give it full access to the database.
{% for host in hosts %}
CREATE USER {{ user }}@'{{ host }}' IDENTIFIED BY '{{ password }}';
GRANT ALL PRIVILEGES ON {{ db_name }}.* TO {{ user }}@'{{ host }}' WITH GRANT OPTION;
{% endfor %}
FLUSH PRIVILEGES;
FLUSH HOSTS;

View File

@ -0,0 +1,2 @@
-- Template: select the database, then load the schema script found at the
-- rendered path.
USE {{ db_name }};
SOURCE {{ path }};

View File

@ -0,0 +1,115 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
import six
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.services.mysql.mysql as mysql
import sahara.plugins.mapr.util.validation_utils as vu
LOG = logging.getLogger(__name__)
# Node process descriptor for Oozie: installed from the mapr-oozie package,
# with port 11000 listed as the port to open.
OOZIE = np.NodeProcess(
    name='oozie',
    ui_name='Oozie',
    package='mapr-oozie',
    open_ports=[11000]
)
@six.add_metaclass(s.Single)
class Oozie(s.Service):
    """Oozie 4.0.1 service definition; stores its data in MySQL."""

    def __init__(self):
        super(Oozie, self).__init__()
        self._name = 'oozie'
        self._ui_name = 'Oozie'
        self._version = '4.0.1'
        self._node_processes = [OOZIE]
        self._dependencies = [('mapr-oozie-internal', self.version)]
        self._cluster_defaults = ['oozie-default.json']
        self._validation_rules = [vu.exactly(1, OOZIE)]

    def get_config_files(self, cluster_context, configs, instance=None):
        """Return a one-element list holding the oozie-site.xml file object.

        When ``instance`` is given the file is fetched from it first; then
        ``configs`` are loaded and the generated database properties added.
        """
        site_xml = bcf.HadoopXML("oozie-site.xml")
        site_xml.remote_path = self.conf_dir(cluster_context)
        if instance:
            site_xml.fetch(instance)
        site_xml.load_properties(configs)
        generated_props = self._get_oozie_site_props(cluster_context)
        site_xml.add_properties(generated_props)
        return [site_xml]

    def _get_oozie_site_props(self, context):
        """Return the oozie-site properties derived from the MySQL specs."""
        specs = mysql.MySQL.OOZIE_SPECS
        return {
            'oozie.db.schema.name': specs.db_name,
            'oozie.service.JPAService.create.db.schema': True,
            'oozie.service.JPAService.jdbc.driver': mysql.MySQL.DRIVER_CLASS,
            'oozie.service.JPAService.jdbc.url': self._get_jdbc_uri(context),
            'oozie.service.JPAService.jdbc.username': specs.user,
            'oozie.service.JPAService.jdbc.password': specs.password,
            'oozie.service.HadoopAccessorService.hadoop.configurations':
                '*=%s' % context.hadoop_conf
        }

    def _get_jdbc_uri(self, context):
        """Return the JDBC URL of the Oozie database on the DB instance."""
        uri_args = {
            'db_host': mysql.MySQL.get_db_instance(context).fqdn(),
            'db_port': mysql.MySQL.MYSQL_SERVER_PORT,
            'db_name': mysql.MySQL.OOZIE_SPECS.db_name,
        }
        uri_template = ('jdbc:mysql://%(db_host)s:%(db_port)s/%(db_name)s?'
                        'createDatabaseIfNotExist=true')
        return uri_template % uri_args

    def post_install(self, cluster_context, instances):
        """Copy the MySQL connector jar into the Oozie server lib directory.

        Does nothing when the cluster context reports no Oozie service.
        """
        oozie_inst = cluster_context.get_instance(OOZIE)
        oozie_service = cluster_context.get_service(OOZIE)
        if not oozie_service:
            return
        copy_connector_cmd = ('cp /usr/share/java/mysql-connector-java.jar '
                              '/opt/mapr/oozie/oozie-%s'
                              '/oozie-server/lib/') % oozie_service.version
        with oozie_inst.remote() as r:
            LOG.debug('Installing MySQL connector for Oozie')
            # Failures of the copy are deliberately not raised.
            r.execute_command(copy_connector_cmd, run_as_root=True,
                              raise_when_error=False)

    def post_start(self, cluster_context, instances):
        """Upload the Oozie sharelib unless /oozie/share/lib already lists.

        'share2' is uploaded when the cluster mode is 'yarn', otherwise
        'share1'.
        """
        list_cmd = 'sudo -u mapr hadoop fs -ls /oozie/share/lib'
        mkdir_cmd = 'sudo -u mapr hadoop fs -mkdir /oozie'
        if cluster_context.cluster_mode == 'yarn':
            share_dir = 'share2'
        else:
            share_dir = 'share1'
        upload_cmd = ('sudo -u mapr hadoop fs -copyFromLocal '
                      '%(oozie_home)s/%(share)s /oozie/share') % {
            'oozie_home': self.home_dir(cluster_context),
            'share': share_dir,
        }
        # mkdir + upload only run when the listing command fails.
        full_cmd = '%s || (%s && %s)' % (list_cmd, mkdir_cmd, upload_cmd)
        oozie_inst = cluster_context.get_instance(OOZIE)
        with oozie_inst.remote() as r:
            LOG.debug("Installing Oozie sharelibs")
            r.execute_command(full_cmd, raise_when_error=False)

View File

@ -0,0 +1,28 @@
[
{
"value": "/oozie/share/lib",
"name": "oozie.service.WorkflowAppService.system.libpath",
"description": "System library path to use for workflow applications. This path is added to workflow application if their job properties sets the property 'oozie.use.system.libpath' to true."
},
{
"value": "yarn-tez",
"name": "mapreduce.framework.name",
"description": "The runtime framework for executing MapReduce jobs."
},
{
"value": false,
"name": "use.system.libpath.for.mapreduce.and.pig.jobs",
"config_type": "bool",
"description": "If set to true, submissions of MapReduce and Pig jobs will include automatically the system library path, thus not requiring users to specify where the Pig JAR files are. Instead, the ones from the system library path are used."
},
{
"value": "*",
"name": "oozie.service.ProxyUserService.proxyuser.mapr.hosts",
"description": "List of hosts the '#USER#' user is allowed to perform 'doAs' operations. The '#USER#' must be replaced with the username of the user who is allowed to perform 'doAs' operations. The value can be the '*' wildcard or a list of hostnames. For multiple users copy this property and replace the user name in the property name."
},
{
"value": "*",
"name": "oozie.service.ProxyUserService.proxyuser.mapr.groups",
"description": "List of groups the '#USER#' user is allowed to impersonate users from to perform 'doAs' operations. The '#USER#' must be replaced with the username of the user who is allowed to perform 'doAs' operations. The value can be the '*' wildcard or a list of groups. For multiple users copy this property and replace the user name in the property name."
}
]

View File

@ -0,0 +1,38 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
# Node process descriptor for Pig, installed from the mapr-pig package.
# No open_ports are declared for it.
PIG = np.NodeProcess(
    name='pig',
    ui_name='Pig',
    package='mapr-pig'
)
@six.add_metaclass(s.Single)
class Pig(s.Service):
    """Pig 0.13 service definition; requires at least one Pig process."""

    def __init__(self):
        super(Pig, self).__init__()
        self._name, self._ui_name = 'pig', 'Pig'
        self._version = '0.13'
        self._node_processes = [PIG]
        at_least_one_pig = vu.at_least(1, PIG)
        self._validation_rules = [at_least_one_pig]

View File

@ -0,0 +1,61 @@
[
{
"name": "fs.swift.impl",
"config_type": "string",
"value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name": "fs.swift.connect.timeout",
"config_type": "int",
"value": 15000
},
{
"name": "fs.swift.socket.timeout",
"config_type": "int",
"value": 60000
},
{
"name": "fs.swift.connect.retry.count",
"config_type": "int",
"value": 3
},
{
"name": "fs.swift.connect.throttle.delay",
"config_type": "int",
"value": 0
},
{
"name": "fs.swift.blocksize",
"config_type": "int",
"value": 32768
},
{
"name": "fs.swift.partsize",
"config_type": "int",
"value": 4718592
},
{
"name": "fs.swift.requestsize",
"config_type": "int",
"value": 64
},
{
"name": "fs.swift.service.sahara.public",
"config_type": "bool",
"value": true
},
{
"name": "fs.swift.service.sahara.http.port",
"config_type": "int",
"value": 8080
},
{
"name": "fs.swift.service.sahara.https.port",
"config_type": "int",
"value": 443
},
{
"name": "fs.swift.service.sahara.auth.endpoint.prefix",
"value": "/endpoints/AUTH_"
}
]

View File

@ -0,0 +1,48 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
import six
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.services.maprfs.maprfs as maprfs
import sahara.utils.files as f
LOG = logging.getLogger(__name__)
@six.add_metaclass(s.Single)
class Swift(s.Service):
    """Swift integration: ships the hadoop-swift jar to file-server nodes."""

    # Resource path of the bundled jar, as passed to f.get_file_text.
    HADOOP_SWIFT_JAR = (
        'plugins/mapr/services/swift/resources/hadoop-swift-latest.jar')

    def __init__(self):
        super(Swift, self).__init__()
        self._name, self._ui_name = 'swift', 'Swift'
        self._cluster_defaults = ['swift-default.json']

    def configure(self, context, instances=None):
        """Install the Swift jar on the file-server instances.

        When ``instances`` is empty or None, all instances of the context
        are considered.
        """
        if not instances:
            instances = context.get_instances()
        targets = context.filter_instances(instances, maprfs.FILE_SERVER)
        self._install_swift_jar(context, targets)

    def _install_swift_jar(self, context, instances):
        """Write the jar as <hadoop_lib>/swift.jar on each instance."""
        LOG.debug('Installing Swift jar')
        jar_content = f.get_file_text(Swift.HADOOP_SWIFT_JAR)
        remote_jar = '%s/swift.jar' % context.hadoop_lib
        for node in instances:
            with node.remote() as r:
                r.write_file_to(remote_jar, jar_content, run_as_root=True)

View File

@ -0,0 +1,16 @@
[
{
"value": 1024,
"name": "yarn.scheduler.minimum-allocation-mb",
"priority": 1,
"config_type": "int",
"description": "The minimum allocation for every container request at the RM, in MBs. Memory requests lower than this won't take effect, and the specified value will get allocated at minimum."
},
{
"value": 8192,
"name": "yarn.scheduler.maximum-allocation-mb",
"priority": 1,
"config_type": "int",
"description": "The maximum allocation for every container request at the RM, in MBs. Memory requests higher than this won't take effect, and will get capped to this value."
}
]

View File

@ -0,0 +1,23 @@
[
{
"value": 8192,
"name": "yarn.nodemanager.resource.memory-mb",
"priority": 1,
"config_type": "int",
"description": "Amount of physical memory, in MB, that can be allocated for containers."
},
{
"value": 4,
"name": "yarn.nodemanager.resource.cpu-vcores",
"priority": 1,
"config_type": "int",
"description": "Number of CPU cores that can be allocated for containers."
},
{
"value": 1,
"name": "yarn.nodemanager.resource.io-spindles",
"priority": 1,
"config_type": "int",
"description": "Number of spindles that can be allocated for containers."
}
]

View File

@ -0,0 +1,123 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.domain.configuration_file as bcf
import sahara.plugins.mapr.domain.node_process as np
import sahara.plugins.mapr.domain.service as s
import sahara.plugins.mapr.util.validation_utils as vu
from sahara.swift import swift_helper
# Node process descriptors for the YARN daemons. Each one names the MapR
# package that provides the process and the ports listed for it.
RESOURCE_MANAGER = np.NodeProcess(
    name='resourcemanager',
    ui_name='ResourceManager',
    package='mapr-resourcemanager',
    open_ports=[8033, 8032, 8031, 8030, 8088]
)
NODE_MANAGER = np.NodeProcess(
    name='nodemanager',
    ui_name='NodeManager',
    package='mapr-nodemanager',
    open_ports=[8041, 8040, 8042, 8044]
)
HISTORY_SERVER = np.NodeProcess(
    name='historyserver',
    ui_name='HistoryServer',
    package='mapr-historyserver',
    open_ports=[10020, 19888, 19890]
)
class YARN(s.Service):
    """Base definition of the YARN flavour of the 'hadoop' service.

    Version-specific subclasses set ``_version`` and the validation rules.
    """

    cluster_mode = 'yarn'

    def __init__(self):
        super(YARN, self).__init__()
        self._name = 'hadoop'
        self._ui_name = 'YARN'
        self._node_processes = [RESOURCE_MANAGER, NODE_MANAGER, HISTORY_SERVER]
        # Entries are (title, node process, URL template).
        # NOTE(review): presumably '%s' is filled with the address of an
        # instance running the given process -- confirm against the caller.
        self._ui_info = [
            ('NodeManager', NODE_MANAGER, 'http://%s:8042'),
            ('ResourceManager', RESOURCE_MANAGER, 'http://%s:8088'),
            # Port 19888 is listed only in HISTORY_SERVER.open_ports, so the
            # HistoryServer UI must be bound to that process rather than to
            # the ResourceManager.
            ('HistoryServer', HISTORY_SERVER, 'http://%s:19888'),
        ]
        self._cluster_defaults = ['yarn-cluster.json']
        self._node_defaults = ['yarn-node.json']

    def get_config_files(self, cluster_context, configs, instance=None):
        """Return the [yarn-site.xml, core-site.xml] file objects.

        When ``instance`` is given, each file is fetched from it before the
        generated properties are added. ``configs`` are loaded into
        yarn-site.xml only, after the generated properties.
        """
        # yarn-site.xml
        yarn_site = bcf.HadoopXML("yarn-site.xml")
        yarn_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            yarn_site.fetch(instance)
        yarn_site.add_properties(self._get_yarn_site_props(cluster_context))
        yarn_site.load_properties(configs)
        # core-site.xml
        core_site = bcf.HadoopXML("core-site.xml")
        core_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            core_site.fetch(instance)
        core_site.add_properties(self._get_core_site_props(cluster_context))
        return [yarn_site, core_site]

    def _get_core_site_props(self, context):
        """Return proxy-user properties plus all Swift helper configs."""
        result = {
            'hadoop.proxyuser.mapr.groups': '*',
            'hadoop.proxyuser.mapr.hosts': '*',
        }
        for conf in swift_helper.get_swift_configs():
            result[conf['name']] = conf['value']
        return result

    def _get_yarn_site_props(self, context):
        """Return the proxy-user properties written to yarn-site.xml."""
        return {
            'hadoop.proxyuser.mapr.groups': '*',
            'hadoop.proxyuser.mapr.hosts': '*',
        }

    def conf_dir(self, cluster_context):
        """Return the configuration directory: <home_dir>/etc/hadoop."""
        return '%s/etc/hadoop' % self.home_dir(cluster_context)

    def get_file_path(self, file_name):
        """Return the resource path of a bundled YARN resource file."""
        template = 'plugins/mapr/services/yarn/resources/%s'
        return template % file_name
@six.add_metaclass(s.Single)
class YARNv241(YARN):
    """YARN 2.4.1: exactly one ResourceManager and one HistoryServer."""

    def __init__(self):
        super(YARNv241, self).__init__()
        self._version = '2.4.1'
        single_rm = vu.exactly(1, RESOURCE_MANAGER)
        some_nm = vu.at_least(1, NODE_MANAGER)
        single_hs = vu.exactly(1, HISTORY_SERVER)
        self._validation_rules = [single_rm, some_nm, single_hs]
@six.add_metaclass(s.Single)
class YARNv251(YARN):
    """YARN 2.5.1: one or more ResourceManagers, exactly one HistoryServer."""

    def __init__(self):
        super(YARNv251, self).__init__()
        self._version = '2.5.1'
        some_rm = vu.at_least(1, RESOURCE_MANAGER)
        some_nm = vu.at_least(1, NODE_MANAGER)
        single_hs = vu.exactly(1, HISTORY_SERVER)
        self._validation_rules = [some_rm, some_nm, single_hs]

View File

@ -1,20 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class AttrDict(dict):
    """dict whose items can also be read and written as attributes.

    The instance ``__dict__`` is the mapping itself, so ``d.key`` and
    ``d['key']`` share the same storage.
    """

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Alias attribute storage to the dict contents.
        self.__dict__ = self

View File

@ -1,78 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.plugins.mapr.util import names
import sahara.plugins.utils as u
# Port appended to each ZooKeeper IP by get_zookeeper_nodes_ip_with_port.
ZOOKEEPER_CLIENT_PORT = 5181
def get_cldb_nodes_ip(cluster):
    """Return the management IPs of all CLDB instances, comma-separated."""
    cldb_ips = []
    for node in u.get_instances(cluster, names.CLDB):
        cldb_ips.append(node.management_ip)
    return ','.join(cldb_ips)
def get_zookeeper_nodes_ip(cluster):
    """Return the management IPs of all ZooKeeper instances, comma-joined."""
    zk_ips = []
    for node in u.get_instances(cluster, names.ZOOKEEPER):
        zk_ips.append(node.management_ip)
    return ','.join(zk_ips)
def get_zookeeper_nodes_ip_with_port(cluster):
    """Return comma-joined 'ip:port' pairs for all ZooKeeper instances."""
    endpoints = []
    for node in u.get_instances(cluster, names.ZOOKEEPER):
        endpoints.append(
            '%s:%s' % (node.management_ip, ZOOKEEPER_CLIENT_PORT))
    return ','.join(endpoints)
def get_resourcemanager_ip(cluster):
    """Return the management IP of the ResourceManager instance."""
    return u.get_instance(cluster, names.RESOURCE_MANAGER).management_ip
def get_historyserver_ip(cluster):
    """Return the management IP of the HistoryServer instance."""
    return u.get_instance(cluster, names.HISTORY_SERVER).management_ip
def get_jobtracker(cluster):
    """Return the instance running the JobTracker process."""
    return u.get_instance(cluster, names.JOBTRACKER)
def get_resourcemanager(cluster):
    """Return the instance running the ResourceManager process."""
    rm_instance = u.get_instance(cluster, names.RESOURCE_MANAGER)
    return rm_instance
def get_nodemanagers(cluster):
    """Return the instances running the NodeManager process."""
    nm_instances = u.get_instances(cluster, names.NODE_MANAGER)
    return nm_instances
def get_oozie(cluster):
    """Return the instance running the Oozie process."""
    oozie_instance = u.get_instance(cluster, names.OOZIE)
    return oozie_instance
def get_datanodes(cluster):
    """Return the instances running the DataNode process."""
    datanodes = u.get_instances(cluster, names.DATANODE)
    return datanodes
def get_tasktrackers(cluster):
    """Return the instances running the TaskTracker process."""
    tasktrackers = u.get_instances(cluster, names.TASK_TRACKER)
    return tasktrackers
def get_secondarynamenodes(cluster):
    """Return the instances running the SecondaryNameNode process."""
    secondary_nodes = u.get_instances(cluster, names.SECONDARY_NAMENODE)
    return secondary_nodes
def get_historyserver(cluster):
    """Return the instance running the HistoryServer process."""
    hs_instance = u.get_instance(cluster, names.HISTORY_SERVER)
    return hs_instance

View File

@ -1,76 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as c
import six
import sahara.plugins.utils as u
class ClusterInfo(object):
    """Convenience view over a cluster combined with its plugin spec."""

    # TODO(aosadchiy): perform lookup for plugin_spec
    def __init__(self, cluster, plugin_spec):
        self.plugin_spec = plugin_spec
        self.cluster = cluster

    def get_default_configs(self, node_group=None):
        """Return the plugin's default configs for the relevant services."""
        return self.plugin_spec.get_default_plugin_configs(
            self.get_services(node_group))

    def get_services(self, node_group=None):
        """Return the set of services of a node group or the whole cluster.

        For a node group: the service of each of its node processes plus
        'general'. Without one: the union over all cluster node groups.
        """
        if node_group:
            found = set()
            for node_process in node_group.node_processes:
                found.add(
                    self.plugin_spec.get_node_process_service(node_process))
            return found | set(['general'])
        merged = set()
        for group in self.cluster.node_groups:
            merged.update(self.get_services(group))
        return merged

    def get_user_configs(self, node_group=None):
        """Return cluster configs grouped as service -> file name -> key.

        Only non-empty entries of the cluster's configs whose service is in
        get_services(node_group) are kept. The file name is looked up with
        scope 'node' when a node group is given, otherwise 'cluster'.
        """
        services = self.get_services(node_group)
        scope = 'node' if node_group else 'cluster'
        result = c.defaultdict(lambda: c.defaultdict(dict))
        for service, kv in six.iteritems(self.cluster.cluster_configs):
            if service not in services or not kv:
                continue
            for key, value in six.iteritems(kv):
                filename = self.plugin_spec.get_config_file(
                    scope, service, key)
                result[service][filename][key] = value
        return result

    def get_node_group_files(self):
        """Placeholder; always returns None."""
        return None

    def get_node_groups(self, node_process=None):
        """Return the cluster's node groups, optionally for one process."""
        return u.get_node_groups(self.cluster, node_process)

    def get_instances_count(self, node_process=None):
        """Return the number of instances, optionally for one process."""
        return u.get_instances_count(self.cluster, node_process)

    def get_instances(self, node_process=None):
        """Return the cluster's instances, optionally for one process."""
        return u.get_instances(self.cluster, node_process)

    def get_instance(self, node_process):
        """Return the instance running the given node process."""
        return u.get_instance(self.cluster, node_process)

    def get_instances_ip(self, node_process):
        """Return the management IPs of instances running the process."""
        addresses = []
        for node in self.get_instances(node_process):
            addresses.append(node.management_ip)
        return addresses

    def get_instance_ip(self, node_process):
        """Return the management IP of the instance running the process."""
        node = self.get_instance(node_process)
        return node.management_ip

View File

@ -0,0 +1,19 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
def chown(instance, owner, path, run_as_root=True):
    """Recursively change the owner of ``path`` on the given instance.

    :param instance: object whose remote() yields a command executor
    :param owner: owner specification passed to ``chown -R``
    :param path: remote path to change
    :param run_as_root: forwarded to execute_command
    """
    command = 'chown -R %s %s' % (owner, path)
    with instance.remote() as remote:
        remote.execute_command(command, run_as_root=run_as_root)

View File

@ -1,110 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg
from oslo_log import log as logging
from sahara import exceptions as ex
from sahara.i18n import _
from sahara.i18n import _LI
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.versions.version_handler_factory as vhf
from sahara.plugins import provisioning as p
import sahara.plugins.utils as u
from sahara.topology import topology_helper as th
from sahara.utils import files as f
# Directory into which this module writes topology.sh and topology.data.
MAPR_HOME = '/opt/mapr'
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
# Import the enable_data_locality option, declared in
# sahara.topology.topology_helper, so it can be read from CONF here.
CONF.import_opt("enable_data_locality", "sahara.topology.topology_helper")
# Optional cluster-scoped boolean option of the 'general' target; defaults
# to True.
ENABLE_DATA_LOCALITY = p.Config('Enable Data Locality', 'general', 'cluster',
                                config_type="bool", priority=1,
                                default_value=True, is_optional=True)
def post_configure_instance(instance):
    """Install the executable topology script when data locality is on.

    With data locality enabled for the instance's cluster, topology.sh is
    written under MAPR_HOME and made executable; otherwise only a debug
    message is logged.
    """
    LOG.info(_LI('START: Post configuration for instance.'))
    with instance.remote() as r:
        if not is_data_locality_enabled(instance.node_group.cluster):
            LOG.debug('Data locality is disabled.')
        else:
            LOG.debug('Data locality is enabled.')
            t_script = MAPR_HOME + '/topology.sh'
            LOG.debug('Start writing file %s', t_script)
            script_text = f.get_file_text(
                'plugins/mapr/util/resources/topology.sh')
            r.write_file_to(t_script, script_text, run_as_root=True)
            LOG.debug('Done for writing file %s', t_script)
            LOG.debug('Start executing command: chmod +x %s', t_script)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)
            LOG.debug('Done for executing command.')
    LOG.info(_LI('END: Post configuration for instance.'))
def configure_instances(cluster, instances):
    """Configure instances with the configurer of the cluster's version."""
    hadoop_version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
    plugin_spec = handler.get_plugin_spec()
    handler.get_cluster_configurer(cluster, plugin_spec).configure(instances)
def configure_topology_data(cluster, is_node_awareness):
    """Write the topology map to every instance when locality is enabled.

    The map is generated once and written as MAPR_HOME/topology.data on
    each cluster instance; with data locality disabled only a debug
    message is logged.
    """
    LOG.info(_LI('START: configuring topology data.'))
    if not is_data_locality_enabled(cluster):
        LOG.debug('Data locality is disabled.')
    else:
        LOG.debug('Data locality is enabled.')
        LOG.debug('Start generating topology map.')
        topology_map = th.generate_topology_map(cluster, is_node_awareness)
        LOG.debug('Done for generating topology map.')
        topology_data = cfu.to_file_content(topology_map, 'topology')
        for node in u.get_instances(cluster):
            LOG.debug(
                'Start writing to file: %s/topology.data', MAPR_HOME)
            remote = node.remote()
            remote.write_file_to(MAPR_HOME + "/topology.data",
                                 topology_data, run_as_root=True)
            LOG.debug('Done writing to file: %s/topology.data', MAPR_HOME)
    LOG.info(_LI('END: configuring topology data.'))
def get_plugin_configs():
    """Return the plugin-level configs exposed by this module.

    The list holds ENABLE_DATA_LOCALITY only when the global
    enable_data_locality option is set; otherwise it is empty.
    """
    if CONF.enable_data_locality:
        return [ENABLE_DATA_LOCALITY]
    return []
def get_plugin_config_value(service, name, cluster):
    """Return the value of a plugin option for the given cluster.

    The first non-None value found in a node group's configuration wins;
    otherwise the default of the matching plugin config is returned.

    :raises ex.NotFoundException: when neither source knows the option
    """
    node_groups = cluster.node_groups if cluster else []
    for ng in node_groups:
        configured = ng.configuration().get(service, {}).get(name)
        if configured is not None:
            return configured
    for plugin_config in get_plugin_configs():
        same_target = plugin_config.applicable_target == service
        if same_target and plugin_config.name == name:
            return plugin_config.default_value
    message = (_("Unable to get parameter '%(name)s' from service %(service)s")
               % {'name': name, 'service': service})
    raise ex.NotFoundException(name, message)
def is_data_locality_enabled(cluster):
    """Return the cluster's data-locality setting.

    Always False when the global enable_data_locality option is off;
    otherwise the value of the ENABLE_DATA_LOCALITY plugin option.
    """
    if CONF.enable_data_locality:
        return get_plugin_config_value(
            ENABLE_DATA_LOCALITY.applicable_target,
            ENABLE_DATA_LOCALITY.name,
            cluster)
    return False

View File

@ -1,81 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
import sahara.plugins.mapr.util.func_utils as fu
import sahara.utils.files as f
import sahara.utils.xmlutils as x
def load_properties_file(path):
    """Parse a ``key=value`` properties resource into a dict.

    Empty lines, whitespace-only lines and lines starting with '#' are
    skipped. Each remaining line is split on the FIRST '=' only, so values
    that themselves contain '=' are kept intact; a line without '=' yields
    the key with an empty value. Keys and values are stripped.

    :param path: resource path passed to f.get_file_text
    :returns: dict of property name to (string) value
    """
    properties = {}
    for line in f.get_file_text(path).splitlines():
        if not line or line.isspace() or line.startswith('#'):
            continue
        # partition() never fails and keeps any further '=' in the value.
        key, _sep, value = line.partition('=')
        properties[key.strip()] = value.strip()
    return properties
def load_xml_file(path):
    """Load the <property> name/value pairs of an XML resource as a dict.

    Values go through x._adjust_field; names and values are stripped.
    """
    document = x.load_xml_document(path)
    pairs = []
    for node in document.getElementsByTagName('property'):
        name = x.get_text_from_node(node, 'name')
        value = x._adjust_field(x.get_text_from_node(node, 'value'))
        pairs.append((name, value))
    return dict((name.strip(), value.strip()) for name, value in pairs)
def load_raw_file(path):
    """Return the resource text wrapped as ``{'content': <text>}``."""
    text = f.get_file_text(path)
    return {'content': text}
def to_properties_file_content(data):
    """Render a mapping as ``key=value`` lines, one per item.

    Uses str.join instead of the bare ``reduce`` builtin, which does not
    exist on Python 3, and plain ``items()``, which works on both major
    versions.

    :param data: mapping of property name to value
    :returns: the file content; '' for an empty mapping
    """
    return ''.join('%s=%s\n' % (key, value) for key, value in data.items())
def to_xml_file_content(data):
    """Render the mapping via x.create_hadoop_xml and return the result."""
    xml_content = x.create_hadoop_xml(data)
    return xml_content
def to_topology_file_content(data):
    """Render a mapping as ``key value`` lines, one per item.

    The previous ``reduce`` call had no initial value and therefore raised
    TypeError for an empty mapping (and ``reduce`` is not a builtin on
    Python 3); joining the lines returns '' in that case instead.

    :param data: mapping whose items become the two columns of each line
    :returns: the file content; '' for an empty mapping
    """
    return ''.join('%s %s\n' % (key, value) for key, value in data.items())
def to_raw_file_content(data, cfu=True, conv=str):
    """Return raw file content from ``data``.

    With ``cfu`` true the 'content' entry of ``data`` is returned;
    otherwise ``data`` itself is converted with ``conv``.
    """
    if cfu:
        return data['content']
    return conv(data)
def load_file(path, file_type):
    """Load a resource using the loader for ``file_type``.

    Supported types are 'properties', 'xml' and 'raw'; any other type
    yields None.
    """
    if file_type == 'properties':
        loader = load_properties_file
    elif file_type == 'xml':
        loader = load_xml_file
    elif file_type == 'raw':
        loader = load_raw_file
    else:
        return None
    return loader(path)
def to_file_content(data, file_type, *args, **kargs):
    """Render ``data`` with the writer for ``file_type``.

    Supported types are 'properties', 'xml', 'topology' and 'raw'; extra
    arguments are forwarded to the writer. Any other type yields None.
    """
    if file_type == 'properties':
        writer = to_properties_file_content
    elif file_type == 'xml':
        writer = to_xml_file_content
    elif file_type == 'topology':
        writer = to_topology_file_content
    elif file_type == 'raw':
        writer = to_raw_file_content
    else:
        return None
    return writer(data, *args, **kargs)

View File

@ -1,77 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.util.func_utils as fu
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.utils.configs as c
def get_scope_default_configs(version_handler, scope, services=None):
    """Return default config values of one scope, grouped by target.

    :param version_handler: provides get_configs()
    :param scope: value the configs' 'scope' field must equal
    :param services: optional collection restricting 'applicable_target'
    :returns: mapping of applicable target to {name: default_value}
    """
    configs = [conf.to_dict() for conf in version_handler.get_configs()]
    predicate = fu.field_equals_predicate('scope', scope)
    if services:
        target_predicate = fu.in_predicate('applicable_target', services)
        predicate = fu.and_predicate(predicate, target_predicate)
    selected = du.select(
        ['applicable_target', 'name', 'default_value'], configs, predicate)
    pair_reducer = du.iterable_to_values_pair_dict_reducer(
        'name', 'default_value')
    return du.map_by_field_value(
        selected, 'applicable_target', dict, pair_reducer)
def get_cluster_default_configs(version_handler, services=None):
    """Return the default configs of the 'cluster' scope."""
    cluster_defaults = get_scope_default_configs(
        version_handler, 'cluster', services)
    return cluster_defaults
def get_node_default_configs(version_handler, services=None):
    """Return the default configs of the 'node' scope."""
    node_defaults = get_scope_default_configs(
        version_handler, 'node', services)
    return node_defaults
def get_default_configs(version_handler, services=None):
    """Return cluster-scope defaults merged with node-scope defaults."""
    return c.merge_configs(
        get_cluster_default_configs(version_handler, services),
        get_node_default_configs(version_handler, services))
def get_node_group_services(node_group):
    """Return the set of services whose processes run in the node group.

    Services come from the version handler of the group's cluster; a
    service is included when any of the group's node processes is among
    that service's processes.
    """
    hadoop_version = node_group.cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
    services = handler.get_node_processes()
    matched = set()
    for process in node_group.node_processes:
        for service in services:
            if process in services[service]:
                matched.add(service)
    return matched
def get_cluster_configs(cluster):
    """Return cluster-scope defaults merged with the cluster's configs."""
    hadoop_version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
    return c.merge_configs(
        get_cluster_default_configs(handler), cluster.cluster_configs)
def get_configs(node_group):
    """Return the defaults of the group's services merged with its config.

    Defaults are limited to the services found by get_node_group_services
    and merged with ``node_group.configuration()``.
    """
    services = get_node_group_services(node_group)
    hadoop_version = node_group.cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(hadoop_version)
    defaults = get_default_configs(handler, services)
    return c.merge_configs(defaults, node_group.configuration())
def get_service(version_handler, node_process):
    """Return the services whose process collection holds the process."""
    processes_by_service = version_handler.get_node_processes()
    return du.get_keys_by_value_2(processes_by_service, node_process)

View File

@ -1,124 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections as cl
import copy as cp
import functools as ft
import itertools as it
import six
import sahara.plugins.mapr.util.func_utils as fu
def append_to_key(dict_0, dict_1):
    """Expand each key list in ``dict_0`` into a sub-dict of ``dict_1``.

    For every ``key -> keys`` item of ``dict_0`` the result maps ``key``
    to ``{k: dict_1[k]}`` for those ``k`` in ``keys`` present in
    ``dict_1``.
    """
    expanded = {}
    for outer_key, inner_keys in six.iteritems(dict_0):
        expanded[outer_key] = dict(
            (k, dict_1[k]) for k in inner_keys if k in dict_1)
    return expanded
def iterable_to_values_pair_dict_reducer(key_0, key_1):
    """Return a reducer that folds items into a dict of value pairs.

    Each item is mapped with fu.get_values_pair_function(key_0, key_1) and
    the resulting pairs update (and are returned as) the accumulator.
    """
    pair_of = fu.get_values_pair_function(key_0, key_1)

    def reducer(previous, iterable, mapper):
        pairs = dict(map(mapper, iterable))
        previous.update(pairs)
        return previous

    return ft.partial(reducer, mapper=pair_of)
def flatten_to_list_reducer():
    """Return a reducer that appends all items of an iterable to a list.

    The reducer extends the accumulator in place and returns it.
    """
    def reducer(previous, iterable):
        items = list(iterable)
        previous.extend(items)
        return previous

    return reducer
def map_by_field_value(iterable, key, factory=list,
                       iterator_reducer=flatten_to_list_reducer()):
    """Group items by the value of one field.

    Written as an explicit loop because the bare ``reduce`` builtin used
    previously does not exist on Python 3.

    :param iterable: items to group
    :param key: field whose value (via fu.get_value_function) is the group
    :param factory: creates the initial accumulator of each group
    :param iterator_reducer: called as ``reducer(accumulator, items)`` and
        returns the new accumulator
    :returns: defaultdict mapping each field value to its accumulator
    """
    mapping = cl.defaultdict(factory)
    # groupby only groups adjacent items; folding every run into the
    # accumulator of its key merges runs that share a key.
    for group_key, group in it.groupby(iterable, fu.get_value_function(key)):
        mapping[group_key] = iterator_reducer(mapping[group_key], iter(group))
    return mapping
def map_by_fields_values(iterable, fields, factory=list,
                         reducer=flatten_to_list_reducer()):
    """Group items by several fields, producing nested dicts.

    The last field is grouped with ``factory``/``reducer``; every earlier
    field adds one dict level keyed by that field's values.
    """
    if len(fields) != 1:
        grouped = map_by_field_value(iterable, fields[0])
        nested = {}
        for value, members in six.iteritems(grouped):
            nested[value] = map_by_fields_values(
                members, fields[1:], factory, reducer)
        return nested
    return map_by_field_value(iterable, fields[0], factory, reducer)
def get_keys_by_value_type(mapping, value_type):
    """Return the keys of ``mapping`` whose values are of ``value_type``.

    Always returns a list. The previous ``filter`` call returns a lazy
    iterator on Python 3, which is truthy even when empty and so breaks
    callers that test the result with ``not``.
    """
    return [key for key in mapping if isinstance(mapping[key], value_type)]
def deep_update(dict_0, dict_1, copy=True):
    """Update ``dict_0`` with ``dict_1``, merging shared dict values.

    Keys whose values are dicts in both inputs are merged recursively;
    every other key of ``dict_1`` overwrites the one in the result.

    :param copy: work on a deep copy of ``dict_0`` (default) instead of
        updating ``dict_0`` itself
    :returns: the updated mapping
    """
    if copy:
        result = cp.deepcopy(dict_0)
    else:
        result = dict_0
    nested_keys_0 = set(get_keys_by_value_type(dict_0, dict))
    nested_keys_1 = set(get_keys_by_value_type(dict_1, dict))
    shared_keys = nested_keys_0 & nested_keys_1
    if not shared_keys:
        result.update(dict_1)
        return result
    for key, value in six.iteritems(dict_1):
        if key in shared_keys:
            result[key] = deep_update(dict_0[key], value)
        else:
            result[key] = value
    return result
def get_keys_by_value(mapping, value):
    """Return the keys of ``mapping`` whose value equals ``value``."""
    matches = []
    for key, candidate in six.iteritems(mapping):
        if candidate == value:
            matches.append(key)
    return matches
# TODO(aosadchiy): find more appropriate name
def get_keys_by_value_2(mapping, value):
    """Return the keys of ``mapping`` whose (container) value holds ``value``."""
    matching = []
    for key in mapping:
        if value in mapping[key]:
            matching.append(key)
    return matching
def iterable_to_values_list_reducer(key):
    """Build a reducer appending ``item[key]`` of every item to a list.

    The returned callable takes ``(previous, iterable)``, extends
    ``previous`` in place and returns it.
    """
    get_value = fu.get_value_function(key)

    def reducer(previous, iterable, mapper):
        previous.extend([mapper(item) for item in iterable])
        return previous

    return ft.partial(reducer, mapper=get_value)
def select(fields, iterable, predicate=fu.true_predicate):
    """Project ``fields`` out of every item accepted by ``predicate``.

    :param fields: keys to keep in each resulting dict
    :param iterable: source items (mappings)
    :param predicate: one-argument filter; accepts everything by default
    :returns: list of dicts. A list rather than a lazy ``map`` object is
        returned so the result can be indexed and tested for emptiness
        on Python 3 as well.
    """
    extract = fu.extract_fields_function(fields)
    return [extract(item) for item in iterable if predicate(item)]
def has_no_dict_values_predicate(n):
    """Return True when no value of mapping ``n`` is a dict.

    Written with ``any`` instead of ``not filter(...)``: on Python 3
    ``filter`` returns an always-truthy iterator, which made the old
    lambda return False for every input.
    """
    return not any(isinstance(n[key], dict) for key in n)
def list_of_vp_dicts_function(key_0, key_1):
    """Build a function turning a mapping into a list of two-field dicts.

    Every ``name -> value`` entry of the mapping becomes
    ``{key_0: name, key_1: value}``.
    """
    def transformer(item, key_0, key_1):
        rows = []
        for name in item:
            rows.append({key_0: name, key_1: item[name]})
        return rows

    return ft.partial(transformer, key_0=key_0, key_1=key_1)
def flattened_dict(mapping, keys, is_terminal=has_no_dict_values_predicate,
                   transform=None):
    """Flatten a nested mapping into a list of flat dicts.

    :param mapping: nested dict to flatten
    :param keys: field names, one per nesting level; the last two name
        the key/value fields produced by the default ``transform``
    :param is_terminal: predicate telling when ``mapping`` is a leaf level
    :param transform: converts a leaf mapping into an iterable of dicts;
        defaults to list_of_vp_dicts_function over the last two keys
    :returns: list of dicts, each tagged with the keys of the levels above
    """
    if not transform:
        transform = list_of_vp_dicts_function(*keys[-2:])
    if is_terminal(mapping):
        return list(transform(mapping))
    rows = []
    for key, value in six.iteritems(mapping):
        # Tag every row of the sub-tree with this level's key.
        # (A generator replaces ``it.imap``, which is Python 2 only.)
        tag = fu.append_field_function(keys[0], key)
        rows.extend(tag(row) for row in flattened_dict(
            value, keys[1:], is_terminal, transform))
    return rows

View File

@ -1,167 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import copy as c
import functools as ft
import itertools as it
import six
# predicates
def true_predicate(i):
    """Accept any item."""
    return True


def false_predicate(i):
    """Reject any item."""
    return False
def not_predicate(predicate):
    """Return a predicate that is the logical negation of ``predicate``."""
    def negated(i, p):
        return not p(i)

    return ft.partial(negated, p=predicate)
def and_predicate(*predicates):
    """Return a predicate that holds when all ``predicates`` hold.

    A single predicate is returned unchanged; with no predicates the
    result accepts everything.
    """
    if len(predicates) == 1:
        return predicates[0]

    def predicate(item, predicates):
        return all(check(item) for check in predicates)

    return ft.partial(predicate, predicates=predicates)
def or_predicate(*predicates):
    """Return a predicate that holds when any of ``predicates`` holds.

    A single predicate is returned unchanged; with no predicates the
    result rejects everything.
    """
    if len(predicates) == 1:
        return predicates[0]

    def predicate(item, predicates):
        return any(check(item) for check in predicates)

    return ft.partial(predicate, predicates=predicates)
def impl_predicate(p0, p1):
    """Return the implication ``p0 -> p1``, i.e. ``(not p0) or p1``."""
    negated_premise = not_predicate(p0)
    return or_predicate(negated_premise, p1)
def field_equals_predicate(key, value):
    """Return a predicate checking that ``item[key] == value``."""
    def matches(i, k, v):
        return i[k] == v

    return ft.partial(matches, k=key, v=value)
def like_predicate(template, ignored=()):
    """Build a predicate matching items whose fields equal ``template``.

    :param template: mapping of field name -> required value; an empty
        template matches every item
    :param ignored: field names of ``template`` that must not be checked
        (immutable default instead of a shared mutable list)
    :returns: one-argument predicate
    """
    if not template:
        return true_predicate
    elif len(template) == 1:
        # ``next()`` works on Python 2 and 3; the ``.next()`` method used
        # here before exists on Python 2 iterators only.
        k, v = next(six.iteritems(template))
        return true_predicate if k in ignored else field_equals_predicate(k, v)
    else:
        return and_predicate(*[field_equals_predicate(key, value)
                               for key, value in six.iteritems(template)
                               if key not in ignored])
def in_predicate(key, values):
    """Return a predicate checking that ``item[key]`` equals one of values.

    With no values the result rejects every item.
    """
    if values:
        alternatives = [field_equals_predicate(key, candidate)
                        for candidate in values]
        return or_predicate(*alternatives)
    return false_predicate
# functions
def chain_function(*functions):
    """Compose one-argument functions left to right.

    ``chain_function(f, g)(x)`` equals ``g(f(x))``. A single function is
    returned unchanged.

    :raises TypeError: when called without any function
    """
    def compose(first, second):
        return ft.partial(lambda i, p, c: c(p(i)), p=first, c=second)

    # functools.reduce exists on Python 2.6+ and 3; the bare ``reduce``
    # builtin used before is Python 2 only.
    return ft.reduce(compose, functions)
def copy_function():
    """Return a function producing a deep copy of its argument."""
    def duplicate(i):
        return c.deepcopy(i)

    return duplicate
def append_field_function(key, value):
    """Return a function that copies an item and sets ``item[key] = value``.

    The input item is deep-copied and left untouched.
    """
    def mapper(item, key, value):
        updated = c.deepcopy(item)
        updated[key] = value
        return updated

    return ft.partial(mapper, key=key, value=value)
def append_fields_function(fields):
    """Return a function that copies an item and sets all ``fields`` on it.

    :param fields: mapping of field name -> value to set; when empty the
        returned function only deep-copies its argument
    """
    if not fields:
        return copy_function()
    elif len(fields) == 1:
        # ``next()`` instead of the Python-2-only ``.next()`` method.
        key, value = next(six.iteritems(fields))
        return append_field_function(key, value)
    else:
        return chain_function(*[append_field_function(key, value)
                                for key, value in six.iteritems(fields)])
def get_values_pair_function(key_0, key_1):
    """Return a function mapping an item to ``(item[key_0], item[key_1])``."""
    def values_pair(i, k0, k1):
        return i[k0], i[k1]

    return ft.partial(values_pair, k0=key_0, k1=key_1)
def get_field_function(key):
    """Return a function mapping an item to the pair ``(key, item[key])``."""
    def field(i, k):
        return k, i[k]

    return ft.partial(field, k=key)
def get_fields_function(keys):
    """Return a function mapping an item to ``[(key, item[key]), ...]``.

    The pairs follow the order of ``keys``.
    """
    def fields_of(i, k):
        return [(name, i[name]) for name in k]

    return ft.partial(fields_of, k=keys)
def extract_fields_function(keys):
    """Return a function projecting an item onto the given ``keys``."""
    def extract(i):
        return dict((name, i[name]) for name in keys)

    return extract
def get_value_function(key):
    """Return a function mapping an item to ``item[key]``."""
    def value_of(i, k):
        return i[k]

    return ft.partial(value_of, k=key)
def set_default_value_function(key, value):
    """Return a function that copies an item and fills in a missing ``key``.

    An existing value for ``key`` is kept; the input is never modified.
    """
    def mapper(item, key, value):
        filled = c.deepcopy(item)
        if key in filled:
            return filled
        filled[key] = value
        return filled

    return ft.partial(mapper, key=key, value=value)
def set_default_values_function(fields):
    """Return a function that copies an item and fills in missing fields.

    :param fields: mapping of field name -> default value; when empty the
        returned function only deep-copies its argument
    """
    if not fields:
        return copy_function()
    elif len(fields) == 1:
        # ``next()`` instead of the Python-2-only ``.next()`` method.
        key, value = next(six.iteritems(fields))
        return set_default_value_function(key, value)
    else:
        return chain_function(*[set_default_value_function(key, value)
                                for key, value in six.iteritems(fields)])
def values_pair_to_dict_function(key_0, key_1):
    """Return a function mapping a pair to ``{key_0: pair[0], key_1: pair[1]}``."""
    def to_dict(vp, k0, k1):
        return {k0: vp[0], k1: vp[1]}

    return ft.partial(to_dict, k0=key_0, k1=key_1)
def flatten(iterable):
    """Lazily chain the items of every sub-iterable, in order."""
    concatenate = it.chain.from_iterable
    return concatenate(iterable)
def sync_execute_consumer(*consumers):
    """Return a consumer that feeds its argument to each consumer in order."""
    def consumer(argument, consumers):
        for callback in consumers:
            callback(argument)

    return ft.partial(consumer, consumers=consumers)

View File

@ -0,0 +1,90 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import uuid
def unique_list(iterable, mapper=lambda i: i):
    """Return the mapped values of ``iterable`` without duplicates.

    First-occurrence order is kept. Values are compared with ``==``, so
    unhashable values are supported.
    """
    seen = []
    for value in (mapper(item) for item in iterable):
        if value in seen:
            continue
        seen.append(value)
    return seen
def _run_as(user, command):
if not user:
return command
return 'sudo -u %(user)s %(command)s' % {'user': user, 'command': command}
def unique_file_name(base='/tmp'):
    """Return ``<base>/<random uuid4>`` as a fresh file path string."""
    token = uuid.uuid4()
    return '%(base)s/%(uuid)s' % {'base': base, 'uuid': token}
def remove(instance, path, recursive=True, run_as=None):
    """Run ``rm`` for ``path`` on the instance.

    :param recursive: adds ``-r`` to the command when true
    :param run_as: optional user the command is run as via ``sudo -u``
    """
    flag = '-r' if recursive else ''
    command = 'rm %(recursive)s %(path)s' % {'recursive': flag, 'path': path}
    with instance.remote() as r:
        r.execute_command(_run_as(run_as, command))
def create_archive(instance, path, archive=None, run_as=None):
    """Tar the contents of directory ``path`` on the instance.

    :param archive: target archive path; a unique name under /tmp is
        generated when omitted
    :param run_as: optional user the command is run as via ``sudo -u``
    :returns: path of the created archive
    """
    archive = archive or unique_file_name('/tmp')
    tar = 'tar cf %(archive)s -C %(path)s .' % {'archive': archive,
                                                'path': path}
    with instance.remote() as r:
        r.execute_command(_run_as(run_as, tar))
    return archive
def unpack_archive(instance, src, dest, cleanup=False, run_as=None):
    """Extract tar archive ``src`` into directory ``dest`` on the instance.

    ``dest`` is created with ``mkdir -p`` first.

    :param cleanup: remove ``src`` after extraction when true
    :param run_as: optional user the commands are run as via ``sudo -u``
    """
    commands = [
        'mkdir -p %s' % dest,
        'tar xf %(src)s -C %(dest)s' % {'src': src, 'dest': dest},
    ]
    if cleanup:
        commands.append('rm -r %s' % src)
    with instance.remote() as r:
        for command in commands:
            r.execute_command(_run_as(run_as, command))
def is_directory(instance, path):
    """Return True when ``[ -d path ]`` succeeds on the instance."""
    check = '[ -d %s ]' % path
    with instance.remote() as r:
        # A non-zero exit code means "not a directory", not an error.
        exit_code, _out = r.execute_command(check, raise_when_error=False)
        return not exit_code
def copy_file(s_path, s_instance, d_path, d_instance, run_as=None):
    """Copy one file from the source instance to the destination instance.

    The file is read and written with root rights only when ``run_as``
    is exactly ``'root'``.
    """
    as_root = (run_as == 'root')
    with s_instance.remote() as source:
        data = source.read_file_from(s_path, run_as_root=as_root)
    with d_instance.remote() as destination:
        destination.write_file_to(d_path, data, run_as_root=as_root)
def copy_dir(s_path, s_instance, d_path, d_instance, run_as=None):
    """Copy directory ``s_path`` from one instance to ``d_path`` on another.

    The directory is archived on the source, transferred as a single
    file, unpacked on the destination (which also deletes the transferred
    archive) and the source-side archive is removed.

    :param run_as: optional user the remote commands are run as
    """
    archive = create_archive(s_instance, s_path, run_as=run_as)
    try:
        tmp_path = unique_file_name('/tmp')
        copy_file(archive, s_instance, tmp_path, d_instance, run_as)
        unpack_archive(d_instance, tmp_path, d_path, True, run_as)
    finally:
        # Remove the temporary archive from the source instance even when
        # the transfer or unpack step fails.
        remove(s_instance, archive, True, run_as)
def copy(s_path, s_instance, d_path, d_instance, run_as=None):
    """Copy a file or a directory between two instances.

    Dispatches to copy_dir when ``s_path`` is a directory on the source
    instance and to copy_file otherwise.
    """
    copier = copy_dir if is_directory(s_instance, s_path) else copy_file
    copier(s_path, s_instance, d_path, d_instance, run_as)

View File

@ -1,17 +1,19 @@
# Copyright (c) 2014, MapR Technologies
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import uuid
import six
@ -19,13 +21,15 @@ import six
MV_TO_MAPRFS_CMD = ('sudo -u %(user)s'
' hadoop fs -copyFromLocal %(source)s %(target)s'
' && sudo rm -f %(source)s')
MKDIR_CMD = 'sudo -u %(user)s hadoop fs -mkdir -p %(path)s'
MKDIR_CMD_MAPR4 = 'sudo -u %(user)s hadoop fs -mkdir -p %(path)s'
MKDIR_CMD_MAPR3 = 'sudo -u %(user)s hadoop fs -mkdir %(path)s'
def put_file_to_maprfs(r, content, file_name, path, hdfs_user):
tmp_file_name = '/tmp/%s.%s' % (file_name, six.text_type(uuid.uuid4()))
r.write_file_to(tmp_file_name, content)
move_from_local(r, tmp_file_name, path + '/' + file_name, hdfs_user)
target = os.path.join(path, file_name)
move_from_local(r, tmp_file_name, target, hdfs_user)
def move_from_local(r, source, target, hdfs_user):
@ -33,5 +37,11 @@ def move_from_local(r, source, target, hdfs_user):
r.execute_command(MV_TO_MAPRFS_CMD % args)
def create_maprfs_dir(remote, dir_name, hdfs_user):
remote.execute_command(MKDIR_CMD % {'user': hdfs_user, 'path': dir_name})
def create_maprfs4_dir(remote, dir_name, hdfs_user):
remote.execute_command(MKDIR_CMD_MAPR4 % {'user': hdfs_user,
'path': dir_name})
def create_maprfs3_dir(remote, dir_name, hdfs_user):
remote.execute_command(MKDIR_CMD_MAPR3 % {'user': hdfs_user,
'path': dir_name})

View File

@ -1,41 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
OOZIE = 'Oozie'
HIVE = 'Hive'
HIVE_METASTORE = 'HiveMetastore'
HIVE_SERVER2 = 'HiveServer2'
CLDB = 'CLDB'
FILE_SERVER = 'FileServer'
ZOOKEEPER = 'ZooKeeper'
RESOURCE_MANAGER = 'ResourceManager'
HISTORY_SERVER = 'HistoryServer'
IS_M7_ENABLED = 'Enable MapR-DB'
GENERAL = 'general'
JOBTRACKER = 'JobTracker'
NODE_MANAGER = 'NodeManager'
DATANODE = 'Datanode'
TASK_TRACKER = 'TaskTracker'
SECONDARY_NAMENODE = 'SecondaryNamenode'
NFS = 'NFS'
WEB_SERVER = 'Webserver'
WAIT_OOZIE_INTERVAL = 300
WAIT_NODE_ALARM_NO_HEARTBEAT = 360
ecosystem_components = ['Oozie',
'Hive-Metastore',
'HiveServer2',
'HBase-Master',
'HBase-RegionServer',
'HBase-Client',
'Pig']

View File

@ -1,198 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import os.path
from oslo_log import log as logging
import six
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.util.func_utils as fu
import sahara.plugins.provisioning as p
import sahara.utils.files as fm
LOG = logging.getLogger(__name__)
class PluginSpec(object):
def __init__(self, path):
self.base_dir = os.path.dirname(path)
self.plugin_spec_dict = self._load_plugin_spec_dict(path)
self.service_file_name_map = self._load_service_file_name_map()
self.default_configs = self._load_default_configs()
self.service_node_process_map = self._load_service_node_process_map()
self.plugin_config_objects = self._load_plugin_config_objects()
self.file_name_config_map = self._load_file_name_config_map()
self.plugin_config_items = self._load_plugin_config_items()
self.plugin_configs = self._load_plugin_configs()
self.default_plugin_configs = self._load_default_plugin_configs()
self.file_type_map = self._load_file_type_map()
def _load_plugin_spec_dict(self, path):
LOG.debug('Loading plugin spec from %s', path)
plugin_spec_dict = json.loads(fm.get_file_text(path))
return plugin_spec_dict
def _load_service_file_name_map(self):
LOG.debug('Loading service -> filename mapping')
return dict((s['name'], [fn for fn in s['files']])
for s in self.plugin_spec_dict['services']
if 'files' in s and s['files'])
def _load_default_configs(self):
LOG.debug('Loading defaults from local files')
file_name_data_map = {}
for f in self.plugin_spec_dict['files']:
if 'local' not in f:
LOG.debug('%s skipped. No "local" section', f['remote'])
continue
local_path = os.path.join(self.base_dir, f['local'])
LOG.debug('Loading %(local_path)s as default for %(remote)s',
{'local_path': local_path, 'remote': f['remote']})
data = cfu.load_file(local_path, f['type'])
file_name_data_map[f['remote']] = data
return du.append_to_key(self.service_file_name_map, file_name_data_map)
def _load_plugin_config_items(self):
LOG.debug('Loading full configs map for plugin')
items = map(lambda i: i.to_dict(), self.plugin_config_objects)
def mapper(item):
file_name = du.get_keys_by_value_2(
self.file_name_config_map, item['name'])[0]
append_f = fu.append_field_function('file', file_name)
return append_f(item)
return map(mapper, items)
def _load_plugin_configs(self):
LOG.debug('Loading plugin configs {service:{file:{name:value}}}')
m_fields = ['applicable_target', 'file']
vp_fields = ('name', 'default_value')
reducer = du.iterable_to_values_pair_dict_reducer(*vp_fields)
return du.map_by_fields_values(self.plugin_config_items,
m_fields, dict, reducer)
def _load_default_plugin_configs(self):
return du.deep_update(self.default_configs, self.plugin_configs)
def _load_service_node_process_map(self):
LOG.debug('Loading {service:[node process]} mapping')
return dict((s['name'], [np for np in s['node_processes']])
for s in self.plugin_spec_dict['services']
if 'node_processes' in s and s['node_processes'])
def _load_file_name_config_map(self):
LOG.debug('Loading {filename:[config_name]} names mapping')
r = {}
for fd in self.plugin_spec_dict['files']:
if 'configs' in fd:
r[fd['remote']] = [i['name']
for ir, sd in six.iteritems(fd['configs'])
for s, items in six.iteritems(sd)
for i in items]
return r
def _load_plugin_config_objects(self):
LOG.debug('Loading config objects for sahara-dashboard')
def mapper(item):
req = ['name', 'applicable_target', 'scope']
opt = ['description', 'config_type', 'config_values',
'default_value', 'is_optional', 'priority']
kargs = dict((k, item[k]) for k in req + opt if k in item)
return p.Config(**kargs)
result = []
for file_dict in self.plugin_spec_dict['files']:
if 'configs' not in file_dict:
LOG.debug('%s skipped. No "configs" section',
file_dict['remote'])
continue
remote_path = file_dict['remote']
applicable_target = du.get_keys_by_value_2(
self.service_file_name_map, remote_path)[0]
for is_required, scope_dict in six.iteritems(file_dict['configs']):
is_optional = is_required != 'required'
for scope, items in six.iteritems(scope_dict):
fields = {'file': remote_path, 'is_optional': is_optional,
'scope': scope,
'applicable_target': applicable_target}
append_f = fu.append_fields_function(fields)
result.extend([append_f(i) for i in items])
return map(mapper, result)
def _load_file_type_map(self):
LOG.debug('Loading {filename:type} mapping')
return dict((f['remote'], f['type'])
for f in self.plugin_spec_dict['files'])
def get_node_process_service(self, node_process):
return du.get_keys_by_value_2(self.service_node_process_map,
node_process)[0]
def get_default_plugin_configs(self, services):
return dict((k, self.default_plugin_configs[k])
for k in services if k in self.default_plugin_configs)
def get_config_file(self, scope, service, name):
p_template = {
'applicable_target': service, 'scope': scope, 'name': name}
q_fields = ['file']
q_predicate = fu.like_predicate(p_template)
q_source = self.plugin_config_items
q_result = du.select(q_fields, q_source, q_predicate)
if q_result and 'file' in q_result[0]:
return q_result[0]['file']
else:
return None
def get_file_type(self, file_name):
if file_name in self.file_type_map:
return self.file_type_map[file_name]
else:
return None
def get_service_for_file_name(self, file_name):
return du.get_keys_by_value_2(self.service_file_name_map, file_name)[0]
def get_version_config_objects(self):
common_fields = {'scope': 'cluster',
'config_type': 'dropdown',
'is_optional': False,
'priority': 1}
def has_version_field(service):
return 'versions' in service
def get_versions(service):
return {'name': '%s Version' % service['name'],
'applicable_target': service['name'],
'config_values': [(v, v) for v in service['versions']]}
def add_common_fields(item):
item.update(common_fields)
return item
def to_config(item):
return p.Config(**item)
mapper = fu.chain_function(get_versions, add_common_fields, to_config)
source = self.plugin_spec_dict['services']
return map(mapper, filter(has_version_field, source))
def get_configs(self):
return self.plugin_config_objects + self.get_version_config_objects()

View File

@ -1,3 +0,0 @@
classic_version=0.20.2
yarn_version=2.4.1
default_mode=%(mode)s

View File

@ -1,9 +0,0 @@
#!/bin/sh
# Block until the MapR-FS launch lock file disappears, polling every 5 seconds.
# The lock test is the loop condition itself: the previous `while True` relied
# on a command named `True`, which does not exist (the shell builtin is the
# lowercase `true`), so the loop ended immediately without waiting.
while [ -f '/tmp/launching-mapr-mfs.lck' ]; do
    sleep 5
done

View File

@ -1,72 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from sahara.i18n import _LI
LOG = logging.getLogger(__name__)
def run_configure_sh(remote, script_string):
LOG.info(_LI("running configure.sh script"))
remote.execute_command(script_string, run_as_root=True)
def start_zookeeper(remote):
LOG.info(_LI("Starting mapr-zookeeper"))
remote.execute_command('service mapr-zookeeper start', run_as_root=True)
def start_oozie(remote):
LOG.info(_LI("Starting mapr-oozie"))
remote.execute_command('service mapr-oozie start',
run_as_root=True,
raise_when_error=False)
def start_hive_metastore(remote):
    """Start the mapr-hivemetastore service as root on the given remote."""
    # The message used to say "mapr-hive-server2" (copied from
    # start_hive_server2), which made the two starts indistinguishable in logs.
    LOG.info(_LI("Starting mapr-hivemetastore"))
    remote.execute_command('service mapr-hivemetastore start',
                           run_as_root=True)
def start_hive_server2(remote):
LOG.info(_LI("Starting mapr-hive-server2"))
remote.execute_command('service mapr-hiveserver2 start', run_as_root=True)
def start_warden(remote):
LOG.info(_LI("Starting mapr-warden"))
remote.execute_command('service mapr-warden start', run_as_root=True)
def start_cldb(remote):
LOG.info(_LI("Starting mapr-cldb"))
remote.execute_command('service mapr-cldb start', run_as_root=True)
def start_node_manager(remote):
LOG.info(_LI("Starting nodemanager"))
remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0'
'/sbin/yarn-daemon.sh start nodemanager'),
run_as_root=True)
def start_resource_manager(remote):
LOG.info(_LI("Starting resourcemanager"))
remote.execute_command(('/opt/mapr/hadoop/hadoop-2.3.0'
'/sbin/yarn-daemon.sh start resourcemanager'),
run_as_root=True)

View File

@ -1,145 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from sahara import context
from sahara.i18n import _LI
from sahara.plugins.mapr.util import config
from sahara.plugins.mapr.util import names
from sahara.plugins.mapr.util import run_scripts
from sahara.plugins.mapr.util import start_helper
from sahara.utils import general as gen
LOG = logging.getLogger(__name__)
STOP_WARDEN_CMD = 'service mapr-warden stop'
STOP_ZOOKEEPER_CMD = 'service mapr-zookeeper stop'
GET_SERVER_ID_CMD = ('maprcli node list -json -filter [ip==%s] -columns id'
' | grep id | grep -o \'[0-9]*\'')
MOVE_NODE_CMD = 'maprcli node move -serverids %s -topology /decommissioned'
GET_HOSTNAME_CMD = ('maprcli node list -json -filter [ip==%s]'
' -columns hostname | grep hostname'
' | grep -Po \'(?<=("hostname":")).*?(?=")\'')
REMOVE_NODE_CMD = 'maprcli node remove -filter [ip==%(ip)s] -nodes %(nodes)s'
# Removes every mapr-* package with apt-get when ``python -mplatform`` reports
# Ubuntu, falling back to yum otherwise. Raw strings keep the literal ``\*``
# (so the remote shell does not expand the glob) without relying on the
# invalid ``\*`` escape sequence of a normal string literal.
REMOVE_MAPR_PACKAGES_CMD = ('python -mplatform | grep Ubuntu '
                            r'&& apt-get remove mapr-\* -y'
                            r' || yum remove mapr-\* -y')
REMOVE_MAPR_HOME_CMD = 'rm -rf /opt/mapr'
REMOVE_MAPR_CORES_CMD = 'rm -rf /opt/cores/*.core.*'
def scale_cluster(cluster, instances, disk_setup_script_path, waiting_script,
                  context, configure_sh_string, is_node_awareness):
    """Install, configure and start MapR services on added instances.

    :param cluster: cluster being scaled
    :param instances: instances added to the cluster
    :param disk_setup_script_path: passed through to start_services
    :param waiting_script: passed through to start_services
    :param context: cluster context given to install_role_on_instance;
        NOTE: shadows the module-level ``context`` import in this function
    :param configure_sh_string: passed through to start_services
    :param is_node_awareness: accepted but not used by this function
    """
    LOG.info(_LI('START: Cluster scaling. Cluster = %s'), cluster.name)
    for inst in instances:
        start_helper.install_role_on_instance(inst, context)
    config.configure_instances(cluster, instances)
    start_services(cluster, instances, disk_setup_script_path,
                   waiting_script, configure_sh_string)
    # Log the cluster name, matching the START message, rather than the
    # whole cluster object.
    LOG.info(_LI('END: Cluster scaling. Cluster = %s'), cluster.name)
def decommission_nodes(cluster, instances, configure_sh_string):
LOG.info(_LI('Start decommission . Cluster = %s'), cluster.name)
move_node(cluster, instances)
stop_services(cluster, instances)
context.sleep(names.WAIT_NODE_ALARM_NO_HEARTBEAT)
remove_node(cluster, instances)
remove_services(cluster, instances)
if check_for_cldb_or_zookeeper_service(instances):
all_instances = gen.get_instances(cluster)
current_cluster_instances = [
x for x in all_instances if x not in instances]
for inst in current_cluster_instances:
start_helper.exec_configure_sh_on_instance(
cluster, inst, configure_sh_string)
LOG.info(_LI('End decommission. Cluster = %s'), cluster.name)
def start_services(cluster, instances, disk_setup_script_path,
waiting_script, configure_sh_string):
LOG.info(_LI('START: Starting services.'))
for inst in instances:
start_helper.exec_configure_sh_on_instance(
cluster, inst, configure_sh_string)
start_helper.wait_for_mfs_unlock(cluster, waiting_script)
start_helper.setup_maprfs_on_instance(inst, disk_setup_script_path)
if check_if_is_zookeeper_node(inst):
run_scripts.start_zookeeper(inst.remote())
run_scripts.start_warden(inst.remote())
if check_for_cldb_or_zookeeper_service(instances):
start_helper.exec_configure_sh_on_cluster(
cluster, configure_sh_string)
LOG.info(_LI('END: Starting services.'))
def stop_services(cluster, instances):
    """Stop mapr-warden, and mapr-zookeeper where present, on instances.

    :param cluster: not used by this function
    :param instances: instances whose services are stopped
    """
    LOG.info(_LI("Stop warden and zookeeper"))
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(STOP_WARDEN_CMD, run_as_root=True)
            if check_if_is_zookeeper_node(instance):
                r.execute_command(STOP_ZOOKEEPER_CMD, run_as_root=True)
    # Spelling of the message fixed ("stoped" -> "stopped").
    LOG.info(_LI("Warden and zookeeper stopped"))
def move_node(cluster, instances):
LOG.info(_LI("Start moving the node to the /decommissioned"))
for instance in instances:
with instance.remote() as r:
command = GET_SERVER_ID_CMD % instance.management_ip
ec, out = r.execute_command(command, run_as_root=True)
command = MOVE_NODE_CMD % out.strip()
r.execute_command(command, run_as_root=True)
LOG.info(_LI("Nodes moved to the /decommissioned"))
def remove_node(cluster, instances):
    """Remove the given instances from the MapR cluster via maprcli.

    For each instance the hostname is looked up by management IP and then
    passed to ``maprcli node remove``.

    :param cluster: not used by this function
    :param instances: instances to remove
    """
    # Messages are wrapped in _LI like every other info log in this module.
    LOG.info(_LI("Start removing the nodes"))
    for instance in instances:
        with instance.remote() as r:
            command = GET_HOSTNAME_CMD % instance.management_ip
            ec, out = r.execute_command(command, run_as_root=True)
            command = REMOVE_NODE_CMD % {'ip': instance.management_ip,
                                         'nodes': out.strip()}
            r.execute_command(command, run_as_root=True)
    LOG.info(_LI("Nodes removed"))
def remove_services(cluster, instances):
LOG.info(_LI("Start remove all mapr services"))
for instance in instances:
with instance.remote() as r:
r.execute_command(REMOVE_MAPR_PACKAGES_CMD, run_as_root=True)
r.execute_command(REMOVE_MAPR_HOME_CMD, run_as_root=True)
r.execute_command(REMOVE_MAPR_CORES_CMD, run_as_root=True)
LOG.info(_LI("All mapr services removed"))
def check_if_is_zookeeper_node(instance):
processes_list = instance.node_group.node_processes
return names.ZOOKEEPER in processes_list
def check_for_cldb_or_zookeeper_service(instances):
for inst in instances:
np_list = inst.node_group.node_processes
if names.ZOOKEEPER in np_list or names.CLDB in np_list:
return True
return False

View File

@ -1,178 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from sahara import context
from sahara.i18n import _LI
from sahara.plugins.mapr.util import names
from sahara.plugins.mapr.util import run_scripts
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.utils as utils
from sahara.utils import files as files
LOG = logging.getLogger(__name__)
def exec_configure_sh_on_cluster(cluster, script_string):
inst_list = utils.get_instances(cluster)
for n in inst_list:
exec_configure_sh_on_instance(cluster, n, script_string)
def exec_configure_sh_on_instance(cluster, instance, script_string):
LOG.info(_LI('START: Executing configure.sh'))
if check_for_mapr_db(cluster):
script_string += ' -M7'
if not check_if_mapr_user_exist(instance):
script_string += ' --create-user'
LOG.debug('script_string = %s', script_string)
instance.remote().execute_command(script_string, run_as_root=True)
LOG.info(_LI('END: Executing configure.sh'))
def check_for_mapr_db(cluster):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
return v_handler.get_context(cluster).is_m7_enabled()
def setup_maprfs_on_cluster(cluster, path_to_disk_setup_script):
    """Set up MapR-FS on every FileServer instance of the cluster.

    :param cluster: cluster whose FileServer instances are processed
    :param path_to_disk_setup_script: local path of the disk setup script
        handed to setup_maprfs_on_instance
    """
    # Use the shared constant (as wait_for_mfs_unlock does) instead of a
    # hard-coded 'FileServer' literal.
    mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER)
    for instance in mapr_node_list:
        setup_maprfs_on_instance(instance, path_to_disk_setup_script)
def setup_maprfs_on_instance(instance, path_to_disk_setup_script):
LOG.info(_LI('START: Setup maprfs on instance %s'), instance.instance_name)
create_disk_list_file(instance, path_to_disk_setup_script)
execute_disksetup(instance)
LOG.info(_LI('END: Setup maprfs on instance.'))
def create_disk_list_file(instance, path_to_disk_setup_script):
LOG.info(_LI('START: Creating disk list file.'))
script_path = '/tmp/disk_setup_script.sh'
rmt = instance.remote()
LOG.debug('Writing /tmp/disk_setup_script.sh')
rmt.write_file_to(
script_path, files.get_file_text(path_to_disk_setup_script))
LOG.debug('Start executing command: chmod +x %s', script_path)
rmt.execute_command('chmod +x ' + script_path, run_as_root=True)
LOG.debug('Done for executing command.')
args = ' '.join(instance.node_group.storage_paths())
cmd = '%s %s' % (script_path, args)
LOG.debug('Executing %s', cmd)
rmt.execute_command(cmd, run_as_root=True)
LOG.info(_LI('END: Creating disk list file.'))
def execute_disksetup(instance):
LOG.info(_LI('START: Executing disksetup.'))
rmt = instance.remote()
rmt.execute_command(
'/opt/mapr/server/disksetup -F /tmp/disk.list', run_as_root=True)
LOG.info(_LI('END: Executing disksetup.'))
def wait_for_mfs_unlock(cluster, path_to_waiting_script):
mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER)
for instance in mapr_node_list:
create_waiting_script_file(instance, path_to_waiting_script)
exec_waiting_script_on_instance(instance)
def start_zookeeper_nodes_on_cluster(cluster):
zkeeper_node_list = utils.get_instances(cluster, names.ZOOKEEPER)
for z_keeper_node in zkeeper_node_list:
run_scripts.start_zookeeper(z_keeper_node.remote())
def start_warden_on_cluster(cluster):
node_list = utils.get_instances(cluster)
for node in node_list:
run_scripts.start_warden(node.remote())
def start_warden_on_cldb_nodes(cluster):
node_list = utils.get_instances(cluster, names.CLDB)
for node in node_list:
run_scripts.start_warden(node.remote())
def start_warden_on_other_nodes(cluster):
node_list = utils.get_instances(cluster)
for node in node_list:
if names.CLDB not in node.node_group.node_processes:
run_scripts.start_warden(node.remote())
def create_waiting_script_file(instance, path_to_waiting_script):
LOG.info(_LI('START: Creating waiting script file.'))
script_path = '/tmp/waiting_script.sh'
rmt = instance.remote()
rmt.write_file_to(script_path, files.get_file_text(path_to_waiting_script))
LOG.info(_LI('END: Creating waiting script file.'))
def exec_waiting_script_on_instance(instance):
LOG.info(_LI('START: Waiting script'))
rmt = instance.remote()
rmt.execute_command('chmod +x /tmp/waiting_script.sh', run_as_root=True)
rmt.execute_command('/tmp/waiting_script.sh', run_as_root=True)
LOG.info(_LI('END: Waiting script'))
def check_if_mapr_user_exist(instance):
ec, out = instance.remote().execute_command('id -u mapr',
run_as_root=True,
raise_when_error=False)
return ec == 0
def check_for_mapr_component(instance, component_name):
component_list = instance.node_group.node_processes
return component_name in component_list
def install_role_on_instance(instance, cluster_context):
LOG.info(_LI('START: Installing roles on node '))
roles_list = instance.node_group.node_processes
exec_str = (cluster_context.get_install_manager()
+ cluster_context.get_roles_str(roles_list))
LOG.debug('Executing "%(command)s" on %(instance)s',
{'command': exec_str, 'instance': instance.instance_id})
instance.remote().execute_command(exec_str, run_as_root=True, timeout=900)
LOG.info(_LI('END: Installing roles on node '))
def install_roles(cluster, cluster_context):
LOG.info(_LI('START: Installing roles on cluster'))
instances = utils.get_instances(cluster)
with context.ThreadGroup(len(instances)) as tg:
for instance in instances:
tg.spawn('install_roles_%s' % instance.instance_id,
install_role_on_instance,
instance,
cluster_context)
LOG.info(_LI('END: Installing roles on cluster'))
def start_ecosystem(cluster_context):
oozie_inst = cluster_context.get_oozie_instance()
if oozie_inst is not None:
context.sleep(names.WAIT_OOZIE_INTERVAL)
run_scripts.start_oozie(oozie_inst.remote())

View File

@ -1,135 +1,175 @@
# Copyright (c) 2014, MapR Technologies
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import functools as ft
from sahara.conductor import resource as r
import sahara.exceptions as ex
from sahara.i18n import _
import sahara.plugins.exceptions as e
import sahara.plugins.mapr.util.cluster_info as ci
import sahara.plugins.mapr.util.wrapper as w
class LessThanCountException(e.InvalidComponentCountException):
MESSAGE = _("Hadoop cluster should contain at least"
" %(expected_count)d %(component)s component(s)."
" Actual %(component)s count is %(actual_count)d")
def __init__(self, component, expected_count, count):
super(LessThanCountException, self).__init__(
component, expected_count, count)
self.message = (_("Hadoop cluster should contain at least"
" %(expected_count)d %(component)s component(s)."
" Actual %(component)s count is %(count)d")
% {'expected_count': expected_count,
'component': component, 'count': count})
args = {
'expected_count': expected_count,
'component': component,
'actual_count': count,
}
self.message = LessThanCountException.MESSAGE % args
class MoreThanCountException(e.InvalidComponentCountException):
class EvenCountException(ex.SaharaException):
MESSAGE = _("Hadoop cluster should contain odd number of %(component)s"
" but %(actual_count)s found.")
def __init__(self, component, expected_count, count):
super(MoreThanCountException, self).__init__(
component, expected_count, count)
self.message = (_("Hadoop cluster should contain not more than"
" %(expected_count)d %(component)s component(s)."
" Actual %(component)s count is %(count)d")
% {'expected_count': expected_count,
'component': component, 'count': count})
def __init__(self, component, count):
super(EvenCountException, self).__init__()
args = {'component': component, 'actual_count': count}
self.message = EvenCountException.MESSAGE % args
class NodeRequiredServiceMissingException(e.RequiredServiceMissingException):
MISSING_MSG = _('Node "%(ng_name)s" is missing component %(component)s')
REQUIRED_MSG = _('%(message)s, required by %(required_by)s')
def __init__(self, service_name, required_by=None):
def __init__(self, service_name, ng_name, required_by=None):
super(NodeRequiredServiceMissingException, self).__init__(
service_name, required_by)
self.message = _('Node is missing a service: %s') % service_name
args = {'ng_name': ng_name, 'component': service_name}
self.message = (
NodeRequiredServiceMissingException.MISSING_MSG % args)
if required_by:
self.message = (_('%(message)s, required by service:'
' %(required_by)s')
% {'message': self.message,
'required_by': required_by})
args = {'message': self.message, 'required_by': required_by}
self.message = (
NodeRequiredServiceMissingException.REQUIRED_MSG % args)
def not_less_than_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
class NodeServiceConflictException(ex.SaharaException):
    """Signals that a service and a package may not share a node.

    ``message`` is ``MESSAGE`` filled with the two names and ``code`` is
    set to ``ERROR_CODE``.
    """
    MESSAGE = _('%(service)s service cannot be installed alongside'
                ' %(package)s package')
    ERROR_CODE = "NODE_PROCESS_CONFLICT"

    def __init__(self, service_name, conflicted_package):
        super(NodeServiceConflictException, self).__init__()
        cls = NodeServiceConflictException
        self.message = cls.MESSAGE % {
            'service': service_name,
            'package': conflicted_package,
        }
        self.code = cls.ERROR_CODE
def at_least(count, component):
def validate(cluster_context, component, count):
actual_count = cluster_context.get_instances_count(component)
if not actual_count >= count:
raise LessThanCountException(component, count, actual_count)
raise LessThanCountException(
component.ui_name, count, actual_count)
return ft.partial(validate, component=component, count=count)
def not_more_than_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
if not actual_count <= count:
raise MoreThanCountException(component, count, actual_count)
return ft.partial(validate, component=component, count=count)
def equal_count_component_vr(component, count):
def validate(cluster, component, count):
c_info = ci.ClusterInfo(cluster, None)
actual_count = c_info.get_instances_count(component)
def exactly(count, component):
def validate(cluster_context, component, count):
actual_count = cluster_context.get_instances_count(component)
if not actual_count == count:
raise e.InvalidComponentCountException(
component, count, actual_count)
component.ui_name, count, actual_count)
return ft.partial(validate, component=component, count=count)
def require_component_vr(component):
def validate(instance, component):
if component not in instance.node_group.node_processes:
raise NodeRequiredServiceMissingException(component)
return ft.partial(validate, component=component)
def require_of_listed_components(components):
def validate(instance, components):
if (False in (c in instance.node_group.node_processes
for c in components)):
raise NodeRequiredServiceMissingException(components)
return ft.partial(validate, components=components)
def each_node_has_component_vr(component):
def validate(cluster, component):
rc_vr = require_component_vr(component)
c_info = ci.ClusterInfo(cluster, None)
for i in c_info.get_instances():
rc_vr(i)
return ft.partial(validate, component=component)
def each_node_has_at_least_one_of_listed_components(components):
def validate(cluster, components):
rc_vr = require_of_listed_components(components)
c_info = ci.ClusterInfo(cluster, None)
for i in c_info.get_instances():
rc_vr(i)
return ft.partial(validate, components=components)
def node_dependency_satisfied_vr(component, dependency):
def validate(cluster, component, dependency):
c_info = ci.ClusterInfo(cluster, None)
for ng in c_info.get_node_groups(component):
if dependency not in ng.node_processes:
def each_node_has(component):
def validate(cluster_context, component):
for node_group in cluster_context.cluster.node_groups:
if component.ui_name not in node_group.node_processes:
raise NodeRequiredServiceMissingException(
component, dependency)
component.ui_name, node_group.name)
return ft.partial(validate, component=component)
def odd_count_of(component):
    """Build a rule rejecting an even instance count greater than one.

    :param component: component passed to ``get_instances_count``; its
        ``ui_name`` is reported on failure
    :returns: callable taking the cluster context
    :raises EvenCountException: from the returned rule when the count is
        above one and divisible by two
    """
    def validate(cluster_context, component):
        found = cluster_context.get_instances_count(component)
        if found <= 1 or found % 2 != 0:
            return
        raise EvenCountException(component.ui_name, found)

    return ft.partial(validate, component=component)
def on_same_node(component, dependency):
    """Build a rule requiring ``dependency`` wherever ``component`` runs.

    :param component: component whose node groups are inspected
    :param dependency: component whose ``ui_name`` must appear in each of
        those node groups' ``node_processes``
    :returns: callable taking the cluster context
    :raises NodeRequiredServiceMissingException: from the returned rule
        for the first node group lacking the dependency
    """
    def validate(cluster_context, component, dependency):
        for node_group in cluster_context.get_node_groups(component):
            if dependency.ui_name in node_group.node_processes:
                continue
            raise NodeRequiredServiceMissingException(
                dependency.ui_name, node_group.name, component.ui_name)

    return ft.partial(validate, component=component, dependency=dependency)
def depends_on(service, required_by=None):
    """Build a cluster rule requiring ``service`` to be present.

    :param service: service checked with ``cluster_context.is_present``;
        its ``ui_name`` is reported on failure
    :param required_by: optional service that needs ``service``; when
        given, its ``ui_name`` is included in the error
    :returns: callable taking the cluster context
    :raises RequiredServiceMissingException: from the returned rule when
        the service is not present
    """
    def validate(cluster_context, service, required_by):
        if cluster_context.is_present(service):
            return
        # required_by defaults to None; dereferencing it unconditionally
        # would raise AttributeError instead of the intended error.
        dependant = required_by.ui_name if required_by is not None else None
        raise e.RequiredServiceMissingException(service.ui_name, dependant)

    return ft.partial(validate, service=service, required_by=required_by)
def node_client_package_conflict_vr(components, client_component):
    """Build a rule forbidding components next to a client package.

    :param components: process names that conflict with the client
    :param client_component: process name of the client package
    :returns: callable taking the cluster context
    :raises NodeServiceConflictException: from the returned rule for the
        first node group listing both a component and the client
    """
    def validate(cluster_context, components):
        for node_group in cluster_context.get_node_groups():
            for name in components:
                processes = node_group.node_processes
                if name not in processes:
                    continue
                if client_component not in processes:
                    continue
                raise NodeServiceConflictException(name, client_component)

    return ft.partial(validate, components=components)
def assert_present(service, cluster_context):
    """Raise unless the cluster context reports ``service`` as present.

    :param service: service passed to ``cluster_context.is_present``
    :param cluster_context: context answering ``is_present``
    :raises RequiredServiceMissingException: carrying ``service.ui_name``
    """
    if cluster_context.is_present(service):
        return
    raise e.RequiredServiceMissingException(service.ui_name)
def create_fake_cluster(cluster, existing, additional):
w_node_groups = [w.Wrapper(ng, count=existing[ng.id])
if ng.id in existing else ng
for ng in cluster.node_groups]
return w.Wrapper(cluster, node_groups=w_node_groups)
counts = existing.copy()
counts.update(additional)
def update_ng(node_group):
ng_dict = node_group.to_dict()
count = counts[node_group.id]
ng_dict.update(dict(count=count))
return r.NodeGroupResource(ng_dict)
def need_upd(node_group):
return node_group.id in counts and counts[node_group.id] > 0
updated = map(update_ng, filter(need_upd, cluster.node_groups))
not_updated = filter(lambda ng:
not need_upd(ng) and ng is not None,
cluster.node_groups)
cluster_dict = cluster.to_dict()
cluster_dict.update({'node_groups': updated + not_updated})
fake = r.ClusterResource(cluster_dict)
return fake

View File

@ -1,28 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class Wrapper(object):
    """Proxy that overlays keyword overrides on a wrapped object.

    Attribute reads are answered by the wrapper itself first; names the
    wrapper lacks are looked up on the wrapped object.
    """
    WRAPPED = '__wrapped__'

    def __init__(self, wrapped, **kargs):
        overrides = object.__getattribute__(self, '__dict__')
        overrides.update(kargs)
        # Stored after the overrides, so it wins over a same-named key.
        object.__setattr__(self, Wrapper.WRAPPED, wrapped)

    def __getattribute__(self, name):
        lookup = object.__getattribute__
        target = lookup(self, Wrapper.WRAPPED)
        try:
            return lookup(self, name)
        except AttributeError:
            return lookup(target, name)

View File

@ -1,167 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import collections as c
import os
from oslo_log import log as logging
import six
from sahara.i18n import _LI
import sahara.plugins.mapr.util.cluster_helper as ch
import sahara.plugins.mapr.util.cluster_info as ci
from sahara.plugins.mapr.util import config
import sahara.plugins.mapr.util.config_file_utils as cfu
import sahara.plugins.mapr.util.dict_utils as du
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.plugins.utils as u
import sahara.swift.swift_helper as sh
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class BaseClusterConfigurer(object):
# Build the topology-related settings for the cluster.
#
# Returns a collections.defaultdict(dict) keyed by service, then by config
# file name, holding the settings for that file. Settings are produced for
# core-site.xml and mapred-site.xml under get_hadoop_conf_dir() and for
# /opt/mapr/conf/cldb.conf; the owning service of each file is resolved
# through plugin_spec.get_service_for_file_name.
# Nothing is added unless config.is_data_locality_enabled(self.cluster).
# NOTE(review): indentation is lost in this view; the `else` presumably
# pairs with is_node_awareness_enabled() and all three file sections
# presumably sit inside that branch. Confirm against the original file.
def get_topology_configs(self):
result = c.defaultdict(dict)
if config.is_data_locality_enabled(self.cluster):
if self.is_node_awareness_enabled():
LOG.debug('Node group awareness is set to True')
# core-site.xml: node-group aware topology and block placement classes
file_name = '%s/core-site.xml' % self.get_hadoop_conf_dir()
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['net.topology.impl'] = (
'org.apache.hadoop.net.NetworkTopologyWithNodeGroup')
data['net.topology.nodegroup.aware'] = True
data['dfs.block.replicator.classname'] = (
'org.apache.hadoop.hdfs.server.namenode'
'.BlockPlacementPolicyWithNodeGroup')
result[service][file_name] = data
# mapred-site.xml: job tracker node-group awareness
file_name = '%s/mapred-site.xml' % self.get_hadoop_conf_dir()
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['mapred.jobtracker.nodegroup.aware'] = True
data['mapred.task.cache.levels'] = 3
result[service][file_name] = data
# cldb.conf: path of the topology script
file_name = '/opt/mapr/conf/cldb.conf'
service = self.plugin_spec.get_service_for_file_name(file_name)
data = {}
data['net.topology.script.file.name'] = '/opt/mapr/topology.sh'
result[service][file_name] = data
else:
LOG.debug('Node group awareness is not implemented in YARN'
' yet so enable_hypervisor_awareness set to'
' False explicitly')
return result
def get_swift_configs(self):
    """Return the Swift settings destined for core-site.xml.

    :returns: ``{service: {file_name: {name: value}}}`` where the inner
        mapping is built from the items of ``sh.get_swift_configs()``
    """
    core_site = '%s/core-site.xml' % self.get_hadoop_conf_dir()
    owner = self.plugin_spec.get_service_for_file_name(core_site)
    settings = {}
    for item in sh.get_swift_configs():
        settings[item['name']] = item['value']
    return {owner: {core_site: settings}}
def get_cluster_configs(self):
    """Merge default and user configs and add the ZooKeeper servers.

    User configs are deep-merged over the defaults, then the
    ``cldb.zookeeper.servers`` entry of ``/opt/mapr/conf/cldb.conf`` is
    set from ``ch.get_zookeeper_nodes_ip_with_port``.

    :returns: the merged configuration mapping
    """
    defaults = self.cluster_info.get_default_configs()
    overrides = self.cluster_info.get_user_configs()
    merged = du.deep_update(defaults, overrides)

    cldb_conf = '/opt/mapr/conf/cldb.conf'
    owner = self.plugin_spec.get_service_for_file_name(cldb_conf)
    service_files = merged[owner]
    if cldb_conf not in service_files:
        service_files[cldb_conf] = {}
    cldb_settings = service_files[cldb_conf]
    cldb_settings['cldb.zookeeper.servers'] = (
        ch.get_zookeeper_nodes_ip_with_port(self.cluster))
    return merged
def get_cluster_configs_template(self):
    """Combine topology, Swift and cluster configs into one template.

    Each source is applied with ``du.deep_update(template, source, False)``
    in the order topology, Swift, cluster.

    :returns: the combined template dict
    """
    template = {}
    providers = (
        self.get_topology_configs,
        self.get_swift_configs,
        self.get_cluster_configs,
    )
    for provider in providers:
        du.deep_update(template, provider(), False)
    return template
# Render the config file contents for each node group.
#
# node_groups: node groups to process; when falsy, self.cluster.node_groups
#     is used.
# Returns a dict mapping node group id to {file_name: file_content}.
# For every node group the cluster template is restricted to the group's
# services, merged with the group's user configs via du.deep_update, and
# each named file is rendered with cfu.to_file_content using the file type
# reported by the plugin spec. Files with a falsy name or falsy rendered
# content are left out.
# NOTE(review): indentation is lost in this view; the per-group assignment
# to ng_configs presumably happens once per node group. Confirm.
def get_node_group_configs(self, node_groups=None):
ng_configs = {}
if not node_groups:
node_groups = self.cluster.node_groups
cc_template = self.cluster_configs_template
p_spec = self.plugin_spec
for ng in node_groups:
ng_services = self.cluster_info.get_services(ng)
# keep only the template entries whose service key is in ng_services
d_configs = dict(filter(lambda i: i[0] in ng_services,
six.iteritems(cc_template)))
u_configs = self.cluster_info.get_user_configs(ng)
nc_template = du.deep_update(d_configs, u_configs)
nc_data = {}
for files in nc_template.values():
for f_name, f_data in six.iteritems(files):
if f_name:
f_type = p_spec.get_file_type(f_name)
f_content = cfu.to_file_content(f_data, f_type)
if f_content:
nc_data[f_name] = f_content
ng_configs[ng.id] = nc_data
return ng_configs
# Write the generated config files to instances and post-configure them.
#
# instances: instances to configure; when falsy, all cluster instances
#     from u.get_instances(self.cluster) are used.
# The files for an instance are looked up in self.node_group_files by the
# instance's node_group_id. The parent directory of every file is created
# with `mkdir -p` as root before the files are written as root.
# NOTE(review): indentation is lost in this view, so whether
# write_files_to and config.post_configure_instance run once per instance
# or once per file cannot be confirmed here.
def configure_instances(self, instances=None):
if not instances:
instances = u.get_instances(self.cluster)
for i in instances:
i_files = self.node_group_files[i.node_group_id]
LOG.info(_LI('Writing files %(f_names)s to node %(node)s'),
{'f_names': i_files.keys(), 'node': i.management_ip})
with i.remote() as r:
for f_name in i_files:
r.execute_command('mkdir -p ' + os.path.dirname(f_name),
run_as_root=True)
LOG.debug('Created dir: %s', os.path.dirname(f_name))
r.write_files_to(i_files, run_as_root=True)
config.post_configure_instance(i)
# Set up the configurer for a cluster.
#
# cluster: cluster to configure; its hadoop_version selects the version
#     handler that supplies self.context.
# plugin_spec: plugin specification used to map files to services.
# The template and the per-node-group files are computed eagerly here, so
# cluster, plugin_spec and cluster_info must be assigned before the last
# two statements run.
def __init__(self, cluster, plugin_spec):
h_version = cluster.hadoop_version
v_handler = vhf.VersionHandlerFactory.get().get_handler(h_version)
self.context = v_handler.get_context(cluster)
self.cluster = cluster
self.plugin_spec = plugin_spec
self.cluster_info = ci.ClusterInfo(self.cluster, self.plugin_spec)
self.cluster_configs_template = self.get_cluster_configs_template()
self.node_group_files = self.get_node_group_configs()
def configure(self, instances=None):
    """Configure topology data, then the given instances.

    :param instances: forwarded to ``configure_instances``
    """
    cluster = self.cluster
    self.configure_topology_data(cluster)
    self.configure_instances(instances)
# Delegate to config.post_configure_instance for a single instance.
@staticmethod
def _post_configure_instance(instance):
config.post_configure_instance(instance)
def configure_topology_data(self, cluster):
    """Configure topology data for the cluster.

    Delegates to ``config.configure_topology_data`` with the value of
    ``is_node_awareness_enabled()``.

    :param cluster: cluster to configure
    """
    node_aware = self.is_node_awareness_enabled()
    config.configure_topology_data(cluster, node_aware)
# Abstract hook: subclasses return the Hadoop configuration directory.
# The value is used as the prefix for core-site.xml and mapred-site.xml.
@abc.abstractmethod
def get_hadoop_conf_dir(self):
return
# Abstract hook: subclasses say whether node group awareness is enabled.
# The result gates the topology settings and is passed to
# config.configure_topology_data.
@abc.abstractmethod
def is_node_awareness_enabled(self):
return

View File

@ -1,173 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
import sahara.plugins.mapr.util.config_utils as cu
import sahara.plugins.mapr.util.names as n
import sahara.plugins.utils as u
@six.add_metaclass(abc.ABCMeta)
class BaseContext(object):
hive_version_config = 'Hive Version'
oozie_version_config = 'Oozie Version'
# Abstract hook: subclasses return the cluster object that the instance
# getters pass to u.get_instances / u.get_instance.
@abc.abstractmethod
def get_cluster(self):
return
# Abstract hook: a truthy result makes get_configure_command append
# ' -M7' to the configure command.
@abc.abstractmethod
def is_m7_enabled(self):
return
# Abstract hook that subclasses must override; presumably returns the
# Hadoop version of the cluster (not used in the visible code).
@abc.abstractmethod
def get_hadoop_version(self):
return
def get_linux_distro_version(self):
    """Return the output of ``lsb_release -is`` on the first ZK instance.

    :returns: command output with trailing whitespace stripped
    """
    first_zk = self.get_zk_instances()[0]
    result = first_zk.remote().execute_command(
        'lsb_release -is', run_as_root=True)
    return result[1].rstrip()
def get_install_manager(self):
    """Return the package-install command prefix for the node's distro.

    :returns: command prefix ending with a space, or None when the distro
        reported by ``get_linux_distro_version`` is not in the table
    """
    yum_install = 'yum install -y '
    commands = {
        'Ubuntu': 'apt-get install --force-yes -y ',
        'CentOS': yum_install,
        'RedHatEnterpriseServer': yum_install,
        'Suse': 'zypper ',
    }
    distro = self.get_linux_distro_version()
    return commands.get(distro)
def get_install_manager_version_separator(self):
    """Return the package/version separator for the node's distro.

    :returns: separator string, or None when the distro reported by
        ``get_linux_distro_version`` is not in the table
    """
    separators = {
        'Ubuntu': '=',
        'CentOS': '-',
        'RedHatEnterpriseServer': '-',
        'Suse': ':',
    }
    distro = self.get_linux_distro_version()
    return separators.get(distro)
def get_fs_instances(self):
    """Return ``u.get_instances`` of the cluster for ``n.FILE_SERVER``."""
    cluster = self.get_cluster()
    return u.get_instances(cluster, n.FILE_SERVER)
def get_zk_instances(self):
    """Return ``u.get_instances`` of the cluster for ``n.ZOOKEEPER``."""
    cluster = self.get_cluster()
    return u.get_instances(cluster, n.ZOOKEEPER)
def get_zk_uris(self):
    """Map every ZooKeeper instance to its management IP as a string."""
    def to_uri(instance):
        return '%s' % instance.management_ip

    return map(to_uri, self.get_zk_instances())
def get_cldb_instances(self):
    """Return ``u.get_instances`` of the cluster for ``n.CLDB``."""
    cluster = self.get_cluster()
    return u.get_instances(cluster, n.CLDB)
def get_cldb_uris(self):
    """Map every CLDB instance to its management IP as a string."""
    def to_uri(instance):
        return '%s' % instance.management_ip

    return map(to_uri, self.get_cldb_instances())
def get_cldb_uri(self):
    """Return the fixed URI ``maprfs:///``."""
    uri = 'maprfs:///'
    return uri
def get_rm_instance(self):
    """Return ``u.get_instance`` of the cluster for ``n.RESOURCE_MANAGER``."""
    cluster = self.get_cluster()
    return u.get_instance(cluster, n.RESOURCE_MANAGER)
def get_rm_port(self):
    """Return the resource manager port as the string ``'8032'``."""
    port = '8032'
    return port
def get_rm_uri(self):
    """Return ``ip:port`` of the resource manager instance.

    :returns: the management IP alone when ``get_rm_port()`` is falsy
    """
    port = self.get_rm_port()
    host = self.get_rm_instance().management_ip
    if not port:
        return host
    return '%s:%s' % (host, port)
def get_hs_instance(self):
    """Return ``u.get_instance`` of the cluster for ``n.HISTORY_SERVER``."""
    cluster = self.get_cluster()
    return u.get_instance(cluster, n.HISTORY_SERVER)
def get_hs_uri(self):
    """Return the management IP of the history server instance."""
    history_server = self.get_hs_instance()
    return history_server.management_ip
def get_oozie_instance(self):
    """Return ``u.get_instance`` of the cluster for ``n.OOZIE``."""
    cluster = self.get_cluster()
    return u.get_instance(cluster, n.OOZIE)
def get_hive_metastore_instances(self):
    """Return ``u.get_instances`` of the cluster for ``n.HIVE_METASTORE``."""
    cluster = self.get_cluster()
    return u.get_instances(cluster, n.HIVE_METASTORE)
def get_hive_server2_instances(self):
    """Return ``u.get_instances`` of the cluster for ``n.HIVE_SERVER2``."""
    cluster = self.get_cluster()
    return u.get_instances(cluster, n.HIVE_SERVER2)
def get_oozie_uri(self):
    """Return ``http://<ip>:11000/oozie`` for the Oozie instance."""
    oozie = self.get_oozie_instance()
    return 'http://%s:11000/oozie' % oozie.management_ip
def get_roles_str(self, comp_list):
    """Build the lower-cased package list for a set of node processes.

    The string starts with ``mapr-core`` followed by one ``mapr-<role>``
    entry per process. ``HBase-Client`` is renamed to ``hbase``, and the
    Oozie and Hive entries get the configured version suffix appended.

    :param comp_list: node process names
    :returns: the package string, lower-cased
    """
    entries = ['mapr-' + role + ' ' for role in comp_list]
    packages = 'mapr-core ' + ' '.join(entries)

    # Replacements are built lazily so the version getters are only
    # called for roles that are actually present.
    substitutions = (
        ('HBase-Client', lambda: 'hbase'),
        ('Oozie', lambda: 'oozie' + self.get_oozie_version()),
        ('HiveMetastore',
         lambda: 'HiveMetastore' + self.get_hive_version()),
        ('HiveServer2', lambda: 'HiveServer2' + self.get_hive_version()),
    )
    for role, replacement in substitutions:
        if role in comp_list:
            packages = packages.replace(role, replacement())
    return packages.lower()
# Stub that returns None; get_configure_command treats that falsy result
# as "user does not exist" and appends ' --create-user'.
def user_exists(self):
return
def get_plain_instances(self):
    """Return cluster instances that are not ZK, file server or CLDB.

    :returns: list of the instances from ``u.get_instances`` that appear
        in none of the three groups
    """
    fs = self.get_fs_instances()
    zk = self.get_zk_instances()
    cldb = self.get_cldb_instances()
    reserved = zk + fs + cldb
    plain = []
    for instance in u.get_instances(self.get_cluster()):
        if instance not in reserved:
            plain.append(instance)
    return plain
def get_configure_command(self):
    """Build the ``configure.sh`` command line for the cluster.

    The command passes the CLDB nodes (``-C``), the ZooKeeper nodes
    (``-Z``), the resource manager (``-RM``) and the history server
    (``-HS``). ``-M7`` is appended when ``is_m7_enabled()`` is truthy and
    ``--create-user`` when ``user_exists()`` is falsy.

    :returns: the command string
    """
    kargs = {
        'path': self.get_configure_sh_path(),
        'cldb_nodes': ','.join(self.get_cldb_uris()),
        # -Z takes the ZooKeeper nodes; the CLDB list was passed here
        # before, which is wrong when the two run on different nodes.
        'zk_nodes': ','.join(self.get_zk_uris()),
        'rm_node': self.get_rm_uri(),
        'hs_node': self.get_hs_uri(),
    }
    command = ('{path} -C {cldb_nodes} -Z {zk_nodes} -RM {rm_node}'
               ' -HS {hs_node} -f').format(**kargs)
    if self.is_m7_enabled():
        command += ' -M7'
    if not self.user_exists():
        command += ' --create-user'
    return command
def get_fs_wait_command(self):
    """Return the path of the waiting script, ``/tmp/waiting_script.sh``."""
    command = '/tmp/waiting_script.sh'
    return command
def get_disk_setup_command(self):
    """Return the ``disksetup`` command that reads ``/tmp/disk.list``."""
    command = '/opt/mapr/server/disksetup -F /tmp/disk.list'
    return command
def get_configure_sh_path(self):
    """Return the path of ``configure.sh`` under ``/opt/mapr/server``."""
    path = '/opt/mapr/server/configure.sh'
    return path
def get_oozie_version(self):
    """Return the Oozie version suffix for a package name.

    :returns: distro version separator + configured Oozie version + ``*``
    """
    configs = cu.get_cluster_configs(self.get_cluster())
    separator = self.get_install_manager_version_separator()
    version = configs[n.OOZIE][BaseContext.oozie_version_config]
    return separator + version + '*'
def get_hive_version(self):
    """Return the Hive version suffix for a package name.

    :returns: distro version separator + configured Hive version + ``*``
    """
    configs = cu.get_cluster_configs(self.get_cluster())
    separator = self.get_install_manager_version_separator()
    version = configs[n.HIVE][BaseContext.hive_version_config]
    return separator + version + "*"
# Stub that returns None; presumably meant to be overridden by subclasses
# that ship scripts (no caller is visible here).
def get_scripts(self):
return

View File

@ -1,115 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import six
import sahara.plugins.mapr.util.plugin_spec as ps
import sahara.plugins.mapr.util.start_helper as sh
import sahara.plugins.mapr.util.validation_utils as vu
import sahara.plugins.mapr.versions.edp_engine as edp
@six.add_metaclass(abc.ABCMeta)
class BaseVersionHandler(object):
def __init__(self):
    """Load the plugin spec from ``get_plugin_spec_path()``."""
    spec_path = self.get_plugin_spec_path()
    self.plugin_spec = ps.PluginSpec(spec_path)
def get_plugin_spec(self):
    """Return the plugin spec stored on the handler."""
    spec = self.plugin_spec
    return spec
def get_configs(self):
    """Return the configs reported by the plugin spec."""
    spec = self.plugin_spec
    return spec.get_configs()
def get_node_processes(self):
    """Return the plugin spec's ``service_node_process_map``."""
    spec = self.plugin_spec
    return spec.service_node_process_map
def get_disk_setup_script(self):
    """Return the resource path of ``create_disk_list_file.sh``."""
    script = 'plugins/mapr/util/resources/create_disk_list_file.sh'
    return script
def validate(self, cluster):
    """Run every cluster validation rule against the cluster.

    :param cluster: cluster passed to ``get_cluster_validation_rules``
        and to each rule it returns
    """
    for check in self.get_cluster_validation_rules(cluster):
        check(cluster)
def validate_scaling(self, cluster, existing, additional):
    """Validate the cluster as it would look after scaling.

    :param cluster: cluster being scaled
    :param existing: forwarded to ``vu.create_fake_cluster``
    :param additional: forwarded to ``vu.create_fake_cluster``
    """
    scaled = vu.create_fake_cluster(cluster, existing, additional)
    self.validate(scaled)
def validate_edp(self, cluster):
    """Run every EDP validation rule against the cluster.

    :param cluster: cluster passed to each rule
    """
    rules = self.get_edp_validation_rules()
    for check in rules:
        check(cluster)
def configure_cluster(self, cluster):
    """Install roles on the cluster, then run its configurer.

    :param cluster: cluster to install and configure
    """
    sh.install_roles(cluster, self.get_context(cluster))
    configurer = self.get_cluster_configurer(cluster, self.plugin_spec)
    configurer.configure()
def get_name_node_uri(self, cluster):
    """Return the CLDB URI from the cluster's context."""
    cluster_context = self.get_context(cluster)
    return cluster_context.get_cldb_uri()
def get_oozie_server(self, cluster):
    """Return the Oozie instance from the cluster's context."""
    cluster_context = self.get_context(cluster)
    return cluster_context.get_oozie_instance()
def get_oozie_server_uri(self, cluster):
    """Return the Oozie URI from the cluster's context."""
    cluster_context = self.get_context(cluster)
    return cluster_context.get_oozie_uri()
def get_resource_manager_uri(self, cluster):
    """Return the resource manager URI from the cluster's context."""
    cluster_context = self.get_context(cluster)
    return cluster_context.get_rm_uri()
def get_home_dir(self):
    """Return the resource directory of this plugin version.

    The plugin version is lower-cased and its dots become underscores,
    e.g. ``4.0.1.mrv2`` maps to ``plugins/mapr/versions/v4_0_1_mrv2``.
    """
    version_dir = self.get_plugin_version().replace('.', '_').lower()
    return 'plugins/mapr/versions/v%s' % version_dir
def get_plugin_spec_path(self):
    """Return the path of ``plugin_spec.json`` under the home directory."""
    home = self.get_home_dir()
    return '%s/resources/plugin_spec.json' % home
def get_edp_engine(self, cluster, job_type):
    """Return an Oozie job engine when it supports ``job_type``.

    :param cluster: cluster handed to the engine constructor
    :param job_type: job type looked up in the supported job types
    :returns: ``edp.MapROozieJobEngine`` instance, or None when the job
        type is not supported
    """
    supported = edp.MapROozieJobEngine.get_supported_job_types()
    if job_type not in supported:
        return None
    return edp.MapROozieJobEngine(cluster)
# Abstract methods
# Abstract hook: subclasses return the plugin version string; get_home_dir
# derives the resource directory name from it.
@abc.abstractmethod
def get_plugin_version(self):
return
# Abstract hook: subclasses return an iterable of callables; validate()
# calls each one with the cluster.
@abc.abstractmethod
def get_cluster_validation_rules(self, cluster):
return
# Abstract hook that subclasses must override; presumably returns the
# rules applied when scaling (no caller is visible here).
@abc.abstractmethod
def get_scaling_validation_rules(self):
return
# Non-abstract stub returning None; subclasses may override it.
def get_waiting_script(self):
return
# Abstract hook: subclasses return an iterable of callables;
# validate_edp() calls each one with the cluster.
@abc.abstractmethod
def get_edp_validation_rules(self):
return
# Abstract hook: subclasses return an object exposing configure();
# configure_cluster() calls it with the cluster and the plugin spec.
@abc.abstractmethod
def get_cluster_configurer(self, cluster, plugin_spec):
return
# Abstract hook that subclasses must override; presumably returns the
# configure.sh command line for the cluster (no caller is visible here).
@abc.abstractmethod
def get_configure_sh_string(self, cluster):
return
# Abstract hook: subclasses return the context object for a cluster. The
# URI and instance getters of this handler delegate to that context.
@abc.abstractmethod
def get_context(self, cluster):
return

View File

@ -1,76 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.util.maprfs_helper as mfs
import sahara.plugins.mapr.versions.version_handler_factory as vhf
import sahara.service.edp.binary_retrievers.dispatch as d
import sahara.service.edp.oozie.engine as e
class MapROozieJobEngine(e.OozieJobEngine):
def get_hdfs_user(self):
    """Return the file-system user name, ``'mapr'``."""
    user = 'mapr'
    return user
def create_hdfs_dir(self, remote, dir_name):
    """Create ``dir_name`` via ``mfs.create_maprfs_dir`` as the HDFS user.

    :param remote: remote handle forwarded to the helper
    :param dir_name: directory to create
    """
    owner = self.get_hdfs_user()
    mfs.create_maprfs_dir(remote, dir_name, owner)
def _upload_workflow_file(self, where, job_dir, wf_xml, hdfs_user):
    """Store the workflow XML as ``workflow.xml`` in the job directory.

    :param where: instance whose ``remote()`` performs the upload
    :param job_dir: target directory
    :param wf_xml: workflow document content
    :param hdfs_user: user forwarded to ``mfs.put_file_to_maprfs``
    :returns: ``job_dir + '/workflow.xml'``
    """
    workflow_name = 'workflow.xml'
    with where.remote() as remote:
        mfs.put_file_to_maprfs(
            remote, wf_xml, workflow_name, job_dir, hdfs_user)
    return job_dir + '/' + workflow_name
def _upload_job_files_to_hdfs(self, where, job_dir, job):
    """Upload a job's main and library binaries.

    Mains are written directly into ``job_dir``; libs are written into
    ``job_dir + '/lib/'``, which is created before each lib upload.

    :param where: instance whose ``remote()`` performs the uploads
    :param job_dir: job directory
    :param job: job exposing ``mains`` and ``libs`` (either may be falsy)
    :returns: list of uploaded paths, mains first and then libs
    """
    uploaded = []
    main_binaries = job.mains or []
    lib_binaries = job.libs or []
    owner = self.get_hdfs_user()
    with where.remote() as remote:
        for binary in main_binaries:
            content = d.get_raw_binary(binary)
            mfs.put_file_to_maprfs(
                remote, content, binary.name, job_dir, owner)
            uploaded.append(job_dir + '/' + binary.name)
        for binary in lib_binaries:
            content = d.get_raw_binary(binary)
            lib_dir = job_dir + '/lib/'
            self.create_hdfs_dir(remote, lib_dir)
            mfs.put_file_to_maprfs(
                remote, content, binary.name, lib_dir, owner)
            uploaded.append(lib_dir + binary.name)
    return uploaded
def get_name_node_uri(self, cluster):
    """Return the name node URI from the cluster's version handler."""
    version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(version)
    return handler.get_name_node_uri(cluster)
def get_oozie_server_uri(self, cluster):
    """Return the Oozie server URI from the cluster's version handler."""
    version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(version)
    return handler.get_oozie_server_uri(cluster)
def get_oozie_server(self, cluster):
    """Return the Oozie server from the cluster's version handler."""
    version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(version)
    return handler.get_oozie_server(cluster)
def get_resource_manager_uri(self, cluster):
    """Return the resource manager URI from the cluster's version handler."""
    version = cluster.hadoop_version
    handler = vhf.VersionHandlerFactory.get().get_handler(version)
    return handler.get_resource_manager_uri(cluster)

View File

@ -1,24 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.versions.base_cluster_configurer as bcc
class ClusterConfigurer(bcc.BaseClusterConfigurer):
    """Configurer using the hadoop-0.20.2 conf dir with node awareness on."""

    def get_hadoop_conf_dir(self):
        """Return the hadoop-0.20.2 configuration directory."""
        conf_dir = '/opt/mapr/hadoop/hadoop-0.20.2/conf'
        return conf_dir

    def is_node_awareness_enabled(self):
        """Return True: node awareness is always enabled here."""
        enabled = True
        return enabled

View File

@ -0,0 +1,81 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.base.base_cluster_context as bc
import sahara.plugins.mapr.services.mapreduce.mapreduce as mr
import sahara.plugins.mapr.services.maprfs.maprfs as maprfs
from sahara.utils import files as f
class Context(bc.BaseClusterContext):
UBUNTU_MAPR_BASE_REPO = ('http://package.mapr.com/releases/v3.1.1/ubuntu/ '
'mapr optional')
UBUNTU_MAPR_ECOSYSTEM_REPO = ('http://package.mapr.com/releases/'
'ecosystem/ubuntu binary/')
CENTOS_MAPR_BASE_REPO = 'http://package.mapr.com/releases/v3.1.1/redhat/'
CENTOS_MAPR_ECOSYSTEM_REPO = ('http://package.mapr.com/releases/'
'ecosystem/redhat')
# Initialise the context for a cluster.
#
# cluster, version_handler, added, removed: forwarded unchanged to the
#     base class constructor.
# The Hadoop version is taken from a fresh mr.MapReduce() service object;
# the lib and conf paths start as None and are filled lazily by the
# hadoop_lib / hadoop_conf properties.
# NOTE(review): _mapr_db is read by the mapr_db property but is not
# assigned here; presumably the base class sets it. Confirm.
def __init__(self, cluster, version_handler, added=None, removed=None):
super(Context, self).__init__(cluster, version_handler, added, removed)
self._hadoop_version = mr.MapReduce().version
self._hadoop_lib = None
self._hadoop_conf = None
self._resource_manager_uri = 'maprfs:///'
self._cluster_mode = None
self._node_aware = False
@property
def hadoop_lib(self):
    """Return ``<hadoop_home>/lib``, computed once and then cached."""
    if self._hadoop_lib:
        return self._hadoop_lib
    self._hadoop_lib = '%(hadoop_home)s/lib' % {
        'hadoop_home': self.hadoop_home,
    }
    return self._hadoop_lib
@property
def hadoop_conf(self):
    """Return ``<hadoop_home>/conf``, computed once and then cached."""
    if self._hadoop_conf:
        return self._hadoop_conf
    self._hadoop_conf = '%(hadoop_home)s/conf' % {
        'hadoop_home': self.hadoop_home,
    }
    return self._hadoop_conf
@property
def resource_manager_uri(self):
    """Return the stored resource manager URI."""
    uri = self._resource_manager_uri
    return uri
@property
def mapr_db(self):
    """Return the MapR-DB flag for the configure command.

    The value is ``'-M7'`` when the cluster config named by
    ``MapRFS.ENABLE_MAPR_DB_CONFIG`` is truthy and ``''`` otherwise; it is
    computed on first access and cached.
    """
    if self._mapr_db is not None:
        return self._mapr_db
    option = maprfs.MapRFS.ENABLE_MAPR_DB_CONFIG
    enabled = self._get_cluster_config_value(option)
    self._mapr_db = '-M7' if enabled else ''
    return self._mapr_db
def get_install_repo_script_data(self):
    """Render ``add_mapr_repo.sh`` with this version's repository URLs.

    :returns: the script text with the Ubuntu and CentOS base and
        ecosystem repositories substituted in
    """
    template = f.get_file_text('plugins/mapr/resources/add_mapr_repo.sh')
    return template % {
        "ubuntu_mapr_base_repo": Context.UBUNTU_MAPR_BASE_REPO,
        "ubuntu_mapr_ecosystem_repo": Context.UBUNTU_MAPR_ECOSYSTEM_REPO,
        "centos_mapr_repo": Context.CENTOS_MAPR_BASE_REPO,
        "centos_mapr_ecosystem_repo": Context.CENTOS_MAPR_ECOSYSTEM_REPO,
    }

View File

@ -0,0 +1,22 @@
# Copyright (c) 2015, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sahara.plugins.mapr.base.base_edp_engine as edp
import sahara.plugins.mapr.util.maprfs_helper as mfs
class MapR3OozieJobEngine(edp.MapROozieJobEngine):
    """Oozie job engine that creates directories with the MapR 3 helper."""

    def create_hdfs_dir(self, remote, dir_name):
        """Create ``dir_name`` via ``mfs.create_maprfs3_dir``.

        :param remote: remote handle forwarded to the helper
        :param dir_name: directory to create
        """
        owner = self.get_hdfs_user()
        mfs.create_maprfs3_dir(remote, dir_name, owner)

View File

@ -1,57 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<!--
Replace 'maprfs' by 'hdfs' to use HDFS.
Replace localhost by an ip address for namenode/cldb.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>maprfs:///</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
<property>
<name>fs.mapr.working.dir</name>
<value>/user/$USERNAME/</value>
<description>The default directory to be used with relative paths.
Note that $USERNAME is NOT an environment variable, but just a placeholder
to indicate that it will be expanded to the corresponding username.
Other example default directories could be "/", "/home/$USERNAME", "/$USERNAME" etc.
</description>
</property>
<property>
<name>fs.s3n.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3n.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.block.size</name>
<value>33554432</value>
</property>
<property>
<name>fs.s3.blockSize</name>
<value>33554432</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapr.hosts</name>
<value>*</value>
</property>
</configuration>

View File

@ -1,30 +0,0 @@
# Sample Exports file
# for /mapr exports
# <Path> <exports_control>
#access_control -> order is specific to default
# list the hosts before specifying a default for all
# a.b.c.d,1.2.3.4(ro) d.e.f.g(ro) (rw)
# enforces ro for a.b.c.d & 1.2.3.4 and everybody else is rw
# special path to export clusters in mapr-clusters.conf. To disable exporting,
# comment it out. to restrict access use the exports_control
#
/mapr (rw)
#to export only certain clusters, comment out the /mapr & uncomment.
# Note: this will cause /mapr to be unexported
#/mapr/clustername (rw)
#to export /mapr only to certain hosts (using exports_control)
#/mapr a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster1 rw to a.b.c.d & ro to e.f.g.h (denied for others)
#/mapr/cluster1 a.b.c.d(rw),e.f.g.h(ro)
# export /mapr/cluster2 only to e.f.g.h (denied for others)
#/mapr/cluster2 e.f.g.h(rw)
# export /mapr/cluster3 rw to e.f.g.h & ro to others
#/mapr/cluster3 e.f.g.h(rw) (ro)

View File

@ -1,41 +0,0 @@
#CLDB metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send cldb metrics to that context
# Configuration of the "cldb" context for null
#cldb.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#cldb.period=10
# Configuration of the "cldb" context for file
#cldb.class=org.apache.hadoop.metrics.file.FileContext
#cldb.period=60
#cldb.fileName=/tmp/cldbmetrics.log
# Configuration of the "cldb" context for ganglia
cldb.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
cldb.period=10
cldb.servers=localhost:8649
cldb.spoof=1
#FileServer metrics config - Pick one out of null,file or ganglia.
#Uncomment all properties in null, file or ganglia context, to send fileserver metrics to that context
# Configuration of the "fileserver" context for null
#fileserver.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
#fileserver.period=10
# Configuration of the "fileserver" context for file
#fileserver.class=org.apache.hadoop.metrics.file.FileContext
#fileserver.period=60
#fileserver.fileName=/tmp/fsmetrics.log
# Configuration of the "fileserver" context for ganglia
fileserver.class=com.mapr.fs.cldb.counters.MapRGangliaContext31
fileserver.period=37
fileserver.servers=localhost:8649
fileserver.spoof=1
maprmepredvariant.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContext
maprmepredvariant.period=10
maprmapred.class=com.mapr.job.mngmnt.hadoop.metrics.MaprRPCContextFinal
maprmapred.period=10

View File

@ -1,16 +0,0 @@
#mfs.num.compress.threads=1
#mfs.max.aio.events=5000
#mfs.disable.periodic.flush=0
#mfs.io.disk.timeout=60
#mfs.server.ip=127.0.0.1
#mfs.max.resync.count=16
#mfs.max.restore.count=16
#mfs.ignore.container.delete=0
#mfs.ignore.readdir.pattern=0
mfs.server.port=5660
#mfs.subnets.whitelist=127.0.0.1/8
#UNCOMMENT this line to disable bulk writes
#mfs.bulk.writes.enabled=0
#UNCOMMENT this to set the topology of this node
#E.g., to set this node's topology to /compute-only, uncomment the line below
#mfs.network.location=/compute-only

View File

@ -1,43 +0,0 @@
# Configuration for nfsserver
#
# The system defaults are in the comments
#
# Default compression is true
#Compression = true
# chunksize is 64M
#ChunkSize = 67108864
# Number of threads for compression/decompression: default=2
#CompThreads = 2
#Mount point for the ramfs file for mmap
#RamfsMntDir = /ramfs/mapr
# Size of the ramfile to use (percent of total physical memory) default=0.25
# 0: disables the use of ramfs
#RamfsSize = 0.25
# Loglevel = DEBUG | INFO | WARN | ERROR | CRITICAL | OFF
#Loglevel = INFO
#Duplicate Request cache size & timeout in seconds
#DrCacheSize = 20480
#DrCacheTimeout = 62
# To keep the drcache lean, we only cache the response if the
# time we took to populate is greater than 50% of DrCacheTimeout.
# Set it to 0 to disable this optimization, Note that the DrCacheSize or
# DrCacheTimeout will also need to be changed. Ex: if the nfsserver supports
# 10,000 ops/sec (modification ops): then DrCacheSize will need to change
# to: 10,000*DrCacheTimeout = 620,000
#DRCacheTimeOutOpt = 0.5
#NFS fileid, by default the fileid is of 32 bit size.
#Set Use32BitFileId=0 to use 64 bit fileid (inode number)
#Use32BitFileId=0
#Auto refresh exports time interval in mins.
#default is 0, means there is no auto refresh.
#AutoRefreshExportsTimeInterval = 5

View File

@ -1,203 +0,0 @@
{
"files": [
{
"remote": null,
"type": null,
"configs": {
"required": {
"cluster": [
{
"name": "Enable MapR-DB",
"config_type": "bool",
"default_value": false,
"priority": 1
}
]
}
}
},
{
"remote": "/opt/mapr/conf/cldb.conf",
"local": "default/cldb.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/hadoop-metrics.properties",
"local": "default/hadoop-metrics.properties",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/mfs.conf",
"local": "default/mfs.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/nfsserver.conf",
"local": "default/nfsserver.conf",
"type": "properties"
},
{
"remote": "/opt/mapr/conf/exports",
"local": "default/exports",
"type": "raw"
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml",
"local": "default/core-site.xml",
"type": "xml",
"configs": {
"optional": {
"cluster": [
{
"name": "fs.swift.impl",
"default_value": "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem"
},
{
"name": "fs.swift.connect.timeout",
"config_type": "int",
"default_value": 15000
},
{
"name": "fs.swift.socket.timeout",
"config_type": "int",
"default_value": 60000
},
{
"name": "fs.swift.connect.retry.count",
"config_type": "int",
"default_value": 3
},
{
"name": "fs.swift.connect.throttle.delay",
"config_type": "int",
"default_value": 0
},
{
"name": "fs.swift.blocksize",
"config_type": "int",
"default_value": 32768
},
{
"name": "fs.swift.partsize",
"config_type": "int",
"default_value": 4718592
},
{
"name": "fs.swift.requestsize",
"config_type": "int",
"default_value": 64
},
{
"name": "fs.swift.service.sahara.public",
"config_type": "bool",
"default_value": true
},
{
"name": "fs.swift.service.sahara.http.port",
"config_type": "int",
"default_value": 8080
},
{
"name": "fs.swift.service.sahara.https.port",
"config_type": "int",
"default_value": 443
},
{
"name": "fs.swift.service.sahara.auth.endpoint.prefix",
"default_value": "/endpoints/AUTH_"
}
]
}
}
},
{
"remote": "/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml",
"type": "xml"
}
],
"services": [
{
"name": "general",
"files": [
null
]
},
{
"name": "Management",
"node_processes": [
"ZooKeeper",
"Webserver",
"MapR-Client",
"Metrics"
]
},
{
"name": "MapReduce",
"node_processes": [
"TaskTracker",
"JobTracker"
],
"files": [
"/opt/mapr/hadoop/hadoop-0.20.2/conf/mapred-site.xml"
]
},
{
"name": "MapR FS",
"node_processes": [
"CLDB",
"FileServer",
"NFS"
],
"files": [
"/opt/mapr/conf/cldb.conf",
"/opt/mapr/conf/hadoop-metrics.properties",
"/opt/mapr/conf/mfs.conf",
"/opt/mapr/conf/nfsserver.conf",
"/opt/mapr/conf/exports",
"/opt/mapr/hadoop/hadoop-0.20.2/conf/core-site.xml"
]
},
{
"name": "HBase",
"node_processes": [
"HBase-Master",
"HBase-RegionServer",
"HBase-Client"
]
},
{
"name": "Hive",
"node_processes": [
"HiveMetastore",
"HiveServer2"
],
"versions": [
"0.13",
"0.12"
]
},
{
"name": "Oozie",
"node_processes": [
"Oozie"
],
"versions": [
"4.0.1",
"4.0.0",
"3.3.2"
]
},
{
"name": "Pig",
"node_processes": [
"Pig"
]
},
{
"name": "Mahout",
"node_processes": [
"Mahout"
]
}
]
}

View File

@ -1,34 +0,0 @@
# Copyright (c) 2014, MapR Technologies
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from sahara.plugins.mapr.util import cluster_helper as clh_utils
from sahara.plugins.mapr.util import start_helper as start_helper
import sahara.plugins.utils as utils
def exec_configure_sh_on_cluster(cluster):
    """Run ``configure.sh`` on each instance of ``cluster``, one by one.

    Instances are taken from ``utils.get_instances(cluster)`` and handled
    sequentially by ``exec_configure_sh_on_instance``.
    """
    for instance in utils.get_instances(cluster):
        exec_configure_sh_on_instance(cluster, instance)
def exec_configure_sh_on_instance(cluster, instance):
    """Build and run the ``configure.sh`` command line on ``instance``.

    The command receives the cluster's CLDB node IPs via ``-C`` and its
    ZooKeeper node IPs via ``-Z``, followed by ``-f``. ``--create-user``
    is appended when ``start_helper.check_if_mapr_user_exist`` reports
    that the MapR user is missing on the instance.
    """
    command = ''.join([
        '/opt/mapr/server/configure.sh',
        ' -C ', clh_utils.get_cldb_nodes_ip(cluster),
        ' -Z ', clh_utils.get_zookeeper_nodes_ip(cluster),
        ' -f',
    ])
    mapr_user_exists = start_helper.check_if_mapr_user_exist(instance)
    if not mapr_user_exists:
        command += ' --create-user'
    # Second positional argument is forwarded as in the original call
    # (presumably "run as root" -- confirm against the remote API).
    instance.remote().execute_command(command, True)

Some files were not shown because too many files have changed in this diff Show More