deb-sahara/sahara/service/edp/hdfs_helper.py
Andrew Lazarev 7bae4261d0 Implemented support of placeholders in datasource URLs
Added the ability to use placeholders in datasource URLs. Currently
supported placeholders:
* %RANDSTR(len)% - replaced with a random string of lowercase
  letters of length `len`.
* %JOB_EXEC_ID% - replaced with the job execution ID.

Resulting URLs will be stored in a new field in the job_execution
table. Reusing the 'info' field doesn't look like a good solution
since it is reserved for Oozie status.

Next steps:
* write documentation
* update horizon

Implements blueprint: edp-datasource-placeholders

Change-Id: I1d9282b210047982c062b24bd03cf2331ab7599e
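
A minimal sketch of how such placeholders could be resolved
(illustrative only; the resolver name and regex below are assumptions,
not the code this commit added):

    import random
    import re
    import string

    def resolve_placeholders(url, job_exec_id):
        # %JOB_EXEC_ID% is a straight substitution.
        url = url.replace('%JOB_EXEC_ID%', job_exec_id)

        # %RANDSTR(len)% becomes `len` random lowercase letters.
        def _rand(match):
            length = int(match.group(1))
            return ''.join(random.choice(string.ascii_lowercase)
                           for _ in range(length))
        return re.sub(r'%RANDSTR\((\d+)\)%', _rand, url)

    # resolve_placeholders('swift://demo/out-%RANDSTR(6)%', '42')
    # could yield 'swift://demo/out-xkqzvb' (the suffix is random).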
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import uuid

import six
from six.moves.urllib import parse as urlparse

from sahara import conductor as c
from sahara import context
from sahara.plugins import exceptions as ex
from sahara.plugins import utils as u
from sahara.utils import general as g

conductor = c.API

HBASE_COMMON_LIB_PATH = "/user/sahara-hbase-lib"


def create_hbase_common_lib(r):
    """Populate HBASE_COMMON_LIB_PATH in HDFS with the HBase jars.

    Every .jar found on the node's HBase classpath is copied into a
    shared HDFS directory so jobs can reference the libraries from there.
    """
    r.execute_command(
        'sudo su - -c "hadoop dfs -mkdir -p %s" hdfs' % (
            HBASE_COMMON_LIB_PATH))

    ret_code, stdout = r.execute_command('hbase classpath')
    if ret_code == 0:
        paths = stdout.split(':')
        for p in paths:
            if p.endswith(".jar"):
                r.execute_command('sudo su - -c "hadoop fs -put -p %s %s" hdfs'
                                  % (p, HBASE_COMMON_LIB_PATH))
    else:
        raise ex.RequiredServiceMissingException('hbase')


def put_file_to_hdfs(r, file, file_name, path, hdfs_user):
    # Stage the data under a unique name in /tmp, then move it into HDFS.
    tmp_file_name = '%s.%s' % (file_name, six.text_type(uuid.uuid4()))
    r.write_file_to('/tmp/%s' % tmp_file_name, file)
    move_from_local(r, '/tmp/%s' % tmp_file_name, path + '/' + file_name,
                    hdfs_user)
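

# Illustrative usage of put_file_to_hdfs (assumed caller, hypothetical
# names): `instance` stands for a Sahara cluster instance exposing
# remote(), as used elsewhere in this module.
#
#     with instance.remote() as r:
#         put_file_to_hdfs(r, script_data, 'job.pig',
#                          '/user/hadoop/scripts', 'hdfs')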


def copy_from_local(r, source, target, hdfs_user):
    r.execute_command('sudo su - -c "hadoop dfs -copyFromLocal '
                      '%s %s" %s' % (source, target, hdfs_user))


def move_from_local(r, source, target, hdfs_user):
    # Using copyFromLocal followed by rm addresses permission issues that
    # arise when the image user is not the same as the HDFS user.
    r.execute_command('sudo su - -c "hadoop dfs -copyFromLocal %(source)s '
                      '%(target)s" %(user)s && sudo rm -f %(source)s' %
                      {"source": source, "target": target, "user": hdfs_user})


def create_dir_hadoop1(r, dir_name, hdfs_user):
    # Hadoop 1: 'dfs -mkdir' creates missing parent directories by default.
    r.execute_command(
        'sudo su - -c "hadoop dfs -mkdir %s" %s' % (dir_name, hdfs_user))


def create_dir_hadoop2(r, dir_name, hdfs_user):
    # Hadoop 2: parent directories are only created when '-p' is passed.
    r.execute_command(
        'sudo su - -c "hadoop dfs -mkdir -p %s" %s' % (dir_name, hdfs_user))


def _get_cluster_hosts_information(host, cluster):
    # Search all other clusters for an instance named `host`; if found,
    # return the /etc/hosts entries for that cluster.
    for clust in conductor.cluster_get_all(context.ctx()):
        if clust.id == cluster.id:
            continue

        for i in u.get_instances(clust):
            if i.instance_name == host:
                return g.generate_etc_hosts(clust)

    return None


def configure_cluster_for_hdfs(cluster, data_source_url):
    host = urlparse.urlparse(data_source_url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # The IP address hasn't been resolved; the last chance is for
        # the VM to resolve it itself.
        return

    etc_hosts_update = '/tmp/etc-hosts-update.%s' % six.text_type(uuid.uuid4())
    tmp_etc_hosts = '/tmp/etc-hosts.%s' % six.text_type(uuid.uuid4())
    update_etc_hosts_cmd = (
        'cat %(etc_hosts_update)s /etc/hosts | '
        'sort | uniq > %(tmp_etc_hosts)s && '
        'cat %(tmp_etc_hosts)s > /etc/hosts && '
        'rm -f %(tmp_etc_hosts)s %(etc_hosts_update)s' %
        {'etc_hosts_update': etc_hosts_update, 'tmp_etc_hosts': tmp_etc_hosts})

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to(etc_hosts_update, etc_hosts_information)
            r.execute_command(update_etc_hosts_cmd, run_as_root=True)
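

# Illustrative call (assumed caller, not part of this module): before an
# EDP job runs against an hdfs:// data source that lives on another
# Sahara cluster, something like the following would propagate the host
# mappings to every instance; the URL is a made-up example.
#
#     configure_cluster_for_hdfs(
#         cluster, 'hdfs://other-cluster-master-001:8020/user/demo/input')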