# Copyright (c) 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import re
import string
import uuid

from oslo_config import cfg
from oslo_utils import uuidutils
import six

from sahara import conductor as c
from sahara import context
from sahara.plugins import base as plugin_base
from sahara.swift import swift_helper as sw
from sahara.utils import edp
from sahara.utils import remote


opts = [
    cfg.StrOpt('job_workflow_postfix',
               default='',
               help="Postfix for storing jobs in HDFS. Will be "
                    "added to the '/user/<hdfs user>/' path.")
]

CONF = cfg.CONF
CONF.register_opts(opts)

conductor = c.API

# Prefix used to mark data_source name references in arg lists
DATA_SOURCE_PREFIX = "datasource://"

# Job config options that enable data_source reference substitution
# by name and by uuid, respectively
DATA_SOURCE_SUBST_NAME = "edp.substitute_data_source_for_name"
DATA_SOURCE_SUBST_UUID = "edp.substitute_data_source_for_uuid"


def get_plugin(cluster):
    return plugin_base.PLUGINS.get_plugin(cluster.plugin_name)


def create_workflow_dir(where, path, job, use_uuid=None, chmod=""):

    if use_uuid is None:
        use_uuid = six.text_type(uuid.uuid4())

    constructed_dir = _append_slash_if_needed(path)
    constructed_dir += '%s/%s' % (job.name, use_uuid)
    with remote.get_remote(where) as r:
        if chmod:
            r.execute_command("mkdir -p -m %s %s" % (chmod, constructed_dir))
        else:
            r.execute_command("mkdir -p %s" % constructed_dir)

    return constructed_dir


def get_data_sources(job_execution, job, data_source_urls):
    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA, edp.JOB_TYPE_SPARK):
        return None, None

    ctx = context.ctx()

    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    if input_source and input_source.id not in data_source_urls:
        data_source_urls[input_source.id] = _construct_data_source_url(
            input_source.url, job_execution.id)

    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    if output_source and output_source.id not in data_source_urls:
        data_source_urls[output_source.id] = _construct_data_source_url(
            output_source.url, job_execution.id)

    return input_source, output_source


def _append_slash_if_needed(path):
    # endswith() avoids an IndexError when path is an empty string
    if not path.endswith('/'):
        path += '/'
    return path


def may_contain_data_source_refs(job_configs):
    """Check whether data_source reference substitution is enabled.

    Return a tuple of booleans indicating whether substitution by
    name and substitution by uuid, respectively, are enabled in
    job_configs.
    """

    def _check_data_source_ref_option(option):
        truth = job_configs and (
            job_configs.get('configs', {}).get(option))
        # Config values specified in the UI may be
        # passed as strings
        return truth in (True, 'True')

    return (
        _check_data_source_ref_option(DATA_SOURCE_SUBST_NAME),
        _check_data_source_ref_option(DATA_SOURCE_SUBST_UUID))


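# Illustrative example (hypothetical values, not part of the original
# module): substitution-by-name requested as a string, as the UI may
# send it:
#
#   may_contain_data_source_refs(
#       {'configs': {DATA_SOURCE_SUBST_NAME: 'True'}})
#   # -> (True, False)

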
def _data_source_ref_search(job_configs, func, prune=lambda x: x):
    """Return a list of unique values in job_configs filtered by func().

    Loop over the 'args', 'configs' and 'params' elements in
    job_configs and return a list of all values for which
    func(value) is True.

    Optionally provide a 'prune' function that is applied
    to values before they are added to the return value.
    """
    args = set([prune(arg) for arg in job_configs.get(
        'args', []) if func(arg)])

    configs = set([prune(val) for val in six.itervalues(
        job_configs.get('configs', {})) if func(val)])

    params = set([prune(val) for val in six.itervalues(
        job_configs.get('params', {})) if func(val)])

    return list(args | configs | params)


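# Illustrative example (hypothetical values, not part of the original
# module): collect every value that looks like a number, stripped of a
# leading dash:
#
#   _data_source_ref_search({'args': ['-42', 'x'], 'params': {'p': '-7'}},
#                           lambda v: v.lstrip('-').isdigit(),
#                           prune=lambda v: v.lstrip('-'))
#   # -> ['42', '7'] (in no guaranteed order, since sets are used)

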
def find_possible_data_source_refs_by_name(job_configs):
    """Find string values in job_configs starting with 'datasource://'.

    Loop over the 'args', 'configs', and 'params' elements of
    job_configs to find all values beginning with the prefix
    'datasource://'. Return a list of unique values with the prefix
    removed.

    Note that for 'configs' and 'params', which are dictionaries, only
    the values are considered and the keys are not relevant.
    """
    def startswith(arg):
        return isinstance(
            arg,
            six.string_types) and arg.startswith(DATA_SOURCE_PREFIX)
    return _data_source_ref_search(
        job_configs, startswith,
        prune=lambda x: x[len(DATA_SOURCE_PREFIX):])


def find_possible_data_source_refs_by_uuid(job_configs):
    """Find string values in job_configs which are uuids.

    Return a list of unique values in the 'args', 'configs', and 'params'
    elements of job_configs which have the form of a uuid.

    Note that for 'configs' and 'params', which are dictionaries, only
    the values are considered and the keys are not relevant.
    """
    return _data_source_ref_search(job_configs, uuidutils.is_uuid_like)


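# Illustrative example (hypothetical names and ids, not part of the
# original module): given
#
#   job_configs = {
#       'args': ['datasource://input_ds',
#                '5e623a54-84e4-4d53-9be2-73d2a0a2b0ae',
#                'plain-arg'],
#   }
#
# find_possible_data_source_refs_by_name(job_configs) returns
# ['input_ds'] and find_possible_data_source_refs_by_uuid(job_configs)
# returns ['5e623a54-84e4-4d53-9be2-73d2a0a2b0ae'].

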
def _add_credentials_for_data_sources(ds_list, configs):
    """Set swift credentials in configs from the first swift data_source.

    Credential values already present in configs are never overwritten.
    """
    username = password = None
    for src in ds_list:
        if src.type == "swift" and hasattr(src, "credentials"):
            if "user" in src.credentials:
                username = src.credentials['user']
            if "password" in src.credentials:
                password = src.credentials['password']
            break

    # Don't overwrite if there is already a value here
    if configs.get(sw.HADOOP_SWIFT_USERNAME, None) is None and (
            username is not None):
        configs[sw.HADOOP_SWIFT_USERNAME] = username
    if configs.get(sw.HADOOP_SWIFT_PASSWORD, None) is None and (
            password is not None):
        configs[sw.HADOOP_SWIFT_PASSWORD] = password


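# Illustrative example (hypothetical values, not part of the original
# module): given a swift data_source carrying credentials, the helper
# fills in only the missing config keys:
#
#   configs = {sw.HADOOP_SWIFT_USERNAME: 'already-set'}
#   _add_credentials_for_data_sources([swift_ds], configs)
#   # configs[sw.HADOOP_SWIFT_USERNAME] is still 'already-set';
#   # configs[sw.HADOOP_SWIFT_PASSWORD] comes from swift_ds.credentials

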
def resolve_data_source_references(job_configs, job_exec_id,
                                   data_source_urls):
    """Resolve possible data_source references in job_configs.

    Look for any string values in the 'args', 'configs', and 'params'
    elements of job_configs which start with 'datasource://' or have
    the form of a uuid.

    For values beginning with 'datasource://', strip off the prefix
    and search for a DataSource object with a name that matches the
    value.

    For values having the form of a uuid, search for a DataSource object
    with an id that matches the value.

    If a DataSource object is found for the value, replace the value
    with the URL from the DataSource object. If any DataSource objects
    are found which reference swift paths and contain credentials, set
    credential configuration values in job_configs (use the first set
    of swift credentials found).

    If no values are resolved, return an empty list and a reference
    to job_configs.

    If any values are resolved, return a list of the referenced
    data_source objects and a copy of job_configs with all of the
    references replaced with URLs.
    """
    by_name, by_uuid = may_contain_data_source_refs(job_configs)
    if not (by_name or by_uuid):
        return [], job_configs

    ctx = context.ctx()
    ds_seen = {}
    new_configs = {}

    def _resolve(value):
        kwargs = {}
        if by_name and isinstance(
                value,
                six.string_types) and value.startswith(DATA_SOURCE_PREFIX):
            value = value[len(DATA_SOURCE_PREFIX):]
            kwargs['name'] = value

        elif by_uuid and uuidutils.is_uuid_like(value):
            kwargs['id'] = value

        if kwargs:
            # Name and id are both unique constraints so if there
            # is more than one something is really wrong
            ds = conductor.data_source_get_all(ctx, **kwargs)
            if len(ds) == 1:
                ds = ds[0]
                ds_seen[ds.id] = ds
                if ds.id not in data_source_urls:
                    data_source_urls[ds.id] = _construct_data_source_url(
                        ds.url, job_exec_id)

                return data_source_urls[ds.id]
        return value

    # Loop over configs/params/args and look up each value as a data_source.
    # If we find it, replace the value. In all cases, we've produced a
    # copy which is not a FrozenClass type and can be updated.
    new_configs['configs'] = {
        k: _resolve(v) for k, v in six.iteritems(
            job_configs.get('configs', {}))}
    new_configs['params'] = {
        k: _resolve(v) for k, v in six.iteritems(
            job_configs.get('params', {}))}
    new_configs['args'] = [_resolve(a) for a in job_configs.get('args', [])]

    # If we didn't resolve anything we might as well return the original.
    # list() keeps the documented return type on Python 3, where values()
    # is a view rather than a list.
    ds_seen = list(ds_seen.values())
    if not ds_seen:
        return [], job_configs

    # If there are no proxy_configs and the user has not already set configs
    # for swift credentials, set those configs based on data_sources we found
    if not job_configs.get('proxy_configs'):
        _add_credentials_for_data_sources(ds_seen, new_configs['configs'])
    else:
        # we'll need to copy these, too, so job_configs is complete
        new_configs['proxy_configs'] = {
            k: v for k, v in six.iteritems(job_configs.get('proxy_configs'))}

    return ds_seen, new_configs


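# Illustrative usage sketch (hypothetical objects, not part of the
# original module): a caller such as an EDP engine might do
#
#   data_source_urls = {}
#   ds_list, job_configs = resolve_data_source_references(
#       job_execution.job_configs, job_execution.id, data_source_urls)
#
# and then pass the resolved job_configs on to workflow creation.

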
def _construct_data_source_url(url, job_exec_id):
    """Resolve placeholders in a data_source URL.

    Supported placeholders:

    * %RANDSTR(len)% - will be replaced with a random string of
      lowercase letters of length `len`.
    * %JOB_EXEC_ID% - will be replaced with the job execution ID.
    """
    def _randstr(match):
        # named random_len to avoid shadowing the len() builtin
        random_len = int(match.group(1))
        return ''.join(random.choice(string.ascii_lowercase)
                       for _ in six.moves.range(random_len))

    url = url.replace("%JOB_EXEC_ID%", job_exec_id)

    url = re.sub(r"%RANDSTR\((\d+)\)%", _randstr, url)

    return url


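# Illustrative example (hypothetical values, not part of the original
# module):
#
#   _construct_data_source_url(
#       'swift://container/%JOB_EXEC_ID%/out-%RANDSTR(6)%', 'abc-123')
#   # -> 'swift://container/abc-123/out-kqzwrn' (random suffix varies)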