Add Oozie java action workflows
Change-Id: I6559f75deb7d238c8ab4e2df2f0483f100143b93
Implements: blueprint edp-oozie-java-action
This commit is contained in:
parent
fc3cf8bcf0
commit
afe8da21f9
@ -35,13 +35,7 @@ rest = u.Rest('v11', __name__)
|
|||||||
@v.check_exists(api.get_job, id='job_id')
|
@v.check_exists(api.get_job, id='job_id')
|
||||||
@v.validate(v_j_e.JOB_EXEC_SCHEMA, v_j_e.check_job_executor)
|
@v.validate(v_j_e.JOB_EXEC_SCHEMA, v_j_e.check_job_executor)
|
||||||
def job_execute(job_id, data):
|
def job_execute(job_id, data):
|
||||||
input = data['input_id']
|
return u.render(job_execution=api.execute_job(job_id, data).to_dict())
|
||||||
output = data['output_id']
|
|
||||||
cluster = data['cluster_id']
|
|
||||||
configs = data.get('job_configs', {})
|
|
||||||
return u.render(job_execution=api.execute_job(job_id, input,
|
|
||||||
output, cluster,
|
|
||||||
configs).to_dict())
|
|
||||||
|
|
||||||
|
|
||||||
@rest.get('/jobs/config-hints/<job_type>')
|
@rest.get('/jobs/config-hints/<job_type>')
|
||||||
|
@ -265,6 +265,8 @@ class JobExecution(mb.SavannaBase):
|
|||||||
oozie_job_id = sa.Column(sa.String(100))
|
oozie_job_id = sa.Column(sa.String(100))
|
||||||
return_code = sa.Column(sa.String(80))
|
return_code = sa.Column(sa.String(80))
|
||||||
job_configs = sa.Column(st.JsonDictType())
|
job_configs = sa.Column(st.JsonDictType())
|
||||||
|
main_class = sa.Column(sa.String)
|
||||||
|
java_opts = sa.Column(sa.String)
|
||||||
|
|
||||||
|
|
||||||
mains_association = sa.Table("mains_association",
|
mains_association = sa.Table("mains_association",
|
||||||
|
@ -29,10 +29,28 @@ def get_job_config_hints(job_type):
|
|||||||
return w_f.get_possible_job_config(job_type)
|
return w_f.get_possible_job_config(job_type)
|
||||||
|
|
||||||
|
|
||||||
def execute_job(job_id, input_id, output_id, cluster_id, configs):
|
def execute_job(job_id, data):
|
||||||
job_ex_dict = {'input_id': input_id, 'output_id': output_id,
|
|
||||||
|
# Elements common to all job types
|
||||||
|
cluster_id = data['cluster_id']
|
||||||
|
configs = data.get('job_configs', {})
|
||||||
|
|
||||||
|
# Not in Java job types but present for all others
|
||||||
|
input_id = data.get('input_id', None)
|
||||||
|
output_id = data.get('output_id', None)
|
||||||
|
|
||||||
|
# Present for Java job types
|
||||||
|
main_class = data.get('main_class', '')
|
||||||
|
java_opts = data.get('java_opts', '')
|
||||||
|
|
||||||
|
# Since we will use a unified class in the database, we pass
|
||||||
|
# a superset for all job types
|
||||||
|
job_ex_dict = {'main_class': main_class,
|
||||||
|
'java_opts': java_opts,
|
||||||
|
'input_id': input_id, 'output_id': output_id,
|
||||||
'job_id': job_id, 'cluster_id': cluster_id,
|
'job_id': job_id, 'cluster_id': cluster_id,
|
||||||
'info': {'status': 'Pending'}, 'job_configs': configs}
|
'info': {'status': 'Pending'}, 'job_configs': configs}
|
||||||
|
|
||||||
job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)
|
job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)
|
||||||
|
|
||||||
context.spawn("Starting Job Execution %s" % job_execution.id,
|
context.spawn("Starting Job Execution %s" % job_execution.id,
|
||||||
|
@ -107,9 +107,12 @@ def run_job(job_execution):
|
|||||||
return job_execution
|
return job_execution
|
||||||
|
|
||||||
job = conductor.job_get(ctx, job_execution.job_id)
|
job = conductor.job_get(ctx, job_execution.job_id)
|
||||||
|
if job.type != 'Java':
|
||||||
input_source = conductor.data_source_get(ctx, job_execution.input_id)
|
input_source = conductor.data_source_get(ctx, job_execution.input_id)
|
||||||
output_source = conductor.data_source_get(ctx, job_execution.output_id)
|
output_source = conductor.data_source_get(ctx, job_execution.output_id)
|
||||||
|
else:
|
||||||
|
input_source = None
|
||||||
|
output_source = None
|
||||||
#TODO(nprivalova): should be removed after all features implemented
|
#TODO(nprivalova): should be removed after all features implemented
|
||||||
validate(input_source, output_source, job)
|
validate(input_source, output_source, job)
|
||||||
|
|
||||||
@ -124,8 +127,9 @@ def run_job(job_execution):
|
|||||||
# uploading hive configuration
|
# uploading hive configuration
|
||||||
creator.configure_workflow_if_needed(cluster, wf_dir)
|
creator.configure_workflow_if_needed(cluster, wf_dir)
|
||||||
|
|
||||||
wf_xml = creator.get_workflow_xml(job_execution.job_configs,
|
wf_xml = creator.get_workflow_xml(job_execution,
|
||||||
input_source, output_source)
|
input_source,
|
||||||
|
output_source)
|
||||||
|
|
||||||
path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
|
path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
|
||||||
wf_dir, wf_xml, hdfs_user)
|
wf_dir, wf_xml, hdfs_user)
|
||||||
@ -205,7 +209,8 @@ def _append_slash_if_needed(path):
|
|||||||
#TODO(nprivalova): this validation should be removed after implementing
|
#TODO(nprivalova): this validation should be removed after implementing
|
||||||
# all features
|
# all features
|
||||||
def validate(input_data, output_data, job):
|
def validate(input_data, output_data, job):
|
||||||
if input_data.type != 'swift' or output_data.type != 'swift':
|
if (input_data and input_data.type != 'swift') or\
|
||||||
|
(output_data and output_data.type != 'swift'):
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
if job.type not in ['Pig', 'MapReduce', 'Hive', 'Jar']:
|
if job.type not in ['Pig', 'MapReduce', 'Hive', 'Java', 'Jar']:
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
|
51
savanna/service/edp/workflow_creator/java_workflow.py
Normal file
51
savanna/service/edp/workflow_creator/java_workflow.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# Copyright (c) 2013 RedHat Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from savanna.service.edp.workflow_creator import base_workflow
|
||||||
|
from savanna.utils import xmlutils as x
|
||||||
|
|
||||||
|
|
||||||
|
class JavaWorkflowCreator(base_workflow.OozieWorkflowCreator):
    """Workflow creator that fills in an Oozie ``java`` action element."""

    def __init__(self):
        # 'java' is handed to the base creator as the action tag name.
        super(JavaWorkflowCreator, self).__init__('java')

    def build_workflow_xml(self, main_class,
                           prepare=None,
                           job_xml=None,
                           configuration=None,
                           java_opts=None,
                           arguments=None,
                           files=None, archives=None):
        """Add the java action's child elements to the workflow document.

        :param main_class: text of the ``main-class`` element.
        :param prepare: mapping whose items are each passed to
            ``_add_to_prepare_element``; defaults to an empty mapping.
        :param job_xml: value passed to ``_add_job_xml_element``.
        :param configuration: value passed to
            ``_add_configuration_elements``.
        :param java_opts: text of the ``java-opts`` element; the element
            is omitted when this value is empty or None.
        :param arguments: iterable of strings, one ``arg`` element each;
            defaults to no arguments.
        :param files: passed to ``_add_files_and_archives``; defaults to
            an empty list.
        :param archives: passed to ``_add_files_and_archives``; defaults
            to an empty list.
        """
        # None sentinels replace the former mutable ``{}`` / ``[]``
        # defaults, which were shared between every call of this method.
        prepare = {} if prepare is None else prepare
        arguments = [] if arguments is None else arguments
        files = [] if files is None else files
        archives = [] if archives is None else archives

        for k, v in prepare.items():
            self._add_to_prepare_element(k, v)

        self._add_job_xml_element(job_xml)

        self._add_configuration_elements(configuration)

        x.add_text_element_to_tag(self.doc, self.tag_name,
                                  'main-class', main_class)

        if java_opts:
            x.add_text_element_to_tag(self.doc, self.tag_name,
                                      'java-opts', java_opts)

        for arg in arguments:
            x.add_text_element_to_tag(self.doc, self.tag_name,
                                      'arg', arg)

        self._add_files_and_archives(files, archives)
|
@ -21,6 +21,7 @@ from savanna.plugins import base as plugin_base
|
|||||||
from savanna.plugins.general import utils as u
|
from savanna.plugins.general import utils as u
|
||||||
from savanna.service.edp import hdfs_helper as h
|
from savanna.service.edp import hdfs_helper as h
|
||||||
from savanna.service.edp.workflow_creator import hive_workflow
|
from savanna.service.edp.workflow_creator import hive_workflow
|
||||||
|
from savanna.service.edp.workflow_creator import java_workflow
|
||||||
from savanna.service.edp.workflow_creator import mapreduce_workflow
|
from savanna.service.edp.workflow_creator import mapreduce_workflow
|
||||||
from savanna.service.edp.workflow_creator import pig_workflow
|
from savanna.service.edp.workflow_creator import pig_workflow
|
||||||
from savanna.utils import remote
|
from savanna.utils import remote
|
||||||
@ -69,11 +70,11 @@ class PigFactory(BaseFactory):
|
|||||||
def get_script_name(self, job):
|
def get_script_name(self, job):
|
||||||
return conductor.job_main_name(context.ctx(), job)
|
return conductor.job_main_name(context.ctx(), job)
|
||||||
|
|
||||||
def get_workflow_xml(self, execution_configs, input_data, output_data):
|
def get_workflow_xml(self, execution, input_data, output_data):
|
||||||
configs = {'configs': self.get_configs(input_data, output_data),
|
configs = {'configs': self.get_configs(input_data, output_data),
|
||||||
'params': self.get_params(input_data, output_data),
|
'params': self.get_params(input_data, output_data),
|
||||||
'args': self.get_args()}
|
'args': self.get_args()}
|
||||||
self.update_configs(configs, execution_configs)
|
self.update_configs(configs, execution.job_configs)
|
||||||
creator = pig_workflow.PigWorkflowCreator()
|
creator = pig_workflow.PigWorkflowCreator()
|
||||||
creator.build_workflow_xml(self.name,
|
creator.build_workflow_xml(self.name,
|
||||||
configuration=configs['configs'],
|
configuration=configs['configs'],
|
||||||
@ -92,10 +93,10 @@ class HiveFactory(BaseFactory):
|
|||||||
def get_script_name(self, job):
|
def get_script_name(self, job):
|
||||||
return conductor.job_main_name(context.ctx(), job)
|
return conductor.job_main_name(context.ctx(), job)
|
||||||
|
|
||||||
def get_workflow_xml(self, execution_configs, input_data, output_data):
|
def get_workflow_xml(self, execution, input_data, output_data):
|
||||||
configs = {'configs': self.get_configs(input_data, output_data),
|
configs = {'configs': self.get_configs(input_data, output_data),
|
||||||
'params': self.get_params(input_data, output_data)}
|
'params': self.get_params(input_data, output_data)}
|
||||||
self.update_configs(configs, execution_configs)
|
self.update_configs(configs, execution.job_configs)
|
||||||
creator = hive_workflow.HiveWorkflowCreator()
|
creator = hive_workflow.HiveWorkflowCreator()
|
||||||
creator.build_workflow_xml(self.name,
|
creator.build_workflow_xml(self.name,
|
||||||
self.job_xml,
|
self.job_xml,
|
||||||
@ -120,14 +121,40 @@ class MapReduceFactory(BaseFactory):
|
|||||||
configs['mapred.output.dir'] = output_data.url
|
configs['mapred.output.dir'] = output_data.url
|
||||||
return configs
|
return configs
|
||||||
|
|
||||||
def get_workflow_xml(self, execution_configs, input_data, output_data):
|
def get_workflow_xml(self, execution, input_data, output_data):
|
||||||
configs = {'configs': self.get_configs(input_data, output_data)}
|
configs = {'configs': self.get_configs(input_data, output_data)}
|
||||||
self.update_configs(configs, execution_configs)
|
self.update_configs(configs, execution.job_configs)
|
||||||
creator = mapreduce_workflow.MapReduceWorkFlowCreator()
|
creator = mapreduce_workflow.MapReduceWorkFlowCreator()
|
||||||
creator.build_workflow_xml(configuration=configs['configs'])
|
creator.build_workflow_xml(configuration=configs['configs'])
|
||||||
return creator.get_built_workflow_xml()
|
return creator.get_built_workflow_xml()
|
||||||
|
|
||||||
|
|
||||||
|
class JavaFactory(BaseFactory):
    """Factory that renders workflow XML through JavaWorkflowCreator."""

    def get_workflow_xml(self, execution, *args, **kwargs):
        """Return the built workflow XML for the given job execution.

        Reads ``job_configs``, ``main_class`` and (when present)
        ``java_opts`` from ``execution``; extra positional and keyword
        arguments are accepted but not used.
        """
        # Input and output are passed to the main class as plain args,
        # so it is unclear here whether the swift configs should be
        # included; start from an empty 'configs' section.
        merged = {'configs': {}}
        self.update_configs(merged, execution.job_configs)

        # Args are a positional list rather than named values, so they
        # are replaced wholesale instead of being merged by update.
        merged['args'] = execution.job_configs.get('args', [])

        opts = execution.java_opts if hasattr(execution, 'java_opts') else ""

        workflow = java_workflow.JavaWorkflowCreator()
        workflow.build_workflow_xml(execution.main_class,
                                    configuration=merged['configs'],
                                    java_opts=opts,
                                    arguments=merged['args'])
        return workflow.get_built_workflow_xml()
|
||||||
|
|
||||||
|
|
||||||
def get_creator(job):
|
def get_creator(job):
|
||||||
|
|
||||||
def make_PigFactory():
|
def make_PigFactory():
|
||||||
@ -140,6 +167,7 @@ def get_creator(job):
|
|||||||
MapReduceFactory,
|
MapReduceFactory,
|
||||||
make_HiveFactory,
|
make_HiveFactory,
|
||||||
make_PigFactory,
|
make_PigFactory,
|
||||||
|
JavaFactory,
|
||||||
# Keep 'Jar' as a synonym for 'MapReduce'
|
# Keep 'Jar' as a synonym for 'MapReduce'
|
||||||
MapReduceFactory,
|
MapReduceFactory,
|
||||||
]
|
]
|
||||||
@ -151,6 +179,10 @@ def get_creator(job):
|
|||||||
def get_possible_job_config(job_type):
|
def get_possible_job_config(job_type):
|
||||||
if job_type not in get_possible_job_types():
|
if job_type not in get_possible_job_types():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if job_type == "Java":
|
||||||
|
return {'job_config': {'configs': [], 'args': []}}
|
||||||
|
|
||||||
if job_type in ['MapReduce', 'Pig', 'Jar']:
|
if job_type in ['MapReduce', 'Pig', 'Jar']:
|
||||||
#TODO(nmakhotkin) Savanna should return config based on specific plugin
|
#TODO(nmakhotkin) Savanna should return config based on specific plugin
|
||||||
cfg = xmlutils.load_hadoop_xml_defaults(
|
cfg = xmlutils.load_hadoop_xml_defaults(
|
||||||
@ -163,7 +195,7 @@ def get_possible_job_config(job_type):
|
|||||||
cfg = xmlutils.load_hadoop_xml_defaults(
|
cfg = xmlutils.load_hadoop_xml_defaults(
|
||||||
'plugins/vanilla/resources/hive-default.xml')
|
'plugins/vanilla/resources/hive-default.xml')
|
||||||
config = {'configs': cfg, "args": {}}
|
config = {'configs': cfg, "args": {}}
|
||||||
if job_type not in ['MapReduce', 'Jar']:
|
if job_type not in ['MapReduce', 'Jar', 'Java']:
|
||||||
config.update({'params': {}})
|
config.update({'params': {}})
|
||||||
return {'job_config': config}
|
return {'job_config': config}
|
||||||
|
|
||||||
@ -173,5 +205,6 @@ def get_possible_job_types():
|
|||||||
'MapReduce',
|
'MapReduce',
|
||||||
'Hive',
|
'Hive',
|
||||||
'Pig',
|
'Pig',
|
||||||
|
'Java',
|
||||||
'Jar',
|
'Jar',
|
||||||
]
|
]
|
||||||
|
@ -42,6 +42,23 @@ job_configs = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Schema fragment describing the 'job_configs' object accepted for Java
# job executions: an optional 'configs' entry and an optional 'args'
# entry, with no other keys allowed.
java_job_configs = {
    "type": "object",
    "properties": {
        # "simple_config" is not a standard JSON-schema type; presumably a
        # custom type registered by the project's validator - TODO confirm.
        "configs": {
            "type": "simple_config",
        },
        # Arguments are a list of strings.
        "args": {
            "type": "array",
            "items": {
                "type": "string",
            }
        }
    },
    "additionalProperties": False,
}
|
||||||
|
|
||||||
|
|
||||||
def check_data_source_unique_name(name):
|
def check_data_source_unique_name(name):
|
||||||
if name in [ds.name for ds in api.get_data_sources()]:
|
if name in [ds.name for ds in api.get_data_sources()]:
|
||||||
raise ex.NameAlreadyExistsException("Data source with name '%s' "
|
raise ex.NameAlreadyExistsException("Data source with name '%s' "
|
||||||
|
@ -35,6 +35,7 @@ JOB_SCHEMA = {
|
|||||||
"Pig",
|
"Pig",
|
||||||
"Hive",
|
"Hive",
|
||||||
"MapReduce",
|
"MapReduce",
|
||||||
|
"Java",
|
||||||
# Leave this here for validation of create_job,
|
# Leave this here for validation of create_job,
|
||||||
# but it will be changed to MapReduce on creation
|
# but it will be changed to MapReduce on creation
|
||||||
"Jar",
|
"Jar",
|
||||||
|
@ -13,11 +13,13 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import savanna.exceptions as ex
|
||||||
|
from savanna.service.edp import api
|
||||||
import savanna.service.validations.base as main_base
|
import savanna.service.validations.base as main_base
|
||||||
import savanna.service.validations.edp.base as b
|
import savanna.service.validations.edp.base as b
|
||||||
|
|
||||||
|
|
||||||
JOB_EXEC_SCHEMA = {
|
MR_EXEC_SCHEMA = {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"input_id": {
|
"input_id": {
|
||||||
@ -38,12 +40,50 @@ JOB_EXEC_SCHEMA = {
|
|||||||
"required": [
|
"required": [
|
||||||
"input_id",
|
"input_id",
|
||||||
"output_id",
|
"output_id",
|
||||||
"cluster_id",
|
"cluster_id"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def check_job_executor(data, **kwargs):
|
JAVA_EXEC_SCHEMA = {
|
||||||
b.check_data_source_exists(data['input_id'])
|
"type": "object",
|
||||||
b.check_data_source_exists(data['output_id'])
|
"properties": {
|
||||||
|
"main_class": {
|
||||||
|
"type": "string",
|
||||||
|
},
|
||||||
|
"java_opts": {
|
||||||
|
"type": "string",
|
||||||
|
},
|
||||||
|
"cluster_id": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "uuid",
|
||||||
|
},
|
||||||
|
"job_configs": b.java_job_configs,
|
||||||
|
},
|
||||||
|
"additionalProperties": False,
|
||||||
|
"required": [
|
||||||
|
"cluster_id",
|
||||||
|
"main_class",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Schema for a job execution request: the request body must match
# exactly one of the two schemas listed under "oneOf".
JOB_EXEC_SCHEMA = {
    "oneOf": [MR_EXEC_SCHEMA, JAVA_EXEC_SCHEMA]
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_job_executor(data, job_id):
    """Validate a job execution request against the job it targets.

    :param data: request body; 'cluster_id' is always read, and
        'input_id' / 'output_id' are read when 'input_id' is present.
    :param job_id: id used to look the job up via ``api.get_job``.
    :raises ex.InvalidException: when the presence of 'main_class' in
        the request does not agree with the job being of type 'Java'.
    """
    job = api.get_job(job_id)

    # The Java action schema is recognised by its 'main_class' key; the
    # request shape and the job type must agree with each other.
    java_shaped_request = 'main_class' in data
    java_job = job.type == 'Java'
    if java_shaped_request != java_job:
        raise ex.InvalidException("Schema is not valid for job type %s"
                                  % job.type)

    if 'input_id' in data:
        for key in ('input_id', 'output_id'):
            b.check_data_source_exists(data[key])

    main_base.check_cluster_exists(data['cluster_id'])
|
||||||
|
@ -25,6 +25,9 @@ from savanna.utils import patches as p
|
|||||||
|
|
||||||
conductor = cond.API
|
conductor = cond.API
|
||||||
|
|
||||||
|
_java_main_class = "org.apache.hadoop.examples.WordCount"
|
||||||
|
_java_opts = "-Dparam1=val1 -Dparam2=val2"
|
||||||
|
|
||||||
|
|
||||||
def _resource_passthrough(*args, **kwargs):
|
def _resource_passthrough(*args, **kwargs):
|
||||||
return True
|
return True
|
||||||
@ -105,7 +108,7 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
|
|
||||||
creator = workflow_factory.get_creator(job)
|
creator = workflow_factory.get_creator(job)
|
||||||
|
|
||||||
res = creator.get_workflow_xml(job_exec.job_configs,
|
res = creator.get_workflow_xml(job_exec,
|
||||||
input_data, output_data)
|
input_data, output_data)
|
||||||
|
|
||||||
self.assertIn("""
|
self.assertIn("""
|
||||||
@ -134,7 +137,7 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
|
|
||||||
creator = workflow_factory.get_creator(job)
|
creator = workflow_factory.get_creator(job)
|
||||||
|
|
||||||
res = creator.get_workflow_xml(job_exec.job_configs,
|
res = creator.get_workflow_xml(job_exec,
|
||||||
input_data, output_data)
|
input_data, output_data)
|
||||||
|
|
||||||
self.assertIn("""
|
self.assertIn("""
|
||||||
@ -161,19 +164,45 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
<value>admin</value>
|
<value>admin</value>
|
||||||
</property>""", res)
|
</property>""", res)
|
||||||
|
|
||||||
def test_jar_creator_is_mapreduce(self):
|
|
||||||
# Ensure that we get the MapReduce workflow factory for 'Jar' jobs
|
|
||||||
job, _ = _create_all_stack('Jar')
|
|
||||||
|
|
||||||
creator = workflow_factory.get_creator(job)
|
|
||||||
self.assertEqual(type(creator), workflow_factory.MapReduceFactory)
|
|
||||||
|
|
||||||
def test_build_workflow_for_job_mapreduce(self):
|
def test_build_workflow_for_job_mapreduce(self):
|
||||||
self._build_workflow_common('MapReduce')
|
self._build_workflow_common('MapReduce')
|
||||||
|
|
||||||
def test_build_workflow_for_job_jar(self):
|
def test_build_workflow_for_job_jar(self):
|
||||||
self._build_workflow_common('Jar')
|
self._build_workflow_common('Jar')
|
||||||
|
|
||||||
|
def test_build_workflow_for_job_java(self):
|
||||||
|
# If args include swift paths, user and password values
|
||||||
|
# will have to be supplied via configs instead of being
|
||||||
|
# lifted from input or output data sources
|
||||||
|
configs = {workflow_factory.swift_username: 'admin',
|
||||||
|
workflow_factory.swift_password: 'admin1'}
|
||||||
|
|
||||||
|
configs = {
|
||||||
|
'configs': configs,
|
||||||
|
'args': ['input_path',
|
||||||
|
'output_path']
|
||||||
|
}
|
||||||
|
|
||||||
|
job, job_exec = _create_all_stack('Java', configs)
|
||||||
|
creator = workflow_factory.get_creator(job)
|
||||||
|
res = creator.get_workflow_xml(job_exec)
|
||||||
|
|
||||||
|
self.assertIn("""
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>fs.swift.service.savanna.password</name>
|
||||||
|
<value>admin1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.swift.service.savanna.username</name>
|
||||||
|
<value>admin</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>%s</main-class>
|
||||||
|
<java-opts>%s</java-opts>
|
||||||
|
<arg>input_path</arg>
|
||||||
|
<arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
|
||||||
|
|
||||||
@mock.patch('savanna.conductor.API.job_binary_get')
|
@mock.patch('savanna.conductor.API.job_binary_get')
|
||||||
def test_build_workflow_for_job_hive(self, job_binary):
|
def test_build_workflow_for_job_hive(self, job_binary):
|
||||||
|
|
||||||
@ -185,7 +214,7 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
|
|
||||||
creator = workflow_factory.get_creator(job)
|
creator = workflow_factory.get_creator(job)
|
||||||
|
|
||||||
res = creator.get_workflow_xml(job_exec.job_configs,
|
res = creator.get_workflow_xml(job_exec,
|
||||||
input_data, output_data)
|
input_data, output_data)
|
||||||
|
|
||||||
self.assertIn("""
|
self.assertIn("""
|
||||||
@ -210,11 +239,12 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
input_data = _create_data_source('swift://ex.savanna/i')
|
input_data = _create_data_source('swift://ex.savanna/i')
|
||||||
output_data = _create_data_source('swift://ex.savanna/o')
|
output_data = _create_data_source('swift://ex.savanna/o')
|
||||||
|
|
||||||
job_exec = _create_job_exec(job.id, configs={"configs": {'c': 'f'}})
|
job_exec = _create_job_exec(job.id,
|
||||||
|
job_type, configs={"configs": {'c': 'f'}})
|
||||||
|
|
||||||
creator = workflow_factory.get_creator(job)
|
creator = workflow_factory.get_creator(job)
|
||||||
|
|
||||||
res = creator.get_workflow_xml(job_exec.job_configs,
|
res = creator.get_workflow_xml(job_exec,
|
||||||
input_data, output_data)
|
input_data, output_data)
|
||||||
|
|
||||||
self.assertIn("""
|
self.assertIn("""
|
||||||
@ -241,11 +271,18 @@ class TestJobManager(models_test_base.DbTestCase):
|
|||||||
def test_build_workflow_for_job_jar_with_conf(self):
|
def test_build_workflow_for_job_jar_with_conf(self):
|
||||||
self._build_workflow_with_conf_common('Jar')
|
self._build_workflow_with_conf_common('Jar')
|
||||||
|
|
||||||
|
def test_jar_creator_is_mapreduce(self):
|
||||||
|
# Ensure that we get the MapReduce workflow factory for 'Jar' jobs
|
||||||
|
job, _ = _create_all_stack('Jar')
|
||||||
|
|
||||||
def _create_all_stack(type):
|
creator = workflow_factory.get_creator(job)
|
||||||
|
self.assertEqual(type(creator), workflow_factory.MapReduceFactory)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_all_stack(type, configs=None):
|
||||||
b = _create_job_binary('1', type)
|
b = _create_job_binary('1', type)
|
||||||
j = _create_job('2', b, type)
|
j = _create_job('2', b, type)
|
||||||
e = _create_job_exec(j.id)
|
e = _create_job_exec(j.id, type, configs)
|
||||||
return j, e
|
return j, e
|
||||||
|
|
||||||
|
|
||||||
@ -257,7 +294,7 @@ def _create_job(id, job_binary, type):
|
|||||||
if type == 'Pig' or type == 'Hive':
|
if type == 'Pig' or type == 'Hive':
|
||||||
job.mains = [job_binary]
|
job.mains = [job_binary]
|
||||||
job.libs = None
|
job.libs = None
|
||||||
if type in ['MapReduce', 'Jar']:
|
if type in ['MapReduce', 'Jar', 'Java']:
|
||||||
job.libs = [job_binary]
|
job.libs = [job_binary]
|
||||||
job.mains = None
|
job.mains = None
|
||||||
return job
|
return job
|
||||||
@ -269,7 +306,7 @@ def _create_job_binary(id, type):
|
|||||||
binary.url = "savanna-db://42"
|
binary.url = "savanna-db://42"
|
||||||
if type == "Pig":
|
if type == "Pig":
|
||||||
binary.name = "script.pig"
|
binary.name = "script.pig"
|
||||||
if type in ['MapReduce', 'Jar']:
|
if type in ['MapReduce', 'Jar', 'Java']:
|
||||||
binary.name = "main.jar"
|
binary.name = "main.jar"
|
||||||
if type == "Hive":
|
if type == "Hive":
|
||||||
binary.name = "script.q"
|
binary.name = "script.q"
|
||||||
@ -286,8 +323,11 @@ def _create_data_source(url):
|
|||||||
return data_source
|
return data_source
|
||||||
|
|
||||||
|
|
||||||
def _create_job_exec(job_id, configs=None):
|
def _create_job_exec(job_id, type, configs=None):
|
||||||
j_exec = mock.Mock()
|
j_exec = mock.Mock()
|
||||||
j_exec.job_id = job_id
|
j_exec.job_id = job_id
|
||||||
j_exec.job_configs = configs
|
j_exec.job_configs = configs
|
||||||
|
if type == "Java":
|
||||||
|
j_exec.main_class = _java_main_class
|
||||||
|
j_exec.java_opts = _java_opts
|
||||||
return j_exec
|
return j_exec
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
import unittest2
|
import unittest2
|
||||||
|
|
||||||
from savanna.service.edp.workflow_creator import hive_workflow as hw
|
from savanna.service.edp.workflow_creator import hive_workflow as hw
|
||||||
|
from savanna.service.edp.workflow_creator import java_workflow as jw
|
||||||
from savanna.service.edp.workflow_creator import mapreduce_workflow as mrw
|
from savanna.service.edp.workflow_creator import mapreduce_workflow as mrw
|
||||||
from savanna.service.edp.workflow_creator import pig_workflow as pw
|
from savanna.service.edp.workflow_creator import pig_workflow as pw
|
||||||
from savanna.utils import patches as p
|
from savanna.utils import patches as p
|
||||||
@ -143,3 +144,46 @@ class TestPigWorkflowCreator(unittest2.TestCase):
|
|||||||
</hive>"""
|
</hive>"""
|
||||||
|
|
||||||
self.assertIn(hive_action, res)
|
self.assertIn(hive_action, res)
|
||||||
|
|
||||||
|
def test_create_java_workflow(self):
|
||||||
|
java_workflow = jw.JavaWorkflowCreator()
|
||||||
|
main_class = 'org.apache.hadoop.examples.SomeClass'
|
||||||
|
args = ['/user/hadoop/input',
|
||||||
|
'/user/hadoop/output']
|
||||||
|
java_opts = '-Dparam1=val1 -Dparam2=val2'
|
||||||
|
|
||||||
|
java_workflow.build_workflow_xml(main_class,
|
||||||
|
self.prepare,
|
||||||
|
self.job_xml, self.configuration,
|
||||||
|
java_opts, args,
|
||||||
|
self.files, self.archives)
|
||||||
|
res = java_workflow.get_built_workflow_xml()
|
||||||
|
java_action = """
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<prepare>
|
||||||
|
<mkdir path="mkdir_1"/>
|
||||||
|
<delete path="delete_dir_1"/>
|
||||||
|
<delete path="delete_dir_2"/>
|
||||||
|
</prepare>
|
||||||
|
<job-xml>job_xml.xml</job-xml>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>conf_param_1</name>
|
||||||
|
<value>conf_value_1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>conf_param_2</name>
|
||||||
|
<value>conf_value_3</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
<main-class>org.apache.hadoop.examples.SomeClass</main-class>
|
||||||
|
<java-opts>-Dparam1=val1 -Dparam2=val2</java-opts>
|
||||||
|
<arg>/user/hadoop/input</arg>
|
||||||
|
<arg>/user/hadoop/output</arg>
|
||||||
|
<file>file1</file>
|
||||||
|
<file>file2</file>
|
||||||
|
<archive>arch1</archive>
|
||||||
|
</java>"""
|
||||||
|
|
||||||
|
self.assertIn(java_action, res)
|
||||||
|
@ -24,7 +24,7 @@ class TestJobValidation(u.ValidationTestCase):
|
|||||||
self.scheme = j.JOB_SCHEMA
|
self.scheme = j.JOB_SCHEMA
|
||||||
|
|
||||||
def test_empty_mains_and_libs(self):
|
def test_empty_mains_and_libs(self):
|
||||||
for job_type in ['MapReduce', 'Jar']:
|
for job_type in ['MapReduce', 'Java', 'Jar']:
|
||||||
self._assert_create_object_validation(
|
self._assert_create_object_validation(
|
||||||
data={
|
data={
|
||||||
"name": "jar.jar",
|
"name": "jar.jar",
|
||||||
@ -51,7 +51,7 @@ class TestJobValidation(u.ValidationTestCase):
|
|||||||
"Hive flow requires main script"))
|
"Hive flow requires main script"))
|
||||||
|
|
||||||
def test_overlap_libs(self):
|
def test_overlap_libs(self):
|
||||||
for job_type in ['MapReduce', 'Jar']:
|
for job_type in ['MapReduce', 'Java', 'Jar']:
|
||||||
self._assert_create_object_validation(
|
self._assert_create_object_validation(
|
||||||
data={
|
data={
|
||||||
"name": "jar.jar",
|
"name": "jar.jar",
|
||||||
|
Loading…
Reference in New Issue
Block a user