Merge "Minor EDP refactoring"
commit 8d2c8f96ee
@@ -82,23 +82,16 @@ def update_job_statuses():
                          (je.id, e))


def _get_hdfs_user(cluster):
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    return hdfs_user
def _get_plugin(cluster):
    return plugin_base.PLUGINS.get_plugin(cluster.plugin_name)


def _create_oozie_client(cluster):
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    plugin = _get_plugin(cluster)
    return o.OozieClient(plugin.get_oozie_server_uri(cluster),
                         plugin.get_oozie_server(cluster))


def _get_oozie_server(cluster):
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    return plugin.get_oozie_server(cluster)


def cancel_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
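The hunk above introduces _get_plugin() so the "which plugin manages this cluster?" lookup lives in one place instead of being repeated in every helper. A minimal sketch of the idea, assuming a toy registry (PluginManager and the module-level PLUGINS below are illustrative stand-ins for plugin_base.PLUGINS, not Sahara's actual classes):

    class PluginManager(object):
        # Toy registry keyed by plugin name, e.g. {'vanilla': VanillaProvider()}.
        def __init__(self):
            self._plugins = {}

        def register(self, name, provider):
            self._plugins[name] = provider

        def get_plugin(self, plugin_name):
            return self._plugins[plugin_name]

    PLUGINS = PluginManager()

    def _get_plugin(cluster):
        # Single point of plugin resolution for the EDP helpers.
        return PLUGINS.get_plugin(cluster.plugin_name)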
@@ -116,6 +109,41 @@ def cancel_job(job_execution_id):
    return job_execution


def _update_job_execution_extra(job_execution, cluster):
    if CONF.use_namespaces and not CONF.use_floating_ips:
        oozie = _get_plugin(cluster).get_oozie_server(cluster)
        info = oozie.remote().get_neutron_info()
        extra = job_execution.extra.copy()
        extra['neutron'] = info

        job_execution = conductor.job_execution_update(
            context.ctx(), job_execution.id, {'extra': extra})
    return job_execution


def _get_data_sources(job_execution, job):
    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
        return None, None

    ctx = context.ctx()
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    return input_source, output_source


def _get_oozie_job_params(cluster, hdfs_user, path_to_workflow):
    plugin = _get_plugin(cluster)
    rm_path = plugin.get_resource_manager_uri(cluster)
    nn_path = plugin.get_name_node_uri(cluster)
    job_parameters = {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"}
    return job_parameters


def run_job(job_execution_id):
    try:
        _run_job(job_execution_id)
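A quick sanity check of the new helpers' contracts, mirroring the unit tests added later in this commit (the cluster, java_job and java_job_execution fixtures here are assumed for illustration, not taken from the diff):

    # _get_oozie_job_params builds the Oozie parameter dict from the plugin URIs.
    params = _get_oozie_job_params(cluster, 'hadoop', '/tmp/workflow.xml')
    assert params['user.name'] == 'hadoop'
    assert params['oozie.wf.application.path'].endswith('/tmp/workflow.xml')

    # Java jobs carry their inputs as arguments, so no data sources are fetched.
    assert _get_data_sources(java_job_execution, java_job) == (None, None)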
@@ -133,74 +161,45 @@ def run_job(job_execution_id):
def _run_job(job_execution_id):
    ctx = context.ctx()

    job_execution = conductor.job_execution_get(ctx,
                                                job_execution_id)
    job_execution = conductor.job_execution_get(ctx, job_execution_id)

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return

    if CONF.use_namespaces and not CONF.use_floating_ips:
        plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
        oozie = plugin.get_oozie_server(cluster)

        info = oozie.remote().get_neutron_info()
        extra = job_execution.extra.copy()
        extra['neutron'] = info

        job_execution = conductor.job_execution_update(ctx,
                                                       job_execution_id,
                                                       {'extra': extra})
    job_execution = _update_job_execution_extra(job_execution, cluster)

    job = conductor.job_get(ctx, job_execution.job_id)
    if not edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None
    input_source, output_source = _get_data_sources(job_execution, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    plugin = _get_plugin(cluster)
    hdfs_user = plugin.get_hdfs_user()
    oozie_server = plugin.get_oozie_server(cluster)

    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    wf_xml = creator.get_workflow_xml(cluster, job_execution,
                                      input_source, output_source)
    wf_xml = workflow_factory.get_workflow_xml(
        job, cluster, job_execution, input_source, output_source)

    path_to_workflow = upload_workflow_file(oozie_server,
                                            wf_dir, wf_xml, hdfs_user)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    rm_path = plugin.get_resource_manager_uri(cluster)
    nn_path = plugin.get_name_node_uri(cluster)

    client = _create_oozie_client(cluster)
    job_parameters = {"jobTracker": rm_path,
                      "nameNode": nn_path,
                      "user.name": hdfs_user,
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
    job_params = _get_oozie_job_params(cluster, hdfs_user, path_to_workflow)
    oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                  job_execution)
    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   {'oozie_job_id':
                                                    oozie_job_id,
                                                    'start_time':
                                                    datetime.datetime.now()})
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {'oozie_job_id': oozie_job_id,
                             'start_time': datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)


def upload_job_files(where, job_dir, job, hdfs_user):

    mains = job.mains or []
    libs = job.libs or []
    uploaded_paths = []
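Condensed for readability, _run_job after this refactoring, assembled from the new-side lines of the hunk above (a sketch only; unchanged context such as the HDFS data-source configuration loop and error handling is elided):

    def _run_job(job_execution_id):
        ctx = context.ctx()
        job_execution = conductor.job_execution_get(ctx, job_execution_id)
        cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
        if cluster.status != 'Active':
            return

        job_execution = _update_job_execution_extra(job_execution, cluster)
        job = conductor.job_get(ctx, job_execution.job_id)
        input_source, output_source = _get_data_sources(job_execution, job)

        plugin = _get_plugin(cluster)
        hdfs_user = plugin.get_hdfs_user()
        oozie_server = plugin.get_oozie_server(cluster)

        wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
        upload_job_files(oozie_server, wf_dir, job, hdfs_user)
        wf_xml = workflow_factory.get_workflow_xml(
            job, cluster, job_execution, input_source, output_source)
        path_to_workflow = upload_workflow_file(oozie_server,
                                                wf_dir, wf_xml, hdfs_user)

        client = _create_oozie_client(cluster)
        job_params = _get_oozie_job_params(cluster, hdfs_user, path_to_workflow)
        oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                      job_execution)
        job_execution = conductor.job_execution_update(
            ctx, job_execution, {'oozie_job_id': oozie_job_id,
                                 'start_time': datetime.datetime.now()})
        client.run_job(job_execution, oozie_job_id)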
@@ -208,7 +208,7 @@ class JavaFactory(BaseFactory):
        return creator.get_built_workflow_xml()


def get_creator(job):
def _get_creator(job):

    def make_PigFactory():
        return PigFactory(job)
@@ -227,6 +227,11 @@ def get_creator(job):
    return type_map[job.type]()


def get_workflow_xml(job, cluster, execution, *args, **kwargs):
    return _get_creator(job).get_workflow_xml(
        cluster, execution, *args, **kwargs)


def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None
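The effect on callers of workflow_factory, side by side (both forms appear elsewhere in this diff; the job, cluster, job_execution and data-source variables are assumed to be in scope):

    # before: callers pick a creator themselves
    creator = workflow_factory.get_creator(job)
    wf_xml = creator.get_workflow_xml(cluster, job_execution,
                                      input_source, output_source)

    # after: creator selection stays inside workflow_factory
    wf_xml = workflow_factory.get_workflow_xml(
        job, cluster, job_execution, input_source, output_source)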
@@ -101,10 +101,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <param>INPUT=swift://ex.sahara/i</param>
@@ -134,9 +132,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('hdfs://user/hadoop/out')

        creator = workflow_factory.get_creator(job)
        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <configuration>
@@ -153,10 +150,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('hdfs://user/hadoop/in')
        output_data = _create_data_source('swift://ex/o')

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <configuration>
@@ -175,10 +170,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('hdfs://user/hadoop/in')
        output_data = _create_data_source('hdfs://user/hadoop/out')

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <configuration>
@@ -201,10 +194,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        if streaming:
            self.assertIn("""
@@ -255,8 +246,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        }

        job, job_exec = _create_all_stack(edp.JOB_TYPE_JAVA, configs)
        creator = workflow_factory.get_creator(job)
        res = creator.get_workflow_xml(_create_cluster(), job_exec)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec)

        self.assertIn("""
        <configuration>
@@ -283,10 +274,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        input_data = _create_data_source('swift://ex/i')
        output_data = _create_data_source('swift://ex/o')

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
@@ -313,10 +302,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
        job_exec = _create_job_exec(job.id,
                                    job_type, configs={"configs": {'c': 'f'}})

        creator = workflow_factory.get_creator(job)

        res = creator.get_workflow_xml(_create_cluster(), job_exec,
                                       input_data, output_data)
        res = workflow_factory.get_workflow_xml(
            job, _create_cluster(), job_exec, input_data, output_data)

        self.assertIn("""
        <property>
@@ -392,6 +379,57 @@ class TestJobManager(base.SaharaWithDbTestCase):
        new_status = job_ex_upd.call_args[0][2]["info"]["status"]
        self.assertEqual('FAILED', new_status)

    def test_get_plugin(self):
        plugin = job_manager._get_plugin(_create_cluster())
        self.assertEqual("vanilla", plugin.name)

    @mock.patch('sahara.conductor.API.data_source_get')
    def test_get_data_sources(self, ds):
        job, job_exec = _create_all_stack(edp.JOB_TYPE_PIG)

        job_exec.input_id = 's1'
        job_exec.output_id = 's2'

        ds.side_effect = _conductor_data_source_get
        input_source, output_source = (
            job_manager._get_data_sources(job_exec, job))

        self.assertEqual('obj_s1', input_source)
        self.assertEqual('obj_s2', output_source)

    def test_get_data_sources_java(self):
        configs = {sw.HADOOP_SWIFT_USERNAME: 'admin',
                   sw.HADOOP_SWIFT_PASSWORD: 'admin1'}

        configs = {
            'configs': configs,
            'args': ['swift://ex/i',
                     'output_path']
        }

        job, job_exec = _create_all_stack(edp.JOB_TYPE_JAVA, configs)

        input_source, output_source = (
            job_manager._get_data_sources(job_exec, job))

        self.assertEqual(None, input_source)
        self.assertEqual(None, output_source)

    @mock.patch('sahara.service.edp.job_manager._get_plugin')
    def test_get_oozie_job_params(self, getplugin):
        plugin = mock.Mock()
        getplugin.return_value = plugin

        plugin.get_resource_manager_uri.return_value = 'http://localhost:50030'
        plugin.get_name_node_uri.return_value = 'hdfs://localhost:8020'

        cluster = _create_cluster()
        job_params = job_manager._get_oozie_job_params(cluster, 'hadoop',
                                                       '/tmp')
        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
        self.assertEqual('hadoop', job_params["user.name"])


def _create_all_stack(type, configs=None):
    b = _create_job_binary('1', type)
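In test_get_data_sources above, the mocked conductor.data_source_get is routed to the _conductor_data_source_get stub via mock's standard side_effect mechanism. A standalone sketch of how that resolves (the print call is only illustrative):

    import mock

    def _conductor_data_source_get(ctx, id):
        return "obj_" + id

    ds = mock.Mock(side_effect=_conductor_data_source_get)
    print(ds(None, 's1'))  # -> "obj_s1", the value the test then asserts on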
@@ -430,6 +468,7 @@ def _create_job_binary(id, type):
def _create_cluster():
    cluster = mock.Mock()
    cluster.plugin_name = 'vanilla'
    cluster.plugin_version = '1.2.1'
    return cluster
@@ -453,3 +492,7 @@ def _create_job_exec(job_id, type, configs=None):
    j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
    j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
    return j_exec


def _conductor_data_source_get(ctx, id):
    return "obj_" + id