Merge "Minor EDP refactoring"

Jenkins 2014-06-16 06:56:24 +00:00 committed by Gerrit Code Review
commit 8d2c8f96ee
3 changed files with 130 additions and 83 deletions

View File

@@ -82,23 +82,16 @@ def update_job_statuses():
                          (je.id, e))


-def _get_hdfs_user(cluster):
-    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
-    hdfs_user = plugin.get_hdfs_user()
-    return hdfs_user
+def _get_plugin(cluster):
+    return plugin_base.PLUGINS.get_plugin(cluster.plugin_name)


 def _create_oozie_client(cluster):
-    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
+    plugin = _get_plugin(cluster)
     return o.OozieClient(plugin.get_oozie_server_uri(cluster),
                          plugin.get_oozie_server(cluster))


-def _get_oozie_server(cluster):
-    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
-    return plugin.get_oozie_server(cluster)
-
-
 def cancel_job(job_execution_id):
     ctx = context.ctx()
     job_execution = conductor.job_execution_get(ctx, job_execution_id)
@@ -116,6 +109,41 @@ def cancel_job(job_execution_id):
     return job_execution


+def _update_job_execution_extra(job_execution, cluster):
+    if CONF.use_namespaces and not CONF.use_floating_ips:
+        oozie = _get_plugin(cluster).get_oozie_server(cluster)
+        info = oozie.remote().get_neutron_info()
+        extra = job_execution.extra.copy()
+        extra['neutron'] = info
+
+        job_execution = conductor.job_execution_update(
+            context.ctx(), job_execution.id, {'extra': extra})
+    return job_execution
+
+
+def _get_data_sources(job_execution, job):
+    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
+        return None, None
+
+    ctx = context.ctx()
+    input_source = conductor.data_source_get(ctx, job_execution.input_id)
+    output_source = conductor.data_source_get(ctx, job_execution.output_id)
+    return input_source, output_source
+
+
+def _get_oozie_job_params(cluster, hdfs_user, path_to_workflow):
+    plugin = _get_plugin(cluster)
+    rm_path = plugin.get_resource_manager_uri(cluster)
+    nn_path = plugin.get_name_node_uri(cluster)
+    job_parameters = {
+        "jobTracker": rm_path,
+        "nameNode": nn_path,
+        "user.name": hdfs_user,
+        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
+        "oozie.use.system.libpath": "true"}
+    return job_parameters
+
+
 def run_job(job_execution_id):
     try:
         _run_job(job_execution_id)
@@ -133,74 +161,45 @@ def run_job(job_execution_id):

 def _run_job(job_execution_id):
     ctx = context.ctx()

-    job_execution = conductor.job_execution_get(ctx,
-                                                job_execution_id)
+    job_execution = conductor.job_execution_get(ctx, job_execution_id)
     cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
     if cluster.status != 'Active':
         return

-    if CONF.use_namespaces and not CONF.use_floating_ips:
-        plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
-        oozie = plugin.get_oozie_server(cluster)
-        info = oozie.remote().get_neutron_info()
-        extra = job_execution.extra.copy()
-        extra['neutron'] = info
-
-        job_execution = conductor.job_execution_update(ctx,
-                                                       job_execution_id,
-                                                       {'extra': extra})
+    job_execution = _update_job_execution_extra(job_execution, cluster)

     job = conductor.job_get(ctx, job_execution.job_id)
-    if not edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
-        input_source = conductor.data_source_get(ctx, job_execution.input_id)
-        output_source = conductor.data_source_get(ctx, job_execution.output_id)
-    else:
-        input_source = None
-        output_source = None
+    input_source, output_source = _get_data_sources(job_execution, job)

     for data_source in [input_source, output_source]:
         if data_source and data_source.type == 'hdfs':
             h.configure_cluster_for_hdfs(cluster, data_source)

-    hdfs_user = _get_hdfs_user(cluster)
-    oozie_server = _get_oozie_server(cluster)
+    plugin = _get_plugin(cluster)
+    hdfs_user = plugin.get_hdfs_user()
+    oozie_server = plugin.get_oozie_server(cluster)

     wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
     upload_job_files(oozie_server, wf_dir, job, hdfs_user)

-    creator = workflow_factory.get_creator(job)
-    wf_xml = creator.get_workflow_xml(cluster, job_execution,
-                                      input_source, output_source)
+    wf_xml = workflow_factory.get_workflow_xml(
+        job, cluster, job_execution, input_source, output_source)

     path_to_workflow = upload_workflow_file(oozie_server,
                                             wf_dir, wf_xml, hdfs_user)

-    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
-    rm_path = plugin.get_resource_manager_uri(cluster)
-    nn_path = plugin.get_name_node_uri(cluster)
-
     client = _create_oozie_client(cluster)
-    job_parameters = {"jobTracker": rm_path,
-                      "nameNode": nn_path,
-                      "user.name": hdfs_user,
-                      "oozie.wf.application.path":
-                      "%s%s" % (nn_path, path_to_workflow),
-                      "oozie.use.system.libpath": "true"}
-
-    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
+    job_params = _get_oozie_job_params(cluster, hdfs_user, path_to_workflow)
+    oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                   job_execution)
-    job_execution = conductor.job_execution_update(ctx, job_execution,
-                                                   {'oozie_job_id':
-                                                    oozie_job_id,
-                                                    'start_time':
-                                                    datetime.datetime.now()})
+    job_execution = conductor.job_execution_update(
+        ctx, job_execution, {'oozie_job_id': oozie_job_id,
                             'start_time': datetime.datetime.now()})

     client.run_job(job_execution, oozie_job_id)


 def upload_job_files(where, job_dir, job, hdfs_user):
     mains = job.mains or []
     libs = job.libs or []
     uploaded_paths = []
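
Note on the hunk above: the refactored _run_job delegates the Oozie parameter assembly to the new _get_oozie_job_params helper. The stand-alone sketch below mirrors that helper's dictionary so it can be tried outside Sahara; the function name, example URIs and workflow path in the call at the end are illustrative only and are not part of this change.

# Stand-alone sketch of the parameter dict built by _get_oozie_job_params.
def build_oozie_job_params(rm_path, nn_path, hdfs_user, path_to_workflow):
    return {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"}

# Example invocation with made-up endpoints.
print(build_oozie_job_params("http://rm.example:8032",
                             "hdfs://nn.example:8020",
                             "hadoop", "/user/hadoop/wf-dir"))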

View File

@@ -208,7 +208,7 @@ class JavaFactory(BaseFactory):
         return creator.get_built_workflow_xml()


-def get_creator(job):
+def _get_creator(job):
     def make_PigFactory():
         return PigFactory(job)
@@ -227,6 +227,11 @@ def get_creator(job):
     return type_map[job.type]()


+def get_workflow_xml(job, cluster, execution, *args, **kwargs):
+    return _get_creator(job).get_workflow_xml(
+        cluster, execution, *args, **kwargs)
+
+
 def get_possible_job_config(job_type):
     if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
         return None
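
The hunk above makes the factory lookup private (_get_creator) and adds a module-level get_workflow_xml wrapper, so callers no longer handle per-type creator objects themselves. A minimal, self-contained sketch of that delegation pattern follows; the PigFactory stub, the dict-based job and the type map are simplified stand-ins, not Sahara's real classes.

# Simplified stand-in for a per-type factory kept private to the module.
class PigFactory(object):
    def get_workflow_xml(self, cluster, execution, *args, **kwargs):
        return "<workflow-app name='pig-wf'/>"


_type_map = {'Pig': PigFactory}


def _get_creator(job):
    # The factory lookup stays an implementation detail of the module.
    return _type_map[job['type']]()


def get_workflow_xml(job, cluster, execution, *args, **kwargs):
    # Public entry point, shaped like the wrapper added above.
    return _get_creator(job).get_workflow_xml(
        cluster, execution, *args, **kwargs)


print(get_workflow_xml({'type': 'Pig'}, cluster=None, execution=None))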

View File

@@ -101,10 +101,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('swift://ex/i')
         output_data = _create_data_source('swift://ex/o')

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
      <param>INPUT=swift://ex.sahara/i</param>
@@ -134,9 +132,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('swift://ex/i')
         output_data = _create_data_source('hdfs://user/hadoop/out')

-        creator = workflow_factory.get_creator(job)
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
      <configuration>
@@ -153,10 +150,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('hdfs://user/hadoop/in')
         output_data = _create_data_source('swift://ex/o')

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
      <configuration>
@@ -175,10 +170,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('hdfs://user/hadoop/in')
         output_data = _create_data_source('hdfs://user/hadoop/out')

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
      <configuration>
@@ -201,10 +194,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('swift://ex/i')
         output_data = _create_data_source('swift://ex/o')

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         if streaming:
             self.assertIn("""
@@ -255,8 +246,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         }

         job, job_exec = _create_all_stack(edp.JOB_TYPE_JAVA, configs)
-        creator = workflow_factory.get_creator(job)
-        res = creator.get_workflow_xml(_create_cluster(), job_exec)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec)

         self.assertIn("""
      <configuration>
@@ -283,10 +274,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         input_data = _create_data_source('swift://ex/i')
         output_data = _create_data_source('swift://ex/o')

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
      <job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
@@ -313,10 +302,8 @@ class TestJobManager(base.SaharaWithDbTestCase):
         job_exec = _create_job_exec(job.id,
                                     job_type, configs={"configs": {'c': 'f'}})

-        creator = workflow_factory.get_creator(job)
-
-        res = creator.get_workflow_xml(_create_cluster(), job_exec,
-                                       input_data, output_data)
+        res = workflow_factory.get_workflow_xml(
+            job, _create_cluster(), job_exec, input_data, output_data)

         self.assertIn("""
             <property>
@@ -392,6 +379,57 @@ class TestJobManager(base.SaharaWithDbTestCase):
         new_status = job_ex_upd.call_args[0][2]["info"]["status"]
         self.assertEqual('FAILED', new_status)

+    def test_get_plugin(self):
+        plugin = job_manager._get_plugin(_create_cluster())
+        self.assertEqual("vanilla", plugin.name)
+
+    @mock.patch('sahara.conductor.API.data_source_get')
+    def test_get_data_sources(self, ds):
+        job, job_exec = _create_all_stack(edp.JOB_TYPE_PIG)
+
+        job_exec.input_id = 's1'
+        job_exec.output_id = 's2'
+
+        ds.side_effect = _conductor_data_source_get
+        input_source, output_source = (
+            job_manager._get_data_sources(job_exec, job))
+
+        self.assertEqual('obj_s1', input_source)
+        self.assertEqual('obj_s2', output_source)
+
+    def test_get_data_sources_java(self):
+        configs = {sw.HADOOP_SWIFT_USERNAME: 'admin',
+                   sw.HADOOP_SWIFT_PASSWORD: 'admin1'}
+
+        configs = {
+            'configs': configs,
+            'args': ['swift://ex/i',
+                     'output_path']
+        }
+
+        job, job_exec = _create_all_stack(edp.JOB_TYPE_JAVA, configs)
+        input_source, output_source = (
+            job_manager._get_data_sources(job_exec, job))
+
+        self.assertEqual(None, input_source)
+        self.assertEqual(None, output_source)
+
+    @mock.patch('sahara.service.edp.job_manager._get_plugin')
+    def test_get_oozie_job_params(self, getplugin):
+        plugin = mock.Mock()
+        getplugin.return_value = plugin
+
+        plugin.get_resource_manager_uri.return_value = 'http://localhost:50030'
+        plugin.get_name_node_uri.return_value = 'hdfs://localhost:8020'
+
+        cluster = _create_cluster()
+        job_params = job_manager._get_oozie_job_params(cluster, 'hadoop',
+                                                       '/tmp')
+
+        self.assertEqual('http://localhost:50030', job_params["jobTracker"])
+        self.assertEqual('hdfs://localhost:8020', job_params["nameNode"])
+        self.assertEqual('hadoop', job_params["user.name"])
+

 def _create_all_stack(type, configs=None):
     b = _create_job_binary('1', type)
@@ -430,6 +468,7 @@ def _create_job_binary(id, type):

 def _create_cluster():
     cluster = mock.Mock()
     cluster.plugin_name = 'vanilla'
+    cluster.plugin_version = '1.2.1'
     return cluster
@@ -453,3 +492,7 @@ def _create_job_exec(job_id, type, configs=None):
         j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
         j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
     return j_exec
+
+
+def _conductor_data_source_get(ctx, id):
+    return "obj_" + id