Refactoring DataSources to use proxy user

Changes
* adding constants for the trust ID and domain name to swift_helper
* refactoring Oozie EDP workflows to use proxy configs when necessary
* adding tests to exercise proxy domain usage in workflows
* removing the credentials requirement for DataSource models when a
  proxy domain is used
* pruning a duplicate MapReduce test
* adding tests for DataSource creation without credentials
* adding the Keystone v3 token endpoint to Hadoop's core-site.xml when a
  proxy domain is requested

Partial-implements: blueprint edp-swift-trust-authentication
Change-Id: I38fd1c470d608c3de9d8c140228d7c9666523b23
Michael McCune 2014-08-27 16:09:54 -04:00
parent f3b2a30309
commit 02b292a459
7 changed files with 216 additions and 42 deletions

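For context, a minimal, self-contained sketch of the proxy-config injection the Oozie workflow factories perform in this change. The constant names and the shape of the proxy_configs dict mirror the diff below; the ProxyConf class is a hypothetical stand-in for the oslo.config CONF object, used here only to keep the snippet runnable.

# Sketch only: maps a job execution's proxy credentials onto the Hadoop
# Swift configuration keys added to swift_helper in this change.
HADOOP_SWIFT_USERNAME = 'fs.swift.service.sahara.username'
HADOOP_SWIFT_PASSWORD = 'fs.swift.service.sahara.password'
HADOOP_SWIFT_TRUST_ID = 'fs.swift.service.sahara.trust.id'
HADOOP_SWIFT_DOMAIN_NAME = 'fs.swift.service.sahara.domain.name'


class ProxyConf(object):
    # hypothetical stand-in for oslo.config's CONF
    proxy_user_domain_name = 'sahara_proxy_domain'


CONF = ProxyConf()


def proxy_swift_configs(proxy_configs):
    # Build the Swift config dict from per-job proxy credentials; an empty
    # dict is returned when no proxy_configs are present.
    configs = {}
    if proxy_configs:
        configs[HADOOP_SWIFT_USERNAME] = proxy_configs.get('proxy_username')
        configs[HADOOP_SWIFT_PASSWORD] = proxy_configs.get('proxy_password')
        configs[HADOOP_SWIFT_TRUST_ID] = proxy_configs.get('proxy_trust_id')
        configs[HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
    return configs


if __name__ == '__main__':
    print(proxy_swift_configs({'proxy_username': 'job_1234',
                               'proxy_password': 'secret',
                               'proxy_trust_id': 'trust'}))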

@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo.config import cfg
import six
import six.moves.urllib.parse as urlparse
@@ -29,6 +30,7 @@ from sahara.utils import xmlutils
conductor = c.API
CONF = cfg.CONF
class BaseFactory(object):
@@ -95,13 +97,25 @@ class BaseFactory(object):
self.inject_swift_url_suffix(arg) for arg in job_dict['args']]
for k, v in six.iteritems(job_dict.get('configs', {})):
job_dict['configs'][k] = self.inject_swift_url_suffix(v)
if k != 'proxy_configs':
job_dict['configs'][k] = self.inject_swift_url_suffix(v)
for k, v in six.iteritems(job_dict.get('params', {})):
job_dict['params'][k] = self.inject_swift_url_suffix(v)
def get_configs(self, input_data, output_data):
def get_configs(self, input_data, output_data, proxy_configs=None):
configs = {}
if proxy_configs:
configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
'proxy_username')
configs[sw.HADOOP_SWIFT_PASSWORD] = proxy_configs.get(
'proxy_password')
configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
'proxy_trust_id')
configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
return configs
for src in (input_data, output_data):
if src.type == "swift" and hasattr(src, "credentials"):
if "user" in src.credentials:
@@ -128,7 +142,9 @@ class PigFactory(BaseFactory):
def get_workflow_xml(self, cluster, execution, input_data, output_data,
hdfs_user):
job_dict = {'configs': self.get_configs(input_data, output_data),
proxy_configs = execution.job_configs.get('proxy_configs')
job_dict = {'configs': self.get_configs(input_data, output_data,
proxy_configs),
'params': self.get_params(input_data, output_data),
'args': []}
self.update_job_dict(job_dict, execution.job_configs)
@@ -151,7 +167,9 @@ class HiveFactory(BaseFactory):
def get_workflow_xml(self, cluster, execution, input_data, output_data,
hdfs_user):
job_dict = {'configs': self.get_configs(input_data, output_data),
proxy_configs = execution.job_configs.get('proxy_configs')
job_dict = {'configs': self.get_configs(input_data, output_data,
proxy_configs),
'params': self.get_params(input_data, output_data)}
self.update_job_dict(job_dict, execution.job_configs)
@@ -165,9 +183,10 @@ class HiveFactory(BaseFactory):
class MapReduceFactory(BaseFactory):
def get_configs(self, input_data, output_data):
def get_configs(self, input_data, output_data, proxy_configs):
configs = super(MapReduceFactory, self).get_configs(input_data,
output_data)
output_data,
proxy_configs)
configs['mapred.input.dir'] = input_data.url
configs['mapred.output.dir'] = output_data.url
return configs
@@ -179,7 +198,9 @@ class MapReduceFactory(BaseFactory):
def get_workflow_xml(self, cluster, execution, input_data, output_data,
hdfs_user):
job_dict = {'configs': self.get_configs(input_data, output_data)}
proxy_configs = execution.job_configs.get('proxy_configs')
job_dict = {'configs': self.get_configs(input_data, output_data,
proxy_configs)}
self.update_job_dict(job_dict, execution.job_configs)
creator = mapreduce_workflow.MapReduceWorkFlowCreator()
creator.build_workflow_xml(configuration=job_dict['configs'],
@@ -194,8 +215,24 @@ class JavaFactory(BaseFactory):
java_opts = job_dict['edp_configs'].get('edp.java.java_opts', None)
return main_class, java_opts
def get_configs(self, proxy_configs=None):
configs = {}
if proxy_configs:
configs[sw.HADOOP_SWIFT_USERNAME] = proxy_configs.get(
'proxy_username')
configs[sw.HADOOP_SWIFT_PASSWORD] = proxy_configs.get(
'proxy_password')
configs[sw.HADOOP_SWIFT_TRUST_ID] = proxy_configs.get(
'proxy_trust_id')
configs[sw.HADOOP_SWIFT_DOMAIN_NAME] = CONF.proxy_user_domain_name
return configs
return configs
def get_workflow_xml(self, cluster, execution, *args, **kwargs):
job_dict = {'configs': {},
proxy_configs = execution.job_configs.get('proxy_configs')
job_dict = {'configs': self.get_configs(proxy_configs=proxy_configs),
'args': []}
self.update_job_dict(job_dict, execution.job_configs)


@@ -13,12 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo.config import cfg
import six.moves.urllib.parse as urlparse
import sahara.exceptions as ex
import sahara.service.validations.edp.base as b
from sahara.swift import utils as su
CONF = cfg.CONF
DATA_SOURCE_SCHEMA = {
"type": "object",
@@ -76,12 +78,14 @@ def _check_swift_data_source_create(data):
"URL must be of the form swift://container%s/object"
% su.SWIFT_URL_SUFFIX)
if "credentials" not in data:
if not CONF.use_domain_for_proxy_users and "credentials" not in data:
raise ex.InvalidCredentials("No credentials provided for Swift")
if "user" not in data["credentials"]:
if not CONF.use_domain_for_proxy_users and (
"user" not in data["credentials"]):
raise ex.InvalidCredentials(
"User is not provided in credentials for Swift")
if "password" not in data["credentials"]:
if not CONF.use_domain_for_proxy_users and (
"password" not in data["credentials"]):
raise ex.InvalidCredentials(
"Password is not provided in credentials for Swift")

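As a reading aid, a minimal sketch of the relaxed credential check shown above, assuming a boolean use_domain_for_proxy_users flag; InvalidCredentials stands in for sahara.exceptions.InvalidCredentials and is redefined only to keep the snippet self-contained.

class InvalidCredentials(Exception):
    pass


def check_swift_credentials(data, use_domain_for_proxy_users=False):
    # When a proxy domain is in use, per-job proxy users supply the Swift
    # credentials, so the data source may omit them entirely.
    if use_domain_for_proxy_users:
        return
    credentials = data.get('credentials')
    if not credentials:
        raise InvalidCredentials("No credentials provided for Swift")
    if 'user' not in credentials:
        raise InvalidCredentials(
            "User is not provided in credentials for Swift")
    if 'password' not in credentials:
        raise InvalidCredentials(
            "Password is not provided in credentials for Swift")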

@@ -30,6 +30,8 @@ HADOOP_SWIFT_TENANT = 'fs.swift.service.sahara.tenant'
HADOOP_SWIFT_USERNAME = 'fs.swift.service.sahara.username'
HADOOP_SWIFT_PASSWORD = 'fs.swift.service.sahara.password'
HADOOP_SWIFT_REGION = 'fs.swift.service.sahara.region'
HADOOP_SWIFT_TRUST_ID = 'fs.swift.service.sahara.trust.id'
HADOOP_SWIFT_DOMAIN_NAME = 'fs.swift.service.sahara.domain.name'
def retrieve_tenant():


@@ -34,7 +34,15 @@ def retrieve_auth_url():
"""
info = urlparse.urlparse(context.current().auth_uri)
return "%s://%s:%s/%s/" % (info.scheme, info.hostname, info.port, 'v2.0')
if CONF.use_domain_for_proxy_users:
url = 'v3/auth'
else:
url = 'v2.0'
return '{scheme}://{hostname}:{port}/{url}/'.format(scheme=info.scheme,
hostname=info.hostname,
port=info.port,
url=url)
def retrieve_preauth_url():

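For illustration, a minimal sketch of the endpoint selection in retrieve_auth_url above, with the auth URI passed in explicitly instead of being read from the request context (urllib.parse is used so the snippet runs standalone):

import urllib.parse as urlparse


def auth_url(auth_uri, use_domain_for_proxy_users=False):
    # Proxy users authenticate against Keystone v3 (trust-scoped tokens),
    # so the v3 auth path is emitted when the proxy domain is enabled.
    info = urlparse.urlparse(auth_uri)
    url = 'v3/auth' if use_domain_for_proxy_users else 'v2.0'
    return '{scheme}://{hostname}:{port}/{url}/'.format(
        scheme=info.scheme, hostname=info.hostname, port=info.port, url=url)


# auth_url('http://10.0.0.1:5000/v2.0', use_domain_for_proxy_users=True)
# -> 'http://10.0.0.1:5000/v3/auth/'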

@@ -26,10 +26,11 @@ _java_main_class = "org.apache.hadoop.examples.WordCount"
_java_opts = "-Dparam1=val1 -Dparam2=val2"
def create_job_exec(type, configs=None):
def create_job_exec(type, configs=None, proxy=False):
b = create_job_binary('1', type)
j = _create_job('2', b, type)
e = _create_job_exec(j.id, type, configs)
_cje_func = _create_job_exec_with_proxy if proxy else _create_job_exec
e = _cje_func(j.id, type, configs)
return j, e
@@ -87,3 +88,16 @@ def _create_job_exec(job_id, type, configs=None):
j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
return j_exec
def _create_job_exec_with_proxy(job_id, type, configs=None):
j_exec = _create_job_exec(job_id, type, configs)
j_exec.id = '00000000-1111-2222-3333-4444444444444444'
if not j_exec.job_configs:
j_exec.job_configs = {}
j_exec.job_configs['proxy_configs'] = {
'proxy_username': 'job_' + j_exec.id,
'proxy_password': '55555555-6666-7777-8888-999999999999',
'proxy_trust_id': '0123456789abcdef0123456789abcdef'
}
return j_exec


@@ -112,7 +112,7 @@ class TestJobManager(base.SaharaWithDbTestCase):
@mock.patch('sahara.conductor.API.job_binary_get')
def test_build_workflow_for_job_pig(self, job_binary):
job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
job_binary.return_value = {"name": "script.pig"}
input_data = u.create_data_source('swift://ex/i')
@@ -140,11 +140,40 @@ class TestJobManager(base.SaharaWithDbTestCase):
self.assertIn("<script>script.pig</script>", res)
# testing workflow creation with a proxy domain
self.override_config('use_domain_for_proxy_users', True)
self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, proxy=True)
res = workflow_factory.get_workflow_xml(
job, u.create_cluster(), job_exec, input_data, output_data,
'hadoop')
self.assertIn("""
<configuration>
<property>
<name>fs.swift.service.sahara.domain.name</name>
<value>sahara_proxy_domain</value>
</property>
<property>
<name>fs.swift.service.sahara.password</name>
<value>55555555-6666-7777-8888-999999999999</value>
</property>
<property>
<name>fs.swift.service.sahara.trust.id</name>
<value>0123456789abcdef0123456789abcdef</value>
</property>
<property>
<name>fs.swift.service.sahara.username</name>
<value>job_00000000-1111-2222-3333-4444444444444444</value>
</property>
</configuration>""", res)
@mock.patch('sahara.conductor.API.job_binary_get')
def test_build_workflow_swift_configs(self, job_binary):
# Test that swift configs come from either input or output data sources
job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG)
job, job_exec = u.create_job_exec(edp.JOB_TYPE_PIG, configs={})
job_binary.return_value = {"name": "script.pig"}
input_data = u.create_data_source('swift://ex/i')
@@ -202,7 +231,7 @@ class TestJobManager(base.SaharaWithDbTestCase):
</property>
</configuration>""", res)
def _build_workflow_common(self, job_type, streaming=False):
def _build_workflow_common(self, job_type, streaming=False, proxy=False):
if streaming:
configs = {'edp.streaming.mapper': '/usr/bin/cat',
'edp.streaming.reducer': '/usr/bin/wc'}
@@ -238,21 +267,53 @@ class TestJobManager(base.SaharaWithDbTestCase):
<value>swift://ex.sahara/i</value>
</property>""", res)
self.assertIn("""
if not proxy:
self.assertIn("""
<property>
<name>fs.swift.service.sahara.password</name>
<value>admin1</value>
</property>""", res)
self.assertIn("""
self.assertIn("""
<property>
<name>fs.swift.service.sahara.username</name>
<value>admin</value>
</property>""", res)
else:
# testing workflow creation with a proxy domain
self.override_config('use_domain_for_proxy_users', True)
self.override_config("proxy_user_domain_name",
'sahara_proxy_domain')
job, job_exec = u.create_job_exec(job_type, proxy=True)
res = workflow_factory.get_workflow_xml(
job, u.create_cluster(), job_exec, input_data, output_data,
'hadoop')
self.assertIn("""
<property>
<name>fs.swift.service.sahara.domain.name</name>
<value>sahara_proxy_domain</value>
</property>
<property>
<name>fs.swift.service.sahara.password</name>
<value>55555555-6666-7777-8888-999999999999</value>
</property>
<property>
<name>fs.swift.service.sahara.trust.id</name>
<value>0123456789abcdef0123456789abcdef</value>
</property>
<property>
<name>fs.swift.service.sahara.username</name>
<value>job_00000000-1111-2222-3333-4444444444444444</value>
</property>""", res)
def test_build_workflow_for_job_mapreduce(self):
self._build_workflow_common(edp.JOB_TYPE_MAPREDUCE)
self._build_workflow_common(edp.JOB_TYPE_MAPREDUCE, streaming=True)
self._build_workflow_common(edp.JOB_TYPE_MAPREDUCE, proxy=True)
self._build_workflow_common(edp.JOB_TYPE_MAPREDUCE, streaming=True,
proxy=True)
def test_build_workflow_for_job_java(self):
# If args include swift paths, user and password values
@@ -287,10 +348,48 @@ class TestJobManager(base.SaharaWithDbTestCase):
<arg>swift://ex.sahara/i</arg>
<arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
# testing workflow creation with a proxy domain
self.override_config('use_domain_for_proxy_users', True)
self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
configs = {
'configs': {},
'args': ['swift://ex/i',
'output_path']
}
job, job_exec = u.create_job_exec(edp.JOB_TYPE_JAVA, configs,
proxy=True)
res = workflow_factory.get_workflow_xml(job, u.create_cluster(),
job_exec)
self.assertIn("""
<configuration>
<property>
<name>fs.swift.service.sahara.domain.name</name>
<value>sahara_proxy_domain</value>
</property>
<property>
<name>fs.swift.service.sahara.password</name>
<value>55555555-6666-7777-8888-999999999999</value>
</property>
<property>
<name>fs.swift.service.sahara.trust.id</name>
<value>0123456789abcdef0123456789abcdef</value>
</property>
<property>
<name>fs.swift.service.sahara.username</name>
<value>job_00000000-1111-2222-3333-4444444444444444</value>
</property>
</configuration>
<main-class>%s</main-class>
<java-opts>%s</java-opts>
<arg>swift://ex.sahara/i</arg>
<arg>output_path</arg>""" % (_java_main_class, _java_opts), res)
@mock.patch('sahara.conductor.API.job_binary_get')
def test_build_workflow_for_job_hive(self, job_binary):
job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE)
job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, configs={})
job_binary.return_value = {"name": "script.q"}
input_data = u.create_data_source('swift://ex/i')
@@ -316,38 +415,39 @@ class TestJobManager(base.SaharaWithDbTestCase):
<param>INPUT=swift://ex.sahara/i</param>
<param>OUTPUT=swift://ex.sahara/o</param>""", res)
def _build_workflow_with_conf_common(self, job_type):
# testing workflow creation with a proxy domain
self.override_config('use_domain_for_proxy_users', True)
self.override_config("proxy_user_domain_name", 'sahara_proxy_domain')
input_data = u.create_data_source('swift://ex/i')
output_data = u.create_data_source('swift://ex/o')
job, job_exec = u.create_job_exec(job_type,
configs={"configs": {'c': 'f'}})
job, job_exec = u.create_job_exec(edp.JOB_TYPE_HIVE, proxy=True)
res = workflow_factory.get_workflow_xml(
job, u.create_cluster(), job_exec, input_data, output_data,
'hadoop')
self.assertIn("""
<job-xml>/user/hadoop/conf/hive-site.xml</job-xml>
<configuration>
<property>
<name>c</name>
<value>f</value>
</property>""", res)
self.assertIn("""
<name>fs.swift.service.sahara.domain.name</name>
<value>sahara_proxy_domain</value>
</property>
<property>
<name>mapred.input.dir</name>
<value>swift://ex.sahara/i</value>
</property>""", res)
self.assertIn("""
<name>fs.swift.service.sahara.password</name>
<value>55555555-6666-7777-8888-999999999999</value>
</property>
<property>
<name>mapred.output.dir</name>
<value>swift://ex.sahara/o</value>
</property>""", res)
def test_build_workflow_for_job_mapreduce_with_conf(self):
self._build_workflow_with_conf_common(edp.JOB_TYPE_MAPREDUCE)
<name>fs.swift.service.sahara.trust.id</name>
<value>0123456789abcdef0123456789abcdef</value>
</property>
<property>
<name>fs.swift.service.sahara.username</name>
<value>job_00000000-1111-2222-3333-4444444444444444</value>
</property>
</configuration>
<script>script.q</script>
<param>INPUT=swift://ex.sahara/i</param>
<param>OUTPUT=swift://ex.sahara/o</param>""", res)
def test_update_job_dict(self):
w = workflow_factory.BaseFactory()


@@ -60,6 +60,9 @@ class TestDataSourceValidation(u.ValidationTestCase):
}
with testtools.ExpectedException(ex.InvalidCredentials):
ds.check_data_source_create(data)
# proxy enabled should allow creation without credentials
self.override_config('use_domain_for_proxy_users', True)
ds.check_data_source_create(data)
@mock.patch("sahara.service.validations."
"edp.base.check_data_source_unique_name")
@@ -79,6 +82,9 @@ class TestDataSourceValidation(u.ValidationTestCase):
}
with testtools.ExpectedException(ex.InvalidCredentials):
ds.check_data_source_create(data)
# proxy enabled should allow creation without credentials
self.override_config('use_domain_for_proxy_users', True)
ds.check_data_source_create(data)
@mock.patch("sahara.service.validations."
"edp.base.check_data_source_unique_name")
@@ -98,6 +104,9 @@ class TestDataSourceValidation(u.ValidationTestCase):
}
with testtools.ExpectedException(ex.InvalidCredentials):
ds.check_data_source_create(data)
# proxy enabled should allow creation without credentials
self.override_config('use_domain_for_proxy_users', True)
ds.check_data_source_create(data)
@mock.patch("sahara.service.validations."
"edp.base.check_data_source_unique_name")