Add secret_data to zuul_return

So that a job may provide sensitive data to a child job without
those data ending up in the inventory file (and therefore in the
log archive), add a secret_data attribute to zuul_return.

Change-Id: I7cb8bed585eb6e94009647f490b9341927266e8f
Story: 2008389
James E. Blair 2021-05-21 10:10:26 -07:00
parent f69ad20713
commit 04f203f03a
13 changed files with 162 additions and 16 deletions

View File

@@ -792,6 +792,21 @@ path to a JSON-formatted file with that data. For example:
- zuul_return:
file: /path/to/data.json
Normally returned data are provided to dependent jobs in the inventory
file, which may end up in the log archive of a job. In the case where
sensitive data must be provided to dependent jobs, the ``secret_data``
attribute may be used instead, and the data will be provided via the
same mechanism as job secrets, where the data are not written to disk
in the work directory. Care must still be taken to avoid displaying
or storing sensitive data within the job. For example:
.. code-block:: yaml
tasks:
- zuul_return:
secret_data:
password: foobar
.. TODO: xref to section describing formatting
Any values other than those in the ``zuul`` hierarchy will be supplied

View File

@@ -0,0 +1,19 @@
---
features:
- |
If sensitive data must be returned from a job in order to be
provided to dependent jobs, the ``secret_data`` attribute of
``zuul_return`` may now be used instead of the normal
``data`` attribute. The data will be provided via the same
mechanism as job secrets, where the data are not written to disk
in the work directory. Care must still be taken to avoid
displaying or storing sensitive data within the job. For example:
.. code-block:: yaml
tasks:
- zuul_return:
secret_data:
password: foobar
data:
this_is: not secret

View File

@@ -2948,7 +2948,7 @@ class FakeBuild(object):
if data is None:
return
with open(self.jobdir.result_data_file, 'w') as f:
f.write(json.dumps(data))
f.write(json.dumps({'data': data}))
def hasChanges(self, *changes):
"""Return whether this build has certain changes in its git repos.

View File

@@ -1,7 +1,13 @@
- hosts: localhost
- hosts: node
tasks:
- name: Assert returned variables are valid
assert:
that:
- child.value1 == 'data-return-relative'
- child.value2 == 'data-return'
- returned_secret_password == 'fromreturn'
- jobvar == 'job'
- hostvar == 'host'
- groupvar == 'group'
- extravar == 'extra'
- test_secret.password == 'fromsecret'

View File

@@ -1,6 +1,16 @@
- hosts: localhost
tasks:
- zuul_return:
secret_data:
# This one should get through
returned_secret_password: fromreturn
# Everything else should be superseded by precedence
test_secret:
password: fromreturn
jobvar: return
hostvar: return
groupvar: return
extravar: return
data:
zuul:
log_url: http://example.com/test/log/url/

View File

@@ -77,6 +77,12 @@
name: data-return-cd
run: playbooks/data-return-cd.yaml
# Used to test returned secret data precedence.
- secret:
name: test_secret
data:
password: fromsecret
# This child job will be skipped in the test case test_data_return_child_jobs.
# In order to verify that this doesn't lead to node leaks, attach a nodeset to
# it. Each test case automatically verifies that there are no open node
@@ -88,6 +94,23 @@
nodes:
- name: node
label: test
groups:
- name: group1
nodes: node
# Include a bunch of variables + secret to test returned secret
# data precedence.
secrets:
- test_secret
vars:
jobvar: job
host-vars:
node:
hostvar: host
group-vars:
group1:
groupvar: group
extra-vars:
extravar: extra
- job:
name: several-zuul-return-parent

View File

@@ -4464,7 +4464,7 @@ class TestDataReturn(AnsibleZuulTestCase):
# zuul_return data is set correctly
j = json.loads(_get_file(paused_job.jobdir.result_data_file))
self.assertEqual(j["build_id"], paused_job.uuid)
self.assertEqual(j["data"]["build_id"], paused_job.uuid)
# Stop the job worker to simulate an executor restart
for job_worker in self.executor_server.job_workers.values():
@@ -4492,7 +4492,7 @@ class TestDataReturn(AnsibleZuulTestCase):
# zuul_return data is set correctly to the new build id
j = json.loads(_get_file(retried_build.jobdir.result_data_file))
self.assertEqual(j["build_id"], retried_build.uuid)
self.assertEqual(j["data"]["build_id"], retried_build.uuid)
self.assertNotIn(first_build.uuid,
_get_file(print_build.jobdir.job_output_file))

View File

@@ -1058,6 +1058,7 @@ class TestWeb(BaseTestWeb):
'extra_vars': {},
'host_vars': {},
'group_vars': {},
'secret_vars': None,
'zuul': {
'_inheritance_path': [
'<Job base branches: None source: '

View File

@@ -86,33 +86,42 @@ def merge_file_comments(dict_a, dict_b):
return file_comments
def set_value(path, new_data, new_file):
def set_value(path, new_data, new_file, new_secret_data, new_secret_file):
workdir = os.path.dirname(path)
data = None
secret_data = None
# Read any existing zuul_return data.
if os.path.exists(path):
with open(path, 'r') as f:
data = f.read()
if data:
data = json.loads(data)
file_data = f.read()
if file_data:
file_data = json.loads(file_data)
data = file_data['data']
secret_data = file_data['secret_data']
else:
data = {}
secret_data = {}
# If a file of data was supplied, merge its contents.
if new_file:
with open(new_file, 'r') as f:
merge_data(json.load(f), data)
if new_secret_file:
with open(new_secret_file, 'r') as f:
merge_data(json.load(f), secret_data)
# If a 'data' value was supplied, merge it.
if new_data:
merge_data(new_data, data)
if new_secret_data:
merge_data(new_secret_data, secret_data)
# Replace our results file ('path') with the updated data.
(f, tmp_path) = tempfile.mkstemp(dir=workdir)
try:
f = os.fdopen(f, 'w')
json.dump(data, f)
json.dump({'data': data, 'secret_data': secret_data}, f)
f.close()
os.rename(tmp_path, path)
except Exception:
@@ -128,11 +137,13 @@ class ActionModule(ActionBase):
Our plugin currently accepts these arguments:
data - A dictionary of arbitrary data to return to Zuul.
secret_data - The same, but secret data.
path - File location on the executor to store the return data.
Unlikely to be supplied.
file - A JSON-formatted file storing the data to return to Zuul.
This can be used instead of, or in conjunction with, the
'data' argument to return large amounts of data.
secret_file - The same, but secret data.
Note: The plugin parameters are stored in the self._task.args variable.
@@ -156,6 +167,10 @@ class ActionModule(ActionBase):
return paths._fail_dict(path)
set_value(
path, self._task.args.get('data'), self._task.args.get('file'))
path,
self._task.args.get('data'),
self._task.args.get('file'),
self._task.args.get('secret_data'),
self._task.args.get('secret_file'))
return results
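For reference, a minimal standalone sketch (hypothetical values, borrowed from the test playbooks in this change) of the results-file layout that set_value now writes and the executor later reads back:

import json

# Both sections are written side by side under fixed top-level keys.
payload = {
    'data': {'zuul': {'log_url': 'http://example.com/test/log/url/'}},
    'secret_data': {'returned_secret_password': 'fromreturn'},
}
serialized = json.dumps(payload)

# The reader splits the two sections apart again.
file_data = json.loads(serialized)
data = file_data.get('data', {})
secret_data = file_data.get('secret_data', {})
assert secret_data == {'returned_secret_password': 'fromreturn'}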

View File

@@ -143,6 +143,7 @@ def construct_gearman_params(uuid, sched, nodeset, job, item, pipeline,
params['extra_vars'] = job.extra_variables
params['host_vars'] = job.host_variables
params['group_vars'] = job.group_variables
params['secret_vars'] = job.secret_parent_data
params['zuul'] = zuul_params
projects = set()
required_projects = set()

View File

@@ -917,7 +917,10 @@ class AnsibleJob(object):
self.paused = True
data = {'paused': self.paused, 'data': self.getResultData()}
result_data, secret_result_data = self.getResultData()
data = {'paused': self.paused,
'data': result_data,
'secret_data': secret_result_data}
self.executor_server.pauseBuild(self.job, data)
self._resume_event.wait()
@@ -1230,24 +1233,31 @@ class AnsibleJob(object):
if self.aborted_reason == self.RESULT_DISK_FULL:
result = 'DISK_FULL'
data = self.getResultData()
data, secret_data = self.getResultData()
warnings = []
self.mapLines(merger, args, data, item_commit, warnings)
warnings.extend(get_warnings_from_result_data(data, logger=self.log))
result_data = dict(result=result, warnings=warnings, data=data)
result_data = dict(result=result,
warnings=warnings,
data=data,
secret_data=secret_data)
# TODO do we want to log the secret data here?
self.log.debug("Sending result: %s", result_data)
self.executor_server.completeBuild(self.job, result_data)
def getResultData(self):
data = {}
secret_data = {}
try:
with open(self.jobdir.result_data_file) as f:
file_data = f.read()
if file_data:
data = json.loads(file_data)
file_data = json.loads(file_data)
data = file_data.get('data', {})
secret_data = file_data.get('secret_data', {})
except Exception:
self.log.exception("Unable to load result data:")
return data
return data, secret_data
def mapLines(self, merger, args, data, commit, warnings):
# The data and warnings arguments are mutated in this method.
@@ -1492,7 +1502,7 @@ class AnsibleJob(object):
return None
# check if we need to pause here
result_data = self.getResultData()
result_data, secret_result_data = self.getResultData()
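# Note: only the regular (non-secret) returned data is consulted
# for the pause flag.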
pause = result_data.get('zuul', {}).get('pause')
if success and pause:
self.pause()
@@ -1790,6 +1800,7 @@ class AnsibleJob(object):
self.prepareRole(jobdir_playbook, role, args)
secrets = self.decryptSecrets(playbook['secrets'])
secrets = self.mergeSecretVars(secrets, args)
if secrets:
check_varnames(secrets)
jobdir_playbook.secrets_content = yaml.safe_dump(
@@ -1914,6 +1925,40 @@ class AnsibleJob(object):
project.name)
return path
def mergeSecretVars(self, secrets, args):
'''
Merge secret return data with secrets.
:arg secrets dict: Actual Zuul secrets.
:arg args dict: The job arguments.
'''
secret_vars = args.get('secret_vars') or {}
# We need to handle secret vars specially. We want to pass
# them to Ansible as we do secrets with a -e file, but we want
# them to have the lowest priority. In order to accomplish
# that, we will simply remove any top-level secret var with
# the same name as anything above it in precedence.
other_vars = set()
other_vars.update(args['vars'].keys())
for group_vars in args['group_vars'].values():
other_vars.update(group_vars.keys())
for host_vars in args['host_vars'].values():
other_vars.update(host_vars.keys())
other_vars.update(secrets.keys())
ret = secret_vars.copy()
for key in other_vars:
ret.pop(key, None)
# Add in the actual secrets
if secrets:
ret.update(secrets)
return ret
def prepareRole(self, jobdir_playbook, role, args):
if role['type'] == 'zuul':
root = jobdir_playbook.addRole()
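The precedence rule that mergeSecretVars implements can be illustrated with a self-contained sketch (hypothetical values, mirroring the test job configuration in this change):

# Secret vars returned by a parent job via zuul_return.
secret_vars = {
    'returned_secret_password': 'fromreturn',
    'jobvar': 'return',
    'test_secret': {'password': 'fromreturn'},
}
# Names defined at a higher precedence: job, host, group, and extra
# vars, plus the names of actual job secrets.
other_vars = {'jobvar', 'hostvar', 'groupvar', 'extravar', 'test_secret'}
# The actual decrypted job secrets.
secrets = {'test_secret': {'password': 'fromsecret'}}

# Drop any returned secret var shadowed by a higher-precedence name,
# then merge the actual secrets on top.
ret = {k: v for k, v in secret_vars.items() if k not in other_vars}
ret.update(secrets)
assert ret == {
    'returned_secret_password': 'fromreturn',
    'test_secret': {'password': 'fromsecret'},
}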

View File

@@ -1309,6 +1309,7 @@ class Job(ConfigObject):
start_mark=None,
inheritance_path=(),
parent_data=None,
secret_parent_data=None,
artifact_data=None,
description=None,
variant_description=None,
@@ -1567,6 +1568,13 @@ class Job(ConfigObject):
del v['zuul']
self.parent_data = v
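# Accumulate secret returned data the same way as the regular
# returned data above, again stripping the reserved 'zuul' key.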
secret_other_vars = other_build.secret_result_data
v = self.secret_parent_data or {}
v = Job._deepUpdate(secret_other_vars, v)
if 'zuul' in v:
del v['zuul']
self.secret_parent_data = v
artifact_data = self.artifact_data or []
artifacts = get_artifacts_from_result_data(other_vars)
for a in artifacts:

View File

@@ -1590,6 +1590,7 @@ class Scheduler(threading.Thread):
# with child job skipping.
build.paused = True
build.result_data = event.data.get("data", {})
build.secret_result_data = event.data.get("secret_data", {})
log = get_annotated_logger(self.log, build.zuul_event_id)
if build.build_set is not build.build_set.item.current_build_set:
@@ -1780,6 +1781,7 @@ class Scheduler(threading.Thread):
build.retry = True
result_data = event_result.get("data", {})
secret_result_data = event_result.get("secret_data", {})
warnings = event_result.get("warnings", [])
log.info("Build complete, result %s, warnings %s", result, warnings)
@@ -1795,6 +1797,7 @@ class Scheduler(threading.Thread):
build.end_time = event_result["end_time"]
build.result_data = result_data
build.secret_result_data = secret_result_data
build.build_set.warning_messages.extend(warnings)
build.result = result