Fix configuration error related to YAML anchors

PyYAML is efficient with YAML anchors and will only construct one
Python object for a YAML mapping that is used multiple times via
anchors.

We copy job variables (a mapping) straight from the YAML to an
attribute of the Job object, then we freeze the Job object which
converts all of its dicts to mappingproxy objects.  This mutates
the contents of the vars dict, and if that is used on another
job via an anchor, we will end up with mappingproxy objects in
what we think is a "raw yaml" configuration dict, and we will not
be able to serialize it in case of errors.

To address this, perform a deep copy of every configuration yaml
blob before parsing it into an object.

Change-Id: Idaa6ff78b1ac5a108fb9f43700cf8e66192c43ce
This commit is contained in:
James E. Blair 2023-02-13 12:10:32 -08:00
parent c6b98116f5
commit 776bbc6a6e
2 changed files with 73 additions and 0 deletions

View File

@@ -1557,6 +1557,43 @@ class TestInRepoConfig(ZuulTestCase):
'start_line': 5},
})
def test_dynamic_config_job_anchors(self):
    # Regression test for the use of YAML anchors in job
    # configuration: freezing the first job must not mutate the
    # shared vars dict.  The two jobs should end up holding two
    # distinct Python objects for their vars dicts.
    in_repo_conf = textwrap.dedent(
        """
        - job:
            name: myvars
            vars: &anchor
              plugins:
                foo: bar

        - job:
            name: project-test1
            timeout: 999999999999
            vars: *anchor

        - project:
            name: org/project
            check:
              jobs:
                - project-test1
        """)

    change_files = {'.zuul.yaml': in_repo_conf}
    change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
                                            files=change_files)
    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    # The excessive timeout must be rejected; a -1 review with the
    # max-job-timeout error proves the config was parsed (and the
    # anchored vars did not break error serialization).
    self.assertEqual(change.reported, 1,
                     "A should report failure")
    self.assertEqual(change.patchsets[0]['approvals'][0]['value'], "-1")
    self.assertIn('max-job-timeout', change.messages[0])
    self.assertHistory([])
def test_dynamic_config_non_existing_job_in_template(self):
"""Test that requesting a non existent job fails"""
in_repo_conf = textwrap.dedent(

View File

@@ -437,6 +437,30 @@ def ansible_vars_dict(value):
ansible_var_name(key)
def copy_safe_config(conf):
    """Return a deep copy of a config dictionary.

    This lets us assign values of a config dictionary to configuration
    objects, even if those values are nested dictionaries.  This way
    we can safely freeze the configuration object (the process of
    which mutates dictionaries) without mutating the original
    configuration.

    Meanwhile, this does retain the original context information as a
    single object (some behaviors rely on mutating the source context
    (e.g., pragma)).
    """
    safe = copy.deepcopy(conf)
    # Re-attach the shared context objects by identity: consumers
    # rely on mutating one common source-context/start-mark object,
    # so those two keys must not be replaced by copies.
    for shared_key in ('_source_context', '_start_mark'):
        if shared_key in conf:
            safe[shared_key] = conf[shared_key]
    return safe
class PragmaParser(object):
pragma = {
'implied-branch-matchers': bool,
@ -452,6 +476,7 @@ class PragmaParser(object):
self.pcontext = pcontext
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
bm = conf.get('implied-branch-matchers')
@ -512,6 +537,7 @@ class NodeSetParser(object):
return vs.Schema(nodeset)
def fromYaml(self, conf, anonymous=False):
conf = copy_safe_config(conf)
if anonymous:
self.anon_schema(conf)
self.anonymous = True
@ -599,6 +625,7 @@ class SecretParser(object):
return vs.Schema(secret)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
s = model.Secret(conf['name'], conf['_source_context'])
s.source_context = conf['_source_context']
@ -723,6 +750,7 @@ class JobParser(object):
def fromYaml(self, conf, project_pipeline=False, name=None,
validate=True):
conf = copy_safe_config(conf)
if validate:
self.schema(conf)
@ -1075,6 +1103,7 @@ class ProjectTemplateParser(object):
return vs.Schema(project)
def fromYaml(self, conf, validate=True, freeze=True):
conf = copy_safe_config(conf)
if validate:
self.schema(conf)
source_context = conf['_source_context']
@ -1165,6 +1194,7 @@ class ProjectParser(object):
return vs.Schema(project)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
project_name = conf.get('name')
@ -1328,6 +1358,7 @@ class PipelineParser(object):
return vs.Schema(pipeline)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
pipeline = model.Pipeline(conf['name'], self.pcontext.tenant)
pipeline.source_context = conf['_source_context']
@ -1469,6 +1500,7 @@ class SemaphoreParser(object):
return vs.Schema(semaphore)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
semaphore = model.Semaphore(conf['name'], conf.get('max', 1))
semaphore.source_context = conf.get('_source_context')
@ -1494,6 +1526,7 @@ class QueueParser:
return vs.Schema(queue)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
queue = model.Queue(
conf['name'],
@ -1523,6 +1556,7 @@ class AuthorizationRuleParser(object):
return vs.Schema(authRule)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
a = model.AuthZRuleTree(conf['name'])
@ -1556,6 +1590,7 @@ class GlobalSemaphoreParser(object):
return vs.Schema(semaphore)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
semaphore = model.Semaphore(conf['name'], conf.get('max', 1),
global_scope=True)
@ -1576,6 +1611,7 @@ class ApiRootParser(object):
return vs.Schema(api_root)
def fromYaml(self, conf):
conf = copy_safe_config(conf)
self.schema(conf)
api_root = model.ApiRoot(conf.get('authentication-realm'))
api_root.access_rules = conf.get('access-rules', [])