Add more deduplication tests

This adds more test cases for automatic job deduplication, as well
as some explanatory comments.

Change-Id: I5ca96ddf655e501af3c9490ea86e8cd6a13d7e44
This commit is contained in:
James E. Blair 2023-09-06 15:45:57 -07:00
parent eee6ef3fcf
commit 77633e0005
7 changed files with 366 additions and 13 deletions

View File

@ -3259,7 +3259,7 @@ class FakeBuild(object):
def writeReturnData(self):
changes = self.executor_server.return_data.get(self.name, {})
data = changes.get(self.change)
data = changes.get(self.parameters['zuul']['ref'])
if data is None:
return
with open(self.jobdir.result_data_file, 'w') as f:
@ -3641,17 +3641,15 @@ class RecordingExecutorServer(zuul.executor.server.ExecutorServer):
:arg str name: The name of the job to return data.
:arg Change change: The :py:class:`~tests.base.FakeChange`
instance which should cause the job to return data.
Or pass a ref as a string.
:arg dict data: The data to return
"""
# TODO(clarkb) We are incredibly change focused here and in FakeBuild
# above. This makes it very difficult to test non change items with
# return data. We currently rely on the hack that None is used as a
# key for the changes dict, but we should improve that to look up
# refnames or similar.
changes = self.return_data.setdefault(name, {})
if hasattr(change, 'number'):
cid = ' '.join((str(change.number), str(change.latest_patchset)))
cid = change.data['currentPatchSet']['ref']
elif isinstance(change, str):
cid = change
else:
# Not actually a change, but a ref update event for tags/etc
# In this case a key of None is used by writeReturnData

View File

@ -0,0 +1,85 @@
- queue:
name: integrated
allow-circular-dependencies: true
- pipeline:
name: gate
manager: dependent
success-message: Build succeeded (gate).
require:
gerrit:
approval:
- Approved: 1
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/run.yaml
nodeset:
nodes:
- label: debian
name: controller
- job:
name: common-job
required-projects:
- org/project1
- org/project2
- job:
name: project1-job
- job:
name: project2-job
- job:
name: child1-job
required-projects:
- org/project1
- org/project2
- job:
name: child2-job
required-projects:
- org/project1
- org/project2
- project:
name: org/project1
queue: integrated
gate:
jobs:
- common-job
- child1-job:
dependencies: common-job
- child2-job:
dependencies: common-job
- project1-job
- project:
name: org/project2
queue: integrated
gate:
jobs:
- common-job
- child1-job:
dependencies: common-job
- child2-job:
dependencies: common-job
- project2-job

View File

@ -0,0 +1,71 @@
- queue:
name: integrated
allow-circular-dependencies: true
- pipeline:
name: gate
manager: dependent
success-message: Build succeeded (gate).
require:
gerrit:
approval:
- Approved: 1
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/run.yaml
nodeset:
nodes:
- label: debian
name: controller
- job:
name: common-job
deduplicate: false
- job:
name: project1-job
- job:
name: project2-job
- job:
name: child-job
required-projects:
- org/project1
- org/project2
- project:
name: org/project1
queue: integrated
gate:
jobs:
- common-job
- child-job:
dependencies: common-job
- project:
name: org/project2
queue: integrated
gate:
jobs:
- common-job
- child-job:
dependencies: common-job

View File

@ -0,0 +1,81 @@
- queue:
name: integrated
allow-circular-dependencies: true
- pipeline:
name: gate
manager: dependent
success-message: Build succeeded (gate).
require:
gerrit:
approval:
- Approved: 1
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/run.yaml
nodeset:
nodes:
- label: debian
name: controller
- job:
name: common-job
required-projects:
- org/project1
- org/project2
- job:
name: project1-job
- job:
name: project2-job
- job:
name: child1-job
required-projects:
- org/project1
- org/project2
- job:
name: child2-job
required-projects:
- org/project1
- org/project2
- project:
name: org/project1
queue: integrated
gate:
jobs:
- common-job
- child1-job:
dependencies: common-job
- project1-job
- project:
name: org/project2
queue: integrated
gate:
jobs:
- common-job
- child2-job:
dependencies: common-job
- project2-job

View File

@ -1739,10 +1739,12 @@ class TestGerritCircularDependencies(ZuulTestCase):
self._test_job_deduplication()
self.assertHistory([
dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/02/2/1'),
dict(name="project2-job", result="SUCCESS", changes="2,1 1,1"),
# This is not deduplicated
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/01/1/1'),
], ordered=False)
self.assertEqual(len(self.fake_nodepool.history), 4)
@ -1763,10 +1765,12 @@ class TestGerritCircularDependencies(ZuulTestCase):
self._test_job_deduplication()
self.assertHistory([
dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/02/2/1'),
dict(name="project2-job", result="SUCCESS", changes="2,1 1,1"),
# This is not deduplicated, though it would be under auto
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/01/1/1'),
], ordered=False)
self.assertEqual(len(self.fake_nodepool.history), 4)
@ -1784,6 +1788,96 @@ class TestGerritCircularDependencies(ZuulTestCase):
], ordered=False)
self.assertEqual(len(self.fake_nodepool.history), 0)
@simple_layout('layouts/job-dedup-child-jobs.yaml')
def test_job_deduplication_child_jobs(self):
# Test that child jobs of deduplicated parents are
# deduplicated, and also that supplying child_jobs to
# zuul_return filters correctly. child1-job should not run,
# but child2 should run and be deduplicated. This uses auto
# deduplication.
self.executor_server.returnData(
'common-job', 'refs/changes/02/2/1',
{'zuul': {'child_jobs': ['child2-job']}}
)
self.executor_server.returnData(
'common-job', 'refs/changes/01/1/1',
{'zuul': {'child_jobs': ['child2-job']}}
)
self._test_job_deduplication()
self.assertHistory([
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="project2-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="child2-job", result="SUCCESS", changes="2,1 1,1"),
# This is deduplicated
# dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
# dict(name="child2-job", result="SUCCESS", changes="2,1 1,1"),
], ordered=False)
@simple_layout('layouts/job-dedup-mismatched-child-jobs.yaml')
def test_job_deduplication_mismatched_child_jobs(self):
# Test that a parent job with different child jobs is
# deduplicated. This uses auto-deduplication.
self._test_job_deduplication()
self.assertHistory([
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="child1-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="child2-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
dict(name="project2-job", result="SUCCESS", changes="2,1 1,1"),
# This is deduplicated
# dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
], ordered=False)
@simple_layout('layouts/job-dedup-child-of-diff-parent.yaml')
def test_job_deduplication_child_of_diff_parent(self):
# This will never happen in practice, but it's theoretically
# possible, so we have a test to codify and exercise the
# behavior.
# The common job is forced to not deduplicate, but since there
# is no return data, the inputs to child-job are identical, so
# child-job is deduplicated. In practice, there will always
# be different return data so this is unlikely to happen.
# The child job uses auto deduplication.
self._test_job_deduplication()
self.assertHistory([
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/02/2/1'),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/01/1/1'),
dict(name="child-job", result="SUCCESS", changes="2,1 1,1"),
], ordered=False)
@simple_layout('layouts/job-dedup-child-of-diff-parent.yaml')
def test_job_deduplication_child_of_diff_parent_diff_data(self):
# This is the more realistic test of the above, where we
# return different data from the non-deduplicated parent job,
# which causes the child job not to be deduplicated.
# The child job uses auto deduplication.
self.executor_server.returnData(
'common-job', 'refs/changes/02/2/1',
{'foo': 'a'}
)
self.executor_server.returnData(
'common-job', 'refs/changes/01/1/1',
{'foo': 'b'}
)
self._test_job_deduplication()
self.assertHistory([
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/02/2/1'),
dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/01/1/1'),
dict(name="child-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/02/2/1'),
dict(name="child-job", result="SUCCESS", changes="2,1 1,1",
ref='refs/changes/01/1/1'),
], ordered=False)
@simple_layout('layouts/job-dedup-auto-shared.yaml')
def test_job_deduplication_failed_node_request(self):
# Pause nodepool so we can fail the node request later
@ -2062,7 +2156,7 @@ class TestGerritCircularDependencies(ZuulTestCase):
)
self.executor_server.returnData(
'parent-job', A,
'parent-job', B,
{'zuul': {'pause': True}}
)

View File

@ -8699,7 +8699,7 @@ class TestProvidesRequiresBuildset(ZuulTestCase):
self.executor_server.hold_jobs_in_build = True
event = self.fake_gerrit.addFakeTag('org/project1', 'master', 'foo')
self.executor_server.returnData(
'image-builder', event,
'image-builder', 'refs/tags/foo',
{'zuul':
{'artifacts': [
{'name': 'image',

View File

@ -5507,6 +5507,30 @@ class QueueItem(zkobject.ZKObject):
If another item in the bundle has a duplicate job,
return the other item
"""
# A note on some of the checks below:
#
# A possible difference between jobs could be the dependent
# job tree under this one. Because that is passed to the job
# as zuul.child_jobs, that could be a different input to the
# job, and therefore produce a different output. However, a
# very common pattern is to build a common artifact in a
# parent job and then do something different with it in a
# child job. The utility of automatic deduplication in that
# case is very compelling, so we do not check child_jobs when
# deduplicating. Users can set deduplicate:false if that
# behavior is important.
#
# Theoretically, it would be okay to deduplicate a job with
# different parents as long as the inputs are the same. But
# that won't happen because the job's dependencies are checked
# in isEqual. Similarly, it would be okay to deduplicate the
# same job with a deduplicated parent as long as the returned
# data are the same. However, in practice, that will never be
# the case since all Zuul jobs return artifacts (the
# manifest), and those will be different. No special handling
# is done here, that is a natural consequence of parent_data
# being different.
if not self.bundle:
return None
if job.deduplicate is False: