Fix layout cache and adding a project to a pipeline
When reporting a failure on a change that adds a project to a pipeline, we must be careful to distinguish a failure that users want to see versus a failure that should not be reported (if Zuul can't merge a given change, it can't know if that change adds the project or not, so it falls back to the parent layout to decide if the project participates in the pipeline). We recently reduced the number of cases where we generate a full dynamic layout so that they are only created when freezing a job graph of the current change, or if needed to do so for a following change. Any time there is a reconfiguration, the layout cache is cleared and layouts are regenerated only if they need to be to meet the above conditions. With the following sequence of events, we can produce erroneous behavior: 1) Enqueue a change which, via in-repo config, adds a project to a pipeline. 2) While jobs are running for that change, reconfigure. 3) When the jobs complete and the change reports, there will be no layout local to the queue item and Zuul will fall back to the current pipeline layout where the project is not in the pipeline, and will decide not to report. The presence of jobs on a queue item is a de-facto indication that the dynamic layout for that item included the project in the pipeline. That's rather efficient, so let's check that first, then fall back on the layout cache for the remaining cases. There is one case that isn't handled by this: * If an item *ahead* is the item that adds the project to the pipeline. * And this item has a merge failure. * And there has been a reconfiguration since the item ahead was enqueued. In that case, Zuul will still proceed through the original code path where it finds no layout with the project in the pipeline at the time of reporting. To correct that, we would need to either recompute the layout, or we would need to determine that the item is in the pipeline earlier and preserve that information on the queue item so it persists across reconfigurations. Change-Id: I7a9a2005eb57c4e2fd398e616c31ba5035e8d965
This commit is contained in:
parent
74f10e5924
commit
e9cf4bb842
|
@ -3019,6 +3019,44 @@ class TestInRepoJoin(ZuulTestCase):
|
|||
dict(name='project-test1', result='ABORTED', changes='1,1 2,1'),
|
||||
], ordered=False)
|
||||
|
||||
def test_dynamic_failure_with_reconfig(self):
|
||||
# Test that a reconfig in the middle of adding a change to a
|
||||
# pipeline works.
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
in_repo_conf = textwrap.dedent(
|
||||
"""
|
||||
- job:
|
||||
name: project-test1
|
||||
run: playbooks/project-test1.yaml
|
||||
|
||||
- project:
|
||||
name: org/project
|
||||
gate:
|
||||
jobs:
|
||||
- project-test1
|
||||
""")
|
||||
|
||||
file_dict = {'.zuul.yaml': in_repo_conf}
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
|
||||
files=file_dict)
|
||||
self.executor_server.failJob('project-test1', A)
|
||||
A.addApproval('Code-Review', 2)
|
||||
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
|
||||
|
||||
# Execute a reconfig here which will clear the cached layout
|
||||
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.orderedRelease()
|
||||
self.waitUntilSettled()
|
||||
self.assertEqual(A.reported, 2,
|
||||
"A should report start and failure")
|
||||
self.assertEqual(A.data['status'], 'NEW')
|
||||
self.assertHistory([
|
||||
dict(name='project-test1', result='FAILURE', changes='1,1'),
|
||||
], ordered=False)
|
||||
|
||||
def test_dynamic_dependent_pipeline_absent(self):
|
||||
# Test that a series of dependent changes don't report merge
|
||||
# failures to a pipeline they aren't in.
|
||||
|
|
|
@ -1514,25 +1514,28 @@ class PipelineManager(metaclass=ABCMeta):
|
|||
log.debug("Reporting change %s", item.change)
|
||||
ret = True # Means error as returned by trigger.report
|
||||
|
||||
# In the case of failure, we may not hove completed an initial
|
||||
# In the case of failure, we may not have completed an initial
|
||||
# merge which would get the layout for this item, so in order
|
||||
# to determine whether this item's project is in this
|
||||
# pipeline, use the dynamic layout if available, otherwise,
|
||||
# fall back to the current static layout as a best
|
||||
# approximation.
|
||||
layout = None
|
||||
if item.layout_uuid:
|
||||
layout = self.getLayout(item)
|
||||
if not layout:
|
||||
layout = self.pipeline.tenant.layout
|
||||
# approximation. However, if we ran jobs, then we obviously
|
||||
# were in the pipeline config.
|
||||
project_in_pipeline = bool(item.getJobs())
|
||||
|
||||
project_in_pipeline = True
|
||||
ppc = None
|
||||
try:
|
||||
ppc = layout.getProjectPipelineConfig(item)
|
||||
except Exception:
|
||||
log.exception("Invalid config for change %s", item.change)
|
||||
if not ppc:
|
||||
if not project_in_pipeline:
|
||||
layout = None
|
||||
if item.layout_uuid:
|
||||
layout = self.getLayout(item)
|
||||
if not layout:
|
||||
layout = self.pipeline.tenant.layout
|
||||
|
||||
try:
|
||||
project_in_pipeline = bool(
|
||||
layout.getProjectPipelineConfig(item))
|
||||
except Exception:
|
||||
log.exception("Invalid config for change %s", item.change)
|
||||
if not project_in_pipeline:
|
||||
log.debug("Project %s not in pipeline %s for change %s",
|
||||
item.change.project, self.pipeline, item.change)
|
||||
project_in_pipeline = False
|
||||
|
|
Loading…
Reference in New Issue