Revert "Optimize layout re-calculation after re-enqueue"

This reverts commit 6fc028d255.

It looks like the scheduler might to use the wrong layout in certain
cases that are not covered by any tests.

Change-Id: I2b626f944cd09cf7e1dee73a0e121c4705ce850a
This commit is contained in:
Simon Westphahl 2021-05-12 15:22:02 +02:00
parent 7e802df42d
commit b144f54811
4 changed files with 31 additions and 136 deletions

View File

@ -3451,76 +3451,6 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(A.data['status'], 'MERGED') self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(A.reported, 2) self.assertEqual(A.reported, 2)
def test_live_reconfiguration_fallback_layout(self):
# Test that re-calculating a dynamic fallback layout works after it
# was removed during a reconfiguration."
self.executor_server.hold_jobs_in_build = True
in_repo_conf = textwrap.dedent(
"""
- job:
name: project-test3
parent: project-test1
# add a job by the canonical project name
- project:
gate:
jobs:
- project-test3:
dependencies:
- project-merge
""")
file_dict = {'zuul.d/a.yaml': in_repo_conf}
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
files=file_dict)
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.waitUntilSettled()
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
items = tenant.layout.pipelines['gate'].getAllItems()
self.assertEqual(len(items), 1)
# Assert that the layout is removed during reconfiguration
self.assertIsNone(items[0].layout)
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B',
parent='refs/changes/01/1/1')
B.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
self.waitUntilSettled()
items = tenant.layout.pipelines['gate'].getAllItems()
self.assertEqual(len(items), 2)
for item in items:
# Assert that the dynamic layout is re-calculated
self.assertIsNotNone(item.layout)
self.assertIsNot(item.layout, tenant.layout)
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(A.reported, 2)
self.assertEqual(B.data['status'], 'MERGED')
self.assertEqual(B.reported, 2)
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-merge', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1 2,1'),
dict(name='project-test3', result='SUCCESS', changes='1,1'),
dict(name='project-test3', result='SUCCESS', changes='1,1 2,1'),
], ordered=False)
def test_live_reconfiguration_command_socket(self): def test_live_reconfiguration_command_socket(self):
"Test that live reconfiguration via command socket works" "Test that live reconfiguration via command socket works"

View File

@ -103,14 +103,14 @@ def construct_gearman_params(uuid, sched, nodeset, job, item, pipeline,
for role in d['roles']: for role in d['roles']:
if role['type'] != 'zuul': if role['type'] != 'zuul':
continue continue
project_metadata = item.job_graph.getProjectMetadata( project_metadata = item.layout.getProjectMetadata(
role['project_canonical_name']) role['project_canonical_name'])
if project_metadata: if project_metadata:
role['project_default_branch'] = \ role['project_default_branch'] = \
project_metadata.default_branch project_metadata.default_branch
else: else:
role['project_default_branch'] = 'master' role['project_default_branch'] = 'master'
role_trusted, role_project = item.pipeline.tenant.getProject( role_trusted, role_project = item.layout.tenant.getProject(
role['project_canonical_name']) role['project_canonical_name'])
role_connection = role_project.source.connection role_connection = role_project.source.connection
role['connection'] = role_connection.connection_name role['connection'] = role_connection.connection_name
@ -149,7 +149,7 @@ def construct_gearman_params(uuid, sched, nodeset, job, item, pipeline,
def make_project_dict(project, override_branch=None, def make_project_dict(project, override_branch=None,
override_checkout=None): override_checkout=None):
project_metadata = item.job_graph.getProjectMetadata( project_metadata = item.layout.getProjectMetadata(
project.canonical_name) project.canonical_name)
if project_metadata: if project_metadata:
project_default_branch = project_metadata.default_branch project_default_branch = project_metadata.default_branch

View File

@ -658,7 +658,7 @@ class PipelineManager(metaclass=ABCMeta):
def executeJobs(self, item): def executeJobs(self, item):
# TODO(jeblair): This should return a value indicating a job # TODO(jeblair): This should return a value indicating a job
# was executed. Appears to be a longstanding bug. # was executed. Appears to be a longstanding bug.
if not item.job_graph: if not item.layout:
return False return False
jobs = item.findJobsToRun( jobs = item.findJobsToRun(
@ -691,18 +691,16 @@ class PipelineManager(metaclass=ABCMeta):
return canceled return canceled
def _findRelevantErrors(self, item, layout): def _findRelevantErrors(self, item, layout):
# Collect all the config errors that are not related to the if item.item_ahead:
# current item. parent_layout = item.item_ahead.layout
parent_error_keys = list( else:
self.pipeline.tenant.layout.loading_errors.error_keys) parent_layout = item.pipeline.tenant.layout
for item_ahead in item.items_ahead:
parent_error_keys.extend(
e.key for e in item.item_ahead.current_build_set.config_errors)
relevant_errors = [] relevant_errors = []
for err in layout.loading_errors.errors: for err in layout.loading_errors.errors:
econtext = err.key.context econtext = err.key.context
if ((err.key not in parent_error_keys) or if ((err.key not in
parent_layout.loading_errors.error_keys) or
(econtext.project == item.change.project.name and (econtext.project == item.change.project.name and
econtext.branch == item.change.branch)): econtext.branch == item.change.branch)):
relevant_errors.append(err) relevant_errors.append(err)
@ -831,32 +829,14 @@ class PipelineManager(metaclass=ABCMeta):
item.setConfigError("Unknown configuration error") item.setConfigError("Unknown configuration error")
return None return None
def getFallbackLayout(self, item):
parent_item = item.item_ahead
if not parent_item:
return item.pipeline.tenant.layout
if parent_item.layout is None:
self.log.info("Re-calculating layout of parent item %s",
parent_item)
# The fallback layout is not existing currently. Since the
# item ahead has a job graph we know that all items ahead
# have or had the layout set. Since the layout can be reset
# during reconfigurations we need to re-calculate it.
parent_item.layout = self.getLayout(parent_item)
if parent_item.layout is None:
raise RuntimeError("Re-calculation of layout failed")
return parent_item.layout
def getLayout(self, item): def getLayout(self, item):
if item.item_ahead: if item.item_ahead:
# Check if the item ahead is live and has a job graph. If not it fallback_layout = item.item_ahead.layout
# hasn't finished job freezing and is likely waiting on a merge if fallback_layout is None:
# job. Therefore we need to wait as well. # We're probably waiting on a merge job for the item ahead.
if item.item_ahead.live and not item.item_ahead.job_graph:
self.log.info("Live item ahead %s has no job graph",
item.item_ahead)
return None return None
else:
fallback_layout = item.pipeline.tenant.layout
# If the current change does not update the layout, use its parent. # If the current change does not update the layout, use its parent.
# If the bundle doesn't update the config or the bundle updates the # If the bundle doesn't update the config or the bundle updates the
@ -872,14 +852,14 @@ class PipelineManager(metaclass=ABCMeta):
)[1] is not None )[1] is not None
) )
): ):
return self.getFallbackLayout(item) return fallback_layout
# Else this item updates the config, # Else this item updates the config,
# ask the merger for the result. # ask the merger for the result.
build_set = item.current_build_set build_set = item.current_build_set
if build_set.merge_state != build_set.COMPLETE: if build_set.merge_state != build_set.COMPLETE:
return None return None
if build_set.unable_to_merge: if build_set.unable_to_merge:
return self.getFallbackLayout(item) return fallback_layout
self.log.debug("Preparing dynamic layout for: %s" % item.change) self.log.debug("Preparing dynamic layout for: %s" % item.change)
return self._loadDynamicLayout(item) return self._loadDynamicLayout(item)
@ -1068,9 +1048,9 @@ class PipelineManager(metaclass=ABCMeta):
# None if there was an error that makes the layout unusable. # None if there was an error that makes the layout unusable.
# In the last case, it will have set the config_errors on this # In the last case, it will have set the config_errors on this
# item, which may be picked up by the next itme. # item, which may be picked up by the next itme.
if not item.layout and not item.job_graph: if not item.layout:
item.layout = self.getLayout(item) item.layout = self.getLayout(item)
if not item.layout and not item.job_graph: if not item.layout:
return False return False
# If the change can not be merged or has config errors, don't # If the change can not be merged or has config errors, don't

View File

@ -1917,7 +1917,6 @@ class JobGraph(object):
self.jobs = OrderedDict() # job_name -> Job self.jobs = OrderedDict() # job_name -> Job
# dependent_job_name -> dict(parent_job_name -> soft) # dependent_job_name -> dict(parent_job_name -> soft)
self._dependencies = {} self._dependencies = {}
self.project_metadata = {}
def __repr__(self): def __repr__(self):
return '<JobGraph %s>' % (self.jobs) return '<JobGraph %s>' % (self.jobs)
@ -2006,11 +2005,6 @@ class JobGraph(object):
jobs_to_iterate.add((j, current_parent_jobs[j])) jobs_to_iterate.add((j, current_parent_jobs[j]))
return all_parent_jobs return all_parent_jobs
def getProjectMetadata(self, name):
if name in self.project_metadata:
return self.project_metadata[name]
return None
class Build(object): class Build(object):
"""A Build is an instance of a single execution of a Job. """A Build is an instance of a single execution of a Job.
@ -2276,23 +2270,20 @@ class BuildSet(object):
# or if that fails, the current live layout, or if that fails, # or if that fails, the current live layout, or if that fails,
# use the default: merge-resolve. # use the default: merge-resolve.
item = self.item item = self.item
project = self.item.change.project layout = None
project_metadata = None
while item: while item:
if item.job_graph: layout = item.layout
project_metadata = item.job_graph.getProjectMetadata(
project.canonical_name)
if project_metadata:
break
item = item.item_ahead
if not project_metadata:
layout = self.item.pipeline.tenant.layout
if layout: if layout:
project_metadata = layout.getProjectMetadata( break
project.canonical_name item = item.item_ahead
) if not layout:
if project_metadata: layout = self.item.pipeline.tenant.layout
return project_metadata.merge_mode if layout:
project = self.item.change.project
project_metadata = layout.getProjectMetadata(
project.canonical_name)
if project_metadata:
return project_metadata.merge_mode
return MERGER_MERGE_RESOLVE return MERGER_MERGE_RESOLVE
def getSafeAttributes(self): def getSafeAttributes(self):
@ -2408,12 +2399,6 @@ class QueueItem(object):
# Ensure that each jobs's dependencies are fully # Ensure that each jobs's dependencies are fully
# accessible. This will raise an exception if not. # accessible. This will raise an exception if not.
job_graph.getParentJobsRecursively(job.name, self.layout) job_graph.getParentJobsRecursively(job.name, self.layout)
# Copy project metadata to job_graph since this will be needed
# later but must be independent of the layout due to buildset
# global repo state
job_graph.project_metadata = self.layout.project_metadata
self.job_graph = job_graph self.job_graph = job_graph
except Exception: except Exception:
self.project_pipeline_config = None self.project_pipeline_config = None