Fix bug with removing a failed job
A recent change in I96195cb9996b01075e3705b6cd89a9863528898e added code to re-set build states for changes during a re-enqueue to ensure that any side effects (such as marking new builds as skipped) occurred. However, we have found that if a change has a non-succeeding build and the job for that build is removed, then this new code will run and attempt to perform a lookup on the removed job, fail, and raise an exception that aborts reconfiguration. Correct this by fully removing builds from their build sets when the corresponding job is removed so that no further processing happens on them. Change-Id: I2d5cc8f4c5b73d6a2b184cd21016943b8c08949d
This commit is contained in:
parent
5bf78a3ade
commit
400e8fd0a5
25
tests/fixtures/layout-live-reconfiguration-failed-job.yaml
vendored
Normal file
25
tests/fixtures/layout-live-reconfiguration-failed-job.yaml
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
pipelines:
|
||||
- name: check
|
||||
manager: IndependentPipelineManager
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: patchset-created
|
||||
success:
|
||||
gerrit:
|
||||
verified: 1
|
||||
failure:
|
||||
gerrit:
|
||||
verified: -1
|
||||
|
||||
jobs:
|
||||
- name: ^.*-merge$
|
||||
failure-message: Unable to merge change
|
||||
hold-following-changes: true
|
||||
|
||||
projects:
|
||||
- name: org/project
|
||||
merge-mode: cherry-pick
|
||||
check:
|
||||
- project-merge:
|
||||
- project-test2
|
||||
- project-testfile
|
@ -2325,7 +2325,7 @@ class TestScheduler(ZuulTestCase):
|
||||
'SUCCESS')
|
||||
self.assertEqual(len(self.history), 4)
|
||||
|
||||
def test_live_reconfiguration_failed_job(self):
|
||||
def test_live_reconfiguration_failed_root(self):
|
||||
# An extrapolation of test_live_reconfiguration_merge_conflict
|
||||
# that tests a job added to a job tree with a failed root does
|
||||
# not run.
|
||||
@ -2387,6 +2387,59 @@ class TestScheduler(ZuulTestCase):
|
||||
self.assertEqual(self.history[4].result, 'SUCCESS')
|
||||
self.assertEqual(len(self.history), 5)
|
||||
|
||||
def test_live_reconfiguration_failed_job(self):
|
||||
# Test that a change with a removed failing job does not
|
||||
# disrupt reconfiguration. If a change has a failed job and
|
||||
# that job is removed during a reconfiguration, we observed a
|
||||
# bug where the code to re-set build statuses would run on
|
||||
# that build and raise an exception because the job no longer
|
||||
# existed.
|
||||
self.worker.hold_jobs_in_build = True
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
|
||||
# This change will fail and later be removed by the reconfiguration.
|
||||
self.worker.addFailTest('project-test1', A)
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
self.worker.release('.*-merge')
|
||||
self.waitUntilSettled()
|
||||
self.worker.release('project-test1')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(A.data['status'], 'NEW')
|
||||
self.assertEqual(A.reported, 0)
|
||||
|
||||
self.assertEqual(self.getJobFromHistory('project-merge').result,
|
||||
'SUCCESS')
|
||||
self.assertEqual(self.getJobFromHistory('project-test1').result,
|
||||
'FAILURE')
|
||||
self.assertEqual(len(self.history), 2)
|
||||
|
||||
# Remove the test1 job.
|
||||
self.config.set('zuul', 'layout_config',
|
||||
'tests/fixtures/layout-live-'
|
||||
'reconfiguration-failed-job.yaml')
|
||||
self.sched.reconfigure(self.config)
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.worker.hold_jobs_in_build = False
|
||||
self.worker.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(self.getJobFromHistory('project-test2').result,
|
||||
'SUCCESS')
|
||||
self.assertEqual(self.getJobFromHistory('project-testfile').result,
|
||||
'SUCCESS')
|
||||
self.assertEqual(len(self.history), 4)
|
||||
|
||||
self.assertEqual(A.data['status'], 'NEW')
|
||||
self.assertEqual(A.reported, 1)
|
||||
self.assertIn('Build succeeded', A.messages[0])
|
||||
# Ensure the removed job was not included in the report.
|
||||
self.assertNotIn('project-test1', A.messages[0])
|
||||
|
||||
def test_live_reconfiguration_functions(self):
|
||||
"Test live reconfiguration with a custom function"
|
||||
self.worker.registerFunction('build:node-project-test1:debian')
|
||||
|
@ -679,7 +679,7 @@ class Scheduler(threading.Thread):
|
||||
continue
|
||||
self.log.debug("Re-enqueueing changes for pipeline %s" % name)
|
||||
items_to_remove = []
|
||||
builds_to_remove = []
|
||||
builds_to_cancel = []
|
||||
last_head = None
|
||||
for shared_queue in old_pipeline.queues:
|
||||
for item in shared_queue.queue:
|
||||
@ -703,14 +703,15 @@ class Scheduler(threading.Thread):
|
||||
if job:
|
||||
build.job = job
|
||||
else:
|
||||
builds_to_remove.append(build)
|
||||
item.removeBuild(build)
|
||||
builds_to_cancel.append(build)
|
||||
if not new_pipeline.manager.reEnqueueItem(item,
|
||||
last_head):
|
||||
items_to_remove.append(item)
|
||||
for item in items_to_remove:
|
||||
for build in item.current_build_set.getBuilds():
|
||||
builds_to_remove.append(build)
|
||||
for build in builds_to_remove:
|
||||
builds_to_cancel.append(build)
|
||||
for build in builds_to_cancel:
|
||||
self.log.warning(
|
||||
"Canceling build %s during reconfiguration" % (build,))
|
||||
try:
|
||||
|
Loading…
Reference in New Issue
Block a user