Fix exceptions on reconfig with changing window
When the window size changes, items formerly in the window might be no longer active. But those items can already have build results. During reconfiguration the job graph is set to None, but the item is not prepared if no longer active. This becomes problematic when build.setResult() tries to access the job graph when resetting builds. This can be solved by also preparing the item if there was already a job graph. The other option to fix this, would be to never change the item.active state back to False after it became active. However, this would be a change in the current behavior of Zuul, as only as builds finish the active window would actually decrease. Exceptions: 2019-12-03 08:11:06,298 zuul.Scheduler ERROR Exception while re-enqueing item <QueueItem 0x7f54285a49b0 for <Change 0x7f5442939da0 org/project 4,1> in gate> Traceback (most recent call last): File "/tmp/zuul/zuul/scheduler.py", line 931, in _reenqueueTenant item_ahead_valid=item_ahead_valid) File "/tmp/zuul/zuul/manager/__init__.py", line 276, in reEnqueueItem item.setResult(build) File "/tmp/zuul/zuul/model.py", line 2642, in setResult to_skip = self.job_graph.getDependentJobsRecursively( AttributeError: 'NoneType' object has no attribute 'getDependentJobsRecursively' 2019-12-03 08:11:11,401 zuul.Scheduler ERROR Exception in run handler: Traceback (most recent call last): File "/tmp/zuul/zuul/scheduler.py", line 1133, in run self.process_result_queue() File "/tmp/zuul/zuul/scheduler.py", line 1268, in process_result_queue self._doBuildCompletedEvent(event) File "/tmp/zuul/zuul/scheduler.py", line 1473, in _doBuildCompletedEvent pipeline.manager.onBuildCompleted(event.build) File "/tmp/zuul/zuul/manager/__init__.py", line 995, in onBuildCompleted item.setResult(build) File "/tmp/zuul/zuul/model.py", line 2642, in setResult to_skip = self.job_graph.getDependentJobsRecursively( AttributeError: 'NoneType' object has no attribute 'getDependentJobsRecursively' Change-Id: I96d4b28a017a1371029ff7b8d516beb09b6b94ad
This commit is contained in:
parent
8eec9e3bcb
commit
a49545c1f8
|
@ -0,0 +1,54 @@
|
|||
- pipeline:
|
||||
name: gate
|
||||
manager: dependent
|
||||
failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: comment-added
|
||||
approval:
|
||||
- Approved: 1
|
||||
start:
|
||||
gerrit:
|
||||
Verified: 0
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 2
|
||||
submit: true
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -2
|
||||
window: 4
|
||||
window-floor: 1
|
||||
window-increase-type: linear
|
||||
window-increase-factor: 1
|
||||
window-decrease-type: exponential
|
||||
window-decrease-factor: 3
|
||||
|
||||
- job:
|
||||
name: base
|
||||
parent: null
|
||||
run: playbooks/base.yaml
|
||||
|
||||
- job:
|
||||
name: project-merge
|
||||
run: playbooks/project-merge.yaml
|
||||
|
||||
- job:
|
||||
name: project-test1
|
||||
run: playbooks/project-test1.yaml
|
||||
|
||||
- job:
|
||||
name: project-test2
|
||||
run: playbooks/project-test2.yaml
|
||||
|
||||
- project:
|
||||
name: org/project
|
||||
gate:
|
||||
jobs:
|
||||
- project-merge
|
||||
- project-test1:
|
||||
dependencies:
|
||||
- project-merge
|
||||
- project-test2:
|
||||
dependencies:
|
||||
- project-merge
|
|
@ -0,0 +1,52 @@
|
|||
- pipeline:
|
||||
name: gate
|
||||
manager: dependent
|
||||
failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: comment-added
|
||||
approval:
|
||||
- Approved: 1
|
||||
start:
|
||||
gerrit:
|
||||
Verified: 0
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 2
|
||||
submit: true
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -2
|
||||
window: 4
|
||||
window-floor: 1
|
||||
window-decrease-type: exponential
|
||||
window-decrease-factor: 2
|
||||
|
||||
- job:
|
||||
name: base
|
||||
parent: null
|
||||
run: playbooks/base.yaml
|
||||
|
||||
- job:
|
||||
name: project-merge
|
||||
run: playbooks/project-merge.yaml
|
||||
|
||||
- job:
|
||||
name: project-test1
|
||||
run: playbooks/project-test1.yaml
|
||||
|
||||
- job:
|
||||
name: project-test2
|
||||
run: playbooks/project-test2.yaml
|
||||
|
||||
- project:
|
||||
name: org/project
|
||||
gate:
|
||||
jobs:
|
||||
- project-merge
|
||||
- project-test1:
|
||||
dependencies:
|
||||
- project-merge
|
||||
- project-test2:
|
||||
dependencies:
|
||||
- project-merge
|
|
@ -4830,6 +4830,116 @@ class TestScheduler(ZuulTestCase):
|
|||
self.assertEqual(queue.window_floor, 1)
|
||||
self.assertEqual(C.data['status'], 'MERGED')
|
||||
|
||||
@simple_layout('layouts/rate-limit-reconfigure.yaml')
|
||||
def test_queue_rate_limiting_reconfigure(self):
|
||||
"""Test that changes survive a reconfigure when no longer in window.
|
||||
|
||||
This is a regression tests for a case that lead to an exception during
|
||||
re-enqueue. The exception happened when former active items had already
|
||||
build results but then dropped out of the active window. During
|
||||
re-enqueue the job graph was not re-initialized because the items were
|
||||
no longer active.
|
||||
"""
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
|
||||
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
|
||||
D = self.fake_gerrit.addFakeChange('org/project', 'master', 'D')
|
||||
|
||||
A.addApproval('Code-Review', 2)
|
||||
B.addApproval('Code-Review', 2)
|
||||
C.addApproval('Code-Review', 2)
|
||||
D.addApproval('Code-Review', 2)
|
||||
|
||||
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
|
||||
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
|
||||
self.fake_gerrit.addEvent(C.addApproval('Approved', 1))
|
||||
self.fake_gerrit.addEvent(D.addApproval('Approved', 1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(len(self.builds), 4)
|
||||
self.assertEqual(self.builds[0].name, 'project-merge')
|
||||
self.assertEqual(self.builds[1].name, 'project-merge')
|
||||
self.assertEqual(self.builds[2].name, 'project-merge')
|
||||
self.assertEqual(self.builds[3].name, 'project-merge')
|
||||
|
||||
self.orderedRelease(4)
|
||||
|
||||
self.assertEqual(len(self.builds), 8)
|
||||
self.assertEqual(self.builds[0].name, 'project-test1')
|
||||
self.assertEqual(self.builds[1].name, 'project-test2')
|
||||
self.assertEqual(self.builds[2].name, 'project-test1')
|
||||
self.assertEqual(self.builds[3].name, 'project-test2')
|
||||
self.assertEqual(self.builds[4].name, 'project-test1')
|
||||
self.assertEqual(self.builds[5].name, 'project-test2')
|
||||
self.assertEqual(self.builds[6].name, 'project-test1')
|
||||
self.assertEqual(self.builds[7].name, 'project-test2')
|
||||
|
||||
self.executor_server.failJob('project-test1', B)
|
||||
self.builds[2].release()
|
||||
self.builds[3].release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(len(self.builds), 4)
|
||||
# A's jobs
|
||||
self.assertEqual(self.builds[0].name, 'project-test1')
|
||||
self.assertEqual(self.builds[1].name, 'project-test2')
|
||||
# C's and D's merge jobs
|
||||
self.assertEqual(self.builds[2].name, 'project-merge')
|
||||
self.assertEqual(self.builds[3].name, 'project-merge')
|
||||
|
||||
# Release merge jobs of C, D after speculative gate reset
|
||||
self.executor_server.release('project-merge')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(len(self.builds), 6)
|
||||
|
||||
# A's jobs
|
||||
self.assertEqual(self.builds[0].name, 'project-test1')
|
||||
self.assertEqual(self.builds[1].name, 'project-test2')
|
||||
# C's + D's jobs
|
||||
self.assertEqual(self.builds[2].name, 'project-test1')
|
||||
self.assertEqual(self.builds[3].name, 'project-test2')
|
||||
self.assertEqual(self.builds[4].name, 'project-test1')
|
||||
self.assertEqual(self.builds[5].name, 'project-test2')
|
||||
|
||||
# Fail D's job so we have a build results for an item that
|
||||
# is not in the active window after B is reported
|
||||
# (condition that previously lead to an exception)
|
||||
self.executor_server.failJob('project-test1', D)
|
||||
self.builds[4].release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Release A's jobs
|
||||
self.builds[0].release()
|
||||
self.builds[1].release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(len(self.builds), 3)
|
||||
|
||||
# C's jobs
|
||||
self.assertEqual(self.builds[0].name, 'project-test1')
|
||||
self.assertEqual(self.builds[1].name, 'project-test2')
|
||||
# D's remaining job
|
||||
self.assertEqual(self.builds[2].name, 'project-test2')
|
||||
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
queue = tenant.layout.pipelines['gate'].queues[0]
|
||||
self.assertEqual(queue.window, 1)
|
||||
|
||||
# D dropped out of the window
|
||||
self.assertFalse(queue.queue[-1].active)
|
||||
|
||||
self.commitConfigUpdate('org/common-config',
|
||||
'layouts/rate-limit-reconfigure2.yaml')
|
||||
self.sched.reconfigure(self.config)
|
||||
self.waitUntilSettled()
|
||||
|
||||
# D's remaining job should still be queued
|
||||
self.assertEqual(len(self.builds), 3)
|
||||
self.executor_server.release('project-.*')
|
||||
self.waitUntilSettled()
|
||||
|
||||
@simple_layout('layouts/reconfigure-window.yaml')
|
||||
def test_reconfigure_window_shrink(self):
|
||||
# Test the active window shrinking during reconfiguration
|
||||
|
@ -4908,8 +5018,7 @@ class TestScheduler(ZuulTestCase):
|
|||
# be allowed to shrink on reconfiguration.
|
||||
self.assertEqual(queue.window, 1)
|
||||
# B is outside the window, but still marked active until the
|
||||
# next pass through the queue processor, so its builds haven't
|
||||
# been canceled.
|
||||
# next pass through the queue processor.
|
||||
self.assertEqual(len(self.builds), 4)
|
||||
|
||||
self.sched.reconfigure(self.config)
|
||||
|
@ -4917,20 +5026,16 @@ class TestScheduler(ZuulTestCase):
|
|||
queue = tenant.layout.pipelines['gate'].queues[0]
|
||||
self.assertEqual(queue.window, 1)
|
||||
self.waitUntilSettled()
|
||||
# B's builds have been canceled now
|
||||
self.assertEqual(len(self.builds), 2)
|
||||
# B's builds should not be canceled
|
||||
self.assertEqual(len(self.builds), 4)
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
|
||||
# B's builds will be restarted and will show up in our history
|
||||
# twice.
|
||||
self.waitUntilSettled()
|
||||
self.assertHistory([
|
||||
dict(name='job1', result='SUCCESS', changes='1,1'),
|
||||
dict(name='job1', result='ABORTED', changes='1,1 2,1'),
|
||||
dict(name='job2', result='SUCCESS', changes='1,1'),
|
||||
dict(name='job2', result='ABORTED', changes='1,1 2,1'),
|
||||
dict(name='job1', result='SUCCESS', changes='1,1 2,1'),
|
||||
dict(name='job2', result='SUCCESS', changes='1,1 2,1'),
|
||||
], ordered=False)
|
||||
|
|
|
@ -259,9 +259,12 @@ class PipelineManager(object):
|
|||
# will go ahead and refresh the job graph if needed;
|
||||
# or it will send a new merge job if necessary, or it
|
||||
# will do nothing if we're waiting on a merge job.
|
||||
has_job_graph = bool(item.job_graph)
|
||||
item.job_graph = None
|
||||
item.layout = None
|
||||
if item.active:
|
||||
# If the item is no longer active, but has a job graph we
|
||||
# will make sure to update it.
|
||||
if item.active or has_job_graph:
|
||||
self.prepareItem(item)
|
||||
|
||||
# Re-set build results in case any new jobs have been
|
||||
|
|
Loading…
Reference in New Issue