Check paused parent on node failure of child job
When all child jobs, or the last remaining child job, of a paused job have failed because of a node failure, we have to resume the parent. Otherwise the buildset will hang indefinitely. Change-Id: If4bea8b7b2d3395ec33aef3bdccce2fcd0b17413
This commit is contained in:
parent
859141cf4f
commit
8f9aff7ca9
|
@ -0,0 +1,7 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- name: Pause and let child run
|
||||
zuul_return:
|
||||
data:
|
||||
zuul:
|
||||
pause: true
|
4
tests/fixtures/config/job-pause/git/org_project2/playbooks/test-node-failure.yaml
vendored
Normal file
4
tests/fixtures/config/job-pause/git/org_project2/playbooks/test-node-failure.yaml
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- debug:
|
||||
msg: "This should not be executed"
|
|
@ -0,0 +1,24 @@
|
|||
- job:
|
||||
name: just-pause
|
||||
run: playbooks/just-pause.yaml
|
||||
nodeset:
|
||||
nodes:
|
||||
- name: fake
|
||||
label: fake
|
||||
|
||||
- job:
|
||||
name: test-node-failure
|
||||
run: playbooks/test-node-failure.yaml
|
||||
nodeset:
|
||||
nodes:
|
||||
- name: fail
|
||||
label: fail
|
||||
|
||||
|
||||
- project:
|
||||
check:
|
||||
jobs:
|
||||
- just-pause
|
||||
- test-node-failure:
|
||||
dependencies:
|
||||
- just-pause
|
|
@ -5,3 +5,4 @@
|
|||
config-projects:
|
||||
- common-config
|
||||
- org/project
|
||||
- org/project2
|
||||
|
|
|
@ -4216,3 +4216,23 @@ class TestJobPause(AnsibleZuulTestCase):
|
|||
history_compile2 = self.history[-2]
|
||||
self.assertEqual('compile1', history_compile1.name)
|
||||
self.assertEqual('compile2', history_compile2.name)
|
||||
|
||||
def test_job_node_failure_resume(self):
|
||||
self.wait_timeout = 120
|
||||
|
||||
# Output extra ansible info so we might see errors.
|
||||
self.executor_server.verbose = True
|
||||
|
||||
# Second node request should fail
|
||||
fail = {'_oid': '200-0000000001'}
|
||||
self.fake_nodepool.addFailRequest(fail)
|
||||
|
||||
A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual([], self.builds)
|
||||
self.assertHistory([
|
||||
dict(name='just-pause', result='SUCCESS', changes='1,1'),
|
||||
], ordered=False)
|
||||
|
|
|
@ -799,6 +799,7 @@ class PipelineManager(object):
|
|||
self.log.info("Node request %s: failure for %s" %
|
||||
(request, request.job.name,))
|
||||
build_set.item.setNodeRequestFailure(request.job)
|
||||
self._resumeParents(request)
|
||||
self.log.info("Completed node request %s for job %s of item %s "
|
||||
"with nodes %s" %
|
||||
(request, request.job, build_set.item,
|
||||
|
|
Loading…
Reference in New Issue