Check paused parent on node failure of child job

When all child jobs (or the last remaining child job) of a paused job
have failed because of a node failure, we have to resume the parent.
Otherwise the buildset will hang indefinitely.

Change-Id: If4bea8b7b2d3395ec33aef3bdccce2fcd0b17413
This commit is contained in:
Simon Westphahl 2018-10-11 13:20:56 +02:00
parent 859141cf4f
commit 8f9aff7ca9
6 changed files with 57 additions and 0 deletions

View File

@ -0,0 +1,7 @@
# Playbook for the parent job: it returns `zuul.pause: true` through
# zuul_return so the job pauses and lets its child job run.
- hosts: all
  tasks:
    - name: Pause and let child run
      zuul_return:
        data:
          zuul:
            pause: true

View File

@ -0,0 +1,4 @@
# Playbook for the child job. The accompanying test makes the node request
# for this job fail, so this play is expected never to run.
- hosts: all
  tasks:
    - debug:
        msg: "This should not be executed"

View File

@ -0,0 +1,24 @@
# Parent job: runs the just-pause playbook on a single node labeled "fake".
- job:
    name: just-pause
    run: playbooks/just-pause.yaml
    nodeset:
      nodes:
        - name: fake
          label: fake

# Child job: requests a single node labeled "fail" and runs the
# test-node-failure playbook.
- job:
    name: test-node-failure
    run: playbooks/test-node-failure.yaml
    nodeset:
      nodes:
        - name: fail
          label: fail

# The check pipeline runs both jobs; test-node-failure depends on
# just-pause, making it the child of the paused parent.
- project:
    check:
      jobs:
        - just-pause
        - test-node-failure:
            dependencies:
              - just-pause

View File

@ -5,3 +5,4 @@
config-projects:
- common-config
- org/project
- org/project2

View File

@ -4216,3 +4216,23 @@ class TestJobPause(AnsibleZuulTestCase):
history_compile2 = self.history[-2]
self.assertEqual('compile1', history_compile1.name)
self.assertEqual('compile2', history_compile2.name)
def test_job_node_failure_resume(self):
    """Check that a paused parent is resumed when its child's node request fails.

    The ``just-pause`` job pauses itself, and the node request registered
    with ``addFailRequest`` is made to fail. Once the system has settled,
    no builds may be left in ``self.builds`` and the only history entry
    expected is the successful ``just-pause`` build.
    """
    self.wait_timeout = 120

    # Output extra ansible info so we might see errors.
    self.executor_server.verbose = True

    # Second node request should fail
    fail = {'_oid': '200-0000000001'}
    self.fake_nodepool.addFailRequest(fail)

    A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    # Nothing may still be running (a parent that is still paused would
    # presumably remain listed here), and only the parent job is expected
    # in the build history.
    self.assertEqual([], self.builds)
    self.assertHistory([
        dict(name='just-pause', result='SUCCESS', changes='1,1'),
    ], ordered=False)

View File

@ -799,6 +799,7 @@ class PipelineManager(object):
self.log.info("Node request %s: failure for %s" %
(request, request.job.name,))
build_set.item.setNodeRequestFailure(request.job)
self._resumeParents(request)
self.log.info("Completed node request %s for job %s of item %s "
"with nodes %s" %
(request, request.job, build_set.item,