Browse Source

Also retry the job if a post job failed with unreachable

When a post playbook fails because a node is unreachable, the job result
is currently post_failure. However, we already retry the job when a node
becomes unreachable in pre and run playbooks, so do the same for post
playbooks.

Change-Id: I0aa618e71a7533ad6392afa5199c9790cc8fa926
changes/30/602830/8
Tobias Henkel 3 years ago
parent
commit
a838328a49
No known key found for this signature in database GPG Key ID: 3750DEC158E5FA2
3 changed files with 20 additions and 0 deletions
  1. +8
    -0
      tests/fixtures/config/ansible-unreachable/git/org_project/zuul.yaml
  2. +2
    -0
      tests/unit/test_v3.py
  3. +10
    -0
      zuul/executor/server.py

+ 8
- 0
tests/fixtures/config/ansible-unreachable/git/org_project/zuul.yaml View File

@ -28,8 +28,16 @@
attempts: 2
run: playbooks/unreachable.yaml
- job:
name: post-unreachable
attempts: 2
run: playbooks/run.yaml
post-run:
- playbooks/unreachable.yaml
- project:
check:
jobs:
- pre-unreachable
- run-unreachable
- post-unreachable

+ 2
- 0
tests/unit/test_v3.py View File

@ -4193,6 +4193,8 @@ class TestUnreachable(AnsibleZuulTestCase):
dict(name='pre-unreachable', result=None, changes='1,1'),
dict(name='run-unreachable', result=None, changes='1,1'),
dict(name='run-unreachable', result=None, changes='1,1'),
dict(name='post-unreachable', result=None, changes='1,1'),
dict(name='post-unreachable', result=None, changes='1,1'),
], ordered=False)
unreachable_log = self._get_file(self.history[0],
'.ansible/nodes.unreachable')


+ 10
- 0
zuul/executor/server.py View File

@ -1137,6 +1137,7 @@ class AnsibleJob(object):
self.pause()
post_timeout = args['post_timeout']
unreachable = False
for index, playbook in enumerate(self.jobdir.post_playbooks):
# Post timeout operates a little differently to the main job
# timeout. We give each post playbook the full post timeout to
@ -1147,6 +1148,12 @@ class AnsibleJob(object):
playbook, post_timeout, success, phase='post', index=index)
if post_status == self.RESULT_ABORTED:
return 'ABORTED'
if post_status == self.RESULT_UNREACHABLE:
# If we encounter unreachable nodes, we need to return None so
# the job can be retried. However, for post playbooks we should
# still try to run all of them to get a chance to upload logs
# before returning.
unreachable = True
if post_status != self.RESULT_NORMAL or post_code != 0:
success = False
# If we encountered a pre-failure, that takes
@ -1156,6 +1163,9 @@ class AnsibleJob(object):
if (index + 1) == len(self.jobdir.post_playbooks):
self._logFinalPlaybookError()
if unreachable:
return None
return result
def _logFinalPlaybookError(self):


Loading…
Cancel
Save