Also retry the job if a post playbook failed with unreachable nodes

When a post playbook fails because a node is unreachable, the job result
is currently post_failure. However, we already retry the job if a node
becomes unreachable during pre and run playbooks, so do the same for post
playbooks as well.

Change-Id: I0aa618e71a7533ad6392afa5199c9790cc8fa926
This commit is contained in:
Tobias Henkel 2018-09-15 15:34:33 +02:00
parent 30cb2b1484
commit a838328a49
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2
3 changed files with 20 additions and 0 deletions

View File

@ -28,8 +28,16 @@
attempts: 2 attempts: 2
run: playbooks/unreachable.yaml run: playbooks/unreachable.yaml
- job:
name: post-unreachable
attempts: 2
run: playbooks/run.yaml
post-run:
- playbooks/unreachable.yaml
- project: - project:
check: check:
jobs: jobs:
- pre-unreachable - pre-unreachable
- run-unreachable - run-unreachable
- post-unreachable

View File

@ -4193,6 +4193,8 @@ class TestUnreachable(AnsibleZuulTestCase):
dict(name='pre-unreachable', result=None, changes='1,1'), dict(name='pre-unreachable', result=None, changes='1,1'),
dict(name='run-unreachable', result=None, changes='1,1'), dict(name='run-unreachable', result=None, changes='1,1'),
dict(name='run-unreachable', result=None, changes='1,1'), dict(name='run-unreachable', result=None, changes='1,1'),
dict(name='post-unreachable', result=None, changes='1,1'),
dict(name='post-unreachable', result=None, changes='1,1'),
], ordered=False) ], ordered=False)
unreachable_log = self._get_file(self.history[0], unreachable_log = self._get_file(self.history[0],
'.ansible/nodes.unreachable') '.ansible/nodes.unreachable')

View File

@ -1137,6 +1137,7 @@ class AnsibleJob(object):
self.pause() self.pause()
post_timeout = args['post_timeout'] post_timeout = args['post_timeout']
unreachable = False
for index, playbook in enumerate(self.jobdir.post_playbooks): for index, playbook in enumerate(self.jobdir.post_playbooks):
# Post timeout operates a little differently to the main job # Post timeout operates a little differently to the main job
# timeout. We give each post playbook the full post timeout to # timeout. We give each post playbook the full post timeout to
@ -1147,6 +1148,12 @@ class AnsibleJob(object):
playbook, post_timeout, success, phase='post', index=index) playbook, post_timeout, success, phase='post', index=index)
if post_status == self.RESULT_ABORTED: if post_status == self.RESULT_ABORTED:
return 'ABORTED' return 'ABORTED'
if post_status == self.RESULT_UNREACHABLE:
# In case we encounter unreachable nodes we need to return None
# so the job can be retried. However in the case of post
# playbooks we should still try to run all playbooks to get a
# chance to upload logs.
unreachable = True
if post_status != self.RESULT_NORMAL or post_code != 0: if post_status != self.RESULT_NORMAL or post_code != 0:
success = False success = False
# If we encountered a pre-failure, that takes # If we encountered a pre-failure, that takes
@ -1156,6 +1163,9 @@ class AnsibleJob(object):
if (index + 1) == len(self.jobdir.post_playbooks): if (index + 1) == len(self.jobdir.post_playbooks):
self._logFinalPlaybookError() self._logFinalPlaybookError()
if unreachable:
return None
return result return result
def _logFinalPlaybookError(self): def _logFinalPlaybookError(self):