Also retry the job if a post job failed with unreachable
When a post job fails because a node is unreachable, the job result is currently post_failure. However, we already retry the job if a node becomes unreachable in pre and run playbooks, so do the same in post playbooks as well. Change-Id: I0aa618e71a7533ad6392afa5199c9790cc8fa926
This commit is contained in:
parent
30cb2b1484
commit
a838328a49
|
@ -28,8 +28,16 @@
|
||||||
attempts: 2
|
attempts: 2
|
||||||
run: playbooks/unreachable.yaml
|
run: playbooks/unreachable.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: post-unreachable
|
||||||
|
attempts: 2
|
||||||
|
run: playbooks/run.yaml
|
||||||
|
post-run:
|
||||||
|
- playbooks/unreachable.yaml
|
||||||
|
|
||||||
- project:
|
- project:
|
||||||
check:
|
check:
|
||||||
jobs:
|
jobs:
|
||||||
- pre-unreachable
|
- pre-unreachable
|
||||||
- run-unreachable
|
- run-unreachable
|
||||||
|
- post-unreachable
|
||||||
|
|
|
@ -4193,6 +4193,8 @@ class TestUnreachable(AnsibleZuulTestCase):
|
||||||
dict(name='pre-unreachable', result=None, changes='1,1'),
|
dict(name='pre-unreachable', result=None, changes='1,1'),
|
||||||
dict(name='run-unreachable', result=None, changes='1,1'),
|
dict(name='run-unreachable', result=None, changes='1,1'),
|
||||||
dict(name='run-unreachable', result=None, changes='1,1'),
|
dict(name='run-unreachable', result=None, changes='1,1'),
|
||||||
|
dict(name='post-unreachable', result=None, changes='1,1'),
|
||||||
|
dict(name='post-unreachable', result=None, changes='1,1'),
|
||||||
], ordered=False)
|
], ordered=False)
|
||||||
unreachable_log = self._get_file(self.history[0],
|
unreachable_log = self._get_file(self.history[0],
|
||||||
'.ansible/nodes.unreachable')
|
'.ansible/nodes.unreachable')
|
||||||
|
|
|
@ -1137,6 +1137,7 @@ class AnsibleJob(object):
|
||||||
self.pause()
|
self.pause()
|
||||||
|
|
||||||
post_timeout = args['post_timeout']
|
post_timeout = args['post_timeout']
|
||||||
|
unreachable = False
|
||||||
for index, playbook in enumerate(self.jobdir.post_playbooks):
|
for index, playbook in enumerate(self.jobdir.post_playbooks):
|
||||||
# Post timeout operates a little differently to the main job
|
# Post timeout operates a little differently to the main job
|
||||||
# timeout. We give each post playbook the full post timeout to
|
# timeout. We give each post playbook the full post timeout to
|
||||||
|
@ -1147,6 +1148,12 @@ class AnsibleJob(object):
|
||||||
playbook, post_timeout, success, phase='post', index=index)
|
playbook, post_timeout, success, phase='post', index=index)
|
||||||
if post_status == self.RESULT_ABORTED:
|
if post_status == self.RESULT_ABORTED:
|
||||||
return 'ABORTED'
|
return 'ABORTED'
|
||||||
|
if post_status == self.RESULT_UNREACHABLE:
|
||||||
|
# In case we encounter unreachable nodes we need to return None
|
||||||
|
# so the job can be retried. However in the case of post
|
||||||
|
# playbooks we should still try to run all playbooks to get a
|
||||||
|
# chance to upload logs.
|
||||||
|
unreachable = True
|
||||||
if post_status != self.RESULT_NORMAL or post_code != 0:
|
if post_status != self.RESULT_NORMAL or post_code != 0:
|
||||||
success = False
|
success = False
|
||||||
# If we encountered a pre-failure, that takes
|
# If we encountered a pre-failure, that takes
|
||||||
|
@ -1156,6 +1163,9 @@ class AnsibleJob(object):
|
||||||
if (index + 1) == len(self.jobdir.post_playbooks):
|
if (index + 1) == len(self.jobdir.post_playbooks):
|
||||||
self._logFinalPlaybookError()
|
self._logFinalPlaybookError()
|
||||||
|
|
||||||
|
if unreachable:
|
||||||
|
return None
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _logFinalPlaybookError(self):
|
def _logFinalPlaybookError(self):
|
||||||
|
|
Loading…
Reference in New Issue