Also retry the job if a post playbook fails with unreachable nodes

When a post playbook fails because a node is unreachable, the job result is currently post_failure. However, we already retry the job when a node becomes unreachable in pre and run playbooks, so do the same for post playbooks. Change-Id: I0aa618e71a7533ad6392afa5199c9790cc8fa926
2018-09-15 15:34:33 +02:00 · 2018-09-15 15:34:33 +02:00 · a838328a49
parent 30cb2b1484
commit a838328a49
3 changed files with 20 additions and 0 deletions
--- a/tests/fixtures/config/ansible-unreachable/git/org_project/zuul.yaml
+++ b/tests/fixtures/config/ansible-unreachable/git/org_project/zuul.yaml
@ -28,8 +28,16 @@
    attempts: 2
    run: playbooks/unreachable.yaml
 - job:
    name: post-unreachable
    attempts: 2
    run: playbooks/run.yaml
    post-run:
      - playbooks/unreachable.yaml
 - project:
    check:
      jobs:
        - pre-unreachable
        - run-unreachable
        - post-unreachable
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@ -4193,6 +4193,8 @@ class TestUnreachable(AnsibleZuulTestCase):
            dict(name='pre-unreachable', result=None, changes='1,1'),
            dict(name='run-unreachable', result=None, changes='1,1'),
            dict(name='run-unreachable', result=None, changes='1,1'),
            dict(name='post-unreachable', result=None, changes='1,1'),
            dict(name='post-unreachable', result=None, changes='1,1'),
        ], ordered=False)
        unreachable_log = self._get_file(self.history[0],
                                         '.ansible/nodes.unreachable')
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@ -1137,6 +1137,7 @@ class AnsibleJob(object):
            self.pause()
        post_timeout = args['post_timeout']
        unreachable = False
        for index, playbook in enumerate(self.jobdir.post_playbooks):
            # Post timeout operates a little differently to the main job
            # timeout. We give each post playbook the full post timeout to
@ -1147,6 +1148,12 @@ class AnsibleJob(object):
                playbook, post_timeout, success, phase='post', index=index)
            if post_status == self.RESULT_ABORTED:
                return 'ABORTED'
            if post_status == self.RESULT_UNREACHABLE:
                # In case we encounter unreachable nodes we need to return None
                # so the job can be retried. However in the case of post
                # playbooks we should still try to run all playbooks to get a
                # chance to upload logs.
                unreachable = True
            if post_status != self.RESULT_NORMAL or post_code != 0:
                success = False
                # If we encountered a pre-failure, that takes
@ -1156,6 +1163,9 @@ class AnsibleJob(object):
                if (index + 1) == len(self.jobdir.post_playbooks):
                    self._logFinalPlaybookError()
        if unreachable:
            return None
        return result
    def _logFinalPlaybookError(self):