Add Zuul job variable to indicate a job will retry
This change adds a variable to post and cleanup playbooks in order to determine whether a job will be retried due to a failure in one of the earlier playbooks. This variable can be useful for performing certain actions (e.g. interacting with a remote system) only when the job result is final and there will be no further attempts.

Change-Id: If7f4488d4a59b1544795401bdc243978fea9ca86
parent 19ae1f2f8a
commit 7bba28a32f
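As a rough sketch of the intended usage (this play is illustrative and not part of the change; the reporting task and URL are hypothetical), a post playbook could guard a final-only action on the new variable:

- hosts: localhost
  tasks:
    # Only report to the external system on the last attempt;
    # zuul_will_retry is true when another attempt will follow.
    - name: Report final result to a remote system (hypothetical endpoint)
      uri:
        url: "https://example.com/report"
        method: POST
        body_format: json
        body:
          build: "{{ zuul.build }}"
          success: "{{ zuul_success | bool }}"
      when: not (zuul_will_retry | bool)

Both zuul_success and zuul_will_retry are passed to the playbook as extra-vars, which is why the documentation below recommends the `bool` filter when testing them.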
@@ -675,6 +675,18 @@ of item.
       - shell: echo example
         when: zuul_success | bool
 
+.. var:: zuul_will_retry
+
+   Post run and cleanup playbook(s) will be passed this variable to indicate
+   if the job will be retried. This variable is meant to be used with the
+   `bool` filter.
+
+   .. code-block:: yaml
+
+     tasks:
+       - shell: echo example
+         when: zuul_will_retry | bool
+
 .. var:: nodepool
 
    Information about each host from Nodepool is supplied in the
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    A new Zuul job variable :var:`zuul_will_retry` is available in post and
+    cleanup playbooks, which indicates whether the current job will be retried.
tests/fixtures/config/ansible-unreachable/git/org_project/playbooks/post.yaml (vendored, new file, 6 lines)
@@ -0,0 +1,6 @@
+- hosts: localhost
+  gather_facts: no
+  tasks:
+    - copy:
+        content: "{{ zuul_will_retry }}"
+        dest: "{{ zuul._test.test_root }}/builds/{{ zuul.build }}.will-retry.flag"
@@ -15,6 +15,7 @@
 - job:
     name: base
     parent: null
+    post-run: playbooks/post.yaml
 
 - job:
     name: pre-unreachable
@@ -7333,6 +7333,17 @@ class TestUnreachable(AnsibleZuulTestCase):
             '.ansible/nodes.unreachable')
         self.assertEqual('fake\n', unreachable_log)
 
+        retried_builds = set()
+        for build in self.history:
+            will_retry_flag = os.path.join(
+                self.jobdir_root, f'{build.uuid}.will-retry.flag')
+            self.assertTrue(os.path.exists(will_retry_flag))
+            with open(will_retry_flag) as f:
+                will_retry = f.readline()
+            expect_retry = build.name not in retried_builds
+            self.assertEqual(str(expect_retry), will_retry)
+            retried_builds.add(build.name)
+
 
 class TestJobPause(AnsibleZuulTestCase):
     tenant_config_file = 'config/job-pause/main.yaml'
@@ -1052,6 +1052,13 @@ class AnsibleJob(object):
         # The zuul.* vars
         self.debug_zuul_vars = {}
         self.waiting_for_semaphores = False
+        try:
+            max_attempts = self.arguments["zuul"]["max_attempts"]
+        except KeyError:
+            # TODO (swestphahl):
+            # Remove backward compatibility handling
+            max_attempts = self.arguments["max_attempts"]
+        self.retry_limit = self.arguments["zuul"]["attempts"] >= max_attempts
 
     def run(self):
         self.running = True
@@ -1503,9 +1510,8 @@ class AnsibleJob(object):
         self.executor_server.updateBuildStatus(self.build_request, data)
 
         result = self.runPlaybooks(args)
-        success = result == 'SUCCESS'
 
-        self.runCleanupPlaybooks(success)
+        self.runCleanupPlaybooks(result)
 
         # Stop the persistent SSH connections.
         setup_status, setup_code = self.runAnsibleCleanup(
@@ -1822,6 +1828,8 @@ class AnsibleJob(object):
         post_timeout = self.job.post_timeout
         post_unreachable = False
         for index, playbook in enumerate(self.jobdir.post_playbooks):
+            will_retry = (
+                (pre_failed or post_unreachable) and not self.retry_limit)
             # Post timeout operates a little differently to the main job
             # timeout. We give each post playbook the full post timeout to
             # do its job because post is where you'll often record job logs
@@ -1829,7 +1837,7 @@
             # the first place.
             post_status, post_code = self.runAnsiblePlaybook(
                 playbook, post_timeout, self.ansible_version, success,
-                phase='post', index=index)
+                phase='post', index=index, will_retry=will_retry)
             if post_status == self.RESULT_ABORTED:
                 return 'ABORTED'
             if post_status == self.RESULT_UNREACHABLE:
@@ -1857,7 +1865,7 @@
 
         return result
 
-    def runCleanupPlaybooks(self, success):
+    def runCleanupPlaybooks(self, result):
         if not self.jobdir.cleanup_playbooks:
             return
 
@@ -1871,11 +1879,14 @@
             now=datetime.datetime.now()
         ))
 
+        success = result == 'SUCCESS'
+        will_retry = result is None and not self.retry_limit
         self.cleanup_started = True
         for index, playbook in enumerate(self.jobdir.cleanup_playbooks):
             self.runAnsiblePlaybook(
                 playbook, CLEANUP_TIMEOUT, self.ansible_version,
-                success=success, phase='cleanup', index=index)
+                success=success, phase='cleanup', index=index,
+                will_retry=will_retry)
 
     def _logFinalPlaybookError(self):
         # Failures in the final post playbook can include failures
@@ -3090,7 +3101,8 @@ class AnsibleJob(object):
                 msg=msg))
 
     def runAnsiblePlaybook(self, playbook, timeout, ansible_version,
-                           success=None, phase=None, index=None):
+                           success=None, phase=None, index=None,
+                           will_retry=None):
         if playbook.trusted or playbook.secrets_content:
             self.writeInventory(playbook, self.frozen_hostvars)
         else:
@@ -3107,6 +3119,9 @@ class AnsibleJob(object):
         if success is not None:
             cmd.extend(['-e', 'zuul_success=%s' % str(bool(success))])
 
+        if will_retry is not None:
+            cmd.extend(['-e', f'zuul_will_retry={bool(will_retry)}'])
+
         if phase:
             cmd.extend(['-e', 'zuul_execution_phase=%s' % phase])
 
@@ -4081,23 +4096,14 @@ class ExecutorServer(BaseMergeServer):
         ansible_job.end_time = time.monotonic()
         duration = ansible_job.end_time - ansible_job.time_starting_build
 
-        params = ansible_job.arguments
         # If the result is None, check if the build has reached
         # its max attempts and if so set the result to
         # RETRY_LIMIT. This must be done in order to correctly
         # process the autohold in the next step. Since we only
         # want to hold the node if the build has reached a final
         # result.
-        if result.get("result") is None:
-            attempts = params["zuul"]["attempts"]
-            try:
-                max_attempts = params["zuul"]["max_attempts"]
-            except KeyError:
-                # TODO (swestphahl):
-                # Remove backward compatibility handling
-                max_attempts = params["max_attempts"]
-            if attempts >= max_attempts:
-                result["result"] = "RETRY_LIMIT"
+        if result.get("result") is None and ansible_job.retry_limit:
+            result["result"] = "RETRY_LIMIT"
 
         # Provide the hold information back to the scheduler via the build
         # result.