Merge "Implement pre-timeout for pre-run playbooks"

This commit is contained in:
Zuul
2025-04-15 17:12:09 +00:00
committed by Gerrit Code Review
12 changed files with 100 additions and 3 deletions

View File

@@ -538,6 +538,14 @@ Here is an example of two job definitions:
this value instead of the branch of the item being tested when
collecting jobs to run.
.. attr:: pre-timeout
The time in seconds that the job will allow all pre-run playbooks to
consume before timing out. If set, this value must be less than or
equal to the job ``timeout`` value, because pre-run playbook runtime
counts against the job ``timeout``. If left unset, the job ``timeout``
value is used.
.. attr:: timeout
The time in seconds that the job should be allowed to run before

View File

@@ -1279,6 +1279,10 @@ The following variables related to the job are available:
The name of the current Zuul tenant.
.. var:: pre_timeout
The pre-run playbook timeout, in seconds.
.. var:: timeout
The job timeout, in seconds.

View File

@@ -0,0 +1,11 @@
---
features:
- |
Jobs now have the ability to set a :attr:`job.pre-timeout` value. This
timeout limits the amount of time that pre-run playbooks may consume.
Time spent in pre-run playbooks still counts against the normal
:attr:`job.timeout` value. If :attr:`job.pre-timeout` is unset,
:attr:`job.timeout` is used instead.
fixes:
- |
Job JSON API responses now include the :attr:`job.post-timeout`
configuration value for the job.

View File

@@ -101,6 +101,15 @@
# will lead to a job retry.
timeout: 20
- job:
parent: python27
name: pre-timeout
pre-run: playbooks/timeout.yaml
# Unlike the prior job we want to check things have failed in pre-run.
# To avoid unnecessary retries we limit them here.
attempts: 1
pre-timeout: 5
- job:
parent: python27
name: post-timeout

View File

@@ -30,6 +30,7 @@
dependencies:
- check-hostvars
- timeout: {dependencies: check-hostvars}
- pre-timeout: {dependencies: check-hostvars}
- post-timeout: {dependencies: check-secret-names}
- hello-world: {dependencies: check-secret-names}
- failpost: {dependencies: hello-world}

View File

@@ -4274,6 +4274,14 @@ class FunctionalAnsibleMixIn(object):
post_flag_path = os.path.join(
self.jobdir_root, build_timeout.uuid + '.post.flag')
self.assertTrue(os.path.exists(post_flag_path))
build_pre_timeout = self.getJobFromHistory('pre-timeout')
with self.jobLog(build_pre_timeout):
# Failures in pre-run have a None result and then Zuul determines
# if they should be retried from there.
self.assertEqual(build_pre_timeout.result, None)
post_flag_path = os.path.join(
self.jobdir_root, build_pre_timeout.uuid + '.post.flag')
self.assertTrue(os.path.exists(post_flag_path))
build_post_timeout = self.getJobFromHistory('post-timeout')
with self.jobLog(build_post_timeout):
self.assertEqual(build_post_timeout.result, 'POST_FAILURE')

View File

@@ -388,7 +388,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'org/common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'variant_description': '',
'voting': True,
@@ -473,7 +475,9 @@ class TestWeb(BaseTestWeb):
'semaphores': [],
'source_context': source_ctx,
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -530,7 +534,9 @@ class TestWeb(BaseTestWeb):
'semaphores': [],
'source_context': source_ctx,
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -580,7 +586,9 @@ class TestWeb(BaseTestWeb):
'semaphores': [],
'source_context': source_ctx,
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -712,7 +720,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -758,7 +768,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -804,7 +816,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -850,7 +864,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -923,7 +939,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -970,7 +988,9 @@ class TestWeb(BaseTestWeb):
'path': 'zuul.yaml',
'project': 'common-config'},
'tags': [],
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'variables': {},
'extra_variables': {},
'group_variables': {},
@@ -1254,6 +1274,7 @@ class TestWeb(BaseTestWeb):
'ansible_split_streams': None,
'ansible_version': '9',
'timeout': None,
'pre_timeout': None,
'post_timeout': None,
'items': [],
'projects': [],
@@ -1357,7 +1378,9 @@ class TestWeb(BaseTestWeb):
'src_dir': 'src/review.example.com/org/project1',
},
'tenant': 'tenant-one',
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'jobtags': [],
'branch': 'master',
'projects': {},
@@ -1395,6 +1418,7 @@ class TestWeb(BaseTestWeb):
'groups': [], 'name': '', 'nodes': []},
'override_branch': None,
'override_checkout': None,
'pre_timeout': None,
'post_timeout': None,
'projects': [],
'merge_repo_state_ref': None,
@@ -1455,7 +1479,9 @@ class TestWeb(BaseTestWeb):
'src_dir':
'src/review.example.com/org/noop-project'}],
'tenant': 'tenant-one',
'pre_timeout': None,
'timeout': None,
'post_timeout': None,
'voting': True}}
self.assertEqual(job_params, resp.json())

View File

@@ -740,6 +740,7 @@ class JobParser(object):
vs.Any(ZUUL_REGEX, str)),
# validation happens in NodeSetParser
'nodeset': vs.Any(dict, str),
'pre-timeout': int,
'timeout': int,
'post-timeout': int,
'attempts': int,
@@ -802,6 +803,7 @@ class JobParser(object):
'abstract',
'protected',
'intermediate',
'pre-timeout',
'timeout',
'post-timeout',
'workspace',
@@ -827,6 +829,7 @@ class JobParser(object):
'hold-following-changes': 'hold_following_changes',
'files': 'file_matcher',
'irrelevant-files': 'irrelevant_file_matcher',
'pre-timeout': 'pre_timeout',
'post-timeout': 'post_timeout',
'pre-run': 'pre_run',
'post-run': 'post_run',

View File

@@ -209,7 +209,9 @@ def zuul_params_from_job(job):
zuul_params = {
"job": job.name,
"voting": job.voting,
"pre_timeout": job.pre_timeout,
"timeout": job.timeout,
"post_timeout": job.post_timeout,
"jobtags": sorted(job.tags),
"_inheritance_path": list(job.inheritance_path),
}

View File

@@ -1970,7 +1970,7 @@ class AnsibleJob(object):
if timeout is not None:
now = time.time()
elapsed = now - start
timeout = timeout - elapsed
timeout = max(0, timeout - elapsed)
return timeout
def runPlaybooks(self, args):
@@ -2032,14 +2032,15 @@ class AnsibleJob(object):
self.started = True
time_started = time.time()
# If a pre-run playbook timeout is set, use it for the pre-run phase.
# Any pre-run runtime still counts against the total timeout for
# pre-run and run, because the job timeout value is the "total" job
# timeout which accounts for both pre-run and run playbooks. post-run
# is different because it is used to copy out job logs and we want to
# do our best to copy logs even when the job has timed out.
job_timeout = self.job.timeout
job_timeout = self.job.pre_timeout or self.job.timeout
for index, playbook in enumerate(self.jobdir.pre_playbooks):
nesting_level_achieved = playbook.nesting_level
# TODOv3(pabelanger): Implement pre-run timeout setting.
ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
pre_status, pre_code = self.runAnsiblePlaybook(
playbook, ansible_timeout, self.ansible_version, phase='pre',
@@ -2062,6 +2063,11 @@ class AnsibleJob(object):
self.cpu_times['children_system']))
if not pre_failed:
if self.job.pre_timeout:
# The pre-timeout only bounds the pre-run playbooks, so switch
# job_timeout over to what remains of the (longer) job timeout.
job_timeout = self.getAnsibleTimeout(
time_started, self.job.timeout)
# At this point, we have gone all the way down.
nesting_level_achieved = None
for index, playbook in enumerate(self.jobdir.playbooks):

View File

@@ -3235,6 +3235,7 @@ class FrozenJob(zkobject.ZKObject):
'nodeset_index',
'override_branch',
'override_checkout',
'pre_timeout',
'post_timeout',
'required_projects',
'semaphores',
@@ -3444,6 +3445,9 @@ class FrozenJob(zkobject.ZKObject):
data['requires'] = frozenset(data['requires'])
data['tags'] = frozenset(data['tags'])
# MODEL_API <= 33
data.setdefault('pre_timeout', None)
for job_data_key in self.job_data_attributes:
job_data = data.pop(job_data_key, None)
if job_data:
@@ -3675,7 +3679,9 @@ class Job(ConfigObject):
d['intermediate'] = self.intermediate
d['protected'] = self.protected
d['voting'] = self.voting
d['pre_timeout'] = self.pre_timeout
d['timeout'] = self.timeout
d['post_timeout'] = self.post_timeout
d['tags'] = list(self.tags)
d['provides'] = list(self.provides)
d['requires'] = list(self.requires)
@@ -3746,6 +3752,7 @@ class Job(ConfigObject):
# project-pipeline.
self.execution_attributes = dict(
parent=None,
pre_timeout=None,
timeout=None,
post_timeout=None,
variables={},
@@ -9436,6 +9443,17 @@ class Layout(object):
if job.roles:
job._resolveRoles(self)
if (job.pre_timeout and
self.tenant.max_job_timeout != -1 and
job.pre_timeout > self.tenant.max_job_timeout):
raise MaxTimeoutError(job, self.tenant)
# pre-timeout counts against timeout so can't be any larger than
# timeout
if (job.pre_timeout and job.timeout and
job.pre_timeout > job.timeout):
raise MaxTimeoutError(job, self.tenant)
if (job.timeout and
self.tenant.max_job_timeout != -1 and
job.timeout > self.tenant.max_job_timeout):

View File

@@ -2593,6 +2593,7 @@ class ZuulWebAPI(object):
del params['secret_parent_data']
params['job'] = job.name
params['zuul']['buildset'] = None
params['pre_timeout'] = job.pre_timeout
params['timeout'] = job.timeout
params['post_timeout'] = job.post_timeout
params['override_branch'] = job.override_branch