Enable autohold for RETRY_LIMIT / POST_FAILURE

We'd like to support autohold on RETRY_LIMIT / POST_FAILURE results,
making it easier for operators to debug failing jobs.

Change-Id: I367bd92b6ae24e097b3112dcbe876d14d9e4802e
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
Paul Belanger 2018-03-21 13:27:18 -04:00
parent e56801f2e8
commit bdce6ed65a
No known key found for this signature in database
GPG Key ID: 611A80832067AF38
9 changed files with 102 additions and 3 deletions

View File

@ -0,0 +1,5 @@
# Playbook with one play targeting all hosts. Its only task invokes a module
# named "invalid-task" with `path` / `state: touch` arguments.
# NOTE(review): presumably this is the post-run playbook that is meant to
# fail (no such module), producing a POST_FAILURE result -- confirm against
# the file name of this hunk.
- hosts: all
tasks:
- invalid-task:
path: "{{zuul._test.test_root}}/{{zuul.build}}.post.flag"
state: touch

View File

@ -21,3 +21,22 @@
pre-run: playbooks/pre.yaml
post-run: playbooks/post.yaml
run: playbooks/python27.yaml
# Job "python27-node": runs the pre, run (python27) and post playbooks and
# requests a node named "test" with label "label1" via an inline nodeset.
# NOTE(review): the node is presumably requested so that there is something
# for an autohold request to hold -- confirm with the accompanying tests.
- job:
name: python27-node
pre-run: playbooks/pre.yaml
post-run: playbooks/post.yaml
run: playbooks/python27.yaml
nodeset:
nodes:
name: test
label: label1
# Job "python27-node-post": runs the python27 playbook, then the
# post-fail.yaml post-run playbook, on a node named "test" with label
# "label1". No pre-run playbook is configured for this job.
- job:
name: python27-node-post
post-run: playbooks/post-fail.yaml
run: playbooks/python27.yaml
nodeset:
nodes:
name: test
label: label1

View File

@ -0,0 +1,5 @@
# Project stanza for org/project2: its check pipeline runs the
# python27-node job.
- project:
name: org/project2
check:
jobs:
- python27-node

View File

@ -0,0 +1 @@
test

View File

@ -0,0 +1,5 @@
# Project stanza for org/project3: its check pipeline runs the
# python27-node-post job.
- project:
name: org/project3
check:
jobs:
- python27-node-post

View File

@ -0,0 +1 @@
test

View File

@ -6,4 +6,5 @@
- common-config
untrusted-projects:
- org/project
- org/project2
- org/project3

View File

@ -2189,6 +2189,67 @@ class TestPrePlaybooks(AnsibleZuulTestCase):
self.assertTrue(os.path.exists(post_flag_path),
"The file %s should exist" % post_flag_path)
def test_post_playbook_fail_autohold(self):
    """Check that a node is held when a job ends in POST_FAILURE.

    Registers an autohold request over RPC for the python27-node-post
    job of org/project3, submits a change, and then asserts that the
    build result is POST_FAILURE and that a node in the hold state
    carries the expected hold key and comment.
    """
    rpc = zuul.rpcclient.RPCClient('127.0.0.1',
                                   self.gearman_server.port)
    self.addCleanup(rpc.shutdown)
    accepted = rpc.autohold('tenant-one', 'org/project3',
                            'python27-node-post',
                            "", "", "reason text", 1)
    self.assertTrue(accepted)

    # Trigger the job by uploading a new patchset.
    change = self.fake_gerrit.addFakeChange('org/project3', 'master', 'A')
    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    build = self.getJobFromHistory('python27-node-post')
    self.assertEqual(build.result, 'POST_FAILURE')

    # Pick the first node that nodepool reports as held, if any.
    held_node = next(
        (candidate for candidate in self.fake_nodepool.getNodes()
         if candidate['state'] == zuul.model.STATE_HOLD),
        None)
    self.assertIsNotNone(held_node)

    # The held node must record which job triggered the hold and why.
    expected_hold_job = " ".join(['tenant-one',
                                  'review.example.com/org/project3',
                                  'python27-node-post', '.*'])
    self.assertEqual(held_node['hold_job'], expected_hold_job)
    self.assertEqual(held_node['comment'], "reason text")
def test_pre_playbook_fail_autohold(self):
    """Check that a node is held when a job hits RETRY_LIMIT.

    Registers an autohold request over RPC for the python27-node job of
    org/project2, submits a change, and then asserts that the recorded
    build has no result, that the change was reported with RETRY_LIMIT,
    and that a node in the hold state carries the expected hold key and
    comment.
    """
    rpc = zuul.rpcclient.RPCClient('127.0.0.1',
                                   self.gearman_server.port)
    self.addCleanup(rpc.shutdown)
    accepted = rpc.autohold('tenant-one', 'org/project2',
                            'python27-node',
                            "", "", "reason text", 1)
    self.assertTrue(accepted)

    # Trigger the job by uploading a new patchset.
    change = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    build = self.getJobFromHistory('python27-node')
    self.assertIsNone(build.result)
    self.assertIn('RETRY_LIMIT', change.messages[0])

    # Pick the first node that nodepool reports as held, if any.
    held_node = next(
        (candidate for candidate in self.fake_nodepool.getNodes()
         if candidate['state'] == zuul.model.STATE_HOLD),
        None)
    self.assertIsNotNone(held_node)

    # The held node must record which job triggered the hold and why.
    expected_hold_job = " ".join(['tenant-one',
                                  'review.example.com/org/project2',
                                  'python27-node', '.*'])
    self.assertEqual(held_node['hold_job'], expected_hold_job)
    self.assertEqual(held_node['comment'], "reason text")
class TestPostPlaybooks(AnsibleZuulTestCase):
tenant_config_file = 'config/post-playbook/main.yaml'

View File

@ -1041,8 +1041,9 @@ class Scheduler(threading.Thread):
def _processAutohold(self, build):
# We explicitly only want to hold nodes for jobs if they have
# failed and have an autohold request.
if build.result != "FAILURE":
# failed / retry_limit / post_failure and have an autohold request.
hold_list = ["FAILURE", "RETRY_LIMIT", "POST_FAILURE"]
if build.result not in hold_list:
return
autohold_key = self._getAutoholdRequestKey(build)