Support autoholding nodes for specific changes/refs
Add support for "scoped" autohold requests, making it possible to create requests that match a given ref filter and hold nodes for build that matches the filter. Introduce two new arguments to `zuul autohold`: --ref and --change, with --ref accepting regex that builds are matched against, and --change being a shortcut that creates a filter for the specific change. Change-Id: I801ba1af4b1bda46abff07791dd96828f2738621
This commit is contained in:
parent
1bb9171c43
commit
37d5403629
|
@ -1629,6 +1629,10 @@ class FakeNodepool(object):
|
||||||
nodeid = path.split("/")[-1]
|
nodeid = path.split("/")[-1]
|
||||||
return nodeid
|
return nodeid
|
||||||
|
|
||||||
|
def removeNode(self, node):
|
||||||
|
path = self.NODE_ROOT + '/' + node["_oid"]
|
||||||
|
self.client.delete(path, recursive=True)
|
||||||
|
|
||||||
def addFailRequest(self, request):
|
def addFailRequest(self, request):
|
||||||
self.fail_requests.add(request['_oid'])
|
self.fail_requests.add(request['_oid'])
|
||||||
|
|
||||||
|
|
|
@ -1507,7 +1507,7 @@ class TestScheduler(ZuulTestCase):
|
||||||
self.gearman_server.port)
|
self.gearman_server.port)
|
||||||
self.addCleanup(client.shutdown)
|
self.addCleanup(client.shutdown)
|
||||||
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
"reason text", 1)
|
"", "", "reason text", 1)
|
||||||
self.assertTrue(r)
|
self.assertTrue(r)
|
||||||
|
|
||||||
# First check that successful jobs do not autohold
|
# First check that successful jobs do not autohold
|
||||||
|
@ -1554,7 +1554,7 @@ class TestScheduler(ZuulTestCase):
|
||||||
held_node['hold_job'],
|
held_node['hold_job'],
|
||||||
" ".join(['tenant-one',
|
" ".join(['tenant-one',
|
||||||
'review.example.com/org/project',
|
'review.example.com/org/project',
|
||||||
'project-test2'])
|
'project-test2', '.*'])
|
||||||
)
|
)
|
||||||
self.assertEqual(held_node['comment'], "reason text")
|
self.assertEqual(held_node['comment'], "reason text")
|
||||||
|
|
||||||
|
@ -1574,13 +1574,151 @@ class TestScheduler(ZuulTestCase):
|
||||||
held_nodes += 1
|
held_nodes += 1
|
||||||
self.assertEqual(held_nodes, 1)
|
self.assertEqual(held_nodes, 1)
|
||||||
|
|
||||||
|
def _test_autohold_scoped(self, change_obj, change, ref):
|
||||||
|
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
||||||
|
self.gearman_server.port)
|
||||||
|
self.addCleanup(client.shutdown)
|
||||||
|
|
||||||
|
# create two changes on the same project, and autohold request
|
||||||
|
# for one of them.
|
||||||
|
other = self.fake_gerrit.addFakeChange(
|
||||||
|
'org/project', 'master', 'other'
|
||||||
|
)
|
||||||
|
|
||||||
|
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
|
str(change), ref, "reason text", 1)
|
||||||
|
self.assertTrue(r)
|
||||||
|
|
||||||
|
# First, check that an unrelated job does not trigger autohold, even
|
||||||
|
# when it failed
|
||||||
|
self.executor_server.failJob('project-test2', other)
|
||||||
|
self.fake_gerrit.addEvent(other.getPatchsetCreatedEvent(1))
|
||||||
|
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.assertEqual(other.data['status'], 'NEW')
|
||||||
|
self.assertEqual(other.reported, 1)
|
||||||
|
# project-test2
|
||||||
|
self.assertEqual(self.history[0].result, 'FAILURE')
|
||||||
|
|
||||||
|
# Check nodepool for a held node
|
||||||
|
held_node = None
|
||||||
|
for node in self.fake_nodepool.getNodes():
|
||||||
|
if node['state'] == zuul.model.STATE_HOLD:
|
||||||
|
held_node = node
|
||||||
|
break
|
||||||
|
self.assertIsNone(held_node)
|
||||||
|
|
||||||
|
# And then verify that failed job for the defined change
|
||||||
|
# triggers the autohold
|
||||||
|
|
||||||
|
self.executor_server.failJob('project-test2', change_obj)
|
||||||
|
self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
|
||||||
|
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.assertEqual(change_obj.data['status'], 'NEW')
|
||||||
|
self.assertEqual(change_obj.reported, 1)
|
||||||
|
# project-test2
|
||||||
|
self.assertEqual(self.history[1].result, 'FAILURE')
|
||||||
|
|
||||||
|
# Check nodepool for a held node
|
||||||
|
held_node = None
|
||||||
|
for node in self.fake_nodepool.getNodes():
|
||||||
|
if node['state'] == zuul.model.STATE_HOLD:
|
||||||
|
held_node = node
|
||||||
|
break
|
||||||
|
self.assertIsNotNone(held_node)
|
||||||
|
|
||||||
|
# Validate node has recorded the failed job
|
||||||
|
if change != "":
|
||||||
|
ref = "refs/changes/%s/%s/.*" % (
|
||||||
|
str(change_obj.number)[-1:], str(change_obj.number)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
held_node['hold_job'],
|
||||||
|
" ".join(['tenant-one',
|
||||||
|
'review.example.com/org/project',
|
||||||
|
'project-test2', ref])
|
||||||
|
)
|
||||||
|
self.assertEqual(held_node['comment'], "reason text")
|
||||||
|
|
||||||
|
@simple_layout('layouts/autohold.yaml')
|
||||||
|
def test_autohold_change(self):
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
|
||||||
|
self._test_autohold_scoped(A, change=A.number, ref="")
|
||||||
|
|
||||||
|
@simple_layout('layouts/autohold.yaml')
|
||||||
|
def test_autohold_ref(self):
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
ref = A.data['currentPatchSet']['ref']
|
||||||
|
self._test_autohold_scoped(A, change="", ref=ref)
|
||||||
|
|
||||||
|
@simple_layout('layouts/autohold.yaml')
|
||||||
|
def test_autohold_scoping(self):
|
||||||
|
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
||||||
|
self.gearman_server.port)
|
||||||
|
self.addCleanup(client.shutdown)
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
|
||||||
|
# create three autohold requests, scoped to job, change and
|
||||||
|
# a specific ref
|
||||||
|
change = str(A.number)
|
||||||
|
ref = A.data['currentPatchSet']['ref']
|
||||||
|
r1 = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
|
"", "", "reason text", 1)
|
||||||
|
self.assertTrue(r1)
|
||||||
|
r2 = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
|
change, "", "reason text", 1)
|
||||||
|
self.assertTrue(r2)
|
||||||
|
r3 = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
|
"", ref, "reason text", 1)
|
||||||
|
self.assertTrue(r3)
|
||||||
|
|
||||||
|
# Fail 3 jobs for the same change, and verify that the autohold
|
||||||
|
# requests are fullfilled in the expected order: from the most
|
||||||
|
# specific towards the most generic one.
|
||||||
|
|
||||||
|
def _fail_job_and_verify_autohold_request(change_obj, ref_filter):
|
||||||
|
self.executor_server.failJob('project-test2', change_obj)
|
||||||
|
self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
|
||||||
|
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Check nodepool for a held node
|
||||||
|
held_node = None
|
||||||
|
for node in self.fake_nodepool.getNodes():
|
||||||
|
if node['state'] == zuul.model.STATE_HOLD:
|
||||||
|
held_node = node
|
||||||
|
break
|
||||||
|
self.assertIsNotNone(held_node)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
held_node['hold_job'],
|
||||||
|
" ".join(['tenant-one',
|
||||||
|
'review.example.com/org/project',
|
||||||
|
'project-test2', ref_filter])
|
||||||
|
)
|
||||||
|
self.assertFalse(held_node['_lock'], "Node %s is locked" %
|
||||||
|
(node['_oid'],))
|
||||||
|
self.fake_nodepool.removeNode(held_node)
|
||||||
|
|
||||||
|
_fail_job_and_verify_autohold_request(A, ref)
|
||||||
|
|
||||||
|
ref = "refs/changes/%s/%s/.*" % (str(change)[-1:], str(change))
|
||||||
|
_fail_job_and_verify_autohold_request(A, ref)
|
||||||
|
_fail_job_and_verify_autohold_request(A, ".*")
|
||||||
|
|
||||||
@simple_layout('layouts/autohold.yaml')
|
@simple_layout('layouts/autohold.yaml')
|
||||||
def test_autohold_ignores_aborted_jobs(self):
|
def test_autohold_ignores_aborted_jobs(self):
|
||||||
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
||||||
self.gearman_server.port)
|
self.gearman_server.port)
|
||||||
self.addCleanup(client.shutdown)
|
self.addCleanup(client.shutdown)
|
||||||
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
"reason text", 1)
|
"", "", "reason text", 1)
|
||||||
self.assertTrue(r)
|
self.assertTrue(r)
|
||||||
|
|
||||||
self.executor_server.hold_jobs_in_build = True
|
self.executor_server.hold_jobs_in_build = True
|
||||||
|
@ -1624,7 +1762,7 @@ class TestScheduler(ZuulTestCase):
|
||||||
self.addCleanup(client.shutdown)
|
self.addCleanup(client.shutdown)
|
||||||
|
|
||||||
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||||
"reason text", 1)
|
"", "", "reason text", 1)
|
||||||
self.assertTrue(r)
|
self.assertTrue(r)
|
||||||
|
|
||||||
autohold_requests = client.autohold_list()
|
autohold_requests = client.autohold_list()
|
||||||
|
@ -1633,11 +1771,12 @@ class TestScheduler(ZuulTestCase):
|
||||||
|
|
||||||
# The single dict key should be a CSV string value
|
# The single dict key should be a CSV string value
|
||||||
key = list(autohold_requests.keys())[0]
|
key = list(autohold_requests.keys())[0]
|
||||||
tenant, project, job = key.split(',')
|
tenant, project, job, ref_filter = key.split(',')
|
||||||
|
|
||||||
self.assertEqual('tenant-one', tenant)
|
self.assertEqual('tenant-one', tenant)
|
||||||
self.assertIn('org/project', project)
|
self.assertIn('org/project', project)
|
||||||
self.assertEqual('project-test2', job)
|
self.assertEqual('project-test2', job)
|
||||||
|
self.assertEqual(".*", ref_filter)
|
||||||
|
|
||||||
# Note: the value is converted from set to list by json.
|
# Note: the value is converted from set to list by json.
|
||||||
self.assertEqual([1, "reason text"], autohold_requests[key])
|
self.assertEqual([1, "reason text"], autohold_requests[key])
|
||||||
|
|
|
@ -51,6 +51,11 @@ class Client(zuul.cmd.ZuulApp):
|
||||||
required=True)
|
required=True)
|
||||||
cmd_autohold.add_argument('--job', help='job name',
|
cmd_autohold.add_argument('--job', help='job name',
|
||||||
required=True)
|
required=True)
|
||||||
|
cmd_autohold.add_argument('--change',
|
||||||
|
help='specific change to hold nodes for',
|
||||||
|
required=False, default='')
|
||||||
|
cmd_autohold.add_argument('--ref', help='git ref to hold nodes for',
|
||||||
|
required=False, default='')
|
||||||
cmd_autohold.add_argument('--reason', help='reason for the hold',
|
cmd_autohold.add_argument('--reason', help='reason for the hold',
|
||||||
required=True)
|
required=True)
|
||||||
cmd_autohold.add_argument('--count',
|
cmd_autohold.add_argument('--count',
|
||||||
|
@ -173,9 +178,15 @@ class Client(zuul.cmd.ZuulApp):
|
||||||
def autohold(self):
|
def autohold(self):
|
||||||
client = zuul.rpcclient.RPCClient(
|
client = zuul.rpcclient.RPCClient(
|
||||||
self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
|
self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
|
||||||
|
if self.args.change and self.args.ref:
|
||||||
|
print("Change and ref can't be both used for the same request")
|
||||||
|
return False
|
||||||
|
|
||||||
r = client.autohold(tenant=self.args.tenant,
|
r = client.autohold(tenant=self.args.tenant,
|
||||||
project=self.args.project,
|
project=self.args.project,
|
||||||
job=self.args.job,
|
job=self.args.job,
|
||||||
|
change=self.args.change,
|
||||||
|
ref=self.args.ref,
|
||||||
reason=self.args.reason,
|
reason=self.args.reason,
|
||||||
count=self.args.count)
|
count=self.args.count)
|
||||||
return r
|
return r
|
||||||
|
@ -190,14 +201,19 @@ class Client(zuul.cmd.ZuulApp):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
table = prettytable.PrettyTable(
|
table = prettytable.PrettyTable(
|
||||||
field_names=['Tenant', 'Project', 'Job', 'Count', 'Reason'])
|
field_names=[
|
||||||
|
'Tenant', 'Project', 'Job', 'Ref Filter', 'Count', 'Reason'
|
||||||
|
])
|
||||||
|
|
||||||
for key, value in autohold_requests.items():
|
for key, value in autohold_requests.items():
|
||||||
# The key comes to us as a CSV string because json doesn't like
|
# The key comes to us as a CSV string because json doesn't like
|
||||||
# non-str keys.
|
# non-str keys.
|
||||||
tenant_name, project_name, job_name = key.split(',')
|
tenant_name, project_name, job_name, ref_filter = key.split(',')
|
||||||
count, reason = value
|
count, reason = value
|
||||||
table.add_row([tenant_name, project_name, job_name, count, reason])
|
|
||||||
|
table.add_row([
|
||||||
|
tenant_name, project_name, job_name, ref_filter, count, reason
|
||||||
|
])
|
||||||
print(table)
|
print(table)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,10 @@ class GerritSource(BaseSource):
|
||||||
)
|
)
|
||||||
return [f]
|
return [f]
|
||||||
|
|
||||||
|
def getRefForChange(self, change):
|
||||||
|
partial = change[-2:]
|
||||||
|
return "refs/changes/%s/%s/.*" % (partial, change)
|
||||||
|
|
||||||
|
|
||||||
approval = vs.Schema({'username': str,
|
approval = vs.Schema({'username': str,
|
||||||
'email': str,
|
'email': str,
|
||||||
|
|
|
@ -68,3 +68,6 @@ class GitSource(BaseSource):
|
||||||
|
|
||||||
def getRejectFilters(self, config):
|
def getRejectFilters(self, config):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def getRefForChange(self, change):
|
||||||
|
raise NotImplemented()
|
||||||
|
|
|
@ -144,6 +144,9 @@ class GithubSource(BaseSource):
|
||||||
)
|
)
|
||||||
return [f]
|
return [f]
|
||||||
|
|
||||||
|
def getRefForChange(self, change):
|
||||||
|
return "refs/pull/%s/head" % change
|
||||||
|
|
||||||
|
|
||||||
review = v.Schema({'username': str,
|
review = v.Schema({'username': str,
|
||||||
'email': str,
|
'email': str,
|
||||||
|
|
|
@ -48,10 +48,12 @@ class RPCClient(object):
|
||||||
self.log.debug("Job complete, success: %s" % (not job.failure))
|
self.log.debug("Job complete, success: %s" % (not job.failure))
|
||||||
return job
|
return job
|
||||||
|
|
||||||
def autohold(self, tenant, project, job, reason, count):
|
def autohold(self, tenant, project, job, change, ref, reason, count):
|
||||||
data = {'tenant': tenant,
|
data = {'tenant': tenant,
|
||||||
'project': project,
|
'project': project,
|
||||||
'job': job,
|
'job': job,
|
||||||
|
'change': change,
|
||||||
|
'ref': ref,
|
||||||
'reason': reason,
|
'reason': reason,
|
||||||
'count': count}
|
'count': count}
|
||||||
return not self.submitJob('zuul:autohold', data).failure
|
return not self.submitJob('zuul:autohold', data).failure
|
||||||
|
|
|
@ -150,7 +150,20 @@ class RPCListener(object):
|
||||||
job.sendWorkException(error.encode('utf8'))
|
job.sendWorkException(error.encode('utf8'))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if args['change'] and args['ref']:
|
||||||
|
job.sendWorkException("Change and ref can't be both used "
|
||||||
|
"for the same request")
|
||||||
|
|
||||||
|
if args['change']:
|
||||||
|
# Convert change into ref based on zuul connection
|
||||||
|
ref_filter = project.source.getRefForChange(args['change'])
|
||||||
|
elif args['ref']:
|
||||||
|
ref_filter = "%s" % args['ref']
|
||||||
|
else:
|
||||||
|
ref_filter = ".*"
|
||||||
|
|
||||||
params['job_name'] = args['job']
|
params['job_name'] = args['job']
|
||||||
|
params['ref_filter'] = ref_filter
|
||||||
params['reason'] = args['reason']
|
params['reason'] = args['reason']
|
||||||
|
|
||||||
if args['count'] < 0:
|
if args['count'] < 0:
|
||||||
|
|
|
@ -19,6 +19,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
|
import re
|
||||||
import queue
|
import queue
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
|
@ -436,8 +437,9 @@ class Scheduler(threading.Thread):
|
||||||
self.last_reconfigured = int(time.time())
|
self.last_reconfigured = int(time.time())
|
||||||
# TODOv3(jeblair): reconfigure time should be per-tenant
|
# TODOv3(jeblair): reconfigure time should be per-tenant
|
||||||
|
|
||||||
def autohold(self, tenant_name, project_name, job_name, reason, count):
|
def autohold(self, tenant_name, project_name, job_name, ref_filter,
|
||||||
key = (tenant_name, project_name, job_name)
|
reason, count):
|
||||||
|
key = (tenant_name, project_name, job_name, ref_filter)
|
||||||
if count == 0 and key in self.autohold_requests:
|
if count == 0 and key in self.autohold_requests:
|
||||||
self.log.debug("Removing autohold for %s", key)
|
self.log.debug("Removing autohold for %s", key)
|
||||||
del self.autohold_requests[key]
|
del self.autohold_requests[key]
|
||||||
|
@ -973,6 +975,66 @@ class Scheduler(threading.Thread):
|
||||||
self.log.exception("Exception estimating build time:")
|
self.log.exception("Exception estimating build time:")
|
||||||
pipeline.manager.onBuildStarted(event.build)
|
pipeline.manager.onBuildStarted(event.build)
|
||||||
|
|
||||||
|
def _getAutoholdRequestKey(self, build):
|
||||||
|
change = build.build_set.item.change
|
||||||
|
|
||||||
|
autohold_key_base = (build.pipeline.layout.tenant.name,
|
||||||
|
change.project.canonical_name,
|
||||||
|
build.job.name)
|
||||||
|
|
||||||
|
class Scope(object):
|
||||||
|
"""Enum defining a precedence/priority of autohold requests.
|
||||||
|
|
||||||
|
Autohold requests for specific refs should be fulfilled first,
|
||||||
|
before those for changes, and generic jobs.
|
||||||
|
|
||||||
|
Matching algorithm goes over all existing autohold requests, and
|
||||||
|
returns one with the highest number (in case of duplicated
|
||||||
|
requests the last one wins).
|
||||||
|
"""
|
||||||
|
NONE = 0
|
||||||
|
JOB = 1
|
||||||
|
CHANGE = 2
|
||||||
|
REF = 3
|
||||||
|
|
||||||
|
def autohold_key_base_issubset(base, request_key):
|
||||||
|
"""check whether the given key is a subset of the build key"""
|
||||||
|
index = 0
|
||||||
|
base_len = len(base)
|
||||||
|
while index < base_len:
|
||||||
|
if base[index] != request_key[index]:
|
||||||
|
return False
|
||||||
|
index += 1
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Do a partial match of the autohold key against all autohold
|
||||||
|
# requests, ignoring the last element of the key (ref filter),
|
||||||
|
# and finally do a regex match between ref filter from
|
||||||
|
# the autohold request and the build's change ref to check
|
||||||
|
# if it matches. Lastly, make sure that we match the most
|
||||||
|
# specific autohold request by comparing "scopes"
|
||||||
|
# of requests - the most specific is selected.
|
||||||
|
autohold_key = None
|
||||||
|
scope = Scope.NONE
|
||||||
|
for request in self.autohold_requests:
|
||||||
|
ref_filter = request[-1]
|
||||||
|
if not autohold_key_base_issubset(autohold_key_base, request) \
|
||||||
|
or not re.match(ref_filter, change.ref):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if ref_filter == ".*":
|
||||||
|
candidate_scope = Scope.JOB
|
||||||
|
elif ref_filter.endswith(".*"):
|
||||||
|
candidate_scope = Scope.CHANGE
|
||||||
|
else:
|
||||||
|
candidate_scope = Scope.REF
|
||||||
|
|
||||||
|
if candidate_scope > scope:
|
||||||
|
scope = candidate_scope
|
||||||
|
autohold_key = request
|
||||||
|
|
||||||
|
return autohold_key
|
||||||
|
|
||||||
def _doBuildCompletedEvent(self, event):
|
def _doBuildCompletedEvent(self, event):
|
||||||
build = event.build
|
build = event.build
|
||||||
|
|
||||||
|
@ -981,11 +1043,9 @@ class Scheduler(threading.Thread):
|
||||||
# the nodes to nodepool.
|
# the nodes to nodepool.
|
||||||
try:
|
try:
|
||||||
nodeset = build.nodeset
|
nodeset = build.nodeset
|
||||||
autohold_key = (build.pipeline.layout.tenant.name,
|
autohold_key = self._getAutoholdRequestKey(build)
|
||||||
build.build_set.item.change.project.canonical_name,
|
|
||||||
build.job.name)
|
if (build.result == "FAILURE" and autohold_key):
|
||||||
if (build.result == "FAILURE" and
|
|
||||||
autohold_key in self.autohold_requests):
|
|
||||||
# We explicitly only want to hold nodes for jobs if they have
|
# We explicitly only want to hold nodes for jobs if they have
|
||||||
# failed and have an autohold request.
|
# failed and have an autohold request.
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in New Issue