diff --git a/tests/base.py b/tests/base.py
index fe0139932e..20e664edfe 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -1629,6 +1629,10 @@ class FakeNodepool(object):
         nodeid = path.split("/")[-1]
         return nodeid
 
+    def removeNode(self, node):
+        path = self.NODE_ROOT + '/' + node["_oid"]
+        self.client.delete(path, recursive=True)
+
     def addFailRequest(self, request):
         self.fail_requests.add(request['_oid'])
 
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 3d76510105..5ae8607f2e 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -1507,7 +1507,7 @@ class TestScheduler(ZuulTestCase):
                                          self.gearman_server.port)
         self.addCleanup(client.shutdown)
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         self.assertTrue(r)
 
         # First check that successful jobs do not autohold
@@ -1554,7 +1554,7 @@ class TestScheduler(ZuulTestCase):
             held_node['hold_job'],
             " ".join(['tenant-one',
                       'review.example.com/org/project',
-                      'project-test2'])
+                      'project-test2', '.*'])
         )
         self.assertEqual(held_node['comment'], "reason text")
 
@@ -1574,13 +1574,151 @@ class TestScheduler(ZuulTestCase):
                 held_nodes += 1
         self.assertEqual(held_nodes, 1)
 
+    def _test_autohold_scoped(self, change_obj, change, ref):
+        client = zuul.rpcclient.RPCClient('127.0.0.1',
+                                          self.gearman_server.port)
+        self.addCleanup(client.shutdown)
+
+        # create two changes on the same project, and an autohold request
+        # for one of them.
+        other = self.fake_gerrit.addFakeChange(
+            'org/project', 'master', 'other'
+        )
+
+        r = client.autohold('tenant-one', 'org/project', 'project-test2',
+                            str(change), ref, "reason text", 1)
+        self.assertTrue(r)
+
+        # First, check that an unrelated job does not trigger autohold,
+        # even when it fails
+        self.executor_server.failJob('project-test2', other)
+        self.fake_gerrit.addEvent(other.getPatchsetCreatedEvent(1))
+
+        self.waitUntilSettled()
+
+        self.assertEqual(other.data['status'], 'NEW')
+        self.assertEqual(other.reported, 1)
+        # project-test2
+        self.assertEqual(self.history[0].result, 'FAILURE')
+
+        # Check nodepool for a held node
+        held_node = None
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_node = node
+                break
+        self.assertIsNone(held_node)
+
+        # And then verify that a failed job for the specified change
+        # triggers the autohold
+
+        self.executor_server.failJob('project-test2', change_obj)
+        self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+
+        self.waitUntilSettled()
+
+        self.assertEqual(change_obj.data['status'], 'NEW')
+        self.assertEqual(change_obj.reported, 1)
+        # project-test2
+        self.assertEqual(self.history[1].result, 'FAILURE')
+
+        # Check nodepool for a held node
+        held_node = None
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_node = node
+                break
+        self.assertIsNotNone(held_node)
+
+        # Validate node has recorded the failed job
+        if change != "":
+            ref = "refs/changes/%s/%s/.*" % (
+                str(change_obj.number)[-1:], str(change_obj.number)
+            )
+
+        self.assertEqual(
+            held_node['hold_job'],
+            " ".join(['tenant-one',
+                      'review.example.com/org/project',
+                      'project-test2', ref])
+        )
+        self.assertEqual(held_node['comment'], "reason text")
+
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_change(self):
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+
+        self._test_autohold_scoped(A, change=A.number, ref="")
+
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_ref(self):
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        ref = A.data['currentPatchSet']['ref']
+        self._test_autohold_scoped(A, change="", ref=ref)
+
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_scoping(self):
+        client = zuul.rpcclient.RPCClient('127.0.0.1',
+                                          self.gearman_server.port)
+        self.addCleanup(client.shutdown)
+
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+
+        # create three autohold requests, scoped to job, change and
+        # a specific ref
+        change = str(A.number)
+        ref = A.data['currentPatchSet']['ref']
+        r1 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             "", "", "reason text", 1)
+        self.assertTrue(r1)
+        r2 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             change, "", "reason text", 1)
+        self.assertTrue(r2)
+        r3 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             "", ref, "reason text", 1)
+        self.assertTrue(r3)
+
+        # Fail 3 jobs for the same change, and verify that the autohold
+        # requests are fulfilled in the expected order: from the most
+        # specific towards the most generic one.
+
+        def _fail_job_and_verify_autohold_request(change_obj, ref_filter):
+            self.executor_server.failJob('project-test2', change_obj)
+            self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+
+            self.waitUntilSettled()
+
+            # Check nodepool for a held node
+            held_node = None
+            for node in self.fake_nodepool.getNodes():
+                if node['state'] == zuul.model.STATE_HOLD:
+                    held_node = node
+                    break
+            self.assertIsNotNone(held_node)
+
+            self.assertEqual(
+                held_node['hold_job'],
+                " ".join(['tenant-one',
+                          'review.example.com/org/project',
+                          'project-test2', ref_filter])
+            )
+            self.assertFalse(held_node['_lock'], "Node %s is locked" %
+                             (node['_oid'],))
+            self.fake_nodepool.removeNode(held_node)
+
+        _fail_job_and_verify_autohold_request(A, ref)
+
+        ref = "refs/changes/%s/%s/.*" % (str(change)[-1:], str(change))
+        _fail_job_and_verify_autohold_request(A, ref)
+        _fail_job_and_verify_autohold_request(A, ".*")
+
     @simple_layout('layouts/autohold.yaml')
     def test_autohold_ignores_aborted_jobs(self):
         client = zuul.rpcclient.RPCClient('127.0.0.1',
                                           self.gearman_server.port)
         self.addCleanup(client.shutdown)
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         self.assertTrue(r)
 
         self.executor_server.hold_jobs_in_build = True
@@ -1624,7 +1762,7 @@ class TestScheduler(ZuulTestCase):
         self.addCleanup(client.shutdown)
 
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         self.assertTrue(r)
 
         autohold_requests = client.autohold_list()
@@ -1633,11 +1771,12 @@ class TestScheduler(ZuulTestCase):
 
         # The single dict key should be a CSV string value
        key = list(autohold_requests.keys())[0]
-        tenant, project, job = key.split(',')
+        tenant, project, job, ref_filter = key.split(',')
 
         self.assertEqual('tenant-one', tenant)
         self.assertIn('org/project', project)
         self.assertEqual('project-test2', job)
+        self.assertEqual(".*", ref_filter)
 
         # Note: the value is converted from set to list by json.
         self.assertEqual([1, "reason text"], autohold_requests[key])
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index ebf59b9443..a7b3ef3e99 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -51,6 +51,11 @@ class Client(zuul.cmd.ZuulApp):
                                   required=True)
         cmd_autohold.add_argument('--job', help='job name',
                                   required=True)
+        cmd_autohold.add_argument('--change',
+                                  help='specific change to hold nodes for',
+                                  required=False, default='')
+        cmd_autohold.add_argument('--ref', help='git ref to hold nodes for',
+                                  required=False, default='')
         cmd_autohold.add_argument('--reason', help='reason for the hold',
                                   required=True)
         cmd_autohold.add_argument('--count',
@@ -173,9 +178,15 @@ class Client(zuul.cmd.ZuulApp):
     def autohold(self):
         client = zuul.rpcclient.RPCClient(
             self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
+        if self.args.change and self.args.ref:
+            print("Change and ref can't both be used for the same request")
+            return False
+
         r = client.autohold(tenant=self.args.tenant,
                             project=self.args.project,
                             job=self.args.job,
+                            change=self.args.change,
+                            ref=self.args.ref,
                             reason=self.args.reason,
                             count=self.args.count)
         return r
@@ -190,14 +201,19 @@ class Client(zuul.cmd.ZuulApp):
             return True
 
         table = prettytable.PrettyTable(
-            field_names=['Tenant', 'Project', 'Job', 'Count', 'Reason'])
+            field_names=[
+                'Tenant', 'Project', 'Job', 'Ref Filter', 'Count', 'Reason'
+            ])
 
         for key, value in autohold_requests.items():
             # The key comes to us as a CSV string because json doesn't like
             # non-str keys.
-            tenant_name, project_name, job_name = key.split(',')
+            tenant_name, project_name, job_name, ref_filter = key.split(',')
             count, reason = value
-            table.add_row([tenant_name, project_name, job_name, count, reason])
+
+            table.add_row([
+                tenant_name, project_name, job_name, ref_filter, count, reason
+            ])
 
         print(table)
         return True
diff --git a/zuul/driver/gerrit/gerritsource.py b/zuul/driver/gerrit/gerritsource.py
index 9e327b93a5..ed8e7ad9e2 100644
--- a/zuul/driver/gerrit/gerritsource.py
+++ b/zuul/driver/gerrit/gerritsource.py
@@ -138,6 +138,10 @@ class GerritSource(BaseSource):
         )
         return [f]
 
+    def getRefForChange(self, change):
+        partial = change[-2:]
+        return "refs/changes/%s/%s/.*" % (partial, change)
+
 
 approval = vs.Schema({'username': str,
                       'email': str,
diff --git a/zuul/driver/git/gitsource.py b/zuul/driver/git/gitsource.py
index a7d42be12b..9f0963df62 100644
--- a/zuul/driver/git/gitsource.py
+++ b/zuul/driver/git/gitsource.py
@@ -68,3 +68,6 @@ class GitSource(BaseSource):
 
     def getRejectFilters(self, config):
         return []
+
+    def getRefForChange(self, change):
+        raise NotImplementedError()
diff --git a/zuul/driver/github/githubsource.py b/zuul/driver/github/githubsource.py
index 33f8f7cae3..6f9b14d2c1 100644
--- a/zuul/driver/github/githubsource.py
+++ b/zuul/driver/github/githubsource.py
@@ -144,6 +144,9 @@ class GithubSource(BaseSource):
         )
         return [f]
 
+    def getRefForChange(self, change):
+        return "refs/pull/%s/head" % change
+
 
 review = v.Schema({'username': str,
                    'email': str,
diff --git a/zuul/rpcclient.py b/zuul/rpcclient.py
index 8f2e5dc469..a947ed04c7 100644
--- a/zuul/rpcclient.py
+++ b/zuul/rpcclient.py
@@ -48,10 +48,12 @@ class RPCClient(object):
         self.log.debug("Job complete, success: %s" % (not job.failure))
         return job
 
-    def autohold(self, tenant, project, job, reason, count):
+    def autohold(self, tenant, project, job, change, ref, reason, count):
         data = {'tenant': tenant,
                 'project': project,
                 'job': job,
+                'change': change,
+                'ref': ref,
                 'reason': reason,
                 'count': count}
         return not self.submitJob('zuul:autohold', data).failure
diff --git a/zuul/rpclistener.py b/zuul/rpclistener.py
index e5016dfab0..f3f55f6e0e 100644
--- a/zuul/rpclistener.py
+++ b/zuul/rpclistener.py
@@ -150,7 +150,21 @@ class RPCListener(object):
             job.sendWorkException(error.encode('utf8'))
             return
 
+        if args['change'] and args['ref']:
+            job.sendWorkException("Change and ref can't both be used "
+                                  "for the same request")
+            return
+
+        if args['change']:
+            # Convert change into ref based on zuul connection
+            ref_filter = project.source.getRefForChange(args['change'])
+        elif args['ref']:
+            ref_filter = "%s" % args['ref']
+        else:
+            ref_filter = ".*"
+
         params['job_name'] = args['job']
+        params['ref_filter'] = ref_filter
         params['reason'] = args['reason']
 
         if args['count'] < 0:
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index a2e3b6eb1e..f86bc89ef6 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -19,6 +19,7 @@ import json
 import logging
 import os
 import pickle
+import re
 import queue
 import socket
 import sys
@@ -436,8 +437,9 @@ class Scheduler(threading.Thread):
         self.last_reconfigured = int(time.time())
         # TODOv3(jeblair): reconfigure time should be per-tenant
 
-    def autohold(self, tenant_name, project_name, job_name, reason, count):
-        key = (tenant_name, project_name, job_name)
+    def autohold(self, tenant_name, project_name, job_name, ref_filter,
+                 reason, count):
+        key = (tenant_name, project_name, job_name, ref_filter)
         if count == 0 and key in self.autohold_requests:
             self.log.debug("Removing autohold for %s", key)
             del self.autohold_requests[key]
@@ -973,6 +975,66 @@ class Scheduler(threading.Thread):
             self.log.exception("Exception estimating build time:")
         pipeline.manager.onBuildStarted(event.build)
 
+    def _getAutoholdRequestKey(self, build):
+        change = build.build_set.item.change
+
+        autohold_key_base = (build.pipeline.layout.tenant.name,
+                             change.project.canonical_name,
+                             build.job.name)
+
+        class Scope(object):
+            """Enum defining a precedence/priority of autohold requests.
+
+            Autohold requests for specific refs should be fulfilled first,
+            before those for changes, and those for generic jobs last.
+
+            The matching algorithm goes over all existing autohold requests
+            and returns the one with the highest precedence number (in case
+            of duplicate requests, the last one wins).
+            """
+            NONE = 0
+            JOB = 1
+            CHANGE = 2
+            REF = 3
+
+        def autohold_key_base_issubset(base, request_key):
+            """check whether the base key is a prefix of the request key"""
+            index = 0
+            base_len = len(base)
+            while index < base_len:
+                if base[index] != request_key[index]:
+                    return False
+                index += 1
+            return True
+
+        # Do a partial match of the build's autohold key against all
+        # autohold requests, ignoring the last element of the request key
+        # (the ref filter), and then do a regex match between the ref
+        # filter from the autohold request and the build's change ref to
+        # check whether it applies. Lastly, make sure that we honor the
+        # most specific autohold request by comparing the "scopes"
+        # of the matching requests - the most specific one is selected.
+        autohold_key = None
+        scope = Scope.NONE
+        for request in self.autohold_requests:
+            ref_filter = request[-1]
+            if not autohold_key_base_issubset(autohold_key_base, request) \
+                    or not re.match(ref_filter, change.ref):
+                continue
+
+            if ref_filter == ".*":
+                candidate_scope = Scope.JOB
+            elif ref_filter.endswith(".*"):
+                candidate_scope = Scope.CHANGE
+            else:
+                candidate_scope = Scope.REF
+
+            if candidate_scope > scope:
+                scope = candidate_scope
+                autohold_key = request
+
+        return autohold_key
+
     def _doBuildCompletedEvent(self, event):
         build = event.build
 
@@ -981,11 +1043,9 @@ class Scheduler(threading.Thread):
         # the nodes to nodepool.
         try:
             nodeset = build.nodeset
-            autohold_key = (build.pipeline.layout.tenant.name,
-                            build.build_set.item.change.project.canonical_name,
-                            build.job.name)
-            if (build.result == "FAILURE" and
-                autohold_key in self.autohold_requests):
+            autohold_key = self._getAutoholdRequestKey(build)
+
+            if (build.result == "FAILURE" and autohold_key):
                 # We explicitly only want to hold nodes for jobs if they have
                 # failed and have an autohold request.
                 try:
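Reviewer note (illustrative, not part of the change): with the new arguments, a hold can be scoped to one change with something like "zuul autohold --tenant tenant-one --project org/project --job project-test2 --change 12345 --reason debug --count 1"; the change number and reason here are made up. The snippet below is a minimal standalone sketch of the ref-filter precedence that _getAutoholdRequestKey implements above; the request tuples, the example ref, and the helper names (scope, best_request) are hypothetical and exist only for illustration.

import re

# Hypothetical autohold request keys: (tenant, project, job, ref_filter).
requests = [
    ('tenant-one', 'review.example.com/org/project', 'project-test2', '.*'),
    ('tenant-one', 'review.example.com/org/project', 'project-test2',
     'refs/changes/1/1/.*'),
    ('tenant-one', 'review.example.com/org/project', 'project-test2',
     'refs/changes/1/1/1'),
]


def scope(ref_filter):
    # Mirror the Scope enum ordering: a plain ".*" is JOB-scoped (1), a
    # filter ending in ".*" is CHANGE-scoped (2), an exact ref is REF (3).
    if ref_filter == '.*':
        return 1
    if ref_filter.endswith('.*'):
        return 2
    return 3


def best_request(key_base, ref):
    # Return the most specific request whose (tenant, project, job) prefix
    # matches the build and whose ref filter matches the build's ref.
    best = None
    for request in requests:
        if request[:3] != key_base or not re.match(request[-1], ref):
            continue
        if best is None or scope(request[-1]) > scope(best[-1]):
            best = request
    return best


# A failed build of project-test2 for refs/changes/1/1/1 is claimed by the
# REF-scoped request first, as exercised by test_autohold_scoping above.
print(best_request(
    ('tenant-one', 'review.example.com/org/project', 'project-test2'),
    'refs/changes/1/1/1'))

The sketch collapses the key-base construction and the prefix check into a simple tuple comparison; the scheduler keeps them separate because its key base is derived from the live build object, but the ranking idea is the same: a change-scoped request is just a ref regex ending in ".*", so requests can be ordered purely by inspecting the filter string.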