From 01695c35e57138d30dc48fe7b6fde079a6da5c37 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Wed, 4 Jan 2017 17:29:25 -0800 Subject: [PATCH] Cancel/return nodepool requests on job cancel When canceling a running or requested job, cancel outstanding nodepool requests or return unused nodes. Change-Id: I77f8869b9d751ccd6c9f398ed03ef5ac482cc204 --- tests/test_nodepool.py | 15 +++++++++++++++ zuul/launcher/client.py | 9 +++++---- zuul/manager/__init__.py | 11 ++++++++++- zuul/nodepool.py | 11 ++++++++--- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/test_nodepool.py b/tests/test_nodepool.py index 677ae739e1..78d85aa905 100644 --- a/tests/test_nodepool.py +++ b/tests/test_nodepool.py @@ -105,3 +105,18 @@ class TestNodepool(BaseTestCase): self.waitForRequests() self.assertEqual(len(self.provisioned_requests), 1) self.assertEqual(request.state, 'fulfilled') + + def test_node_request_canceled(self): + # Test that node requests can be canceled + + nodeset = model.NodeSet() + nodeset.addNode(model.Node('controller', 'ubuntu-xenial')) + nodeset.addNode(model.Node('compute', 'ubuntu-xenial')) + job = model.Job('testjob') + job.nodeset = nodeset + self.fake_nodepool.paused = True + request = self.nodepool.requestNodes(None, job) + self.nodepool.cancelRequest(request) + + self.waitForRequests() + self.assertEqual(len(self.provisioned_requests), 0) diff --git a/zuul/launcher/client.py b/zuul/launcher/client.py index 9fbf1bbdbd..e17c83cb63 100644 --- a/zuul/launcher/client.py +++ b/zuul/launcher/client.py @@ -427,6 +427,7 @@ class LaunchClient(object): return build def cancel(self, build): + # Returns whether a running build was canceled self.log.info("Cancel build %s for job %s" % (build, build.job)) build.canceled = True @@ -434,21 +435,21 @@ class LaunchClient(object): job = build.__gearman_job # noqa except AttributeError: self.log.debug("Build %s has no associated gearman job" % build) - return + return False # TODOv3(jeblair): make a nicer way of recording build start. if build.url is not None: self.log.debug("Build %s has already started" % build) self.cancelRunningBuild(build) self.log.debug("Canceled running build %s" % build) - return + return True else: self.log.debug("Build %s has not started yet" % build) self.log.debug("Looking for build %s in queue" % build) if self.cancelJobInQueue(build): self.log.debug("Removed build %s from queue" % build) - return + return False time.sleep(1) @@ -457,7 +458,7 @@ class LaunchClient(object): self.log.debug("Build %s has just started" % build) self.log.debug("Canceled running build %s" % build) self.cancelRunningBuild(build) - return + return True self.log.debug("Unable to cancel build %s" % build) def onBuildCompleted(self, job, result=None): diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index f5a35cdb85..7f64986250 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -396,11 +396,20 @@ class PipelineManager(object): self.sched.nodepool.cancelRequest(req) old_build_set.node_requests = {} for build in old_build_set.getBuilds(): + was_running = False try: - self.sched.launcher.cancel(build) + was_running = self.sched.launcher.cancel(build) except: self.log.exception("Exception while canceling build %s " "for change %s" % (build, item.change)) + if not was_running: + try: + nodeset = build.build_set.getJobNodeSet(build.job.name) + self.nodepool.returnNodeset(nodeset) + except Exception: + self.log.exception("Unable to return nodeset %s for " + "canceled build request %s" % + (nodeset, build)) build.result = 'CANCELED' canceled = True for item_behind in item.items_behind: diff --git a/zuul/nodepool.py b/zuul/nodepool.py index 11b02b6e27..4d0442f511 100644 --- a/zuul/nodepool.py +++ b/zuul/nodepool.py @@ -35,8 +35,13 @@ class Nodepool(object): return req def cancelRequest(self, request): - if request in self.requests: - self.requests.remove(request) + self.log.debug("Canceling node request: %s" % (request,)) + if request.uid in self.requests: + try: + self.sched.zk.deleteNodeRequest(request) + except Exception: + self.log.exception("Error deleting node request:") + del self.requests[request.uid] def useNodeset(self, nodeset): for node in nodeset.getNodes(): @@ -51,7 +56,7 @@ class Nodepool(object): raise Exception("Node %s is not locked" % (node,)) if node.state == 'in-use': node.state = 'used' - self.sched.zk.storeNode(node) + self.sched.zk.storeNode(node) self._unlockNodes(nodeset.getNodes()) def unlockNodeset(self, nodeset):