Merge "Cancel/return nodepool requests on job cancel" into feature/zuulv3
This commit is contained in:
commit
5c63371b48
|
@ -105,3 +105,18 @@ class TestNodepool(BaseTestCase):
|
||||||
self.waitForRequests()
|
self.waitForRequests()
|
||||||
self.assertEqual(len(self.provisioned_requests), 1)
|
self.assertEqual(len(self.provisioned_requests), 1)
|
||||||
self.assertEqual(request.state, 'fulfilled')
|
self.assertEqual(request.state, 'fulfilled')
|
||||||
|
|
||||||
|
def test_node_request_canceled(self):
|
||||||
|
# Test that node requests can be canceled
|
||||||
|
|
||||||
|
nodeset = model.NodeSet()
|
||||||
|
nodeset.addNode(model.Node('controller', 'ubuntu-xenial'))
|
||||||
|
nodeset.addNode(model.Node('compute', 'ubuntu-xenial'))
|
||||||
|
job = model.Job('testjob')
|
||||||
|
job.nodeset = nodeset
|
||||||
|
self.fake_nodepool.paused = True
|
||||||
|
request = self.nodepool.requestNodes(None, job)
|
||||||
|
self.nodepool.cancelRequest(request)
|
||||||
|
|
||||||
|
self.waitForRequests()
|
||||||
|
self.assertEqual(len(self.provisioned_requests), 0)
|
||||||
|
|
|
@ -427,6 +427,7 @@ class LaunchClient(object):
|
||||||
return build
|
return build
|
||||||
|
|
||||||
def cancel(self, build):
|
def cancel(self, build):
|
||||||
|
# Returns whether a running build was canceled
|
||||||
self.log.info("Cancel build %s for job %s" % (build, build.job))
|
self.log.info("Cancel build %s for job %s" % (build, build.job))
|
||||||
|
|
||||||
build.canceled = True
|
build.canceled = True
|
||||||
|
@ -434,21 +435,21 @@ class LaunchClient(object):
|
||||||
job = build.__gearman_job # noqa
|
job = build.__gearman_job # noqa
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self.log.debug("Build %s has no associated gearman job" % build)
|
self.log.debug("Build %s has no associated gearman job" % build)
|
||||||
return
|
return False
|
||||||
|
|
||||||
# TODOv3(jeblair): make a nicer way of recording build start.
|
# TODOv3(jeblair): make a nicer way of recording build start.
|
||||||
if build.url is not None:
|
if build.url is not None:
|
||||||
self.log.debug("Build %s has already started" % build)
|
self.log.debug("Build %s has already started" % build)
|
||||||
self.cancelRunningBuild(build)
|
self.cancelRunningBuild(build)
|
||||||
self.log.debug("Canceled running build %s" % build)
|
self.log.debug("Canceled running build %s" % build)
|
||||||
return
|
return True
|
||||||
else:
|
else:
|
||||||
self.log.debug("Build %s has not started yet" % build)
|
self.log.debug("Build %s has not started yet" % build)
|
||||||
|
|
||||||
self.log.debug("Looking for build %s in queue" % build)
|
self.log.debug("Looking for build %s in queue" % build)
|
||||||
if self.cancelJobInQueue(build):
|
if self.cancelJobInQueue(build):
|
||||||
self.log.debug("Removed build %s from queue" % build)
|
self.log.debug("Removed build %s from queue" % build)
|
||||||
return
|
return False
|
||||||
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
@ -457,7 +458,7 @@ class LaunchClient(object):
|
||||||
self.log.debug("Build %s has just started" % build)
|
self.log.debug("Build %s has just started" % build)
|
||||||
self.log.debug("Canceled running build %s" % build)
|
self.log.debug("Canceled running build %s" % build)
|
||||||
self.cancelRunningBuild(build)
|
self.cancelRunningBuild(build)
|
||||||
return
|
return True
|
||||||
self.log.debug("Unable to cancel build %s" % build)
|
self.log.debug("Unable to cancel build %s" % build)
|
||||||
|
|
||||||
def onBuildCompleted(self, job, result=None):
|
def onBuildCompleted(self, job, result=None):
|
||||||
|
|
|
@ -396,11 +396,20 @@ class PipelineManager(object):
|
||||||
self.sched.nodepool.cancelRequest(req)
|
self.sched.nodepool.cancelRequest(req)
|
||||||
old_build_set.node_requests = {}
|
old_build_set.node_requests = {}
|
||||||
for build in old_build_set.getBuilds():
|
for build in old_build_set.getBuilds():
|
||||||
|
was_running = False
|
||||||
try:
|
try:
|
||||||
self.sched.launcher.cancel(build)
|
was_running = self.sched.launcher.cancel(build)
|
||||||
except:
|
except:
|
||||||
self.log.exception("Exception while canceling build %s "
|
self.log.exception("Exception while canceling build %s "
|
||||||
"for change %s" % (build, item.change))
|
"for change %s" % (build, item.change))
|
||||||
|
if not was_running:
|
||||||
|
try:
|
||||||
|
nodeset = build.build_set.getJobNodeSet(build.job.name)
|
||||||
|
self.nodepool.returnNodeset(nodeset)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Unable to return nodeset %s for "
|
||||||
|
"canceled build request %s" %
|
||||||
|
(nodeset, build))
|
||||||
build.result = 'CANCELED'
|
build.result = 'CANCELED'
|
||||||
canceled = True
|
canceled = True
|
||||||
for item_behind in item.items_behind:
|
for item_behind in item.items_behind:
|
||||||
|
|
|
@ -35,8 +35,13 @@ class Nodepool(object):
|
||||||
return req
|
return req
|
||||||
|
|
||||||
def cancelRequest(self, request):
|
def cancelRequest(self, request):
|
||||||
if request in self.requests:
|
self.log.debug("Canceling node request: %s" % (request,))
|
||||||
self.requests.remove(request)
|
if request.uid in self.requests:
|
||||||
|
try:
|
||||||
|
self.sched.zk.deleteNodeRequest(request)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Error deleting node request:")
|
||||||
|
del self.requests[request.uid]
|
||||||
|
|
||||||
def useNodeset(self, nodeset):
|
def useNodeset(self, nodeset):
|
||||||
for node in nodeset.getNodes():
|
for node in nodeset.getNodes():
|
||||||
|
@ -51,7 +56,7 @@ class Nodepool(object):
|
||||||
raise Exception("Node %s is not locked" % (node,))
|
raise Exception("Node %s is not locked" % (node,))
|
||||||
if node.state == 'in-use':
|
if node.state == 'in-use':
|
||||||
node.state = 'used'
|
node.state = 'used'
|
||||||
self.sched.zk.storeNode(node)
|
self.sched.zk.storeNode(node)
|
||||||
self._unlockNodes(nodeset.getNodes())
|
self._unlockNodes(nodeset.getNodes())
|
||||||
|
|
||||||
def unlockNodeset(self, nodeset):
|
def unlockNodeset(self, nodeset):
|
||||||
|
|
Loading…
Reference in New Issue