Merge "Verify nodes and requests are not leaked" into feature/zuulv3
This commit is contained in:
commit
09d6e4a9e0
|
@@ -874,6 +874,7 @@ class FakeSwiftClientConnection(swiftclient.client.Connection):
 class FakeNodepool(object):
     REQUEST_ROOT = '/nodepool/requests'
+    NODE_ROOT = '/nodepool/nodes'
 
     log = logging.getLogger("zuul.test.FakeNodepool")
 
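FakeNodepool talks to the same ZooKeeper paths real nodepool uses, so the new NODE_ROOT constant mirrors the request root above it. A minimal sketch of bootstrapping that layout with kazoo, assuming a local test ZooKeeper and an arbitrary chroot:

    from kazoo.client import KazooClient

    # Hosts and chroot are assumptions for a local test ZooKeeper.
    client = KazooClient(hosts='127.0.0.1:2181/test-zuul')
    client.start()
    for root in ('/nodepool/requests', '/nodepool/nodes'):
        client.ensure_path(root)  # idempotent: creates missing parents
    client.stop()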
@@ -918,6 +919,28 @@ class FakeNodepool(object):
         reqs.append(data)
         return reqs
 
+    def getNodes(self):
+        try:
+            nodeids = self.client.get_children(self.NODE_ROOT)
+        except kazoo.exceptions.NoNodeError:
+            return []
+        nodes = []
+        for oid in sorted(nodeids):
+            path = self.NODE_ROOT + '/' + oid
+            data, stat = self.client.get(path)
+            data = json.loads(data)
+            data['_oid'] = oid
+            try:
+                lockfiles = self.client.get_children(path + '/lock')
+            except kazoo.exceptions.NoNodeError:
+                lockfiles = []
+            if lockfiles:
+                data['_lock'] = True
+            else:
+                data['_lock'] = False
+            nodes.append(data)
+        return nodes
+
     def makeNode(self, request_id, node_type):
         now = time.time()
         path = '/nodepool/nodes/'
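getNodes can report lock state because kazoo's Lock recipe creates contender znodes under a node's lock path; any children there mean some client holds (or is waiting on) the lock. A standalone sketch of the same check, with a hypothetical node id:

    from kazoo.client import KazooClient
    from kazoo.exceptions import NoNodeError

    client = KazooClient(hosts='127.0.0.1:2181')  # assumed test server
    client.start()

    def is_locked(node_path):
        # kazoo Lock contenders live under <node_path>/lock; an empty or
        # missing lock directory means nobody holds the lock.
        try:
            return bool(client.get_children(node_path + '/lock'))
        except NoNodeError:
            return False

    print(is_locked('/nodepool/nodes/0000000042'))  # hypothetical id
    client.stop()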
@@ -1257,9 +1280,12 @@ class ZuulTestCase(BaseTestCase):
         self.rpc.start()
         self.launch_client.gearman.waitForServer()
 
-        self.addCleanup(self.assertFinalState)
         self.addCleanup(self.shutdown)
 
+    def tearDown(self):
+        super(ZuulTestCase, self).tearDown()
+        self.assertFinalState()
+
     def configure_connections(self):
         # Register connections from the config
         self.smtp_messages = []
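Moving assertFinalState from addCleanup into tearDown changes when it runs: tearDown executes before any cleanup callbacks, and cleanups run in reverse order of registration, so as a cleanup the assertion would only have fired after shutdown had already torn the system down. A self-contained demonstration of that ordering (plain unittest, nothing Zuul-specific):

    import unittest

    events = []

    class OrderingDemo(unittest.TestCase):
        def setUp(self):
            self.addCleanup(events.append, 'first-registered cleanup')
            self.addCleanup(events.append, 'last-registered cleanup')

        def tearDown(self):
            events.append('tearDown')

        def test_noop(self):
            pass

    suite = unittest.TestLoader().loadTestsFromTestCase(OrderingDemo)
    unittest.TextTestRunner().run(suite)
    print(events)
    # ['tearDown', 'last-registered cleanup', 'first-registered cleanup']

The TestWebapp hunks below make the same conversion: held builds are released in tearDown before super().tearDown() reaches the final-state assertions.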
@@ -1366,6 +1392,17 @@ class ZuulTestCase(BaseTestCase):
         self.addCommitToRepo(project, 'add content from fixture',
                              files, branch='master', tag='init')
 
+    def assertNodepoolState(self):
+        # Make sure that there are no pending requests
+
+        requests = self.fake_nodepool.getNodeRequests()
+        self.assertEqual(len(requests), 0)
+
+        nodes = self.fake_nodepool.getNodes()
+        for node in nodes:
+            self.assertFalse(node['_lock'], "Node %s is locked" %
+                             (node['_oid'],))
+
     def assertFinalState(self):
         # Make sure that git.Repo objects have been garbage collected.
         repos = []

@@ -1375,6 +1412,7 @@ class ZuulTestCase(BaseTestCase):
                 repos.append(obj)
         self.assertEqual(len(repos), 0)
         self.assertEmptyQueues()
+        self.assertNodepoolState()
         ipm = zuul.manager.independent.IndependentPipelineManager
         for tenant in self.sched.abide.tenants.values():
             for pipeline in tenant.layout.pipelines.values():
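assertNodepoolState is wired into assertFinalState in the second hunk, so every test now fails if it leaves a pending node request or a locked node behind. The general shape of such a leak check, as a standalone sketch with a stand-in pool object rather than Zuul's fixtures:

    class StubPool(object):
        # Stand-in for FakeNodepool's view of ZooKeeper state.
        def __init__(self):
            self.requests = []
            self.nodes = []

    def assert_nodepool_clean(pool):
        # Mirrors assertNodepoolState: nothing pending, nothing locked.
        assert len(pool.requests) == 0, "leaked requests: %r" % (pool.requests,)
        locked = [n['_oid'] for n in pool.nodes if n['_lock']]
        assert not locked, "leaked node locks: %r" % (locked,)

    pool = StubPool()
    pool.nodes.append({'_oid': '0000000001', '_lock': False})
    assert_nodepool_clean(pool)  # passes: no requests, no locked nodes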
@@ -25,14 +25,8 @@ from tests.base import ZuulTestCase
 class TestWebapp(ZuulTestCase):
     tenant_config_file = 'config/single-tenant/main.yaml'
 
-    def _cleanup(self):
-        self.launch_server.hold_jobs_in_build = False
-        self.launch_server.release()
-        self.waitUntilSettled()
-
     def setUp(self):
         super(TestWebapp, self).setUp()
-        self.addCleanup(self._cleanup)
         self.launch_server.hold_jobs_in_build = True
         A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
         A.addApproval('code-review', 2)

@@ -43,6 +37,12 @@ class TestWebapp(ZuulTestCase):
         self.waitUntilSettled()
         self.port = self.webapp.server.socket.getsockname()[1]
 
+    def tearDown(self):
+        self.launch_server.hold_jobs_in_build = False
+        self.launch_server.release()
+        self.waitUntilSettled()
+        super(TestWebapp, self).tearDown()
+
     def test_webapp_status(self):
         "Test that we can filter to only certain changes in the webapp."
@@ -471,7 +471,6 @@ class LaunchClient(object):
             data = getJobData(job)
             build.node_labels = data.get('node_labels', [])
             build.node_name = data.get('node_name')
-        if not build.canceled:
             if result is None:
                 result = data.get('result')
             if result is None:
@@ -395,6 +395,7 @@ class PipelineManager(object):
         for req in old_build_set.node_requests.values():
             self.sched.nodepool.cancelRequest(req)
         old_build_set.node_requests = {}
+        canceled_jobs = set()
         for build in old_build_set.getBuilds():
             was_running = False
             try:

@@ -405,13 +406,18 @@ class PipelineManager(object):
             if not was_running:
                 try:
                     nodeset = build.build_set.getJobNodeSet(build.job.name)
-                    self.nodepool.returnNodeset(nodeset)
+                    self.sched.nodepool.returnNodeset(nodeset)
                 except Exception:
                     self.log.exception("Unable to return nodeset %s for "
                                        "canceled build request %s" %
                                        (nodeset, build))
             build.result = 'CANCELED'
             canceled = True
+            canceled_jobs.add(build.job.name)
+        for jobname, nodeset in old_build_set.nodesets.items()[:]:
+            if jobname in canceled_jobs:
+                continue
+            self.sched.nodepool.returnNodeset(nodeset)
         for item_behind in item.items_behind:
             self.log.debug("Canceling jobs for change %s, behind change %s" %
                            (item_behind.change, item.change))
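The new canceled_jobs set prevents a double return: builds canceled in the loop above return their own nodesets, and the sweep over old_build_set.nodesets then returns everything the canceled builds did not own. (The items()[:] is a Python 2 idiom; items() there returns a list, and [:] copies it before iterating.) A compressed sketch of that two-phase pattern, with stand-in names rather than Zuul's objects:

    def return_all(nodesets, canceled_builds, return_nodeset):
        # Phase 1: canceled builds return their own nodesets.
        canceled_jobs = set()
        for job_name in canceled_builds:
            return_nodeset(nodesets[job_name])
            canceled_jobs.add(job_name)
        # Phase 2: sweep the rest, skipping what phase 1 already returned.
        for job_name, nodeset in list(nodesets.items()):
            if job_name in canceled_jobs:
                continue
            return_nodeset(nodeset)

    returned = []
    return_all({'linters': 'ns-a', 'unit': 'ns-b'}, ['unit'], returned.append)
    print(sorted(returned))  # ['ns-a', 'ns-b'] -- each returned exactly once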
@@ -609,11 +615,15 @@ class PipelineManager(object):
         self.log.debug("Item %s status is now:\n %s" %
                        (item, item.formatStatus()))
 
+        try:
+            if build.retry:
+                build.build_set.removeJobNodeSet(build.job.name)
+
             # If any jobs were skipped as a result of this build, return
             # their nodes.
             for build in build.build_set.getBuilds():
                 if build.result == 'SKIPPED':
                     nodeset = build.build_set.getJobNodeSet(build.job.name)
-                    self.nodepool.returnNodeset(nodeset)
+                    self.sched.nodepool.returnNodeset(nodeset)
+        except Exception:
+            self.log.exception("Unable to return nodeset %s" % (nodeset,))
 
         return True
@@ -762,6 +762,11 @@ class BuildSet(object):
         # required
         return self.nodesets.get(job_name)
 
+    def removeJobNodeSet(self, job_name):
+        if job_name not in self.nodesets:
+            raise Exception("No job set for %s" % (job_name))
+        del self.nodesets[job_name]
+
     def setJobNodeRequest(self, job_name, req):
         if job_name in self.node_requests:
             raise Exception("Prior node request for %s" % (job_name))
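removeJobNodeSet exists for the retry path shown above: a build that will be retried must drop its stale nodeset mapping so the re-run can acquire a fresh one, and the raise guards against removing a mapping that was never set. A stand-in sketch of that invariant (not Zuul's real BuildSet):

    class NodesetLedger(object):
        # One nodeset per job name; set once, removed only on retry.
        def __init__(self):
            self.nodesets = {}

        def setJobNodeSet(self, job_name, nodeset):
            if job_name in self.nodesets:
                raise Exception("Prior nodeset for %s" % (job_name,))
            self.nodesets[job_name] = nodeset

        def removeJobNodeSet(self, job_name):
            if job_name not in self.nodesets:
                raise Exception("No job set for %s" % (job_name,))
            del self.nodesets[job_name]

    ledger = NodesetLedger()
    ledger.setJobNodeSet('unit', 'nodeset-1')
    ledger.removeJobNodeSet('unit')            # build is being retried
    ledger.setJobNodeSet('unit', 'nodeset-2')  # the retry gets a fresh one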
@@ -28,14 +28,15 @@ class Nodepool(object):
         nodeset = job.nodeset.copy()
         req = NodeRequest(build_set, job, nodeset)
         self.requests[req.uid] = req
-        self.log.debug("Submitting node request: %s" % (req,))
 
         self.sched.zk.submitNodeRequest(req, self._updateNodeRequest)
+        # Logged after submission so that we have the request id
+        self.log.info("Submitted node request %s" % (req,))
 
         return req
 
     def cancelRequest(self, request):
-        self.log.debug("Canceling node request: %s" % (request,))
+        self.log.info("Canceling node request %s" % (request,))
         if request.uid in self.requests:
             try:
                 self.sched.zk.deleteNodeRequest(request)

@@ -44,6 +45,7 @@ class Nodepool(object):
             del self.requests[request.uid]
 
     def useNodeset(self, nodeset):
+        self.log.info("Setting nodeset %s in use" % (nodeset,))
         for node in nodeset.getNodes():
             if node.lock is None:
                 raise Exception("Node %s is not locked" % (node,))

@@ -51,6 +53,7 @@ class Nodepool(object):
             self.sched.zk.storeNode(node)
 
     def returnNodeset(self, nodeset):
+        self.log.info("Returning nodeset %s" % (nodeset,))
         for node in nodeset.getNodes():
             if node.lock is None:
                 raise Exception("Node %s is not locked" % (node,))

@@ -79,7 +82,7 @@ class Nodepool(object):
         locked_nodes = []
         try:
             for node in nodes:
-                self.log.debug("Locking node: %s" % (node,))
+                self.log.debug("Locking node %s" % (node,))
                 self.sched.zk.lockNode(node)
                 locked_nodes.append(node)
         except Exception:

@@ -90,7 +93,7 @@ class Nodepool(object):
     def _updateNodeRequest(self, request, deleted):
         # Return False to indicate that we should stop watching the
         # node.
-        self.log.debug("Updating node request: %s" % (request,))
+        self.log.debug("Updating node request %s" % (request,))
 
         if request.uid not in self.requests:
             return False

@@ -114,7 +117,7 @@ class Nodepool(object):
         # Called by the scheduler when it wants to accept and lock
         # nodes for (potential) use.
 
-        self.log.debug("Accepting node request: %s" % (request,))
+        self.log.info("Accepting node request %s" % (request,))
 
         # First, try to lock the nodes.
         locked = False

@@ -127,7 +130,7 @@ class Nodepool(object):
 
         # Regardless of whether locking succeeded, delete the
         # request.
-        self.log.debug("Deleting node request: %s" % (request,))
+        self.log.debug("Deleting node request %s" % (request,))
         try:
             self.sched.zk.deleteNodeRequest(request)
         except Exception:
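Across these hunks, the requests dict keyed by uid is what _updateNodeRequest consults to tell live requests from canceled ones: cancelRequest deletes the znode and drops the uid, and the watch callback returns False for unknown uids so the watch machinery stops notifying. A compressed sketch of that lifecycle, with a fake ZooKeeper wrapper standing in for sched.zk:

    import uuid

    class FakeZK(object):
        # Stand-in for Zuul's ZooKeeper wrapper (sched.zk).
        def __init__(self):
            self.znodes = set()

        def submitNodeRequest(self, req, watcher):
            self.znodes.add(req['uid'])

        def deleteNodeRequest(self, req):
            self.znodes.discard(req['uid'])

    class Requests(object):
        def __init__(self, zk):
            self.zk = zk
            self.requests = {}

        def requestNodes(self, job):
            req = {'uid': uuid.uuid4().hex, 'job': job}
            self.requests[req['uid']] = req
            self.zk.submitNodeRequest(req, self._updateNodeRequest)
            return req

        def cancelRequest(self, req):
            if req['uid'] in self.requests:
                self.zk.deleteNodeRequest(req)
                del self.requests[req['uid']]

        def _updateNodeRequest(self, req):
            # False tells the watch machinery to stop watching this one.
            return req['uid'] in self.requests

    zk = FakeZK()
    reqs = Requests(zk)
    r = reqs.requestNodes('unit')
    reqs.cancelRequest(r)
    assert not zk.znodes and not reqs.requests  # nothing leaked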
@@ -767,8 +767,18 @@ class Scheduler(threading.Thread):
 
     def _doBuildCompletedEvent(self, event):
         build = event.build
 
+        # Regardless of any other conditions which might cause us not
+        # to pass this on to the pipeline manager, make sure we return
+        # the nodes to nodepool.
+        try:
+            nodeset = build.build_set.getJobNodeSet(build.job.name)
+            self.nodepool.returnNodeset(nodeset)
+        except Exception:
+            self.log.exception("Unable to return nodeset %s" % (nodeset,))
+
         if build.build_set is not build.build_set.item.current_build_set:
-            self.log.warning("Build %s is not in the current build set" %
+            self.log.debug("Build %s is not in the current build set" %
                             (build,))
             return
         pipeline = build.build_set.item.pipeline
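The scheduler change applies the classic leak fix: release the resource before any early-return filtering, so a build completing against a superseded build set still hands its nodes back. The shape of it, reduced to a sketch with stand-in names:

    def on_build_completed(build, nodepool, log):
        # Return nodes first, unconditionally; nothing below can leak them.
        try:
            nodeset = build.build_set.getJobNodeSet(build.job.name)
            nodepool.returnNodeset(nodeset)
        except Exception:
            log.exception("Unable to return nodeset")

        if build.build_set is not build.build_set.item.current_build_set:
            log.debug("Build %s is not in the current build set" % (build,))
            return  # early return is now safe: nodes already returned
        # ... normal result handling continues here ...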