Merge "Verify nodes and requests are not leaked" into feature/zuulv3

Jenkins 2017-01-18 21:24:07 +00:00 committed by Gerrit Code Review
commit 09d6e4a9e0
7 changed files with 94 additions and 29 deletions


@@ -874,6 +874,7 @@ class FakeSwiftClientConnection(swiftclient.client.Connection):
class FakeNodepool(object):
REQUEST_ROOT = '/nodepool/requests'
NODE_ROOT = '/nodepool/nodes'
log = logging.getLogger("zuul.test.FakeNodepool")
@@ -918,6 +919,28 @@ class FakeNodepool(object):
reqs.append(data)
return reqs
def getNodes(self):
try:
nodeids = self.client.get_children(self.NODE_ROOT)
except kazoo.exceptions.NoNodeError:
return []
nodes = []
for oid in sorted(nodeids):
path = self.NODE_ROOT + '/' + oid
data, stat = self.client.get(path)
data = json.loads(data)
data['_oid'] = oid
try:
lockfiles = self.client.get_children(path + '/lock')
except kazoo.exceptions.NoNodeError:
lockfiles = []
if lockfiles:
data['_lock'] = True
else:
data['_lock'] = False
nodes.append(data)
return nodes
def makeNode(self, request_id, node_type):
now = time.time()
path = '/nodepool/nodes/'
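The _lock flag computed by getNodes() above comes from checking for children beneath each node's lock path. A standalone sketch of that check, assuming (as the test code suggests) that locks are taken with kazoo's Lock recipe directly under <node-path>/lock; this is an illustration, not code from the commit:

import kazoo.exceptions

def node_is_locked(client, node_path):
    # kazoo's Lock recipe creates ephemeral contender znodes under the lock
    # path, so any child of <node_path>/lock means some component still
    # holds the node.
    try:
        return bool(client.get_children(node_path + '/lock'))
    except kazoo.exceptions.NoNodeError:
        # No lock znode has ever been created for this node.
        return False

# Usage sketch (connection details are placeholders):
#   client = kazoo.client.KazooClient(hosts='127.0.0.1:2181')
#   client.start()
#   node_is_locked(client, '/nodepool/nodes/0000000000')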
@@ -1257,9 +1280,12 @@ class ZuulTestCase(BaseTestCase):
self.rpc.start()
self.launch_client.gearman.waitForServer()
self.addCleanup(self.assertFinalState)
self.addCleanup(self.shutdown)
def tearDown(self):
super(ZuulTestCase, self).tearDown()
self.assertFinalState()
def configure_connections(self):
# Register connections from the config
self.smtp_messages = []
@@ -1366,6 +1392,17 @@ class ZuulTestCase(BaseTestCase):
self.addCommitToRepo(project, 'add content from fixture',
files, branch='master', tag='init')
def assertNodepoolState(self):
# Make sure that there are no pending requests and no nodes are left locked
requests = self.fake_nodepool.getNodeRequests()
self.assertEqual(len(requests), 0)
nodes = self.fake_nodepool.getNodes()
for node in nodes:
self.assertFalse(node['_lock'], "Node %s is locked" %
(node['_oid'],))
def assertFinalState(self):
# Make sure that git.Repo objects have been garbage collected.
repos = []
@@ -1375,6 +1412,7 @@ class ZuulTestCase(BaseTestCase):
repos.append(obj)
self.assertEqual(len(repos), 0)
self.assertEmptyQueues()
self.assertNodepoolState()
ipm = zuul.manager.independent.IndependentPipelineManager
for tenant in self.sched.abide.tenants.values():
for pipeline in tenant.layout.pipelines.values():
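The repository check at the top of assertFinalState above leans on the garbage collector: after a test finishes, no git.Repo object should still be reachable. A minimal standalone sketch of that idea; the gc-based scan is an assumption drawn from the visible comment and list, not copied from the elided lines:

import gc
import git

def leaked_repos():
    # Collect first so that only objects still genuinely referenced remain,
    # then scan the survivors for GitPython Repo instances.
    gc.collect()
    return [obj for obj in gc.get_objects() if isinstance(obj, git.Repo)]

# assertFinalState-style usage: self.assertEqual(len(leaked_repos()), 0)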


@@ -25,14 +25,8 @@ from tests.base import ZuulTestCase
class TestWebapp(ZuulTestCase):
tenant_config_file = 'config/single-tenant/main.yaml'
def _cleanup(self):
self.launch_server.hold_jobs_in_build = False
self.launch_server.release()
self.waitUntilSettled()
def setUp(self):
super(TestWebapp, self).setUp()
self.addCleanup(self._cleanup)
self.launch_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('code-review', 2)
@@ -43,6 +37,12 @@ class TestWebapp(ZuulTestCase):
self.waitUntilSettled()
self.port = self.webapp.server.socket.getsockname()[1]
def tearDown(self):
self.launch_server.hold_jobs_in_build = False
self.launch_server.release()
self.waitUntilSettled()
super(TestWebapp, self).tearDown()
def test_webapp_status(self):
"Test that we can filter to only certain changes in the webapp."


@@ -471,7 +471,6 @@ class LaunchClient(object):
data = getJobData(job)
build.node_labels = data.get('node_labels', [])
build.node_name = data.get('node_name')
if not build.canceled:
if result is None:
result = data.get('result')
if result is None:


@@ -395,6 +395,7 @@ class PipelineManager(object):
for req in old_build_set.node_requests.values():
self.sched.nodepool.cancelRequest(req)
old_build_set.node_requests = {}
canceled_jobs = set()
for build in old_build_set.getBuilds():
was_running = False
try:
@@ -405,13 +406,18 @@ class PipelineManager(object):
if not was_running:
try:
nodeset = build.build_set.getJobNodeSet(build.job.name)
self.nodepool.returnNodeset(nodeset)
self.sched.nodepool.returnNodeset(nodeset)
except Exception:
self.log.exception("Unable to return nodeset %s for "
"canceled build request %s" %
(nodeset, build))
build.result = 'CANCELED'
canceled = True
canceled_jobs.add(build.job.name)
for jobname, nodeset in list(old_build_set.nodesets.items()):
if jobname in canceled_jobs:
continue
self.sched.nodepool.returnNodeset(nodeset)
for item_behind in item.items_behind:
self.log.debug("Canceling jobs for change %s, behind change %s" %
(item_behind.change, item.change))
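The canceled_jobs set introduced above exists so that every nodeset held by the old build set is returned exactly once: builds that were individually canceled return their nodeset in the first loop, and the follow-up sweep returns whatever remains. A toy standalone illustration of that bookkeeping, not taken from the commit:

def return_remaining_nodesets(nodesets, already_returned_jobs, return_nodeset):
    # Nodesets whose builds were canceled individually have already been
    # returned; returning them again would unlock already-unlocked nodes.
    for jobname, nodeset in list(nodesets.items()):
        if jobname in already_returned_jobs:
            continue
        return_nodeset(nodeset)

# e.g. return_remaining_nodesets(old_build_set.nodesets, canceled_jobs,
#                                self.sched.nodepool.returnNodeset)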
@@ -609,11 +615,15 @@ class PipelineManager(object):
self.log.debug("Item %s status is now:\n %s" %
(item, item.formatStatus()))
try:
if build.retry:
build.build_set.removeJobNodeSet(build.job.name)
# If any jobs were skipped as a result of this build, return
# their nodes.
for build in build.build_set.getBuilds():
if build.result == 'SKIPPED':
nodeset = build.build_set.getJobNodeSet(build.job.name)
self.nodepool.returnNodeset(nodeset)
except Exception:
self.log.exception("Unable to return nodeset %s" % (nodeset,))
self.sched.nodepool.returnNodeset(nodeset)
return True


@@ -762,6 +762,11 @@ class BuildSet(object):
# required
return self.nodesets.get(job_name)
def removeJobNodeSet(self, job_name):
if job_name not in self.nodesets:
raise Exception("No job set for %s" % (job_name))
del self.nodesets[job_name]
def setJobNodeRequest(self, job_name, req):
if job_name in self.node_requests:
raise Exception("Prior node request for %s" % (job_name))


@@ -28,14 +28,15 @@ class Nodepool(object):
nodeset = job.nodeset.copy()
req = NodeRequest(build_set, job, nodeset)
self.requests[req.uid] = req
self.log.debug("Submitting node request: %s" % (req,))
self.sched.zk.submitNodeRequest(req, self._updateNodeRequest)
# Logged after submission so that we have the request id
self.log.info("Submited node request %s" % (req,))
return req
def cancelRequest(self, request):
self.log.debug("Canceling node request: %s" % (request,))
self.log.info("Canceling node request %s" % (request,))
if request.uid in self.requests:
try:
self.sched.zk.deleteNodeRequest(request)
@@ -44,6 +45,7 @@ class Nodepool(object):
del self.requests[request.uid]
def useNodeset(self, nodeset):
self.log.info("Setting nodeset %s in use" % (nodeset,))
for node in nodeset.getNodes():
if node.lock is None:
raise Exception("Node %s is not locked" % (node,))
@@ -51,6 +53,7 @@ class Nodepool(object):
self.sched.zk.storeNode(node)
def returnNodeset(self, nodeset):
self.log.info("Returning nodeset %s" % (nodeset,))
for node in nodeset.getNodes():
if node.lock is None:
raise Exception("Node %s is not locked" % (node,))
@@ -79,7 +82,7 @@ class Nodepool(object):
locked_nodes = []
try:
for node in nodes:
self.log.debug("Locking node: %s" % (node,))
self.log.debug("Locking node %s" % (node,))
self.sched.zk.lockNode(node)
locked_nodes.append(node)
except Exception:
@@ -90,7 +93,7 @@ class Nodepool(object):
def _updateNodeRequest(self, request, deleted):
# Return False to indicate that we should stop watching the
# node.
self.log.debug("Updating node request: %s" % (request,))
self.log.debug("Updating node request %s" % (request,))
if request.uid not in self.requests:
return False
@@ -114,7 +117,7 @@ class Nodepool(object):
# Called by the scheduler when it wants to accept and lock
# nodes for (potential) use.
self.log.debug("Accepting node request: %s" % (request,))
self.log.info("Accepting node request %s" % (request,))
# First, try to lock the nodes.
locked = False
@@ -127,7 +130,7 @@ class Nodepool(object):
# Regardless of whether locking succeeded, delete the
# request.
self.log.debug("Deleting node request: %s" % (request,))
self.log.debug("Deleting node request %s" % (request,))
try:
self.sched.zk.deleteNodeRequest(request)
except Exception:
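lockNodes() above collects nodes as it locks them so that, if any lock attempt fails, the ones already acquired can be released (the rollback branch is truncated in this hunk). A generic standalone sketch of that lock-all-or-roll-back pattern, with the kazoo recipe and lock paths as assumptions rather than zuul's actual zk layer:

from kazoo.recipe.lock import Lock

def lock_all(client, node_paths):
    # client is a started kazoo.client.KazooClient.
    locked = []
    try:
        for path in node_paths:
            lock = Lock(client, path + '/lock')
            if not lock.acquire(blocking=False):
                raise Exception("Node at %s is already locked" % path)
            locked.append(lock)
    except Exception:
        # Roll back whatever was acquired so a partial failure cannot
        # leak node locks.
        for lock in locked:
            lock.release()
        raise
    return locked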


@@ -767,8 +767,18 @@ class Scheduler(threading.Thread):
def _doBuildCompletedEvent(self, event):
build = event.build
# Regardless of any other conditions which might cause us not
# to pass this on to the pipeline manager, make sure we return
# the nodes to nodepool.
try:
nodeset = build.build_set.getJobNodeSet(build.job.name)
self.nodepool.returnNodeset(nodeset)
except Exception:
self.log.exception("Unable to return nodeset %s" % (nodeset,))
if build.build_set is not build.build_set.item.current_build_set:
self.log.warning("Build %s is not in the current build set" %
self.log.debug("Build %s is not in the current build set" %
(build,))
return
pipeline = build.build_set.item.pipeline