Merge "Fix missing semaphore release on node failure"
This commit is contained in:
commit
069a550f81
|
@ -103,3 +103,9 @@
|
|||
- project-test1
|
||||
- semaphore-one-test1-resources-first
|
||||
- semaphore-one-test2-resources-first
|
||||
|
||||
- project:
|
||||
name: org/project4
|
||||
check:
|
||||
jobs:
|
||||
- semaphore-one-test1-resources-first
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
test
|
|
@ -9,3 +9,4 @@
|
|||
- org/project1
|
||||
- org/project2
|
||||
- org/project3
|
||||
- org/project4
|
||||
|
|
|
@ -6251,6 +6251,38 @@ class TestSemaphore(ZuulTestCase):
|
|||
self.assertEqual(A.reported, 1)
|
||||
self.assertEqual(B.reported, 1)
|
||||
|
||||
def test_semaphore_node_failure(self):
    """Test semaphore and node failure

    In the default mode the semaphore is locked before the nodes are
    requested, so a failed node request must release the semaphore again.
    """
    tenant = self.sched.abide.tenants.get('tenant-one')

    # Pause nodepool so we can fail the node request later
    self.fake_nodepool.pause()

    A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
    self.assertNotIn('test-semaphore',
                     tenant.semaphore_handler.semaphores)
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    # By default we first lock the semaphore and then get the nodes
    # so at this point the semaphore needs to be acquired.
    self.assertIn('test-semaphore',
                  tenant.semaphore_handler.semaphores)

    # Fail the node request and unpause
    req = self.fake_nodepool.getNodeRequests()[0]
    self.fake_nodepool.addFailRequest(req)
    self.fake_nodepool.unpause()
    self.waitUntilSettled()

    # At this point the job that holds the semaphore failed with
    # node_failure and the semaphore must be released.
    self.assertNotIn('test-semaphore',
                     tenant.semaphore_handler.semaphores)
    # assertEqual rather than the deprecated assertEquals alias, which
    # was removed in Python 3.12.
    self.assertEqual(1, A.reported)
    self.assertIn('semaphore-one-test3 semaphore-one-test3 : NODE_FAILURE',
                  A.messages[0])
|
||||
|
||||
def test_semaphore_resources_first(self):
|
||||
"Test semaphores with max=1 (mutex) and get resources first"
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
|
@ -6298,6 +6330,38 @@ class TestSemaphore(ZuulTestCase):
|
|||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
def test_semaphore_resources_first_node_failure(self):
    """Test semaphore with resources first and node failure

    With resources first the nodes are requested before the semaphore is
    locked, so a failed node request must leave the semaphore untouched.
    """
    tenant = self.sched.abide.tenants.get('tenant-one')

    # Pause nodepool so we can fail the node request later
    self.fake_nodepool.pause()

    A = self.fake_gerrit.addFakeChange('org/project4', 'master', 'A')
    self.assertNotIn('test-semaphore',
                     tenant.semaphore_handler.semaphores)
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    # With resources first we first get the nodes so at this point the
    # semaphore must not be acquired.
    self.assertNotIn('test-semaphore',
                     tenant.semaphore_handler.semaphores)

    # Fail the node request and unpause
    req = self.fake_nodepool.getNodeRequests()[0]
    self.fake_nodepool.addFailRequest(req)
    self.fake_nodepool.unpause()
    self.waitUntilSettled()

    # At this point the job should never have acquired a semaphore so check
    # that it still has not locked a semaphore.
    self.assertNotIn('test-semaphore',
                     tenant.semaphore_handler.semaphores)
    # assertEqual rather than the deprecated assertEquals alias, which
    # was removed in Python 3.12.
    self.assertEqual(1, A.reported)
    self.assertIn('semaphore-one-test1-resources-first : NODE_FAILURE',
                  A.messages[0])
|
||||
|
||||
def test_semaphore_zk_error(self):
|
||||
"Test semaphore release with zk error"
|
||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||
|
|
|
@ -864,6 +864,9 @@ class PipelineManager(object):
|
|||
(request, request.job.name,))
|
||||
build_set.item.setNodeRequestFailure(request.job)
|
||||
self._resumeBuilds(request.build_set)
|
||||
tenant = build_set.item.pipeline.tenant
|
||||
tenant.semaphore_handler.release(build_set.item, request.job)
|
||||
|
||||
self.log.info("Completed node request %s for job %s of item %s "
|
||||
"with nodes %s" %
|
||||
(request, request.job, build_set.item,
|
||||
|
|
Loading…
Reference in New Issue