Merge "Fix missing semaphore release on node failure"
This commit is contained in:
commit
069a550f81
|
@ -103,3 +103,9 @@
|
||||||
- project-test1
|
- project-test1
|
||||||
- semaphore-one-test1-resources-first
|
- semaphore-one-test1-resources-first
|
||||||
- semaphore-one-test2-resources-first
|
- semaphore-one-test2-resources-first
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project4
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- semaphore-one-test1-resources-first
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
test
|
|
@ -9,3 +9,4 @@
|
||||||
- org/project1
|
- org/project1
|
||||||
- org/project2
|
- org/project2
|
||||||
- org/project3
|
- org/project3
|
||||||
|
- org/project4
|
||||||
|
|
|
@ -6251,6 +6251,38 @@ class TestSemaphore(ZuulTestCase):
|
||||||
self.assertEqual(A.reported, 1)
|
self.assertEqual(A.reported, 1)
|
||||||
self.assertEqual(B.reported, 1)
|
self.assertEqual(B.reported, 1)
|
||||||
|
|
||||||
|
def test_semaphore_node_failure(self):
|
||||||
|
"Test semaphore and node failure"
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
|
||||||
|
# Pause nodepool so we can fail the node request later
|
||||||
|
self.fake_nodepool.pause()
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# By default we first lock the semaphore and then get the nodes
|
||||||
|
# so at this point the semaphore needs to be aquired.
|
||||||
|
self.assertTrue('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
# Fail the node request and unpause
|
||||||
|
req = self.fake_nodepool.getNodeRequests()[0]
|
||||||
|
self.fake_nodepool.addFailRequest(req)
|
||||||
|
self.fake_nodepool.unpause()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# At this point the job that holds the semaphore failed with
|
||||||
|
# node_failure and the semaphore must be released.
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
self.assertEquals(1, A.reported)
|
||||||
|
self.assertIn('semaphore-one-test3 semaphore-one-test3 : NODE_FAILURE',
|
||||||
|
A.messages[0])
|
||||||
|
|
||||||
def test_semaphore_resources_first(self):
|
def test_semaphore_resources_first(self):
|
||||||
"Test semaphores with max=1 (mutex) and get resources first"
|
"Test semaphores with max=1 (mutex) and get resources first"
|
||||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
@ -6298,6 +6330,38 @@ class TestSemaphore(ZuulTestCase):
|
||||||
self.executor_server.release()
|
self.executor_server.release()
|
||||||
self.waitUntilSettled()
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
def test_semaphore_resources_first_node_failure(self):
|
||||||
|
"Test semaphore and node failure"
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
|
||||||
|
# Pause nodepool so we can fail the node request later
|
||||||
|
self.fake_nodepool.pause()
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project4', 'master', 'A')
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# With resources first we first get the nodes so at this point the
|
||||||
|
# semaphore must not be aquired.
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
# Fail the node request and unpause
|
||||||
|
req = self.fake_nodepool.getNodeRequests()[0]
|
||||||
|
self.fake_nodepool.addFailRequest(req)
|
||||||
|
self.fake_nodepool.unpause()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# At this point the job should never have acuired a semaphore so check
|
||||||
|
# that it still has not locked a semaphore.
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
self.assertEquals(1, A.reported)
|
||||||
|
self.assertIn('semaphore-one-test1-resources-first : NODE_FAILURE',
|
||||||
|
A.messages[0])
|
||||||
|
|
||||||
def test_semaphore_zk_error(self):
|
def test_semaphore_zk_error(self):
|
||||||
"Test semaphore release with zk error"
|
"Test semaphore release with zk error"
|
||||||
tenant = self.sched.abide.tenants.get('tenant-one')
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
|
|
@ -864,6 +864,9 @@ class PipelineManager(object):
|
||||||
(request, request.job.name,))
|
(request, request.job.name,))
|
||||||
build_set.item.setNodeRequestFailure(request.job)
|
build_set.item.setNodeRequestFailure(request.job)
|
||||||
self._resumeBuilds(request.build_set)
|
self._resumeBuilds(request.build_set)
|
||||||
|
tenant = build_set.item.pipeline.tenant
|
||||||
|
tenant.semaphore_handler.release(build_set.item, request.job)
|
||||||
|
|
||||||
self.log.info("Completed node request %s for job %s of item %s "
|
self.log.info("Completed node request %s for job %s of item %s "
|
||||||
"with nodes %s" %
|
"with nodes %s" %
|
||||||
(request, request.job, build_set.item,
|
(request, request.job, build_set.item,
|
||||||
|
|
Loading…
Reference in New Issue