diff --git a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml index dcb0cbd034..01ca390fc5 100644 --- a/tests/fixtures/config/semaphore/git/common-config/zuul.yaml +++ b/tests/fixtures/config/semaphore/git/common-config/zuul.yaml @@ -103,3 +103,9 @@ - project-test1 - semaphore-one-test1-resources-first - semaphore-one-test2-resources-first + +- project: + name: org/project4 + check: + jobs: + - semaphore-one-test1-resources-first diff --git a/tests/fixtures/config/semaphore/git/org_project4/README b/tests/fixtures/config/semaphore/git/org_project4/README new file mode 100644 index 0000000000..9daeafb986 --- /dev/null +++ b/tests/fixtures/config/semaphore/git/org_project4/README @@ -0,0 +1 @@ +test diff --git a/tests/fixtures/config/semaphore/main.yaml b/tests/fixtures/config/semaphore/main.yaml index 919921e916..3d7a5c9c2d 100644 --- a/tests/fixtures/config/semaphore/main.yaml +++ b/tests/fixtures/config/semaphore/main.yaml @@ -9,3 +9,4 @@ - org/project1 - org/project2 - org/project3 + - org/project4 diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py index 0848103c6b..62583986bd 100644 --- a/tests/unit/test_scheduler.py +++ b/tests/unit/test_scheduler.py @@ -6251,6 +6251,38 @@ class TestSemaphore(ZuulTestCase): self.assertEqual(A.reported, 1) self.assertEqual(B.reported, 1) + def test_semaphore_node_failure(self): + "Test semaphore and node failure" + tenant = self.sched.abide.tenants.get('tenant-one') + + # Pause nodepool so we can fail the node request later + self.fake_nodepool.pause() + + A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A') + self.assertFalse('test-semaphore' in + tenant.semaphore_handler.semaphores) + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + + # By default we first lock the semaphore and then get the nodes + # so at this point the semaphore needs to be aquired. + self.assertTrue('test-semaphore' in + tenant.semaphore_handler.semaphores) + + # Fail the node request and unpause + req = self.fake_nodepool.getNodeRequests()[0] + self.fake_nodepool.addFailRequest(req) + self.fake_nodepool.unpause() + self.waitUntilSettled() + + # At this point the job that holds the semaphore failed with + # node_failure and the semaphore must be released. + self.assertFalse('test-semaphore' in + tenant.semaphore_handler.semaphores) + self.assertEquals(1, A.reported) + self.assertIn('semaphore-one-test3 semaphore-one-test3 : NODE_FAILURE', + A.messages[0]) + def test_semaphore_resources_first(self): "Test semaphores with max=1 (mutex) and get resources first" tenant = self.sched.abide.tenants.get('tenant-one') @@ -6298,6 +6330,38 @@ class TestSemaphore(ZuulTestCase): self.executor_server.release() self.waitUntilSettled() + def test_semaphore_resources_first_node_failure(self): + "Test semaphore and node failure" + tenant = self.sched.abide.tenants.get('tenant-one') + + # Pause nodepool so we can fail the node request later + self.fake_nodepool.pause() + + A = self.fake_gerrit.addFakeChange('org/project4', 'master', 'A') + self.assertFalse('test-semaphore' in + tenant.semaphore_handler.semaphores) + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + + # With resources first we first get the nodes so at this point the + # semaphore must not be aquired. + self.assertFalse('test-semaphore' in + tenant.semaphore_handler.semaphores) + + # Fail the node request and unpause + req = self.fake_nodepool.getNodeRequests()[0] + self.fake_nodepool.addFailRequest(req) + self.fake_nodepool.unpause() + self.waitUntilSettled() + + # At this point the job should never have acuired a semaphore so check + # that it still has not locked a semaphore. + self.assertFalse('test-semaphore' in + tenant.semaphore_handler.semaphores) + self.assertEquals(1, A.reported) + self.assertIn('semaphore-one-test1-resources-first : NODE_FAILURE', + A.messages[0]) + def test_semaphore_zk_error(self): "Test semaphore release with zk error" tenant = self.sched.abide.tenants.get('tenant-one') diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index 416f3f1d08..d3ad0d6e37 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -864,6 +864,9 @@ class PipelineManager(object): (request, request.job.name,)) build_set.item.setNodeRequestFailure(request.job) self._resumeBuilds(request.build_set) + tenant = build_set.item.pipeline.tenant + tenant.semaphore_handler.release(build_set.item, request.job) + self.log.info("Completed node request %s for job %s of item %s " "with nodes %s" % (request, request.job, build_set.item,