Browse Source

Merge "Fix missing semaphore release on zk error"

Zuul 1 year ago
parent
commit
623533ab85

+ 15
- 0
tests/fixtures/config/semaphore/git/common-config/zuul.yaml View File

@@ -47,6 +47,15 @@
47 47
     semaphore: test-semaphore-two
48 48
     run: playbooks/semaphore-two-test2.yaml
49 49
 
50
+- job:
51
+    name: semaphore-one-test3
52
+    semaphore: test-semaphore
53
+    run: playbooks/semaphore-two-test1.yaml
54
+    nodeset:
55
+      nodes:
56
+        - name: controller
57
+          label: label1
58
+
50 59
 - project:
51 60
     name: org/project
52 61
     check:
@@ -62,3 +71,9 @@
62 71
         - project-test1
63 72
         - semaphore-two-test1
64 73
         - semaphore-two-test2
74
+
75
+- project:
76
+    name: org/project2
77
+    check:
78
+      jobs:
79
+        - semaphore-one-test3

+ 1
- 0
tests/fixtures/config/semaphore/git/org_project2/README View File

@@ -0,0 +1 @@
1
+test

+ 1
- 0
tests/fixtures/config/semaphore/main.yaml View File

@@ -7,3 +7,4 @@
7 7
         untrusted-projects:
8 8
           - org/project
9 9
           - org/project1
10
+          - org/project2

+ 30
- 0
tests/unit/test_scheduler.py View File

@@ -23,6 +23,7 @@ import shutil
23 23
 import time
24 24
 from unittest import mock
25 25
 from unittest import skip
26
+from kazoo.exceptions import NoNodeError
26 27
 
27 28
 import git
28 29
 import testtools
@@ -5487,6 +5488,35 @@ class TestSemaphore(ZuulTestCase):
5487 5488
         self.assertEqual(A.reported, 1)
5488 5489
         self.assertEqual(B.reported, 1)
5489 5490
 
5491
+    def test_semaphore_zk_error(self):
5492
+        "Test semaphore release with zk error"
5493
+        tenant = self.sched.abide.tenants.get('tenant-one')
5494
+
5495
+        A = self.fake_gerrit.addFakeChange('org/project2', 'master', 'A')
5496
+        self.assertFalse('test-semaphore' in
5497
+                         tenant.semaphore_handler.semaphores)
5498
+
5499
+        # Simulate a single zk error in useNodeSet
5500
+        orig_useNodeSet = self.nodepool.useNodeSet
5501
+
5502
+        def broken_use_nodeset(nodeset):
5503
+            # restore original useNodeSet
5504
+            self.nodepool.useNodeSet = orig_useNodeSet
5505
+            raise NoNodeError()
5506
+
5507
+        self.nodepool.useNodeSet = broken_use_nodeset
5508
+
5509
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
5510
+        self.waitUntilSettled()
5511
+
5512
+        # The semaphore should be released
5513
+        self.assertFalse('test-semaphore' in
5514
+                         tenant.semaphore_handler.semaphores)
5515
+
5516
+        # cleanup the queue
5517
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
5518
+        self.waitUntilSettled()
5519
+
5490 5520
     def test_semaphore_abandon(self):
5491 5521
         "Test abandon with job semaphores"
5492 5522
         self.executor_server.hold_jobs_in_build = True

+ 8
- 0
zuul/manager/__init__.py View File

@@ -399,6 +399,14 @@ class PipelineManager(object):
399 399
             except Exception:
400 400
                 self.log.exception("Exception while executing job %s "
401 401
                                    "for change %s:" % (job, item.change))
402
+                try:
403
+                    # If we hit an exception we don't have a build in the
404
+                    # current item so a potentially aquired semaphore must be
405
+                    # released as it won't be released on dequeue of the item.
406
+                    tenant = item.pipeline.layout.tenant
407
+                    tenant.semaphore_handler.release(item, job)
408
+                except Exception:
409
+                    self.log.exception("Exception while releasing semaphore")
402 410
 
403 411
     def executeJobs(self, item):
404 412
         # TODO(jeblair): This should return a value indicating a job

Loading…
Cancel
Save