Browse Source

Merge "Be more aggressive in canceling node requests"

tags/3.4.0
Zuul 5 months ago
parent
commit
e15926faca
2 changed files with 22 additions and 11 deletions
  1. zuul/model.py (+2, -3)
  2. zuul/scheduler.py (+20, -8)

zuul/model.py (+2, -3) View File

@@ -1861,9 +1861,8 @@ class BuildSet(object):
1861 1861
         return self.node_requests.get(job_name)
1862 1862
 
1863 1863
     def removeJobNodeRequest(self, job_name):
1864
-        if job_name not in self.node_requests:
1865
-            raise Exception("No node request for %s" % (job_name))
1866
-        del self.node_requests[job_name]
1864
+        if job_name in self.node_requests:
1865
+            del self.node_requests[job_name]
1867 1866
 
1868 1867
     def jobNodeRequestComplete(self, job_name, req, nodeset):
1869 1868
         if job_name in self.nodesets:

zuul/scheduler.py (+20, -8) View File

@@ -770,6 +770,7 @@ class Scheduler(threading.Thread):
770 770
                                           new_pipeline.window_floor)
771 771
             items_to_remove = []
772 772
             builds_to_cancel = []
773
+            requests_to_cancel = []
773 774
             last_head = None
774 775
             for shared_queue in old_pipeline.queues:
775 776
                 # Attempt to keep window sizes from shrinking where possible
@@ -812,15 +813,25 @@ class Scheduler(threading.Thread):
812 813
                             else:
813 814
                                 item.removeBuild(build)
814 815
                                 builds_to_cancel.append(build)
816
+                        for request_job, request in \
817
+                            item.current_build_set.node_requests.items():
818
+                            new_job = item.getJob(request_job)
819
+                            if not new_job:
820
+                                requests_to_cancel.append(
821
+                                    (item.current_build_set, request))
815 822
                     else:
816 823
                         items_to_remove.append(item)
817 824
             for item in items_to_remove:
818
-                self.log.warning(
825
+                self.log.info(
819 826
                     "Removing item %s during reconfiguration" % (item,))
820 827
                 for build in item.current_build_set.getBuilds():
821 828
                     builds_to_cancel.append(build)
829
+                for request_job, request in \
830
+                    item.current_build_set.node_requests.items():
831
+                    requests_to_cancel.append(
832
+                        (item.current_build_set, request))
822 833
             for build in builds_to_cancel:
823
-                self.log.warning(
834
+                self.log.info(
824 835
                     "Canceling build %s during reconfiguration" % (build,))
825 836
                 try:
826 837
                     self.executor.cancel(build)
@@ -839,6 +850,12 @@ class Scheduler(threading.Thread):
839 850
                         "for change %s" % (build, build.build_set.item.change))
840 851
                 tenant.semaphore_handler.release(
841 852
                     build.build_set.item, build.job)
853
+            for build_set, request in requests_to_cancel:
854
+                self.log.info(
855
+                    "Canceling node request %s during reconfiguration",
856
+                    request)
857
+                self.nodepool.cancelRequest(request)
858
+                build_set.removeJobNodeRequest(request.job.name)
842 859
 
843 860
     def _reconfigureTenant(self, tenant):
844 861
         # This is called from _doReconfigureEvent while holding the
@@ -1313,12 +1330,7 @@ class Scheduler(threading.Thread):
1313 1330
             self.log.warning("Item %s does not contain job %s "
1314 1331
                              "for node request %s",
1315 1332
                              build_set.item, request.job.name, request)
1316
-            try:
1317
-                build_set.removeJobNodeRequest(request.job.name)
1318
-            except Exception:
1319
-                self.log.exception("Unable to remove obsolete node request "
1320
-                                   "%s for %s job %s",
1321
-                                   request, build_set.item, request.job.name)
1333
+            build_set.removeJobNodeRequest(request.job.name)
1322 1334
             if request.fulfilled:
1323 1335
                 self.nodepool.returnNodeSet(request.nodeset)
1324 1336
             return

Loading…
Cancel
Save