Refactor the checkNodeRequest method

We perform some checks which aren't necessary any more.  This
method is better thought of as a method of getting a nodeset from
a fulfilled node request, so update it accordingly.

Change-Id: I1113820115af68b706b6fe06d6d03cd35ae6b382
James E. Blair 2021-08-31 15:38:05 -07:00
parent dbab353ca3
commit 514f62ea31
4 changed files with 12 additions and 32 deletions
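
For context, a minimal sketch of the call-site change, using a hypothetical caller; only the method names and arguments are taken from the hunks below:

    # Before: the caller also passed the request id, and the method handled
    # ZooKeeper errors itself by setting request.failed, so call sites kept a
    # separate failure check on the returned value.
    nodeset = self.nodepool.checkNodeRequest(request, request.id, job.nodeset)
    if nodeset is None:
        return

    # After: a fulfilled request is simply resolved into a NodeSet copy with
    # the real node data; failed requests are handled at the call site via
    # request.state (see the scheduler hunk below).
    nodeset = self.nodepool.getNodeSet(request, job.nodeset)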

@@ -90,8 +90,7 @@ class TestNodepool(TestNodepoolBase):
         self.assertEqual(zk_request.state, 'fulfilled')
         # Accept the nodes
-        new_nodeset = self.nodepool.checkNodeRequest(
-            request, request.id, nodeset)
+        new_nodeset = self.nodepool.getNodeSet(request, nodeset)
         self.assertIsNotNone(new_nodeset)

         # acceptNodes will be called on the executor, but only if the
         # noderequest was accepted before.

@@ -1517,9 +1517,7 @@ class PipelineManager(metaclass=ABCMeta):
         if nodeset is not None:
             build_set.jobNodeRequestComplete(request.job_name, nodeset)

-        # TODO (felix): Check if the failed is still needed as the
-        # NodesProvisionedEvents are now in ZooKeeper.
-        if request.failed or not request.fulfilled:
+        if not request.fulfilled:
             log.info("Node request %s: failure for %s",
                      request, request.job_name)
             job = build_set.item.getJob(request.job_name)

@@ -482,38 +482,27 @@ class Nodepool(object):
             self._unlockNodes(locked_nodes)
             raise

-    def checkNodeRequest(self, request, request_id, job_nodeset):
+    def getNodeSet(self, request, job_nodeset):
         """
-        Called by the scheduler when it wants to accept a node request for
-        potential use of its nodes. The nodes itself will be accepted and
-        locked by the executor when the corresponding job is started.
+        Get a NodeSet object with info about real nodes.

         :param NodeRequest request: The fulfilled NodeRequest
         :param NodeSet job_nodeset: The NodeSet object attached to the job

         :returns: A new NodeSet object which contains information from
                   nodepool about the actual allocated nodes.
         """
-        log = get_annotated_logger(self.log, request.event_id)
-        log.info("Accepting node request %s", request)

         # A copy of the nodeset with information about the real nodes
         nodeset = job_nodeset.copy()

         # If we didn't request nodes and the request is fulfilled then just
-        # reutrn. We don't have to do anything in this case. Further don't even
-        # ask ZK for the request as empty requests are not put into ZK.
+        # return. We don't have to do anything in this case.
         if not request.labels and request.fulfilled:
             return nodeset

         # Load the node info from ZK.
-        try:
-            for node_id, node in zip(request.nodes, nodeset.getNodes()):
-                self.zk_nodepool.updateNode(node, node_id)
-        except Exception:
-            # If we cannot retrieve the node request from ZK we
-            # probably lost the connection and thus the ZK
-            # session. Just log the problem with zookeeper and fail
-            # here.
-            log.exception("Error getting node request %s:", request_id)
-            request.failed = True
-            return nodeset
+        for node_id, node in zip(request.nodes, nodeset.getNodes()):
+            self.zk_nodepool.updateNode(node, node_id)

         return nodeset

@@ -2109,19 +2109,13 @@ class Scheduler(threading.Thread):
             build_set.removeJobNodeRequest(request.job_name)
             return

-        # TODO (felix): Since we are now using the request from ZK directly, do
-        # we still have to validate the request ID? As we are using the same ID
-        # for the lookup in ZK this check should now always succeed.
-        nodeset = self.nodepool.checkNodeRequest(request, event.request_id,
-                                                 job.nodeset)
-        if nodeset is None:
-            return
-
         # If the request failed, we must directly delete it as the nodes will
         # never be accepted.
         if request.state == STATE_FAILED:
             self.nodepool.deleteNodeRequest(request)

+        nodeset = self.nodepool.getNodeSet(request, job.nodeset)
+
         if build_set.getJobNodeSet(request.job_name) is None:
             pipeline.manager.onNodesProvisioned(request, nodeset, build_set)
         else: