From 7b4ce1be8ec059d5822800b4776b87510170214b Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Wed, 24 Aug 2022 15:28:52 +0200 Subject: [PATCH] Consider all node types when adjusting label quota Since nodes can have multiple other labels apart from the requested type, we need to adjust the available quota for all labels of nodes that were allocated to a request. This fixes a problem where a static driver pool could pause because the requested node types were no longer available but the request was still accepted due to wrong label quota. Change-Id: Ia9626ec26a66870574019ecc3f119a18e6c5022d --- nodepool/launcher.py | 15 ++++--- .../fixtures/static-2-nodes-multilabel.yaml | 4 ++ nodepool/tests/unit/test_driver_static.py | 41 +++++++++++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index ed73d3e86..a7b8460eb 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -247,15 +247,20 @@ class PoolWorker(threading.Thread, stats.StatsReporter): # Got a lock, so assign it log.info("Assigning node request %s" % req) + rh = pm.getRequestHandler(self, req) + rh.run() + if has_quota_support: # Adjust the label quota so we don't accept more requests # than we have labels available. - for label in req.node_types: - with contextlib.suppress(KeyError): - label_quota[label] -= 1 + # Since nodes can have multiple other labels apart from the + # requested type, we need to adjust the quota for all labels + # of nodes that are allocated to the request. + for node in rh.nodeset: + for node_type in node.type: + with contextlib.suppress(KeyError): + label_quota[node_type] -= 1 - rh = pm.getRequestHandler(self, req) - rh.run() if rh.paused: self.paused_handler = rh self.request_handlers.append(rh) diff --git a/nodepool/tests/fixtures/static-2-nodes-multilabel.yaml b/nodepool/tests/fixtures/static-2-nodes-multilabel.yaml index 48e551129..e2834205b 100644 --- a/nodepool/tests/fixtures/static-2-nodes-multilabel.yaml +++ b/nodepool/tests/fixtures/static-2-nodes-multilabel.yaml @@ -11,6 +11,8 @@ zookeeper-tls: labels: - name: fake-label - name: fake-label2 + - name: label-host-1 + - name: label-host-2 providers: - name: static-provider @@ -22,6 +24,7 @@ providers: labels: - fake-label - fake-label2 + - label-host-1 host-key: ssh-rsa FAKEKEY timeout: 13 connection-port: 22022 @@ -29,6 +32,7 @@ providers: - name: fake-host-2 labels: - fake-label2 + - label-host-2 host-key: ssh-rsa FAKEKEY timeout: 13 connection-port: 22022 diff --git a/nodepool/tests/unit/test_driver_static.py b/nodepool/tests/unit/test_driver_static.py index f2dc3e1fd..8523c9222 100644 --- a/nodepool/tests/unit/test_driver_static.py +++ b/nodepool/tests/unit/test_driver_static.py @@ -326,6 +326,47 @@ class TestDriverStatic(tests.DBTestCase): self.waitForNodeDeletion(node) self.waitForNodeRequest(req_waiting, zk.FULFILLED) + def test_label_quota(self): + configfile = self.setup_config('static-2-nodes-multilabel.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + + req1 = zk.NodeRequest() + req1.state = zk.REQUESTED + req1.node_types.append('label-host-1') + + req2 = zk.NodeRequest() + req2.state = zk.REQUESTED + req2.node_types.append('label-host-2') + + # Request a label that is no longer available, but wasn't requested + # by any of the previous requests. + req_waiting = zk.NodeRequest() + req_waiting.state = zk.REQUESTED + req_waiting.node_types.append('fake-label2') + + self.zk.storeNodeRequest(req1) + self.zk.storeNodeRequest(req2) + self.zk.storeNodeRequest(req_waiting) + + req1 = self.waitForNodeRequest(req1, zk.FULFILLED) + node = self.zk.getNode(req1.nodes[0]) + self.zk.lockNode(node) + node.state = zk.USED + self.zk.storeNode(node) + + req2 = self.waitForNodeRequest(req2, zk.FULFILLED) + + # Assert that the request was not accepted, which means that + # the label quota was correctly adjusted. + req_waiting = self.zk.getNodeRequest(req_waiting.id) + self.assertEqual(req_waiting.state, zk.REQUESTED) + self.assertEqual(req_waiting.declined_by, []) + + self.zk.unlockNode(node) + self.waitForNodeDeletion(node) + self.waitForNodeRequest(req_waiting, zk.FULFILLED) + def test_static_ignore_assigned_ready_nodes(self): """Regression test to not touch assigned READY nodes""" configfile = self.setup_config('static-basic.yaml')