From d7ac31b5cd3e79ff2fbffe7c8d5128a32c9f1153 Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Tue, 8 Oct 2019 12:19:09 +0200 Subject: [PATCH] Assign static 'building' nodes in cleanup handler In certain cases there can be a race between a node being re-registered and a request handler creating a new 'building' node. This will lead to a starvation of the 'building' node until a node with a matching label is re-registered again (if at all). To avoid this, the cleanup handler will check if there are any ready nodes that can be assigned to nodes waiting for a matching label. Change-Id: Ifc7fe2c59d5c4393c462f508ed449736ae145371 --- nodepool/driver/static/provider.py | 20 +++++++++++++++ nodepool/tests/unit/test_driver_static.py | 30 +++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/nodepool/driver/static/provider.py b/nodepool/driver/static/provider.py index 2e6f8af52..e0f0cd8a1 100644 --- a/nodepool/driver/static/provider.py +++ b/nodepool/driver/static/provider.py @@ -375,6 +375,26 @@ class StaticNodeProvider(Provider): except Exception: self.log.exception("Couldn't sync node:") continue + try: + self.assignReadyNodes(node, pool) + except Exception: + self.log.exception("Couldn't assign ready nodes:") + + def assignReadyNodes(self, node, pool): + waiting_nodes = self.getWaitingNodesOfType(node["labels"]) + if not waiting_nodes: + return + ready_nodes = self.getRegisteredReadyNodes(nodeTuple(node)) + if not ready_nodes: + return + + leaked_count = min(len(waiting_nodes), len(ready_nodes)) + self.log.info("Found %s ready node(s) that can be assigned to a " + "waiting node", leaked_count) + + self.deregisterNode(leaked_count, nodeTuple(node)) + self.registerNodeFromConfig( + leaked_count, self.provider.name, pool.name, node) def getRequestHandler(self, poolworker, request): return StaticNodeRequestHandler(poolworker, request) diff --git a/nodepool/tests/unit/test_driver_static.py b/nodepool/tests/unit/test_driver_static.py index 219a89758..b2c0f7308 100644 --- a/nodepool/tests/unit/test_driver_static.py +++ b/nodepool/tests/unit/test_driver_static.py @@ -274,6 +274,36 @@ class TestDriverStatic(tests.DBTestCase): self.waitForNodeDeletion(node) self.waitForNodeRequest(req_waiting, zk.FULFILLED) + def test_static_handler_race_cleanup(self): + configfile = self.setup_config('static-basic.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + node = self.waitForNodes('fake-label')[0] + + # Create the result of a race between re-registration of a + # ready node and a new building node. + data = node.toDict() + data.update({ + "state": zk.BUILDING, + "hostname": "", + "username": "", + "connection_port": 22, + }) + building_node = zk.Node.fromDict(data) + self.zk.storeNode(building_node) + self.zk.lockNode(building_node) + + # Node will be deregistered and assigned to the building node + self.waitForNodeDeletion(node) + nodes = self.waitForNodes('fake-label') + self.assertEqual(len(nodes), 1) + self.assertEqual(building_node.id, nodes[0].id) + + building_node.state = zk.USED + self.zk.storeNode(building_node) + self.zk.unlockNode(building_node) + self.waitForNodeDeletion(building_node) + def test_static_multinode_handler(self): configfile = self.setup_config('static.yaml') pool = self.useNodepool(configfile, watermark_sleep=1)