diff --git a/nodepool/driver/__init__.py b/nodepool/driver/__init__.py
index eee016285..6eedf673b 100644
--- a/nodepool/driver/__init__.py
+++ b/nodepool/driver/__init__.py
@@ -160,6 +160,12 @@ class Provider(ProviderNotifications):
     The class or instance attribute **name** must be provided as a string.
 
     """
+
+    # This is only intended for the static driver. It indicates
+    # whether we should pause the provider when we are at quota. This
+    # should almost always be true.
+    PAUSE_AT_QUOTA = True
+
     def __init__(self, *args, **kw):
         super().__init__(*args, **kw)
@@ -495,12 +501,18 @@ class NodeRequestHandler(NodeRequestHandlerNotifications,
                 self.log.info(
                     "Not enough quota remaining to satisfy request")
-                if not self.paused:
-                    self.log.debug(
-                        "Pausing request handling to satisfy request")
-                self.paused = True
                 self.zk.deleteOldestUnusedNode(self.provider.name,
                                                self.pool.name)
+                if self.manager.PAUSE_AT_QUOTA:
+                    if not self.paused:
+                        self.log.debug(
+                            "Pausing request handling to satisfy request")
+                    self.paused = True
+                else:
+                    # Release the request so that we or another
+                    # provider can try again when the label quota
+                    # is available.
+                    self._declinedHandlerCleanup()
                 return
 
         if self.paused:
diff --git a/nodepool/driver/static/provider.py b/nodepool/driver/static/provider.py
index e7348f4a6..c8e10f278 100644
--- a/nodepool/driver/static/provider.py
+++ b/nodepool/driver/static/provider.py
@@ -48,6 +48,11 @@ class StaticNodeProvider(Provider, QuotaSupport):
     log = logging.getLogger("nodepool.driver.static."
                             "StaticNodeProvider")
 
+    # Do not pause the provider when we are at quota so that requests
+    # for different labels do not interfere with each other (resources
+    # are not fungible in the static provider).
+    PAUSE_AT_QUOTA = False
+
     def __init__(self, provider, *args):
         self.provider = provider
         # Lock to avoid data races when registering nodes from
diff --git a/nodepool/tests/fixtures/static-two-node.yaml b/nodepool/tests/fixtures/static-two-node.yaml
new file mode 100644
index 000000000..b551f35e0
--- /dev/null
+++ b/nodepool/tests/fixtures/static-two-node.yaml
@@ -0,0 +1,36 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: fake-label
+  - name: fake-label2
+
+providers:
+  - name: static-provider
+    driver: static
+    pools:
+      - name: main
+        node-attributes:
+          key1: value1
+          key2: value2
+        nodes:
+          - name: fake-host-1
+            labels: fake-label
+            host-key: ssh-rsa FAKEKEY
+            timeout: 13
+            connection-port: 22022
+            username: zuul
+          - name: fake-host-2
+            labels: fake-label2
+            host-key: ssh-rsa FAKEKEY
+            timeout: 13
+            connection-port: 22022
+            username: zuul
+            host-key-checking: false
diff --git a/nodepool/tests/unit/test_driver_static.py b/nodepool/tests/unit/test_driver_static.py
index 8523c9222..b5de5d44d 100644
--- a/nodepool/tests/unit/test_driver_static.py
+++ b/nodepool/tests/unit/test_driver_static.py
@@ -549,6 +549,61 @@ class TestDriverStatic(tests.DBTestCase):
         self.assertEqual(len(req.nodes), 1)
         self.assertNotEqual(req.nodes[0], nodes[0].id)
 
+    def test_liveness_two_node(self):
+        '''
+        Test that a node going offline doesn't block a static provider
+        '''
+        configfile = self.setup_config('static-two-node.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        nodes1 = self.waitForNodes('fake-label')
+        self.assertEqual(len(nodes1), 1)
+        nodes2 = self.waitForNodes('fake-label2')
+        self.assertEqual(len(nodes2), 1)
+
+        # We will take this node offline later
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('fake-label')
+
+        # This node does not perform host checks, so it never goes offline
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('fake-label2')
+
+        # Take the first node offline, then submit a node request for
+        # it (which should be accepted but then released once the
+        # driver fails the liveness check).
+        with mock.patch("nodepool.nodeutils.nodescan") as nodescan_mock:
+            nodescan_mock.side_effect = OSError
+            self.zk.storeNodeRequest(req1)
+            self.waitForNodeDeletion(nodes1[0])
+
+            # We know the first request has been accepted and
+            # processed since the node was deleted. At this point,
+            # submit the second request.
+            self.zk.storeNodeRequest(req2)
+
+            self.log.debug("Waiting for request %s", req2.id)
+            req2 = self.waitForNodeRequest(req2)
+            # The second request is fulfilled; get the status of the
+            # first request at the same time.
+            req1 = self.zk.getNodeRequest(req1.id)
+
+            # Verify that the first request was not fulfilled at the
+            # time that the second was.
+            self.assertEqual(req1.state, zk.REQUESTED)
+            self.assertEqual(req2.state, zk.FULFILLED)
+            self.assertEqual(len(req2.nodes), 1)
+            self.assertNotEqual(req2.nodes[0], nodes2[0].id)
+
+        # Now let req1 complete with the node online.
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        self.assertEqual(req1.state, zk.FULFILLED)
+        self.assertEqual(len(req1.nodes), 1)
+        self.assertNotEqual(req1.nodes[0], nodes1[0].id)
+
     def test_host_key_checking_toggle(self):
         """Test that host key checking can be disabled"""
         configfile = self.setup_config('static-no-check.yaml')
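
The sketch below illustrates the dispatch this change introduces: drivers
advertise PAUSE_AT_QUOTA as a class attribute, and the request handler either
pauses the provider or releases the request based on it. This is a minimal,
self-contained stand-in for review purposes only; FakeCloudProvider,
FakeStaticProvider, FakeHandler, and handle_at_quota are hypothetical names,
not nodepool APIs.

    # Illustrative sketch of the PAUSE_AT_QUOTA dispatch; all names
    # here are hypothetical stand-ins, not real nodepool classes.

    class FakeCloudProvider:
        # Cloud-style drivers keep the default: resources are fungible,
        # so pausing until quota frees up will satisfy the request.
        PAUSE_AT_QUOTA = True


    class FakeStaticProvider(FakeCloudProvider):
        # Static nodes are not fungible: a request for a busy label
        # should be released rather than blocking other labels.
        PAUSE_AT_QUOTA = False


    class FakeHandler:
        def __init__(self, manager):
            self.manager = manager
            self.paused = False
            self.released = False

        def handle_at_quota(self):
            if self.manager.PAUSE_AT_QUOTA:
                # Block the provider until capacity returns.
                self.paused = True
            else:
                # Release the request so this or another provider can
                # retry when the label quota is available.
                self.released = True


    handler = FakeHandler(FakeStaticProvider())
    handler.handle_at_quota()
    assert handler.released and not handler.paused

Using a class attribute keeps the static driver's opt-out declarative: the
handler code never needs to know which driver it is running against.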