# Copyright (C) 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import time
import fixtures

from nodepool import tests
from nodepool import zk
import nodepool.fakeprovider
import nodepool.nodepool


class TestNodepool(tests.DBTestCase):
    """Functional tests that drive a nodepool launcher against fakes.

    Each test loads a named config fixture, starts a pool (and usually an
    image builder), and then checks the node / node request records
    stored in ZooKeeper.
    """

    log = logging.getLogger("nodepool.TestNodepool")

    def test_node_assignment(self):
        '''
        Successful node launch should have unlocked nodes in READY state
        and assigned to the request.
        '''
        configfile = self.setup_config('node.yaml')
        self._useBuilder(configfile)
        image = self.waitForImage('fake-provider', 'fake-image')

        # Patch through a fixture so the module-level value is restored
        # when the test finishes instead of leaking into other tests.
        self.useFixture(
            fixtures.MonkeyPatch('nodepool.nodepool.LOCK_CLEANUP', 1))
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)

        self.assertNotEqual(req.nodes, [])
        for node_id in req.nodes:
            node = self.zk.getNode(node_id)
            self.assertEqual(node.allocated_to, req.id)
            self.assertEqual(node.state, zk.READY)
            self.assertIsNotNone(node.launcher)
            self.assertEqual(node.az, "az1")
            p = "{path}/{id}".format(
                path=self.zk._imageUploadPath(image.image_name,
                                              image.build_id,
                                              image.provider_name),
                id=image.id)
            self.assertEqual(node.image_id, p)
            # Taking the lock without blocking only succeeds if the
            # launcher has already released the node.
            self.zk.lockNode(node, blocking=False)
            self.zk.unlockNode(node)

        # The request lock should still exist while the request does.
        self.assertIsNotNone(
            self.zk.client.exists(self.zk._requestLockPath(req.id))
        )
        # Once the request is deleted, verify the cleanup thread removes
        # the lock.
        self.zk.deleteNodeRequest(req)
        self.waitForNodeRequestLockDeletion(req.id)
        self.assertReportedStat('nodepool.nodes.ready', '1|g')
        self.assertReportedStat('nodepool.nodes.building', '0|g')

    def test_node_assignment_at_quota(self):
        '''
        A request accepted while the provider is at quota should pause
        without deadlocking, and be fulfilled once nodes are freed.
        '''
        configfile = self.setup_config('node_quota.yaml')
        self._useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        # Patch through a fixture so the module-level value is restored
        # when the test finishes instead of leaking into other tests.
        self.useFixture(
            fixtures.MonkeyPatch('nodepool.nodepool.LOCK_CLEANUP', 1))
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.wait_for_config(pool)

        client = pool.getProviderManager('fake-provider')._getClient()

        # One of the things we want to test is that if we spawn many
        # node launches at once, we do not deadlock while the request
        # handler pauses for quota.  To ensure we test that case,
        # pause server creation until we have accepted all of the node
        # requests we submit.  This will ensure that we hold locks on
        # all of the nodes before pausing so that we can validate they
        # are released.
        client.pause_creates = True

        req1 = zk.NodeRequest()
        req1.state = zk.REQUESTED
        req1.node_types.append('fake-label')
        req1.node_types.append('fake-label')
        self.zk.storeNodeRequest(req1)
        req2 = zk.NodeRequest()
        req2.state = zk.REQUESTED
        req2.node_types.append('fake-label')
        req2.node_types.append('fake-label')
        self.zk.storeNodeRequest(req2)

        req1 = self.waitForNodeRequest(req1, (zk.PENDING,))
        req2 = self.waitForNodeRequest(req2, (zk.PENDING,))

        # At this point, we should be about to create or have already
        # created two servers for the first request, and the request
        # handler has accepted the second node request but paused
        # waiting for the server count to go below quota.

        # Wait until both of the servers exist.
        while len(client._server_list) < 2:
            time.sleep(0.1)

        # Allow the servers to finish being created.
        for server in client._server_list:
            server.event.set()

        self.log.debug("Waiting for 1st request %s", req1.id)
        req1 = self.waitForNodeRequest(req1)
        self.assertEqual(req1.state, zk.FULFILLED)
        self.assertEqual(len(req1.nodes), 2)

        # Mark the first request's nodes as USED, which will get them deleted
        # and allow the second to proceed.
        self.log.debug("Marking first node as used %s", req1.id)
        node = self.zk.getNode(req1.nodes[0])
        node.state = zk.USED
        self.zk.storeNode(node)
        self.waitForNodeDeletion(node)

        # To force the sequential nature of what we're testing, wait for
        # the 2nd request to get a node allocated to it now that we've
        # freed up a node.
        self.log.debug("Waiting for node allocation for 2nd request")
        # Sleep between polls, like the other wait loops in this test,
        # rather than spinning on ZooKeeper.
        while not any(n.allocated_to == req2.id
                      for n in self.zk.nodeIterator()):
            time.sleep(0.1)

        self.log.debug("Marking second node as used %s", req1.id)
        node = self.zk.getNode(req1.nodes[1])
        node.state = zk.USED
        self.zk.storeNode(node)
        self.waitForNodeDeletion(node)

        self.log.debug("Deleting 1st request %s", req1.id)
        self.zk.deleteNodeRequest(req1)
        self.waitForNodeRequestLockDeletion(req1.id)

        # Wait until both of the servers exist.
        while len(client._server_list) < 2:
            time.sleep(0.1)

        # Allow the servers to finish being created.
        for server in client._server_list:
            server.event.set()

        req2 = self.waitForNodeRequest(req2)
        self.assertEqual(req2.state, zk.FULFILLED)
        self.assertEqual(len(req2.nodes), 2)

    def test_fail_request_on_launch_failure(self):
        '''
        Test that provider launch error fails the request.
        '''
        configfile = self.setup_config('node_launch_retry.yaml')
        self._useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.wait_for_config(pool)
        manager = pool.getProviderManager('fake-provider')
        # Counter on the fake manager; the assertion below expects it to
        # have been counted down to zero by the launch attempts.
        manager.createServer_fails = 2

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(0, manager.createServer_fails)
        self.assertEqual(req.state, zk.FAILED)
        self.assertNotEqual(req.declined_by, [])

    def test_invalid_image_fails(self):
        '''
        Test that an invalid image declines and fails the request.
        '''
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append("zorky-zumba")
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FAILED)
        self.assertNotEqual(req.declined_by, [])

    def test_node(self):
        """Test that an image and node are created"""
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')

        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider')
        self.assertEqual(nodes[0].type, 'fake-label')
        self.assertNotEqual(nodes[0].host_keys, [])

    def test_disabled_label(self):
        """Test that a node is not created with min-ready=0"""
        configfile = self.setup_config('node_disabled_label.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        self.assertEqual([], self.zk.getNodeRequests())
        self.assertEqual([], self.zk.getNodes())

    def test_node_net_name(self):
        """Test that a node is created with a net name"""
        configfile = self.setup_config('node_net_name.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider')
        self.assertEqual(nodes[0].type, 'fake-label')

    def test_node_vhd_image(self):
        """Test that an image and node are created with a vhd image"""
        configfile = self.setup_config('node_vhd.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider')
        self.assertEqual(nodes[0].type, 'fake-label')

    def test_node_vhd_and_qcow2(self):
        """Test label provided by vhd and qcow2 images builds"""
        configfile = self.setup_config('node_vhd_and_qcow2.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        self.waitForImage('fake-provider1', 'fake-image')
        self.waitForImage('fake-provider2', 'fake-image')
        pool.start()
        nodes = self.waitForNodes('fake-label', 2)
        self.assertEqual(len(nodes), 2)
        self.assertEqual(zk.READY, nodes[0].state)
        self.assertEqual(zk.READY, nodes[1].state)
        # Ordering of the two nodes is not fixed; only require that the
        # pair covers both providers.
        if nodes[0].provider == 'fake-provider1':
            self.assertEqual(nodes[1].provider, 'fake-provider2')
        else:
            self.assertEqual(nodes[1].provider, 'fake-provider1')

    def test_dib_upload_fail(self):
        """Test that an image upload failure is contained."""
        configfile = self.setup_config('node_upload_fail.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider2', 'fake-image')
        nodes = self.waitForNodes('fake-label', 2)
        self.assertEqual(len(nodes), 2)
        total_nodes = sum(1 for _ in self.zk.nodeIterator())
        self.assertEqual(total_nodes, 2)
        self.assertEqual(nodes[0].provider, 'fake-provider2')
        self.assertEqual(nodes[0].type, 'fake-label')
        self.assertEqual(nodes[1].provider, 'fake-provider2')
        self.assertEqual(nodes[1].type, 'fake-label')

    def test_node_az(self):
        """Test that an image and node are created with az specified"""
        configfile = self.setup_config('node_az.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider')
        self.assertEqual(nodes[0].az, 'az1')

    def test_node_ipv6(self):
        """Test that a node is created w/ or w/o ipv6 preferred flag"""
        configfile = self.setup_config('node_ipv6.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider1', 'fake-image')
        self.waitForImage('fake-provider2', 'fake-image')
        self.waitForImage('fake-provider3', 'fake-image')
        label1_nodes = self.waitForNodes('fake-label1')
        label2_nodes = self.waitForNodes('fake-label2')
        label3_nodes = self.waitForNodes('fake-label3')

        self.assertEqual(len(label1_nodes), 1)
        self.assertEqual(len(label2_nodes), 1)
        self.assertEqual(len(label3_nodes), 1)

        # ipv6 preferred set to true and ipv6 address available
        self.assertEqual(label1_nodes[0].provider, 'fake-provider1')
        self.assertEqual(label1_nodes[0].public_ipv4, 'fake')
        self.assertEqual(label1_nodes[0].public_ipv6, 'fake_v6')

        # ipv6 preferred unspecified and ipv6 address available
        self.assertEqual(label2_nodes[0].provider, 'fake-provider2')
        self.assertEqual(label2_nodes[0].public_ipv4, 'fake')
        self.assertEqual(label2_nodes[0].public_ipv6, 'fake_v6')

        # ipv6 preferred set to true but ipv6 address unavailable
        self.assertEqual(label3_nodes[0].provider, 'fake-provider3')
        self.assertEqual(label3_nodes[0].public_ipv4, 'fake')
        self.assertEqual(label3_nodes[0].public_ipv6, '')

    def test_node_delete_success(self):
        """Test that a node marked DELETING is removed and replaced"""
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(zk.READY, nodes[0].state)
        self.assertEqual('fake-provider', nodes[0].provider)
        nodes[0].state = zk.DELETING
        self.zk.storeNode(nodes[0])

        # Wait for this one to be deleted
        self.waitForNodeDeletion(nodes[0])

        # Wait for a new one to take its place
        new_nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(new_nodes), 1)
        self.assertEqual(zk.READY, new_nodes[0].state)
        self.assertEqual('fake-provider', new_nodes[0].provider)
        self.assertNotEqual(nodes[0], new_nodes[0])

    def test_node_launch_retries(self):
        """Test that a failing launch is retried before the request fails"""
        configfile = self.setup_config('node_launch_retry.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.wait_for_config(pool)
        manager = pool.getProviderManager('fake-provider')
        manager.createServer_fails = 2
        self.waitForImage('fake-provider', 'fake-image')

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FAILED)

        # retries in config is set to 2, so 2 attempts to create a server
        self.assertEqual(0, manager.createServer_fails)

    def test_node_delete_failure(self):
        """Test that a node stays in DELETING when the provider delete fails"""
        def fail_delete(self, name):
            raise RuntimeError('Fake Error')

        fake_delete = ('nodepool.provider_manager.'
                       'FakeProviderManager.deleteServer')
        self.useFixture(fixtures.MonkeyPatch(fake_delete, fail_delete))

        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)

        self.zk.lockNode(nodes[0], blocking=False)
        nodepool.nodepool.InstanceDeleter.delete(
            self.zk, pool.getProviderManager('fake-provider'), nodes[0])

        # Make sure our old node is in delete state, even though delete failed
        deleted_node = self.zk.getNode(nodes[0].id)
        self.assertIsNotNone(deleted_node)
        self.assertEqual(deleted_node.state, zk.DELETING)

        # Make sure we have a new, READY node
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider')

    def test_leaked_node(self):
        """Test that a leaked node is deleted"""
        configfile = self.setup_config('leaked_node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        self.log.debug("Waiting for initial pool...")
        nodes = self.waitForNodes('fake-label')
        self.log.debug("...done waiting for initial pool.")

        # Make sure we have a node built and ready
        self.assertEqual(len(nodes), 1)
        manager = pool.getProviderManager('fake-provider')
        servers = manager.listServers()
        self.assertEqual(len(servers), 1)

        # Delete the node from ZooKeeper, but leave the instance
        # so it is leaked.
        self.log.debug("Delete node db record so instance is leaked...")
        self.zk.deleteNode(nodes[0])
        self.log.debug("...deleted node db so instance is leaked.")

        # Wait for nodepool to replace it
        self.log.debug("Waiting for replacement pool...")
        new_nodes = self.waitForNodes('fake-label')
        self.log.debug("...done waiting for replacement pool.")
        self.assertEqual(len(new_nodes), 1)

        # Wait for the instance to be cleaned up
        self.waitForInstanceDeletion(manager, nodes[0].external_id)

        # Make sure we end up with only one server (the replacement)
        servers = manager.listServers()
        self.assertEqual(len(servers), 1)

    def test_label_provider(self):
        """Test that only providers listed in the label satisfy the request"""
        configfile = self.setup_config('node_label_provider.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        self.waitForImage('fake-provider2', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].provider, 'fake-provider2')

    def _create_pending_request(self):
        """Store a PENDING request plus a READY node allocated to it.

        Returns a ``(req, node)`` tuple of the stored objects.
        """
        req = zk.NodeRequest()
        req.state = zk.PENDING
        req.requestor = 'test_nodepool'
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        # Create a node that is allocated to the request, but not yet assigned
        # within the NodeRequest object
        node = zk.Node()
        node.state = zk.READY
        node.type = 'fake-label'
        node.public_ipv4 = 'fake'
        node.provider = 'fake-provider'
        node.pool = 'main'
        node.allocated_to = req.id
        self.zk.storeNode(node)

        return (req, node)

    def test_lost_requests(self):
        """Test a request left pending is reset and satisfied on restart"""
        (req, node) = self._create_pending_request()

        configfile = self.setup_config('node_lost_requests.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        req = self.waitForNodeRequest(req, (zk.FULFILLED,))
        # Since our config file has min-ready=0, we should be able to re-use
        # the previously assigned node, thus making sure that the cleanup
        # code reset the 'allocated_to' field.
        self.assertIn(node.id, req.nodes)

    def test_node_deallocation(self):
        """Test an allocated node with a missing request is deallocated"""
        node = zk.Node()
        node.state = zk.READY
        node.type = 'fake-label'
        node.public_ipv4 = 'fake'
        node.provider = 'fake-provider'
        node.allocated_to = "MISSING"
        self.zk.storeNode(node)

        configfile = self.setup_config('node_lost_requests.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()

        # Poll until the allocation is cleared, sleeping between reads
        # rather than spinning on ZooKeeper.
        while True:
            node = self.zk.getNode(node.id)
            if not node.allocated_to:
                break
            time.sleep(0.1)

    def test_multiple_pools(self):
        """Test that nodes are created in each of multiple pools"""
        configfile = self.setup_config('multiple_pools.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')

        lab1 = self.waitForNodes('fake-label1')
        lab2 = self.waitForNodes('fake-label2')

        self.assertEqual(len(lab1), 1)
        self.assertEqual(lab1[0].provider, 'fake-provider')
        self.assertEqual(lab1[0].type, 'fake-label1')
        self.assertEqual(lab1[0].az, 'az1')
        self.assertEqual(lab1[0].pool, 'pool1')

        self.assertEqual(len(lab2), 1)
        self.assertEqual(lab2[0].provider, 'fake-provider')
        self.assertEqual(lab2[0].type, 'fake-label2')
        self.assertEqual(lab2[0].az, 'az2')
        self.assertEqual(lab2[0].pool, 'pool2')