diff --git a/nodepool/driver/fake/provider.py b/nodepool/driver/fake/provider.py
index 1bbdfddb7..e38376a1d 100644
--- a/nodepool/driver/fake/provider.py
+++ b/nodepool/driver/fake/provider.py
@@ -421,3 +421,8 @@ class FakeProvider(OpenStackProvider):
 
     def getRequestHandler(self, poolworker, request):
         return FakeNodeRequestHandler(poolworker, request)
+
+    def start(self, zk_conn):
+        if self.provider.region_name == 'broken-region':
+            raise Exception("Broken cloud config")
+        super().start(zk_conn)
diff --git a/nodepool/launcher.py b/nodepool/launcher.py
index c6f619f6e..5377f12b3 100644
--- a/nodepool/launcher.py
+++ b/nodepool/launcher.py
@@ -957,6 +957,9 @@ class NodePool(threading.Thread):
         self.reconfigureZooKeeper(config)
         provider_manager.ProviderManager.reconfigure(self.config, config,
                                                      self.getZK())
+        for provider_name in list(config.providers.keys()):
+            if provider_name not in config.provider_managers:
+                del config.providers[provider_name]
         self.setConfig(config)
 
     def removeCompletedRequests(self):
diff --git a/nodepool/provider_manager.py b/nodepool/provider_manager.py
index 72a7bece1..06674e894 100644
--- a/nodepool/provider_manager.py
+++ b/nodepool/provider_manager.py
@@ -57,8 +57,13 @@ class ProviderManager(object):
             else:
                 ProviderManager.log.debug("Creating new ProviderManager object"
                                           " for %s" % p.name)
-                new_config.provider_managers[p.name] = get_provider(p)
-                new_config.provider_managers[p.name].start(zk_conn)
+                try:
+                    pm = get_provider(p)
+                    pm.start(zk_conn)
+                    new_config.provider_managers[p.name] = pm
+                except Exception:
+                    ProviderManager.log.exception(
+                        "Error starting provider %s", p.name)
 
         for stop_manager in stop_managers:
             stop_manager.stop()
diff --git a/nodepool/tests/fixtures/broken_provider_config.yaml b/nodepool/tests/fixtures/broken_provider_config.yaml
new file mode 100644
index 000000000..500a55658
--- /dev/null
+++ b/nodepool/tests/fixtures/broken_provider_config.yaml
@@ -0,0 +1,49 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: broken-label
+    min-ready: 0
+  - name: good-label
+    min-ready: 0
+
+providers:
+  - name: good-provider
+    cloud: fake
+    driver: fake
+    region-name: fake
+    rate: 0.0001
+    cloud-images:
+      - name: good-image
+    pools:
+      - name: main
+        labels:
+          - name: good-label
+            cloud-image: good-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+  - name: broken-provider
+    cloud: fake
+    driver: fake
+    region-name: broken-region
+    rate: 0.0001
+    cloud-images:
+      - name: broken-image
+    pools:
+      - name: main
+        labels:
+          - name: broken-label
+            cloud-image: broken-image
+            min-ram: 8192
+            flavor-name: 'Fake'
diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py
index 3a1fd51a0..7d7f763ad 100644
--- a/nodepool/tests/unit/test_launcher.py
+++ b/nodepool/tests/unit/test_launcher.py
@@ -1797,6 +1797,36 @@ class TestLauncher(tests.DBTestCase):
         req = self.waitForNodeRequest(req)
         self.assertEqual(req.state, zk.FAILED)
 
+    def test_broken_provider(self):
+        '''
+        If a provider has a broken config, it should not be started, and
+        any requests for it should be declined/failed. Other
+        providers should be started and should be able to fulfill
+        requests.
+        '''
+        configfile = self.setup_config('broken_provider_config.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.wait_for_config(pool)
+
+        manager = pool.getProviderManager('good-provider')
+        manager._client.create_image(name="good-image")
+
+        good_req = zk.NodeRequest()
+        good_req.state = zk.REQUESTED
+        good_req.node_types.append('good-label')
+        self.zk.storeNodeRequest(good_req)
+
+        broken_req = zk.NodeRequest()
+        broken_req.state = zk.REQUESTED
+        broken_req.node_types.append('broken-label')
+        self.zk.storeNodeRequest(broken_req)
+
+        good_req = self.waitForNodeRequest(good_req)
+        broken_req = self.waitForNodeRequest(broken_req)
+        self.assertEqual(good_req.state, zk.FULFILLED)
+        self.assertEqual(broken_req.state, zk.FAILED)
+
     def test_provider_wont_wedge(self):
         '''
         A provider should not wedge itself when it is at (1) maximum capacity