Merge "Improve handling of errors in provider manager startup"

This commit is contained in:
Zuul 2022-02-05 18:03:37 +00:00 committed by Gerrit Code Review
commit 1ee602c3f5
5 changed files with 94 additions and 2 deletions

View File

@ -421,3 +421,8 @@ class FakeProvider(OpenStackProvider):
def getRequestHandler(self, poolworker, request):
    """Create the fake request handler for a node request.

    :param poolworker: Pool worker passed through to the handler.
    :param request: Node request passed through to the handler.
    :returns: A new ``FakeNodeRequestHandler`` built from both arguments.
    """
    handler = FakeNodeRequestHandler(poolworker, request)
    return handler
def start(self, zk_conn):
    """Start the fake provider, simulating a startup failure on demand.

    A provider whose configured region name is ``broken-region`` refuses
    to start; any other region defers to the parent class.

    :param zk_conn: Connection object handed on to the parent ``start``.
    :raises Exception: If the provider's region name is
        ``broken-region``.
    """
    region = self.provider.region_name
    if region != 'broken-region':
        super().start(zk_conn)
        return
    # Deliberate failure used to exercise startup error handling.
    raise Exception("Broken cloud config")

View File

@ -959,6 +959,9 @@ class NodePool(threading.Thread):
self.reconfigureZooKeeper(config)
provider_manager.ProviderManager.reconfigure(self.config, config,
self.getZK())
for provider_name in list(config.providers.keys()):
if provider_name not in config.provider_managers:
del config.providers[provider_name]
self.setConfig(config)
def removeCompletedRequests(self):

View File

@ -57,8 +57,13 @@ class ProviderManager(object):
else:
ProviderManager.log.debug("Creating new ProviderManager object"
" for %s" % p.name)
new_config.provider_managers[p.name] = get_provider(p)
new_config.provider_managers[p.name].start(zk_conn)
try:
pm = get_provider(p)
pm.start(zk_conn)
new_config.provider_managers[p.name] = pm
except Exception:
ProviderManager.log.exception(
"Error starting provider %s", p.name)
for stop_manager in stop_managers:
stop_manager.stop()

View File

@ -0,0 +1,49 @@
# Test fixture with two fake-driver providers: one healthy, one whose
# region name makes the fake provider raise during start.
# NOTE(review): presumably this is the broken_provider_config.yaml loaded
# by test_broken_provider - confirm against the test.
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
# Each label below is offered by exactly one provider, so a request for
# broken-label can only be served by broken-provider.
labels:
- name: broken-label
min-ready: 0
- name: good-label
min-ready: 0
providers:
# Healthy provider: the only one offering good-label.
- name: good-provider
cloud: fake
driver: fake
region-name: fake
rate: 0.0001
cloud-images:
- name: good-image
pools:
- name: main
labels:
- name: good-label
cloud-image: good-image
min-ram: 8192
flavor-name: 'Fake'
# Broken provider: region-name broken-region is the value the fake
# provider's start method checks for before raising.
- name: broken-provider
cloud: fake
driver: fake
region-name: broken-region
rate: 0.0001
cloud-images:
- name: broken-image
pools:
- name: main
labels:
- name: broken-label
cloud-image: broken-image
min-ram: 8192
flavor-name: 'Fake'

View File

@ -1805,6 +1805,36 @@ class TestLauncher(tests.DBTestCase):
req = self.waitForNodeRequest(req)
self.assertEqual(req.state, zk.FAILED)
def test_broken_provider(self):
    '''
    A provider with a broken config must not be started, and requests
    that only it could serve should end up failed.  Providers with a
    working config should still start and fulfill their requests.
    '''
    config_path = self.setup_config('broken_provider_config.yaml')
    launcher = self.useNodepool(config_path, watermark_sleep=1)
    launcher.start()
    self.wait_for_config(launcher)

    # Register the image that the good provider's pool label refers to.
    good_manager = launcher.getProviderManager('good-provider')
    good_manager._client.create_image(name="good-image")

    # Submit one request per label: the good one first, then the broken
    # one, matching the order in which they are awaited below.
    submitted = []
    for label in ('good-label', 'broken-label'):
        request = zk.NodeRequest()
        request.state = zk.REQUESTED
        request.node_types.append(label)
        self.zk.storeNodeRequest(request)
        submitted.append(request)

    good_req, broken_req = [
        self.waitForNodeRequest(request) for request in submitted]

    self.assertEqual(good_req.state, zk.FULFILLED)
    self.assertEqual(broken_req.state, zk.FAILED)
def test_provider_wont_wedge(self):
'''
A provider should not wedge itself when it is at (1) maximum capacity