From ae65d94e34c35df43e59e109d7a7658de3bc8f0d Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Tue, 12 Sep 2017 15:07:49 +0200 Subject: [PATCH] Don't fail on quota exceeded When the quota available to nodepool is reduced, some launch attempts can fail because the quota is exceeded. Handle this gracefully and invalidate the quota cache so that the reduced quota is taken into account immediately for subsequent node requests. Change-Id: I2fb2ea3e0d68cfa52b2b663b9c6d1654e2f45e97 --- nodepool/driver/openstack/handler.py | 15 +++++++++++++-- nodepool/driver/openstack/provider.py | 3 +++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/nodepool/driver/openstack/handler.py b/nodepool/driver/openstack/handler.py index 424d88fd8..251408deb 100644 --- a/nodepool/driver/openstack/handler.py +++ b/nodepool/driver/openstack/handler.py @@ -212,7 +212,7 @@ class NodeLauncher(threading.Thread, stats.StatsReporter): try: self._launchNode() break - except Exception: + except Exception as e: if attempts <= self._retries: self.log.exception( "Launch attempt %d/%d failed for node %s:", @@ -228,7 +228,18 @@ class NodeLauncher(threading.Thread, stats.StatsReporter): self._zk.storeNode(self._node) if attempts == self._retries: raise - attempts += 1 + # Don't count launch attempts caused by quota exceeded. These + # are transient and should occur only if the quota available + # to nodepool gets reduced. In this case directly invalidate + # the quota cache such that the available quota is recalculated + # for the next node request. Also sleep for a while before + # retrying to relaunch this node. 
+ if 'quota exceeded' in str(e).lower(): + self.log.info("Quota exceeded, invalidating quota cache") + self._provider.invalidateQuotaCache() + time.sleep(5) + else: + attempts += 1 self._node.state = zk.READY self._zk.storeNode(self._node) diff --git a/nodepool/driver/openstack/provider.py b/nodepool/driver/openstack/provider.py index c13d9cfe8..670d236a0 100755 --- a/nodepool/driver/openstack/provider.py +++ b/nodepool/driver/openstack/provider.py @@ -196,6 +196,9 @@ class OpenStackProvider(Provider): return copy.deepcopy(self._current_nodepool_quota['quota']) + def invalidateQuotaCache(self): + self._current_nodepool_quota['timestamp'] = 0 + def estimatedNodepoolQuotaUsed(self, zk, pool=None): ''' Sums up the quota used (or planned) currently by nodepool. If pool is