Don't fail on quota exceeded

When the quota available to nodepool is reduced, some launch attempts
may fail because the quota is exceeded. This needs to be handled
gracefully, and the quota cache should be invalidated so that the
reduced quota is taken into account immediately for the following
node requests.

Change-Id: I2fb2ea3e0d68cfa52b2b663b9c6d1654e2f45e97
This commit is contained in:
Tobias Henkel 2017-09-12 15:07:49 +02:00
parent 1b465699ed
commit ae65d94e34
2 changed files with 16 additions and 2 deletions

View File

@ -212,7 +212,7 @@ class NodeLauncher(threading.Thread, stats.StatsReporter):
try:
self._launchNode()
break
except Exception:
except Exception as e:
if attempts <= self._retries:
self.log.exception(
"Launch attempt %d/%d failed for node %s:",
@ -228,7 +228,18 @@ class NodeLauncher(threading.Thread, stats.StatsReporter):
self._zk.storeNode(self._node)
if attempts == self._retries:
raise
attempts += 1
# Don't count launch attempts caused by quota exceeded. These
# are transient and should occur only if the quota available
# to nodepool gets reduced. In this case directly invalidate
# the quota cache such that the available quota is recalculated
# for the next node request. Also sleep for a while before
# retrying to relaunch this node.
if 'quota exceeded' in str(e).lower():
self.log.info("Quota exceeded, invalidating quota cache")
self._provider.invalidateQuotaCache()
time.sleep(5)
else:
attempts += 1
self._node.state = zk.READY
self._zk.storeNode(self._node)

View File

@ -196,6 +196,9 @@ class OpenStackProvider(Provider):
return copy.deepcopy(self._current_nodepool_quota['quota'])
def invalidateQuotaCache(self):
    '''
    Mark the cached nodepool quota as stale.

    Resets the 'timestamp' entry of the quota cache to 0. The cached
    'quota' value itself is left untouched.
    '''
    cache = self._current_nodepool_quota
    # NOTE(review): presumably the cache is None until a quota has been
    # calculated for the first time -- confirm against __init__. With
    # nothing cached there is nothing to invalidate, so skip the
    # subscript instead of raising a TypeError.
    if cache is not None:
        cache['timestamp'] = 0
def estimatedNodepoolQuotaUsed(self, zk, pool=None):
'''
Sums up the quota used (or planned) currently by nodepool. If pool is