Merge "Do not reset quota cache timestamp when invalid"
This commit is contained in:
commit
7129681181
|
@ -105,8 +105,7 @@ class FakeOpenStackCloud(object):
|
|||
]
|
||||
self._azs = ['az1', 'az2']
|
||||
self._server_list = []
|
||||
self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\
|
||||
_get_quota()
|
||||
self._update_quota()
|
||||
self._down_ports = [
|
||||
Dummy(Dummy.PORT, id=uuid.uuid4().hex, status='DOWN',
|
||||
device_owner="compute:nova"),
|
||||
|
@ -114,6 +113,10 @@ class FakeOpenStackCloud(object):
|
|||
device_owner=None),
|
||||
]
|
||||
|
||||
def _update_quota(self):
|
||||
self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\
|
||||
_get_quota()
|
||||
|
||||
def _get(self, name_or_id, instance_list):
|
||||
self.log.debug("Get %s in %s" % (name_or_id, repr(instance_list)))
|
||||
for instance in instance_list:
|
||||
|
@ -162,6 +165,7 @@ class FakeOpenStackCloud(object):
|
|||
private_v4 = 'fake'
|
||||
host_id = 'fake'
|
||||
interface_ip = 'fake'
|
||||
self._update_quota()
|
||||
over_quota = False
|
||||
if (instance_type == Dummy.INSTANCE and
|
||||
self.max_instances > -1 and
|
||||
|
@ -292,6 +296,7 @@ class FakeOpenStackCloud(object):
|
|||
return self._azs.copy()
|
||||
|
||||
def get_compute_limits(self):
|
||||
self._update_quota()
|
||||
return Dummy(
|
||||
'limits',
|
||||
max_total_cores=self.max_cores,
|
||||
|
|
|
@ -259,7 +259,8 @@ class QuotaSupport:
|
|||
|
||||
def __init__(self, *args, **kw):
|
||||
super().__init__(*args, **kw)
|
||||
self._current_nodepool_quota = None
|
||||
self._current_nodepool_quota_timestamp = 0
|
||||
self._current_nodepool_quota = {}
|
||||
|
||||
@abc.abstractmethod
|
||||
def quotaNeededByLabel(self, label, pool):
|
||||
|
@ -291,7 +292,7 @@ class QuotaSupport:
|
|||
pass
|
||||
|
||||
def invalidateQuotaCache(self):
|
||||
self._current_nodepool_quota['timestamp'] = 0
|
||||
self._current_nodepool_quota_timestamp = 0
|
||||
|
||||
def estimatedNodepoolQuota(self):
|
||||
'''
|
||||
|
@ -307,8 +308,8 @@ class QuotaSupport:
|
|||
|
||||
if self._current_nodepool_quota:
|
||||
now = time.time()
|
||||
if now < self._current_nodepool_quota['timestamp'] + MAX_QUOTA_AGE:
|
||||
return copy.deepcopy(self._current_nodepool_quota['quota'])
|
||||
if now < self._current_nodepool_quota_timestamp + MAX_QUOTA_AGE:
|
||||
return copy.deepcopy(self._current_nodepool_quota)
|
||||
|
||||
# This is initialized with the full tenant quota and later becomes
|
||||
# the quota available for nodepool.
|
||||
|
@ -318,7 +319,7 @@ class QuotaSupport:
|
|||
if self._current_nodepool_quota:
|
||||
self.log.exception("Unable to get provider quota, "
|
||||
"using cached value")
|
||||
return copy.deepcopy(self._current_nodepool_quota['quota'])
|
||||
return copy.deepcopy(self._current_nodepool_quota)
|
||||
raise
|
||||
|
||||
self.log.debug("Provider quota for %s: %s",
|
||||
|
@ -328,10 +329,8 @@ class QuotaSupport:
|
|||
# to get the quota available for us.
|
||||
nodepool_quota.subtract(self.unmanagedQuotaUsed())
|
||||
|
||||
self._current_nodepool_quota = {
|
||||
'quota': nodepool_quota,
|
||||
'timestamp': time.time()
|
||||
}
|
||||
self._current_nodepool_quota = nodepool_quota
|
||||
self._current_nodepool_quota_timestamp = time.time()
|
||||
|
||||
self.log.debug("Available quota for %s: %s",
|
||||
self.provider.name, nodepool_quota)
|
||||
|
|
|
@ -583,6 +583,15 @@ class DBTestCase(BaseTestCase):
|
|||
self.wait_for_threads()
|
||||
return ready_nodes[label]
|
||||
|
||||
def waitForAnyNodeInState(self, state):
|
||||
# Wait for a node to be in the aborted state
|
||||
for _ in iterate_timeout(ONE_MINUTE, Exception,
|
||||
f"Timeout waiting for node in {state} state",
|
||||
interval=1):
|
||||
for node in self.zk.nodeIterator(False):
|
||||
if node.state == state:
|
||||
return node
|
||||
|
||||
def waitForNodeRequest(self, req, states=None):
|
||||
'''
|
||||
Wait for a node request to transition to a final state.
|
||||
|
|
|
@ -403,6 +403,7 @@ class TestLauncher(tests.DBTestCase):
|
|||
|
||||
# patch the cloud with requested quota
|
||||
def fake_get_quota():
|
||||
nonlocal max_cores, max_instances, max_ram
|
||||
return (max_cores, max_instances, max_ram)
|
||||
self.useFixture(fixtures.MockPatchObject(
|
||||
fakeprovider.FakeProvider.fake_cloud, '_get_quota',
|
||||
|
@ -435,7 +436,7 @@ class TestLauncher(tests.DBTestCase):
|
|||
|
||||
# Now, reduce the quota so the next node unexpectedly
|
||||
# (according to nodepool's quota estimate) fails.
|
||||
client.max_instances = 1
|
||||
max_instances = 1
|
||||
|
||||
# Request a second node; this request should pause the handler.
|
||||
req2 = zk.NodeRequest()
|
||||
|
@ -2280,6 +2281,53 @@ class TestLauncher(tests.DBTestCase):
|
|||
req3 = self.waitForNodeRequest(req3)
|
||||
self.assertEqual(req3.state, zk.FAILED)
|
||||
|
||||
def test_ignore_provider_quota_true_with_provider_error(self):
|
||||
'''
|
||||
Tests that quota errors returned from the provider when allocating a
|
||||
node are correctly handled and retried. The node request should be
|
||||
retried until there is sufficient quota to satisfy it.
|
||||
'''
|
||||
max_instances = 0
|
||||
|
||||
def fake_get_quota():
|
||||
nonlocal max_instances
|
||||
return (100, max_instances, 1000000)
|
||||
|
||||
self.useFixture(fixtures.MockPatchObject(
|
||||
fakeprovider.FakeProvider.fake_cloud, '_get_quota',
|
||||
fake_get_quota
|
||||
))
|
||||
|
||||
configfile = self.setup_config('ignore_provider_quota_true.yaml')
|
||||
self.useBuilder(configfile)
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
|
||||
# Create a request with ignore-provider-quota set to true that should
|
||||
# pass regardless of the lack of cloud/provider quota.
|
||||
self.replace_config(configfile, 'ignore_provider_quota_true.yaml')
|
||||
|
||||
self.log.debug(
|
||||
"Submitting an initial request with ignore-provider-quota True")
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req)
|
||||
|
||||
self.waitForAnyNodeInState(zk.ABORTED)
|
||||
|
||||
self.assertReportedStat(
|
||||
'nodepool.launch.provider.fake-provider.error.quota')
|
||||
self.assertReportedStat(
|
||||
'nodepool.launch.error.quota')
|
||||
|
||||
# Bump up the quota to allow the provider to allocate a node
|
||||
max_instances = 1
|
||||
req = self.waitForNodeRequest(req)
|
||||
self.assertEqual(req.state, zk.FULFILLED)
|
||||
|
||||
def test_request_order(self):
|
||||
"""Test that requests are handled in sorted order"""
|
||||
configfile = self.setup_config('node_no_min_ready.yaml')
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an exception that was raised by the OpenStack driver when attempting
|
||||
to reset the quota timestamp and `ignore-provider-quota` is `true`. This
|
||||
exception prevented nodepool from seeing quota errors from OpenStack,
|
||||
causing it to fail the node request instead of retrying as it does for
|
||||
other providers.
|
Loading…
Reference in New Issue