Add ability to ignore provider quota for a pool

In some circumstances it is useful to tell the launcher to
ignore the provider quota and just trust the max-* settings
for that pool instead.

This particular need arises when using the Rackspace public
cloud for both instances and OnMetal nodes. In this situation
the quotas for instances and for OnMetal nodes are different,
but nodepool only queries the quota for instances, so the
quota check fails when building OnMetal nodes even though it
should not.

In this circumstance, rather than complicating shade/nodepool
with logic to reconcile these disparate quota types, it makes
more sense to simply allow nodepool to ignore the provider
quota for a pool and attempt the build anyway.

While this is our use case, it may also be useful to others
for other reasons.

Change-Id: I232a1ab365795381ab180aceb48e8c87843ac713
Authored and committed by Jesse Pretorius (odyssey4me), 2018-07-10 12:30:44 +01:00
commit 4c8b5f4f99, parent 03b7b4baef
7 changed files with 264 additions and 12 deletions

View File

@@ -488,6 +488,12 @@ Example::
   ram allocated by nodepool. If not defined nodepool can use as much ram as
   the tenant allows.
 
+``ignore-provider-quota``
+  Ignore the provider quota for this pool. Instead, only check against the
+  configured max values for this pool and the current usage based on stored
+  data. This may be useful in circumstances where the provider is incorrectly
+  calculating quota.
+
 ``availability-zones`` (list)
   A list of availability zones to use.
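
For illustration only (not part of this diff), a pool enabling the new
option might look like the following, where the label and image names
are hypothetical::

  pools:
    - name: main
      max-servers: 10
      ignore-provider-quota: true
      labels:
        - name: my-label
          diskimage: my-image
          min-ram: 8192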

View File

@@ -113,6 +113,7 @@ class ProviderPool(ConfigPool):
         self.name = None
         self.max_cores = None
         self.max_ram = None
+        self.ignore_provider_quota = False
         self.azs = None
         self.networks = None
         self.security_groups = None
@@ -133,6 +134,8 @@ class ProviderPool(ConfigPool):
                 other.name == self.name and
                 other.max_cores == self.max_cores and
                 other.max_ram == self.max_ram and
+                other.ignore_provider_quota == (
+                    self.ignore_provider_quota) and
                 other.azs == self.azs and
                 other.networks == self.networks and
                 other.security_groups == self.security_groups and
@@ -264,6 +267,7 @@ class OpenStackProviderConfig(ProviderConfig):
             pp.max_cores = pool.get('max-cores', math.inf)
             pp.max_servers = pool.get('max-servers', math.inf)
             pp.max_ram = pool.get('max-ram', math.inf)
+            pp.ignore_provider_quota = pool.get('ignore-provider-quota', False)
             pp.azs = pool.get('availability-zones')
             pp.networks = pool.get('networks', [])
             pp.security_groups = pool.get('security-groups', [])
@@ -353,6 +357,7 @@ class OpenStackProviderConfig(ProviderConfig):
             'networks': [str],
             'auto-floating-ip': bool,
             'host-key-checking': bool,
+            'ignore-provider-quota': bool,
             'max-cores': int,
             'max-servers': int,
             'max-ram': int,

View File

@@ -298,15 +298,18 @@ class OpenStackNodeRequestHandler(NodeRequestHandler):
     def hasRemainingQuota(self, ntype):
         needed_quota = self.manager.quotaNeededByNodeType(ntype, self.pool)
 
-        # Calculate remaining quota which is calculated as:
-        # quota = <total nodepool quota> - <used quota> - <quota for node>
-        cloud_quota = self.manager.estimatedNodepoolQuota()
-        cloud_quota.subtract(self.manager.estimatedNodepoolQuotaUsed(self.zk))
-        cloud_quota.subtract(needed_quota)
-        self.log.debug("Predicted remaining tenant quota: %s", cloud_quota)
+        if not self.pool.ignore_provider_quota:
+            # Calculate remaining quota which is calculated as:
+            # quota = <total nodepool quota> - <used quota> - <quota for node>
+            cloud_quota = self.manager.estimatedNodepoolQuota()
+            cloud_quota.subtract(
+                self.manager.estimatedNodepoolQuotaUsed(self.zk))
+            cloud_quota.subtract(needed_quota)
+            self.log.debug("Predicted remaining provider quota: %s",
+                           cloud_quota)
 
-        if not cloud_quota.non_negative():
-            return False
+            if not cloud_quota.non_negative():
+                return False
 
         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
@@ -329,11 +332,12 @@ class OpenStackNodeRequestHandler(NodeRequestHandler):
             needed_quota.add(
                 self.manager.quotaNeededByNodeType(ntype, self.pool))
 
-        cloud_quota = self.manager.estimatedNodepoolQuota()
-        cloud_quota.subtract(needed_quota)
+        if not self.pool.ignore_provider_quota:
+            cloud_quota = self.manager.estimatedNodepoolQuota()
+            cloud_quota.subtract(needed_quota)
 
-        if not cloud_quota.non_negative():
-            return False
+            if not cloud_quota.non_negative():
+                return False
 
         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
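
For intuition, here is a minimal sketch (not nodepool code) of the
arithmetic described in the comment above, using plain dicts and invented
numbers in place of nodepool's QuotaInformation objects:

    # quota = <total nodepool quota> - <used quota> - <quota for node>
    total = {'cores': 200, 'instances': 20, 'ram': 524288}   # provider limits
    used = {'cores': 180, 'instances': 10, 'ram': 262144}    # estimated usage
    needed = {'cores': 32, 'instances': 1, 'ram': 8192}      # requested node

    remaining = {k: total[k] - used[k] - needed[k] for k in total}
    # remaining == {'cores': -12, 'instances': 9, 'ram': 253952}

    # A negative value means the request would exceed provider quota and is
    # normally declined; with ignore-provider-quota set, this check is
    # skipped and only the pool's max-* limits are enforced.
    has_quota = all(v >= 0 for v in remaining.values())  # False here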

View File

@@ -0,0 +1,48 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+labels:
+  - name: fake-label
+
+providers:
+  - name: fake-provider
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image
+        meta:
+          key: value
+          key2: value
+    pools:
+      - name: main
+        ignore-provider-quota: false
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label
+            diskimage: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image
+    elements:
+      - fedora
+      - vm
+    release: 21
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@@ -0,0 +1,49 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+labels:
+  - name: fake-label
+
+providers:
+  - name: fake-provider
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image
+        meta:
+          key: value
+          key2: value
+    pools:
+      - name: main
+        max-servers: 1
+        ignore-provider-quota: true
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label
+            diskimage: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image
+    elements:
+      - fedora
+      - vm
+    release: 21
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@@ -1436,3 +1436,135 @@ class TestLauncher(tests.DBTestCase):
         while pool_worker[0].paused_handler:
             time.sleep(0.1)
         self.assertEqual(0, len(pool_worker[0].request_handlers))
+
+    def test_ignore_provider_quota_false(self):
+        '''
+        Test that a node request fails when ignore-provider-quota is
+        set to false and the provider is out of quota.
+        '''
+
+        # Set max-cores quota value to 0 to force "out of quota". Note that
+        # the fake provider checks the number of instances during server
+        # creation to decide if it should throw an over quota exception,
+        # but it doesn't check cores.
+        def fake_get_quota():
+            return (0, 20, 1000000)
+        self.useFixture(fixtures.MockPatchObject(
+            fakeprovider.FakeProvider.fake_cloud, '_get_quota',
+            fake_get_quota
+        ))
+
+        configfile = self.setup_config('ignore_provider_quota_false.yaml')
+        self.useBuilder(configfile)
+        self.waitForImage('fake-provider', 'fake-image')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+
+        # Create a request that should fail: with ignore-provider-quota set
+        # to false, the launcher declines it because it would exceed quota.
+        self.log.debug("Submitting request with ignore-provider-quota False")
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req)
+
+        req = self.waitForNodeRequest(req)
+        self.assertEqual(req.state, zk.FAILED)
+
+    def test_ignore_provider_quota_true(self):
+        '''
+        Test that node requests are fulfilled with ignore-provider-quota
+        set to true, and that the pool's own max values still apply.
+        '''
+
+        # Set max-cores quota value to 0 to force "out of quota". Note that
+        # the fake provider checks the number of instances during server
+        # creation to decide if it should throw an over quota exception,
+        # but it doesn't check cores.
+        def fake_get_quota():
+            return (0, 20, 1000000)
+        self.useFixture(fixtures.MockPatchObject(
+            fakeprovider.FakeProvider.fake_cloud, '_get_quota',
+            fake_get_quota
+        ))
+
+        configfile = self.setup_config('ignore_provider_quota_true.yaml')
+        self.useBuilder(configfile)
+        self.waitForImage('fake-provider', 'fake-image')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+
+        # Create a request with ignore-provider-quota set to true that should
+        # succeed regardless of the lack of cloud/provider quota.
+        self.replace_config(configfile, 'ignore_provider_quota_true.yaml')
+        self.log.debug(
+            "Submitting an initial request with ignore-provider-quota True")
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req1)
+        req1 = self.waitForNodeRequest(req1)
+        self.assertEqual(req1.state, zk.FULFILLED)
+
+        # Lock this node so it appears as used and not deleted
+        req1_node = self.zk.getNode(req1.nodes[0])
+        self.zk.lockNode(req1_node, blocking=False)
+
+        # Request a second node; this request should pause the handler
+        # because the pool is configured with max-servers: 1
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('fake-label')
+        self.log.debug(
+            "Submitting a second request with ignore-provider-quota True "
+            "but with a full max-servers quota.")
+        self.zk.storeNodeRequest(req2)
+
+        pool_worker = pool.getPoolWorkers('fake-provider')
+        while not pool_worker[0].paused_handler:
+            time.sleep(0.1)
+
+        # The handler is paused now and the request should be in state PENDING
+        req2 = self.waitForNodeRequest(req2, zk.PENDING)
+        self.assertEqual(req2.state, zk.PENDING)
+
+        # Now free up the first node
+        self.log.debug("Marking first node as used %s", req1.id)
+        req1_node.state = zk.USED
+        self.zk.storeNode(req1_node)
+        self.zk.unlockNode(req1_node)
+        self.waitForNodeDeletion(req1_node)
+
+        # After the first node is cleaned up, the second request can now
+        # be fulfilled.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertEqual(req2.state, zk.FULFILLED)
+
+        # Lock this node so it appears as used and not deleted
+        req2_node = self.zk.getNode(req2.nodes[0])
+        self.zk.lockNode(req2_node, blocking=False)
+
+        # Now free up the second node
+        self.log.debug("Marking second node as used %s", req2.id)
+        req2_node.state = zk.USED
+        self.zk.storeNode(req2_node)
+        self.zk.unlockNode(req2_node)
+        self.waitForNodeDeletion(req2_node)
+
+        # Request a 2-node set; this request should fail because the
+        # pool can only hold a single node at a time.
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.node_types.append('fake-label')
+        req3.node_types.append('fake-label')
+        self.log.debug(
+            "Submitting a third request with ignore-provider-quota True "
+            "for a 2-node set which the provider cannot fulfill.")
+        self.zk.storeNodeRequest(req3)
+
+        req3 = self.waitForNodeRequest(req3)
+        self.assertEqual(req3.state, zk.FAILED)

View File

@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    A new boolean pool variable ``ignore-provider-quota`` has been added to
+    allow the provider quota to be ignored for a pool. Instead, nodepool only
+    checks against the configured max values for the pool and the current usage
+    based on stored data. This may be useful in circumstances where the
+    provider is incorrectly calculating quota.