Add ability to ignore provider quota for a pool
In some circumstances it is useful to tell the launcher to ignore the provider quota and simply trust the max-* settings for that pool instead.

This need arises when using the Rackspace public cloud for both regular instances and OnMetal nodes. The quota for instances and the quota for OnMetal nodes are tracked separately, but nodepool only queries the instance quota, so the quota check fails for OnMetal builds even though it should not. Rather than complicating shade/nodepool with logic to reconcile disparate quota types, it is simpler to let nodepool ignore the provider quota for a pool and attempt the build anyway. While this is our use case, the option may also be useful to others for other reasons.

Change-Id: I232a1ab365795381ab180aceb48e8c87843ac713
parent 03b7b4baef
commit 4c8b5f4f99
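For illustration, a pool that should trust only its own max-* limits can be configured roughly as follows (an abbreviated sketch of the option in use; the provider, cloud, region, flavor, label and image names are placeholders, and the full test fixture added by this change appears further down):

    providers:
      - name: rax-onmetal
        driver: openstack
        cloud: rackspace
        region-name: IAD
        pools:
          - name: onmetal
            max-servers: 10
            ignore-provider-quota: true
            labels:
              - name: onmetal-node
                flavor-name: 'onmetal-general2-medium'
                diskimage: my-image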
@@ -488,6 +488,12 @@ Example::
   ram allocated by nodepool. If not defined nodepool can use as much ram as
   the tenant allows.
 
+``ignore-provider-quota``
+  Ignore the provider quota for this pool. Instead, only check against the
+  configured max values for this pool and the current usage based on stored
+  data. This may be useful in circumstances where the provider is incorrectly
+  calculating quota.
+
 ``availability-zones`` (list)
   A list of availability zones to use.
 
@@ -113,6 +113,7 @@ class ProviderPool(ConfigPool):
         self.name = None
         self.max_cores = None
         self.max_ram = None
+        self.ignore_provider_quota = False
         self.azs = None
         self.networks = None
         self.security_groups = None
@@ -133,6 +134,8 @@ class ProviderPool(ConfigPool):
                 other.name == self.name and
                 other.max_cores == self.max_cores and
                 other.max_ram == self.max_ram and
+                other.ignore_provider_quota == (
+                    self.ignore_provider_quota) and
                 other.azs == self.azs and
                 other.networks == self.networks and
                 other.security_groups == self.security_groups and
@@ -264,6 +267,7 @@ class OpenStackProviderConfig(ProviderConfig):
             pp.max_cores = pool.get('max-cores', math.inf)
             pp.max_servers = pool.get('max-servers', math.inf)
             pp.max_ram = pool.get('max-ram', math.inf)
+            pp.ignore_provider_quota = pool.get('ignore-provider-quota', False)
             pp.azs = pool.get('availability-zones')
             pp.networks = pool.get('networks', [])
             pp.security_groups = pool.get('security-groups', [])
@@ -353,6 +357,7 @@ class OpenStackProviderConfig(ProviderConfig):
             'networks': [str],
             'auto-floating-ip': bool,
             'host-key-checking': bool,
+            'ignore-provider-quota': bool,
             'max-cores': int,
             'max-servers': int,
             'max-ram': int,
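The dict-of-types schema above reads like a voluptuous-style validator; the new key simply requires a boolean value. As a minimal sketch under that assumption (the library choice and the reduced key set are assumptions, not a copy of nodepool's real schema):

    import voluptuous as v

    # Reduced pool schema fragment; only a few keys are reproduced and
    # unknown keys are allowed so the example stays small.
    pool = v.Schema({
        'name': str,
        'ignore-provider-quota': bool,
        'max-servers': int,
    }, extra=v.ALLOW_EXTRA)

    pool({'name': 'main', 'ignore-provider-quota': True, 'max-servers': 1})
    try:
        pool({'name': 'main', 'ignore-provider-quota': 'yes'})  # wrong type
    except v.MultipleInvalid as exc:
        print(exc)  # reports the bad type for 'ignore-provider-quota'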
@@ -298,15 +298,18 @@ class OpenStackNodeRequestHandler(NodeRequestHandler):
     def hasRemainingQuota(self, ntype):
         needed_quota = self.manager.quotaNeededByNodeType(ntype, self.pool)
 
-        # Calculate remaining quota which is calculated as:
-        # quota = <total nodepool quota> - <used quota> - <quota for node>
-        cloud_quota = self.manager.estimatedNodepoolQuota()
-        cloud_quota.subtract(self.manager.estimatedNodepoolQuotaUsed(self.zk))
-        cloud_quota.subtract(needed_quota)
-        self.log.debug("Predicted remaining tenant quota: %s", cloud_quota)
+        if not self.pool.ignore_provider_quota:
+            # Calculate remaining quota which is calculated as:
+            # quota = <total nodepool quota> - <used quota> - <quota for node>
+            cloud_quota = self.manager.estimatedNodepoolQuota()
+            cloud_quota.subtract(
+                self.manager.estimatedNodepoolQuotaUsed(self.zk))
+            cloud_quota.subtract(needed_quota)
+            self.log.debug("Predicted remaining provider quota: %s",
+                           cloud_quota)
 
-        if not cloud_quota.non_negative():
-            return False
+            if not cloud_quota.non_negative():
+                return False
 
         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
@@ -329,11 +332,12 @@ class OpenStackNodeRequestHandler(NodeRequestHandler):
             needed_quota.add(
                 self.manager.quotaNeededByNodeType(ntype, self.pool))
 
-        cloud_quota = self.manager.estimatedNodepoolQuota()
-        cloud_quota.subtract(needed_quota)
+        if not self.pool.ignore_provider_quota:
+            cloud_quota = self.manager.estimatedNodepoolQuota()
+            cloud_quota.subtract(needed_quota)
 
-        if not cloud_quota.non_negative():
-            return False
+            if not cloud_quota.non_negative():
+                return False
 
         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
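Taken together, the two guards above skip the provider-wide quota estimate entirely when ignore-provider-quota is set, while the pool's own max-* limits and recorded usage are still enforced. A minimal, self-contained sketch of that logic follows; it is not nodepool's QuotaInformation API, and the function name and dict-based arguments are hypothetical:

    from collections import Counter


    def has_remaining_quota(pool_max, provider_quota, used, needed,
                            ignore_provider_quota=False):
        """Return True if 'needed' still fits under the applicable limits.

        Each argument is a dict of resource name -> amount, e.g.
        {'cores': 8, 'ram': 8192, 'instances': 1}.  This is a simplified
        stand-in for nodepool's quota bookkeeping.
        """
        if not ignore_provider_quota:
            # remaining provider quota = total - used - needed
            remaining = Counter(provider_quota)
            remaining.subtract(used)
            remaining.subtract(needed)
            if any(v < 0 for v in remaining.values()):
                return False

        # Pool-specific limits are always enforced, even when the provider
        # quota is ignored.
        pool_remaining = Counter(pool_max)
        pool_remaining.subtract(used)
        pool_remaining.subtract(needed)
        return all(v >= 0 for v in pool_remaining.values())


    # The provider (wrongly) reports zero cores, but the pool's own limits
    # still allow one more node.
    print(has_remaining_quota(
        pool_max={'cores': 8, 'instances': 2},
        provider_quota={'cores': 0, 'instances': 20},
        used={'cores': 0, 'instances': 0},
        needed={'cores': 4, 'instances': 1},
        ignore_provider_quota=True))   # True
    print(has_remaining_quota(
        pool_max={'cores': 8, 'instances': 2},
        provider_quota={'cores': 0, 'instances': 20},
        used={'cores': 0, 'instances': 0},
        needed={'cores': 4, 'instances': 1},
        ignore_provider_quota=False))  # False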
nodepool/tests/fixtures/ignore_provider_quota_false.yaml (new file, 48 lines)
@@ -0,0 +1,48 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+labels:
+  - name: fake-label
+
+providers:
+  - name: fake-provider
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image
+        meta:
+          key: value
+          key2: value
+    pools:
+      - name: main
+        ignore-provider-quota: false
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label
+            diskimage: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image
+    elements:
+      - fedora
+      - vm
+    release: 21
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
nodepool/tests/fixtures/ignore_provider_quota_true.yaml (new file, 49 lines)
@@ -0,0 +1,49 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+labels:
+  - name: fake-label
+
+providers:
+  - name: fake-provider
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image
+        meta:
+          key: value
+          key2: value
+    pools:
+      - name: main
+        max-servers: 1
+        ignore-provider-quota: true
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label
+            diskimage: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image
+    elements:
+      - fedora
+      - vm
+    release: 21
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
@@ -1436,3 +1436,135 @@ class TestLauncher(tests.DBTestCase):
         while pool_worker[0].paused_handler:
             time.sleep(0.1)
         self.assertEqual(0, len(pool_worker[0].request_handlers))
+
+    def test_ignore_provider_quota_false(self):
+        '''
+        Test that a node request fails when ignore-provider-quota is set to
+        false and the provider is out of quota.
+        '''
+
+        # Set max-cores quota value to 0 to force "out of quota". Note that
+        # the fake provider checks the number of instances during server
+        # creation to decide if it should throw an over quota exception,
+        # but it doesn't check cores.
+        def fake_get_quota():
+            return (0, 20, 1000000)
+        self.useFixture(fixtures.MockPatchObject(
+            fakeprovider.FakeProvider.fake_cloud, '_get_quota',
+            fake_get_quota
+        ))
+
+        configfile = self.setup_config('ignore_provider_quota_false.yaml')
+        self.useBuilder(configfile)
+        self.waitForImage('fake-provider', 'fake-image')
+
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+
+        # Create a request that should fail: with ignore-provider-quota set
+        # to false the handler declines the request because it would exceed
+        # the provider quota.
+        self.log.debug("Submitting request with ignore-provider-quota False")
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req)
+        req = self.waitForNodeRequest(req)
+        self.assertEqual(req.state, zk.FAILED)
+
+    def test_ignore_provider_quota_true(self):
+        '''
+        Test that a node request is fulfilled when ignore-provider-quota is
+        set to true, even though the provider reports no quota.
+        '''
+
+        # Set max-cores quota value to 0 to force "out of quota". Note that
+        # the fake provider checks the number of instances during server
+        # creation to decide if it should throw an over quota exception,
+        # but it doesn't check cores.
+        def fake_get_quota():
+            return (0, 20, 1000000)
+        self.useFixture(fixtures.MockPatchObject(
+            fakeprovider.FakeProvider.fake_cloud, '_get_quota',
+            fake_get_quota
+        ))
+
+        configfile = self.setup_config('ignore_provider_quota_true.yaml')
+        self.useBuilder(configfile)
+        self.waitForImage('fake-provider', 'fake-image')
+
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+
+        # Create a request with ignore-provider-quota set to true that should
+        # pass regardless of the lack of cloud/provider quota.
+        self.replace_config(configfile, 'ignore_provider_quota_true.yaml')
+
+        self.log.debug(
+            "Submitting an initial request with ignore-provider-quota True")
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req1)
+        req1 = self.waitForNodeRequest(req1)
+        self.assertEqual(req1.state, zk.FULFILLED)
+
+        # Lock this node so it appears as used and not deleted
+        req1_node = self.zk.getNode(req1.nodes[0])
+        self.zk.lockNode(req1_node, blocking=False)
+
+        # Request a second node; this request should pause the handler
+        # because the pool is configured with max-servers: 1
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('fake-label')
+        self.log.debug(
+            "Submitting a second request with ignore-provider-quota True "
+            "but with a full max-servers quota.")
+        self.zk.storeNodeRequest(req2)
+
+        pool_worker = pool.getPoolWorkers('fake-provider')
+        while not pool_worker[0].paused_handler:
+            time.sleep(0.1)
+
+        # The handler is paused now and the request should be in state PENDING
+        req2 = self.waitForNodeRequest(req2, zk.PENDING)
+        self.assertEqual(req2.state, zk.PENDING)
+
+        # Now free up the first node
+        self.log.debug("Marking first node as used %s", req1.id)
+        req1_node.state = zk.USED
+        self.zk.storeNode(req1_node)
+        self.zk.unlockNode(req1_node)
+        self.waitForNodeDeletion(req1_node)
+
+        # After the first node is cleaned up the second request should be
+        # able to fulfill now.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertEqual(req2.state, zk.FULFILLED)
+
+        # Lock this node so it appears as used and not deleted
+        req2_node = self.zk.getNode(req2.nodes[0])
+        self.zk.lockNode(req2_node, blocking=False)
+
+        # Now free up the second node
+        self.log.debug("Marking second node as used %s", req2.id)
+        req2_node.state = zk.USED
+        self.zk.storeNode(req2_node)
+        self.zk.unlockNode(req2_node)
+        self.waitForNodeDeletion(req2_node)
+
+        # Request a 2 node set; this request should fail
+        # due to the provider only being able to fulfill
+        # a single node at a time.
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.node_types.append('fake-label')
+        req3.node_types.append('fake-label')
+        self.log.debug(
+            "Submitting a third request with ignore-provider-quota True "
+            "for a 2-node set which the provider cannot fulfill.")
+        self.zk.storeNodeRequest(req3)
+
+        req3 = self.waitForNodeRequest(req3)
+        self.assertEqual(req3.state, zk.FAILED)
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    A new boolean pool variable ``ignore-provider-quota`` has been added to
+    allow the provider quota to be ignored for a pool. Instead, nodepool only
+    checks against the configured max values for the pool and the current usage
+    based on stored data. This may be useful in circumstances where the
+    provider is incorrectly calculating quota.