From ac187302a332016da9ca8169398499f02f2c3982 Mon Sep 17 00:00:00 2001
From: "James E. Blair"
Date: Sat, 3 Jun 2023 10:28:07 -0700
Subject: [PATCH] Add extra-resources quota handling to the k8s driver

Some k8s schedulers like run.ai use custom pod annotations rather
than standard k8s resources to specify required resources such as
gpus. To facilitate quota handling for these resources in nodepool,
this change adds an extra-resources attribute to labels that can be
used to ensure nodepool doesn't try to launch more resources than can
be handled.

Users can already specify a 'max-resources' limit for arbitrary
resources in the nodepool config; this change allows them to also
specify arbitrary resource consumption with 'extra-resources'.

Change-Id: I3d2612a7d168bf415d58029aa295e60c3c83cecd
---
 doc/source/kubernetes.rst                     |  62 +++++++++++
 nodepool/driver/kubernetes/config.py          |  47 +++++---
 nodepool/driver/kubernetes/handler.py         |  11 +-
 nodepool/driver/kubernetes/provider.py        |   4 +-
 .../fixtures/kubernetes-pool-quota-extra.yaml |  27 +++++
 .../kubernetes-tenant-quota-extra.yaml        |  29 +++++
 nodepool/tests/unit/test_driver_kubernetes.py | 102 ++++++++++--------
 7 files changed, 217 insertions(+), 65 deletions(-)
 create mode 100644 nodepool/tests/fixtures/kubernetes-pool-quota-extra.yaml
 create mode 100644 nodepool/tests/fixtures/kubernetes-tenant-quota-extra.yaml

diff --git a/doc/source/kubernetes.rst b/doc/source/kubernetes.rst
index 9fc7718dc..dda9e0ad0 100644
--- a/doc/source/kubernetes.rst
+++ b/doc/source/kubernetes.rst
@@ -64,6 +64,42 @@ Selecting the kubernetes driver adds the following options to the
       The number of times to retry launching a node before considering
       the job failed.
 
+   .. attr:: max-cores
+      :type: int
+      :default: unlimited
+
+      Maximum number of cores usable from this provider's pools by
+      default. This can be used to limit usage of the kubernetes
+      backend. If not defined nodepool can use all cores up to the
+      limit of the backend.
+
+   .. attr:: max-servers
+      :type: int
+      :default: unlimited
+
+      Maximum number of pods spawnable from this provider's pools by
+      default. This can be used to limit the number of pods. If not
+      defined nodepool can create as many servers as the kubernetes
+      backend allows.
+
+   .. attr:: max-ram
+      :type: int
+      :default: unlimited
+
+      Maximum ram usable from this provider's pools by default. This
+      can be used to limit the amount of ram allocated by nodepool. If
+      not defined nodepool can use as much ram as the kubernetes
+      backend allows.
+
+   .. attr:: max-resources
+      :type: dict
+      :default: unlimited
+
+      A dictionary of other quota resource limits applicable to this
+      provider's pools by default. Arbitrary limits may be supplied
+      with the
+      :attr:`providers.[kubernetes].pools.labels.extra-resources`
+      attribute.
 
    .. attr:: pools
       :type: list
@@ -117,6 +153,14 @@ Selecting the kubernetes driver adds the following options to the
          the amount of ram allocated by nodepool. If not defined
          nodepool can use as much ram as the kubernetes backend allows.
 
+      .. attr:: max-resources
+         :type: dict
+         :default: unlimited
+
+         A dictionary of other quota resource limits applicable to
+         this pool. Arbitrary limits may be supplied with the
+         :attr:`providers.[kubernetes].pools.labels.extra-resources` attribute.
+
       .. attr:: default-label-cpu
          :type: int
 
@@ -144,6 +188,15 @@ Selecting the kubernetes driver adds the following options to the
          :attr:`providers.[kubernetes].pools.labels.storage` for all labels of
          this pool that do not set their own value.
+
+      .. attr:: default-label-extra-resources
+         :type: dict
+
+         Only used by the
+         :value:`providers.[kubernetes].pools.labels.type.pod` label type;
+         specifies default values for
+         :attr:`providers.[kubernetes].pools.labels.extra-resources` for all labels of
+         this pool that do not set their own value.
 
       .. attr:: default-label-cpu-limit
         :type: int
 
@@ -286,6 +339,15 @@ Selecting the kubernetes driver adds the following options to the
             MB to request for the pod. If no limit is specified, this
             will also be used as the limit.
+
+         .. attr:: extra-resources
+            :type: dict
+
+            Only used by the
+            :value:`providers.[kubernetes].pools.labels.type.pod`
+            label type; specifies any extra resources that Nodepool
+            should consider in its quota calculation other than the
+            resources described above (cpu, memory, storage).
 
          .. attr:: cpu-limit
             :type: int
 
diff --git a/nodepool/driver/kubernetes/config.py b/nodepool/driver/kubernetes/config.py
index 8d37e9dd6..1cee41ae9 100644
--- a/nodepool/driver/kubernetes/config.py
+++ b/nodepool/driver/kubernetes/config.py
@@ -1,4 +1,5 @@
 # Copyright 2018 Red Hat
+# Copyright 2023 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +15,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections import defaultdict
+import math
+
 import voluptuous as v
 
 from nodepool.driver import ConfigPool
@@ -37,11 +41,15 @@ class KubernetesPool(ConfigPool):
     def load(self, pool_config, full_config):
         super().load(pool_config)
         self.name = pool_config['name']
-        self.max_cores = pool_config.get('max-cores')
-        self.max_ram = pool_config.get('max-ram')
+        self.max_cores = pool_config.get('max-cores', self.provider.max_cores)
+        self.max_ram = pool_config.get('max-ram', self.provider.max_ram)
+        self.max_resources = self.provider.max_resources.copy()
+        self.max_resources.update(pool_config.get('max-resources', {}))
         self.default_label_cpu = pool_config.get('default-label-cpu')
         self.default_label_memory = pool_config.get('default-label-memory')
         self.default_label_storage = pool_config.get('default-label-storage')
+        self.default_label_extra_resources = pool_config.get(
+            'default-label-extra-resources', {})
         self.labels = {}
         for label in pool_config.get('labels', []):
             pl = KubernetesLabel()
@@ -54,11 +62,13 @@ class KubernetesPool(ConfigPool):
             pl.cpu = label.get('cpu', self.default_label_cpu)
             pl.memory = label.get('memory', self.default_label_memory)
             pl.storage = label.get('storage', self.default_label_storage)
+            pl.extra_resources = self.default_label_extra_resources.copy()
+            pl.extra_resources.update(label.get('extra-resources', {}))
             # The limits are the first of:
             # 1) label specific configured limit
             # 2) default label configured limit
             # 3) label specific configured request
-            # 4) default label configured default
+            # 4) default label configured request
             # 5) None
             default_cpu_limit = pool_config.get(
                 'default-label-cpu-limit', pl.cpu)
@@ -102,10 +112,16 @@ class KubernetesProviderConfig(ProviderConfig):
     def load(self, config):
         self.launch_retries = int(self.provider.get('launch-retries', 3))
         self.context = self.provider.get('context')
+        self.max_servers = self.provider.get('max-servers', math.inf)
+        self.max_cores = self.provider.get('max-cores', math.inf)
+        self.max_ram = self.provider.get('max-ram', math.inf)
+        self.max_resources = defaultdict(lambda: math.inf)
+        for k, val in self.provider.get('max-resources', {}).items():
+            self.max_resources[k] = val
         for pool in self.provider.get('pools', []):
             pp = KubernetesPool()
-            pp.load(pool, config)
             pp.provider = self
+            pp.load(pool, config)
             self.pools[pp.name] = pp
 
     def getSchema(self):
@@ -135,26 +151,33 @@
             'volume-mounts': list,
             'labels': dict,
             'annotations': dict,
+            'extra-resources': {str: int},
         }
 
         pool = ConfigPool.getCommonSchemaDict()
         pool.update({
             v.Required('name'): str,
             v.Required('labels'): [k8s_label],
-            v.Optional('max-cores'): int,
-            v.Optional('max-ram'): int,
-            v.Optional('default-label-cpu'): int,
-            v.Optional('default-label-memory'): int,
-            v.Optional('default-label-storage'): int,
-            v.Optional('default-label-cpu-limit'): int,
-            v.Optional('default-label-memory-limit'): int,
-            v.Optional('default-label-storage-limit'): int,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
+            'default-label-cpu': int,
+            'default-label-memory': int,
+            'default-label-storage': int,
+            'default-label-cpu-limit': int,
+            'default-label-memory-limit': int,
+            'default-label-storage-limit': int,
+            'default-label-extra-resources': {str: int},
         })
 
         provider = {
             v.Required('pools'): [pool],
             'context': str,
             'launch-retries': int,
+            'max-servers': int,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
         }
 
         schema = ProviderConfig.getCommonSchemaDict()
diff --git a/nodepool/driver/kubernetes/handler.py b/nodepool/driver/kubernetes/handler.py
index 6286a6d11..2f915227c 100644
--- a/nodepool/driver/kubernetes/handler.py
+++ b/nodepool/driver/kubernetes/handler.py
@@ -1,4 +1,5 @@
 # Copyright 2018 Red Hat
+# Copyright 2023 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -163,11 +164,11 @@ class KubernetesNodeRequestHandler(NodeRequestHandler):
 
         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
-        pool_quota = QuotaInformation(
-            cores=getattr(self.pool, 'max_cores', None),
-            instances=self.pool.max_servers,
-            ram=getattr(self.pool, 'max_ram', None),
-            default=math.inf)
+        args = dict(cores=getattr(self.pool, 'max_cores', None),
+                    instances=self.pool.max_servers,
+                    ram=getattr(self.pool, 'max_ram', None))
+        args.update(self.pool.max_resources)
+        pool_quota = QuotaInformation(**args, default=math.inf)
         pool_quota.subtract(
             self.manager.estimatedNodepoolQuotaUsed(self.pool))
         self.log.debug("Current pool quota: %s" % pool_quota)
diff --git a/nodepool/driver/kubernetes/provider.py b/nodepool/driver/kubernetes/provider.py
index b4615e913..ae848a874 100644
--- a/nodepool/driver/kubernetes/provider.py
+++ b/nodepool/driver/kubernetes/provider.py
@@ -1,4 +1,5 @@
 # Copyright 2018 Red Hat
+# Copyright 2023 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -427,7 +428,8 @@ class KubernetesProvider(Provider, QuotaSupport):
             resources["ram"] = provider_label.memory
         if provider_label.storage:
             resources["ephemeral-storage"] = provider_label.storage
-        return QuotaInformation(instances=1, default=1, **resources)
+        resources.update(provider_label.extra_resources)
+        return QuotaInformation(instances=1, **resources)
 
     def unmanagedQuotaUsed(self):
         # TODO: return real quota information about quota
diff --git a/nodepool/tests/fixtures/kubernetes-pool-quota-extra.yaml b/nodepool/tests/fixtures/kubernetes-pool-quota-extra.yaml
new file mode 100644
index 000000000..ab8a4767e
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-pool-quota-extra.yaml
@@ -0,0 +1,27 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        max-resources:
+          mygpu: 2
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            extra-resources:
+              mygpu: 1
diff --git a/nodepool/tests/fixtures/kubernetes-tenant-quota-extra.yaml b/nodepool/tests/fixtures/kubernetes-tenant-quota-extra.yaml
new file mode 100644
index 000000000..73aace616
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-tenant-quota-extra.yaml
@@ -0,0 +1,29 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    mygpu: 2
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            extra-resources:
+              mygpu: 1
diff --git a/nodepool/tests/unit/test_driver_kubernetes.py b/nodepool/tests/unit/test_driver_kubernetes.py
index e618f8703..a377379fa 100644
--- a/nodepool/tests/unit/test_driver_kubernetes.py
+++ b/nodepool/tests/unit/test_driver_kubernetes.py
@@ -1,4 +1,5 @@
 # Copyright (C) 2018 Red Hat
+# Copyright 2023 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -439,53 +440,6 @@ class TestDriverKubernetes(tests.DBTestCase):
             },
         })
 
-    def test_kubernetes_pool_quota_servers(self):
-        self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
-
-    def test_kubernetes_pool_quota_cores(self):
-        self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
-
-    def test_kubernetes_pool_quota_ram(self):
-        self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
-
-    def test_kubernetes_tenant_quota_servers(self):
-        self._test_kubernetes_quota(
-            'kubernetes-tenant-quota-servers.yaml', pause=False)
-
-    def test_kubernetes_tenant_quota_cores(self):
-        self._test_kubernetes_quota(
-            'kubernetes-tenant-quota-cores.yaml', pause=False)
-
-    def test_kubernetes_tenant_quota_ram(self):
-        self._test_kubernetes_quota(
-            'kubernetes-tenant-quota-ram.yaml', pause=False)
-
-    def test_kubernetes_leaked_node(self):
-        conf = self.setup_config('kubernetes-leaked-node.yaml')
-        pool = self.useNodepool(conf, watermark_sleep=1)
-        pool.cleanup_interval = 1
-        self.startPool(pool)
-
-        # wait for min-ready node to be available
-        nodes = self.waitForNodes('pod-fedora')
-        self.assertEqual(len(nodes), 1)
-        manager = pool.getProviderManager('kubespray')
-        servers = manager.listNodes()
-        self.assertEqual(len(servers), 1)
-
-        # delete node from zk so it becomes 'leaked'
-        self.zk.deleteNode(nodes[0])
-
-        # node gets replaced, wait for that
-        new_nodes = self.waitForNodes('pod-fedora')
-        self.assertEqual(len(new_nodes), 1)
-
-        # original node should get deleted eventually
-        self.waitForInstanceDeletion(manager, nodes[0].external_id)
-
-        servers = manager.listNodes()
-        self.assertEqual(len(servers), 1)
-
     def _test_kubernetes_quota(self, config, pause=True):
         configfile = self.setup_config(config)
         pool = self.useNodepool(configfile, watermark_sleep=1)
@@ -534,3 +488,57 @@ class TestDriverKubernetes(tests.DBTestCase):
         # We should unpause and fulfill this now
         req = self.waitForNodeRequest(max_req, (zk.FULFILLED,))
         self.assertEqual(req.state, zk.FULFILLED)
+
+    def test_kubernetes_pool_quota_servers(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
+
+    def test_kubernetes_pool_quota_cores(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
+
+    def test_kubernetes_pool_quota_ram(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
+
+    def test_kubernetes_pool_quota_extra(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-extra.yaml')
+
+    def test_kubernetes_tenant_quota_servers(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-servers.yaml', pause=False)
+
+    def test_kubernetes_tenant_quota_cores(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-cores.yaml', pause=False)
+
+    def test_kubernetes_tenant_quota_ram(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-ram.yaml', pause=False)
+
+    def test_kubernetes_tenant_quota_extra(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-extra.yaml', pause=False)
+
+    def test_kubernetes_leaked_node(self):
+        conf = self.setup_config('kubernetes-leaked-node.yaml')
+        pool = self.useNodepool(conf, watermark_sleep=1)
+        pool.cleanup_interval = 1
+        self.startPool(pool)
+
+        # wait for min-ready node to be available
+        nodes = self.waitForNodes('pod-fedora')
+        self.assertEqual(len(nodes), 1)
+        manager = pool.getProviderManager('kubespray')
+        servers = manager.listNodes()
+        self.assertEqual(len(servers), 1)
+
+        # delete node from zk so it becomes 'leaked'
+        self.zk.deleteNode(nodes[0])
+
+        # node gets replaced, wait for that
+        new_nodes = self.waitForNodes('pod-fedora')
+        self.assertEqual(len(new_nodes), 1)
+
+        # original node should get deleted eventually
+        self.waitForInstanceDeletion(manager, nodes[0].external_id)
+
+        servers = manager.listNodes()
+        self.assertEqual(len(servers), 1)
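
A minimal configuration sketch tying the new options together (abridged, omitting the zookeeper sections; the resource name 'mygpu' and the specific limits are illustrative, borrowed from the test fixtures above). Provider- or pool-level 'max-resources' caps a custom resource, 'default-label-extra-resources' supplies a pool-wide default, and per-label 'extra-resources' declares what each pod consumes:

    providers:
      - name: kubespray
        driver: kubernetes
        context: admin-cluster.local
        max-resources:
          mygpu: 4              # provider-wide default limit (illustrative)
        pools:
          - name: main
            max-resources:
              mygpu: 2          # overrides the provider default for this pool
            default-label-extra-resources:
              mygpu: 1          # used by labels that do not set extra-resources
            labels:
              - name: pod-fedora
                type: pod
                image: docker.io/fedora:28
                extra-resources:
                  mygpu: 1      # each pod counts one mygpu against the quota

With a configuration like this, each pod-fedora pod would count one 'mygpu' against the pool quota, so nodepool would stop launching further pods of this label once two are running in the pool, and tenant-resource-limits can cap the same resource per tenant.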