Add extra-resources quota handling to the k8s driver

Some k8s schedulers like run.ai use custom pod annotations rather
than standard k8s resources to specify required resources such as
GPUs.  To facilitate quota handling for these resources in nodepool,
this change adds an extra-resources attribute to labels; it can be
used to ensure nodepool doesn't try to launch pods requiring more of
these resources than the backend can handle.

Users can already specify a 'max-resources' limit for arbitrary
resources in the nodepool config; this change allows them to also
specify arbitrary resource consumption with 'extra-resources'.
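
As a rough sketch of how the two attributes combine (the resource
name 'mygpu' and the values mirror the test fixture added in this
change), a pool might limit a custom resource that a label consumes:

  pools:
    - name: main
      max-resources:
        mygpu: 2
      labels:
        - name: pod-fedora
          type: pod
          image: docker.io/fedora:28
          extra-resources:
            mygpu: 1

With these values each pod-fedora node counts one 'mygpu' against the
pool's limit of two, so nodepool would pause further launches once
two such nodes exist.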

Change-Id: I3d2612a7d168bf415d58029aa295e60c3c83cecd
Author: James E. Blair
Date: 2023-06-03 10:28:07 -07:00
Parent: 1822976350
Commit: ac187302a3
7 changed files with 217 additions and 65 deletions

View File

@@ -64,6 +64,42 @@ Selecting the kubernetes driver adds the following options to the
The number of times to retry launching a node before considering
the job failed.
.. attr:: max-cores
:type: int
:default: unlimited
Maximum number of cores usable from this provider's pools by
default. This can be used to limit usage of the kubernetes
backend. If not defined nodepool can use all cores up to the
limit of the backend.
.. attr:: max-servers
:type: int
:default: unlimited
Maximum number of pods spawnable from this provider's pools by
default. This can be used to limit the number of pods. If not
defined nodepool can create as many servers as the kubernetes
backend allows.
.. attr:: max-ram
:type: int
:default: unlimited
Maximum ram usable from this provider's pools by default. This
can be used to limit the amount of ram allocated by nodepool. If
not defined nodepool can use as much ram as the kubernetes
backend allows.
.. attr:: max-resources
:type: dict
:default: unlimited
A dictionary of other quota resource limits applicable to this
provider's pools by default. Limits for arbitrarily named
resources may be set here; the corresponding per-label
consumption is specified with the
:attr:`providers.[kubernetes].pools.labels.extra-resources`
attribute.
.. attr:: pools
:type: list
@@ -117,6 +153,14 @@ Selecting the kubernetes driver adds the following options to the
the amount of ram allocated by nodepool. If not defined
nodepool can use as much ram as the kubernetes backend allows.
.. attr:: max-resources
:type: dict
:default: unlimited
A dictionary of other quota resource limits applicable to
this pool. Limits for arbitrarily named resources may be set here;
the corresponding per-label consumption is specified with the
:attr:`providers.[kubernetes].pools.labels.extra-resources` attribute.
.. attr:: default-label-cpu
:type: int
@@ -144,6 +188,15 @@ Selecting the kubernetes driver adds the following options to the
:attr:`providers.[kubernetes].pools.labels.storage` for all labels of
this pool that do not set their own value.
.. attr:: default-label-extra-resources
:type: dict
Only used by the
:value:`providers.[kubernetes].pools.labels.type.pod` label type;
specifies default values for
:attr:`providers.[kubernetes].pools.labels.extra-resources` for all labels of
this pool that do not set their own value.
.. attr:: default-label-cpu-limit
:type: int
@@ -286,6 +339,15 @@ Selecting the kubernetes driver adds the following options to the
MB to request for the pod. If no limit is specified, this
will also be used as the limit.
.. attr:: extra-resources
:type: dict
Only used by the
:value:`providers.[kubernetes].pools.labels.type.pod`
label type; specifies any extra resources that Nodepool
should consider in its quota calculation other than the
resources described above (cpu, memory, storage).
.. attr:: cpu-limit
:type: int
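
To illustrate how the attributes documented above fit together, here
is a configuration sketch (the provider, context, label, and resource
names follow the test fixtures added in this change and are otherwise
arbitrary): a provider-wide max-resources default combined with a
pool-level default-label-extra-resources that applies to every pod
label which does not set its own extra-resources.

  providers:
    - name: kubespray
      driver: kubernetes
      context: admin-cluster.local
      max-resources:
        mygpu: 4          # default limit inherited by all pools
      pools:
        - name: main
          default-label-extra-resources:
            mygpu: 1      # assumed for each pod label unless overridden
          labels:
            - name: pod-fedora
              type: pod
              image: docker.io/fedora:28

A pool may override the inherited limit with its own max-resources,
and an individual label may override the default consumption with its
own extra-resources.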

View File

@@ -1,4 +1,5 @@
# Copyright 2018 Red Hat
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import math
import voluptuous as v
from nodepool.driver import ConfigPool
@@ -37,11 +41,15 @@ class KubernetesPool(ConfigPool):
def load(self, pool_config, full_config):
super().load(pool_config)
self.name = pool_config['name']
self.max_cores = pool_config.get('max-cores')
self.max_ram = pool_config.get('max-ram')
self.max_cores = pool_config.get('max-cores', self.provider.max_cores)
self.max_ram = pool_config.get('max-ram', self.provider.max_ram)
self.max_resources = self.provider.max_resources.copy()
self.max_resources.update(pool_config.get('max-resources', {}))
self.default_label_cpu = pool_config.get('default-label-cpu')
self.default_label_memory = pool_config.get('default-label-memory')
self.default_label_storage = pool_config.get('default-label-storage')
self.default_label_extra_resources = pool_config.get(
'default-label-extra-resources', {})
self.labels = {}
for label in pool_config.get('labels', []):
pl = KubernetesLabel()
@@ -54,11 +62,13 @@ class KubernetesPool(ConfigPool):
pl.cpu = label.get('cpu', self.default_label_cpu)
pl.memory = label.get('memory', self.default_label_memory)
pl.storage = label.get('storage', self.default_label_storage)
pl.extra_resources = self.default_label_extra_resources.copy()
pl.extra_resources.update(label.get('extra-resources', {}))
# The limits are the first of:
# 1) label specific configured limit
# 2) default label configured limit
# 3) label specific configured request
# 4) default label configured default
# 4) default label configured request
# 5) None
default_cpu_limit = pool_config.get(
'default-label-cpu-limit', pl.cpu)
@@ -102,10 +112,16 @@ class KubernetesProviderConfig(ProviderConfig):
def load(self, config):
self.launch_retries = int(self.provider.get('launch-retries', 3))
self.context = self.provider.get('context')
self.max_servers = self.provider.get('max-servers', math.inf)
self.max_cores = self.provider.get('max-cores', math.inf)
self.max_ram = self.provider.get('max-ram', math.inf)
self.max_resources = defaultdict(lambda: math.inf)
for k, val in self.provider.get('max-resources', {}).items():
self.max_resources[k] = val
for pool in self.provider.get('pools', []):
pp = KubernetesPool()
pp.load(pool, config)
pp.provider = self
pp.load(pool, config)
self.pools[pp.name] = pp
def getSchema(self):
@@ -135,26 +151,33 @@ class KubernetesProviderConfig(ProviderConfig):
'volume-mounts': list,
'labels': dict,
'annotations': dict,
'extra-resources': {str: int},
}
pool = ConfigPool.getCommonSchemaDict()
pool.update({
v.Required('name'): str,
v.Required('labels'): [k8s_label],
v.Optional('max-cores'): int,
v.Optional('max-ram'): int,
v.Optional('default-label-cpu'): int,
v.Optional('default-label-memory'): int,
v.Optional('default-label-storage'): int,
v.Optional('default-label-cpu-limit'): int,
v.Optional('default-label-memory-limit'): int,
v.Optional('default-label-storage-limit'): int,
'max-cores': int,
'max-ram': int,
'max-resources': {str: int},
'default-label-cpu': int,
'default-label-memory': int,
'default-label-storage': int,
'default-label-cpu-limit': int,
'default-label-memory-limit': int,
'default-label-storage-limit': int,
'default-label-extra-resources': {str: int},
})
provider = {
v.Required('pools'): [pool],
'context': str,
'launch-retries': int,
'max-servers': int,
'max-cores': int,
'max-ram': int,
'max-resources': {str: int},
}
schema = ProviderConfig.getCommonSchemaDict()

View File

@@ -1,4 +1,5 @@
# Copyright 2018 Red Hat
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -163,11 +164,11 @@ class KubernetesNodeRequestHandler(NodeRequestHandler):
# Now calculate pool specific quota. Values indicating no quota default
# to math.inf representing infinity that can be calculated with.
pool_quota = QuotaInformation(
cores=getattr(self.pool, 'max_cores', None),
instances=self.pool.max_servers,
ram=getattr(self.pool, 'max_ram', None),
default=math.inf)
args = dict(cores=getattr(self.pool, 'max_cores', None),
instances=self.pool.max_servers,
ram=getattr(self.pool, 'max_ram', None))
args.update(self.pool.max_resources)
pool_quota = QuotaInformation(**args, default=math.inf)
pool_quota.subtract(
self.manager.estimatedNodepoolQuotaUsed(self.pool))
self.log.debug("Current pool quota: %s" % pool_quota)

View File

@@ -1,4 +1,5 @@
# Copyright 2018 Red Hat
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -427,7 +428,8 @@ class KubernetesProvider(Provider, QuotaSupport):
resources["ram"] = provider_label.memory
if provider_label.storage:
resources["ephemeral-storage"] = provider_label.storage
return QuotaInformation(instances=1, default=1, **resources)
resources.update(provider_label.extra_resources)
return QuotaInformation(instances=1, **resources)
def unmanagedQuotaUsed(self):
# TODO: return real quota information about quota

View File

@@ -0,0 +1,27 @@
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

labels:
  - name: pod-fedora

providers:
  - name: kubespray
    driver: kubernetes
    context: admin-cluster.local
    pools:
      - name: main
        max-resources:
          mygpu: 2
        labels:
          - name: pod-fedora
            type: pod
            image: docker.io/fedora:28
            extra-resources:
              mygpu: 1

View File

@@ -0,0 +1,29 @@
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

tenant-resource-limits:
  - tenant-name: tenant-1
    mygpu: 2

labels:
  - name: pod-fedora

providers:
  - name: kubespray
    driver: kubernetes
    context: admin-cluster.local
    pools:
      - name: main
        labels:
          - name: pod-fedora
            type: pod
            image: docker.io/fedora:28
            extra-resources:
              mygpu: 1

View File

@@ -1,4 +1,5 @@
# Copyright (C) 2018 Red Hat
# Copyright 2023 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -439,53 +440,6 @@ class TestDriverKubernetes(tests.DBTestCase):
},
})
def test_kubernetes_pool_quota_servers(self):
self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
def test_kubernetes_pool_quota_cores(self):
self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
def test_kubernetes_pool_quota_ram(self):
self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
def test_kubernetes_tenant_quota_servers(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-servers.yaml', pause=False)
def test_kubernetes_tenant_quota_cores(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-cores.yaml', pause=False)
def test_kubernetes_tenant_quota_ram(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-ram.yaml', pause=False)
def test_kubernetes_leaked_node(self):
conf = self.setup_config('kubernetes-leaked-node.yaml')
pool = self.useNodepool(conf, watermark_sleep=1)
pool.cleanup_interval = 1
self.startPool(pool)
# wait for min-ready node to be available
nodes = self.waitForNodes('pod-fedora')
self.assertEqual(len(nodes), 1)
manager = pool.getProviderManager('kubespray')
servers = manager.listNodes()
self.assertEqual(len(servers), 1)
# delete node from zk so it becomes 'leaked'
self.zk.deleteNode(nodes[0])
# node gets replaced, wait for that
new_nodes = self.waitForNodes('pod-fedora')
self.assertEqual(len(new_nodes), 1)
# original node should get deleted eventually
self.waitForInstanceDeletion(manager, nodes[0].external_id)
servers = manager.listNodes()
self.assertEqual(len(servers), 1)
def _test_kubernetes_quota(self, config, pause=True):
configfile = self.setup_config(config)
pool = self.useNodepool(configfile, watermark_sleep=1)
@@ -534,3 +488,57 @@ class TestDriverKubernetes(tests.DBTestCase):
# We should unpause and fulfill this now
req = self.waitForNodeRequest(max_req, (zk.FULFILLED,))
self.assertEqual(req.state, zk.FULFILLED)
def test_kubernetes_pool_quota_servers(self):
self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
def test_kubernetes_pool_quota_cores(self):
self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
def test_kubernetes_pool_quota_ram(self):
self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
def test_kubernetes_pool_quota_extra(self):
self._test_kubernetes_quota('kubernetes-pool-quota-extra.yaml')
def test_kubernetes_tenant_quota_servers(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-servers.yaml', pause=False)
def test_kubernetes_tenant_quota_cores(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-cores.yaml', pause=False)
def test_kubernetes_tenant_quota_ram(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-ram.yaml', pause=False)
def test_kubernetes_tenant_quota_extra(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-extra.yaml', pause=False)
def test_kubernetes_leaked_node(self):
conf = self.setup_config('kubernetes-leaked-node.yaml')
pool = self.useNodepool(conf, watermark_sleep=1)
pool.cleanup_interval = 1
self.startPool(pool)
# wait for min-ready node to be available
nodes = self.waitForNodes('pod-fedora')
self.assertEqual(len(nodes), 1)
manager = pool.getProviderManager('kubespray')
servers = manager.listNodes()
self.assertEqual(len(servers), 1)
# delete node from zk so it becomes 'leaked'
self.zk.deleteNode(nodes[0])
# node gets replaced, wait for that
new_nodes = self.waitForNodes('pod-fedora')
self.assertEqual(len(new_nodes), 1)
# original node should get deleted eventually
self.waitForInstanceDeletion(manager, nodes[0].external_id)
servers = manager.listNodes()
self.assertEqual(len(servers), 1)