Merge "Default limits for k8s labels and quota support"

changes/34/844334/1
Zuul 4 months ago committed by Gerrit Code Review
commit 6416b14838
  1. 39
      doc/source/kubernetes.rst
  2. 12
      nodepool/driver/kubernetes/config.py
  3. 4
      nodepool/driver/kubernetes/handler.py
  4. 19
      nodepool/driver/kubernetes/provider.py
  5. 9
      nodepool/launcher.py
  6. 32
      nodepool/tests/fixtures/kubernetes-default-limits.yaml
  7. 25
      nodepool/tests/fixtures/kubernetes-pool-quota-cores.yaml
  8. 25
      nodepool/tests/fixtures/kubernetes-pool-quota-ram.yaml
  9. 24
      nodepool/tests/fixtures/kubernetes-pool-quota-servers.yaml
  10. 28
      nodepool/tests/fixtures/kubernetes-tenant-quota-cores.yaml
  11. 28
      nodepool/tests/fixtures/kubernetes-tenant-quota-ram.yaml
  12. 27
      nodepool/tests/fixtures/kubernetes-tenant-quota-servers.yaml
  13. 1
      nodepool/tests/fixtures/kubernetes.yaml
  14. 90
      nodepool/tests/unit/test_driver_kubernetes.py
  15. 21
      releasenotes/notes/kubernetes-default-limits-f4bcc430a6274043.yaml

@ -96,6 +96,45 @@ Selecting the kubernetes driver adds the following options to the
A dictionary of key-value pairs that will be stored with the node data
in ZooKeeper. The keys and values can be any arbitrary string.
.. attr:: max-cores
:type: int
Maximum number of cores usable from this pool. This can be used
to limit usage of the kubernetes backend. If not defined nodepool can
use all cores up to the limit of the backend.
.. attr:: max-servers
:type: int
Maximum number of pods spawnable from this pool. This can
be used to limit the number of pods. If not defined
nodepool can create as many servers as the kubernetes backend allows.
.. attr:: max-ram
:type: int
Maximum ram usable from this pool. This can be used to limit
the amount of ram allocated by nodepool. If not defined
nodepool can use as much ram as the kubernetes backend allows.
.. attr:: default-label-cpu
:type: int
Only used by the
:value:`providers.[kubernetes].pools.labels.type.pod` label type;
specifies a default value for
:attr:`providers.[kubernetes].pools.labels.cpu` for all labels of
this pool that do not set their own value.
.. attr:: default-label-memory
:type: int
Only used by the
:value:`providers.[kubernetes].pools.labels.type.pod` label type;
specifies a default value for
:attr:`providers.[kubernetes].pools.labels.memory` for all labels of
this pool that do not set their own value.
.. attr:: labels
:type: list

@ -37,6 +37,10 @@ class KubernetesPool(ConfigPool):
def load(self, pool_config, full_config):
super().load(pool_config)
self.name = pool_config['name']
self.max_cores = pool_config.get('max-cores')
self.max_ram = pool_config.get('max-ram')
self.default_label_cpu = pool_config.get('default-label-cpu')
self.default_label_memory = pool_config.get('default-label-memory')
self.labels = {}
for label in pool_config.get('labels', []):
pl = KubernetesLabel()
@ -46,8 +50,8 @@ class KubernetesPool(ConfigPool):
pl.image_pull = label.get('image-pull', 'IfNotPresent')
pl.python_path = label.get('python-path', 'auto')
pl.shell_type = label.get('shell-type')
pl.cpu = label.get('cpu')
pl.memory = label.get('memory')
pl.cpu = label.get('cpu', self.default_label_cpu)
pl.memory = label.get('memory', self.default_label_memory)
pl.env = label.get('env', [])
pl.node_selector = label.get('node-selector')
pl.pool = self
@ -101,6 +105,10 @@ class KubernetesProviderConfig(ProviderConfig):
pool.update({
v.Required('name'): str,
v.Required('labels'): [k8s_label],
v.Optional('max-cores'): int,
v.Optional('max-ram'): int,
v.Optional('default-label-cpu'): int,
v.Optional('default-label-memory'): int,
})
provider = {

@ -46,6 +46,10 @@ class K8SLauncher(NodeLauncher):
else:
self.node.connection_type = "kubectl"
self.node.interface_ip = resource['pod']
pool = self.handler.provider.pools.get(self.node.pool)
resources = self.handler.manager.quotaNeededByLabel(
self.node.type[0], pool)
self.node.resources = resources.get_resources()
self.zk.storeNode(self.node)
self.log.info("Resource %s is ready" % resource['name'])

@ -288,7 +288,15 @@ class KubernetesProvider(Provider, QuotaSupport):
pod_body = {
'apiVersion': 'v1',
'kind': 'Pod',
'metadata': {'name': label.name},
'metadata': {
'name': label.name,
'labels': {
'nodepool_node_id': node.id,
'nodepool_provider_name': self.provider.name,
'nodepool_pool_name': pool,
'nodepool_node_label': label.name,
}
},
'spec': spec_body,
'restartPolicy': 'Never',
}
@ -323,8 +331,13 @@ class KubernetesProvider(Provider, QuotaSupport):
default=math.inf)
def quotaNeededByLabel(self, ntype, pool):
# TODO: return real quota information about a label
return QuotaInformation(cores=1, instances=1, ram=1, default=1)
provider_label = pool.labels[ntype]
resources = {}
if provider_label.cpu:
resources["cores"] = provider_label.cpu
if provider_label.memory:
resources["ram"] = provider_label.memory
return QuotaInformation(instances=1, default=1, **resources)
def unmanagedQuotaUsed(self):
# TODO: return real quota information about quota

@ -227,8 +227,8 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
if check_tenant_quota and not self._hasTenantQuota(req, pm):
# Defer request for it to be handled and fulfilled at a later
# run.
log.debug(
"Deferring request because it would exceed tenant quota")
log.debug("Deferring request %s because it would "
"exceed tenant quota", req)
continue
log.debug("Locking request")
@ -326,9 +326,10 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
**self.nodepool.config.tenant_resource_limits[tenant_name])
tenant_quota.subtract(used_quota)
log.debug("Current tenant quota: %s", tenant_quota)
log.debug("Current tenant quota for %s: %s", tenant_name, tenant_quota)
tenant_quota.subtract(needed_quota)
log.debug("Predicted remaining tenant quota: %s", tenant_quota)
log.debug("Predicted remaining tenant quota for %s: %s",
tenant_name, tenant_quota)
return tenant_quota.non_negative()
def _getUsedQuotaForTenant(self, tenant_name):

@ -0,0 +1,32 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: pod-default
- name: pod-custom-cpu
- name: pod-custom-mem
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
default-label-cpu: 2
default-label-memory: 1024
labels:
- name: pod-default
type: pod
- name: pod-custom-cpu
type: pod
cpu: 4
- name: pod-custom-mem
type: pod
memory: 2048

@ -0,0 +1,25 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
max-cores: 4
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28
cpu: 2

@ -0,0 +1,25 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
max-ram: 2048
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28
memory: 1024

@ -0,0 +1,24 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
max-servers: 2
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28

@ -0,0 +1,28 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
tenant-resource-limits:
- tenant-name: tenant-1
max-cores: 4
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28
cpu: 2

@ -0,0 +1,28 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
tenant-resource-limits:
- tenant-name: tenant-1
max-ram: 2048
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28
memory: 1024

@ -0,0 +1,27 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
tenant-resource-limits:
- tenant-name: tenant-1
max-servers: 2
labels:
- name: pod-fedora
providers:
- name: kubespray
driver: kubernetes
context: admin-cluster.local
pools:
- name: main
labels:
- name: pod-fedora
type: pod
image: docker.io/fedora:28

@ -22,7 +22,6 @@ providers:
context: admin-cluster.local
pools:
- name: main
max-servers: 2
node-attributes:
key1: value1
key2: value2

@ -156,15 +156,83 @@ class TestDriverKubernetes(tests.DBTestCase):
self.waitForNodeDeletion(node)
def test_kubernetes_max_servers(self):
configfile = self.setup_config('kubernetes.yaml')
def test_kubernetes_default_label_resources(self):
configfile = self.setup_config('kubernetes-default-limits.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('pod-default')
req.node_types.append('pod-custom-cpu')
req.node_types.append('pod-custom-mem')
self.zk.storeNodeRequest(req)
self.log.debug("Waiting for request %s", req.id)
req = self.waitForNodeRequest(req)
self.assertEqual(req.state, zk.FULFILLED)
self.assertNotEqual(req.nodes, [])
node_default = self.zk.getNode(req.nodes[0])
node_cust_cpu = self.zk.getNode(req.nodes[1])
node_cust_mem = self.zk.getNode(req.nodes[2])
resources_default = {
'instances': 1,
'cores': 2,
'ram': 1024,
}
resources_cust_cpu = {
'instances': 1,
'cores': 4,
'ram': 1024,
}
resources_cust_mem = {
'instances': 1,
'cores': 2,
'ram': 2048,
}
self.assertDictEqual(resources_default, node_default.resources)
self.assertDictEqual(resources_cust_cpu, node_cust_cpu.resources)
self.assertDictEqual(resources_cust_mem, node_cust_mem.resources)
for node in (node_default, node_cust_cpu, node_cust_mem):
node.state = zk.DELETING
self.zk.storeNode(node)
self.waitForNodeDeletion(node)
def test_kubernetes_pool_quota_servers(self):
self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
def test_kubernetes_pool_quota_cores(self):
self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
def test_kubernetes_pool_quota_ram(self):
self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
def test_kubernetes_tenant_quota_servers(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-servers.yaml', pause=False)
def test_kubernetes_tenant_quota_cores(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-cores.yaml', pause=False)
def test_kubernetes_tenant_quota_ram(self):
self._test_kubernetes_quota(
'kubernetes-tenant-quota-ram.yaml', pause=False)
def _test_kubernetes_quota(self, config, pause=True):
configfile = self.setup_config(config)
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
# Start two pods to hit max-server limit
reqs = []
for x in [1, 2]:
for _ in [1, 2]:
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.tenant_name = 'tenant-1'
req.node_types.append('pod-fedora')
self.zk.storeNodeRequest(req)
reqs.append(req)
@ -179,13 +247,19 @@ class TestDriverKubernetes(tests.DBTestCase):
# Now request a third pod that will hit the limit
max_req = zk.NodeRequest()
max_req.state = zk.REQUESTED
max_req.tenant_name = 'tenant-1'
max_req.node_types.append('pod-fedora')
self.zk.storeNodeRequest(max_req)
# The previous request should pause the handler
pool_worker = pool.getPoolWorkers('kubespray')
while not pool_worker[0].paused_handler:
time.sleep(0.1)
# if at pool quota, the handler will get paused
# but not if at tenant quota
if pause:
# The previous request should pause the handler
pool_worker = pool.getPoolWorkers('kubespray')
while not pool_worker[0].paused_handler:
time.sleep(0.1)
else:
self.waitForNodeRequest(max_req, (zk.REQUESTED,))
# Delete the earlier two pods freeing space for the third.
for req in fulfilled_reqs:
@ -195,5 +269,5 @@ class TestDriverKubernetes(tests.DBTestCase):
self.waitForNodeDeletion(node)
# We should unpause and fulfill this now
req = self.waitForNodeRequest(max_req)
req = self.waitForNodeRequest(max_req, (zk.FULFILLED,))
self.assertEqual(req.state, zk.FULFILLED)

@ -0,0 +1,21 @@
---
features:
- |
Config options for kubernetes providers were added to define default limits
for cpu and memory for pod-type labels.
* :attr:`providers.[kubernetes].pools.default-label-cpu`
* :attr:`providers.[kubernetes].pools.default-label-memory`
These values will apply to all pod-type labels within the same pool that do
not override these limits. This makes it possible to enforce resource limits
on pod labels and thereby to account for pool and tenant quotas in terms
of cpu and memory consumption. New config options for kubernetes pools
therefore also include
* :attr:`providers.[kubernetes].pools.max-cores`
* :attr:`providers.[kubernetes].pools.max-ram`
The existing tenant quota settings apply accordingly. Note that cpu and
memory quotas still cannot be considered for labels that do not specify
any limits, i.e. when neither a pool default nor a label-specific limit is set.
Loading…
Cancel
Save