diff --git a/doc/source/kubernetes.rst b/doc/source/kubernetes.rst
index 416774a84..b66ea4453 100644
--- a/doc/source/kubernetes.rst
+++ b/doc/source/kubernetes.rst
@@ -96,6 +96,45 @@ Selecting the kubernetes driver adds the following options to the
          A dictionary of key-value pairs that will be stored with the node data
          in ZooKeeper. The keys and values can be any arbitrary string.
 
+      .. attr:: max-cores
+         :type: int
+
+         Maximum number of cores usable from this pool. This can be used
+         to limit usage of the kubernetes backend. If not defined, nodepool
+         can use all cores up to the limit of the backend.
+
+      .. attr:: max-servers
+         :type: int
+
+         Maximum number of pods spawnable from this pool. This can be
+         used to limit the number of pods. If not defined, nodepool can
+         create as many pods as the kubernetes backend allows.
+
+      .. attr:: max-ram
+         :type: int
+
+         Maximum amount of RAM usable from this pool. This can be used
+         to limit the amount of RAM allocated by nodepool. If not defined,
+         nodepool can use as much RAM as the kubernetes backend allows.
+
+      .. attr:: default-label-cpu
+         :type: int
+
+         Only used by the
+         :value:`providers.[kubernetes].pools.labels.type.pod` label type;
+         specifies a default value for
+         :attr:`providers.[kubernetes].pools.labels.cpu` for all labels of
+         this pool that do not set their own value.
+
+      .. attr:: default-label-memory
+         :type: int
+
+         Only used by the
+         :value:`providers.[kubernetes].pools.labels.type.pod` label type;
+         specifies a default value for
+         :attr:`providers.[kubernetes].pools.labels.memory` for all labels of
+         this pool that do not set their own value.
+
       .. attr:: labels
          :type: list
 
diff --git a/nodepool/driver/kubernetes/config.py b/nodepool/driver/kubernetes/config.py
index 4df437ca6..6b238a98e 100644
--- a/nodepool/driver/kubernetes/config.py
+++ b/nodepool/driver/kubernetes/config.py
@@ -37,6 +37,10 @@ class KubernetesPool(ConfigPool):
     def load(self, pool_config, full_config):
         super().load(pool_config)
         self.name = pool_config['name']
+        self.max_cores = pool_config.get('max-cores')
+        self.max_ram = pool_config.get('max-ram')
+        self.default_label_cpu = pool_config.get('default-label-cpu')
+        self.default_label_memory = pool_config.get('default-label-memory')
         self.labels = {}
         for label in pool_config.get('labels', []):
             pl = KubernetesLabel()
@@ -46,8 +50,8 @@ class KubernetesPool(ConfigPool):
             pl.image_pull = label.get('image-pull', 'IfNotPresent')
             pl.python_path = label.get('python-path', 'auto')
             pl.shell_type = label.get('shell-type')
-            pl.cpu = label.get('cpu')
-            pl.memory = label.get('memory')
+            pl.cpu = label.get('cpu', self.default_label_cpu)
+            pl.memory = label.get('memory', self.default_label_memory)
             pl.env = label.get('env', [])
             pl.node_selector = label.get('node-selector')
             pl.pool = self
@@ -101,6 +105,10 @@ class KubernetesProviderConfig(ProviderConfig):
         pool.update({
             v.Required('name'): str,
             v.Required('labels'): [k8s_label],
+            v.Optional('max-cores'): int,
+            v.Optional('max-ram'): int,
+            v.Optional('default-label-cpu'): int,
+            v.Optional('default-label-memory'): int,
         })
 
         provider = {
diff --git a/nodepool/driver/kubernetes/handler.py b/nodepool/driver/kubernetes/handler.py
index 89855f13b..226e321a1 100644
--- a/nodepool/driver/kubernetes/handler.py
+++ b/nodepool/driver/kubernetes/handler.py
@@ -46,6 +46,10 @@ class K8SLauncher(NodeLauncher):
         else:
             self.node.connection_type = "kubectl"
             self.node.interface_ip = resource['pod']
+        pool = self.handler.provider.pools.get(self.node.pool)
+        resources = self.handler.manager.quotaNeededByLabel(
+            self.node.type[0], pool)
+        self.node.resources = resources.get_resources()
         self.zk.storeNode(self.node)
         self.log.info("Resource %s is ready" % resource['name'])
 
diff --git a/nodepool/driver/kubernetes/provider.py b/nodepool/driver/kubernetes/provider.py
index ed5ede416..1d96f2d7a 100644
--- a/nodepool/driver/kubernetes/provider.py
+++ b/nodepool/driver/kubernetes/provider.py
@@ -288,7 +288,15 @@ class KubernetesProvider(Provider, QuotaSupport):
         pod_body = {
             'apiVersion': 'v1',
             'kind': 'Pod',
-            'metadata': {'name': label.name},
+            'metadata': {
+                'name': label.name,
+                'labels': {
+                    'nodepool_node_id': node.id,
+                    'nodepool_provider_name': self.provider.name,
+                    'nodepool_pool_name': pool,
+                    'nodepool_node_label': label.name,
+                }
+            },
             'spec': spec_body,
             'restartPolicy': 'Never',
         }
@@ -323,8 +331,13 @@ class KubernetesProvider(Provider, QuotaSupport):
                                 default=math.inf)
 
     def quotaNeededByLabel(self, ntype, pool):
-        # TODO: return real quota information about a label
-        return QuotaInformation(cores=1, instances=1, ram=1, default=1)
+        provider_label = pool.labels[ntype]
+        resources = {}
+        if provider_label.cpu:
+            resources["cores"] = provider_label.cpu
+        if provider_label.memory:
+            resources["ram"] = provider_label.memory
+        return QuotaInformation(instances=1, default=1, **resources)
 
     def unmanagedQuotaUsed(self):
         # TODO: return real quota information about quota
diff --git a/nodepool/launcher.py b/nodepool/launcher.py
index 871a5d88e..ba8a9ec97 100644
--- a/nodepool/launcher.py
+++ b/nodepool/launcher.py
@@ -227,8 +227,8 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
             if check_tenant_quota and not self._hasTenantQuota(req, pm):
                 # Defer request for it to be handled and fulfilled at a later
                 # run.
-                log.debug(
-                    "Deferring request because it would exceed tenant quota")
+                log.debug("Deferring request %s because it would "
+                          "exceed tenant quota", req)
                 continue
 
             log.debug("Locking request")
@@ -326,9 +326,10 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
             **self.nodepool.config.tenant_resource_limits[tenant_name])
 
         tenant_quota.subtract(used_quota)
-        log.debug("Current tenant quota: %s", tenant_quota)
+        log.debug("Current tenant quota for %s: %s", tenant_name, tenant_quota)
         tenant_quota.subtract(needed_quota)
-        log.debug("Predicted remaining tenant quota: %s", tenant_quota)
+        log.debug("Predicted remaining tenant quota for %s: %s",
+                  tenant_name, tenant_quota)
         return tenant_quota.non_negative()
 
     def _getUsedQuotaForTenant(self, tenant_name):
diff --git a/nodepool/tests/fixtures/kubernetes-default-limits.yaml b/nodepool/tests/fixtures/kubernetes-default-limits.yaml
new file mode 100644
index 000000000..1b7b42b1e
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-default-limits.yaml
@@ -0,0 +1,32 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: pod-default
+  - name: pod-custom-cpu
+  - name: pod-custom-mem
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        default-label-cpu: 2
+        default-label-memory: 1024
+        labels:
+          - name: pod-default
+            type: pod
+          - name: pod-custom-cpu
+            type: pod
+            cpu: 4
+          - name: pod-custom-mem
+            type: pod
+            memory: 2048
diff --git a/nodepool/tests/fixtures/kubernetes-pool-quota-cores.yaml b/nodepool/tests/fixtures/kubernetes-pool-quota-cores.yaml
new file mode 100644
index 000000000..c66cff812
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-pool-quota-cores.yaml
@@ -0,0 +1,25 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        max-cores: 4
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            cpu: 2
diff --git a/nodepool/tests/fixtures/kubernetes-pool-quota-ram.yaml b/nodepool/tests/fixtures/kubernetes-pool-quota-ram.yaml
new file mode 100644
index 000000000..ecf89ad3e
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-pool-quota-ram.yaml
@@ -0,0 +1,25 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        max-ram: 2048
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            memory: 1024
diff --git a/nodepool/tests/fixtures/kubernetes-pool-quota-servers.yaml b/nodepool/tests/fixtures/kubernetes-pool-quota-servers.yaml
new file mode 100644
index 000000000..e23e7f87b
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-pool-quota-servers.yaml
@@ -0,0 +1,24 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        max-servers: 2
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
diff --git a/nodepool/tests/fixtures/kubernetes-tenant-quota-cores.yaml b/nodepool/tests/fixtures/kubernetes-tenant-quota-cores.yaml
new file mode 100644
index 000000000..97a8e022d
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-tenant-quota-cores.yaml
@@ -0,0 +1,28 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    max-cores: 4
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            cpu: 2
diff --git a/nodepool/tests/fixtures/kubernetes-tenant-quota-ram.yaml b/nodepool/tests/fixtures/kubernetes-tenant-quota-ram.yaml
new file mode 100644
index 000000000..e00cf7d4b
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-tenant-quota-ram.yaml
@@ -0,0 +1,28 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    max-ram: 2048
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
+            memory: 1024
diff --git a/nodepool/tests/fixtures/kubernetes-tenant-quota-servers.yaml b/nodepool/tests/fixtures/kubernetes-tenant-quota-servers.yaml
new file mode 100644
index 000000000..42f41059a
--- /dev/null
+++ b/nodepool/tests/fixtures/kubernetes-tenant-quota-servers.yaml
@@ -0,0 +1,27 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    max-servers: 2
+
+labels:
+  - name: pod-fedora
+
+providers:
+  - name: kubespray
+    driver: kubernetes
+    context: admin-cluster.local
+    pools:
+      - name: main
+        labels:
+          - name: pod-fedora
+            type: pod
+            image: docker.io/fedora:28
diff --git a/nodepool/tests/fixtures/kubernetes.yaml b/nodepool/tests/fixtures/kubernetes.yaml
index 917a1cb05..f747a5707 100644
--- a/nodepool/tests/fixtures/kubernetes.yaml
+++ b/nodepool/tests/fixtures/kubernetes.yaml
@@ -22,7 +22,6 @@ providers:
     context: admin-cluster.local
     pools:
       - name: main
-        max-servers: 2
         node-attributes:
           key1: value1
           key2: value2
diff --git a/nodepool/tests/unit/test_driver_kubernetes.py b/nodepool/tests/unit/test_driver_kubernetes.py
index 80c01eb18..ae6fcc117 100644
--- a/nodepool/tests/unit/test_driver_kubernetes.py
+++ b/nodepool/tests/unit/test_driver_kubernetes.py
@@ -156,15 +156,83 @@ class TestDriverKubernetes(tests.DBTestCase):
         self.waitForNodeDeletion(node)
 
-    def test_kubernetes_max_servers(self):
-        configfile = self.setup_config('kubernetes.yaml')
+    def test_kubernetes_default_label_resources(self):
+        configfile = self.setup_config('kubernetes-default-limits.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('pod-default')
+        req.node_types.append('pod-custom-cpu')
+        req.node_types.append('pod-custom-mem')
+        self.zk.storeNodeRequest(req)
+
+        self.log.debug("Waiting for request %s", req.id)
+        req = self.waitForNodeRequest(req)
+        self.assertEqual(req.state, zk.FULFILLED)
+
+        self.assertNotEqual(req.nodes, [])
+        node_default = self.zk.getNode(req.nodes[0])
+        node_cust_cpu = self.zk.getNode(req.nodes[1])
+        node_cust_mem = self.zk.getNode(req.nodes[2])
+
+        resources_default = {
+            'instances': 1,
+            'cores': 2,
+            'ram': 1024,
+        }
+        resources_cust_cpu = {
+            'instances': 1,
+            'cores': 4,
+            'ram': 1024,
+        }
+        resources_cust_mem = {
+            'instances': 1,
+            'cores': 2,
+            'ram': 2048,
+        }
+
+        self.assertDictEqual(resources_default, node_default.resources)
+        self.assertDictEqual(resources_cust_cpu, node_cust_cpu.resources)
+        self.assertDictEqual(resources_cust_mem, node_cust_mem.resources)
+
+        for node in (node_default, node_cust_cpu, node_cust_mem):
+            node.state = zk.DELETING
+            self.zk.storeNode(node)
+            self.waitForNodeDeletion(node)
+
+    def test_kubernetes_pool_quota_servers(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-servers.yaml')
+
+    def test_kubernetes_pool_quota_cores(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-cores.yaml')
+
+    def test_kubernetes_pool_quota_ram(self):
+        self._test_kubernetes_quota('kubernetes-pool-quota-ram.yaml')
+
+    def test_kubernetes_tenant_quota_servers(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-servers.yaml', pause=False)
+
+    def test_kubernetes_tenant_quota_cores(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-cores.yaml', pause=False)
+
+    def test_kubernetes_tenant_quota_ram(self):
+        self._test_kubernetes_quota(
+            'kubernetes-tenant-quota-ram.yaml', pause=False)
+
+    def _test_kubernetes_quota(self, config, pause=True):
+        configfile = self.setup_config(config)
         pool = self.useNodepool(configfile, watermark_sleep=1)
         pool.start()
 
         # Start two pods to hit max-server limit
         reqs = []
-        for x in [1, 2]:
+        for _ in [1, 2]:
             req = zk.NodeRequest()
             req.state = zk.REQUESTED
+            req.tenant_name = 'tenant-1'
             req.node_types.append('pod-fedora')
             self.zk.storeNodeRequest(req)
             reqs.append(req)
@@ -179,13 +247,19 @@ class TestDriverKubernetes(tests.DBTestCase):
         # Now request a third pod that will hit the limit
         max_req = zk.NodeRequest()
         max_req.state = zk.REQUESTED
+        max_req.tenant_name = 'tenant-1'
         max_req.node_types.append('pod-fedora')
         self.zk.storeNodeRequest(max_req)
 
-        # The previous request should pause the handler
-        pool_worker = pool.getPoolWorkers('kubespray')
-        while not pool_worker[0].paused_handler:
-            time.sleep(0.1)
+        # If at pool quota, the handler will get paused,
+        # but not if at tenant quota.
+        if pause:
+            # The previous request should pause the handler
+            pool_worker = pool.getPoolWorkers('kubespray')
+            while not pool_worker[0].paused_handler:
+                time.sleep(0.1)
+        else:
+            self.waitForNodeRequest(max_req, (zk.REQUESTED,))
 
         # Delete the earlier two pods freeing space for the third.
         for req in fulfilled_reqs:
@@ -195,5 +269,5 @@ class TestDriverKubernetes(tests.DBTestCase):
             self.waitForNodeDeletion(node)
 
         # We should unpause and fulfill this now
-        req = self.waitForNodeRequest(max_req)
+        req = self.waitForNodeRequest(max_req, (zk.FULFILLED,))
         self.assertEqual(req.state, zk.FULFILLED)
diff --git a/releasenotes/notes/kubernetes-default-limits-f4bcc430a6274043.yaml b/releasenotes/notes/kubernetes-default-limits-f4bcc430a6274043.yaml
new file mode 100644
index 000000000..d7ff3f8f6
--- /dev/null
+++ b/releasenotes/notes/kubernetes-default-limits-f4bcc430a6274043.yaml
@@ -0,0 +1,21 @@
+---
+features:
+  - |
+    Config options for kubernetes providers were added to define default
+    limits for cpu and memory of pod-type labels.
+
+    * :attr:`providers.[kubernetes].pools.default-label-cpu`
+    * :attr:`providers.[kubernetes].pools.default-label-memory`
+
+    These values apply to all pod-type labels within the same pool that do
+    not override these limits. This makes it possible to enforce resource
+    limits on pod labels and thereby to account for pool and tenant quotas
+    in terms of cpu and memory consumption. New config options for
+    kubernetes pools therefore also include
+
+    * :attr:`providers.[kubernetes].pools.max-cores`
+    * :attr:`providers.[kubernetes].pools.max-ram`
+
+    The existing tenant quota settings apply accordingly. Note that cpu and
+    memory quotas still cannot be considered for labels that do not specify
+    any limits, i.e. where neither a pool default nor a label-specific limit is set.
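
For reference, below is an illustrative pool configuration that combines the options
introduced by this change. This is a sketch assembled from the test fixtures above
(the provider name ``kubespray``, context ``admin-cluster.local``, label names, and
limit values are fixture values, not requirements); combining all options in a single
pool is for illustration only and is not an excerpt from the patch::

    labels:
      - name: pod-default
      - name: pod-custom-cpu
      - name: pod-custom-mem

    providers:
      - name: kubespray
        driver: kubernetes
        context: admin-cluster.local
        pools:
          - name: main
            # Per-pool quota limits checked by the launcher.
            max-servers: 2
            max-cores: 4
            max-ram: 2048
            # Defaults applied to pod-type labels that set no cpu/memory.
            default-label-cpu: 2
            default-label-memory: 1024
            labels:
              - name: pod-default
                type: pod
              - name: pod-custom-cpu
                type: pod
                cpu: 4            # overrides default-label-cpu
              - name: pod-custom-mem
                type: pod
                memory: 2048      # overrides default-label-memory

With such a pool, ``quotaNeededByLabel`` accounts 2 cores and 1024 memory for
``pod-default``, 4 cores and the default 1024 memory for ``pod-custom-cpu``, and the
default 2 cores and 2048 memory for ``pod-custom-mem``, which is what the new
``test_kubernetes_default_label_resources`` test asserts against the resources
recorded on each node.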