Fix gpu parameter type in openshiftpods

In config validation, the gpu parameter type was specified as str
rather than float.  This is corrected.

This was not discovered in testing because the only tests which use
the gpu parameter for the other k8s drivers are not present in the
openshiftpods driver.  This change also adds the missing tests for
the default resource and resource limits feature which exercises the
gpu limits.

Change-Id: Ife932acaeb5a90ebc94ad36c3b4615a4469f0c40
This commit is contained in:
James E. Blair
2023-12-01 08:03:24 -08:00
parent f39d2b955a
commit 5849c3b9a7
5 changed files with 306 additions and 1 deletions

View File

@@ -72,7 +72,7 @@ class OpenshiftPodsProviderConfig(OpenshiftProviderConfig):
'cpu-limit': int,
'memory-limit': int,
'storage-limit': int,
'gpu': str,
'gpu': float,
'gpu-resource': str,
'python-path': str,
'shell-type': str,

View File

@@ -50,6 +50,9 @@ class OpenshiftPodLauncher(OpenshiftLauncher):
'user': 'zuul-worker',
}
self.node.connection_type = "kubectl"
pool = self.handler.provider.pools.get(self.node.pool)
self.node.resources = self.handler.manager.quotaNeededByLabel(
self.node.type[0], pool).get_resources()
self.node.cloud = self.provider_config.context
self.node.host_id = pod_node_id
self.zk.storeNode(self.node)

View File

@@ -0,0 +1,40 @@
# Test fixture for the openshiftpods driver: pool-level default label
# resource limits (default-label-*-limit) combined with per-label
# overrides (cpu-limit / memory-limit / storage-limit).
# Placeholder values in braces are substituted by the test harness.
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

labels:
  - name: pod-default
  - name: pod-custom-cpu
  - name: pod-custom-mem
  - name: pod-custom-storage

providers:
  - name: openshift
    driver: openshiftpods
    context: admin-cluster.local
    pools:
      - name: main
        # Default resource requests applied to every label in this pool.
        default-label-cpu: 2
        default-label-memory: 1024
        default-label-storage: 10
        # Default resource limits applied to every label in this pool.
        default-label-cpu-limit: 8
        default-label-memory-limit: 4196
        default-label-storage-limit: 40
        labels:
          - name: pod-default
            image: test
          - name: pod-custom-cpu
            image: test
            # Overrides default-label-cpu-limit for this label only.
            cpu-limit: 4
          - name: pod-custom-mem
            image: test
            # Overrides default-label-memory-limit for this label only.
            memory-limit: 2048
          - name: pod-custom-storage
            image: test
            # Overrides default-label-storage-limit for this label only.
            storage-limit: 20

View File

@@ -0,0 +1,42 @@
# Test fixture for the openshiftpods driver: pool-level default label
# resource requests (default-label-*) combined with per-label
# overrides (cpu / memory / storage), plus a fractional gpu request
# which exercises the float type of the 'gpu' config parameter.
# Placeholder values in braces are substituted by the test harness.
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

labels:
  - name: pod-default
  - name: pod-custom-cpu
  - name: pod-custom-mem
  - name: pod-custom-storage
  - name: pod-custom-gpu

providers:
  - name: openshift
    driver: openshiftpods
    context: admin-cluster.local
    pools:
      - name: main
        # Default resource requests applied to every label in this pool.
        default-label-cpu: 2
        default-label-memory: 1024
        default-label-storage: 10
        labels:
          - name: pod-default
            image: test
          - name: pod-custom-cpu
            image: test
            # Overrides default-label-cpu for this label only.
            cpu: 4
          - name: pod-custom-mem
            image: test
            # Overrides default-label-memory for this label only.
            memory: 2048
          - name: pod-custom-storage
            image: test
            # Overrides default-label-storage for this label only.
            storage: 20
          - name: pod-custom-gpu
            image: test
            # Custom GPU resource name and a fractional (float) amount.
            gpu-resource: gpu-vendor.example/example-gpu
            gpu: 0.5

View File

@@ -24,6 +24,7 @@ from nodepool.zk import zookeeper as zk
class FakeCoreClient(object):
def __init__(self):
self._pod_requests = []
self.pods = []
class FakeApi:
@@ -41,6 +42,7 @@ class FakeCoreClient(object):
class FakePod:
class metadata:
name = pod_body['metadata']['name']
self._pod_requests.append((ns, pod_body))
self.pods.append(FakePod)
return FakePod
@@ -221,3 +223,221 @@ class TestDriverOpenshiftPods(tests.DBTestCase):
def test_openshift_pod_tenant_quota_extra(self):
    """Tenant-quota scenario driven by the 'extra' quota config."""
    config_name = 'openshiftpods-tenant-quota-extra.yaml'
    self._test_openshift_pod_quota(config_name, pause=False)
def test_openshift_pod_default_label_resources(self):
    """Exercise pool-level default label resources, including gpu.

    Requests one node of each label from the default-resources
    fixture and verifies both the resources recorded on each node
    and the resource requests/limits of the pods submitted to the
    (fake) k8s API.  In this config, no separate limits are set, so
    requests and limits are expected to be identical per pod.
    """
    configfile = self.setup_config('openshiftpods-default-resources.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.startPool(pool)

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('pod-default')
    req.node_types.append('pod-custom-cpu')
    req.node_types.append('pod-custom-mem')
    req.node_types.append('pod-custom-storage')
    req.node_types.append('pod-custom-gpu')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertNotEqual(req.nodes, [])

    node_default = self.zk.getNode(req.nodes[0])
    node_cust_cpu = self.zk.getNode(req.nodes[1])
    node_cust_mem = self.zk.getNode(req.nodes[2])
    node_cust_storage = self.zk.getNode(req.nodes[3])
    node_cust_gpu = self.zk.getNode(req.nodes[4])

    resources_default = {
        'instances': 1,
        'cores': 2,
        'ram': 1024,
        'ephemeral-storage': 10,
    }
    resources_cust_cpu = {
        'instances': 1,
        'cores': 4,
        'ram': 1024,
        'ephemeral-storage': 10,
    }
    resources_cust_mem = {
        'instances': 1,
        'cores': 2,
        'ram': 2048,
        'ephemeral-storage': 10,
    }
    resources_cust_storage = {
        'instances': 1,
        'cores': 2,
        'ram': 1024,
        'ephemeral-storage': 20,
    }
    resources_cust_gpu = {
        'instances': 1,
        'cores': 2,
        'ram': 1024,
        'ephemeral-storage': 10,
        'gpu-vendor.example/example-gpu': 0.5
    }
    self.assertDictEqual(resources_default, node_default.resources)
    self.assertDictEqual(resources_cust_cpu, node_cust_cpu.resources)
    self.assertDictEqual(resources_cust_mem, node_cust_mem.resources)
    self.assertDictEqual(resources_cust_storage,
                         node_cust_storage.resources)
    self.assertDictEqual(resources_cust_gpu, node_cust_gpu.resources)

    # Expected container resources for each submitted pod, in
    # request order; requests == limits here because the fixture
    # sets no separate limit values.
    expected_pod_resources = [
        {'cpu': 2, 'ephemeral-storage': '10M', 'memory': '1024Mi'},
        {'cpu': 4, 'ephemeral-storage': '10M', 'memory': '1024Mi'},
        {'cpu': 2, 'ephemeral-storage': '10M', 'memory': '2048Mi'},
        {'cpu': 2, 'ephemeral-storage': '20M', 'memory': '1024Mi'},
        {'cpu': 2, 'ephemeral-storage': '10M', 'memory': '1024Mi',
         'gpu-vendor.example/example-gpu': '0.50'},
    ]
    for i, expected in enumerate(expected_pod_resources):
        ns, pod = self.fake_k8s_client._pod_requests[i]
        self.assertEqual(pod['spec']['containers'][0]['resources'], {
            'limits': expected,
            'requests': expected,
        })

    # Bug fix: node_cust_storage was previously missing from this
    # cleanup list, so its node was never deleted at the end of the
    # test.
    for node in (node_default,
                 node_cust_cpu,
                 node_cust_mem,
                 node_cust_storage,
                 node_cust_gpu):
        node.state = zk.DELETING
        self.zk.storeNode(node)
        self.waitForNodeDeletion(node)
def test_openshift_pod_default_label_limits(self):
    """Verify pool-level default label limits and per-label overrides.

    Requests one node of each label from the default-limits fixture
    and checks the resource requests/limits of each pod submitted to
    the (fake) k8s API.  All pods share the pool's default requests;
    only the limits vary per label.
    """
    configfile = self.setup_config('openshiftpods-default-limits.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.startPool(pool)

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    for label in ('pod-default',
                  'pod-custom-cpu',
                  'pod-custom-mem',
                  'pod-custom-storage'):
        req.node_types.append(label)
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertNotEqual(req.nodes, [])

    # Requests come from the pool defaults and are identical for
    # every pod in this fixture.
    common_requests = {
        'cpu': 2,
        'ephemeral-storage': '10M',
        'memory': '1024Mi',
    }
    # Per-pod limits, in request order: pool defaults, then one
    # override each for cpu, memory, and storage.
    expected_limits = [
        {'cpu': 8, 'ephemeral-storage': '40M', 'memory': '4196Mi'},
        {'cpu': 4, 'ephemeral-storage': '40M', 'memory': '4196Mi'},
        {'cpu': 8, 'ephemeral-storage': '40M', 'memory': '2048Mi'},
        {'cpu': 8, 'ephemeral-storage': '20M', 'memory': '4196Mi'},
    ]
    for idx, limits in enumerate(expected_limits):
        ns, pod = self.fake_k8s_client._pod_requests[idx]
        self.assertEqual(pod['spec']['containers'][0]['resources'], {
            'limits': limits,
            'requests': common_requests,
        })