Add disambiguator to k8s/openshift resources

If the creation of a k8s namespace or pod fails, we will retry.
If we attempt to create a namespace or pod with the same name
and k8s has not finished deleting the old resource, then it
may respond with a 409 error and thwart our attempt to retry.

To address this, append the attempt counter to the name of whatever
top-level resource we are creating (usually a namespace, except in the
case of the openshiftpods driver, where it is a pod).  This gives each
attempt a unique name and avoids the collision.

Change-Id: Idc3b1a19857c5fea40c5dfb9a142914857fa2d7a
Author: James E. Blair
Date: 2024-12-19 14:26:49 -08:00
Parent: 8fbaf3d295
Commit: 6df3f42cc3
5 changed files with 15 additions and 14 deletions
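
A minimal sketch (illustration only, not Nodepool code) of the pattern the
commit message describes: the launch attempt number is appended to the
resource name so that a retried attempt never reuses a name k8s may still
be deleting.  The create() callable and the example names are hypothetical.

    def resource_name(pool, node_id, attempt):
        # e.g. pool "main", node "0000000042", attempt 2 -> "main-0000000042-2"
        return "%s-%s-%s" % (pool, node_id, attempt)

    def launch_with_retries(create, pool, node_id, retries=3):
        # create() is a hypothetical callable that raises if the resource
        # name still exists (e.g. a 409 Conflict from the API server).
        for attempt in range(1, retries + 1):
            try:
                return create(resource_name(pool, node_id, attempt))
            except Exception:
                if attempt == retries:
                    raise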


@@ -29,16 +29,16 @@ class K8SLauncher(NodeLauncher):
         self.label = provider_label
         self._retries = provider_config.launch_retries
 
-    def _launchLabel(self):
+    def _launchLabel(self, attempt):
         self.log.debug("Creating resource")
         if self.label.type == "namespace":
             resource = self.handler.manager.createNamespace(
                 self.node, self.handler.pool.name, self.label,
-                self.handler.request)
+                self.handler.request, attempt)
         else:
             resource = self.handler.manager.createPod(
                 self.node, self.handler.pool.name, self.label,
-                self.handler.request)
+                self.handler.request, attempt)
 
         self.node.state = zk.READY
         self.node.python_path = self.label.python_path
@@ -61,7 +61,7 @@ class K8SLauncher(NodeLauncher):
         attempts = 1
         while attempts <= self._retries:
             try:
-                self._launchLabel()
+                self._launchLabel(attempts)
                 break
             except kze.SessionExpiredError:
                 # If we lost our ZooKeeper session, we've lost our node lock


@@ -157,10 +157,11 @@ class KubernetesProvider(Provider, QuotaSupport):
             time.sleep(1)
 
     def createNamespace(
-        self, node, pool, label, request, restricted_access=False
+        self, node, pool, label, request, attempt,
+        restricted_access=False
     ):
         name = node.id
-        namespace = "%s-%s" % (pool, name)
+        namespace = "%s-%s-%s" % (pool, name, attempt)
         user = "zuul-worker"
 
         self.log.debug("%s: creating namespace" % namespace)
@@ -303,12 +304,12 @@ class KubernetesProvider(Provider, QuotaSupport):
         self.log.info("%s: namespace created" % namespace)
         return resource
 
-    def createPod(self, node, pool, label, request):
+    def createPod(self, node, pool, label, request, attempt):
         if label.spec:
             pod_body = self.getPodBodyCustom(node, pool, label, request)
         else:
             pod_body = self.getPodBodyNodepool(node, pool, label, request)
-        resource = self.createNamespace(node, pool, label, request,
+        resource = self.createNamespace(node, pool, label, request, attempt,
                                         restricted_access=True)
         namespace = resource['namespace']
         self.k8s_client.create_namespaced_pod(namespace, pod_body)


@@ -30,9 +30,9 @@ class OpenshiftLauncher(NodeLauncher):
         self.label = provider_label
         self._retries = provider_config.launch_retries
 
-    def _launchLabel(self):
+    def _launchLabel(self, attempt):
         self.log.debug("Creating resource")
-        project = "%s-%s" % (self.handler.pool.name, self.node.id)
+        project = "%s-%s-%s" % (self.handler.pool.name, self.node.id, attempt)
         self.node.external_id = self.handler.manager.createProject(
             self.node, self.handler.pool.name, project, self.label,
             self.handler.request)
@@ -66,7 +66,7 @@ class OpenshiftLauncher(NodeLauncher):
         attempts = 1
         while attempts <= self._retries:
             try:
-                self._launchLabel()
+                self._launchLabel(attempts)
                 break
             except kze.SessionExpiredError:
                 # If we lost our ZooKeeper session, we've lost our node lock


@@ -22,9 +22,9 @@ from nodepool.driver.openshift.handler import OpenshiftNodeRequestHandler
 
 class OpenshiftPodLauncher(OpenshiftLauncher):
-    def _launchLabel(self):
+    def _launchLabel(self, attempt):
         self.log.debug("Creating resource")
-        pod_name = "%s-%s" % (self.label.name, self.node.id)
+        pod_name = "%s-%s-%s" % (self.label.name, self.node.id, attempt)
         project = self.handler.pool.name
 
         self.handler.manager.createPod(self.node, self.handler.pool.name,
                                        project, pod_name, self.label,


@@ -470,7 +470,7 @@ class TestDriverOpenshiftPods(tests.DBTestCase):
         self.assertEqual(node.host_id, 'k8s-default-pool-abcd-1234')
         ns, pod = self.fake_k8s_client._pod_requests[0]
         self.assertEqual(pod['metadata'], {
-            'name': 'pod-custom-0000000000',
+            'name': 'pod-custom-0000000000-1',
             'annotations': {},
             'labels': {
                 'nodepool_node_id': '0000000000',