Add disambiguator to k8s/openshift resources
If the creation of a k8s namespace or pod fails, we will retry. If we attempt to create a namespace or pod with the same name before k8s has finished deleting the old resource, it may respond with a 409 error and thwart our attempt to retry. To address this, append the attempt counter to the end of the name of whatever top-level resource we are creating (usually a namespace, except in the case of the openshiftpods driver, where it is the pod). This results in a unique name for each attempt and avoids collisions. Change-Id: Idc3b1a19857c5fea40c5dfb9a142914857fa2d7a
This commit is contained in:
@@ -29,16 +29,16 @@ class K8SLauncher(NodeLauncher):
|
||||
self.label = provider_label
|
||||
self._retries = provider_config.launch_retries
|
||||
|
||||
def _launchLabel(self):
|
||||
def _launchLabel(self, attempt):
|
||||
self.log.debug("Creating resource")
|
||||
if self.label.type == "namespace":
|
||||
resource = self.handler.manager.createNamespace(
|
||||
self.node, self.handler.pool.name, self.label,
|
||||
self.handler.request)
|
||||
self.handler.request, attempt)
|
||||
else:
|
||||
resource = self.handler.manager.createPod(
|
||||
self.node, self.handler.pool.name, self.label,
|
||||
self.handler.request)
|
||||
self.handler.request, attempt)
|
||||
|
||||
self.node.state = zk.READY
|
||||
self.node.python_path = self.label.python_path
|
||||
@@ -61,7 +61,7 @@ class K8SLauncher(NodeLauncher):
|
||||
attempts = 1
|
||||
while attempts <= self._retries:
|
||||
try:
|
||||
self._launchLabel()
|
||||
self._launchLabel(attempts)
|
||||
break
|
||||
except kze.SessionExpiredError:
|
||||
# If we lost our ZooKeeper session, we've lost our node lock
|
||||
|
||||
@@ -157,10 +157,11 @@ class KubernetesProvider(Provider, QuotaSupport):
|
||||
time.sleep(1)
|
||||
|
||||
def createNamespace(
|
||||
self, node, pool, label, request, restricted_access=False
|
||||
self, node, pool, label, request, attempt,
|
||||
restricted_access=False
|
||||
):
|
||||
name = node.id
|
||||
namespace = "%s-%s" % (pool, name)
|
||||
namespace = "%s-%s-%s" % (pool, name, attempt)
|
||||
user = "zuul-worker"
|
||||
|
||||
self.log.debug("%s: creating namespace" % namespace)
|
||||
@@ -303,12 +304,12 @@ class KubernetesProvider(Provider, QuotaSupport):
|
||||
self.log.info("%s: namespace created" % namespace)
|
||||
return resource
|
||||
|
||||
def createPod(self, node, pool, label, request):
|
||||
def createPod(self, node, pool, label, request, attempt):
|
||||
if label.spec:
|
||||
pod_body = self.getPodBodyCustom(node, pool, label, request)
|
||||
else:
|
||||
pod_body = self.getPodBodyNodepool(node, pool, label, request)
|
||||
resource = self.createNamespace(node, pool, label, request,
|
||||
resource = self.createNamespace(node, pool, label, request, attempt,
|
||||
restricted_access=True)
|
||||
namespace = resource['namespace']
|
||||
self.k8s_client.create_namespaced_pod(namespace, pod_body)
|
||||
|
||||
@@ -30,9 +30,9 @@ class OpenshiftLauncher(NodeLauncher):
|
||||
self.label = provider_label
|
||||
self._retries = provider_config.launch_retries
|
||||
|
||||
def _launchLabel(self):
|
||||
def _launchLabel(self, attempt):
|
||||
self.log.debug("Creating resource")
|
||||
project = "%s-%s" % (self.handler.pool.name, self.node.id)
|
||||
project = "%s-%s-%s" % (self.handler.pool.name, self.node.id, attempt)
|
||||
self.node.external_id = self.handler.manager.createProject(
|
||||
self.node, self.handler.pool.name, project, self.label,
|
||||
self.handler.request)
|
||||
@@ -66,7 +66,7 @@ class OpenshiftLauncher(NodeLauncher):
|
||||
attempts = 1
|
||||
while attempts <= self._retries:
|
||||
try:
|
||||
self._launchLabel()
|
||||
self._launchLabel(attempts)
|
||||
break
|
||||
except kze.SessionExpiredError:
|
||||
# If we lost our ZooKeeper session, we've lost our node lock
|
||||
|
||||
@@ -22,9 +22,9 @@ from nodepool.driver.openshift.handler import OpenshiftNodeRequestHandler
|
||||
|
||||
|
||||
class OpenshiftPodLauncher(OpenshiftLauncher):
|
||||
def _launchLabel(self):
|
||||
def _launchLabel(self, attempt):
|
||||
self.log.debug("Creating resource")
|
||||
pod_name = "%s-%s" % (self.label.name, self.node.id)
|
||||
pod_name = "%s-%s-%s" % (self.label.name, self.node.id, attempt)
|
||||
project = self.handler.pool.name
|
||||
self.handler.manager.createPod(self.node, self.handler.pool.name,
|
||||
project, pod_name, self.label,
|
||||
|
||||
@@ -470,7 +470,7 @@ class TestDriverOpenshiftPods(tests.DBTestCase):
|
||||
self.assertEqual(node.host_id, 'k8s-default-pool-abcd-1234')
|
||||
ns, pod = self.fake_k8s_client._pod_requests[0]
|
||||
self.assertEqual(pod['metadata'], {
|
||||
'name': 'pod-custom-0000000000',
|
||||
'name': 'pod-custom-0000000000-1',
|
||||
'annotations': {},
|
||||
'labels': {
|
||||
'nodepool_node_id': '0000000000',
|
||||
|
||||
Reference in New Issue
Block a user