From f928fb2d15510167814fff56cd3b7921f9cba5cd Mon Sep 17 00:00:00 2001 From: Benjamin Schanzel Date: Mon, 17 Aug 2020 16:15:36 +0200 Subject: [PATCH] Fix Multi Node Builds with k8s Build Nodes Multi-node builds are an explicit feature of Zuul, but are currently broken with k8s/OpenShift Pod nodes. Nodepool spawns each k8s pod in its own separate namespace, with separate service accounts and contexts. Ansible handles the connection to k8s pods via its kubectl connection plugin and allows playing against pods in separate contexts via the `ansible_kubectl_context` host var. However, Zuul fails to set this host var, defaulting to the `default-context` of the kubeconfig it prepares for the build. This leads to a situation where Ansible runs every task against the same pod instead of the one targeted by the respective task. This change fixes this by setting the `ansible_kubectl_context` host vars in the inventory accordingly. Also, the preparation of the executor's kubeconfig is error-prone, as it adds multiple clusters with the same name to the config file. This leads to errors when kubectl evaluates its config. This change therefore makes sure that a cluster is added only once. 
Change-Id: I8e31f2ab209be5b3fef565901f7aa16b00ab93f6 --- zuul/executor/server.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/zuul/executor/server.py b/zuul/executor/server.py index 20292a1f44..337660a724 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -1590,6 +1590,9 @@ class AnsibleJob(object): if self.winrm_read_timeout is not None: host_vars['ansible_winrm_read_timeout_sec'] = \ self.winrm_read_timeout + elif connection_type == "kubectl": + host_vars['ansible_kubectl_context'] = \ + node.get('kubectl_context') host_keys = [] for key in node.get('host_keys', []): @@ -1906,17 +1909,23 @@ class AnsibleJob(object): } # Add cluster cluster_name = urlsplit(data['host']).netloc.replace('.', '-') - cluster = { - 'server': data['host'], - } - if data.get('ca_crt'): - cluster['certificate-authority-data'] = data['ca_crt'] - if data['skiptls']: - cluster['insecure-skip-tls-verify'] = True - kube_cfg['clusters'].append({ - 'name': cluster_name, - 'cluster': cluster, - }) + + # Do not add a cluster/server that already exists in the kubeconfig + # because that leads to 'duplicate name' errors on multi-node builds. + # Also, as the cluster name directly corresponds to a server, there + # is no need to add it twice. + if cluster_name not in [c['name'] for c in kube_cfg['clusters']]: + cluster = { + 'server': data['host'], + } + if data.get('ca_crt'): + cluster['certificate-authority-data'] = data['ca_crt'] + if data['skiptls']: + cluster['insecure-skip-tls-verify'] = True + kube_cfg['clusters'].append({ + 'name': cluster_name, + 'cluster': cluster, + }) # Add user user_name = "%s:%s" % (data['namespace'], data['user'])