Issue a smart-reconfigure after (re-)creating zuul
Zuul no longer automatically performs a smart-reconfigure on startup, so we need to do that ourselves in case the tenant config has changed. There's a minor race window after the zuul CR spec changes where the statefulset of the scheduler has not yet rolled out. We have to wait for the rollout to complete before calling smart-reconfigure, or we risk running it on pods scheduled for deletion. Also adding a fix from https://review.opendev.org/c/zuul/zuul-operator/+/861279, which is needed to get exec in pods to work. Change-Id: Ib35e85ed7666c2eb322971302f7f0d94a28bfa1f Co-Authored-By: Jan Gutter <github@jangutter.com> Co-Authored-By: Michal Nasiadka <mnasiadka@gmail.com> Co-Authored-By: Michael Kelly <mkelly@arista.com>
This commit is contained in:
@@ -161,7 +161,7 @@
|
|||||||
var: console_stream
|
var: console_stream
|
||||||
|
|
||||||
- name: fail if console stream does not contains expected job output
|
- name: fail if console stream does not contains expected job output
|
||||||
when: "'Job console starting...' not in console_stream.stdout"
|
when: "'Job console starting' not in console_stream.stdout"
|
||||||
# It seems like wsdump.py doesn't always stay connected for the whole job duration
|
# It seems like wsdump.py doesn't always stay connected for the whole job duration
|
||||||
# when: "'Demo job is running' not in console_stream.stdout"
|
# when: "'Demo job is running' not in console_stream.stdout"
|
||||||
fail:
|
fail:
|
||||||
|
|||||||
@@ -174,6 +174,11 @@ def update_fn(name, namespace, logger, old, new, memo, **kwargs):
|
|||||||
if spec_changed:
|
if spec_changed:
|
||||||
zuul.create_zuul()
|
zuul.create_zuul()
|
||||||
|
|
||||||
|
if conf_changed:
|
||||||
|
if spec_changed:
|
||||||
|
zuul.wait_for_statefulset('zuul-scheduler')
|
||||||
|
zuul.smart_reconfigure()
|
||||||
|
|
||||||
memoize_secrets(memo, logger)
|
memoize_secrets(memo, logger)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ import string
|
|||||||
import kopf
|
import kopf
|
||||||
import yaml
|
import yaml
|
||||||
import jinja2
|
import jinja2
|
||||||
import kubernetes
|
|
||||||
from kubernetes.client import Configuration
|
from kubernetes.client import Configuration
|
||||||
from kubernetes.client.api import core_v1_api
|
from kubernetes.client.api import core_v1_api
|
||||||
from kubernetes.stream import stream
|
from kubernetes.stream import stream
|
||||||
@@ -82,7 +81,6 @@ def update_secret(api, namespace, name, string_data):
|
|||||||
|
|
||||||
|
|
||||||
def pod_exec(namespace, name, command):
|
def pod_exec(namespace, name, command):
|
||||||
kubernetes.config.load_kube_config()
|
|
||||||
try:
|
try:
|
||||||
c = Configuration().get_default_copy()
|
c = Configuration().get_default_copy()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import kopf
|
|||||||
import copy
|
import copy
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import time
|
||||||
|
|
||||||
import jinja2
|
import jinja2
|
||||||
import pykube
|
import pykube
|
||||||
@@ -410,6 +411,29 @@ class Zuul:
|
|||||||
utils.apply_file(self.api, 'zuul.yaml', namespace=self.namespace, **kw)
|
utils.apply_file(self.api, 'zuul.yaml', namespace=self.namespace, **kw)
|
||||||
self.create_nodepool()
|
self.create_nodepool()
|
||||||
|
|
||||||
|
def wait_for_statefulset(self, set_name, tries=6, delay=10):
    """Wait for the named StatefulSet to finish rolling out.

    The StatefulSet is looked up in ``self.namespace`` up to ``tries``
    times, sleeping ``delay`` seconds after every check that does not
    show a completed rollout.  The rollout counts as complete when:

    * the controller has observed the latest spec generation (when both
      ``metadata.generation`` and ``status.observedGeneration`` are
      reported),
    * ``replicas``, ``currentReplicas`` and ``readyReplicas`` in the
      status all equal ``spec.replicas``, and
    * ``status.updateRevision`` equals ``status.currentRevision``.

    :param set_name: name of the StatefulSet; also used as the value of
        the ``app.kubernetes.io/component`` label in the selector.
    :param tries: maximum number of status checks.
    :param delay: seconds to sleep after each unsuccessful check.
    :returns: ``None`` in every case.  A timeout is only logged as an
        error, so callers continue on a best-effort basis.
    """
    self.log.info("Waiting for StatefulSet %s to finish rollout", set_name)
    selector = {'app.kubernetes.io/instance': self.name,
                'app.kubernetes.io/component': set_name,
                'app.kubernetes.io/name': 'zuul',
                'app.kubernetes.io/part-of': 'zuul'}
    for _ in range(tries):
        scheduler_set = objects.StatefulSet.objects(self.api).filter(
            namespace=self.namespace,
            selector=selector).get(name=set_name)
        obj = scheduler_set.obj
        spec = obj['spec']
        # A freshly created StatefulSet may not carry a status yet; treat
        # that as "not rolled out" instead of raising KeyError.
        status = obj.get('status') or {}

        # Right after the spec changes, the status can still describe the
        # previous (completed) rollout.  Only trust it once the controller
        # reports having observed the current generation.  If either
        # field is missing we cannot tell, so fall back to the
        # replica/revision checks alone.
        generation = (obj.get('metadata') or {}).get('generation')
        observed = status.get('observedGeneration')
        spec_observed = (generation is None or observed is None or
                         observed >= generation)

        replicas = spec['replicas']
        if (spec_observed and
                replicas == status.get('replicas', None) and
                replicas == status.get('currentReplicas', None) and
                replicas == status.get('readyReplicas', None) and
                (status.get('updateRevision', None) ==
                 status.get('currentRevision', None))):
            self.log.info("StatefulSet %s completed rollout", set_name)
            return
        time.sleep(delay)
    self.log.error("StatefulSet did not finish rollout after %d seconds",
                   tries * delay)
|
||||||
|
|
||||||
def smart_reconfigure(self):
|
def smart_reconfigure(self):
|
||||||
self.log.info("Smart reconfigure")
|
self.log.info("Smart reconfigure")
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user