[nova] Fix nova bootstrap script

- Fix a typo that prevented the script from timing out
- Print timestamps while waiting for computes to be ready.
  This is necessary for debugging. There have been many
  failures of the bootstrap script in test jobs recently.
- For test jobs enable extra waiting in the cell setup
  init script. This postpones the hypervisor discovery
  until all computes are up.
- Update the bootstrap job dependency. It must start after
  the cell setup job is finished.

Also fix the LDAP chart. Update the openldap image to symas/openldap:2.6.8-debian-12
and update the chart accordingly.

These two fixes must be put in a single commit because
both are blockers.

Signed-off-by: Vladimir Kozhukalov <kozhukalov@gmail.com>
Change-Id: I3895e9be5365639c8eb796550faee900b2a1aeda
This commit is contained in:
Vladimir Kozhukalov
2026-03-31 14:07:00 -05:00
parent 8fa7e4be34
commit 26ba057c83
7 changed files with 78 additions and 10 deletions

View File

@@ -5,4 +5,19 @@ set -xe
{{- $port := tuple "ldap" "internal" "ldap" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
LDAPHOST="{{ .Values.endpoints.ldap.scheme }}://{{ $url }}:{{ $port }}"
ADMIN="cn={{ .Values.secrets.identity.admin }},{{ tuple .Values.openldap.domain . | include "splitdomain" }}"
ldapadd -x -D $ADMIN -H $LDAPHOST -w {{ .Values.openldap.password }} -f /etc/sample_data.ldif
PASSWORD="{{ .Values.openldap.password }}"
# Wait for LDAP server to be ready
retries=0
max_retries=60
until ldapsearch -x -H $LDAPHOST -b "" -s base "(objectclass=*)" namingContexts 2>/dev/null | grep -q namingContexts; do
retries=$((retries + 1))
if [ $retries -ge $max_retries ]; then
echo "ERROR: LDAP server not reachable after $max_retries attempts"
exit 1
fi
echo "Waiting for LDAP server to be ready... ($retries/$max_retries)"
sleep 5
done
ldapadd -x -c -D $ADMIN -H $LDAPHOST -w $PASSWORD -f /etc/sample_data.ldif

View File

@@ -46,26 +46,50 @@ spec:
{{ tuple $envAll "ldap" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
nodeSelector:
{{ .Values.labels.server.node_selector_key }}: {{ .Values.labels.server.node_selector_value | quote }}
securityContext:
fsGroup: 1001
initContainers:
{{ tuple $envAll "ldap" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 6 }}
- name: ldap-perms
{{ tuple $envAll "ldap" | include "helm-toolkit.snippets.image" | indent 8 }}
securityContext:
runAsUser: 0
command:
- chown
- -R
- "1001:1001"
- /openldap/data
- /openldap/slapd.d
volumeMounts:
- name: ldap-data
mountPath: /openldap/data
- name: ldap-config
mountPath: /openldap/slapd.d
containers:
- name: ldap
{{ tuple $envAll "ldap" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.server | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: LDAP_DOMAIN
value: {{ .Values.openldap.domain }}
- name: LDAP_ROOT
value: {{ tuple .Values.openldap.domain . | include "splitdomain" }}
- name: LDAP_ADMIN_PASSWORD
value: {{ .Values.openldap.password }}
- name: LDAP_SKIP_DEFAULT_TREE
value: "yes"
ports:
- containerPort: {{ tuple "ldap" "internal" "ldap" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
readinessProbe:
tcpSocket:
port: {{ tuple "ldap" "internal" "ldap" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
initialDelaySeconds: 5
periodSeconds: 10
volumeMounts:
- name: pod-tmp
mountPath: /tmp
- name: ldap-data
mountPath: /var/lib/ldap
mountPath: /openldap/data
- name: ldap-config
mountPath: /etc/ldap/slapd.d
mountPath: /openldap/slapd.d
volumes:
- name: pod-tmp
emptyDir: {}

View File

@@ -65,9 +65,9 @@ pod:
images:
tags:
bootstrap: "docker.io/osixia/openldap:1.2.0"
ldap: "docker.io/osixia/openldap:1.2.0"
dep_check: quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_jammy
bootstrap: "symas/openldap:2.6.8-debian-12"
ldap: "symas/openldap:2.6.8-debian-12"
dep_check: quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_noble
image_repo_sync: quay.io/airshipit/docker:27.5.0
pull_policy: IfNotPresent
local_registry:
@@ -161,7 +161,7 @@ endpoints:
scheme: 'ldap'
port:
ldap:
default: 389
default: 1389
network_policy:
ldap:
@@ -172,6 +172,13 @@ network_policy:
data:
sample: |
dn: dc=cluster,dc=local
objectClass: top
objectClass: dcObject
objectClass: organization
dc: cluster
o: cluster
dn: ou=People,dc=cluster,dc=local
objectclass: organizationalunit
ou: People

View File

@@ -183,6 +183,7 @@ bootstrap:
WAIT_AFTER_READY=0
SLEEP=5
while [[ $HYPERVISOR_WAIT == true ]]; do
date '+%Y-%m-%d %H:%M:%S.%3N'
# Its possible that openstack command may fail due to not being able to
# reach the compute service
set +e
@@ -195,7 +196,7 @@ bootstrap:
if [[ $PERCENT_READY == 100 ]]; then
HYPERVISOR_WAIT=false
echo "All hypervisors are ready."
elif [[ WAIT_AFTER_READY -ge $REMAINING_WAIT ]]; then
elif [[ $WAIT_AFTER_READY -ge $REMAINING_WAIT ]]; then
HYPERVISOR_WAIT=false
echo "Waited the configured time -- $HYPERVISORS out of $COMPUTE_HOSTS hypervisor(s) ready -- proceeding with the bootstrap."
else
@@ -355,6 +356,8 @@ dependencies:
- endpoint: internal
service: identity
bootstrap:
jobs:
- nova-cell-setup
services:
- endpoint: internal
service: identity

View File

@@ -0,0 +1,5 @@
---
ldap:
- |
Update openldap image to symas/openldap:2.6.8-debian-12
...

View File

@@ -0,0 +1,10 @@
---
nova:
- |
Update the default wait-for-computes bootstrap script. It now prints timestamps
while waiting for the computes to be ready, which is handy for debugging.
Also fix a typo that prevented the script from failing after the timeout.
- |
Update dependencies for the bootstrap job. It must start after the cell setup job
is finished because it looks up hypervisors, which are discovered by the cell setup job.
...

View File

@@ -45,6 +45,10 @@ conf:
bootstrap:
wait_for_computes:
enabled: true
jobs:
cell_setup:
extended_wait:
enabled: true
EOF
: ${OSH_EXTRA_HELM_ARGS:=""}
helm upgrade --install nova ${OSH_HELM_REPO}/nova \