diff --git a/doc/source/developer-onboarding.rst b/doc/source/developer-onboarding.rst index 2c869e16..3a849b0a 100644 --- a/doc/source/developer-onboarding.rst +++ b/doc/source/developer-onboarding.rst @@ -43,6 +43,25 @@ debug it, e.g.: ./tools/g2/bin/ssh.sh n0 +Running Resiliency Tests Behind Corporate Proxy +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your development environment is behind a corporate proxy, you will need to +update the following files to add your environment's proxy information, DNS, or +possibly your internal NTP servers, in order to deploy Airship: + + * `charts/coredns/values.yaml`: Update the upstream coredns nameserver IPs + to your internal DNS addresses. + * `examples/basic/KubernetesNetwork.yaml`: Since the resiliency manifest uses + the examples/basic environment configuration, you will need to update + the kubernetes network configuration in this folder. Update the upstream + nameserver IPs to your internal DNS addresses. Add the http(s) proxy URL + and additional_no_proxy list. Also, if your environment requires it, + update the NTP server list to your internal NTP server addresses for + more reliable time sync. + * `tools/g2/templates/network-config.sub`: Update the upstream nameserver + IPs to your internal DNS addresses. 
+ Bootstrapping ------------- diff --git a/promenade/templates/include/up.sh b/promenade/templates/include/up.sh index fdb0b496..75473214 100644 --- a/promenade/templates/include/up.sh +++ b/promenade/templates/include/up.sh @@ -74,6 +74,14 @@ export http_proxy={{ config['KubernetesNetwork:proxy.url'] | default('', true) } export https_proxy={{ config['KubernetesNetwork:proxy.url'] | default('', true) }} export no_proxy={{ config.get(kind='KubernetesNetwork') | fill_no_proxy }} +# Configure apt proxy +if [[ -n "${http_proxy}" ]]; then + log "Configuring Apt Proxy" + cat << EOF | sudo tee /etc/apt/apt.conf.d/50proxyconf +Acquire::https::proxy "${https_proxy}"; +Acquire::http::proxy "${http_proxy}"; +EOF +fi # Install system packages # @@ -139,5 +147,13 @@ fi if systemctl -q is-enabled containerd > /dev/null 2>&1; then systemctl restart containerd || true fi +# Pull the hyperkube image prior to restarting kubelet. This is +# needed for more reliable image pull in an environment with slow +# network connectivity to avoid image pull timeouts and retries by +# kubelet. +# The || true is added to let the deployment continue, even if the +# $IMAGE_HYPERKUBE is not defined in the environment, and the image +# pull doesn't happen. 
+docker image pull "${IMAGE_HYPERKUBE}" || true systemctl enable kubelet systemctl restart kubelet diff --git a/promenade/templates/include/utils.sh b/promenade/templates/include/utils.sh index 8bac561b..853bde51 100644 --- a/promenade/templates/include/utils.sh +++ b/promenade/templates/include/utils.sh @@ -222,7 +222,7 @@ function validate_kubectl_logs { NAMESPACE=default POD_NAME=log-test-${NODE}-$(date +%s) - cat <&1 | tee -a "${LOG_FILE}" +ssh_cmd "${GENESIS_NAME}" env "IMAGE_HYPERKUBE=${IMAGE_HYPERKUBE}" \ + env "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY}" \ + /root/promenade/genesis.sh 2>&1 | tee -a "${LOG_FILE}" ssh_cmd "${GENESIS_NAME}" /root/promenade/validate-genesis.sh 2>&1 | tee -a "${LOG_FILE}" set +o pipefail diff --git a/tools/g2/stages/join-nodes.sh b/tools/g2/stages/join-nodes.sh index 4a888cec..c28deed9 100755 --- a/tools/g2/stages/join-nodes.sh +++ b/tools/g2/stages/join-nodes.sh @@ -52,7 +52,7 @@ mkdir -p "${SCRIPT_DIR}" for NAME in "${NODES[@]}"; do log Building join script for node "${NAME}" - CURL_ARGS=("--fail" "--max-time" "300" "--retry" "16" "--retry-delay" "15") + CURL_ARGS=("-v" "--max-time" "600" "--retry" "20" "--retry-delay" "15" "--connect-timeout" "30" "--progress-bar") if [[ $GET_KEYSTONE_TOKEN == 1 ]]; then TOKEN="$(os_ks_get_token "${VIA}")" if [[ -z $TOKEN ]]; then @@ -67,7 +67,7 @@ for NAME in "${NODES[@]}"; do promenade_health_check "${VIA}" log "Validating documents" - ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X POST -H "Content-Type: application/json" -d "$(promenade_render_validate_body "${USE_DECKHAND}" "${DECKHAND_REVISION}")" "$(promenade_render_validate_url)" + ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X POST -H "Content-Type: application/json" -d "$(promenade_render_validate_body "${USE_DECKHAND}" "${DECKHAND_REVISION}")" "$(promenade_render_validate_url)" JOIN_CURL_URL="$(promenade_render_curl_url "${NAME}" "${USE_DECKHAND}" "${DECKHAND_REVISION}" "${LABELS[@]}")" log "Fetching join script via: 
${JOIN_CURL_URL}" diff --git a/tools/g2/stages/move-master.sh b/tools/g2/stages/move-master.sh index 01b437c6..d21d0c1e 100755 --- a/tools/g2/stages/move-master.sh +++ b/tools/g2/stages/move-master.sh @@ -6,15 +6,15 @@ source "${GATE_UTILS}" VIA="n1" -CURL_ARGS=("--fail" "--max-time" "300" "--retry" "16" "--retry-delay" "15") +CURL_ARGS=("-v" "--max-time" "600" "--retry" "20" "--retry-delay" "15" "--connect-timeout" "30" "--progress-bar") -log Adding labels to node n0 +log "Adding labels to node n0" JSON="{\"calico-etcd\": \"enabled\", \"coredns\": \"enabled\", \"kubernetes-apiserver\": \"enabled\", \"kubernetes-controller-manager\": \"enabled\", \"kubernetes-etcd\": \"enabled\", \"kubernetes-scheduler\": \"enabled\", \"ucp-control-plane\": \"enabled\"}" -ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n0)" +ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n0)" # Need to wait -sleep 60 +sleep 120 validate_etcd_membership kubernetes n1 n0 n1 n2 n3 validate_etcd_membership calico n1 n0 n1 n2 n3 @@ -22,10 +22,10 @@ validate_etcd_membership calico n1 n0 n1 n2 n3 log Removing labels from node n2 JSON="{\"coredns\": \"enabled\", \"ucp-control-plane\": \"enabled\"}" -ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n2)" +ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n2)" # Need to wait -sleep 60 +sleep 120 validate_cluster n1 diff --git a/tools/g2/stages/teardown-nodes.sh b/tools/g2/stages/teardown-nodes.sh index 3123fcdd..ce69ea27 100755 --- a/tools/g2/stages/teardown-nodes.sh +++ b/tools/g2/stages/teardown-nodes.sh @@ -8,8 +8,11 @@ declare -a NODES RECREATE=0 -while getopts "n:rv:" opt; do +while getopts "e:n:rv:" opt; do case "${opt}" in + e) + 
ETCD_CLUSTERS+=("${OPTARG}") + ;; n) NODES+=("${OPTARG}") ;; @@ -35,6 +38,9 @@ fi for NAME in "${NODES[@]}"; do log Tearing down node "${NAME}" promenade_teardown_node "${NAME}" "${VIA}" + for ETCD_CLUSTER in "${ETCD_CLUSTERS[@]}"; do + etcdctl_member_remove "${ETCD_CLUSTER}" "${VIA}" "${NAME}" + done vm_clean "${NAME}" if [[ ${RECREATE} == "1" ]]; then vm_create "${NAME}"