N-node gate and improved logging

This PS sets up the check scripts to support an N-node gate in Zuul.
It also adds more comprehensive logging of OS-H.

Change-Id: Id2af01c688a438eeec348f221fb05b09401c80a0
Pete Birley 2017-06-07 09:22:17 -05:00
parent 78f6d00c15
commit de656ea7f2
4 changed files with 113 additions and 7 deletions

tools/gate/dump_logs.sh Executable file

@@ -0,0 +1,63 @@
#!/bin/bash
set +xe
echo "Capturing logs from environment."
mkdir -p ${LOGS_DIR}/k8s/etc
sudo cp -a /etc/kubernetes ${LOGS_DIR}/k8s/etc
sudo chmod 777 --recursive ${LOGS_DIR}/*
mkdir -p ${LOGS_DIR}/k8s
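# Dump cluster-scoped objects as YAML.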
for OBJECT_TYPE in nodes \
namespace; do
kubectl get ${OBJECT_TYPE} -o yaml > ${LOGS_DIR}/k8s/${OBJECT_TYPE}.yaml
done
kubectl describe nodes > ${LOGS_DIR}/k8s/nodes.txt
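# Dump namespaced objects from all namespaces as YAML.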
for OBJECT_TYPE in svc \
pods \
jobs \
deployments \
daemonsets \
statefulsets \
configmaps \
secrets; do
kubectl get --all-namespaces ${OBJECT_TYPE} -o yaml > \
${LOGS_DIR}/k8s/${OBJECT_TYPE}.yaml
done
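# Collect the logs of every container in every pod.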
mkdir -p ${LOGS_DIR}/k8s/pods
kubectl get pods -a --all-namespaces -o json | jq -r \
'.items[].metadata | .namespace + " " + .name' | while read line; do
NAMESPACE=$(echo $line | awk '{print $1}')
NAME=$(echo $line | awk '{print $2}')
kubectl get --namespace $NAMESPACE pod $NAME -o json | jq -r \
'.spec.containers[].name' | while read line; do
CONTAINER=$(echo $line | awk '{print $1}')
kubectl logs $NAME --namespace $NAMESPACE -c $CONTAINER > \
${LOGS_DIR}/k8s/pods/$NAMESPACE-$NAME-$CONTAINER.txt
done
done
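# Describe every service in every namespace.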
mkdir -p ${LOGS_DIR}/k8s/svc
kubectl get svc -o json --all-namespaces | jq -r \
'.items[].metadata | .namespace + " " + .name' | while read line; do
NAMESPACE=$(echo $line | awk '{print $1}')
NAME=$(echo $line | awk '{print $2}')
kubectl describe svc $NAME --namespace $NAMESPACE > \
${LOGS_DIR}/k8s/svc/$NAMESPACE-$NAME.txt
done
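# Dump RBAC objects as YAML.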
mkdir -p ${LOGS_DIR}/k8s/rbac
for OBJECT_TYPE in clusterroles \
roles \
clusterrolebindings \
rolebindings; do
kubectl get ${OBJECT_TYPE} -o yaml > ${LOGS_DIR}/k8s/rbac/${OBJECT_TYPE}.yaml
done
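# Capture per-node network state: firewall rules, addresses, routes, and the ARP cache.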
mkdir -p ${LOGS_DIR}/nodes/$(hostname)
sudo iptables-save > ${LOGS_DIR}/nodes/$(hostname)/iptables.txt
sudo ip a > ${LOGS_DIR}/nodes/$(hostname)/ip.txt
sudo route -n > ${LOGS_DIR}/nodes/$(hostname)/routes.txt
arp -a > ${LOGS_DIR}/nodes/$(hostname)/arp.txt
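# Exit with the status handed in by the caller, so a trap-driven dump preserves the original failure code.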
exit $1
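
For reference, a minimal sketch of how this script is meant to be driven (the LOGS_DIR value is illustrative, not part of the commit):

export LOGS_DIR=/tmp/logs
mkdir -p ${LOGS_DIR}
bash ./tools/gate/dump_logs.sh 0   # pass 0 on success; the ERR trap in the gate runner passes "$?" on failure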

tools/gate/funcs/kube.sh

@@ -44,6 +44,33 @@ function kube_wait_for_pods {
set -x
}
function kube_wait_for_nodes {
# Default wait timeout is 180 seconds
set +x
end=$(date +%s)
if [ "x$1" != "x" ]; then
end=$((end + $1))
else
end=$((end + 180))
fi
while true; do
NUMBER_OF_NODES=$(kubectl get nodes --no-headers -o name | wc -l)
NUMBER_OF_NODES_EXPECTED=$(($(cat /etc/nodepool/sub_nodes_private | wc -l) + 1))
[ $NUMBER_OF_NODES -eq $NUMBER_OF_NODES_EXPECTED ] && \
NODES_ONLINE="True" || NODES_ONLINE="False"
NODES_READY="True"
while read NODE_STATUS; do
echo ${NODE_STATUS} | grep -q ^Ready || NODES_READY="False"
done < <(kubectl get nodes --no-headers | awk '{ print $2 }')
[ $NODES_ONLINE == "True" -a $NODES_READY == "True" ] && \
break || true
sleep 5
now=$(date +%s)
[ $now -gt $end ] && echo "Nodes failed to become ready in time." && \
kubectl get nodes -o wide && exit 1
done
set -x
}
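
The readiness test above can be condensed; a hypothetical standalone equivalent (not part of the commit) counts nodes whose STATUS column does not begin with Ready:

# A sketch: zero not-ready nodes means the whole cluster is Ready.
NOT_READY=$(kubectl get nodes --no-headers | awk '$2 !~ /^Ready/' | wc -l)
[ "${NOT_READY}" -eq 0 ] && echo "All nodes ready"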
function kubeadm_aio_reqs_install {
TMP_DIR=$(mktemp -d)
if [ "x$HOST_OS" == "xubuntu" ]; then


@@ -28,6 +28,11 @@ source ${WORK_DIR}/tools/gate/funcs/helm.sh
export LOGS_DIR=${LOGS_DIR:-"${WORK_DIR}/logs/"}
mkdir -p ${LOGS_DIR}
function dump_logs () {
${WORK_DIR}/tools/gate/dump_logs.sh
}
trap 'dump_logs "$?"' ERR
# The whitespace linter was moved here so that a whitespace failure alone does not block all the other jobs
if [ "x$INTEGRATION_TYPE" == "xlinter" ]; then
bash ${WORK_DIR}/tools/gate/whitespace.sh
@@ -55,4 +60,5 @@ fi
if [ "x$INTEGRATION_TYPE" == "xbasic" ]; then
bash ${WORK_DIR}/tools/gate/helm_dry_run.sh
bash ${WORK_DIR}/tools/gate/basic_launch.sh
bash ${WORK_DIR}/tools/gate/dump_logs.sh 0
fi
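
The log-capture wiring above follows the standard bash ERR-trap pattern; a self-contained sketch (illustrative, assuming the gate script runs under set -e):

#!/bin/bash
set -e
trap 'echo "dumping logs, exit status $?"' ERR
false   # any failing command fires the trap once, then the script exits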


@@ -17,20 +17,30 @@ set -ex
sudo chown $(whoami) ${SSH_PRIVATE_KEY}
sudo chmod 600 ${SSH_PRIVATE_KEY}
PRIMARY_NODE_IP=$(cat /etc/nodepool/primary_node_private | tail -1)
KUBEADM_TOKEN=$(sudo docker exec kubeadm-aio kubeadm token list | tail -n -1 | awk '{ print $1 }')
NODE_2=$(cat /etc/nodepool/sub_nodes_private | tail -1)
ssh-keyscan "${NODE_2}" >> ~/.ssh/known_hosts
ssh -i ${SSH_PRIVATE_KEY} $(whoami)@${NODE_2} mkdir -p ${WORK_DIR%/*}
scp -i ${SSH_PRIVATE_KEY} -r ${WORK_DIR} $(whoami)@${NODE_2}:${WORK_DIR%/*}
ssh -i ${SSH_PRIVATE_KEY} $(whoami)@${NODE_2} "export WORK_DIR=${WORK_DIR}; export KUBEADM_TOKEN=${KUBEADM_TOKEN}; export PRIMARY_NODE_IP=${PRIMARY_NODE_IP}; export KUBEADM_IMAGE=${KUBEADM_IMAGE}; bash ${WORK_DIR}/tools/gate/provision_gate_worker_node.sh"
SUB_NODE_PROVISION_SCRIPT=$(mktemp --suffix=.sh)
cat /etc/nodepool/sub_nodes_private | while read SUB_NODE; do
cat >> ${SUB_NODE_PROVISION_SCRIPT} <<EOS
ssh-keyscan "${SUB_NODE}" >> ~/.ssh/known_hosts
ssh -i ${SSH_PRIVATE_KEY} $(whoami)@${SUB_NODE} mkdir -p ${WORK_DIR%/*}
scp -i ${SSH_PRIVATE_KEY} -r ${WORK_DIR} $(whoami)@${SUB_NODE}:${WORK_DIR%/*}
ssh -i ${SSH_PRIVATE_KEY} $(whoami)@${SUB_NODE} "export WORK_DIR=${WORK_DIR}; \
export KUBEADM_TOKEN=${KUBEADM_TOKEN}; \
export PRIMARY_NODE_IP=${PRIMARY_NODE_IP}; \
export KUBEADM_IMAGE=${KUBEADM_IMAGE}; \
bash ${WORK_DIR}/tools/gate/provision_gate_worker_node.sh"
EOS
done
bash ${SUB_NODE_PROVISION_SCRIPT}
rm -rf ${SUB_NODE_PROVISION_SCRIPT}
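# Give the worker nodes time to join the cluster before polling for readiness.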
sleep 120
source ${WORK_DIR}/tools/gate/funcs/kube.sh
kube_wait_for_nodes 240
kube_wait_for_pods kube-system 240
kube_wait_for_pods openstack 240
kubectl get nodes --show-all
kubectl get --all-namespaces all --show-all
sudo docker exec kubeadm-aio openstack-helm-dev-prep
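
The provisioning loop above relies on the heredoc delimiter EOS being unquoted, so ${SUB_NODE} and the exported variables expand while each block is appended, leaving literal values in the generated script. A minimal sketch of that behavior (the address and path are illustrative):

SUB_NODE=192.0.2.10   # example worker address
cat >> /tmp/provision.sh <<EOS
ssh user@${SUB_NODE} hostname
EOS
# /tmp/provision.sh now contains the expanded line: ssh user@192.0.2.10 hostname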