fuel-ccp-installer/utils/jenkins/kargo_deploy.sh

411 lines
16 KiB
Bash
Executable File

#!/bin/bash
set -xe
# Deployment settings. Values written as ${NAME:-default} can be overridden
# from the environment of the caller; the rest are fixed here.
# for now we assume that master ip is 10.0.0.2 and slaves ips are 10.0.0.{3,4,5,...}
ADMIN_PASSWORD=${ADMIN_PASSWORD:-vagrant}
ADMIN_USER=${ADMIN_USER:-vagrant}
WORKSPACE=${WORKSPACE:-.}
ENV_NAME=${ENV_NAME:-kargo-example}
SLAVES_COUNT=${SLAVES_COUNT:-0}
# A non-empty VLAN_BRIDGE selects the bridged environment template.
if [ "$VLAN_BRIDGE" ]; then
  CONF_PATH=${CONF_PATH:-${BASH_SOURCE%/*}/default30-kargo-bridge.yaml}
else
  CONF_PATH=${CONF_PATH:-${BASH_SOURCE%/*}/default30-kargo.yaml}
fi
IMAGE_PATH=${IMAGE_PATH:-$HOME/packer-ubuntu-16.04.1-server-amd64.qcow2}

#######################################
# Guess the base OS of an image from its file name.
# Arguments: $1 - path to the image file
# Outputs:   "ubuntu" or "debian" in lower case (no trailing newline);
#            "ubuntu" when the name mentions neither
#######################################
function detect_node_base_os {
  local image_name os_name
  image_name=$(basename -- "$1")
  # grep -i keeps the letter case found in the name and -o can print more
  # than one match, so keep the first match only and lower-case it: the
  # value is later compared against lower-case names and used in file names.
  os_name=$(grep -io -e ubuntu -e debian <<< "$image_name" \
    | head -n1 | tr '[:upper:]' '[:lower:]')
  echo -n "${os_name:-ubuntu}"
}

# detect OS type from the image name, assume ubuntu by default
NODE_BASE_OS=$(detect_node_base_os "${IMAGE_PATH}")
ADMIN_NODE_BASE_OS="${ADMIN_NODE_BASE_OS:-$NODE_BASE_OS}"
DEPLOY_TIMEOUT=${DEPLOY_TIMEOUT:-60}
# SSH_OPTIONS additionally forwards the SSH agent (-A); the COPYID variant
# does not.
SSH_OPTIONS="-A -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
SSH_OPTIONS_COPYID="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
VM_LABEL=${BUILD_TAG:-unknown}
KARGO_REPO=${KARGO_REPO:-https://github.com/kubernetes-incubator/kargo.git}
KARGO_COMMIT=${KARGO_COMMIT:-origin/master}
# Default deployment settings
COMMON_DEFAULTS_YAML="kargo_default_common.yaml"
COMMON_DEFAULTS_SRC="${BASH_SOURCE%/*}/../kargo/${COMMON_DEFAULTS_YAML}"
OS_SPECIFIC_DEFAULTS_YAML="kargo_default_${NODE_BASE_OS}.yaml"
OS_SPECIFIC_DEFAULTS_SRC="${BASH_SOURCE%/*}/../kargo/${OS_SPECIFIC_DEFAULTS_YAML}"
SCALE_DEFAULTS_YAML="scale_defaults.yaml"
SCALE_DEFAULTS_SRC="${BASH_SOURCE%/*}/../kargo/${SCALE_DEFAULTS_YAML}"
SCALE_MODE=${SCALE_MODE:-no}
LOG_LEVEL=${LOG_LEVEL:--v}
ANSIBLE_TIMEOUT=${ANSIBLE_TIMEOUT:-600}
ANSIBLE_FORKS=${ANSIBLE_FORKS:-50}
# Valid sources: pip, apt
ANSIBLE_INSTALL_SOURCE=pip
required_ansible_version="2.2.1"
function collect_info {
  # Run collect_logs.sh through admin_node_command, handing it the fork
  # count, admin user and admin workspace. Per the original note, the
  # diagnostic info ends up as logs.tar.gz on the admin node. Stdout of the
  # call is discarded.
  local -a collect_cmd=(
    FORKS=$ANSIBLE_FORKS
    ADMIN_USER=$ADMIN_USER
    ADMIN_WORKSPACE=$ADMIN_WORKSPACE
    collect_logs.sh
  )
  admin_node_command "${collect_cmd[@]}" > /dev/null
}
function exit_gracefully {
  # Final cleanup and exit.
  # Arguments: $1 - optional exit code; when omitted, the status of the
  #                 command that ran right before this function is used.
  # Must stay the first statement so that $? is not clobbered.
  local exit_code=$?
  set +e
  if [[ -n "$1" ]]; then
    exit_code=$1
  fi
  # Only fuel-devops environments are touched, and only if KEEP_ENV != 0.
  if [[ "$ENV_TYPE" == "fuel-devops" && "$KEEP_ENV" != "0" ]]; then
    if [[ "${exit_code}" -eq "0" && "${DONT_DESTROY_ON_SUCCESS}" != "1" ]]; then
      # Success and nobody asked to keep the env: remove it.
      dos.py erase ${ENV_NAME}
    elif [ "${exit_code}" -ne "0" ]; then
      # Failure: snapshot the suspended env before destroying it.
      dos.py suspend ${ENV_NAME}
      dos.py snapshot ${ENV_NAME} ${ENV_NAME}.snapshot
      dos.py destroy ${ENV_NAME}
      echo "To revert snapshot please run: dos.py revert ${ENV_NAME} ${ENV_NAME}.snapshot"
    fi
  fi
  # Kill the current ssh-agent unless INHERIT_SSH_AGENT is set
  [ -n "$INHERIT_SSH_AGENT" ] || eval $(ssh-agent -k)
  exit $exit_code
}
#######################################
# Run a command, retrying it until it succeeds (three attempts in total).
# Arguments: $@ - the command and its arguments, passed through verbatim
# Returns:   0 once an attempt succeeds
# Exits:     terminates the whole script with status 1 when every attempt
#            fails
#######################################
function with_retries {
  local -r retries=3
  local try
  local had_errexit=0 had_pipefail=0
  # Remember the caller's shell options so they can be put back afterwards
  # instead of being forced to a fixed state.
  if [[ $- == *e* ]]; then
    had_errexit=1
  fi
  if shopt -qo pipefail; then
    had_pipefail=1
  fi
  # errexit must be off while attempts are allowed to fail.
  set +e
  set -o pipefail
  for (( try = 1; try <= retries; try++ )); do
    # Quoted so that arguments containing spaces or glob characters reach
    # the command unchanged.
    if "$@"; then
      break
    fi
    if (( try == retries )); then
      echo "Command failed after ${retries} attempts: $*" >&2
      exit 1
    fi
  done
  if (( ! had_pipefail )); then
    set +o pipefail
  fi
  if (( had_errexit )); then
    set -e
  fi
  return 0
}
#######################################
# Run a command on the admin node.
# Accepts commands from args passed to function or multiple commands via
# stdin, one per line.
# Globals:   ADMIN_IP ("local" means: run on this host via eval),
#            ADMIN_USER, SSH_OPTIONS (read)
# Arguments: $@ - command words; an optional leading "--" is accepted
# Returns:   status of the executed command (or of ssh)
#######################################
function admin_node_command {
  if [[ "$ADMIN_IP" == "local" ]]; then
    # Callers write "admin_node_command -- cmd ..."; for ssh the "--" just
    # ends option parsing, but eval would try to execute "--" as a command.
    if [[ "${1:-}" == "--" ]]; then
      shift
    fi
    if [ $# -gt 0 ]; then
      eval "$@"
    else
      # Kept as a pipeline so the commands run in a subshell, as before.
      # -r keeps backslashes intact; the extra test keeps a final line that
      # has no trailing newline.
      cat | while IFS= read -r cmd || [[ -n "$cmd" ]]; do
        eval "$cmd"
      done
    fi
  else
    # SSH_OPTIONS is intentionally unquoted: it holds several options.
    ssh $SSH_OPTIONS "${ADMIN_USER}@${ADMIN_IP}" "$@"
  fi
}
#######################################
# Block until each given node accepts a password SSH login.
# A node is probed once per loop iteration with a one second pause in
# between; the limit below counts iterations, not wall-clock seconds.
# Globals:   ADMIN_PASSWORD, ADMIN_USER, SSH_OPTIONS (read)
# Arguments: $@ - node addresses
# Exits:     terminates the script with status 2 when a node does not
#            answer within the limit
#######################################
function wait_for_nodes {
  local -r master_wait_time=30
  local node_ip elapsed_time report
  for node_ip in "$@"; do
    elapsed_time=0
    while true; do
      # SSH_OPTIONS is intentionally unquoted: it holds several options.
      report=$(sshpass -p "${ADMIN_PASSWORD}" ssh ${SSH_OPTIONS} \
        -o PreferredAuthentications=password "${ADMIN_USER}@${node_ip}" \
        echo ok || echo not ready)
      if [ "${report}" = "ok" ]; then
        break
      fi
      if [ "${elapsed_time}" -gt "${master_wait_time}" ]; then
        echo "Node ${node_ip} did not become reachable over SSH" >&2
        exit 2
      fi
      sleep 1
      elapsed_time=$((elapsed_time + 1))
    done
  done
}
function wait_for_apt_lock_release {
  # Poll the admin node every 10 seconds for as long as lslocks still lists
  # an entry matching "apt" or "dpkg".
  local lock_probe='sudo lslocks | egrep "apt|dpkg"'
  until ! admin_node_command "$lock_probe"; do
    echo 'Waiting for other software managers to release apt lock ...'
    sleep 10
  done
}
#######################################
# Run an Ansible playbook from the admin node, retrying on failure.
# After a failed run, if the playbook's .retry file exists on the admin
# node, the next attempt is limited to the hosts listed in it.
# Globals:   ADMIN_WORKSPACE, ADMIN_USER, ANSIBLE_FORKS, ANSIBLE_TIMEOUT,
#            DEFAULT_OPTS, custom_opts (read)
# Arguments: $1 - playbook path on the admin node
#            $2.. - extra ansible-playbook arguments
# Exits:     after five failed attempts collects logs and leaves through
#            exit_gracefully 1
#######################################
function with_ansible {
  local tries=5
  local retry_opt=""
  local playbook=$1
  # The retry file sits next to the playbook and carries its base name with
  # the extension swapped for ".retry". Only the extension of the file name
  # is touched, whatever it is (.yml, .yaml, ...).
  local playbook_file=${playbook##*/}
  local retryfile="${playbook%"$playbook_file"}${playbook_file%.*}.retry"
  # DEFAULT_OPTS, custom_opts and retry_opt are intentionally unquoted:
  # each may hold several options or be empty.
  until admin_node_command \
    ANSIBLE_CONFIG=$ADMIN_WORKSPACE/utils/kargo/ansible.cfg \
    ansible-playbook \
    --ssh-extra-args "-A\ -o\ StrictHostKeyChecking=no\ -o\ ConnectionAttempts=20" \
    -u ${ADMIN_USER} -b \
    --become-user=root -i $ADMIN_WORKSPACE/inventory/inventory.cfg \
    --forks=$ANSIBLE_FORKS --timeout $ANSIBLE_TIMEOUT $DEFAULT_OPTS \
    -e ansible_ssh_user=${ADMIN_USER} \
    $custom_opts $retry_opt "$@"; do
    if [[ $tries -gt 1 ]]; then
      tries=$((tries - 1))
      echo "Deployment failed! Trying $tries more times..."
    else
      collect_info
      exit_gracefully 1
    fi
    if admin_node_command test -e "$retryfile"; then
      retry_opt="--limit @${retryfile}"
    fi
  done
  # The retry file lives wherever the playbook ran, i.e. on the admin node,
  # so it has to be removed there and not on the local host.
  admin_node_command rm -f "$retryfile" || true
}
mkdir -p tmp logs
# If SLAVE_IPS or IRONIC_NODE_LIST are specified or REAPPLY is set, then treat env as pre-provisioned
if [[ -z "$REAPPLY" && -z "$SLAVE_IPS" && -z "$IRONIC_NODE_LIST" ]]; then
  ENV_TYPE="fuel-devops"
  echo "Trying to ensure bridge-nf-call-iptables is disabled..."
  # Best effort only: the sysctl file may be absent on this host, and a
  # failing cat would abort the whole script under "set -e".
  bridge_nf_sysctl=/proc/sys/net/bridge/bridge-nf-call-iptables
  br_netfilter=""
  if [[ -r "$bridge_nf_sysctl" ]]; then
    br_netfilter=$(cat "$bridge_nf_sysctl")
  fi
  if [[ "$br_netfilter" == "1" ]]; then
    sudo sh -c 'echo 0 > /proc/sys/net/bridge/bridge-nf-call-iptables'
  fi
  # Start from a clean slate: drop any previous env of that name and old logs.
  dos.py erase "${ENV_NAME}" || true
  rm -rf logs/*
  ENV_NAME=${ENV_NAME} SLAVES_COUNT=${SLAVES_COUNT} IMAGE_PATH=${IMAGE_PATH} CONF_PATH=${CONF_PATH} python "${BASH_SOURCE%/*}/env.py" create_env
  # env.py prints the IPs as a bracketed, quoted, comma separated list;
  # tr strips that punctuation so the words can be split into an array.
  SLAVE_IPS=($(ENV_NAME=${ENV_NAME} python "${BASH_SOURCE%/*}/env.py" get_slaves_ips | tr -d "[],'"))
  # Set ADMIN_IP=local to use current host to run ansible
  ADMIN_IP=${ADMIN_IP:-${SLAVE_IPS[0]}}
  # Left unquoted on purpose: expands to no argument when no IP was returned.
  wait_for_nodes ${SLAVE_IPS[0]}
else
  ENV_TYPE=${ENV_TYPE:-other_or_reapply}
  # Turn the whitespace separated SLAVE_IPS string into an array.
  SLAVE_IPS=( $SLAVE_IPS )
fi
ADMIN_IP=${ADMIN_IP:-${SLAVE_IPS[0]}}
# Trap errors during env preparation stage
trap exit_gracefully ERR INT TERM
# FIXME(mattymo): Should be part of underlay
echo "Checking local SSH environment..."
if ssh-add -l &>/dev/null; then
  echo "Local SSH agent detected with at least one identity."
  # Tells exit_gracefully not to kill an agent this script did not start.
  INHERIT_SSH_AGENT="yes"
else
  echo "No SSH agent available. Preparing SSH key..."
  if ! [ -f "$WORKSPACE/id_rsa" ]; then
    ssh-keygen -t rsa -f "$WORKSPACE/id_rsa" -N "" -q
    chmod 600 "${WORKSPACE}"/id_rsa*
    # When the user has an ssh config, make ssh ignore it.
    if [ -f ~/.ssh/config ]; then
      SSH_OPTIONS="${SSH_OPTIONS} -F /dev/null"
    fi
  fi
  eval "$(ssh-agent)"
  ssh-add "$WORKSPACE/id_rsa"
fi
# Install missing packages on the host running this script
# (stdout first, then stderr onto it, so that both are silenced)
if ! type sshpass > /dev/null 2>&1; then
  sudo apt-get update && sudo apt-get install -y sshpass
fi
# Copy utils/kargo dir to WORKSPACE/utils/kargo so it works across both local
# and remote admin node deployment modes.
echo "Preparing admin node..."
if [[ "$ADMIN_IP" != "local" ]]; then
  ADMIN_WORKSPACE="workspace"
  # Install our public key on the admin node using password authentication.
  # SSH_OPTIONS_COPYID is intentionally unquoted: it holds several options.
  sshpass -p "$ADMIN_PASSWORD" ssh-copy-id $SSH_OPTIONS_COPYID -o PreferredAuthentications=password "$ADMIN_USER@${ADMIN_IP}" -p 22
else
  ADMIN_WORKSPACE="$WORKSPACE"
fi
if [[ -n "$ADMIN_NODE_CLEANUP" ]]; then
  if [[ "$ADMIN_IP" != "local" ]]; then
    admin_node_command rm -rf $ADMIN_WORKSPACE || true
  else
    # Locally the workspace is shared with this checkout, so only the
    # directories created by this script are removed.
    for dir in inventory kargo utils; do
      admin_node_command rm -rf ${ADMIN_WORKSPACE}/${dir} || true
    done
  fi
fi
admin_node_command mkdir -p "$ADMIN_WORKSPACE/utils/kargo" "$ADMIN_WORKSPACE/inventory"
# Stream the kargo helper directory to the admin node as a tarball.
tar cz ${BASH_SOURCE%/*}/../kargo | admin_node_command tar xzf - -C $ADMIN_WORKSPACE/utils/
echo "Setting up ansible and required dependencies..."
# Install mandatory packages on admin node
# (stdout first, then stderr onto it, so that both are silenced)
if ! admin_node_command type sshpass > /dev/null 2>&1; then
  admin_node_command "sh -c \"sudo apt-get update && sudo apt-get install -y sshpass\""
fi
if ! admin_node_command type git > /dev/null 2>&1; then
  admin_node_command "sh -c \"sudo apt-get update && sudo apt-get install -y git\""
fi
# Install Ansible on the admin node unless it is already there.
# (stdout first, then stderr onto it, so that both are silenced)
if ! admin_node_command type ansible > /dev/null 2>&1; then
  # Wait for apt lock in case it is updating from cron job
  case "$ADMIN_NODE_BASE_OS" in
    ubuntu)
      # Register the Ansible PPA and its signing key.
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-get update
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-get install -y software-properties-common
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com --recv-keys 7BB9C367
      wait_for_apt_lock_release
      with_retries admin_node_command -- "sh -c \"sudo apt-add-repository -y 'deb http://ppa.launchpad.net/ansible/ansible/ubuntu xenial main'\""
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-get update
      wait_for_apt_lock_release
      ;;
    debian)
      # Upload the backports repository list and its pinning preferences.
      cat ${BASH_SOURCE%/*}/files/debian_backports_repo.list | admin_node_command "sudo sh -c 'cat - > /etc/apt/sources.list.d/backports.list'"
      cat ${BASH_SOURCE%/*}/files/debian_pinning | admin_node_command "sudo sh -c 'cat - > /etc/apt/preferences.d/backports'"
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-get update
      wait_for_apt_lock_release
      with_retries admin_node_command -- sudo apt-get -y install --only-upgrade python-setuptools
      ;;
  esac
  wait_for_apt_lock_release
  if [[ "$ANSIBLE_INSTALL_SOURCE" == "apt" ]]; then
    with_retries admin_node_command -- sudo apt-get install -y ansible python-netaddr
  elif [[ "$ANSIBLE_INSTALL_SOURCE" == "pip" ]]; then
    # Drop any pip-installed setuptools/pip first (failure is ignored), then
    # reinstall the distribution packages before installing the pinned
    # Ansible version.
    admin_node_command -- sudo pip uninstall -y setuptools pip || true
    with_retries admin_node_command -- sudo apt-get install -y --reinstall python-netaddr libssl-dev python-pip python-setuptools python-pkg-resources
    with_retries admin_node_command -- sudo -H easy_install pyopenssl==16.2.0
    with_retries admin_node_command -- sudo pip install --upgrade ansible==$required_ansible_version
  else
    echo "ERROR: Unknown Ansible install source: ${ANSIBLE_INSTALL_SOURCE}"
    exit 1
  fi
fi
echo "Checking out kargo playbook..."
kargo_checkout_dir="$ADMIN_WORKSPACE/kargo"
# A failing clone is tolerated; the fetch and checkout below still run.
admin_node_command git clone "$KARGO_REPO" "$kargo_checkout_dir" || true
admin_node_command "sh -c 'cd $kargo_checkout_dir && git fetch --all && git checkout $KARGO_COMMIT'"
echo "Uploading default settings and inventory..."
# Only copy default files if they are absent from inventory dir
defaults_sources=("$COMMON_DEFAULTS_SRC" "$OS_SPECIFIC_DEFAULTS_SRC" "$SCALE_DEFAULTS_SRC")
defaults_names=("$COMMON_DEFAULTS_YAML" "$OS_SPECIFIC_DEFAULTS_YAML" "$SCALE_DEFAULTS_YAML")
for defaults_idx in "${!defaults_names[@]}"; do
  remote_defaults="$ADMIN_WORKSPACE/inventory/${defaults_names[$defaults_idx]}"
  if ! admin_node_command test -e "$remote_defaults"; then
    cat ${defaults_sources[$defaults_idx]} | admin_node_command "cat > $remote_defaults"
  fi
done
# Link kargo's group_vars into the inventory dir unless something is there.
if ! admin_node_command test -e "${ADMIN_WORKSPACE}/inventory/group_vars"; then
  admin_node_command ln -rsf "${ADMIN_WORKSPACE}/kargo/inventory/group_vars" "${ADMIN_WORKSPACE}/inventory/group_vars"
fi
if [[ -n "${CUSTOM_YAML}" ]]; then
  echo "Uploading custom YAML for deployment..."
  echo -e "$CUSTOM_YAML" | admin_node_command "cat > $ADMIN_WORKSPACE/inventory/custom.yaml"
fi
# A custom.yaml on the admin node (just uploaded or already present) is
# passed to ansible as extra vars.
if admin_node_command test -e "$ADMIN_WORKSPACE/inventory/custom.yaml"; then
  custom_opts="-e @$ADMIN_WORKSPACE/inventory/custom.yaml"
fi
# Command prefix for kargo's inventory builder; expanded unquoted below so
# that it splits into its separate words.
inventory_builder="CONFIG_FILE=$ADMIN_WORKSPACE/inventory/inventory.cfg python3 $ADMIN_WORKSPACE/kargo/contrib/inventory_builder/inventory.py"
if [ -n "${SLAVE_IPS}" ]; then
  admin_node_command $inventory_builder ${SLAVE_IPS[@]}
elif [ -n "${IRONIC_NODE_LIST}" ]; then
  inventory_formatted=$(echo -e "$IRONIC_NODE_LIST" | ${BASH_SOURCE%/*}/../ironic/nodelist_to_inventory.py)
  admin_node_command $inventory_builder load /dev/stdin <<< "$inventory_formatted"
fi
# Try to get IPs from inventory first
if [ -z "${SLAVE_IPS}" ]; then
  if ! admin_node_command stat $ADMIN_WORKSPACE/inventory/inventory.cfg; then
    echo "No slave nodes available. Unable to proceed!"
    exit_gracefully 1
  fi
  SLAVE_IPS=($(admin_node_command $inventory_builder print_ips))
fi
COMMON_DEFAULTS_OPT="-e @$ADMIN_WORKSPACE/inventory/${COMMON_DEFAULTS_YAML}"
OS_SPECIFIC_DEFAULTS_OPT="-e @$ADMIN_WORKSPACE/inventory/${OS_SPECIFIC_DEFAULTS_YAML}"
SCALE_DEFAULTS_OPT="-e @$ADMIN_WORKSPACE/inventory/${SCALE_DEFAULTS_YAML}"
# The scale defaults are added for 50 or more nodes, or whenever SCALE_MODE
# is anything other than "no".
DEFAULT_OPTS="${COMMON_DEFAULTS_OPT} ${OS_SPECIFIC_DEFAULTS_OPT}"
if [[ "${#SLAVE_IPS[@]}" -ge 50 || "$SCALE_MODE" != "no" ]]; then
  DEFAULT_OPTS="${DEFAULT_OPTS} ${SCALE_DEFAULTS_OPT}"
fi
# Turn errexit off for the deployment stage; with_ansible retries failed
# playbook runs itself and leaves through exit_gracefully when it gives up.
set +e
cluster_playbook=$ADMIN_WORKSPACE/kargo/cluster.yml
non_master_limit='kube-node:!kube-master:!etcd'
echo "Running pre-setup steps on nodes via ansible..."
with_ansible $ADMIN_WORKSPACE/utils/kargo/preinstall.yml -e "ansible_ssh_pass=${ADMIN_PASSWORD}"
echo "Deploying k8s masters/etcds first via ansible..."
with_ansible $cluster_playbook --limit kube-master:etcd
# Only run non-master deployment if there are non-masters in inventory:
# --list-hosts serves as the probe and its output is discarded.
if admin_node_command ansible-playbook \
  -i $ADMIN_WORKSPACE/inventory/inventory.cfg \
  $cluster_playbook --limit $non_master_limit \
  --list-hosts &>/dev/null; then
  echo "Deploying k8s non-masters via ansible..."
  with_ansible $cluster_playbook --limit $non_master_limit
fi
echo "Initial deploy succeeded. Proceeding with post-install tasks..."
with_ansible $ADMIN_WORKSPACE/utils/kargo/postinstall.yml
#######################################
# Print the final "deployment is complete" banner.
# Globals:   ADMIN_USER, ADMIN_PASSWORD (read)
# Arguments: $1 - heading describing which addresses are listed
#            $2 - master IP
#            $3 - node IPs as a single, space separated string
#            $4 - host used in the K8s dashboard URL
# Outputs:   the banner on stdout
#######################################
function print_deploy_summary {
  local -r banner="**************************************"
  echo "$banner"
  echo "$banner"
  echo "$banner"
  echo "Deployment is complete!"
  echo "* $1"
  echo "* MASTER IP: $2"
  echo "* NODE IPS: $3"
  echo "* USERNAME: $ADMIN_USER"
  echo "* PASSWORD: $ADMIN_PASSWORD"
  echo "* K8s dashboard: https://kube:changeme@$4/ui/"
  echo "$banner"
  echo "$banner"
  echo "$banner"
}

# FIXME(mattymo): Move this to underlay
# setup VLAN if everything is ok and env will not be deleted
if [ "$VLAN_BRIDGE" ] && [ "${DONT_DESTROY_ON_SUCCESS}" = "1" ]; then
  rm -f VLAN_IPS
  for IP in "${SLAVE_IPS[@]}"; do
    bridged_iface_mac="$(ENV_NAME=${ENV_NAME} python "${BASH_SOURCE%/*}/env.py" get_bridged_iface_mac "$IP")"
    # On the node: find the interface that carries the bridged MAC, drop the
    # default route, request a DHCP lease on that interface and print the
    # address it got. The answers accumulate in VLAN_IPS, one node per line.
    # SSH_OPTIONS is intentionally unquoted: it holds several options.
    sshpass -p "${ADMIN_PASSWORD}" ssh ${SSH_OPTIONS} "${ADMIN_USER}@${IP}" bash -s <<EOF >>VLAN_IPS
bridged_iface=\$(/sbin/ifconfig -a|awk -v mac="$bridged_iface_mac" '\$0 ~ mac {print \$1}' 'RS=\n\n')
sudo ip route del default
sudo dhclient "\${bridged_iface}"
echo \$(ip addr list |grep ${bridged_iface_mac} -A 1 |grep 'inet ' |cut -d' ' -f6| cut -d/ -f1)
EOF
  done
  # Tracing is switched off while the banner is printed.
  set +x
  # Drop blank lines so that line 1 is the master and the rest are nodes.
  sed -i '/^\s*$/d' VLAN_IPS
  vlan_master_ip=$(head -n1 VLAN_IPS)
  vlan_node_ips=$(tail -n +2 VLAN_IPS | tr '\n' ' ')
  print_deploy_summary "VLANs IP addresses" "$vlan_master_ip" "$vlan_node_ips" "$vlan_master_ip"
  set -x
  rm -f VLAN_IPS
else
  # "${SLAVE_IPS[*]}" lists every node; a plain $SLAVE_IPS would expand to
  # the first array element only.
  print_deploy_summary "Node network addresses:" "$ADMIN_IP" "${SLAVE_IPS[*]}" "${SLAVE_IPS[0]}"
fi
# TODO(mattymo): Shift to FORCE_NEW instead of REAPPLY
echo "To reapply deployment, run env REAPPLY=yes ADMIN_IP=$ADMIN_IP $0"
exit_gracefully 0