#!/bin/bash
#    Copyright 2015 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

# Function library for managing Docker containers: lifecycle (start, stop,
# destroy, revert), readiness checks, file copy, and backup/restore.
#
# Globals read throughout this file and not defined here (they must be set by
# whatever sources this file): CONTAINER_NAMES, CONTAINER_OPTIONS,
# CONTAINER_VOLUMES, CONTAINER_SEQUENCE, IMAGE_PREFIX, VERSION, CNT_PREFIX,
# EXEC_DRIVER, BACKGROUND, CHECK_RETRIES, ADMIN_IP, ASTUTE_YAML, BACKUP_ROOT,
# SYSTEM_DIRS, FULL_BACKUP_DIRS.

# DOCKER holds the docker client command line. It is a plain string and is
# expanded unquoted on purpose so that "docker -D" splits into two words.
# An unset/empty DEBUG is treated as "false".
if [[ "${DEBUG:-false}" == "true" ]]; then
  DOCKER="docker -D"
else
  DOCKER="docker"
fi

# Prints the command summary to stdout.
function show_usage {
  cat <<EOF
Usage:
 $0 command

Available commands:
 help: show this message
 build: create all Docker containers
 list: list container short names (-l for more output)
 start: start all Docker containers
 restart: restart one or more Docker containers
 stop: stop one or more Docker containers
 shell: start a shell or run a command in a Docker container
 logs: print console log from a container
 revert: reset container to original state
 destroy: destroy one or more containers
 copy: copy files in or out of container
 check: check of container is ready
 backup: back up entire deployment (--full to include containers, puppet and repos)
 restore: restore backed up deployment (--full includes containers)
EOF
}

# Parses leading options and collects the command and its arguments.
# Globals written: opts (all arguments joined), VERSION, DEBUG,
#                  nonopts (array; appended to).
# Arguments: the full command line.
# Exits 1 on an unrecognized option or when -V/--version has no value.
function parse_options {
  opts="$*"
  # A while/shift loop keeps "$1" and the remaining arguments in sync; the
  # value given to -V is consumed here and never reaches nonopts.
  while (( $# > 0 )); do
    case "$1" in
      -V|--version)
        if (( $# < 2 )); then
          echo "Option $1 requires an argument" 1>&2
          exit 1
        fi
        VERSION=$2
        shift 2
        ;;
      -d|--debug)
        DEBUG=true
        shift
        ;;
      --nodebug)
        DEBUG=false
        shift
        ;;
      --)
        shift
        nonopts+=("$@")
        return
        ;;
      help|build|start|check|list|copy|restart|stop|revert|shell|upgrade|restore|backup|destroy|logs|post_start_hooks)
        # First command word: everything from here on belongs to the command.
        nonopts+=("$@")
        return
        ;;
      -*)
        echo "Unrecognized option: $1" 1>&2
        exit 1
        ;;
      *)
        nonopts+=("$1")
        shift
        ;;
    esac
  done
}

# Echoes its arguments only when DEBUG is "true".
function debug {
  if [[ "${DEBUG:-false}" == "true" ]]; then
    echo "$@"
  fi
}

# Builds a Docker image.
# Arguments: $1 - build context directory, $2 - image tag.
function build_image {
  ${DOCKER} build -t "$2" "$1"
}

# Stops, destroys and starts again the container with short name $1.
function revert_container {
  stop_container "$1"
  destroy_container "$1"
  start_container "$1"
}

# Placeholder: no storage containers are built.
function build_storage_containers {
  #Format: build_image $SOURCE_DIR/storage-foo storage/foo
  return 0
}

# Evaluates the given command string until it succeeds, sleeping 5 seconds
# between attempts. Once the number of failed tries exceeds CHECK_RETRIES the
# global "failure" is set to 1 and the loop stops.
# Arguments: command line to evaluate (passed through eval).
function retry_checker {
  local tries=0
  local rc
  echo "checking with command \"$*\""
  # Quoted so the string reaches eval intact (no pre-eval globbing).
  until eval "$*"; do
    rc=$?
    tries=$((tries + 1))
    echo "try number $tries"
    echo "return code is $rc"
    if [ "$tries" -gt "$CHECK_RETRIES" ]; then
      failure=1
      break
    fi
    sleep 5
  done
}

# Runs get_service_credentials.py on ASTUTE_YAML and sources its output into
# the current shell via a temporary file, which is removed afterwards.
function get_service_credentials {
  local credentialfile
  credentialfile=$(mktemp /tmp/servicepws.XXXXX) || return 1
  # Pass the yaml path only when it is set, exactly one word when it is.
  get_service_credentials.py ${ASTUTE_YAML:+"$ASTUTE_YAML"} > "$credentialfile"
  . "$credentialfile"
  rm -f "$credentialfile"
}

# Uses a per-container command to decide whether a container is ready, then
# verifies that no puppet process is running inside it.
# Arguments: $1 - container short name.
# Globals written: failure (0 on entry, set to 1 by retry_checker).
# Returns: 0 when ready, 1 when any check exhausted its retries.
# The *_user/*_password/*_dbname variables used below are not defined in this
# file; presumably they come from the file sourced by
# get_service_credentials (confirm against get_service_credentials.py).
function check_ready {
  get_service_credentials
  failure=0
  echo "checking container $1"
  case "$1" in
    nailgun)
      retry_checker "shell_container nailgun supervisorctl status nailgun | grep -q RUNNING"
      ;;
    ostf)
      retry_checker "grep -Eq '^[2-4][0-9]?' < <(curl --connect-timeout 1 -s -w '%{http_code}' http://$ADMIN_IP:8777/ostf/not_found -o /dev/null)"
      ;;
    #NOTICE: Cobbler console tool does not comply unix conversation: 'cobbler profile find' always return 0 as exit code
    cobbler)
      retry_checker "shell_container cobbler ps waux | grep -q 'cobblerd -F' && pgrep dnsmasq"
      retry_checker "shell_container cobbler cobbler profile find --name=centos* | grep -q centos && shell_container cobbler cobbler profile find --name=ubuntu* | grep -q ubuntu && shell_container cobbler cobbler profile find --name=bootstrap* | grep -q bootstrap"
      ;;
    rabbitmq)
      retry_checker "curl -f -L -i -u \"$astute_user:$astute_password\" http://$ADMIN_IP:15672/api/nodes 1>/dev/null 2>&1"
      retry_checker "curl -f -L -u \"$mcollective_user:$mcollective_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'mcollective_broadcast'"
      retry_checker "curl -f -L -u \"$mcollective_user:$mcollective_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'mcollective_directed'"
      ;;
    postgres)
      retry_checker "shell_container postgres PGPASSWORD=$postgres_nailgun_password /usr/bin/psql -h $ADMIN_IP -U \"$postgres_nailgun_user\" \"$postgres_nailgun_dbname\" -c '\copyright' 2>&1 1>/dev/null"
      ;;
    astute)
      retry_checker "shell_container astute ps waux | grep -q 'astuted'"
      retry_checker "curl -f -L -u \"$astute_user:$astute_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'nailgun'"
      retry_checker "curl -f -L -u \"$astute_user:$astute_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'naily_service'"
      ;;
    rsync)
      retry_checker "shell_container rsync netstat -ntl | grep -q 873"
      ;;
    rsyslog)
      retry_checker "shell_container rsyslog netstat -nl | grep -q 514"
      ;;
    mcollective)
      retry_checker "shell_container mcollective ps waux | grep -q mcollectived"
      ;;
    nginx)
      retry_checker "shell_container nginx ps waux | grep -q nginx"
      ;;
    keystone)
      retry_checker "shell_container keystone keystone --os-auth-url \"http://$ADMIN_IP:35357/v2.0\" --os-username \"$keystone_nailgun_user\" --os-password \"$keystone_nailgun_password\" token-get &>/dev/null"
      ;;
    *)
      echo "No defined test for determining if $1 is ready."
      ;;
  esac

  #Catch all to ensure puppet is not running
  retry_checker "! shell_container $1 pgrep puppet"

  if [ "$failure" -eq 1 ]; then
    echo "ERROR: $1 failed to start."
    return 1
  else
    echo "$1 is ready."
    return 0
  fi
}

# Placeholder: no storage containers are run.
function run_storage_containers {
  #Run storage containers once
  #Note: storage containers exit, but keep volumes available
  #Example:
  #${DOCKER} run -d ${CONTAINER_VOLUMES[$FOO_CNT]} --name "$FOO_CNT" storage/foo || true
  return 0
}

# Exports each named container to "<name>.tar.gz" in the current directory.
# Arguments: [--trim] container...
#   --trim removes $CNT_PREFIX from the archive file name (the container is
#   still exported under its full name).
function export_containers {
  local trim=false
  local container archive
  if [[ "$1" == "--trim" ]]; then
    trim=true
    shift
  fi

  for container in "$@"; do
    archive=$container
    if [[ "$trim" == "true" ]]; then
      archive=${container/"${CNT_PREFIX}"/}
    fi
    ${DOCKER} export "$container" | gzip -c > "${archive}.tar.gz"
  done
}

# Lists the containers known to CONTAINER_NAMES.
# Arguments: (none) - short names only
#            -l     - short name, image, status and full container name
function list_containers {
  local container running longname imagename
  if [[ "$1" = "-l" ]]; then
    printf "%-13s%-25s%-13s%-25s\n" "Name" "Image" "Status" "Full container name"
    for container in "${!CONTAINER_NAMES[@]}"; do
      if ! container_created "$container"; then
        running="Not created"
      elif is_running "$container"; then
        running="Running"
      else
        running="Stopped"
      fi
      longname="${CONTAINER_NAMES["$container"]}"
      imagename="${IMAGE_PREFIX}/${container}_${VERSION}"
      printf "%-13s%-25s%-13s%-25s\n" "$container" "$imagename" "$running" "$longname"
    done
  else
    for container in "${!CONTAINER_NAMES[@]}"; do
      echo "$container"
    done
  fi
}

# Commits the container with short name $1 as image
# "$IMAGE_PREFIX/<name>_$VERSION".
function commit_container {
  local container_name="${CONTAINER_NAMES[$1]}"
  local image="$IMAGE_PREFIX/$1_$VERSION"
  ${DOCKER} commit "$container_name" "$image"
}

# Starts a container, creating it on first use.
# Arguments: $1 - container short name, or "all" for every entry of
#                 CONTAINER_SEQUENCE
#            $2 - optional "--attach" to attach to the container afterwards
# Exits 1 when no name is given.
function start_container {
  local container image_name container_name id
  if [ -z "$1" ]; then
    echo "Must specify a container name" 1>&2
    exit 1
  fi
  if [ "$1" = "all" ]; then
    # CONTAINER_SEQUENCE is a whitespace-separated list; split on purpose.
    for container in $CONTAINER_SEQUENCE; do
      start_container "$container"
    done
    return
  fi

  image_name="$IMAGE_PREFIX/$1"
  container_name=${CONTAINER_NAMES[$1]}
  if container_created "$container_name"; then
    pre_start_hooks "$1"
    if is_running "$container_name"; then
      if is_ghost "$container_name"; then
        restart_container "$1"
      else
        echo "$container_name is already running."
      fi
    else
      # Clean up broken mounts if needed. An empty id must be skipped:
      # grepping /proc/mounts for "" would match, and unmount, every mount.
      id=$(get_container_id "$container_name")
      if [[ -n "$id" ]]; then
        grep -F -- "$id" /proc/mounts | awk '{print $2}' | sort -r \
          | xargs --no-run-if-empty -n1 umount -l 2>/dev/null
      fi
      ${DOCKER} start "$container_name"
    fi
    post_start_hooks "$1"
    if [ "$2" = "--attach" ]; then
      attach_container "$container_name"
    fi
  else
    first_run_container "$1" "$2"
  fi
}

# Signal handler used by attach_container: kills the attach process ($2),
# stops the container ($1) and exits the script with status 0.
function shutdown_container {
  echo "Stopping $1..."
  kill "$2"
  ${DOCKER} stop "$1"
  exit 0
}

# Attaches (without stdin) to container $1 in the background and blocks until
# the attach process goes away. INT/TERM stop the container.
# Globals written: APID (PID of the background attach process).
function attach_container {
  echo "Attaching to container $1..."
  ${DOCKER} attach --no-stdin "$1" &
  APID=$!
  trap "shutdown_container $1 $APID" INT TERM
  # Sleep in the background and wait on it so the trap can fire promptly.
  while test -d "/proc/$APID/fd"; do
    sleep 10 &
    wait $!
  done
}

# Runs a command (or a shell) inside a container using the driver selected by
# EXEC_DRIVER: "lxc" uses lxc-attach, anything else uses docker exec.
# Arguments: $1 - container, remaining - optional command and its arguments.
function shell_container {
  case "$EXEC_DRIVER" in
    lxc)
      lxc_shell_container "$@"
      ;;
    *)
      exec_shell_container "$@"
      ;;
  esac
}

# Runs a command inside a container via "docker exec"; /bin/bash when no
# command is given.
# Arguments: $1 - container, remaining - optional command and its arguments.
# Returns: 1 when the container id cannot be resolved, otherwise the status
#          of docker exec.
function exec_shell_container {
  local name=$1
  local id
  local -a exec_opts=()
  local -a command=()
  #Setting C locale to suppress bash warning
  #TODO(mattymo): fix UTF-8 bash warning
  local -a prefix=(env LANG=C)

  #Interactive shell only if we have TTY
  if [ -t 0 ]; then
    exec_opts+=(-i)
  else
    #FIXME(mattymo): BASH 3.1.3 and higher don't need sleep
    sleep 0.1
    # read -t 0 only tests whether input is available; nothing is consumed.
    if read -t 0; then
      exec_opts+=(-i)
    fi
  fi
  if [[ -t 1 && ! -p /proc/self/fd/0 ]]; then
    exec_opts+=(-t)
  fi

  if ! id=$(get_container_id "$name"); then
    echo "Could not get docker ID for $name. Is it running?" 1>&2
    return 1
  fi

  # Arrays are local so arguments from an earlier call can never leak in.
  if [ -z "$2" ]; then
    command=(/bin/bash)
  else
    shift
    command=("$@")
  fi
  # Plain "docker" (not ${DOCKER}) is kept from the original implementation.
  docker exec "${exec_opts[@]}" "$id" "${prefix[@]}" "${command[@]}"
}

# Runs a command inside a container via lxc-attach; /bin/bash when no command
# is given.
# Arguments: $1 - container, remaining - optional command and its arguments.
# Returns: 1 when the container id cannot be resolved, otherwise the status
#          of lxc-attach.
function lxc_shell_container {
  local name=$1
  local id
  local -a command=()

  if ! id=$(get_container_id "$name"); then
    echo "Could not get docker ID for $name. Is it running?" 1>&2
    return 1
  fi

  if [ -z "$2" ]; then
    command=(/bin/bash)
  else
    shift
    command=("$@")
  fi
  lxc-attach --name "$id" -- "${command[@]}"
}

# Stops containers.
# Arguments: "all", or one or more container short names.
function stop_container {
  local container
  if [[ "$1" == 'all' ]]; then
    ${DOCKER} stop "${CONTAINER_NAMES[@]}"
  else
    for container in "$@"; do
      echo "Stopping $container..."
      ${DOCKER} stop "${CONTAINER_NAMES[$container]}"
    done
  fi
}

# Stops and removes containers. When removal fails, the container's mounts
# are lazily unmounted and removal is retried once.
# Arguments: "all", or one or more container short names.
# Returns: 1 when the workaround cannot resolve the container id.
function destroy_container {
  local container id
  if [[ "$1" == 'all' ]]; then
    stop_container all
    ${DOCKER} rm -f "${CONTAINER_NAMES[@]}"
  else
    for container in "$@"; do
      stop_container "$container"
      if ! ${DOCKER} rm -f "${CONTAINER_NAMES[$container]}"; then
        #This happens because devicemapper glitched
        #Try to unmount all devicemapper mounts manually and try again
        echo "Destruction of container $container failed. Trying workaround..."
        id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' "${CONTAINER_NAMES[$container]}")
        if [ -z "$id" ]; then
          echo "Could not get docker ID for $container" 1>&2
          return 1
        fi
        grep -F -- "$id" /proc/mounts | awk '{print $2}' | sort -r \
          | xargs --no-run-if-empty -n1 umount -l
        #Try to delete again
        if ! ${DOCKER} rm -f "${CONTAINER_NAMES[$container]}"; then
          echo "Workaround failed. Unable to destroy container $container."
        fi
      fi
    done
  fi
}

# Prints the console log of the container with short name $1.
function logs {
  ${DOCKER} logs "${CONTAINER_NAMES[$1]}"
}

# Restarts the container with short name $1.
function restart_container {
  ${DOCKER} restart "${CONTAINER_NAMES[$1]}"
}

# Prints the full container name for short name $1.
function container_lookup {
  echo "${CONTAINER_NAMES[$1]}"
}

# Prints the Docker id of a container.
# Arguments: $1 - container short name, short/long id, or full name.
# Returns: 1 (with a message on stderr) when no id can be found.
function get_container_id {
  local id
  #Try to get ID from container short name first
  id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' ${CONTAINER_NAMES[$1]} 2>/dev/null)
  if [ -z "$id" ]; then
    #Try to get ID short ID, long ID, or container name
    id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' "$1")
    if [ -z "$id" ]; then
      echo "Could not get docker ID for container $1. Is it running?" 1>&2
      return 1
    fi
  fi
  echo "$id"
}

# Succeeds when "$1" appears in the output of "docker ps -a".
function container_created {
  [[ -n "$1" ]] || return 1
  ${DOCKER} ps -a | grep -q -- "$1"
}

# Succeeds when "$1" appears in "docker ps" on a line containing "Ghost".
function is_ghost {
  [[ -n "$1" ]] || return 1
  LANG=C ${DOCKER} ps | grep -- "$1" | grep -q Ghost
}

# Succeeds when "$1" appears in the output of "docker ps".
function is_running {
  [[ -n "$1" ]] || return 1
  ${DOCKER} ps | grep -q -- "$1"
}

# Creates and runs a container for the first time from image
# "$IMAGE_PREFIX/<name>_$VERSION", running the setup hooks around it.
# Arguments: $1 - container short name
#            $2 - optional "--attach" to attach afterwards
function first_run_container {
  # Local, so the global "opts" written by parse_options is left untouched.
  local opts="${CONTAINER_OPTIONS[$1]} ${CONTAINER_VOLUMES[$1]}"
  local container_name="${CONTAINER_NAMES[$1]}"
  local image="$IMAGE_PREFIX/$1_$VERSION"
  if ! is_running "$container_name"; then
    pre_setup_hooks "$1"
    # $opts and $BACKGROUND are option strings and must be word-split.
    ${DOCKER} run $opts $BACKGROUND --name="$container_name" "$image"
    post_setup_hooks "$1"
  else
    echo "$container_name is already running."
  fi
  if [ "$2" = "--attach" ]; then
    attach_container "$container_name"
  fi
  return 0
}

# Hook run before a container is created; currently a no-op.
function pre_setup_hooks {
  return 0
}

# Hook run before an existing container is started; currently a no-op.
function pre_start_hooks {
  return 0
}

# Hook run after a container is created; currently a no-op.
function post_setup_hooks {
  case "$1" in
    *)
      ;;
  esac
}

# Hook run after an existing container is started; currently a no-op.
function post_start_hooks {
  case "$1" in
    *)
      ;;
  esac
}

# Prints the devicemapper rootfs path of the container with short name $1.
# Returns: 1 (with a message on stderr) when the id cannot be resolved.
function container_root {
  local id
  id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' "${CONTAINER_NAMES[$1]}")
  if [ -n "$id" ]; then
    echo "/var/lib/docker/devicemapper/mnt/${id}/rootfs"
    return 0
  else
    echo "Unable to get root for container ${1}." 1>&2
    return 1
  fi
}

# Copies files between the host and a container's root filesystem.
#Overview:
# Works similar to rsync:
# Container to host:
#   sync_files cobbler:/var/lib/tftpboot/ /localpath/
# Host to container:
#   sync_files /etc/puppet cobbler:/etc/puppet
#TODO(mattymo): add options and more parameters
# Arguments: $1 - source, $2 - destination; one of them is container:path.
# Exits 1 when fewer than two arguments are given.
# Returns: 1 when the container root is unavailable, 2 when the container
#          name is not recognised.
function copy_files {
  local method local_path remote container remotepath cont_root
  if [ -z "$2" ]; then
    echo "This command requires two parameters. See usage:"
    echo " $0 copy src dest"
    echo
    echo "Examples:"
    echo " $0 copy nailgun:/etc/nailguns/settings.yaml /root/settings.yaml"
    echo " $0 copy /root/newpkg.rpm mcollective:/root/"
    exit 1
  fi

  #Test which parameter is local
  # $1* is left unquoted on purpose: it is a glob probe on the host.
  if test -n "$(shopt -s nullglob; echo $1*)"; then
    method="push"
    local_path=$1
    remote=$2
  else
    method="pull"
    remote=$1
    local_path=$2
  fi

  container=$(echo "$remote" | cut -d':' -f1)
  remotepath=$(echo "$remote" | cut -d':' -f2-)
  # NOTE(review): this matches against the values (full names) of
  # CONTAINER_NAMES, while container_root indexes it by key - confirm that
  # full names always contain the short name.
  if [[ ${CONTAINER_NAMES[@]} =~ .*${container}.* ]]; then
    if ! cont_root=$(container_root "$container"); then
      return 1
    fi
  else
    echo "Unable to locate container to copy to/from."
    return 2
  fi

  remote="${cont_root}/${remotepath}"
  # Paths are deliberately unquoted so a glob in the argument still expands.
  if [ "$method" = "push" ]; then
    cp -R $local_path $remote
  else
    cp -R $remote $local_path
  fi
}

# Backs up the deployment into a single lrzip-compressed archive.
# Arguments: [--full] [destination] or [destination] [--full]
#   --full      also backs up containers and $FULL_BACKUP_DIRS
#   destination local directory or rsync target (see parse_backup_dir)
# Globals written: fullbackup, backup_id, image_suffix, use_rsync, and the
#   ones set by parse_backup_dir.
# Enables "set -e" and installs backup_fail as EXIT trap while running; the
# trap is removed on success (set -e is left enabled, as before).
function backup {
  set -e
  trap backup_fail EXIT

  fullbackup=0
  if [ "$1" == "--full" ]; then
    fullbackup=1
    shift
  elif [ "$2" == "--full" ]; then
    fullbackup=1
  fi
  backup_id=$(date +%F_%H%M)
  image_suffix="_${backup_id}"
  use_rsync=0
  #Sets backup_dir
  parse_backup_dir "$1"
  # SYSTEM_DIRS is a whitespace-separated list; split on purpose.
  mkdir -p $SYSTEM_DIRS "$backup_dir"
  if [[ "$backup_dir" =~ var ]] && [[ "$fullbackup" == "1" ]]; then
    # Pass the full flag so the 11gb requirement is actually applied.
    verify_disk_space "backup" "$fullbackup"
  fi
  if check_nailgun_tasks; then
    echo "There are currently running Fuel tasks. Please wait for them to finish or cancel them." 1>&2
    exit 1
  fi
  if [[ "$fullbackup" == "1" ]]; then
    backup_containers "$backup_id"
    backup_system_dirs --full
  else
    backup_system_dirs
  fi
  backup_postgres_db
  backup_compress
  if [ "$use_rsync" -eq 1 ]; then
    backup_rsync_upload "$rsync_dest" "$backup_dir"
  fi
  backup_cleanup "$backup_dir"
  echo "Backup complete. File is available at $backup_dir/fuel_backup${image_suffix}.tar.lrz"
  #remove trap
  trap - EXIT
}

# EXIT trap for backup: reports the failure and keeps the exit status.
function backup_fail {
  local exit_code=$?
  echo "Backup failed!" 1>&2
  exit $exit_code
}

# Decides where the backup is written.
# Arguments: $1 - empty, an existing local directory, or an rsync target
#                 containing ":" (e.g. user@server:/path).
# Globals written: use_rsync, backup_dir, rsync_dest (rsync targets only).
# Exits 1 for anything else.
function parse_backup_dir {
  use_rsync=0
  if [ -z "$1" ]; then
    #Default backup dir
    backup_dir="${BACKUP_ROOT}/backup_${backup_id}"
  elif [ -d "$1" ]; then
    #User defined dir exists, so use it
    backup_dir="$1"
  elif [[ "$1" =~ .:. ]]; then
    #Remote rsync dir
    use_rsync=1
    backup_dir="${BACKUP_ROOT}/backup_${backup_id}"
    rsync_dest="$1"
  else
    echo "Unrecognized backup destination. Valid options include:" 1>&2
    echo " (blank) - backup to $BACKUP_ROOT" 1>&2
    echo " /path/to/backup - local backup directory" 1>&2
    echo " user@server:/path - backup using rsync to server" 1>&2
    exit 1
  fi
}

# Pauses running containers, archives $SYSTEM_DIRS into
# $backup_dir/system-dirs.tar and unpauses the containers again.
# Arguments: $1 - optional "--full" to append $FULL_BACKUP_DIRS as well.
# Returns: the status of the failing tar call, 0 otherwise. Containers are
#          unpaused even when archiving fails.
function backup_system_dirs {
  local rc=0
  echo "Pausing containers..."
  ${DOCKER} ps -q | xargs -n1 --no-run-if-empty ${DOCKER} pause
  echo "Archiving system folders"
  tar cf "$backup_dir/system-dirs.tar" -C / $SYSTEM_DIRS || rc=$?
  if (( rc == 0 )) && [[ "$1" == "--full" ]]; then
    tar rf "$backup_dir/system-dirs.tar" -C / $FULL_BACKUP_DIRS || rc=$?
  fi
  echo "Unpausing containers..."
  ${DOCKER} ps -a | grep Paused | cut -d' ' -f1 | xargs -n1 --no-run-if-empty ${DOCKER} unpause
  return $rc
}

# Commits every container (Fuel-related or not) as a temporary image named
# "<image>$image_suffix", saves those images to
# $backup_dir/docker-images.tar and removes the temporary images.
# NOTE(review): $backup_dir is wiped and recreated first, which also removes
# the contents of a user-supplied existing directory - confirm this is wanted.
function backup_containers {
  local containerid container_name container_image images_to_save
  rm -rf "${backup_dir:?}"
  mkdir -p $SYSTEM_DIRS "$backup_dir"
  echo "Reading container data..."
  while IFS= read -r containerid; do
    container_name="$(${DOCKER} inspect -f='{{.Name}}' "$containerid" | tr -d '/')"
    container_image="$(${DOCKER} inspect -f='{{.Config.Image}}' "$containerid")"
    container_image+=$image_suffix
    #Commit container as new image
    echo "Committing $container_name..."
    ${DOCKER} commit "$containerid" "${container_image}"
  done < <(${DOCKER} ps -aq)
  echo "Saving containers to combined archive..."
  images_to_save=$(${DOCKER} images | grep -F -- "$image_suffix" | cut -d' ' -f1)
  # One image name per word; split on purpose.
  ${DOCKER} save $images_to_save > "${backup_dir}/docker-images.tar"
  echo "Cleaning up temporary images..."
  ${DOCKER} rmi $images_to_save
}

# Dumps all PostgreSQL databases from the postgres container.
# Arguments: $1 - optional output file
#                 (default $backup_dir/postgres_backup.sql).
function backup_postgres_db {
  local dst
  if [ -n "$1" ]; then
    dst=$1
  else
    dst="$backup_dir/postgres_backup.sql"
  fi
  echo "Backing up PostgreSQL database to ${dst}..."
  shell_container postgres su - postgres -c 'pg_dumpall --clean' > "$dst"
}

# Bundles the component archives and SQL dump in $backup_dir into
# fuel_backup${image_suffix}.tar, removes the components and compresses the
# bundle with lrzip.
function backup_compress {
  local -a component_tars lrzopts
  echo "Compressing archives..."
  component_tars=("$backup_dir"/*.tar)
  # The output name is relative to the directory we cd into, so a relative
  # backup_dir works too.
  ( cd "$backup_dir" && tar cf "fuel_backup${image_suffix}.tar" *.tar *.sql )
  rm -rf "${component_tars[@]}"
  #Improve compression on bare metal
  if [ -z "$(virt-what)" ]; then
    lrzopts=(-L2 -U)
  else
    lrzopts=(-L2)
  fi
  lrzip "${lrzopts[@]}" "$backup_dir/fuel_backup${image_suffix}.tar" -o "$backup_dir/fuel_backup${image_suffix}.tar.lrz"
}

# Uploads the compressed backup with rsync.
# Arguments: $1 - rsync destination, $2 - local backup directory.
function backup_rsync_upload {
  local dest="$1"
  local src_dir="$2"
  echo "Starting rsync backup. You may be prompted for a login."
  rsync -vP "$src_dir"/*.tar.lrz "$dest"
}

# Removes leftover uncompressed .tar files from directory $1.
# Always returns 0 when the directory is missing, so it is safe under set -e.
function backup_cleanup {
  echo "Cleaning up..."
  if [ -d "$1" ]; then
    rm -f "$1"/*.tar
  fi
}

# Returns 0 if tasks are running in nailgun.
# Returns non-zero when "fuel task" fails (app not running) or when its
# output does not contain "running".
function check_nailgun_tasks {
  local tasks
  #if command returns error, then app is not running
  tasks=$(shell_container nailgun fuel task 2>/dev/null) || return 1
  grep -q running <<< "$tasks"
}

# Restores a deployment from an lrzip archive produced by backup.
#TODO(mattymo): Optionally not include system dirs during restore
#TODO(mattymo): support remote file such as ssh://user@myhost/backup.tar.lrz
#               or http://myhost/backup.tar.lrz
# Arguments: $1 - path to the .lrz archive
#            $2 - optional "--full" to also restore container images
# Exit codes: 1 - running tasks, missing argument or missing file
#             2 - archive without lrz extension
#             3 - no timestamp found in the archive name
function restore {
  local container
  fullrestore=0
  if [ "$2" == "--full" ]; then
    fullrestore=1
  fi
  set -e
  trap restore_fail EXIT
  if check_nailgun_tasks; then
    echo "There are currently running Fuel tasks. Please wait for them to finish or cancel them. Run \"fuel task list\" for more details." 1>&2
    exit 1
  fi
  verify_disk_space "restore" "$fullrestore"
  backupfile=$1
  if [ -z "$backupfile" ]; then
    #TODO(mattymo): Parse BACKUP_DIR for lrz files
    echo "Specify a backup file to restore" 1>&2
    exit 1
  elif ! [ -f "$backupfile" ]; then
    echo "Archive does not exist: $backupfile" 1>&2
    exit 1
  elif ! [[ "$backupfile" =~ lrz$ ]]; then
    echo "Archive does not have lrz extension." 1>&2
    exit 2
  fi
  # sed's greedy ".*" picks the last timestamp found in the path.
  timestamp=$(echo "$backupfile" | sed -n 's/.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]_[0-9][0-9][0-9][0-9]\).*/\1/p')
  if [ -z "$timestamp" ]; then
    echo "Unable to parse timestamp in archive." 1>&2
    exit 3
  fi
  restoredir="$BACKUP_ROOT/restore-$timestamp/"
  disable_supervisor
  if [ "$fullrestore" == "1" ]; then
    echo "Stopping and destroying existing containers..."
    destroy_container all
  else
    echo "Stopping containers..."
    stop_container all
  fi
  unpack_archive "$backupfile" "$restoredir"
  if [ "$fullrestore" == "1" ]; then
    restore_images "$restoredir"
    rename_images "$timestamp"
  fi
  restore_systemdirs "$restoredir"
  # From here on failures are reported by the individual steps.
  set +e
  echo "Starting containers..."
  start_container all
  enable_supervisor
  for container in $CONTAINER_SEQUENCE; do
    check_ready "$container"
  done
  echo "Restore complete."
  #remove trap
  trap - EXIT
}

# EXIT trap for restore: reports the failure and keeps the exit status that
# triggered it (falling back to 1 so a failure is never reported as success).
function restore_fail {
  local exit_code=$?
  echo "Restore failed!" 1>&2
  if [ "$exit_code" -eq 0 ]; then
    exit_code=1
  fi
  exit $exit_code
}

# Decompresses and unpacks a backup archive.
# Arguments: $1 - .lrz archive, $2 - directory to unpack into (created).
function unpack_archive {
  #feedback as everything restores
  local archive="$1"
  local target_dir="$2"
  mkdir -p "$target_dir"
  lrzip -d -o "$target_dir/fuel_backup.tar" "$archive"
  tar -xf "$target_dir/fuel_backup.tar" -C "$target_dir" && rm -f "$target_dir/fuel_backup.tar"
}

# Loads every image archive found in directory $1 into Docker, skipping the
# system-dirs and fuel_backup archives.
function restore_images {
  local image_dir="$1"
  local imgfile
  for imgfile in "$image_dir"/*.tar; do
    if ! [[ "$imgfile" =~ system-dirs ]] && ! [[ "$imgfile" =~ fuel_backup.tar ]]; then
      echo "Loading $imgfile..."
      ${DOCKER} load -i "$imgfile"
    fi
    #rm -f $imgfile
  done
}

# Re-tags every image whose "docker images" line contains timestamp $1 to
# the same name without the "_<timestamp>" suffix, then removes the old tag.
# NOTE(review): "docker tag -f" is kept from the original; whether the -f
# flag is still accepted depends on the installed Docker version.
function rename_images {
  local stamp="$1"
  local oldname newname
  while IFS= read -r oldname; do
    newname=$(echo "$oldname" | sed -n "s/_${stamp}//p")
    docker tag -f "$oldname" "$newname"
    docker rmi "$oldname"
  done < <(docker images | grep -- "$stamp" | cut -d' ' -f1)
}

# Extracts system-dirs.tar from directory $1 over /.
function restore_systemdirs {
  tar xf "$1/system-dirs.tar" -C /
}

# Shuts supervisord down through supervisorctl.
function disable_supervisor {
  supervisorctl shutdown
}

# Starts the supervisord service.
function enable_supervisor {
  service supervisord start
}

# Verifies that the filesystem holding /var has enough free space.
# Arguments: $1 - operation name used in messages ("backup" or "restore")
#            $2 - "1" for a full operation (11gb required); anything else
#                 requires 2gb
# Exits 1 when the operation is missing, the free space cannot be determined,
# or it is below the requirement.
function verify_disk_space {
  local required avail spaceerror
  if [ -z "$1" ]; then
    echo "Backup or restore operation not specified." 1>&2
    exit 1
  fi
  if [[ "$2" != "1" ]]; then
    #2gb free space required for light backup
    (( required = 2 * 1024 * 1024 ))
    spaceerror="Insufficient disk space to perform $1. At least 2gb must be free on /var partition."
  else
    #11gb free space required to backup and restore
    (( required = 11 * 1024 * 1024 ))
    spaceerror="Insufficient disk space to perform $1. At least 11gb must be free on /var partition."
  fi
  # -P keeps each filesystem on one line; the second line is the filesystem
  # holding /var even when /var is not a mount point of its own.
  avail=$(df -P /var | awk 'NR==2 {print $4}')
  if ! [[ "$avail" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine free space on /var partition." 1>&2
    exit 1
  fi
  if (( avail < required )); then
    echo "$spaceerror" 1>&2
    exit 1
  fi
}