#!/bin/bash
# This script installs Spark
# More documentation in the README.md file
#
# Environment variables read by this script:
#   DIB_DEBUG_TRACE     when greater than 0, command tracing (set -x) is enabled
#   SPARK_DOWNLOAD_URL  optional; URL of a Spark tarball, used as-is when set
#   DIB_SPARK_VERSION   optional; Spark version used to build the download URL
#   DIB_HADOOP_VERSION  selects default Spark settings when DIB_SPARK_VERSION
#                       is not set (only CDH4 is known here)
#   SPARK_HADOOP_DL     Hadoop flavour suffix of the tarball name; set by the
#                       CDH4 default, otherwise taken from the environment
#
# Result: the tarball is unpacked into /opt/spark and the URL it was fetched
# from is recorded in /opt/spark/spark_url.txt.

if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
    set -x
fi

# Print a message (backslash escapes are interpreted) to stderr and abort.
die() {
    printf '%b\n' "$*" >&2
    exit 1
}

install-packages wget tar

tmp_dir=/tmp/spark
mkdir -p "$tmp_dir" || die "Could not create $tmp_dir.\nAborting"
# Abort if we cannot enter the work directory; otherwise the download and the
# extraction below would happen in whatever directory we were started from.
pushd "$tmp_dir" || die "Could not enter $tmp_dir.\nAborting"

# The user is not providing his own Spark distribution package
if [ -z "$SPARK_DOWNLOAD_URL" ]; then
    # Check hadoop version
    # INFO on hadoop versions: http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html
    if [ -z "$DIB_SPARK_VERSION" ]; then
        case "$DIB_HADOOP_VERSION" in
            CDH4)
                DIB_SPARK_VERSION=1.0.0
                SPARK_HADOOP_DL=cdh4
                ;;
            *)
                echo -e "WARNING: Hadoop version $DIB_HADOOP_VERSION not supported."
                echo -e "WARNING: make sure SPARK_DOWNLOAD_URL points to a compatible Spark version."
                ;;
        esac
    fi

    # Without both components the URL below would be malformed (for example
    # ".../spark-/spark--bin-.tgz") and the download could only fail, so stop
    # here with a message that names the real problem.
    if [ -z "$DIB_SPARK_VERSION" ] || [ -z "$SPARK_HADOOP_DL" ]; then
        die "Cannot build a Spark download URL: DIB_SPARK_VERSION and SPARK_HADOOP_DL must both be set, or SPARK_DOWNLOAD_URL must be provided.\nAborting"
    fi

    SPARK_DOWNLOAD_URL="http://archive.apache.org/dist/spark/spark-$DIB_SPARK_VERSION/spark-$DIB_SPARK_VERSION-bin-$SPARK_HADOOP_DL.tgz"
fi

echo "Downloading SPARK"
if ! wget "$SPARK_DOWNLOAD_URL"; then
    die "Could not download spark.\nAborting"
fi

echo "Extracting SPARK"
spark_file=$(basename "$SPARK_DOWNLOAD_URL")
# Name of the top-level entry inside the archive: strip everything after the
# first '/' from each listed path and collapse the repeated result.
extract_folder=$(tar tzf "$spark_file" | sed -e 's@/.*@@' | uniq)
if [ -z "$extract_folder" ]; then
    die "Could not list the contents of $spark_file.\nAborting"
fi

echo "Decompressing Spark..."
tar xzf "$spark_file" || die "Could not extract $spark_file.\nAborting"
rm -- "$spark_file"

# The archive is expected to unpack into exactly one top-level directory;
# anything else (several entries, a plain file) cannot be moved into place.
if [ ! -d "$extract_folder" ]; then
    die "Unexpected archive layout in $spark_file.\nAborting"
fi

echo "Moving SPARK to /opt/"
# Placing spark in /opt/spark
mv -- "$extract_folder" /opt/spark || die "Could not move Spark to /opt/spark.\nAborting"
echo "$SPARK_DOWNLOAD_URL" > /opt/spark/spark_url.txt

popd
rm -Rf -- "$tmp_dir"