#!/bin/bash

# This script installs Spark
# More documentation in the README.md file

# Trace every executed command when DIB_DEBUG_TRACE is a positive integer.
if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
    set -x
fi
# Abort on the first failing command or on use of an unset variable, and
# make a pipeline fail when any of its stages fails.
set -eu
set -o pipefail

# Tools used below: wget fetches the Spark archive, tar unpacks it.
install-packages wget tar

# Work in a freshly created, uniquely named directory instead of a fixed
# /tmp/spark. A fixed directory left behind by an earlier failed run may still
# contain an old archive; wget would then store the new download under a
# ".1" suffix while the following steps keep reading the stale file.
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/spark.XXXXXX")
# Remove the scratch directory on every exit path, including failures.
trap 'rm -rf -- "$tmp_dir"' EXIT
pushd "$tmp_dir"

# The user is not providing their own Spark distribution package, so build
# the download URL from the requested Hadoop/Spark versions.
if [ -z "${SPARK_DOWNLOAD_URL:-}" ]; then
    # Defaults for each supported Hadoop flavour: the Spark release to fetch
    # and the suffix used in the pre-built archive name.
    # INFO on hadoop versions: http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html
    case "${DIB_HADOOP_VERSION:-}" in
        CDH4)
            default_spark_version=1.0.0
            default_hadoop_dl=cdh4
            ;;
        *)
            default_spark_version=
            default_hadoop_dl=
            ;;
    esac

    if [ -z "${DIB_SPARK_VERSION:-}" ]; then
        # No Spark version requested: use the defaults for this Hadoop flavour.
        DIB_SPARK_VERSION=$default_spark_version
        SPARK_HADOOP_DL=$default_hadoop_dl
    else
        # A Spark version was requested explicitly. The archive suffix still
        # has to come from somewhere: keep a caller-provided SPARK_HADOOP_DL,
        # otherwise fall back to the default for this Hadoop flavour.
        SPARK_HADOOP_DL=${SPARK_HADOOP_DL:-$default_hadoop_dl}
    fi

    # Fail with an explicit message instead of tripping over an unbound
    # variable (set -u) while assembling the URL.
    if [ -z "$DIB_SPARK_VERSION" ] || [ -z "$SPARK_HADOOP_DL" ]; then
        echo "ERROR: cannot derive a Spark download URL for Hadoop version '${DIB_HADOOP_VERSION:-}'." >&2
        echo "ERROR: set SPARK_DOWNLOAD_URL to a compatible Spark package, or set both DIB_SPARK_VERSION and SPARK_HADOOP_DL." >&2
        exit 1
    fi

    SPARK_DOWNLOAD_URL="http://archive.apache.org/dist/spark/spark-$DIB_SPARK_VERSION/spark-$DIB_SPARK_VERSION-bin-$SPARK_HADOOP_DL.tgz"
fi

echo "Downloading SPARK"
# Test wget directly in the condition: with `set -e` active, a separate
# `$?` check after a failed wget would never be reached, so the abort
# message would never be shown.
if ! wget "$SPARK_DOWNLOAD_URL"; then
    echo -e "Could not download spark.\nAborting" >&2
    exit 1
fi

echo "Extracting SPARK"
# wget stores the download under the last path component of the URL.
spark_file=$(basename "$SPARK_DOWNLOAD_URL")
# Determine the archive's top-level entry: drop a leading "./", cut each
# listed path at its first "/", discard empty results and de-duplicate.
extract_folder=$(tar tzf "$spark_file" \
    | sed -e 's@^\./@@' -e 's@/.*@@' -e '/^$/d' \
    | sort -u)
# The later move expects exactly one top-level entry; stop here if the
# archive has none or several.
case "$extract_folder" in
    '' | *$'\n'*)
        echo "ERROR: expected exactly one top-level directory in $spark_file" >&2
        exit 1
        ;;
esac
echo "Decompressing Spark..."
tar xzf "$spark_file"
rm -- "$spark_file"

echo "Moving SPARK to /opt/"
# Placing spark in /opt/spark
# NOTE(review): if /opt/spark already exists as a directory, mv places the
# extracted folder inside it instead of renaming it -- confirm that nothing
# creates /opt/spark before this script runs.
mv -- "$extract_folder" /opt/spark
# Record where this Spark build was downloaded from.
echo "$SPARK_DOWNLOAD_URL" > /opt/spark/spark_url.txt

# Leave the scratch directory before deleting it.
popd
rm -Rf -- "$tmp_dir"