Add a Spark element
Add a Spark element that installs Spark in the image. By default the script tries to find a suitable Spark version based on the Hadoop version string; for now only the Hadoop CDH installed by the hadoop-cdh element is supported. Environment variables can be used to select a different Spark version or to use a custom, hand-built Spark distribution. This element is going to be used to build images that will be configured and run by the Savanna Spark plugin.

Change-Id: Icd4dfef792a7c112bdcccc8951f7e86892abbe86
commit 40b3594bda (parent 566d4bf52d)
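The element is driven by the environment variables described in the README below. A minimal build sketch for the default case (the disk-image-create invocation and the ubuntu element in the list are illustrative assumptions, not part of this change):

    # Let the element pick Spark 0.8.1 automatically for the supported CDH release
    export DIB_HADOOP_VERSION=2.0.0-mr1-cdh4.5.0
    disk-image-create ubuntu hadoop-cdh spark -o ubuntu-spark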
elements/spark/README.md  (new file, 10 lines)
@@ -0,0 +1,10 @@
Installs Spark on Ubuntu. Requires Hadoop CDH 4 (hadoop-cdh element).

It tries to choose the right version of the Spark binaries to install based on the
Hadoop version defined in 'DIB_HADOOP_VERSION'. This behaviour can also be
controlled with 'DIB_SPARK_VERSION' or directly with 'SPARK_DOWNLOAD_URL'.

If you set 'SPARK_CUSTOM_DISTRO' to 1, you can point the 'SPARK_DOWNLOAD_URL'
variable to a custom Spark distribution created with the make-distribution.sh
script included in Spark.
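For the custom-distribution path described above, a hypothetical invocation might look like this (the download URL and output name are placeholders, not values from this change):

    # Point the element at a distribution built with make-distribution.sh
    export SPARK_CUSTOM_DISTRO=1
    export SPARK_DOWNLOAD_URL=http://example.com/spark/my-spark-dist.tgz
    disk-image-create ubuntu hadoop-cdh spark -o ubuntu-spark-custom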
elements/spark/element-deps  (new file, 1 line)
@@ -0,0 +1 @@
hadoop-cdh
elements/spark/install.d/60-spark  (new executable file, 61 lines)
@@ -0,0 +1,61 @@
#!/bin/bash
# This script installs Spark
# More documentation in the README.md file

install-packages wget tar

tmp_dir=/tmp/spark
mkdir -p $tmp_dir
pushd $tmp_dir

echo "Creating spark user & group"
addgroup spark
adduser --ingroup spark --disabled-password --gecos GECOS spark
adduser spark sudo

# The user is not providing their own Spark distribution package
if [ -z "$SPARK_DOWNLOAD_URL" ]; then
    # Check hadoop version
    # pietro: we know for sure that spark 0.8.1 works on CDH 4.5.0 mr1,
    # other combinations need testing
    # INFO on hadoop versions: http://spark.incubator.apache.org/docs/latest/hadoop-third-party-distributions.html
    if [ -z "$DIB_SPARK_VERSION" ]; then
        case "$DIB_HADOOP_VERSION" in
            2.0.0-mr1-cdh4.5.0)
                DIB_SPARK_VERSION=0.8.1
                SPARK_HADOOP_DL=cdh4
                ;;
            *)
                echo -e "WARNING: Hadoop version $DIB_HADOOP_VERSION not supported."
                echo -e "WARNING: make sure SPARK_DOWNLOAD_URL points to a compatible Spark version."
                ;;
        esac
    fi

    SPARK_DOWNLOAD_URL="http://www.apache.org/dist/incubator/spark/spark-$DIB_SPARK_VERSION-incubating/spark-$DIB_SPARK_VERSION-incubating-bin-$SPARK_HADOOP_DL.tgz"
fi

wget "$SPARK_DOWNLOAD_URL"
if [ $? -ne 0 ]; then
    echo -e "Could not download spark.\nAborting"
    exit 1
fi

spark_file=$(basename "$SPARK_DOWNLOAD_URL")
extract_folder=$(tar tzf $spark_file | sed -e 's@/.*@@' | uniq)
echo "Decompressing Spark..."
tar xzf $spark_file
rm $spark_file
echo "$SPARK_DOWNLOAD_URL" > ~spark/spark_url.txt

if [ -z "$SPARK_CUSTOM_DISTRO" ]; then
    mv $extract_folder ~spark/spark-bin
    chown -R spark:spark ~spark/spark-bin
else
    mv $extract_folder/dist ~spark/spark-dist
    rm -Rf $extract_folder
    chown -R spark:spark ~spark/spark-dist
fi

popd
rm -Rf $tmp_dir
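For reference, with the defaults chosen by the case statement above the download URL expands as follows (a sketch assuming DIB_SPARK_VERSION=0.8.1 and SPARK_HADOOP_DL=cdh4, i.e. the supported CDH 4.5.0 mr1 combination):

    DIB_SPARK_VERSION=0.8.1
    SPARK_HADOOP_DL=cdh4
    # The URL template in the script expands to:
    # http://www.apache.org/dist/incubator/spark/spark-0.8.1-incubating/spark-0.8.1-incubating-bin-cdh4.tgz

The unpacked tree then ends up in ~spark/spark-bin for a stock binary release, or in ~spark/spark-dist when SPARK_CUSTOM_DISTRO is set.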
elements/spark/root.d/0-check  (new executable file, 6 lines)
@@ -0,0 +1,6 @@
#!/bin/bash
if [ -z "$SPARK_DOWNLOAD_URL" -a -z "$DIB_HADOOP_VERSION" ]; then
    echo -e "Neither DIB_HADOOP_VERSION nor SPARK_DOWNLOAD_URL is set. Impossible to install Spark.\nAborting"
    exit 1
fi