diff --git a/elements/spark/README.md b/elements/spark/README.md
index e5f22a8c..34ed8da5 100644
--- a/elements/spark/README.md
+++ b/elements/spark/README.md
@@ -1,7 +1,6 @@
 Installs Spark on Ubuntu. Requires Hadoop CDH 4 (hadoop-cdh element).
 
-It tries to choose the right version of the Spark binaries to install based on the
-Hadoop version defined in 'DIB_HADOOP_VERSION'.
+It will install a version of Spark known to be compatible with CDH 4.
 This behaviour can be controlled also by using 'DIB_SPARK_VERSION' or directly
 with 'SPARK_DOWNLOAD_URL'
 
diff --git a/elements/spark/install.d/60-spark b/elements/spark/install.d/60-spark
index 1e8f16ae..343b6a87 100755
--- a/elements/spark/install.d/60-spark
+++ b/elements/spark/install.d/60-spark
@@ -8,21 +8,14 @@
 tmp_dir=/tmp/spark
 mkdir -p $tmp_dir
 pushd $tmp_dir
-echo "Creating spark user & group"
-addgroup spark
-adduser --ingroup spark --disabled-password --gecos GECOS spark
-adduser spark sudo
-
 # The user is not providing his own Spark distribution package
 if [ -z "$SPARK_DOWNLOAD_URL" ]; then
     # Check hadoop version
-    # pietro: we know for sure that spark 0.8.1 works on CDH 4.5.0 mr1,
-    # other combinations need testing
     # INFO on hadoop versions: http://spark.incubator.apache.org/docs/latest/hadoop-third-party-distributions.html
     if [ -z "$DIB_SPARK_VERSION" ]; then
         case "$DIB_HADOOP_VERSION" in
-            2.0.0-mr1-cdh4.5.0)
-                DIB_SPARK_VERSION=0.8.1
+            CDH4)
+                DIB_SPARK_VERSION=0.9.1
                 SPARK_HADOOP_DL=cdh4
                 ;;
             *)
@@ -32,30 +25,27 @@ if [ -z "$SPARK_DOWNLOAD_URL" ]; then
         esac
     fi
 
-    SPARK_DOWNLOAD_URL="http://www.apache.org/dist/incubator/spark/spark-$DIB_SPARK_VERSION-incubating/spark-$DIB_SPARK_VERSION-incubating-bin-$SPARK_HADOOP_DL.tgz"
+    SPARK_DOWNLOAD_URL="http://www.apache.org/dist/incubator/spark/spark-$DIB_SPARK_VERSION/spark-$DIB_SPARK_VERSION-bin-$SPARK_HADOOP_DL.tgz"
 fi
 
+echo "Downloading SPARK"
 wget "$SPARK_DOWNLOAD_URL"
 if [ $? -ne 0 ]; then
     echo -e "Could not download spark.\nAborting"
     exit 1
 fi
 
+echo "Extracting SPARK"
 spark_file=$(basename "$SPARK_DOWNLOAD_URL")
 extract_folder=$(tar tzf $spark_file | sed -e 's@/.*@@' | uniq)
 echo "Decompressing Spark..."
 tar xzf $spark_file
 rm $spark_file
 
-echo "$SPARK_DOWNLOAD_URL" > ~spark/spark_url.txt
-if [ -z "$SPARK_CUSTOM_DISTRO" ]; then
-    mv $extract_folder ~spark/spark-bin
-    chown -R spark:spark ~spark/spark-bin
-else
-    mv $extract_folder/dist ~spark/spark-dist
-    rm -Rf $extract_folder
-    chown -R spark:spark ~spark/spark-dist
-fi
+echo "Moving SPARK to /opt/"
+# Placing spark in /opt/spark
+mv $extract_folder /opt/spark
+echo "$SPARK_DOWNLOAD_URL" > /opt/spark/spark_url.txt
 
 popd
 rm -Rf $tmp_dir
diff --git a/elements/spark/post-install.d/20-spark b/elements/spark/post-install.d/20-spark
new file mode 100644
index 00000000..5b08e8ae
--- /dev/null
+++ b/elements/spark/post-install.d/20-spark
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# We have to chown the Spark directories to make them usable by the default
+# user. Unfortunately the ubuntu user does not exist while DIB is creating
+# the image, so we need to execute this code the first time the VM boots.
+
+sed -i -e 's,^exit 0$,[ -f /opt/spark/firstboot.sh ] \&\& sh /opt/spark/firstboot.sh; exit 0,' /etc/rc.local
+
+cat >> /opt/spark/firstboot.sh <<EOF
+#!/bin/sh
+chown -R ubuntu:ubuntu /opt/spark
+chown -R ubuntu:ubuntu /etc/hadoop
+rm /opt/spark/firstboot.sh
+EOF
+
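With this change the element selects Spark 0.9.1 whenever `DIB_HADOOP_VERSION` is `CDH4`, and the two knobs documented in the README still short-circuit that choice. A minimal sketch of a build invocation, assuming the standard `disk-image-create` CLI from diskimage-builder (the output image name is illustrative):

```bash
# Build an Ubuntu image containing the hadoop-cdh and spark elements.
export DIB_HADOOP_VERSION=CDH4       # 60-spark then selects Spark 0.9.1
export DIB_SPARK_VERSION=0.9.1       # optional: pin a specific version
# export SPARK_DOWNLOAD_URL=...      # optional: bypass version selection entirely
disk-image-create -o ubuntu-spark ubuntu hadoop-cdh spark
```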
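The tarball's top-level directory is discovered rather than hard-coded, which is what lets `mv $extract_folder /opt/spark` work for any `SPARK_DOWNLOAD_URL`. The pipeline in `60-spark` behaves roughly like this (the filename below is only an example):

```bash
# List every entry in the archive, strip everything after the first "/",
# and collapse the repeats: what remains is the single top-level folder.
spark_file=spark-0.9.1-bin-cdh4.tgz   # example filename
tar tzf "$spark_file" | sed -e 's@/.*@@' | uniq
# -> spark-0.9.1-bin-cdh4
```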
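The post-install script works around the fact that the image's default user is typically created by cloud-init at first boot, not by DIB. After the `sed` substitution, the tail of `/etc/rc.local` would look roughly like this (a sketch; the surrounding content depends on the base image):

```sh
# Tail of /etc/rc.local after 20-spark has run. The guard runs the
# one-shot script on first boot; because firstboot.sh deletes itself,
# later boots fall straight through to "exit 0".
[ -f /opt/spark/firstboot.sh ] && sh /opt/spark/firstboot.sh; exit 0
```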