Add a Hadoop CDH element

Add a Hadoop CDH element that installs only HDFS from CDH 4.5.
Comments in the 40-setup-hadoop script indicate where code should be
added to support other CDH versions and to add full support for the
distribution. If this version is accepted, a bug report will be
created to track the missing features.

This element will be used to create an image with Spark, suitable for
use with the Savanna Spark plugin.

Change-Id: I6202e6f5d88cc697af53ff1fd5d23d35c80b2b6d
This commit is contained in:
Daniele Venzano 2014-02-04 15:31:02 +01:00
parent dd2c54d632
commit 566d4bf52d
7 changed files with 233 additions and 0 deletions

View File

@ -0,0 +1,2 @@
Installs Java and Hadoop CDH 4 (the Cloudera distribution), configures SSH.
Only HDFS is installed at this time.

View File

@ -0,0 +1 @@
savanna-version

View File

@ -0,0 +1,38 @@
#!/bin/bash
# Install Java into the image. Three sources, in order of preference:
#   1. $JAVA_DOWNLOAD_URL  - JDK archive fetched with wget
#   2. $JAVA_FILE          - JDK archive shipped next to this script
#   3. neither set         - the distro's packaged default-jre
echo "Java setup begins"
set -e
# NOTE: $(dirname $0) is read-only, use space under $TARGET_ROOT
JAVA_HOME="$TARGET_ROOT/usr/java"
mkdir -p "$JAVA_HOME"
if [ -n "$JAVA_DOWNLOAD_URL" ]; then
    install-packages wget
    JAVA_FILE=$(basename "$JAVA_DOWNLOAD_URL")
    # The Cookie header accepts Oracle's download license prompt.
    # Run wget inside the 'if' condition: with 'set -e' the original
    # standalone wget + "[ $? -eq 0 ]" pattern was dead code, because a
    # failed wget aborted the script before the status check ran.
    if wget --no-check-certificate --no-cookies \
            --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com" \
            -O "$JAVA_HOME/$JAVA_FILE" "$JAVA_DOWNLOAD_URL"; then
        echo "Java downloaded"
    else
        echo "Error downloading java. Exiting."
        exit 1
    fi
elif [ -n "$JAVA_FILE" ]; then
    install -D -g root -o root -m 0755 "$(dirname "$0")/$JAVA_FILE" "$JAVA_HOME"
fi
if [ -z "$JAVA_FILE" ]; then
    # No JDK supplied: fall back to the distribution's packaged JRE.
    install-packages default-jre
else
    cd "$JAVA_HOME"
    case "$JAVA_FILE" in
        *.tar.gz)
            echo -e "\n" | tar -zxvf "$JAVA_FILE"
            ;;
        *.bin)
            # Self-extracting installer; the piped newline answers its
            # interactive prompt.
            echo -e "\n" | sh "$JAVA_FILE"
            ;;
        *)
            echo "Unknown file type: $JAVA_FILE. Exiting."
            exit 1
            ;;
    esac
    rm "$JAVA_FILE"
fi
echo "Java was installed"

View File

@ -0,0 +1,47 @@
#!/bin/bash
# Pre-configure SSH inside the image: install server/client packages,
# apply settings shared by all distros via augtool, then apply the
# distro-specific tweaks. The trailing ':' keeps the exit status zero.
echo "Adjusting ssh configuration"
# /etc/ssh/sshd_config is provided by openssh-server
# /etc/ssh/ssh_config is provided by openssh-client
# Note: You need diskimage-builder w/ SHA 82eacdec (11 July 2013) for
# this install to work on Fedora - https://review.openstack.org/#/c/36739/
install-packages augeas-tools openssh-server openssh-client

# Server settings common to every distro.
augtool -s set /files/etc/ssh/sshd_config/GSSAPIAuthentication no
augtool -s set /files/etc/ssh/sshd_config/UseDNS no
augtool -s set /files/etc/ssh/sshd_config/PermitTunnel yes

# Client settings common to every distro.
augtool -s set /files/etc/ssh/ssh_config/Host/StrictHostKeyChecking no
augtool -s set /files/etc/ssh/ssh_config/Host/GSSAPIAuthentication no

distro=$(lsb_release -is || :)
echo $distro

if [ "$distro" = "Ubuntu" ]; then
    augtool -s set /files/etc/ssh/sshd_config/GSSAPICleanupCredentials yes
    augtool -s set /files/etc/ssh/sshd_config/AuthorizedKeysFile .ssh/authorized_keys
elif [ "$distro" = "Fedora" ]; then
    # Re-enable password auth for cloud-init and loosen sudo's tty
    # requirement before applying the sshd settings.
    sed -i 's/ssh_pwauth: 0/ssh_pwauth: 1/' /etc/cloud/cloud.cfg
    augtool -s clear /files/etc/sudoers/Defaults[type=':nrpe']/requiretty/negate
    augtool -s set /files/etc/ssh/sshd_config/SyslogFacility AUTH
    augtool -s set /files/etc/ssh/sshd_config/StrictModes yes
    augtool -s set /files/etc/ssh/sshd_config/RSAAuthentication yes
    augtool -s set /files/etc/ssh/sshd_config/PubkeyAuthentication yes
elif [ "$distro" = "RedHatEnterpriseServer" ] || [ "$distro" = "CentOS" ]; then
    sed -i 's/ssh_pwauth: 0/ssh_pwauth: 1/' /etc/cloud/cloud.cfg
    augtool -s clear /files/etc/sudoers/Defaults[type=':nrpe']/requiretty/negate
    augtool -s set /files/etc/ssh/sshd_config/SyslogFacility AUTH
    augtool -s set /files/etc/ssh/sshd_config/PubkeyAuthentication yes
else
    echo "Unknown distro: $distro. Exiting."
    exit 1
fi
# Make sure the script exits successfully.
:

View File

@ -0,0 +1,95 @@
#!/bin/bash
# This element installs Hadoop CDH 4 HDFS from Cloudera.
# It does not do a full install of CDH; it installs the minimum needed
# for Spark to run correctly.
distro=$(lsb_release -is || :)
if [ "$distro" != "Ubuntu" ]; then
    echo "Distro $distro not supported by CDH. Exiting."
    exit 1
fi

# Quoted so an empty/unset DIB_HADOOP_VERSION cannot break the test.
if [ "$DIB_HADOOP_VERSION" != "2.0.0-mr1-cdh4.5.0" ]; then
    echo "CDH version $DIB_HADOOP_VERSION not supported. Exiting."
    # Bug fix: the original only printed the message and then carried
    # on installing an unsupported version; abort instead.
    exit 1
fi
echo "Hadoop CDH setup begins for $distro"
tmp_dir=/tmp/hadoop

echo "Creating hadoop user & group"
case "$distro" in
    Ubuntu )
        addgroup hadoop
        adduser --ingroup hadoop --disabled-password --gecos GECOS hadoop
        adduser hadoop sudo
    ;;
esac

echo "CDH 4 will be injected into image. Starting the download"
install-packages wget
# Here more versions of CDH could be supported by downloading the right
# repository package.
if ! wget -P "$tmp_dir" "http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb"; then
    echo -e "Could not find CDH 4.\nAborting"
    exit 1
fi

# Pin packages from cloudera repository
cat >> /etc/apt/preferences.d/cloudera << EOF
Package: *
Pin: origin "archive.cloudera.com"
Pin-Priority: 800
EOF

case "$distro" in
    Ubuntu )
        dpkg -i "$tmp_dir"/cdh4-repository_1.0_all.deb
        curl -s http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key | sudo apt-key add -
        sudo apt-get update
        # Here the script could be expanded to install all CDH packages
        # and not only HDFS.
        install-packages hadoop-hdfs-namenode hadoop-hdfs-datanode

        # pietro: small hack to fix install problems on ubuntu
        # the CDH package contains a broken symlink instead of the log4j
        # jar file; these lines should go away once Cloudera fixes the
        # package
        echo "Fixing install problem for CDH: log4j"
        wget http://repo1.maven.org/maven2/org/slf4j/slf4j-log4j12/1.6.1/slf4j-log4j12-1.6.1.jar
        sudo rm /usr/lib/hadoop/lib/slf4j-log4j12-1.6.1.jar
        sudo mv slf4j-log4j12-1.6.1.jar /usr/lib/hadoop/lib/
    ;;
esac
rm -r "$tmp_dir"

echo "Pre-configuring Hadoop"
# Find JAVA_HOME: prefer a JDK unpacked under $TARGET_ROOT/usr/java by
# the java element, otherwise fall back to the distro's java binary.
JAVA_HOME=$(find "$TARGET_ROOT/usr/java/" -maxdepth 1 -name "jdk*")
if [ -z "$JAVA_HOME" ]; then
    case "$distro" in
        Ubuntu )
            JAVA_HOME=$(readlink -e /usr/bin/java | sed "s:bin/java::")
        ;;
    esac
fi

cat >> /home/hadoop/.bashrc <<EOF
PATH=$PATH:/usr/sbin:$JAVA_HOME/bin
JAVA_HOME=$JAVA_HOME
EOF

sed -i -e "s,export JAVA_HOME=.*,export JAVA_HOME=$JAVA_HOME," \
    -e "s,export HADOOP_LOG_DIR=.*,export HADOOP_LOG_DIR=/mnt/log/hadoop/\$USER," \
    -e "s,export HADOOP_SECURE_DN_LOG_DIR=.*,export HADOOP_SECURE_DN_LOG_DIR=/mnt/log/hadoop/hdfs," \
    /etc/hadoop/hadoop-env.sh

echo "Applying firstboot script"
if [ "$distro" = "Ubuntu" ]; then
    # File '/etc/rc.local' may not exist
    if [ -f "/etc/rc.local" ]; then
        mv /etc/rc.local /etc/rc.local.old
    fi
    install -D -g root -o root -m 0755 "$(dirname "$0")/firstboot" /etc/rc.local
fi

View File

@ -0,0 +1,31 @@
#!/bin/bash
# First boot of the built image: create the directories Hadoop expects
# at runtime, prepare the ubuntu user's SSH files, then restore the
# distribution's original rc.local so this script runs only once.
distro=$(lsb_release -is || :)

if [ "$distro" = "Ubuntu" ]; then
    mkdir /run/hadoop
    chown hadoop:hadoop /run/hadoop/
    mkdir -p /home/ubuntu/.ssh
    touch /home/ubuntu/.ssh/authorized_keys
    chown -R ubuntu:ubuntu /home/ubuntu
    chown -R ubuntu:ubuntu /etc/hadoop
else
    echo "Unknown distro: $distro. Exiting."
    exit 1
fi

# Common
chown root:root /mnt
mkdir -p /var/run/hadoop
chown hadoop:hadoop /var/run/hadoop
mkdir -p /mnt/log/hadoop
chown hadoop:hadoop /mnt/log/hadoop

# Clean: put back the rc.local that the hadoop element saved, if any.
if [ "$distro" = "Ubuntu" ]; then
    if [ -f /etc/rc.local.old ]; then
        mv /etc/rc.local.old /etc/rc.local
    fi
fi
exit 0

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Validate the environment variables consumed by the hadoop-cdh element
# before image building starts.
set -e

# Neither Java source given: informational only, the install script
# will fall back to the distro's packaged Java.
if [ -z "$JAVA_DOWNLOAD_URL" ] && [ -z "$JAVA_FILE" ]; then
    echo "JAVA_FILE and JAVA_DOWNLOAD_URL are not set. Proceeding with distro native Java."
fi

if [ -z "$DIB_HADOOP_VERSION" ]; then
    echo "DIB_HADOOP_VERSION is not set. Impossible to install hadoop. Exit"
    exit 1
fi

# The version string must contain a dotted numeric triple (x.y.z).
# Direct grep check replaces the original's double negative
# (sed-delete matching lines, then test for empty output).
if echo "$DIB_HADOOP_VERSION" | grep -q '[0-9]\.[0-9]\.[0-9]'; then
    echo "All variables are set, continue."
else
    echo "Version error. Exit"
    exit 1
fi