From a77a9a978a655044a0b58a299df965c89391090d Mon Sep 17 00:00:00 2001 From: Jeremy Freudberg Date: Fri, 28 Jul 2017 12:17:58 +0000 Subject: [PATCH] Add S3 jar to Hadoop classpath As prereq of support for S3 datasource, the hadoop-aws jar needs to be in the Hadoop classpath. The jar is copied into the proper folder when possible on the appropriate plugins, and otherwise can be provided from a download URL by the user. Additionally, set the correct value of DIB_HDFS_LIB_DIR on the Vanilla plugin to avoid any unnecessary symlinking. Partially-Implements: bp sahara-support-s3 Change-Id: I94c5b0055b87f6a4e1382118d0718e588fccfe87 --- diskimage-create/diskimage-create.sh | 30 ++++++++-------- elements/hadoop/install.d/40-setup-hadoop | 5 +-- elements/s3_hadoop/README.rst | 23 ++++++++++++ elements/s3_hadoop/element-deps | 1 + elements/s3_hadoop/package-installs.yaml | 2 ++ .../post-install.d/89-add-amazon-jar | 36 +++++++++++++++++++ elements/swift_hadoop/README.rst | 2 +- 7 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 elements/s3_hadoop/README.rst create mode 100644 elements/s3_hadoop/element-deps create mode 100644 elements/s3_hadoop/package-installs.yaml create mode 100755 elements/s3_hadoop/post-install.d/89-add-amazon-jar diff --git a/diskimage-create/diskimage-create.sh b/diskimage-create/diskimage-create.sh index a0effdf4..a29c26d9 100755 --- a/diskimage-create/diskimage-create.sh +++ b/diskimage-create/diskimage-create.sh @@ -474,15 +474,15 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "vanilla" ]; then export HIVE_VERSION=${HIVE_VERSION:-"0.11.0"} export HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL=${HADOOP_V2_7_1_NATIVE_LIBS_DOWNLOAD_URL:-"https://tarballs.openstack.org/sahara/dist/common-artifacts/hadoop-native-libs-2.7.1.tar.gz"} export OOZIE_HADOOP_V2_7_1_DOWNLOAD_URL=${OOZIE_HADOOP_V2_7_1_FILE:-"http://sahara-files.mirantis.com/oozie-4.2.0-hadoop-2.7.1.tar.gz"} - export DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/tools/lib" + export 
DIB_HDFS_LIB_DIR="/opt/hadoop/share/hadoop/common/lib" export plugin_type="vanilla" export DIB_SPARK_VERSION=1.6.0 export SPARK_HADOOP_DL=hadoop2.6 - ubuntu_elements_sequence="hadoop oozie mysql hive $JAVA_ELEMENT swift_hadoop spark" - fedora_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark" - centos_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc" - centos7_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc" + ubuntu_elements_sequence="hadoop oozie mysql hive $JAVA_ELEMENT swift_hadoop spark s3_hadoop" + fedora_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark s3_hadoop" + centos_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc s3_hadoop" + centos7_elements_sequence="hadoop oozie mysql disable-firewall hive $JAVA_ELEMENT swift_hadoop spark nc s3_hadoop" # Workaround for https://bugs.launchpad.net/diskimage-builder/+bug/1204824 # https://bugs.launchpad.net/sahara/+bug/1252684 @@ -557,7 +557,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "spark" ]; then # Tell the cloudera element to install only hdfs export DIB_CDH_HDFS_ONLY=1 - ubuntu_elements_sequence="$JAVA_ELEMENT swift_hadoop spark hadoop-cloudera" + ubuntu_elements_sequence="$JAVA_ELEMENT swift_hadoop spark hadoop-cloudera s3_hadoop" export ubuntu_image_name=${ubuntu_spark_image_name:-"ubuntu_sahara_spark_latest"} # Creating Ubuntu cloud image @@ -605,19 +605,19 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "ambari" ]; then if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then ambari_ubuntu_image_name=${ambari_ubuntu_image_name:-ubuntu_sahara_ambari} - ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc" + ambari_element_sequence="ambari $JAVA_ELEMENT swift_hadoop kdc s3_hadoop" export DIB_RELEASE="trusty" image_create ubuntu $ambari_ubuntu_image_name $ambari_element_sequence unset 
DIB_RELEASE fi if [ "$BASE_IMAGE_OS" = "centos" ]; then ambari_centos_image_name=${ambari_centos_image_name:-centos_sahara_ambari} - ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc" + ambari_element_sequence="ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop" image_create centos $ambari_centos_image_name $ambari_element_sequence fi if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then ambari_centos7_image_name=${ambari_centos7_image_name:-"centos7-sahara-ambari"} - ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc" + ambari_element_sequence="disable-selinux ambari $JAVA_ELEMENT disable-firewall swift_hadoop kdc nc s3_hadoop" image_create centos7 $ambari_centos7_image_name $ambari_element_sequence fi @@ -643,7 +643,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then HADOOP_VERSION=${DIB_CDH_MINOR_VERSION%.*} fi - cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc" + cloudera_elements_sequence="hadoop-cloudera swift_hadoop kdc s3_hadoop" if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then export DIB_CDH_VERSION="5.5" @@ -684,7 +684,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then fi if [ "$BASE_IMAGE_OS" = "centos" ]; then - centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc" + centos_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop" if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then export DIB_CDH_VERSION="5.5" @@ -696,7 +696,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "cloudera" ]; then fi if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "centos7" ]; then - centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc" + centos7_cloudera_elements_sequence="selinux-permissive disable-firewall nc s3_hadoop" if [ -z "$HADOOP_VERSION" -o "$HADOOP_VERSION" = "5.5" ]; then export 
DIB_CDH_VERSION="5.5" @@ -745,6 +745,7 @@ fi ########################## if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then export DIB_MAPR_VERSION=${DIB_MAPR_VERSION:-${DIB_DEFAULT_MAPR_VERSION}} + export plugin_type="mapr" export DIB_CLOUD_INIT_DATASOURCES=$CLOUD_INIT_DATASOURCES @@ -752,8 +753,8 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then #MapR repository requires additional space export DIB_MIN_TMPFS=10 - mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT" - mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc" + mapr_ubuntu_elements_sequence="ssh hadoop-mapr $JAVA_ELEMENT s3_hadoop" + mapr_centos_elements_sequence="ssh hadoop-mapr selinux-permissive $JAVA_ELEMENT disable-firewall nc s3_hadoop" if [ -z "$BASE_IMAGE_OS" -o "$BASE_IMAGE_OS" = "ubuntu" ]; then export DIB_RELEASE=${DIB_RELEASE:-trusty} @@ -780,6 +781,7 @@ if [ -z "$PLUGIN" -o "$PLUGIN" = "mapr" ]; then unset DIB_CLOUD_INIT_DATASOURCES fi + unset plugin_type fi diff --git a/elements/hadoop/install.d/40-setup-hadoop b/elements/hadoop/install.d/40-setup-hadoop index c0d28ddf..7dd995e0 100755 --- a/elements/hadoop/install.d/40-setup-hadoop +++ b/elements/hadoop/install.d/40-setup-hadoop @@ -61,8 +61,9 @@ EOF $HADOOP_HOME/etc/hadoop/yarn-env.sh echo "source $JAVA_RC" >> $HADOOP_HOME/etc/hadoop/yarn-env.sh - # enable swiftfs - ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar ${HADOOP_HOME}/share/hadoop/common/lib/ + # remove apache-built swiftfs + rm ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-openstack-${DIB_HADOOP_VERSION}.jar + } case "$DISTRO_NAME" in diff --git a/elements/s3_hadoop/README.rst b/elements/s3_hadoop/README.rst new file mode 100644 index 00000000..3118dc62 --- /dev/null +++ b/elements/s3_hadoop/README.rst @@ -0,0 +1,23 @@ +========= +s3_hadoop +========= + +Copy the Hadoop S3 connector jar file into the Hadoop classpath. 
+ +Environment Variables +--------------------- + +HADOOP_S3_JAR_ORIGIN + :Required: No + :Default: Depends on plugin. + :Description: Path to where the S3 jar is (already) located. + +HADOOP_S3_JAR_DOWNLOAD + :Required: No + :Default: None. + :Description: If set, download a specific S3 jar instead of using one already available on the image. + +DIB_HDFS_LIB_DIR + :Required: No + :Default: /usr/share/hadoop/lib + :Description: Directory in the guest where to save the S3 jar. Shared with swift_hadoop. diff --git a/elements/s3_hadoop/element-deps b/elements/s3_hadoop/element-deps new file mode 100644 index 00000000..7076aba9 --- /dev/null +++ b/elements/s3_hadoop/element-deps @@ -0,0 +1 @@ +package-installs diff --git a/elements/s3_hadoop/package-installs.yaml b/elements/s3_hadoop/package-installs.yaml new file mode 100644 index 00000000..cc77790e --- /dev/null +++ b/elements/s3_hadoop/package-installs.yaml @@ -0,0 +1,2 @@ +wget: + phase: post-install.d diff --git a/elements/s3_hadoop/post-install.d/89-add-amazon-jar b/elements/s3_hadoop/post-install.d/89-add-amazon-jar new file mode 100755 index 00000000..30e2c376 --- /dev/null +++ b/elements/s3_hadoop/post-install.d/89-add-amazon-jar @@ -0,0 +1,36 @@ +#!/bin/bash +if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then + set -x +fi +set -eu +set -o pipefail + +if [ -z "${HADOOP_S3_JAR_ORIGIN:-}" ]; then + # The jar is not locally available during image-gen on Ambari/MapR plugins: relevant packages are installed later. + # The jar is not appropriate for the Storm plugin: you cannot stream data from an object store. + # For plugins not found in the switch statement below, a user-specified jar can still be downloaded. 
+ case "$plugin_type" in + "vanilla") + HADOOP_S3_JAR_ORIGIN="/opt/hadoop/share/hadoop/tools/lib/hadoop-aws-$DIB_HADOOP_VERSION.jar" + ;; + "cloudera" | "spark" ) + HADOOP_S3_JAR_ORIGIN="/usr/lib/hadoop/hadoop-aws.jar" + ;; + esac +fi + +HDFS_LIB_DIR=${DIB_HDFS_LIB_DIR:-"/usr/share/hadoop/lib"} # matches swift_hadoop default + +if [ -z "${HADOOP_S3_JAR_DOWNLOAD:-}" ]; then + if [ "${HADOOP_S3_JAR_ORIGIN:-}" ]; then + cp $HADOOP_S3_JAR_ORIGIN $HDFS_LIB_DIR/hadoop-aws.jar + fi +else + wget -O $HDFS_LIB_DIR/hadoop-aws.jar $HADOOP_S3_JAR_DOWNLOAD +fi + +path=$HDFS_LIB_DIR/hadoop-aws.jar + +if [ -f $path ]; then + chmod 0644 $path +fi diff --git a/elements/swift_hadoop/README.rst b/elements/swift_hadoop/README.rst index e4474142..dc8b75d4 100644 --- a/elements/swift_hadoop/README.rst +++ b/elements/swift_hadoop/README.rst @@ -17,7 +17,7 @@ swift_url DIB_HDFS_LIB_DIR :Required: No :Default: /usr/share/hadoop/lib - :Description: Directory in the guest where to save the swift jar. + :Description: Directory in the guest where to save the swift jar. Shared with s3_hadoop. DIB_HADOOP_SWIFT_JAR_NAME :Required: No