[SPARK-9633] [BUILD] SBT download locations outdated; need an update
[spark.git] / make-distribution.sh
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self-contained.
# It does not contain source or *.class files.
#
# Optional Arguments
#      --tgz: Additionally creates spark-$VERSION-bin-$NAME.tgz
#      --hadoop VERSION: Builds against the specified version of Hadoop.
#      --with-yarn: Enables support for Hadoop YARN.
#      --with-hive: Enables support for reading Hive tables.
#      --with-tachyon: Additionally bundles Tachyon into the distribution.
#      --skip-java-test: Skips the interactive check for a Java 6 JDK.
#      --name: A moniker for the release target. Defaults to the Hadoop version.
#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
# 2) cd to deploy dir; ./sbin/start-master.sh
# 3) Verify master is up by visiting web page, e.g. http://master-ip:8080. Note the spark:// URL.
# 4) ./sbin/start-slave.sh 1 <<spark:// URL>>
# 5) ./bin/spark-shell --master spark://my-master-ip:7077
#
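#
# Example invocation (illustrative only; pick the Hadoop version and release
# name that match your cluster):
#
#   ./make-distribution.sh --hadoop 2.4.0 --with-yarn --with-hive --tgz --name hadoop2.4
#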

set -o pipefail
set -e

# Figure out where the Spark framework is installed
FWDIR="$(cd "$(dirname "$0")"; pwd)"
DISTDIR="$FWDIR/dist"

# Initialize defaults
SPARK_HADOOP_VERSION=1.0.4
SPARK_YARN=false
SPARK_HIVE=false
SPARK_TACHYON=false
MAKE_TGZ=false
NAME=none

# Parse arguments
while (( "$#" )); do
  case $1 in
    --hadoop)
      SPARK_HADOOP_VERSION="$2"
      shift
      ;;
    --with-yarn)
      SPARK_YARN=true
      ;;
    --with-hive)
      SPARK_HIVE=true
      ;;
    --skip-java-test)
      SKIP_JAVA_TEST=true
      ;;
    --with-tachyon)
      SPARK_TACHYON=true
      ;;
    --tgz)
      MAKE_TGZ=true
      ;;
    --name)
      NAME="$2"
      shift
      ;;
  esac
  shift
done
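
# Note: flags not listed above are silently ignored by the loop; --hadoop and
# --name each consume the value that follows them.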

if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set, cannot proceed."
  exit 1
fi

if ! which mvn &>/dev/null; then
  echo "You need Maven installed to build Spark."
  echo "Download Maven from https://maven.apache.org/"
  exit 1
fi
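
# Ask Maven for the project version; the grep/tail strip Maven's own log lines
# so only the bare version string is captured.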
VERSION=$(mvn help:evaluate -Dexpression=project.version 2>/dev/null | grep -v "INFO" | tail -n 1)

JAVA_CMD="$JAVA_HOME"/bin/java
JAVA_VERSION=$("$JAVA_CMD" -version 2>&1)
if [[ ! "$JAVA_VERSION" =~ "1.6" && -z "$SKIP_JAVA_TEST" ]]; then
  echo "***NOTE***: JAVA_HOME is not set to a JDK 6 installation. The resulting"
  echo "            distribution may not work well with PySpark and will not run"
  echo "            with Java 6 (See SPARK-1703 and SPARK-1911)."
  echo "            This test can be disabled by adding --skip-java-test."
  echo "Output from 'java -version' was:"
  echo "$JAVA_VERSION"
  read -p "Would you like to continue anyway? [y,n]: " -r
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Okay, exiting."
    exit 1
  fi
fi

if [ "$NAME" == "none" ]; then
  NAME=$SPARK_HADOOP_VERSION
fi

echo "Spark version is $VERSION"

if [ "$MAKE_TGZ" == "true" ]; then
  echo "Making spark-$VERSION-bin-$NAME.tgz"
else
  echo "Making distribution for Spark $VERSION in $DISTDIR..."
fi

echo "Hadoop version set to $SPARK_HADOOP_VERSION"
echo "Release name set to $NAME"
if [ "$SPARK_YARN" == "true" ]; then
  echo "YARN enabled"
else
  echo "YARN disabled"
fi

if [ "$SPARK_TACHYON" == "true" ]; then
  echo "Tachyon Enabled"
else
  echo "Tachyon Disabled"
fi

# Build uber fat JAR
cd "$FWDIR"

export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
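# (MaxPermSize only applies to JDK 7 and earlier; JDK 8+ JVMs ignore it with a warning.)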

BUILD_COMMAND="mvn clean package"

# Use special profiles for hadoop versions 0.23.x, 2.2.x, 2.3.x, 2.4.x
if [[ "$SPARK_HADOOP_VERSION" =~ ^0\.23\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-0.23"; fi
if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.2\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.2"; fi
if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.3\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.3"; fi
if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.4\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.4"; fi
if [[ "$SPARK_HIVE" == "true" ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phive"; fi
if [[ "$SPARK_YARN" == "true" ]]; then
  # For hadoop versions 0.23.x to 2.1.x, use the yarn-alpha profile
  if [[ "$SPARK_HADOOP_VERSION" =~ ^0\.2[3-9]\. ]] ||
     [[ "$SPARK_HADOOP_VERSION" =~ ^0\.[3-9][0-9]\. ]] ||
     [[ "$SPARK_HADOOP_VERSION" =~ ^1\.[0-9]\. ]] ||
     [[ "$SPARK_HADOOP_VERSION" =~ ^2\.[0-1]\. ]]; then
    BUILD_COMMAND="$BUILD_COMMAND -Pyarn-alpha"
  # For hadoop versions 2.2+, use the yarn profile
  elif [[ "$SPARK_HADOOP_VERSION" =~ ^2\.[2-9]\. ]]; then
    BUILD_COMMAND="$BUILD_COMMAND -Pyarn"
  fi
  BUILD_COMMAND="$BUILD_COMMAND -Dyarn.version=$SPARK_HADOOP_VERSION"
fi
BUILD_COMMAND="$BUILD_COMMAND -Dhadoop.version=$SPARK_HADOOP_VERSION"
BUILD_COMMAND="$BUILD_COMMAND -DskipTests"
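
# As a rough illustration, building with the example flags from the header
# (--hadoop 2.4.0 --with-yarn --with-hive) expands BUILD_COMMAND to:
#   mvn clean package -Phadoop-2.4 -Phive -Pyarn -Dyarn.version=2.4.0 -Dhadoop.version=2.4.0 -DskipTests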

# Actually build the jar
echo -e "\nBuilding with..."
echo -e "\$ $BUILD_COMMAND\n"
${BUILD_COMMAND}

# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/lib"
echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"

# Copy jars
cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$FWDIR"/examples/src/main "$DISTDIR/examples/src/"

if [ "$SPARK_HIVE" == "true" ]; then
  cp "$FWDIR"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
fi

# Copy license and ASF files
cp "$FWDIR/LICENSE" "$DISTDIR"
cp "$FWDIR/NOTICE" "$DISTDIR"

if [ -e "$FWDIR"/CHANGES.txt ]; then
  cp "$FWDIR/CHANGES.txt" "$DISTDIR"
fi

# Copy other things
mkdir "$DISTDIR"/conf
cp "$FWDIR"/conf/*.template "$DISTDIR"/conf
cp "$FWDIR"/conf/slaves "$DISTDIR"/conf
cp "$FWDIR/README.md" "$DISTDIR"
cp -r "$FWDIR/bin" "$DISTDIR"
cp -r "$FWDIR/python" "$DISTDIR"
cp -r "$FWDIR/sbin" "$DISTDIR"
cp -r "$FWDIR/ec2" "$DISTDIR"

# Download and copy in tachyon, if requested
if [ "$SPARK_TACHYON" == "true" ]; then
  TACHYON_VERSION="0.4.1"
  TACHYON_URL="https://github.com/amplab/tachyon/releases/download/v${TACHYON_VERSION}/tachyon-${TACHYON_VERSION}-bin.tar.gz"

  TMPD=`mktemp -d 2>/dev/null || mktemp -d -t 'disttmp'`

  pushd "$TMPD" > /dev/null
  echo "Fetching tachyon tgz"
  wget "$TACHYON_URL"

  tar xf "tachyon-${TACHYON_VERSION}-bin.tar.gz"
  cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/lib"
  mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
  cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
  cp -r "tachyon-${TACHYON_VERSION}"/src/main/java/tachyon/web/resources "$DISTDIR/tachyon/src/main/java/tachyon/web"

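  # Rewrite TACHYON_JAR in the bundled tachyon-config.sh so it points at the jar
  # copied into $DISTDIR/lib above, rather than at Tachyon's own target/ directory.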
  if [[ `uname -a` == Darwin* ]]; then
    # need to run sed differently on osx
    nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  else
    sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
  fi

  popd > /dev/null
  rm -rf "$TMPD"
fi

if [ "$MAKE_TGZ" == "true" ]; then
  TARDIR_NAME=spark-$VERSION-bin-$NAME
  TARDIR="$FWDIR/$TARDIR_NAME"
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR"
  tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$FWDIR" "$TARDIR_NAME"
  rm -rf "$TARDIR"
fi
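
# The tarball is written to the current directory (the Spark source root, since
# the script cd'd to $FWDIR above) and follows the pattern spark-$VERSION-bin-$NAME.tgz;
# for instance, a build of version 1.1.0 with --name hadoop2.4 would produce
# spark-1.1.0-bin-hadoop2.4.tgz.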