Spark source code analysis: 2.3 spark-class script analysis


The previous post, start-master.sh script analysis, walked through the contents of the spark-daemon.sh script and debugged it. This post analyzes the contents of the spark-class script and debugs it in the same way.

1 Analyzing the contents of spark-class

# Check whether SPARK_HOME is configured; set it manually if not
if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi

# Execute load-spark-env.sh in the current shell via "." followed by the file name
## Sets the SPARK_HOME and SPARK_SCALA_VERSION environment variables
. "${SPARK_HOME}"/bin/load-spark-env.sh

# Check whether JAVA_HOME is set; resolve the java executable and assign it to RUNNER
if [ -n "${JAVA_HOME}" ]; then
  RUNNER="${JAVA_HOME}/bin/java"
else
  if [ "$(command -v java)" ]; then
    RUNNER="java"
  else
    echo "JAVA_HOME is not set" >&2
    exit 1
  fi
fi

# Locate Spark's jar directory
if [ -d "${SPARK_HOME}/jars" ]; then
  SPARK_JARS_DIR="${SPARK_HOME}/jars"
else
  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
fi

if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
  echo "You need to build Spark with the target \"package\" before running this program." 1>&2
  exit 1
else
  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
fi

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

# For tests
if [[ -n "$SPARK_TESTING" ]]; then
  unset YARN_CONF_DIR
  unset HADOOP_CONF_DIR
fi

# Assemble the arguments and print them: the Java launcher writes the final
# command as NUL-separated tokens, then the shell appends the launcher's exit code
build_command() {
  "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
  printf "%d\0" $?
}

# Turn off POSIX mode, because the process substitution "< <(...)" below
# is not available when bash runs in POSIX mode
set +o posix
CMD=()
# Read each NUL-terminated token emitted by build_command
while IFS= read -d '' -r ARG; do
  CMD+=("$ARG")   # append it to the CMD array
done < <(build_command "$@")

COUNT=${#CMD[@]}
LAST=$((COUNT - 1))
LAUNCHER_EXIT_CODE=${CMD[$LAST]}

# Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes
# the code that parses the output of the launcher to get confused. In those cases, check if the
# exit code is an integer, and if it's not, handle it as a special error case.
if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then
  echo "${CMD[@]}" | head -n-1 1>&2
  exit 1
fi

if [ $LAUNCHER_EXIT_CODE != 0 ]; then
  exit $LAUNCHER_EXIT_CODE
fi

# Strip the exit code off the end and execute the final command
CMD=("${CMD[@]:0:$LAST}")
exec "${CMD[@]}"
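
The least obvious part of the script is the read loop. build_command prints the final command as NUL-separated tokens (NUL is the one byte that can never occur inside an argument, so paths and arguments containing spaces survive intact), then appends its own exit code, and the loop reads everything back into the CMD array through a process substitution. Below is a minimal standalone sketch of that protocol; fake_build_command and the tokens it prints are made up for illustration and are not part of the Spark source:

#!/usr/bin/env bash
# Stand-in for the launcher: print each token NUL-terminated, then the exit code.
fake_build_command() {
  printf '%s\0' "/usr/bin/java" "-Xmx1g" "org.example.Main" "--name" "my app"
  printf '%d\0' 0
}

CMD=()
# read -d '' reads up to the next NUL byte; IFS= preserves leading/trailing spaces
while IFS= read -d '' -r ARG; do
  CMD+=("$ARG")
done < <(fake_build_command)   # process substitution, hence "set +o posix" above

LAST=$(( ${#CMD[@]} - 1 ))
echo "exit code: ${CMD[$LAST]}"           # -> exit code: 0
printf 'token: %s\n' "${CMD[@]:0:$LAST}"  # "my app" stays one token despite the space

The same array slice, ${CMD[@]:0:$LAST}, is how spark-class strips the exit code off the end before exec'ing the real command.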

2 Debugging the spark-class script

sh -x /usr/local/spark/bin/spark-class org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080

# Check whether SPARK_HOME is configured; set it manually if not
+ '[' -z /usr/local/spark ']'
# Execute load-spark-env.sh in the current shell via "."
## Sets the SPARK_HOME and SPARK_SCALA_VERSION environment variables
+ . /usr/local/spark/bin/load-spark-env.sh
++ '[' -z /usr/local/spark ']'
++ '[' -z '' ']'
++ export SPARK_ENV_LOADED=1
++ SPARK_ENV_LOADED=1
++ export SPARK_CONF_DIR=/usr/local/spark/conf
++ SPARK_CONF_DIR=/usr/local/spark/conf
++ '[' -f /usr/local/spark/conf/spark-env.sh ']'
++ set -a
++ . /usr/local/spark/conf/spark-env.sh
+++ JAVA_HOME=/usr/local/jdk
+++ HADOOP_CONF_DIR=/usr/local/spark/hadoop/etc/hadoop
+++ SPARK_LOCAL_IP=s101
++ set +a
++ '[' -z '' ']'
++ ASSEMBLY_DIR2=/usr/local/spark/assembly/target/scala-2.11
++ ASSEMBLY_DIR1=/usr/local/spark/assembly/target/scala-2.12
++ [[ -d /usr/local/spark/assembly/target/scala-2.11 ]]
++ '[' -d /usr/local/spark/assembly/target/scala-2.11 ']'
++ export SPARK_SCALA_VERSION=2.12
++ SPARK_SCALA_VERSION=2.12
# Check whether JAVA_HOME is set; resolve the java executable and assign it to RUNNER
+ '[' -n /usr/local/jdk ']'
+ RUNNER=/usr/local/jdk/bin/java
# Locate Spark's jar directory
+ '[' -d /usr/local/spark/jars ']'
+ SPARK_JARS_DIR=/usr/local/spark/jars
+ '[' '!' -d /usr/local/spark/jars ']'
+ LAUNCH_CLASSPATH='/usr/local/spark/jars/*'
+ '[' -n '' ']'
+ [[ -n '' ]]
+ set +o posix
+ CMD=()
+ IFS=
+ read -d '' -r ARG
# Assemble the arguments and print them
++ build_command org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
++ /usr/local/jdk/bin/java -Xmx128m -cp '/usr/local/spark/jars/*' org.apache.spark.launcher.Main org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
++ printf '%d\0' 0
+ CMD+=("$ARG")
+ IFS=
+ read -d '' -r ARG
+ COUNT=12
+ LAST=11
+ LAUNCHER_EXIT_CODE=0
+ [[ 0 =~ ^[0-9]+$ ]]
+ '[' 0 '!=' 0 ']'
+ CMD=("${CMD[@]:0:$LAST}")
# Execute the final command:
# the main method of Master is invoked via java -cp
+ exec /usr/local/jdk/bin/java -cp '/usr/local/spark/conf/:/usr/local/spark/jars/*:/usr/local/spark/hadoop/etc/hadoop' -Xmx1g org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
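
Two details in the trace are worth calling out. First, spark-class never builds the Master's java command itself; it delegates to org.apache.spark.launcher.Main, which resolves the classpath and memory settings and prints the fully expanded command back as NUL-separated tokens. You can inspect that output by hand by rerunning the launcher invocation from the trace and making the NUL separators visible with tr (the paths below are the ones from this machine; adjust them for your installation):

/usr/local/jdk/bin/java -Xmx128m -cp '/usr/local/spark/jars/*' \
  org.apache.spark.launcher.Main \
  org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080 \
  | tr '\0' '\n'

Each token of the final command should come out on its own line; the trailing exit code seen by the while loop is not printed by the launcher itself but appended by build_command via printf '%d\0' $?. Second, compare the launcher invocation with the final exec line: the classpath has grown to include /usr/local/spark/conf/ and the Hadoop conf dir, and the heap became -Xmx1g, which appears to be the daemon default (SPARK_DAEMON_MEMORY) applied by the launcher.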

Next post:

Examine the concrete execution logic of the main method in org.apache.spark.deploy.master.Master.
