diff --git a/spark/3.5.4/22.03-lts-sp1/Dockerfile b/spark/3.5.4/22.03-lts-sp1/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..cfa2ffc93155a38a10a7d0d50ac05cbe28db9580
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp1/Dockerfile
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM openeuler/openeuler:22.03-lts-sp1
+
+ARG spark_uid=185
+
+
+RUN set -ex && \
+    dnf update -y && \
+    ln -s /lib /lib64 && \
+    dnf install -y gnupg2 wget bash krb5 procps net-tools shadow dpkg java-11-openjdk && \
+    groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark && \
+    dnf install -y python3 python3-pip && \
+    mkdir -p /opt/spark && \
+    mkdir /opt/spark/python && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    chown -R spark:spark /opt/spark && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/dnf/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION 1.14
+RUN set -ex; \
+    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)"; \
+    chmod +x /usr/local/bin/gosu; \
+    gosu nobody true
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+RUN chmod a+x /opt/entrypoint.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark/3.5.4/22.03-lts-sp1/entrypoint.sh b/spark/3.5.4/22.03-lts-sp1/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6a5c6d2d1d9dba4f5e4f42a39553b56e07792a75
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp1/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # (see https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523);
+  # this handles the OpenShift random-UID case.
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
[ -z "${SPARK_HOME+x}" ]; then + SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; +fi + +# SPARK-43540: add current working directory into executor classpath +SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" + +# Switch to spark if no USER specified (root by default) otherwise use USER directly +switch_spark_if_root() { + if [ $(id -u) -eq 0 ]; then + echo gosu spark + fi +} + +case "$1" in + driver) + shift 1 + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + executor) + shift 1 + CMD=( + ${JAVA_HOME}/bin/java + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" + -Xms"$SPARK_EXECUTOR_MEMORY" + -Xmx"$SPARK_EXECUTOR_MEMORY" + -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" + org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend + --driver-url "$SPARK_DRIVER_URL" + --executor-id "$SPARK_EXECUTOR_ID" + --cores "$SPARK_EXECUTOR_CORES" + --app-id "$SPARK_APPLICATION_ID" + --hostname "$SPARK_EXECUTOR_POD_IP" + --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID" + --podName "$SPARK_EXECUTOR_POD_NAME" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + + *) + # Non-spark-on-k8s command provided, proceeding in pass-through mode... + exec "$@" + ;; +esac \ No newline at end of file diff --git a/spark/3.5.4/22.03-lts-sp3/Dockerfile b/spark/3.5.4/22.03-lts-sp3/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..13cec1f28b32a38c29429c216a8c23839d489a7b --- /dev/null +++ b/spark/3.5.4/22.03-lts-sp3/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/spark/3.5.4/22.03-lts-sp3/Dockerfile b/spark/3.5.4/22.03-lts-sp3/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..13cec1f28b32a38c29429c216a8c23839d489a7b
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp3/Dockerfile
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM openeuler/openeuler:22.03-lts-sp3
+
+ARG spark_uid=185
+
+
+RUN set -ex && \
+    dnf update -y && \
+    ln -s /lib /lib64 && \
+    dnf install -y gnupg2 wget bash krb5 procps net-tools shadow dpkg java-11-openjdk && \
+    groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark && \
+    dnf install -y python3 python3-pip && \
+    mkdir -p /opt/spark && \
+    mkdir /opt/spark/python && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    chown -R spark:spark /opt/spark && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/dnf/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION 1.14
+RUN set -ex; \
+    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)"; \
+    chmod +x /usr/local/bin/gosu; \
+    gosu nobody true
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+RUN chmod a+x /opt/entrypoint.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark/3.5.4/22.03-lts-sp3/entrypoint.sh b/spark/3.5.4/22.03-lts-sp3/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6a5c6d2d1d9dba4f5e4f42a39553b56e07792a75
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp3/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # (see https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523);
+  # this handles the OpenShift random-UID case.
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
[ -z "${SPARK_HOME+x}" ]; then + SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; +fi + +# SPARK-43540: add current working directory into executor classpath +SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" + +# Switch to spark if no USER specified (root by default) otherwise use USER directly +switch_spark_if_root() { + if [ $(id -u) -eq 0 ]; then + echo gosu spark + fi +} + +case "$1" in + driver) + shift 1 + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + executor) + shift 1 + CMD=( + ${JAVA_HOME}/bin/java + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" + -Xms"$SPARK_EXECUTOR_MEMORY" + -Xmx"$SPARK_EXECUTOR_MEMORY" + -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" + org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend + --driver-url "$SPARK_DRIVER_URL" + --executor-id "$SPARK_EXECUTOR_ID" + --cores "$SPARK_EXECUTOR_CORES" + --app-id "$SPARK_APPLICATION_ID" + --hostname "$SPARK_EXECUTOR_POD_IP" + --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID" + --podName "$SPARK_EXECUTOR_POD_NAME" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + + *) + # Non-spark-on-k8s command provided, proceeding in pass-through mode... + exec "$@" + ;; +esac \ No newline at end of file diff --git a/spark/3.5.4/22.03-lts-sp4/Dockerfile b/spark/3.5.4/22.03-lts-sp4/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6347fdda410fb4f5fff88744880cc7ca5abcebe8 --- /dev/null +++ b/spark/3.5.4/22.03-lts-sp4/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/spark/3.5.4/22.03-lts-sp4/Dockerfile b/spark/3.5.4/22.03-lts-sp4/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6347fdda410fb4f5fff88744880cc7ca5abcebe8
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp4/Dockerfile
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM openeuler/openeuler:22.03-lts-sp4
+
+ARG spark_uid=185
+
+
+RUN set -ex && \
+    dnf update -y && \
+    ln -s /lib /lib64 && \
+    dnf install -y gnupg2 wget bash krb5 procps net-tools shadow dpkg java-11-openjdk && \
+    groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark && \
+    dnf install -y python3 python3-pip && \
+    mkdir -p /opt/spark && \
+    mkdir /opt/spark/python && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    chown -R spark:spark /opt/spark && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/dnf/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION 1.14
+RUN set -ex; \
+    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)"; \
+    chmod +x /usr/local/bin/gosu; \
+    gosu nobody true
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+RUN chmod a+x /opt/entrypoint.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark/3.5.4/22.03-lts-sp4/entrypoint.sh b/spark/3.5.4/22.03-lts-sp4/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6a5c6d2d1d9dba4f5e4f42a39553b56e07792a75
--- /dev/null
+++ b/spark/3.5.4/22.03-lts-sp4/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # (see https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523);
+  # this handles the OpenShift random-UID case.
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
[ -z "${SPARK_HOME+x}" ]; then + SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; +fi + +# SPARK-43540: add current working directory into executor classpath +SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" + +# Switch to spark if no USER specified (root by default) otherwise use USER directly +switch_spark_if_root() { + if [ $(id -u) -eq 0 ]; then + echo gosu spark + fi +} + +case "$1" in + driver) + shift 1 + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + executor) + shift 1 + CMD=( + ${JAVA_HOME}/bin/java + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" + -Xms"$SPARK_EXECUTOR_MEMORY" + -Xmx"$SPARK_EXECUTOR_MEMORY" + -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" + org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend + --driver-url "$SPARK_DRIVER_URL" + --executor-id "$SPARK_EXECUTOR_ID" + --cores "$SPARK_EXECUTOR_CORES" + --app-id "$SPARK_APPLICATION_ID" + --hostname "$SPARK_EXECUTOR_POD_IP" + --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID" + --podName "$SPARK_EXECUTOR_POD_NAME" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + + *) + # Non-spark-on-k8s command provided, proceeding in pass-through mode... + exec "$@" + ;; +esac \ No newline at end of file diff --git a/spark/3.5.4/24.03-lts/Dockerfile b/spark/3.5.4/24.03-lts/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..32b22c6394ffa01332a7166c84af43b02c2cbd1e --- /dev/null +++ b/spark/3.5.4/24.03-lts/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/spark/3.5.4/24.03-lts/Dockerfile b/spark/3.5.4/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..32b22c6394ffa01332a7166c84af43b02c2cbd1e
--- /dev/null
+++ b/spark/3.5.4/24.03-lts/Dockerfile
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM openeuler/openeuler:24.03-lts
+
+ARG spark_uid=185
+
+
+RUN set -ex && \
+    dnf update -y && \
+    ln -s /lib /lib64 && \
+    dnf install -y gnupg2 wget bash krb5 procps net-tools shadow dpkg java-11-openjdk && \
+    groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark && \
+    dnf install -y python3 python3-pip && \
+    mkdir -p /opt/spark && \
+    mkdir /opt/spark/python && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    chown -R spark:spark /opt/spark && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/dnf/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION 1.14
+RUN set -ex; \
+    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)"; \
+    chmod +x /usr/local/bin/gosu; \
+    gosu nobody true
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+RUN chmod a+x /opt/entrypoint.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark/3.5.4/24.03-lts/entrypoint.sh b/spark/3.5.4/24.03-lts/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6a5c6d2d1d9dba4f5e4f42a39553b56e07792a75
--- /dev/null
+++ b/spark/3.5.4/24.03-lts/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # (see https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523);
+  # this handles the OpenShift random-UID case.
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:%s:%s:/bin/false\n' "$myuid" "$mygid" "${SPARK_USER_NAME:-anonymous uid}" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
[ -z "${SPARK_HOME+x}" ]; then + SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; +fi + +# SPARK-43540: add current working directory into executor classpath +SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" + +# Switch to spark if no USER specified (root by default) otherwise use USER directly +switch_spark_if_root() { + if [ $(id -u) -eq 0 ]; then + echo gosu spark + fi +} + +case "$1" in + driver) + shift 1 + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + executor) + shift 1 + CMD=( + ${JAVA_HOME}/bin/java + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" + -Xms"$SPARK_EXECUTOR_MEMORY" + -Xmx"$SPARK_EXECUTOR_MEMORY" + -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH" + org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend + --driver-url "$SPARK_DRIVER_URL" + --executor-id "$SPARK_EXECUTOR_ID" + --cores "$SPARK_EXECUTOR_CORES" + --app-id "$SPARK_APPLICATION_ID" + --hostname "$SPARK_EXECUTOR_POD_IP" + --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID" + --podName "$SPARK_EXECUTOR_POD_NAME" + ) + attempt_setup_fake_passwd_entry + # Execute the container CMD under tini for better hygiene + exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}" + ;; + + *) + # Non-spark-on-k8s command provided, proceeding in pass-through mode... + exec "$@" + ;; +esac \ No newline at end of file diff --git a/spark/meta.yml b/spark/meta.yml index 0427bee284800b0a70ff3ba810bebe0f3eedfdae..5e1abf2670b11e76f88a69aa35fb319ca50645a5 100644 --- a/spark/meta.yml +++ b/spark/meta.yml @@ -15,4 +15,12 @@ 3.5.3-oe2203sp4: path: spark/3.5.3/22.03-lts-sp4/Dockerfile 3.5.3-oe2403lts: - path: spark/3.5.3/24.03-lts/Dockerfile \ No newline at end of file + path: spark/3.5.3/24.03-lts/Dockerfile +3.5.4-oe2403lts: + path: spark/3.5.4/24.03-lts/Dockerfile +3.5.4-oe2203sp3: + path: spark/3.5.4/22.03-lts-sp3/Dockerfile +3.5.4-oe2203sp4: + path: spark/3.5.4/22.03-lts-sp4/Dockerfile +3.5.4-oe2203sp1: + path: spark/3.5.4/22.03-lts-sp1/Dockerfile \ No newline at end of file