diff --git a/Bigdata/flink/2.1.0/24.03-lts-sp2/Dockerfile b/Bigdata/flink/2.1.0/24.03-lts-sp2/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..f5d2a92b9e468c5ef43d914cd9227c72464ebedd
--- /dev/null
+++ b/Bigdata/flink/2.1.0/24.03-lts-sp2/Dockerfile
@@ -0,0 +1,58 @@
+ARG BASE=openeuler/openeuler:24.03-lts-sp2
+FROM ${BASE}
+ARG VERSION=2.1.0
+ARG TARGETARCH
+
+# Install dependencies
+RUN dnf install -y java-21-openjdk-headless \
+        wget \
+        gpg \
+        gettext \
+        jemalloc \
+        snappy \
+        shadow-utils \
+        util-linux \
+        coreutils \
+        hostname \
+    && dnf clean all \
+    && rm -rf /var/cache/dnf/*
+
+# Grab gosu for easy step-down from root
+ENV GOSU_VERSION=1.11
+RUN wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/${GOSU_VERSION}/gosu-${TARGETARCH}" && \
+    chmod +x /usr/local/bin/gosu && \
+    gosu nobody true
+
+# Configure Flink version
+ENV FLINK_TGZ_URL=https://dlcdn.apache.org/flink/flink-${VERSION}/flink-${VERSION}-bin-scala_2.12.tgz
+ENV FLINK_HOME=/opt/flink
+ENV PATH=/opt/flink/bin:$PATH
+
+# Prepare environment
+RUN groupadd -g 9999 flink && \
+    useradd -u 9999 -g flink -d $FLINK_HOME -s /sbin/nologin flink
+
+WORKDIR $FLINK_HOME
+
+# Install Flink
+RUN set -ex && \
+    wget -nv -O flink.tgz "$FLINK_TGZ_URL" && \
+    tar -xf flink.tgz --strip-components=1 && \
+    rm flink.tgz && \
+    chown -R flink:flink .; \
+    # Replace default REST/RPC endpoint bind address to use the container's network interface \
+    CONF_FILE="${FLINK_HOME}/conf/config.yaml"; \
+    /bin/bash "$FLINK_HOME/bin/config-parser-utils.sh" "${FLINK_HOME}/conf" "${FLINK_HOME}/bin" "${FLINK_HOME}/lib" \
+        "-repKV" "rest.address,localhost,0.0.0.0" \
+        "-repKV" "rest.bind-address,localhost,0.0.0.0" \
+        "-repKV" "jobmanager.bind-host,localhost,0.0.0.0" \
+        "-repKV" "taskmanager.bind-host,localhost,0.0.0.0" \
+        "-rmKV" "taskmanager.host=localhost";
+
+# Configure container
+COPY docker-entrypoint.sh /
+RUN chmod +x /docker-entrypoint.sh
+ENTRYPOINT ["/docker-entrypoint.sh"]
+EXPOSE 6123 8081
+CMD ["help"]
+
diff --git a/Bigdata/flink/2.1.0/24.03-lts-sp2/docker-entrypoint.sh b/Bigdata/flink/2.1.0/24.03-lts-sp2/docker-entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e2851424b1549da23caacb5060f43e01ba4d628f
--- /dev/null
+++ b/Bigdata/flink/2.1.0/24.03-lts-sp2/docker-entrypoint.sh
@@ -0,0 +1,187 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
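+
+# Environment variables honored by this entrypoint (all optional):
+#   JOB_MANAGER_RPC_ADDRESS            JobManager address advertised to the cluster; defaults to the container hostname.
+#   FLINK_PROPERTIES                   Newline-separated "key: value" pairs appended to the Flink configuration (values are passed through envsubst).
+#   ENABLE_BUILT_IN_PLUGINS            Semicolon-separated list of plugin jars from $FLINK_HOME/opt to link into the plugins directory.
+#   TASK_MANAGER_NUMBER_OF_TASK_SLOTS  Sets taskmanager.numberOfTaskSlots.
+#   DISABLE_JEMALLOC                   Set to "true" to keep glibc malloc instead of preloading jemalloc.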
+
+COMMAND_STANDALONE="standalone-job"
+COMMAND_HISTORY_SERVER="history-server"
+
+# If unspecified, the hostname of the container is taken as the JobManager address
+JOB_MANAGER_RPC_ADDRESS=${JOB_MANAGER_RPC_ADDRESS:-$(hostname -f)}
+CONF_FILE_DIR="${FLINK_HOME}/conf"
+
+drop_privs_cmd() {
+    if [ "$(id -u)" != 0 ]; then
+        # Don't need to drop privs if EUID != 0
+        return
+    elif [ -x /sbin/su-exec ]; then
+        # Alpine
+        echo su-exec flink
+    else
+        # Others
+        echo gosu flink
+    fi
+}
+
+copy_plugins_if_required() {
+    if [ -z "$ENABLE_BUILT_IN_PLUGINS" ]; then
+        return 0
+    fi
+
+    echo "Enabling required built-in plugins"
+    for target_plugin in $(echo "$ENABLE_BUILT_IN_PLUGINS" | tr ';' ' '); do
+        echo "Linking ${target_plugin} to plugin directory"
+        plugin_name=${target_plugin%.jar}
+
+        mkdir -p "${FLINK_HOME}/plugins/${plugin_name}"
+        if [ ! -e "${FLINK_HOME}/opt/${target_plugin}" ]; then
+            echo "Plugin ${target_plugin} does not exist. Exiting."
+            exit 1
+        else
+            ln -fs "${FLINK_HOME}/opt/${target_plugin}" "${FLINK_HOME}/plugins/${plugin_name}"
+            echo "Successfully enabled ${target_plugin}"
+        fi
+    done
+}
+
+set_config_options() {
+    local config_parser_script="$FLINK_HOME/bin/config-parser-utils.sh"
+    local config_dir="$FLINK_HOME/conf"
+    local bin_dir="$FLINK_HOME/bin"
+    local lib_dir="$FLINK_HOME/lib"
+
+    local config_params=()
+
+    while [ $# -gt 0 ]; do
+        local key="$1"
+        local value="$2"
+
+        config_params+=("-D${key}=${value}")
+
+        shift 2
+    done
+
+    if [ "${#config_params[@]}" -gt 0 ]; then
+        "${config_parser_script}" "${config_dir}" "${bin_dir}" "${lib_dir}" "${config_params[@]}"
+    fi
+}
+
+prepare_configuration() {
+    local config_options=()
+
+    config_options+=("jobmanager.rpc.address" "${JOB_MANAGER_RPC_ADDRESS}")
+    config_options+=("blob.server.port" "6124")
+    config_options+=("query.server.port" "6125")
+
+    if [ -n "${TASK_MANAGER_NUMBER_OF_TASK_SLOTS}" ]; then
+        config_options+=("taskmanager.numberOfTaskSlots" "${TASK_MANAGER_NUMBER_OF_TASK_SLOTS}")
+    fi
+
+    if [ ${#config_options[@]} -ne 0 ]; then
+        set_config_options "${config_options[@]}"
+    fi
+
+    if [ -n "${FLINK_PROPERTIES}" ]; then
+        process_flink_properties "${FLINK_PROPERTIES}"
+    fi
+}
+
+process_flink_properties() {
+    local flink_properties_content=$1
+    local config_options=()
+
+    local OLD_IFS="$IFS"
+    IFS=$'\n'
+    for prop in $flink_properties_content; do
+        prop=$(echo "$prop" | tr -d '[:space:]')
+
+        if [ -z "$prop" ]; then
+            continue
+        fi
+
+        IFS=':' read -r key value <<< "$prop"
+
+        value=$(echo "$value" | envsubst)
+
+        config_options+=("$key" "$value")
+    done
+    IFS="$OLD_IFS"
+
+    if [ ${#config_options[@]} -ne 0 ]; then
+        set_config_options "${config_options[@]}"
+    fi
+}
+
+maybe_enable_jemalloc() {
+    if [ "${DISABLE_JEMALLOC:-false}" == "false" ]; then
+        JEMALLOC_PATH="/usr/lib64/libjemalloc.so.2"
+        if [ -f "$JEMALLOC_PATH" ]; then
+            export LD_PRELOAD=$LD_PRELOAD:$JEMALLOC_PATH
+        else
+            echo "WARNING: attempted to load jemalloc from $JEMALLOC_PATH but the library couldn't be found. glibc will be used instead."
+        fi
+    fi
+}
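+
+# Startup sequence: preload jemalloc unless disabled, link any requested
+# built-in plugins, render the configuration, then dispatch on the first
+# argument (help|jobmanager|standalone-job|taskmanager|history-server);
+# any other command is executed as-is after dropping root privileges.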
+
+maybe_enable_jemalloc
+
+copy_plugins_if_required
+
+prepare_configuration
+
+args=("$@")
+if [ "$1" = "help" ]; then
+    printf "Usage: $(basename "$0") (jobmanager|${COMMAND_STANDALONE}|taskmanager|${COMMAND_HISTORY_SERVER})\n"
+    printf "    Or $(basename "$0") help\n\n"
+    printf "By default, the Flink image uses jemalloc as its memory allocator. This behavior can be disabled by setting the 'DISABLE_JEMALLOC' environment variable to 'true'.\n"
+    exit 0
+elif [ "$1" = "jobmanager" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Job Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/jobmanager.sh" start-foreground "${args[@]}"
+elif [ "$1" = "${COMMAND_STANDALONE}" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Job Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/standalone-job.sh" start-foreground "${args[@]}"
+elif [ "$1" = "${COMMAND_HISTORY_SERVER}" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting History Server"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/historyserver.sh" start-foreground "${args[@]}"
+elif [ "$1" = "taskmanager" ]; then
+    args=("${args[@]:1}")
+
+    echo "Starting Task Manager"
+
+    exec $(drop_privs_cmd) "$FLINK_HOME/bin/taskmanager.sh" start-foreground "${args[@]}"
+fi
+
+args=("${args[@]}")
+
+# Running command in pass-through mode
+exec $(drop_privs_cmd) "${args[@]}"
\ No newline at end of file
diff --git a/Bigdata/flink/README.md b/Bigdata/flink/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7916cc842a535cb9a2ecc49b48d6ab086412eb55
--- /dev/null
+++ b/Bigdata/flink/README.md
@@ -0,0 +1,61 @@
+# Quick reference
+
+- The official Apache Flink docker image.
+
+- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative).
+
+- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community).
+
+# Apache Flink | openEuler
+Current Apache Flink docker images are built on [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits.
+
+Apache Flink is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams. Flink has been designed to run in all common cluster environments, perform computations at in-memory speed and at any scale.
+
+Learn more on the [Apache Flink website](https://flink.apache.org/).
+
+# Supported tags and respective Dockerfile links
+The tag of each flink docker image consists of the Flink version and the base image version. The details are as follows:
+
+| Tags | Currently | Architectures |
+|------|-----------|---------------|
+| [1.16-22.03-lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/flink/1.16/22.03-lts/Dockerfile) | Apache Flink 1.16 on openEuler 22.03-LTS | amd64, arm64 |
+| [2.1.0-oe2403sp2](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/flink/2.1.0/24.03-lts-sp2/Dockerfile) | Apache Flink 2.1.0 on openEuler 24.03-LTS-SP2 | amd64, arm64 |
+
+# Usage
+
+In the following examples, replace `{Tag}` with the tag that matches your requirements.
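+
+If the image is not already available locally, you can pull it first:
+
+```
+$ docker pull openeuler/flink:{Tag}
+```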
+
+- Starting a Session Cluster on Docker:
+
+  A Flink Session cluster can be used to run multiple jobs. Each job needs to be submitted to the cluster after the cluster has been deployed.
+  To deploy a Flink Session cluster with Docker, you need to start a JobManager container. To enable communication between the containers, we first set a required Flink configuration property and create a network:
+  ```
+  $ FLINK_PROPERTIES="jobmanager.rpc.address: jobmanager"
+  $ docker network create flink-network
+  ```
+
+  Then we launch the JobManager:
+  ```
+  $ docker run \
+      --rm \
+      --name=jobmanager \
+      --network flink-network \
+      --publish 8081:8081 \
+      --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
+      openeuler/flink:{Tag} jobmanager
+  ```
+
+  and one or more TaskManager containers:
+  ```
+  $ docker run \
+      --rm \
+      --name=taskmanager \
+      --network flink-network \
+      --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
+      openeuler/flink:{Tag} taskmanager
+  ```
+
+  The web interface is now available at `localhost:8081`.
+
+  Learn more about [how to use Apache Flink with Docker](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/resource-providers/standalone/docker/).
+
+# Question and answering
+If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images).
\ No newline at end of file
diff --git a/Bigdata/flink/doc/image-info.yml b/Bigdata/flink/doc/image-info.yml
new file mode 100644
index 0000000000000000000000000000000000000000..472da90d042ad8e5a962566e59ecc521202749d9
--- /dev/null
+++ b/Bigdata/flink/doc/image-info.yml
@@ -0,0 +1,70 @@
+name: flink
+category: bigdata
+description: Apache Flink is a distributed stream processing framework (with batch support) focused on low-latency, high-throughput processing of large-scale data.
+
+environment: |
+  This application runs in a Docker environment. Install Docker with the following command
+  ```
+  yum install -y docker
+  ```
+
+tags: |
+  The tag of each flink image consists of its version and the base image version. The details are as follows
+
+  | Tags | Currently | Architectures |
+  |------|-----------|---------------|
+  | [1.16-22.03-lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/flink/1.16/22.03-lts/Dockerfile) | Apache Flink 1.16 on openEuler 22.03-LTS | amd64, arm64 |
+  | [2.1.0-oe2403sp2](https://gitee.com/openeuler/openeuler-docker-images/blob/master/Bigdata/flink/2.1.0/24.03-lts-sp2/Dockerfile) | Apache Flink 2.1.0 on openEuler 24.03-LTS-SP2 | amd64, arm64 |
+
+download: |
+  Pull the image to your local machine
+  ```
+  docker pull openeuler/flink:{Tag}
+  ```
+
+usage: |
+  - Starting a Session Cluster on Docker:
+
+    A Flink Session cluster can be used to run multiple jobs. Each job needs to be submitted to the cluster after the cluster has been deployed.
+    To deploy a Flink Session cluster with Docker, you first need to start a JobManager container.
+    To enable communication between the containers, we first set a required Flink configuration property and create a network:
+    ```
+    $ FLINK_PROPERTIES="jobmanager.rpc.address: jobmanager"
+    $ docker network create flink-network
+    ```
+
+    Then launch the JobManager:
+    ```
+    $ docker run \
+        --rm \
+        --name=jobmanager \
+        --network flink-network \
+        --publish 8081:8081 \
+        --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
+        openeuler/flink:{Tag} jobmanager
+    ```
+
+    Then start one or more TaskManager containers:
+    ```
+    $ docker run \
+        --rm \
+        --name=taskmanager \
+        --network flink-network \
+        --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
+        openeuler/flink:{Tag} taskmanager
+    ```
+
+    The web interface can now be opened in a browser at `localhost:8081`.
+
+    For more information, see the official documentation on [how to use Apache Flink with Docker](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/resource-providers/standalone/docker/).
+
+license: Apache-2.0 license
+
+similar_packages:
+  - Apache Spark Structured Streaming: Spark's stream processing API; it is micro-batch based under the hood, so its latency is slightly higher than Flink's.
+
+dependency:
+  - openjdk
+  - scala
+  - gosu
+  - jemalloc
\ No newline at end of file
diff --git a/Bigdata/flink/doc/picture/logo.png b/Bigdata/flink/doc/picture/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..37c980e9de40fb139ecc96baa8164f81e2d0963e
Binary files /dev/null and b/Bigdata/flink/doc/picture/logo.png differ
diff --git a/Bigdata/flink/meta.yml b/Bigdata/flink/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f24e79f360d6cceddaa52c3a1df8e5f7ce1f2621
--- /dev/null
+++ b/Bigdata/flink/meta.yml
@@ -0,0 +1,4 @@
+1.16-22.03-lts:
+  path: 1.16/22.03-lts/Dockerfile
+2.1.0-oe2403sp2:
+  path: 2.1.0/24.03-lts-sp2/Dockerfile