From 095dace8f1e1db2cc1afa27dbe955a3189a7d1e2 Mon Sep 17 00:00:00 2001
From: zhihang
Date: Tue, 5 Nov 2024 06:40:32 +0000
Subject: [PATCH] feat: add OPEA component 'text-generation-inference-cpu'

---
 .../2.4.0/24.03-lts/Dockerfile         | 171 ++++++++++++++++++
 text-generation-inference-cpu/meta.yml |   3 +
 2 files changed, 174 insertions(+)
 create mode 100644 text-generation-inference-cpu/2.4.0/24.03-lts/Dockerfile
 create mode 100644 text-generation-inference-cpu/meta.yml

diff --git a/text-generation-inference-cpu/2.4.0/24.03-lts/Dockerfile b/text-generation-inference-cpu/2.4.0/24.03-lts/Dockerfile
new file mode 100644
index 00000000..5512f062
--- /dev/null
+++ b/text-generation-inference-cpu/2.4.0/24.03-lts/Dockerfile
@@ -0,0 +1,171 @@
+ARG PLATFORM=cpu
+
+FROM openeuler/openeuler:24.03-lts AS chef
+WORKDIR /usr/src
+
+RUN yum update -y && yum install -y --setopt=install_weak_deps=False \
+    curl \
+    gcc gcc-c++ \
+    git
+
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o rustup-init && \
+    chmod +x rustup-init && \
+    ./rustup-init -y && \
+    rm rustup-init
+
+ENV PATH="/root/.cargo/bin:$PATH"
+
+RUN cargo install cargo-chef --locked
+
+ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
+
+FROM chef AS planner
+RUN git clone https://github.com/huggingface/text-generation-inference.git && cd text-generation-inference && git checkout v2.4.0
+
+RUN mv text-generation-inference/Cargo.lock Cargo.lock && \
+    mv text-generation-inference/Cargo.toml Cargo.toml && \
+    mv text-generation-inference/rust-toolchain.toml rust-toolchain.toml && \
+    mv text-generation-inference/proto proto && \
+    mv text-generation-inference/benchmark benchmark && \
+    mv text-generation-inference/router router && \
+    mv text-generation-inference/backends backends && \
+    mv text-generation-inference/launcher launcher && \
+    rm -rf text-generation-inference
+
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+
+RUN yum update -y && yum install -y --setopt=install_weak_deps=False \
+    python3-devel \
+    unzip \
+    openssl-devel
+RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
+    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
+    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
+    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
+    rm -f $PROTOC_ZIP
+
+COPY --from=planner /usr/src/recipe.json recipe.json
+RUN cargo chef cook --profile release-opt --recipe-path recipe.json
+
+ARG GIT_SHA
+ARG DOCKER_LABEL
+
+RUN git clone https://github.com/huggingface/text-generation-inference.git && cd text-generation-inference && git checkout v2.4.0
+
+RUN mv text-generation-inference/Cargo.lock Cargo.lock && \
+    mv text-generation-inference/Cargo.toml Cargo.toml && \
+    mv text-generation-inference/rust-toolchain.toml rust-toolchain.toml && \
+    mv text-generation-inference/proto proto && \
+    mv text-generation-inference/benchmark benchmark && \
+    mv text-generation-inference/router router && \
+    mv text-generation-inference/backends backends && \
+    mv text-generation-inference/launcher launcher && \
+    rm -rf text-generation-inference
+
+RUN cargo build --profile release-opt --offline
+
+# Text Generation Inference base image for Intel-cpu
+FROM openeuler/openeuler:24.03-lts AS cpu
+
+RUN yum update -y && yum install -y --setopt=install_weak_deps=False \
+    curl \
+    ca-certificates \
+    make \
+    gcc-c++ \
+    gcc \
+    git \
+    wget \
+    cmake \
+    numactl-devel
+
+# openEuler 24.03 LTS ships GCC 12 as unversioned /usr/bin/gcc and /usr/bin/g++
+# (package gcc-c++); no g++-12/gcc-12 binaries exist to register as alternatives.
+RUN update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
+RUN update-alternatives --set cc /usr/bin/gcc
+
+RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
+RUN update-alternatives --set c++ /usr/bin/g++
+
+
+ENV HUGGINGFACE_HUB_CACHE=/data \
+    HF_HUB_ENABLE_HF_TRANSFER=1 \
+    PORT=80
+
+ARG MAMBA_VERSION=23.1.0-1
+ARG PYTHON_VERSION='3.11.10'
+# Automatically set by buildx
+ARG TARGETPLATFORM
+ENV 
PATH=/opt/conda/bin:$PATH
+
+# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
+# Install mamba
+# translating Docker's TARGETPLATFORM into mamba arches
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+    *) MAMBA_ARCH=x86_64 ;; \
+    esac && \
+    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
+RUN chmod +x ~/mambaforge.sh && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+
+RUN case ${TARGETPLATFORM} in \
+    "linux/arm64") exit 1 ;; \
+    *) /opt/conda/bin/conda update -y conda && \
+    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+RUN conda install -c conda-forge gperftools mkl
+
+
+RUN pip install --no-cache-dir https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install --no-cache-dir https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install --no-cache-dir https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
+
+RUN pip install --no-cache-dir triton py-libnuma
+
+WORKDIR /usr/src
+
+RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout f86e93e4890dc2c989024d148d415c9aa8a1649f
+RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
+
+RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
+
+RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
+
+ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
+ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
+ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
+
+# Install server
+RUN git clone https://github.com/huggingface/text-generation-inference.git && cd text-generation-inference && git checkout v2.4.0
+
+RUN mv text-generation-inference/proto proto && \
+    mv text-generation-inference/server server && \
+    rm -rf text-generation-inference
+
+RUN cd server && \
+    make gen-server && \
+    pip install -r requirements_intel.txt && \
+    pip install ".[accelerate, peft, outlines]" --no-cache-dir
+
+# Install benchmarker
+COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
+# Install router
+COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
+# Install launcher
+COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
+
+FROM ${PLATFORM} AS final
+ENV ATTENTION=paged
+ENV PREFIX_CACHING=0
+ENV PREFILL_CHUNKING=0
+ENV CUDA_GRAPHS=0
+ENTRYPOINT ["text-generation-launcher"]
+CMD ["--json-output"]
\ No newline at end of file
diff --git a/text-generation-inference-cpu/meta.yml b/text-generation-inference-cpu/meta.yml
new file mode 100644
index 00000000..f2d6d770
--- /dev/null
+++ b/text-generation-inference-cpu/meta.yml
@@ -0,0 +1,3 @@
+2.4.0-oe2403lts:
+  path: text-generation-inference-cpu/2.4.0/24.03-lts/Dockerfile
+  arch: x86_64
\ No newline at end of file
-- 
Gitee