diff --git a/AI/image-list.yml b/AI/image-list.yml
index 6e214bb7063beafbc998f6c65bb1c1349290c5b2..ac2fc0db053fbd43f25c7291fad1a1460e557b3c 100644
--- a/AI/image-list.yml
+++ b/AI/image-list.yml
@@ -51,7 +51,7 @@ images:
   pytorch: pytorch
   text-embeddings-inference-cpu: text-embeddings-inference-cpu
   text-generation-inference-cpu: text-generation-inference-cpu
-  vllm: vllm
+  vllm-cpu: vllm-cpu
   vllm-ascend: vllm-ascend
   langchain: langchain
   open-webui: open-webui
diff --git a/AI/vllm/0.8.3/24.03-lts/Dockerfile b/AI/vllm-cpu/0.8.3/24.03-lts/Dockerfile
similarity index 100%
rename from AI/vllm/0.8.3/24.03-lts/Dockerfile
rename to AI/vllm-cpu/0.8.3/24.03-lts/Dockerfile
diff --git a/AI/vllm/0.8.4/24.03-lts/Dockerfile b/AI/vllm-cpu/0.8.4/24.03-lts/Dockerfile
similarity index 100%
rename from AI/vllm/0.8.4/24.03-lts/Dockerfile
rename to AI/vllm-cpu/0.8.4/24.03-lts/Dockerfile
diff --git a/AI/vllm/0.8.5/24.03-lts/Dockerfile b/AI/vllm-cpu/0.8.5/24.03-lts/Dockerfile
similarity index 100%
rename from AI/vllm/0.8.5/24.03-lts/Dockerfile
rename to AI/vllm-cpu/0.8.5/24.03-lts/Dockerfile
diff --git a/AI/vllm/0.9.0/24.03-lts/Dockerfile b/AI/vllm-cpu/0.9.0/24.03-lts/Dockerfile
similarity index 100%
rename from AI/vllm/0.9.0/24.03-lts/Dockerfile
rename to AI/vllm-cpu/0.9.0/24.03-lts/Dockerfile
diff --git a/AI/vllm/README.md b/AI/vllm-cpu/README.md
similarity index 99%
rename from AI/vllm/README.md
rename to AI/vllm-cpu/README.md
index 67479058bf0b1cb90f9fc86ac8a6b5af1162a7af..b04a4237eecb8f0742dc9c86ad716253c9156ae5 100644
--- a/AI/vllm/README.md
+++ b/AI/vllm-cpu/README.md
@@ -38,6 +38,7 @@ The tag of each vLLM docker image is consist of the version of vLLM and the vers
 |[0.8.4-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.8.4/24.03-lts/Dockerfile)| vLLM 0.8.4 on openEuler 24.03-LTS | amd64 |
 |[0.8.5-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.8.5/24.03-lts/Dockerfile)| vLLM 0.8.5 on openEuler 24.03-LTS | amd64, arm64 |
 |[0.9.0-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.9.0/24.03-lts/Dockerfile)| vLLM 0.9.0 on openEuler 24.03-LTS | amd64, arm64 |
+
 # Usage
 
 ## Quick start 1: supported devices
diff --git a/AI/vllm/meta.yml b/AI/vllm-cpu/meta.yml
similarity index 65%
rename from AI/vllm/meta.yml
rename to AI/vllm-cpu/meta.yml
index 0460cad6aa6bc0c9de595ad5449f76faab9509bc..c618c6a46136330b159f27b47f4ebaa677a6a32b 100644
--- a/AI/vllm/meta.yml
+++ b/AI/vllm-cpu/meta.yml
@@ -1,7 +1,3 @@
-0.6.3-oe2403lts:
-  path: 0.6.3/24.03-lts/Dockerfile
-  arch: x86_64
-
 0.8.3-oe2403lts:
   path: 0.8.3/24.03-lts/Dockerfile
 
@@ -13,4 +9,4 @@
   path: 0.8.5/24.03-lts/Dockerfile
 
 0.9.0-oe2403lts:
-  path: 0.9.0/24.03-lts/Dockerfile
\ No newline at end of file
+  path: 0.9.0/24.03-lts/Dockerfile
diff --git a/AI/vllm/0.6.3/24.03-lts/Dockerfile b/AI/vllm/0.6.3/24.03-lts/Dockerfile
deleted file mode 100644
index 95986ab81e44cb43a06a43950ad92e5c52020566..0000000000000000000000000000000000000000
--- a/AI/vllm/0.6.3/24.03-lts/Dockerfile
+++ /dev/null
@@ -1,70 +0,0 @@
-# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
-
-FROM openeuler/openeuler:24.03-lts AS cpu-test-1
-
-ENV CCACHE_DIR=/root/.cache/ccache
-
-ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
-
-RUN --mount=type=cache,target=/var/cache/yum \
-    yum update -y \
-    && yum install -y curl ccache git wget vim numactl gcc g++ python3-devel python3-pip gperftools-libs numactl-libs numactl-devel \
-    && yum install -y ffmpeg libSM libXext mesa-libGL \
-    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
-
-# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
-# intel-openmp provides additional performance improvement vs. openmp
-# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install intel-openmp
-
-ENV LD_PRELOAD="/usr/lib64/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
-
-RUN echo 'ulimit -c 0' >> ~/.bashrc
-
-RUN pip3 install intel_extension_for_pytorch==2.4.0
-
-WORKDIR /workspace
-
-RUN git clone https://github.com/vllm-project/vllm.git
-
-ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
-ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install --upgrade pip && \
-    pip install -r vllm/requirements-build.txt
-
-# install oneDNN
-RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
-
-RUN --mount=type=cache,target=/root/.cache/ccache \
-    cmake -B ./oneDNN/build -S ./oneDNN -G Ninja -DONEDNN_LIBRARY_TYPE=STATIC \
-    -DONEDNN_BUILD_DOC=OFF \
-    -DONEDNN_BUILD_EXAMPLES=OFF \
-    -DONEDNN_BUILD_TESTS=OFF \
-    -DONEDNN_BUILD_GRAPH=OFF \
-    -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
-    -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
-    cmake --build ./oneDNN/build --target install --config Release
-
-FROM cpu-test-1 AS build
-
-WORKDIR /workspace/vllm
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -v -r requirements-cpu.txt
-
-# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
-ARG VLLM_CPU_DISABLE_AVX512
-ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/ccache \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
-    pip install dist/*.whl && \
-    rm -rf dist
-
-WORKDIR /workspace/
-
-RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
-
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file
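Usage sketch (not part of the patch; assumes the renamed image is published as openeuler/vllm-cpu with the same tag scheme listed in meta.yml, and that the tag chosen below exists):

    # pull the CPU image under its new name
    docker pull openeuler/vllm-cpu:0.9.0-oe2403lts

    # the image's ENTRYPOINT is vllm.entrypoints.openai.api_server, so extra
    # arguments are passed straight to the server; model name is illustrative
    docker run --rm -p 8000:8000 openeuler/vllm-cpu:0.9.0-oe2403lts \
        --model Qwen/Qwen2.5-0.5B-Instruct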