From 2bf4dfa214c33c7476c8fc391a5600cd365b7105 Mon Sep 17 00:00:00 2001
From: zhihang
Date: Tue, 22 Oct 2024 01:23:16 +0000
Subject: [PATCH 1/3] feat: add component 'vllm'

---
 vllm/0.6.3/24.03-lts/Dockerfile | 70 +++++++++++++++++++++++++++++++++
 vllm/meta.yml                   |  3 ++
 2 files changed, 73 insertions(+)
 create mode 100644 vllm/0.6.3/24.03-lts/Dockerfile
 create mode 100644 vllm/meta.yml

diff --git a/vllm/0.6.3/24.03-lts/Dockerfile b/vllm/0.6.3/24.03-lts/Dockerfile
new file mode 100644
index 00000000..95986ab8
--- /dev/null
+++ b/vllm/0.6.3/24.03-lts/Dockerfile
@@ -0,0 +1,70 @@
+# This vLLM Dockerfile is used to construct an image that can build and run vLLM on the x86 CPU platform.
+
+FROM openeuler/openeuler:24.03-lts AS cpu-test-1
+
+ENV CCACHE_DIR=/root/.cache/ccache
+
+ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+RUN --mount=type=cache,target=/var/cache/yum \
+    yum update -y \
+    && yum install -y curl ccache git wget vim numactl gcc g++ python3-devel python3-pip gperftools-libs numactl-libs numactl-devel \
+    && yum install -y ffmpeg libSM libXext mesa-libGL \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
+# intel-openmp provides additional performance improvement vs. openmp
+# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install intel-openmp
+
+ENV LD_PRELOAD="/usr/lib64/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
+
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+RUN pip3 install intel_extension_for_pytorch==2.4.0
+
+WORKDIR /workspace
+
+RUN git clone https://github.com/vllm-project/vllm.git
+
+ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install --upgrade pip && \
+    pip install -r vllm/requirements-build.txt
+
+# install oneDNN
+RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
+
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    cmake -B ./oneDNN/build -S ./oneDNN -G Ninja -DONEDNN_LIBRARY_TYPE=STATIC \
+    -DONEDNN_BUILD_DOC=OFF \
+    -DONEDNN_BUILD_EXAMPLES=OFF \
+    -DONEDNN_BUILD_TESTS=OFF \
+    -DONEDNN_BUILD_GRAPH=OFF \
+    -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
+    -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
+    cmake --build ./oneDNN/build --target install --config Release
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -v -r requirements-cpu.txt
+
+# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
+ARG VLLM_CPU_DISABLE_AVX512
+ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/ccache \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl && \
+    rm -rf dist
+
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file
diff --git a/vllm/meta.yml b/vllm/meta.yml
new file mode 100644
index 00000000..f7f81620
--- /dev/null
+++ b/vllm/meta.yml
@@ -0,0 +1,3 @@
+0.6.3-oe2403lts:
+  path: vllm/0.6.3/24.03-lts/Dockerfile
+  arch: x86_64
\ No newline at end of file
--
Gitee

From 96dc86d1048fea284dc01eae575baaef3cf69cfa Mon Sep 17 00:00:00 2001
From: zhihang
Date: Tue, 22 Oct 2024 08:12:39 +0000
Subject: [PATCH 2/3] fix: correct PYTHONPATH in chatqna Dockerfile

---
 chatqna/1.0/24.03-lts/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chatqna/1.0/24.03-lts/Dockerfile b/chatqna/1.0/24.03-lts/Dockerfile
index da3e2205..6822a098 100644
--- a/chatqna/1.0/24.03-lts/Dockerfile
+++ b/chatqna/1.0/24.03-lts/Dockerfile
@@ -29,7 +29,7 @@ RUN git clone https://gitee.com/zhihang161013/GenAIExamples.git
 RUN cp GenAIExamples/ChatQnA/chatqna.py .
 RUN rm -rf GenAIExamples
 
-ENV PYTHONPATH=/usr/bin/python:/home/user
+ENV PYTHONPATH=/usr/bin/python:/home/user/GenAIComps
 
 USER user
 
--
Gitee

From 57addd60e1afd2b233e1b431163b9fccd0341b52 Mon Sep 17 00:00:00 2001
From: zhihang
Date: Tue, 22 Oct 2024 08:16:17 +0000
Subject: [PATCH 3/3] Delete file vllm

---
 vllm/0.6.3/24.03-lts/Dockerfile | 70 ---------------------------------
 vllm/meta.yml                   |  3 --
 2 files changed, 73 deletions(-)
 delete mode 100644 vllm/0.6.3/24.03-lts/Dockerfile
 delete mode 100644 vllm/meta.yml

diff --git a/vllm/0.6.3/24.03-lts/Dockerfile b/vllm/0.6.3/24.03-lts/Dockerfile
deleted file mode 100644
index 95986ab8..00000000
--- a/vllm/0.6.3/24.03-lts/Dockerfile
+++ /dev/null
@@ -1,70 +0,0 @@
-# This vLLM Dockerfile is used to construct an image that can build and run vLLM on the x86 CPU platform.
-
-FROM openeuler/openeuler:24.03-lts AS cpu-test-1
-
-ENV CCACHE_DIR=/root/.cache/ccache
-
-ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
-
-RUN --mount=type=cache,target=/var/cache/yum \
-    yum update -y \
-    && yum install -y curl ccache git wget vim numactl gcc g++ python3-devel python3-pip gperftools-libs numactl-libs numactl-devel \
-    && yum install -y ffmpeg libSM libXext mesa-libGL \
-    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
-
-# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
-# intel-openmp provides additional performance improvement vs. openmp
-# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install intel-openmp
-
-ENV LD_PRELOAD="/usr/lib64/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
-
-RUN echo 'ulimit -c 0' >> ~/.bashrc
-
-RUN pip3 install intel_extension_for_pytorch==2.4.0
-
-WORKDIR /workspace
-
-RUN git clone https://github.com/vllm-project/vllm.git
-
-ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
-ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip3 install --upgrade pip && \
-    pip install -r vllm/requirements-build.txt
-
-# install oneDNN
-RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git
-
-RUN --mount=type=cache,target=/root/.cache/ccache \
-    cmake -B ./oneDNN/build -S ./oneDNN -G Ninja -DONEDNN_LIBRARY_TYPE=STATIC \
-    -DONEDNN_BUILD_DOC=OFF \
-    -DONEDNN_BUILD_EXAMPLES=OFF \
-    -DONEDNN_BUILD_TESTS=OFF \
-    -DONEDNN_BUILD_GRAPH=OFF \
-    -DONEDNN_ENABLE_WORKLOAD=INFERENCE \
-    -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
-    cmake --build ./oneDNN/build --target install --config Release
-
-FROM cpu-test-1 AS build
-
-WORKDIR /workspace/vllm
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -v -r requirements-cpu.txt
-
-# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
-ARG VLLM_CPU_DISABLE_AVX512
-ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/ccache \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
-    pip install dist/*.whl && \
-    rm -rf dist
-
-WORKDIR /workspace/
-
-RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
-
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file
diff --git a/vllm/meta.yml b/vllm/meta.yml
deleted file mode 100644
index f7f81620..00000000
--- a/vllm/meta.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-0.6.3-oe2403lts:
-  path: vllm/0.6.3/24.03-lts/Dockerfile
-  arch: x86_64
\ No newline at end of file
--
Gitee
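
Usage sketch for the image added in PATCH 1/3 (and removed again in PATCH 3/3). The image tag vllm-cpu:0.6.3 and the model facebook/opt-125m below are illustrative placeholders, not names taken from the patches; since the Dockerfile's ENTRYPOINT is vllm.entrypoints.openai.api_server, any arguments after the image name are passed straight to the server.

# Build the x86 CPU image from the repository root (illustrative tag).
docker build -f vllm/0.6.3/24.03-lts/Dockerfile -t vllm-cpu:0.6.3 .

# For CPUs without AVX512, use the build arg documented in the Dockerfile.
docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" \
    -f vllm/0.6.3/24.03-lts/Dockerfile -t vllm-cpu:0.6.3-noavx512 .

# Serve a model over the OpenAI-compatible API (placeholder model name).
docker run --rm -p 8000:8000 vllm-cpu:0.6.3 --model facebook/opt-125m

# Quick check once the server is up.
curl http://localhost:8000/v1/models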