From b31d2182513a1216428a4681c49bb7096854ee43 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Wed, 9 Apr 2025 01:47:14 +0000
Subject: [PATCH] Add vllm-ascend Dockerfile

---
 AI/image-list.yml                        |  1 +
 .../22.03-lts/Dockerfile                 | 36 +++++++++
 AI/vllm-ascend/README.md                 | 79 +++++++++++++++++++
 AI/vllm-ascend/meta.yml                  |  2 +
 4 files changed, 118 insertions(+)
 create mode 100644 AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile
 create mode 100644 AI/vllm-ascend/README.md
 create mode 100644 AI/vllm-ascend/meta.yml

diff --git a/AI/image-list.yml b/AI/image-list.yml
index b1ff6b5d..2542d04e 100644
--- a/AI/image-list.yml
+++ b/AI/image-list.yml
@@ -44,3 +44,4 @@ images:
   text-embeddings-inference-cpu: text-embeddings-inference-cpu
   text-generation-inference-cpu: text-generation-inference-cpu
   vllm: vllm
+  vllm-ascend: vllm-ascend
diff --git a/AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile b/AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile
new file mode 100644
index 00000000..b27a294a
--- /dev/null
+++ b/AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile
@@ -0,0 +1,36 @@
+FROM quay.io/ascend/cann:8.0.0-910b-openeuler22.03-py3.10
+
+ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+
+
+RUN yum update -y && \
+    yum install -y python3-pip git vim wget net-tools && \
+    rm -rf /var/cache/yum && \
+    rm -rf /tmp/*
+
+RUN pip config set global.index-url ${PIP_INDEX_URL}
+
+# Install vLLM
+ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
+ARG VLLM_TAG=v0.7.3
+ARG VLLM_ASCEND_REPO=https://github.com/vllm-project/vllm-ascend.git
+ARG VLLM_ASCEND_TAG=v0.7.3rc2
+
+RUN git clone $VLLM_REPO --branch $VLLM_TAG /workspace/vllm
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install /workspace/vllm/ --extra-index-url https://download.pytorch.org/whl/cpu/
+# On x86, triton is installed as a vLLM dependency, but it does not work correctly on Ascend, so uninstall it.
+RUN python3 -m pip uninstall -y triton
+
+RUN git clone $VLLM_ASCEND_REPO --branch $VLLM_ASCEND_TAG /workspace/vllm-ascend
+
+
+# Install vllm-ascend
+RUN python3 -m pip install /workspace/vllm-ascend/ --extra-index-url https://download.pytorch.org/whl/cpu/
+
+# Install torch-npu
+RUN bash /workspace/vllm-ascend/pta_install.sh
+
+# Install modelscope (for fast model downloads) and ray (for multi-node serving)
+RUN python3 -m pip install modelscope ray
+
+CMD ["/bin/bash"]
\ No newline at end of file
diff --git a/AI/vllm-ascend/README.md b/AI/vllm-ascend/README.md
new file mode 100644
index 00000000..5ecbd903
--- /dev/null
+++ b/AI/vllm-ascend/README.md
@@ -0,0 +1,79 @@
+<p align="center">
+vllm-ascend
+</p>
+
+<h3 align="center">
+vLLM Ascend Plugin
+</h3>
+
+<p align="center">
+| About Ascend | Documentation | #sig-ascend | Users Forum | Weekly Meeting |
+</p>
+
+<p align="center">
+English | 中文
+</p>
+
+---
+*Latest News* 🔥
+- [2025/03] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/VtxO9WXa5fC-mKqlxNUJUQ) with the vLLM team! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
+- [2025/02] The vLLM community officially created the [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-ascend) repo for running vLLM seamlessly on the Ascend NPU.
+- [2024/12] We are working with the vLLM community to support [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162).
+---
+
+## Overview
+
+vLLM Ascend (`vllm-ascend`) is a community-maintained hardware plugin for running vLLM seamlessly on the Ascend NPU.
+
+It is the recommended approach within the vLLM community for supporting the Ascend backend. It adheres to the principles outlined in the [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162), providing a hardware-pluggable interface that decouples the Ascend NPU integration from vLLM.
+
+With the vLLM Ascend plugin, popular open-source models, including transformer-based, mixture-of-experts, embedding, and multi-modal LLMs, run seamlessly on the Ascend NPU.
+
+## Prerequisites
+
+- Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series
+- OS: Linux
+- Software:
+  * Python >= 3.9
+  * CANN >= 8.0.0
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250320
+  * vLLM (the same version as vllm-ascend)
+
+## Getting Started
+
+Please refer to [QuickStart](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/latest/installation.html) for more details.
+
+## Contributing
+
+See [CONTRIBUTING](https://vllm-ascend.readthedocs.io/en/main/developer_guide/contributing.html) for a step-by-step guide to setting up the development environment, building, and testing.
+
+We welcome and value any contributions and collaborations:
+- Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues).
+- Please use the [Users Forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help.
+
+## Branch
+
+vllm-ascend has a main branch and versioned dev branches.
+
+- **main**: corresponds to the vLLM main branch and is continuously monitored for quality through Ascend CI.
+- **vX.Y.Z-dev**: development branches, created alongside new vLLM releases. For example, `v0.7.3-dev` is the dev branch for vLLM `v0.7.3`.
+
+The maintained branches are listed below:
+
+| Branch     | Status       | Note                                  |
+|------------|--------------|---------------------------------------|
+| main       | Maintained   | CI commitment for vLLM main branch    |
+| v0.7.1-dev | Unmaintained | Only doc fixes are allowed            |
+| v0.7.3-dev | Maintained   | CI commitment for vLLM 0.7.3 version  |
+
+Please refer to the [Versioning policy](https://vllm-ascend.readthedocs.io/en/main/developer_guide/versioning_policy.html) for more details.
+
+## Weekly Meeting
+
+- vLLM Ascend Weekly Meeting: https://tinyurl.com/vllm-ascend-meeting
+- Wednesday, 15:00 - 16:00 (UTC+8, [Convert to your timezone](https://dateful.com/convert/gmt8?t=15))
+
+## License
+
+Apache License 2.0, as found in the [LICENSE](./LICENSE) file.
\ No newline at end of file
diff --git a/AI/vllm-ascend/meta.yml b/AI/vllm-ascend/meta.yml
new file mode 100644
index 00000000..ac0b04d3
--- /dev/null
+++ b/AI/vllm-ascend/meta.yml
@@ -0,0 +1,2 @@
+0.7.3rc2-torch-npu2.5.1-cann8.0.0-python3.10-oe2203lts:
+  path: 0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile
--
Gitee
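For local verification of the new Dockerfile, a minimal build-and-run sketch follows. The image tag is arbitrary, and the NPU device nodes and driver mounts are assumptions based on a typical Ascend host layout; adjust them to match your environment.

```bash
# Build the image from the repository root (tag name is illustrative).
docker build \
  -f AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts/Dockerfile \
  -t vllm-ascend:0.7.3rc2-oe2203lts \
  AI/vllm-ascend/0.7.3rc2-torch_npu2.5.1-cann8.0.0-python3.10/22.03-lts

# Start a container with NPU access. The davinci* device nodes and the
# dcmi/npu-smi/driver mounts are typical for Ascend hosts but may differ locally.
docker run --rm -it \
  --device /dev/davinci0 \
  --device /dev/davinci_manager \
  --device /dev/devmm_svm \
  --device /dev/hisi_hdc \
  -v /usr/local/dcmi:/usr/local/dcmi \
  -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
  vllm-ascend:0.7.3rc2-oe2203lts
```

Inside the container, running `vllm serve <model>` should then exercise the Ascend backend provided by the installed plugin.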