diff --git a/vllm/0.7.1/24.03-lts/Dockerfile b/vllm/0.7.1/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..ccf5b47604b740ffc31a694a9203f9b3f9a44f78
--- /dev/null
+++ b/vllm/0.7.1/24.03-lts/Dockerfile
@@ -0,0 +1,53 @@
+# This vLLM Dockerfile is used to construct an image that can build and run vLLM on a CPU-only platform.
+
+FROM openeuler/openeuler:24.03-lts AS cpu-test-1
+
+ARG TARGETARCH
+ARG VERSION=0.7.1
+
+ENV CCACHE_DIR=/root/.cache/ccache
+
+ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+RUN --mount=type=cache,target=/var/cache/yum \
+ yum update -y \
+ && yum install -y curl ccache git wget vim numactl gcc g++ python3-devel python3-pip gperftools-libs numactl-libs numactl-devel \
+ && yum install -y ffmpeg libSM libXext mesa-libGL google-perftools \
+ && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install py-cpuinfo
+
+# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
+ENV LD_PRELOAD="/usr/lib64/libtcmalloc_minimal.so.4"
+
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+WORKDIR /workspace
+
+RUN git clone -b v${VERSION} https://github.com/vllm-project/vllm.git
+
+ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+RUN --mount=type=cache,target=/root/.cache/pip \
+ pip3 install --upgrade pip && \
+ pip install -r vllm/requirements-build.txt
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+RUN --mount=type=cache,target=/root/.cache/pip \
+ pip install -v -r requirements-cpu.txt
+
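+# VLLM_CPU_DISABLE_AVX512=true builds the CPU backend without AVX512 optimizations, so the resulting wheel also runs on CPUs that lack AVX512 support.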
+ENV VLLM_CPU_DISABLE_AVX512="true"
+RUN --mount=type=cache,target=/root/.cache/pip \
+ --mount=type=cache,target=/root/.cache/ccache \
+ VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+ pip install dist/*.whl && \
+ rm -rf dist
+
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file
diff --git a/vllm/README.md b/vllm/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7f092bdc97885ab6c9bcfdf5364743c232554a93
--- /dev/null
+++ b/vllm/README.md
@@ -0,0 +1,147 @@
+# Quick reference
+
+- The official vLLM docker image.
+
+- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative).
+
+- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community).
+
+# vLLM | openEuler
+Current vLLM docker images are built on [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits.
+
+vLLM is a fast and easy-to-use library for LLM inference and serving. Originally developed in the Sky Computing Lab at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
+
+Learn more on the [vLLM website](https://docs.vllm.ai/en/latest/).
+
+# Supported tags and respective Dockerfile links
+The tag of each `vllm` docker image consists of the `vllm` version and the version of the base image. The details are as follows:
+| Tag | Currently | Architectures |
+|----------|-------------|------------------|
+|[0.6.6-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/vllm/0.6.6/24.03-lts/Dockerfile)| vLLM 0.6.6 on openEuler 24.03-LTS | amd64 |
+|[0.7.1-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/vllm/0.7.1/24.03-lts/Dockerfile)| vLLM 0.7.1 on openEuler 24.03-LTS | arm64 |
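+
+If you prefer to build an image yourself instead of pulling it, a minimal sketch (assuming you have cloned the [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images) repository and are in its root directory; adjust the tag and Dockerfile path for other versions):
+```bash
+docker build -t openeuler/vllm:0.7.1-oe2403lts vllm/0.7.1/24.03-lts
+```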
+
+# Usage
+In the following examples, replace `{Tag}` with a tag from the table above and adjust the container startup options to your requirements.
+
+- **Pull the `openeuler/vllm` image from Docker Hub**
+ ```bash
+ docker pull openeuler/vllm:{Tag}
+ ```
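+
+  For example, to pull the vLLM 0.7.1 image from the tag table above:
+  ```bash
+  docker pull openeuler/vllm:0.7.1-oe2403lts
+  ```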
+
+- **Download the large model (optional)**
+
+  If you do not want the container to download the model at startup, you can download it to your local machine first.
+ ```bash
+ huggingface-cli download --resume-download Qwen/Qwen2.5-1.5B-Instruct --local-dir /tmp/Qwen/Qwen2.5-1.5B-Instruct
+ ```
+  If you use a local copy, replace the model ID `Qwen/Qwen2.5-1.5B-Instruct` with its local path in the commands below.
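+
+  For example, a minimal sketch that defines the `LLM_MODEL_PATH` and `LLM_MODEL_ID` variables used by the `docker run` command below (the variable names are placeholders for your own paths, not anything required by vLLM):
+  ```bash
+  # Local directory holding the downloaded model; it will be mounted into the container
+  export LLM_MODEL_PATH=/tmp/Qwen/Qwen2.5-1.5B-Instruct
+  # Value passed to vLLM via --model: a local path here, or a Hugging Face model ID
+  export LLM_MODEL_ID=$LLM_MODEL_PATH
+  ```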
+
+- **Start a vLLM instance**
+ ```bash
+ docker run -it -d --name vllm -p 8000:80 -v $LLM_MODEL_PATH:$LLM_MODEL_PATH openeuler/vllm:{Tag} --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --api-key EMPTY
+ ```
+  After the `vllm` instance is started, the vLLM service is reachable at `http://localhost:8000`.
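+
+  Once the model has finished loading, a quick way to check that the service is up (this assumes the port mapping above; the `/health` endpoint returns HTTP 200 when the server is ready):
+  ```bash
+  curl -i http://localhost:8000/health
+  ```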
+
+- **Container startup options**
+
+ | Option | Description |
+ |--|--|
+ | `-p 8000:80` | Expose vllm service on `localhost:8000`. |
+  | `-v $LLM_MODEL_PATH:$LLM_MODEL_PATH` | Mount the local model directory into the container (optional). |
+ | `--model $LLM_MODEL_ID` | Specify a model ID or the local model path. |
+ | `--host 0.0.0.0` | Specify the host address that the service listens on. |
+ | `--port 80` | Specify the port number that the service listens on. |
+  | `--api-key EMPTY` | Require clients to present this API key (here the literal string `EMPTY`) so that only authorized users can access the service. |
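+
+  For example, with the variables from the previous step expanded (the tag and paths are illustrative; adjust them to your setup):
+  ```bash
+  docker run -it -d --name vllm -p 8000:80 \
+    -v /tmp/Qwen/Qwen2.5-1.5B-Instruct:/tmp/Qwen/Qwen2.5-1.5B-Instruct \
+    openeuler/vllm:0.7.1-oe2403lts \
+    --model /tmp/Qwen/Qwen2.5-1.5B-Instruct --host 0.0.0.0 --port 80 --api-key EMPTY
+  ```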
+
+- **OpenAI-Compatible Server**
+
+  This server can be queried in the same format as the OpenAI API. For example, to list the models:
+ ```bash
+ curl http://localhost:8000/v1/models
+ ```
+  You can pass the `--api-key` argument to make the server check for an API key in the request headers.
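+
+  Since the `docker run` example above starts the server with `--api-key EMPTY`, include that key as a bearer token in your requests, for example:
+  ```bash
+  curl http://localhost:8000/v1/models \
+    -H "Authorization: Bearer EMPTY"
+  ```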
+
+- **OpenAI Completions API with vLLM**
+
+ Once your server is started, you can query the model with input prompts:
+ ```bash
+ curl http://localhost:8000/v1/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Qwen/Qwen2.5-1.5B-Instruct",
+ "prompt": "San Francisco is a",
+ "max_tokens": 7,
+ "temperature": 0
+ }'
+ ```
+  Since this server is compatible with the OpenAI API, you can use it as a drop-in replacement for applications that use the OpenAI API.
+
+  Another way to query the server is with the `openai` Python package:
+  ```python
+ from openai import OpenAI
+ # Modify OpenAI's API key and API base to use vLLM's API server.
+ openai_api_key = "EMPTY"
+ openai_api_base = "http://localhost:8000/v1"
+ client = OpenAI(
+ api_key=openai_api_key,
+ base_url=openai_api_base,
+ )
+ completion = client.completions.create(model="Qwen/Qwen2.5-1.5B-Instruct",
+ prompt="San Francisco is a")
+ print("Completion result:", completion)
+ ```
+  If you have specified a local model path, replace `Qwen/Qwen2.5-1.5B-Instruct` with that path here as well.
+
+- **OpenAI Chat Completions API with vLLM**
+
+ vLLM is designed to also support the OpenAI Chat Completions API. The chat interface is a more dynamic, interactive way to communicate with the model, allowing back-and-forth exchanges that can be stored in the chat history. This is useful for tasks that require context or more detailed explanations.
+
+ You can use the create chat completion endpoint to interact with the model:
+  ```bash
+ curl http://localhost:8000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "Qwen/Qwen2.5-1.5B-Instruct",
+ "messages": [
+ {"role": "user", "content": "Who won the world series in 2020?"}
+ ],
+ "chat_template": "system: You are a helpful AI assistant."
+ }'
+ ```
+  Alternatively, you can use the `openai` Python package:
+  ```python
+ from openai import OpenAI
+ # Set OpenAI's API key and API base to use vLLM's API server.
+ openai_api_key = "EMPTY"
+ openai_api_base = "http://localhost:8000/v1"
+
+ client = OpenAI(
+ api_key=openai_api_key,
+ base_url=openai_api_base,
+ )
+
+ chat_response = client.chat.completions.create(
+ model="Qwen/Qwen2.5-1.5B-Instruct",
+ messages=[
+ {"role": "user", "content": "Tell me a joke."},
+ ],
+ extra_body={"chat_template": "system: You are a helpful AI assistant."}
+ )
+ print("Chat response:", chat_response)
+ ```
+  If you have specified a local model path, replace `Qwen/Qwen2.5-1.5B-Instruct` with that path here as well.
+
+- **View the container logs**
+
+ ```bash
+ docker logs -f vllm
+ ```
+
+- **Get an interactive shell**
+
+ ```bash
+ docker exec -it vllm /bin/bash
+ ```
+
+# Questions and answers
+If you have any questions or need additional features, please submit an issue or a pull request to [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images).
\ No newline at end of file
diff --git a/vllm/meta.yml b/vllm/meta.yml
index f7f8162028e7d34ed1f626018229803967a73bde..c6314469236fa11247dda5a766c428ffa3e97931 100644
--- a/vllm/meta.yml
+++ b/vllm/meta.yml
@@ -1,3 +1,6 @@
0.6.3-oe2403lts:
path: vllm/0.6.3/24.03-lts/Dockerfile
- arch: x86_64
\ No newline at end of file
+ arch: x86_64
+0.7.1-oe2403lts:
+ path: vllm/0.7.1/24.03-lts/Dockerfile
+ arch: aarch64
\ No newline at end of file