From c7e005b9871898cf1133efe6d2cdcaaea214787b Mon Sep 17 00:00:00 2001
From: zhang_xu_hao1230
Date: Thu, 17 Apr 2025 14:33:54 +0800
Subject: [PATCH] Add chunked prefill ST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../test_vllm_mf_qwen_7b_chunked_prefill.py   | 77 ++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tests/st/python/test_vllm_mf_qwen_7b_chunked_prefill.py

diff --git a/tests/st/python/test_vllm_mf_qwen_7b_chunked_prefill.py b/tests/st/python/test_vllm_mf_qwen_7b_chunked_prefill.py
new file mode 100644
index 0000000..14ec293
--- /dev/null
+++ b/tests/st/python/test_vllm_mf_qwen_7b_chunked_prefill.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+# Copyright 2025 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""test mf qwen chunked prefill."""
+import os
+
+import pytest
+
+from . import set_env
+
+# The Ascend environment must be configured before vllm_mindspore is imported.
+env_manager = set_env.EnvVarManager()
+env_vars = {
+    "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml",
+    "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"),
+    "vLLM_MODEL_BACKEND": "MindFormers",
+    "MS_ENABLE_LCCL": "off",
+    "HCCL_OP_EXPANSION_MODE": "AIV",
+    "ASCEND_RT_VISIBLE_DEVICES": "0,1",
+    "LCCL_DETERMINISTIC": "1",
+    "HCCL_DETERMINISTIC": "true",
+    "ATB_MATMUL_SHUFFLE_K_ENABLE": "0",
+    "ATB_LLM_LCOC_ENABLE": "0"
+}
+env_manager.setup_ai_environment(env_vars)
+import vllm_mindspore  # noqa: E402  # must be imported after env setup
+from vllm import LLM, SamplingParams  # noqa: E402
+
+
+class TestMfQwenChunkedPrefill:
+    """
+    Test qwen 7B with chunked prefill enabled.
+    """
+    @pytest.mark.level0
+    @pytest.mark.platform_arm_ascend910b_training
+    @pytest.mark.env_single
+    def test_mf_qwen_7b_chunked_prefill(self):
+        """
+        test case qwen_7b_chunked_prefill
+        """
+        # Long prompt: a Chinese question ("Please explain in detail what a
+        # large language model is, what high-performance computing is, and
+        # how the two combine"), padded with 512 '?' so the prefill exceeds
+        # max_num_batched_tokens and must be split into chunks.
+        prompts = "请详细介绍一下,什么是大语言模型?以及什么是高性能计算?两者如何结合????" + '?' * 512
+        # Greedy decoding (temperature=0.0, top_k=1) keeps the output deterministic.
+        sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1)
+        # Create an LLM with chunked prefill enabled.
+        llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
+                  max_model_len=8192, block_size=16, enable_chunked_prefill=True,
+                  max_num_batched_tokens=256, gpu_memory_utilization=0.9,
+                  tensor_parallel_size=2)
+        # Generate texts from the prompts. The output is a list of RequestOutput
+        # objects that contain the prompt, generated text, and other information.
+        outputs = llm.generate(prompts, sampling_params)
+        expect_list = [' many times and each time I have found something new']
+        for i, output in enumerate(outputs):
+            prompt = output.prompt
+            generated_text = output.outputs[0].text
+            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+            assert generated_text == expect_list[i]
+
+        env_manager.unset_all()
--
Gitee
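
For reviewers who want to exercise the same configuration outside the ST harness, here is a minimal offline-inference sketch (an illustration, not part of the patch: the model path is a placeholder, the prompt is arbitrary, and the Ascend environment setup that set_env performs above is assumed to already be in place):

    import vllm_mindspore  # noqa: F401  # hooks vLLM up to the MindSpore backend
    from vllm import LLM, SamplingParams

    # Placeholder path: point this at any local Qwen2.5-7B-Instruct checkpoint.
    MODEL_PATH = "/path/to/Qwen2.5-7B-Instruct"

    # With enable_chunked_prefill=True, a prefill longer than
    # max_num_batched_tokens (256 here) is split across scheduler steps
    # instead of being processed in a single batch, so decode requests
    # can be interleaved with the remaining prefill chunks.
    llm = LLM(model=MODEL_PATH,
              max_model_len=8192,
              block_size=16,
              enable_chunked_prefill=True,
              max_num_batched_tokens=256)

    # Any prompt longer than 256 tokens will exercise the chunked path.
    prompt = "What is chunked prefill? " * 64
    outputs = llm.generate(prompt, SamplingParams(temperature=0.0, max_tokens=10))
    print(outputs[0].outputs[0].text)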