From eb5f151e25bc58f76a3d549b7cfae5655727a352 Mon Sep 17 00:00:00 2001
From: lijiakun
Date: Fri, 28 Mar 2025 10:54:19 +0800
Subject: [PATCH] test

---
 tests/st/python/test_custom.py              | 62 -----------------
 tests/st/python/test_vllm_deepseek_part.py  | 77 ---------------------
 tests/st/python/test_vllm_mf_qwen_7b.py     | 77 ---------------------
 tests/st/python/test_vllm_mf_qwen_7b_mss.py | 77 ---------------------
 4 files changed, 293 deletions(-)
 delete mode 100644 tests/st/python/test_custom.py
 delete mode 100644 tests/st/python/test_vllm_deepseek_part.py
 delete mode 100644 tests/st/python/test_vllm_mf_qwen_7b.py
 delete mode 100644 tests/st/python/test_vllm_mf_qwen_7b_mss.py

diff --git a/tests/st/python/test_custom.py b/tests/st/python/test_custom.py
deleted file mode 100644
index 8afb085..0000000
--- a/tests/st/python/test_custom.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2024 The vLLM team.
-# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://wwww.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by application law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""test demo for st."""
-import pytest
-import mindspore as ms
-import vllm_mindspore
-from vllm_mindspore import npu_ops
-import numpy as np
-from mindspore import Tensor
-
-def testcase():
-    ms.context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
-    in_block_tables = Tensor(np.load("data/block_tables.npy").astype(np.int32))
-    in_input_positions = Tensor(np.load("data/input_positions.npy").astype(np.int32))
-    in_input_tokens = Tensor(np.load("data/input_tokens.npy").astype(np.int32))
-    in_sampled_token_ids = Tensor(np.load("data/sampled_token_ids.npy").astype(np.int32))
-    in_seq_lens_tensor = Tensor(np.load("data/seq_lens_tensor.npy").astype(np.int32))
-    in_slot_mapping = Tensor(np.load("data/slot_mapping.npy").astype(np.int32))
-    num_seqs = 256
-    num_queries = 256
-    block_size = 32
-    # block_tables_stride = 4
-    npu_ops.adv_step_flash(num_seqs=num_seqs,
-                           num_queries=num_queries,
-                           block_size=block_size,
-                           input_tokens=in_input_tokens,
-                           sampled_token_ids=in_sampled_token_ids,
-                           input_positions=in_input_positions,
-                           seq_lens=in_seq_lens_tensor,
-                           slot_mapping=in_slot_mapping,
-                           block_tables=in_block_tables)
-
-    out_block_tables = np.load("data/o_block_tables.npy").astype(np.int32)
-    out_input_positions = np.load("data/o_input_positions.npy").astype(np.int32)
-    out_input_tokens = np.load("data/o_input_tokens.npy").astype(np.int32)
-    out_sampled_token_ids = np.load("data/o_sampled_token_ids.npy").astype(np.int32)
-    out_seq_lens_tensor = np.load("data/o_seq_lens_tensor.npy").astype(np.int32)
-    out_slot_mapping = np.load("data/o_slot_mapping.npy").astype(np.int32)
-    assert np.allclose(in_block_tables, out_block_tables, 1e-3, 1e-3)
-    assert np.allclose(in_input_positions, out_input_positions, 1e-3, 1e-3)
-    assert np.allclose(in_input_tokens, out_input_tokens, 1e-3, 1e-3)
-    assert np.allclose(in_sampled_token_ids, out_sampled_token_ids, 1e-3, 1e-3)
-    assert np.allclose(in_seq_lens_tensor, out_seq_lens_tensor, 1e-3, 1e-3)
-    assert np.allclose(in_slot_mapping, out_slot_mapping, 1e-3, 1e-3)
-    print("passed.")
-
-
-if __name__ == "__main__":
-    testcase()
\ No newline at end of file
diff --git a/tests/st/python/test_vllm_deepseek_part.py b/tests/st/python/test_vllm_deepseek_part.py
deleted file mode 100644
index ce18a1e..0000000
--- a/tests/st/python/test_vllm_deepseek_part.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright 2024 The vLLM team.
-# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://wwww.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by application law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""test mf deepseek r1."""
-import pytest
-import os
-from . import set_env
-env_manager = set_env.EnvVarManager()
-# def env
-env_vars = {
-    "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml",
-    "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"),
-    "vLLM_MODEL_BACKEND": "MindFormers",
-    "vLLM_MODEL_MEMORY_USE_GB": "40",
-    "ASCEND_TOTAL_MEMORY_GB": "60",
-    "MS_ENABLE_LCCL": "off",
-    "HCCL_OP_EXPANSION_MODE": "AIV",
-    "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7",
-    "MS_ALLOC_CONF": "enable_vmm:True",
-    "LCCL_DETERMINISTIC": "1",
-    "HCCL_DETERMINISTIC": "true",
-    "ATB_MATMUL_SHUFFLE_K_ENABLE": "0",
-    "ATB_LLM_LCOC_ENABLE": "0"
-}
-# set env
-env_manager.setup_ai_environment(env_vars)
-import vllm_mindspore
-from vllm import LLM, SamplingParams
-
-class TestDeepSeek:
-    """
-    Test Deepseek.
-    """
-
-    @pytest.mark.level0
-    @pytest.mark.platform_arm_ascend910b_training
-    @pytest.mark.env_single
-    def test_deepseek_r1(self):
-        """
-        test case deepseek r1 w8a8
-        """
-
-        # Sample prompts.
-        prompts = [
-            "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 \n情感:<|Assistant|>\n",
-        ]
-
-        # Create a sampling params object.
-        sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1)
-
-        # Create an LLM.
-        llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8)
-        # Generate texts from the prompts. The output is a list of RequestOutput objects
-        # that contain the prompt, generated text, and other information.
-        outputs = llm.generate(prompts, sampling_params)
-        except_list=['ugs611ాలు哒ాలు mahassisemaSTE的道德']
-        # Print the outputs.
-        for i, output in enumerate(outputs):
-            prompt = output.prompt
-            generated_text = output.outputs[0].text
-            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-            assert generated_text == except_list[i]
-
-        # unset env
-        env_manager.unset_all()
\ No newline at end of file
diff --git a/tests/st/python/test_vllm_mf_qwen_7b.py b/tests/st/python/test_vllm_mf_qwen_7b.py
deleted file mode 100644
index bdec7cf..0000000
--- a/tests/st/python/test_vllm_mf_qwen_7b.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright 2024 The vLLM team.
-# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://wwww.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by application law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""test mf qwen."""
-import pytest
-import os
-from . import set_env
-env_manager = set_env.EnvVarManager()
-# def env
-env_vars = {
-    "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml",
-    "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"),
-    "vLLM_MODEL_BACKEND": "MindFormers",
-    "vLLM_MODEL_MEMORY_USE_GB": "20",
-    "ASCEND_TOTAL_MEMORY_GB": "29",
-    "MS_ENABLE_LCCL": "off",
-    "HCCL_OP_EXPANSION_MODE": "AIV",
-    "ASCEND_RT_VISIBLE_DEVICES": "0,1",
-    "MS_ALLOC_CONF": "enable_vmm:True",
-    "LCCL_DETERMINISTIC": "1",
-    "HCCL_DETERMINISTIC": "true",
-    "ATB_MATMUL_SHUFFLE_K_ENABLE": "0",
-    "ATB_LLM_LCOC_ENABLE": "0"
-}
-# set env
-env_manager.setup_ai_environment(env_vars)
-import vllm_mindspore
-from vllm import LLM, SamplingParams
-
-
-class TestMfQwen:
-    """
-    Test Qwen.
-    """
-    @pytest.mark.level0
-    @pytest.mark.platform_arm_ascend910b_training
-    @pytest.mark.env_single
-    def test_mf_qwen(self):
-        """
-        test case qwen2.5 7B
-        """
-
-        # Sample prompts.
-        prompts = [
-            "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 \n情感:<|Assistant|>\n",
-        ]
-
-        # Create a sampling params object.
-        sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1)
-
-        # Create an LLM.
-        llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", gpu_memory_utilization=0.9, tensor_parallel_size=2)
-        # Generate texts from the prompts. The output is a list of RequestOutput objects
-        # that contain the prompt, generated text, and other information.
-        outputs = llm.generate(prompts, sampling_params)
-        except_list=['中性<|Assistant|> 这句话']
-        # Print the outputs.
-        for i, output in enumerate(outputs):
-            prompt = output.prompt
-            generated_text = output.outputs[0].text
-            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-            assert generated_text == except_list[i]
-
-        # unset env
-        env_manager.unset_all()
\ No newline at end of file
diff --git a/tests/st/python/test_vllm_mf_qwen_7b_mss.py b/tests/st/python/test_vllm_mf_qwen_7b_mss.py
deleted file mode 100644
index 1f7796c..0000000
--- a/tests/st/python/test_vllm_mf_qwen_7b_mss.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright 2024 The vLLM team.
-# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://wwww.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by application law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""test mf qwen mss."""
-import pytest
-import os
-from . import set_env
-env_manager = set_env.EnvVarManager()
-# def env
-env_vars = {
-    "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml",
-    "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"),
-    "vLLM_MODEL_BACKEND": "MindFormers",
-    "vLLM_MODEL_MEMORY_USE_GB": "20",
-    "ASCEND_TOTAL_MEMORY_GB": "29",
-    "MS_ENABLE_LCCL": "off",
-    "HCCL_OP_EXPANSION_MODE": "AIV",
-    "ASCEND_RT_VISIBLE_DEVICES": "0,1",
-    "MS_ALLOC_CONF": "enable_vmm:True",
-    "LCCL_DETERMINISTIC": "1",
-    "HCCL_DETERMINISTIC": "true",
-    "ATB_MATMUL_SHUFFLE_K_ENABLE": "0",
-    "ATB_LLM_LCOC_ENABLE": "0"
-}
-# set env
-env_manager.setup_ai_environment(env_vars)
-import vllm_mindspore
-from vllm import LLM, SamplingParams
-
-class TestMfQwen_mss:
    """
    Test qwen.
    """
-    @pytest.mark.level0
-    @pytest.mark.platform_arm_ascend910b_training
-    @pytest.mark.env_single
-    def test_mf_qwen_7b_mss(self):
-        """
-        test case qwen_7b_mss
-        """
-
-        # Sample prompts.
-        prompts = [
-            "I love Beijing, because",
-        ]
-
-        # Create a sampling params object.
-        sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1)
-
-        # Create an LLM.
-        llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, max_num_batched_tokens=8192,
-                  block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=2)
-        # Generate texts from the prompts. The output is a list of RequestOutput objects
-        # that contain the prompt, generated text, and other information.
-        outputs = llm.generate(prompts, sampling_params)
-        except_list=[' it is a city with a long history. Which']
-        # Print the outputs.
-        for i, output in enumerate(outputs):
-            prompt = output.prompt
-            generated_text = output.outputs[0].text
-            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-            assert generated_text == except_list[i]
-
-        # unset env
-        env_manager.unset_all()
\ No newline at end of file
--
Gitee