From 00c70231264859257768cb53ec49366c49d50e33 Mon Sep 17 00:00:00 2001 From: moran Date: Wed, 11 Jun 2025 10:24:11 +0800 Subject: [PATCH] run ds ST case parallel --- .../multilora_inference.py} | 8 +- .../vllm_deepseek_bf16_part.py} | 11 +- .../vllm_deepseek_bf16_part_v1.py} | 13 +- .../vllm_deepseek_gptq_a16w4.py} | 10 +- .../vllm_deepseek_osl.py} | 17 +-- .../vllm_deepseek_part.py} | 18 +-- .../vllm_deepseek_part_v1.py} | 12 +- .../vllm_deepseek_smoothquant.py} | 12 +- .../vllm_deepseek_smoothquant_mss.py} | 12 +- tests/st/python/test_cases_parallel.py | 125 +++++++++++++++++- .../python/test_vllm_deepseek_mix_parallel.py | 2 +- 11 files changed, 150 insertions(+), 90 deletions(-) rename tests/st/python/{test_multilora_inference.py => cases_parallel/multilora_inference.py} (94%) rename tests/st/python/{test_vllm_deepseek_bf16_part_v1.py => cases_parallel/vllm_deepseek_bf16_part.py} (89%) rename tests/st/python/{test_vllm_deepseek_bf16_part.py => cases_parallel/vllm_deepseek_bf16_part_v1.py} (88%) rename tests/st/python/{test_vllm_deepseek_gptq_a16w4.py => cases_parallel/vllm_deepseek_gptq_a16w4.py} (92%) rename tests/st/python/{test_vllm_deepseek_osl.py => cases_parallel/vllm_deepseek_osl.py} (89%) rename tests/st/python/{test_vllm_deepseek_part.py => cases_parallel/vllm_deepseek_part.py} (89%) rename tests/st/python/{test_vllm_deepseek_part_v1.py => cases_parallel/vllm_deepseek_part_v1.py} (89%) rename tests/st/python/{test_vllm_deepseek_smoothquant.py => cases_parallel/vllm_deepseek_smoothquant.py} (88%) rename tests/st/python/{test_vllm_deepseek_smoothquant_mss.py => cases_parallel/vllm_deepseek_smoothquant_mss.py} (88%) diff --git a/tests/st/python/test_multilora_inference.py b/tests/st/python/cases_parallel/multilora_inference.py similarity index 94% rename from tests/st/python/test_multilora_inference.py rename to tests/st/python/cases_parallel/multilora_inference.py index d5e86441..7e2129a1 100644 --- a/tests/st/python/test_multilora_inference.py +++ 
b/tests/st/python/cases_parallel/multilora_inference.py @@ -20,7 +20,7 @@ for offline inference. """ import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -28,7 +28,6 @@ env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", @@ -60,7 +59,7 @@ def create_test_prompts( def process_requests(engine: LLMEngine, test_prompts: List[Tuple[str, SamplingParams, - Optional[LoRARequest]]]): + Optional[LoRARequest]]]): """Continuously process a list of prompts and handle the outputs.""" request_id = 0 @@ -101,9 +100,6 @@ def initialize_engine() -> LLMEngine: return LLMEngine.from_engine_args(engine_args) -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_multilora_inference(): """test function that sets up and runs the prompt processing.""" engine = initialize_engine() diff --git a/tests/st/python/test_vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_bf16_part_v1.py rename to tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py index 7a88aa37..6c29cc4c 100644 --- a/tests/st/python/test_vllm_deepseek_bf16_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,14 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -42,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_bf16(): """ test case deepseek r1 bf16 @@ -60,7 +55,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_bf16_part.py rename to tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py index 0d434811..0a85b1ca 100644 --- a/tests/st/python/test_vllm_deepseek_bf16_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,11 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "ATB_LLM_LCOC_ENABLE": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +39,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_bf16(): """ test case deepseek r1 bf16 @@ -61,7 +54,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_gptq_a16w4.py b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py similarity index 92% rename from tests/st/python/test_vllm_deepseek_gptq_a16w4.py rename to tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py index f61afd84..968f805b 100644 --- a/tests/st/python/test_vllm_deepseek_gptq_a16w4.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py @@ -19,7 +19,7 @@ import os import yaml import pytest -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -29,15 +29,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -45,9 +42,6 @@ import vllm_mindspore # noqa: F401, E402 from vllm import LLM, SamplingParams # noqa: E402 -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.allcards def test_deepseek_r1_gptq_a16w4(): """ test case deepseek r1 a16w4 diff --git a/tests/st/python/test_vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_osl.py rename to tests/st/python/cases_parallel/vllm_deepseek_osl.py index 5b72972b..fc782b9e 100644 --- a/tests/st/python/test_vllm_deepseek_osl.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py @@ -20,7 +20,7 @@ isort:skip_file """ import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -31,15 +31,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -47,9 +44,6 @@ import vllm_mindspore # noqa: F401, E402 from vllm import LLM, SamplingParams # noqa: E402 -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -71,7 +65,7 @@ def test_deepseek_r1(): "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, - tensor_parallel_size=8, + tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. @@ -87,9 +81,6 @@ def test_deepseek_r1(): env_manager.unset_all() -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_mss(): """ test case deepseek r1 w8a8 mss @@ -111,7 +102,7 @@ def test_deepseek_r1_mss(): "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, - tensor_parallel_size=8, + tensor_parallel_size=2, num_scheduler_steps=8, max_model_len=4096) # Generate texts from the prompts. 
The output is a list of RequestOutput objects diff --git a/tests/st/python/test_vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_part.py rename to tests/st/python/cases_parallel/vllm_deepseek_part.py index 42e2db8b..7ef3e890 100644 --- a/tests/st/python/test_vllm_deepseek_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -61,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) @@ -76,9 +70,7 @@ def test_deepseek_r1(): # unset env env_manager.unset_all() -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single + def test_deepseek_mtp(): """ test case deepseek mtp with main model of r1-w8a8 @@ -94,7 +86,7 @@ def test_deepseek_mtp(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP", - trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=8, max_model_len=4096, + trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096, speculative_config={"num_speculative_tokens": 1}) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. diff --git a/tests/st/python/test_vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_part_v1.py rename to tests/st/python/cases_parallel/vllm_deepseek_part_v1.py index 9f5ecd72..e5eb917a 100644 --- a/tests/st/python/test_vllm_deepseek_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,14 +27,11 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "ATB_LLM_LCOC_ENABLE": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -42,9 +39,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -60,7 +54,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_smoothquant.py rename to tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py index eb0ef892..48d2441a 100644 --- a/tests/st/python/test_vllm_deepseek_smoothquant.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py @@ -17,7 +17,7 @@ """test mf deepseek r1 smoothquant.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -61,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_smoothquant_mss.py rename to tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py index f286bc8e..111c91e4 100644 --- a/tests/st/python/test_vllm_deepseek_smoothquant_mss.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py @@ -17,7 +17,7 @@ """test mf deepseek r1 smoothquant.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_mss(): """ test case deepseek r1 w8a8 mss @@ -61,7 +55,7 @@ def test_deepseek_r1_mss(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, num_scheduler_steps=8, + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py index 18c894f3..35d31ea8 100644 --- a/tests/st/python/test_cases_parallel.py +++ b/tests/st/python/test_cases_parallel.py @@ -50,21 +50,24 @@ def test_cases_parallel_part0(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > vllm_mf_qwen_7b_test_mf_qwen.log", "vllm_mf_qwen_7b_test_mf_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log"), ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", - "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), - + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/multilora_inference.py::test_multilora_inference " + "> multilora_inference_test_multilora_inference.log", + "multilora_inference_test_multilora_inference.log") ] with Pool(len(commands)) as pool: @@ -83,18 +86,22 @@ def test_cases_parallel_part1(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export
HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"), ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", "vllm_mf_qwen_7b_v1_test_mf_qwen.log") ] @@ -103,6 +110,7 @@ def test_cases_parallel_part1(): results = list(pool.imap(run_command, commands)) check_results(commands, results) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single @@ -114,14 +122,17 @@ def test_cases_parallel_part2(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen " "> vllm_qwen_7b_test_vllm_qwen.log", "vllm_qwen_7b_test_vllm_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen " "> vllm_qwen_7b_v1_test_vllm_qwen.log", 
"vllm_qwen_7b_v1_test_vllm_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " "> shm_broadcast_test_shm_broadcast.log", "shm_broadcast_test_shm_broadcast.log") @@ -130,3 +141,109 @@ def test_cases_parallel_part2(): with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) check_results(commands, results) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def test_cases_parallel_part3(): + """ + Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. + """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4 " + "> vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log", + "vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def test_cases_parallel_part4(): + """ + 
Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. + """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss " + "> vllm_deepseek_osl_test_deepseek_r1_mss.log", + "vllm_deepseek_osl_test_deepseek_r1_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " + "> vllm_deepseek_part_test_deepseek_r1.log", + "vllm_deepseek_part_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " + "> vllm_deepseek_part_test_deepseek_mtp.log", + "vllm_deepseek_part_test_deepseek_mtp.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " + "> vllm_deepseek_part_v1_test_deepseek_r1.log", + "vllm_deepseek_part_v1_test_deepseek_r1.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) + + +@pytest.mark.level1 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def test_cases_parallel_level1_part0(): + """ + Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. 
+ """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " + "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", + "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " + "> vllm_deepseek_osl_test_deepseek_r1.log", + "vllm_deepseek_osl_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " + "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + "vllm_deepseek_smoothquant_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " + "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index d23097c6..eadecd8c 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -37,7 +37,7 @@ env_vars = { "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", + "HCCL_IF_BASE_PORT": "61000", "LCAL_COMM_ID": "127.0.0.1:10068" } env_manager.setup_ai_environment(env_vars) -- Gitee