diff --git a/tests/st/python/test_multilora_inference.py b/tests/st/python/cases_parallel/multilora_inference.py similarity index 94% rename from tests/st/python/test_multilora_inference.py rename to tests/st/python/cases_parallel/multilora_inference.py index d5e86441c8801096089acb248b8cf3dddb58fb12..7e2129a195dabdf6e6dba315571fcf4a04883d88 100644 --- a/tests/st/python/test_multilora_inference.py +++ b/tests/st/python/cases_parallel/multilora_inference.py @@ -20,7 +20,7 @@ for offline inference. """ import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -28,7 +28,6 @@ env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", @@ -60,7 +59,7 @@ def create_test_prompts( def process_requests(engine: LLMEngine, test_prompts: List[Tuple[str, SamplingParams, - Optional[LoRARequest]]]): + Optional[LoRARequest]]]): """Continuously process a list of prompts and handle the outputs.""" request_id = 0 @@ -101,9 +100,6 @@ def initialize_engine() -> LLMEngine: return LLMEngine.from_engine_args(engine_args) -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_multilora_inference(): """test function that sets up and runs the prompt processing.""" engine = initialize_engine() diff --git a/tests/st/python/test_vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_bf16_part_v1.py rename to tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py index 7a88aa370bb6ce1dabf0b1c8a384e7abed484de7..6c29cc4c9fd50d8d91b20fe4af7bb1529c88a3ab 100644 --- a/tests/st/python/test_vllm_deepseek_bf16_part_v1.py +++ 
b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,14 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -42,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_bf16(): """ test case deepseek r1 bf16 @@ -60,7 +55,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_bf16_part.py rename to tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py index 0d43481117fbee50ae4d387795122161408e378b..0a85b1caf2b7432209bcffdefcf45abed98947ae 100644 --- a/tests/st/python/test_vllm_deepseek_bf16_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,11 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "ATB_LLM_LCOC_ENABLE": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +39,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_bf16(): """ test case deepseek r1 bf16 @@ -61,7 +54,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_gptq_a16w4.py b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py similarity index 92% rename from tests/st/python/test_vllm_deepseek_gptq_a16w4.py rename to tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py index f61afd845c51087650501ac9f903436a407c5c6e..968f805ba77d022abf2aa547a98116d96e1dc9ad 100644 --- a/tests/st/python/test_vllm_deepseek_gptq_a16w4.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py @@ -19,7 +19,7 @@ import os import yaml import pytest -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -29,15 +29,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -45,9 +42,6 @@ import vllm_mindspore # noqa: F401, E402 from vllm import LLM, SamplingParams # noqa: E402 -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.allcards def test_deepseek_r1_gptq_a16w4(): """ test case deepseek r1 a16w4 diff --git a/tests/st/python/test_vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_osl.py rename to tests/st/python/cases_parallel/vllm_deepseek_osl.py index 5b72972b4d2c2dff89ba6f732da111563dfca7da..fc782b9e3169b0bd59c784c5a4cd1e31257847fa 100644 --- a/tests/st/python/test_vllm_deepseek_osl.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py @@ -20,7 +20,7 @@ isort:skip_file """ import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -31,15 +31,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -47,9 +44,6 @@ import vllm_mindspore # noqa: F401, E402 from vllm import LLM, SamplingParams # noqa: E402 -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -71,7 +65,7 @@ def test_deepseek_r1(): "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, - tensor_parallel_size=8, + tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. @@ -87,9 +81,6 @@ def test_deepseek_r1(): env_manager.unset_all() -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_mss(): """ test case deepseek r1 w8a8 mss @@ -111,7 +102,7 @@ def test_deepseek_r1_mss(): "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, - tensor_parallel_size=8, + tensor_parallel_size=2, num_scheduler_steps=8, max_model_len=4096) # Generate texts from the prompts. 
The output is a list of RequestOutput objects diff --git a/tests/st/python/test_vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_part.py rename to tests/st/python/cases_parallel/vllm_deepseek_part.py index 42e2db8b4e4d0e8b697de882583ab9fa26b39df7..7ef3e8901bca7157ff051bf94a764d4ee8a983ef 100644 --- a/tests/st/python/test_vllm_deepseek_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "on", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -61,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) @@ -76,9 +70,7 @@ def test_deepseek_r1(): # unset env env_manager.unset_all() -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single + def test_deepseek_mtp(): """ test case deepseek mtp with main model of r1-w8a8 @@ -94,7 +86,7 @@ def test_deepseek_mtp(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP", - trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=8, max_model_len=4096, + trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096, speculative_config={"num_speculative_tokens": 1}) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. diff --git a/tests/st/python/test_vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py similarity index 89% rename from tests/st/python/test_vllm_deepseek_part_v1.py rename to tests/st/python/cases_parallel/vllm_deepseek_part_v1.py index 9f5ecd72c4d022cc43e86adfb91b390273235f6e..e5eb917a6a203ae81964f50da993c285ee2df2c5 100644 --- a/tests/st/python/test_vllm_deepseek_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py @@ -17,7 +17,7 @@ """test mf deepseek r1.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,14 +27,11 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "ATB_LLM_LCOC_ENABLE": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -42,9 +39,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -60,7 +54,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_smoothquant.py rename to tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py index eb0ef892199b2bca8703b5a7c2f27a66353d1e14..48d2441adf2e5459ad80b95c518cf9529b58a122 100644 --- a/tests/st/python/test_vllm_deepseek_smoothquant.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py @@ -17,7 +17,7 @@ """test mf deepseek r1 smoothquant.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1(): """ test case deepseek r1 w8a8 @@ -61,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py similarity index 88% rename from tests/st/python/test_vllm_deepseek_smoothquant_mss.py rename to tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py index f286bc8e1a5177d015f9a489a7e754c6cc57f178..111c91e4bcdd4a6467ce0db0faec88599d6ee7f0 100644 --- a/tests/st/python/test_vllm_deepseek_smoothquant_mss.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py @@ -17,7 +17,7 @@ """test mf deepseek r1 smoothquant.""" import pytest import os -from . 
import set_env +from tests.st.python import set_env env_manager = set_env.EnvVarManager() # def env @@ -27,15 +27,12 @@ env_vars = { "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", "HCCL_OP_EXPANSION_MODE": "AIV", - "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", "MS_ALLOC_CONF": "enable_vmm:True", "LCCL_DETERMINISTIC": "1", "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "0", - "HCCL_IF_BASE_PORT": "60000", - "LCAL_COMM_ID": "127.0.0.1:10068" + "VLLM_USE_V1": "0" } # set env env_manager.setup_ai_environment(env_vars) @@ -43,9 +40,6 @@ import vllm_mindspore from vllm import LLM, SamplingParams -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single def test_deepseek_r1_mss(): """ test case deepseek r1 w8a8 mss @@ -61,7 +55,7 @@ def test_deepseek_r1_mss(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, num_scheduler_steps=8, + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py index 18c894f348349365d450b515d3d301f5e3186922..35d31ea8cea26f0340fd074dec98f28a296e4b97 100644 --- a/tests/st/python/test_cases_parallel.py +++ b/tests/st/python/test_cases_parallel.py @@ -50,21 +50,24 @@ def test_cases_parallel_part0(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > vllm_mf_qwen_7b_test_mf_qwen.log", "vllm_mf_qwen_7b_test_mf_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log"), ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", - "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), - + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/multilora_inference.py::test_multilora_inference " + "> multilora_inference_test_multilora_inference.log", + "multilora_inference_test_multilora_inference.log") ] with Pool(len(commands)) as pool: @@ -83,18 +86,22 @@ def test_cases_parallel_part1(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 
&& export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"), ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", "vllm_mf_qwen_7b_v1_test_mf_qwen.log") ] @@ -103,6 +110,7 @@ def test_cases_parallel_part1(): results = list(pool.imap(run_command, commands)) check_results(commands, results) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single @@ -114,14 +122,17 @@ def test_cases_parallel_part2(): """ commands = [ ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen " "> vllm_qwen_7b_test_vllm_qwen.log", "vllm_qwen_7b_test_vllm_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen " "> 
vllm_qwen_7b_v1_test_vllm_qwen.log", "vllm_qwen_7b_v1_test_vllm_qwen.log"), ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " "> shm_broadcast_test_shm_broadcast.log", "shm_broadcast_test_shm_broadcast.log") @@ -130,3 +141,109 @@ def test_cases_parallel_part2(): with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) check_results(commands, results) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def test_cases_parallel_part3(): + """ + Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. + """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4 " + "> vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log", + "vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def 
test_cases_parallel_part4(): + """ + Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. + """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss " + "> vllm_deepseek_osl_test_deepseek_r1_mss.log", + "vllm_deepseek_osl_test_deepseek_r1_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " + "> vllm_deepseek_part_test_deepseek_r1.log", + "vllm_deepseek_part_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " + "> vllm_deepseek_part_test_deepseek_mtp.log", + "vllm_deepseek_part_test_deepseek_mtp.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " + "> vllm_deepseek_part_v1_test_deepseek_r1.log", + "vllm_deepseek_part_v1_test_deepseek_r1.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) + + +@pytest.mark.level1 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_single +def test_cases_parallel_level1_part0(): + """ + Feature: test cases parallel. + Description: test cases parallel. + Expectation: Pass. 
+ """ + commands = [ + ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " + "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", + "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " + "> vllm_deepseek_osl_test_deepseek_r1.log", + "vllm_deepseek_osl_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " + "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + "vllm_deepseek_smoothquant_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " + "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") + ] + + with Pool(len(commands)) as pool: + results = list(pool.imap(run_command, commands)) + check_results(commands, results) diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index d23097c6abc653350c6fe1f0f2a642b8eda39ab3..eadecd8cc5b3573c14908b32d32ec22edb66c592 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -37,7 +37,7 @@ env_vars = { "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", + "HCCL_IF_BASE_PORT": "61000", "LCAL_COMM_ID": "127.0.0.1:10068" } 
env_manager.setup_ai_environment(env_vars)