From f5f929cbfe33b3f8109462f480c1b272bedabc5d Mon Sep 17 00:00:00 2001 From: moran Date: Thu, 12 Jun 2025 17:05:51 +0800 Subject: [PATCH] test hccl port --- .../cases_parallel/vllm_deepseek_bf16_part.py | 2 +- .../vllm_deepseek_bf16_part_v1.py | 2 +- .../cases_parallel/vllm_deepseek_osl.py | 2 +- .../cases_parallel/vllm_deepseek_part.py | 4 +- .../cases_parallel/vllm_deepseek_part_v1.py | 2 +- .../vllm_deepseek_smoothquant.py | 2 +- .../vllm_deepseek_smoothquant_mss.py | 2 +- .../python/cases_parallel/vllm_mf_qwen_7b.py | 2 +- .../vllm_mf_qwen_7b_chunk_prefill.py | 2 +- .../vllm_mf_qwen_7b_chunk_prefill_v1.py | 5 +- .../vllm_mf_qwen_7b_cp_pc_mss.py | 2 +- .../cases_parallel/vllm_mf_qwen_7b_mss.py | 2 +- .../vllm_mf_qwen_7b_prefix_caching.py | 2 +- .../vllm_mf_qwen_7b_prefix_caching_v1.py | 2 +- .../cases_parallel/vllm_mf_qwen_7b_v1.py | 2 +- .../st/python/cases_parallel/vllm_qwen_7b.py | 3 +- .../python/cases_parallel/vllm_qwen_7b_v1.py | 2 +- tests/st/python/test_cases_parallel.py | 220 ++++++++---------- .../python/test_vllm_deepseek_mix_parallel.py | 2 +- 19 files changed, 119 insertions(+), 143 deletions(-) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py index 6c29cc4c..96f5c3ad 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py @@ -55,7 +55,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py index 0a85b1ca..3afa425b 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py @@ -54,7 +54,7 @@ def test_deepseek_r1_bf16(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py index 0089b937..7a08b8f5 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_osl.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py @@ -65,7 +65,7 @@ def test_deepseek_r1(): "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, - tensor_parallel_size=2, + tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py index 7ef3e890..0ff6e15c 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py @@ -55,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. 
llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -86,7 +86,7 @@ def test_deepseek_mtp(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP", - trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096, + trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=1, max_model_len=4096, speculative_config={"num_speculative_tokens": 1}) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py index e5eb917a..7030c07c 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py @@ -54,7 +54,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py index 48d2441a..6d90d381 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py @@ -55,7 +55,7 @@ def test_deepseek_r1(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py index 111c91e4..24bb8573 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py @@ -55,7 +55,7 @@ def test_deepseek_r1_mss(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8, + trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, num_scheduler_steps=8, max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py index 64628408..0e936226 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py @@ -55,7 +55,7 @@ def test_mf_qwen(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + gpu_memory_utilization=0.9, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py index f5a6b7b3..00126e03 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py @@ -64,7 +64,7 @@ def test_mf_qwen_7b_chunk_prefill(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, + block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=1, enable_chunked_prefill=True) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py index 2515d765..3e097f05 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py @@ -39,6 +39,9 @@ import vllm_mindspore from vllm import LLM, SamplingParams +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_onecard def test_mf_qwen_7b_chunk_prefill(): """ test case qwen_7b_chunk_prefill @@ -63,7 +66,7 @@ def test_mf_qwen_7b_chunk_prefill(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=2) + block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for batch_data in batch_datas: diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py index b738f9b0..ed1cfa48 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py @@ -63,7 +63,7 @@ def test_mf_qwen_7b_cp_pc_mss(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, + block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=1, enable_chunked_prefill=True, enable_prefix_caching=True, num_scheduler_steps=8) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py index 6af45f55..3e334ca7 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py @@ -56,7 +56,7 @@ def test_mf_qwen_7b_mss(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, max_num_batched_tokens=8192, - block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=2) + block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py index e66e8f9a..2342f10f 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py @@ -57,7 +57,7 @@ def test_mf_qwen_7b_prefix_caching(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", max_model_len=8192, block_size=16, enable_prefix_caching=True, - gpu_memory_utilization=0.9, tensor_parallel_size=2) + gpu_memory_utilization=0.9, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py index 2d13ee95..1fd4b35a 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py @@ -55,7 +55,7 @@ def test_mf_qwen_7b_prefix_caching(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, block_size=16, tensor_parallel_size=2) + max_model_len=8192, block_size=16, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py index 122bce2d..e932a496 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py @@ -54,7 +54,7 @@ def test_mf_qwen(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + gpu_memory_utilization=0.9, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b.py b/tests/st/python/cases_parallel/vllm_qwen_7b.py index 5c25b5d7..4c895abd 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b.py @@ -38,6 +38,7 @@ import vllm_mindspore from vllm import LLM, SamplingParams + def test_vllm_qwen(): """ test case qwen2.5 7B @@ -53,7 +54,7 @@ def test_vllm_qwen(): # Create an LLM. 
llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + gpu_memory_utilization=0.9, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py index e1527ce4..c3118662 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py @@ -53,7 +53,7 @@ def test_vllm_qwen(): # Create an LLM. llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + gpu_memory_utilization=0.9, tensor_parallel_size=1) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py index aa1e2b11..3bf32282 100644 --- a/tests/st/python/test_cases_parallel.py +++ b/tests/st/python/test_cases_parallel.py @@ -49,20 +49,31 @@ def test_cases_parallel_part0(): Expectation: Pass. 
""" commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > " - "vllm_mf_qwen_7b_test_mf_qwen.log", - "vllm_mf_qwen_7b_test_mf_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", - "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", - "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log") + ("export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " + "> shm_broadcast_test_shm_broadcast.log", + "shm_broadcast_test_shm_broadcast.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_v1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log", + "vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61005 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6 && export 
LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16_1.log", + "vllm_deepseek_bf16_part_test_deepseek_r1_bf16_1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=7 && export LCAL_COMM_ID=127.0.0.1:10072 && " + "export HCCL_IF_BASE_PORT=61007 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " + "> vllm_deepseek_osl_test_deepseek_r1.log", + "vllm_deepseek_osl_test_deepseek_r1.log") ] with Pool(len(commands)) as pool: @@ -80,25 +91,46 @@ def test_cases_parallel_part1(): Expectation: Pass. """ commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " - "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", - "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", - "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", - "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", - "vllm_mf_qwen_7b_v1_test_mf_qwen.log") + 
("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " + "> vllm_deepseek_part_test_deepseek_r1.log", + "vllm_deepseek_part_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=1 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61001 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " + "> vllm_deepseek_part_test_deepseek_mtp.log", + "vllm_deepseek_part_test_deepseek_mtp.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " + "> vllm_deepseek_part_v1_test_deepseek_r1.log", + "vllm_deepseek_part_v1_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=3 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61003 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " + "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + "vllm_deepseek_smoothquant_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10072 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " + "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10073 && " + "export HCCL_IF_BASE_PORT=61005 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen " + "> vllm_mf_qwen_7b_test_mf_qwen.log", + "vllm_mf_qwen_7b_test_mf_qwen.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6 && export LCAL_COMM_ID=127.0.0.1:10074 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill 
" + "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", + "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=7 && export LCAL_COMM_ID=127.0.0.1:10075 && " + "export HCCL_IF_BASE_PORT=61007 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " + "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", + "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log") ] with Pool(len(commands)) as pool: @@ -116,95 +148,36 @@ def test_cases_parallel_part2(): Expectation: Pass. """ commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " + ("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " + "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", + "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=1 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61001 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " + "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", + "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " + "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", + "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=3 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61003 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen " + "> 
vllm_mf_qwen_7b_v1_test_mf_qwen.log", + "vllm_mf_qwen_7b_v1_test_mf_qwen.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10072 && " + "export HCCL_IF_BASE_PORT=61004 && " "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen " "> vllm_qwen_7b_test_vllm_qwen.log", "vllm_qwen_7b_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " + ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10073 && " + "export HCCL_IF_BASE_PORT=61005 && " "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen " "> vllm_qwen_7b_v1_test_vllm_qwen.log", - "vllm_qwen_7b_v1_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " - "> shm_broadcast_test_shm_broadcast.log", - "shm_broadcast_test_shm_broadcast.log") - ] - - with Pool(len(commands)) as pool: - results = list(pool.imap(run_command, commands)) - check_results(commands, results) - - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single -def test_cases_parallel_part3(): - """ - Feature: test cases parallel. - Description: test cases parallel. - Expectation: Pass. 
- """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", - "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", - "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " - "> vllm_deepseek_osl_test_deepseek_r1.log", - "vllm_deepseek_osl_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " - "> vllm_deepseek_part_test_deepseek_r1.log", - "vllm_deepseek_part_test_deepseek_r1.log") - ] - - with Pool(len(commands)) as pool: - results = list(pool.imap(run_command, commands)) - check_results(commands, results) - - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single -def test_cases_parallel_part4(): - """ - Feature: test cases parallel. - Description: test cases parallel. - Expectation: Pass. 
- """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " - "> vllm_deepseek_part_test_deepseek_mtp.log", - "vllm_deepseek_part_test_deepseek_mtp.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " - "> vllm_deepseek_part_v1_test_deepseek_r1.log", - "vllm_deepseek_part_v1_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " - "> vllm_deepseek_smoothquant_test_deepseek_r1.log", - "vllm_deepseek_smoothquant_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " - "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", - "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") + "vllm_qwen_7b_v1_test_vllm_qwen.log") ] with Pool(len(commands)) as pool: @@ -218,18 +191,17 @@ def test_cases_parallel_part4(): def test_cases_parallel_level1_part0(): """ Feature: test cases parallel. - Description: - vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss: accuracy error happens occasionally + Description: test cases parallel. Expectation: Pass. 
""" commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " + ("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", - "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log") + "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", + "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log") ] with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) - check_results(commands, results) + check_results(commands, results) \ No newline at end of file diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index d23097c6..aff05ef2 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -37,7 +37,7 @@ env_vars = { "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", + "HCCL_IF_BASE_PORT": "61095", "LCAL_COMM_ID": "127.0.0.1:10068" } env_manager.setup_ai_environment(env_vars) -- Gitee