diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py
index 6c29cc4c9fd50d8d91b20fe4af7bb1529c88a3ab..96f5c3ad25fbc048bfd412739a284423f63bb4e3 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py
@@ -55,7 +55,7 @@ def test_deepseek_r1_bf16():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096)
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py
index 0a85b1caf2b7432209bcffdefcf45abed98947ae..3afa425ba0c7631f8ccbbe2fbea4c1586149b1dc 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py
@@ -54,7 +54,7 @@ def test_deepseek_r1_bf16():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096)
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py
index 0089b9377f48c87b949de267cc093ef3994d5580..7a08b8f58903e93341718da7560674b6c8c9c79f 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_osl.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py
@@ -65,7 +65,7 @@ def test_deepseek_r1():
         "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl",
         trust_remote_code=True,
         gpu_memory_utilization=0.9,
-        tensor_parallel_size=2,
+        tensor_parallel_size=1,
         max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py
index 7ef3e8901bca7157ff051bf94a764d4ee8a983ef..0ff6e15c010d6cfe2215f5d0ec0eec55b879bf83 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_part.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py
@@ -55,7 +55,7 @@ def test_deepseek_r1():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096)
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
@@ -86,7 +86,7 @@ def test_deepseek_mtp():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP",
-              trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096,
+              trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=1, max_model_len=4096,
               speculative_config={"num_speculative_tokens": 1})
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py
index e5eb917a6a203ae81964f50da993c285ee2df2c5..7030c07c4edba8fa8b5c81e06a8d97938f05cc3e 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py
@@ -54,7 +54,7 @@ def test_deepseek_r1():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096)
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py
index 48d2441adf2e5459ad80b95c518cf9529b58a122..6d90d381e04fbf37c2da473ac3dd7bdb412394f9 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py
@@ -55,7 +55,7 @@ def test_deepseek_r1():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096)
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py
index 111c91e4bcdd4a6467ce0db0faec88599d6ee7f0..24bb8573234eb7f5584997a2a693f17a173e4bbb 100644
--- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py
+++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py
@@ -55,7 +55,7 @@ def test_deepseek_r1_mss():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig",
-              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8,
+              trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=1, num_scheduler_steps=8,
               max_model_len=4096)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py
index 64628408c76971543361b473d090114598280d49..0e936226a8ab2c88f00bf1bd1677ac64d7e1c9c8 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py
@@ -55,7 +55,7 @@ def test_mf_qwen():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
-              gpu_memory_utilization=0.9, tensor_parallel_size=2)
+              gpu_memory_utilization=0.9, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py
index f5a6b7b3b090e5d817c2f9fb6629dbf6f3d48e38..00126e0319cb68aa2c0dbbd37affa83ff20191c4 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py
@@ -64,7 +64,7 @@ def test_mf_qwen_7b_chunk_prefill():
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
               max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32,
-              block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2,
+              block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=1,
               enable_chunked_prefill=True)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py
index 2515d765188d107d59b5bebb16144cfd4a98b7b4..3e097f050f941cc0ee6c1f078f805017ed425773 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py
@@ -39,6 +39,9 @@
 import vllm_mindspore
 from vllm import LLM, SamplingParams
 
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend910b_training
+@pytest.mark.env_onecard
 def test_mf_qwen_7b_chunk_prefill():
     """
     test case qwen_7b_chunk_prefill
@@ -63,7 +66,7 @@ def test_mf_qwen_7b_chunk_prefill():
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
               max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32,
-              block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=2)
+              block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     for batch_data in batch_datas:
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py
index b738f9b013456bfaf9b2ecfff6caf6fe0adc559e..ed1cfa48c27a8fe047f7cc96608291271edcaab4 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py
@@ -63,7 +63,7 @@ def test_mf_qwen_7b_cp_pc_mss():
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
               max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32,
-              block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2,
+              block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=1,
               enable_chunked_prefill=True, enable_prefix_caching=True, num_scheduler_steps=8)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py
index 6af45f55ab21f4e9327278fbb8700b91b485049a..3e334ca7d1475d915be9c226187e25b155e9e142 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py
@@ -56,7 +56,7 @@ def test_mf_qwen_7b_mss():
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
               max_model_len=8192, max_num_batched_tokens=8192,
-              block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=2)
+              block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py
index e66e8f9a833a0bc8e5887c61ee248ac132a73eaa..2342f10ff845b1db6613a45f585e464959e54e53 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py
@@ -57,7 +57,7 @@ def test_mf_qwen_7b_prefix_caching():
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
               max_model_len=8192, block_size=16, enable_prefix_caching=True,
-              gpu_memory_utilization=0.9, tensor_parallel_size=2)
+              gpu_memory_utilization=0.9, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py
index 2d13ee954e349ab5388c68424447db533e4797e5..1fd4b35affacd4badf09567aa5efb80dbeaefe29 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py
@@ -55,7 +55,7 @@ def test_mf_qwen_7b_prefix_caching():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
-              max_model_len=8192, block_size=16, tensor_parallel_size=2)
+              max_model_len=8192, block_size=16, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py
index 122bce2d2f7f36fb13a7e23f96b9c39c9a83fb52..e932a496e8ed284b5565bd67365260918250b5c3 100644
--- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py
+++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py
@@ -54,7 +54,7 @@ def test_mf_qwen():
 
     # Create an LLM.
    llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
-              gpu_memory_utilization=0.9, tensor_parallel_size=2)
+              gpu_memory_utilization=0.9, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b.py b/tests/st/python/cases_parallel/vllm_qwen_7b.py
index 5c25b5d734c1b65dea51a085df2fda17cee799e2..4c895abd7373406eba30b646c49a9fe13d622ded 100644
--- a/tests/st/python/cases_parallel/vllm_qwen_7b.py
+++ b/tests/st/python/cases_parallel/vllm_qwen_7b.py
@@ -38,6 +38,7 @@
 import vllm_mindspore
 from vllm import LLM, SamplingParams
 
+
 def test_vllm_qwen():
     """
     test case qwen2.5 7B
@@ -53,7 +54,7 @@ def test_vllm_qwen():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
-              gpu_memory_utilization=0.9, tensor_parallel_size=2)
+              gpu_memory_utilization=0.9, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py
index e1527ce460e218263338a25d5feb223a3474bf9b..c31186623cc1a1d0b29d8df1d577a9c3b3e68d6c 100644
--- a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py
+++ b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py
@@ -53,7 +53,7 @@ def test_vllm_qwen():
 
     # Create an LLM.
     llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct",
-              gpu_memory_utilization=0.9, tensor_parallel_size=2)
+              gpu_memory_utilization=0.9, tensor_parallel_size=1)
     # Generate texts from the prompts. The output is a list of RequestOutput objects
     # that contain the prompt, generated text, and other information.
     outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py
index aa1e2b118609374d4d8c2e2b286f58f218069141..3bf32282a3a763320722e6ed4b75d808a7660049 100644
--- a/tests/st/python/test_cases_parallel.py
+++ b/tests/st/python/test_cases_parallel.py
@@ -49,20 +49,26 @@ def test_cases_parallel_part0():
     Expectation: Pass.
""" commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > " - "vllm_mf_qwen_7b_test_mf_qwen.log", - "vllm_mf_qwen_7b_test_mf_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", - "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", - "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log") + ("export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " + "> shm_broadcast_test_shm_broadcast.log", + "shm_broadcast_test_shm_broadcast.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_v1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log", + "vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61005 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " + "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_v1_bf16 " + "> vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log", + "vllm_deepseek_bf16_part_test_deepseek_v1_bf16.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=7 && export LCAL_COMM_ID=127.0.0.1:10072 && " + "export HCCL_IF_BASE_PORT=61007 && " + "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " + "> vllm_deepseek_osl_test_deepseek_r1.log", + "vllm_deepseek_osl_test_deepseek_r1.log") ] with Pool(len(commands)) as pool: @@ -80,25 +91,46 @@ def test_cases_parallel_part1(): Expectation: Pass. 
""" commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " - "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", - "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", - "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", - "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", - "vllm_mf_qwen_7b_v1_test_mf_qwen.log") + ("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " + "> vllm_deepseek_part_test_deepseek_r1.log", + "vllm_deepseek_part_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=1 && export LCAL_COMM_ID=127.0.0.1:10069 && " + "export HCCL_IF_BASE_PORT=61001 && " + "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " + "> vllm_deepseek_part_test_deepseek_mtp.log", + "vllm_deepseek_part_test_deepseek_mtp.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=2 && export LCAL_COMM_ID=127.0.0.1:10070 && " + "export HCCL_IF_BASE_PORT=61002 && " + "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " + "> vllm_deepseek_part_v1_test_deepseek_r1.log", + "vllm_deepseek_part_v1_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=3 && export LCAL_COMM_ID=127.0.0.1:10071 && " + "export HCCL_IF_BASE_PORT=61003 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " + "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + "vllm_deepseek_smoothquant_test_deepseek_r1.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10072 && " + "export HCCL_IF_BASE_PORT=61004 && " + "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " + "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10073 && " + "export HCCL_IF_BASE_PORT=61005 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen " + "> vllm_mf_qwen_7b_test_mf_qwen.log", + "vllm_mf_qwen_7b_test_mf_qwen.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=6 && export LCAL_COMM_ID=127.0.0.1:10074 && " + "export HCCL_IF_BASE_PORT=61006 && " + "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " + "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", + "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), + ("export ASCEND_RT_VISIBLE_DEVICES=7 && export 
+         "export HCCL_IF_BASE_PORT=61007 && "
+         "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill "
+         "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log",
+         "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log")
     ]
 
     with Pool(len(commands)) as pool:
@@ -116,95 +143,36 @@ def test_cases_parallel_part2():
     Expectation: Pass.
     """
     commands = [
-        ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && "
-         "export HCCL_IF_BASE_PORT=60000 && "
+        ("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && "
+         "export HCCL_IF_BASE_PORT=61000 && "
+         "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss "
+         "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log",
+         "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"),
+        ("export ASCEND_RT_VISIBLE_DEVICES=1 && export LCAL_COMM_ID=127.0.0.1:10069 && "
+         "export HCCL_IF_BASE_PORT=61001 && "
+         "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching "
+         "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log",
+         "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"),
+        ("export ASCEND_RT_VISIBLE_DEVICES=2 && export LCAL_COMM_ID=127.0.0.1:10070 && "
+         "export HCCL_IF_BASE_PORT=61002 && "
+         "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching "
+         "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log",
+         "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log"),
+        ("export ASCEND_RT_VISIBLE_DEVICES=3 && export LCAL_COMM_ID=127.0.0.1:10071 && "
+         "export HCCL_IF_BASE_PORT=61003 && "
+         "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen "
+         "> vllm_mf_qwen_7b_v1_test_mf_qwen.log",
+         "vllm_mf_qwen_7b_v1_test_mf_qwen.log"),
+        ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10072 && "
+         "export HCCL_IF_BASE_PORT=61004 && "
          "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen "
          "> vllm_qwen_7b_test_vllm_qwen.log",
          "vllm_qwen_7b_test_vllm_qwen.log"),
-        ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && "
-         "export HCCL_IF_BASE_PORT=60002 && "
+        ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10073 && "
+         "export HCCL_IF_BASE_PORT=61005 && "
          "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen "
          "> vllm_qwen_7b_v1_test_vllm_qwen.log",
-         "vllm_qwen_7b_v1_test_vllm_qwen.log"),
-        ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && "
-         "export HCCL_IF_BASE_PORT=60004 && "
-         "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast "
-         "> shm_broadcast_test_shm_broadcast.log",
-         "shm_broadcast_test_shm_broadcast.log")
-    ]
-
-    with Pool(len(commands)) as pool:
-        results = list(pool.imap(run_command, commands))
-    check_results(commands, results)
-
-
-@pytest.mark.level0
-@pytest.mark.platform_arm_ascend910b_training
-@pytest.mark.env_single
-def test_cases_parallel_part3():
-    """
-    Feature: test cases parallel.
-    Description: test cases parallel.
-    Expectation: Pass.
- """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", - "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", - "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " - "> vllm_deepseek_osl_test_deepseek_r1.log", - "vllm_deepseek_osl_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " - "> vllm_deepseek_part_test_deepseek_r1.log", - "vllm_deepseek_part_test_deepseek_r1.log") - ] - - with Pool(len(commands)) as pool: - results = list(pool.imap(run_command, commands)) - check_results(commands, results) - - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_single -def test_cases_parallel_part4(): - """ - Feature: test cases parallel. - Description: test cases parallel. - Expectation: Pass. - """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " - "> vllm_deepseek_part_test_deepseek_mtp.log", - "vllm_deepseek_part_test_deepseek_mtp.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=60002 && " - "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " - "> vllm_deepseek_part_v1_test_deepseek_r1.log", - "vllm_deepseek_part_v1_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=60004 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " - "> vllm_deepseek_smoothquant_test_deepseek_r1.log", - "vllm_deepseek_smoothquant_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=60006 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " - "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", - "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") + "vllm_qwen_7b_v1_test_vllm_qwen.log") ] with Pool(len(commands)) as pool: @@ -218,18 +191,17 @@ def test_cases_parallel_part4(): def test_cases_parallel_level1_part0(): """ Feature: test cases parallel. - Description: - vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss: accuracy error happens occasionally + Description: test cases parallel. Expectation: Pass. 
""" commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=60000 && " + ("export ASCEND_RT_VISIBLE_DEVICES=0 && export LCAL_COMM_ID=127.0.0.1:10068 && " + "export HCCL_IF_BASE_PORT=61000 && " "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", - "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log") + "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", + "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log") ] with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) - check_results(commands, results) + check_results(commands, results) \ No newline at end of file diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index d23097c6abc653350c6fe1f0f2a642b8eda39ab3..aff05ef2fd11a80e94788b5ce92a86f0e0e4f4d6 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -37,7 +37,7 @@ env_vars = { "HCCL_DETERMINISTIC": "true", "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", "ATB_LLM_LCOC_ENABLE": "0", - "HCCL_IF_BASE_PORT": "60000", + "HCCL_IF_BASE_PORT": "61095", "LCAL_COMM_ID": "127.0.0.1:10068" } env_manager.setup_ai_environment(env_vars)