diff --git a/tests/st/python/cases_parallel/multilora_inference.py b/tests/st/python/cases_parallel/multilora_inference.py index 7e2129a195dabdf6e6dba315571fcf4a04883d88..c4799934e0517698139fa5670eb951c23035f277 100644 --- a/tests/st/python/cases_parallel/multilora_inference.py +++ b/tests/st/python/cases_parallel/multilora_inference.py @@ -18,11 +18,19 @@ This example shows how to use the multi-LoRA functionality for offline inference. """ -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -59,7 +67,7 @@ def create_test_prompts( def process_requests(engine: LLMEngine, test_prompts: List[Tuple[str, SamplingParams, - Optional[LoRARequest]]]): + Optional[LoRARequest]]]): """Continuously process a list of prompts and handle the outputs.""" request_id = 0 diff --git a/tests/st/python/cases_parallel/shm_broadcast.py b/tests/st/python/cases_parallel/shm_broadcast.py index d4d98fd237c021b32139ed3610cedcaccc099020..7dbb7b5c8be3c1e25469b99485c13ce671730235 100644 --- a/tests/st/python/cases_parallel/shm_broadcast.py +++ b/tests/st/python/cases_parallel/shm_broadcast.py @@ -15,7 +15,10 @@ # limitations under the License. # ============================================================================ """test cpu communicator and share memory""" -import pytest + +# type: ignore +# isort: skip_file + import multiprocessing import random import time @@ -27,8 +30,12 @@ import torch.distributed as dist import vllm_mindspore from vllm.distributed.device_communicators.shm_broadcast import MessageQueue -from vllm.distributed.utils import StatelessProcessGroup -from vllm.utils import get_ip, get_open_port, update_environment_variables, get_distributed_init_method +from vllm.utils import get_ip, get_open_port, get_distributed_init_method +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() def get_arrays(n: int, seed: int = 0) -> List[np.ndarray]: @@ -47,7 +54,9 @@ def distributed_run(fn, world_size): distributed_init_method = get_distributed_init_method("127.0.0.1", port) for i in range(number_of_processes): - p = multiprocessing.Process(target=fn, args=(distributed_init_method, i, world_size)) + p = multiprocessing.Process(target=fn, + args=(distributed_init_method, i, + world_size)) processes.append(p) p.start() diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py index 6c29cc4c9fd50d8d91b20fe4af7bb1529c88a3ab..a2d3dcaf60bdade1b351fb93a094800a5ba0bd20 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b.yaml", @@ -54,8 +62,12 @@ def test_deepseek_r1_bf16(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py index 4d4fb5c0f9782e296da5553f5bc3037ee67ed3dc..c08930c3ddde8350c1ae1c6bd9b98dde7b0f1eec 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py @@ -16,11 +16,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b.yaml", diff --git a/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py index 968f805ba77d022abf2aa547a98116d96e1dc9ad..0cff9cd16469beb2b668a85a44ec9adcfc292c31 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py @@ -16,12 +16,20 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1 gptq int4 quantization.""" + +# type: ignore +# isort: skip_file + import os import yaml -import pytest -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_a16w4.yaml", diff --git a/tests/st/python/cases_parallel/vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py index fc782b9e3169b0bd59c784c5a4cd1e31257847fa..f6d61e6e29b3eba159f2b9b7b6d579b4fa6f8c90 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_osl.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py @@ -18,15 +18,23 @@ test mf deepseek r1 osl. 
isort:skip_file """ -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": - "./config/predict_deepseek_r1_671b_w8a8_osl.yaml", + "./config/predict_deepseek_r1_671b_w8a8_osl.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -55,14 +63,11 @@ def test_deepseek_r1(): ] # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, - max_tokens=10, - top_k=1) + sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. llm = LLM( - model= - "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, @@ -92,14 +97,11 @@ def test_deepseek_r1_mss(): ] # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, - max_tokens=10, - top_k=1) + sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. llm = LLM( - model= - "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py index 7ef3e8901bca7157ff051bf94a764d4ee8a983ef..e3062ba8d7a7529920adcf27d5066e86e9badb32 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -env_manager = set_env.EnvVarManager() + +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", @@ -54,12 +62,18 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt @@ -86,12 +100,17 @@ def test_deepseek_mtp(): # Create an LLM. 
llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP", - trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096, + trust_remote_code=True, + gpu_memory_utilization=0.7, + tensor_parallel_size=2, + max_model_len=4096, speculative_config={"num_speculative_tokens": 1}) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py index e5eb917a6a203ae81964f50da993c285ee2df2c5..b5fc5e112266387c91efbb7d6f073facd21ca4ab 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + -env_manager = set_env.EnvVarManager() +def teardown_function(): + utils.cleanup_subprocesses() + + +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", @@ -53,12 +61,18 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py index 48d2441adf2e5459ad80b95c518cf9529b58a122..5938b0f4edf39948e18cd0ae16cbf014a3fa7161 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py @@ -15,14 +15,23 @@ # limitations under the License. 
# ============================================================================ """test mf deepseek r1 smoothquant.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { - "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", + "MINDFORMERS_MODEL_CONFIG": + "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -54,8 +63,13 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model= + "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py index 111c91e4bcdd4a6467ce0db0faec88599d6ee7f0..236c9a23f20a787ee7180282c54b98ef34c1e178 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py @@ -15,14 +15,23 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1 smoothquant.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { - "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", + "MINDFORMERS_MODEL_CONFIG": + "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -54,9 +63,14 @@ def test_deepseek_r1_mss(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8, - max_model_len=4096) + llm = LLM( + model= + "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + num_scheduler_steps=8, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_llama3.py b/tests/st/python/cases_parallel/vllm_llama3.py index 656c744d960bbe1c497719de341f9ca7e4907db7..463910fa0c5b0ab53b3991948d66c0f397e3e8ae 100644 --- a/tests/st/python/cases_parallel/vllm_llama3.py +++ b/tests/st/python/cases_parallel/vllm_llama3.py @@ -14,39 +14,45 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ - -# isort:skip_file """test vllm llama3.""" import os -import pytest - -from tests.st.python import set_env - -env_manager = set_env.EnvVarManager() -# def env -env_vars = { - "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), - "MS_ENABLE_LCCL": "off", - "HCCL_OP_EXPANSION_MODE": "AIV", - "MS_ALLOC_CONF": "enable_vmm:True", - "LCCL_DETERMINISTIC": "1", - "HCCL_DETERMINISTIC": "true", - "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "1", - "HCCL_IF_BASE_PORT": "60000" -} -# set env -env_manager.setup_ai_environment(env_vars) -import vllm_mindspore -from vllm import LLM, SamplingParams +from tests.st.python import utils + +env_manager = utils.EnvVarManager() + + +def setup_function(): + # def env + env_vars = { + "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), + "MS_ENABLE_LCCL": "off", + "HCCL_OP_EXPANSION_MODE": "AIV", + "MS_ALLOC_CONF": "enable_vmm:True", + "LCCL_DETERMINISTIC": "1", + "HCCL_DETERMINISTIC": "true", + "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", + "ATB_LLM_LCOC_ENABLE": "0", + "VLLM_USE_V1": "1", + "HCCL_IF_BASE_PORT": "60000" + } + # set env + env_manager.setup_ai_environment(env_vars) + # Enable vllm-mindsproe. + import vllm_mindspore + + +def teardown_function(): + # unset env + env_manager.unset_all() + utils.cleanup_subprocesses() def test_vllm_llama3_8b(): """ test case llama3.1 8B """ + from vllm import LLM, SamplingParams # Sample prompts. prompts = [ @@ -74,14 +80,12 @@ def test_vllm_llama3_8b(): print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") assert generated_text == except_list[i] - # unset env - env_manager.unset_all() - def test_vllm_llama3_1b(): """ test case llama3.2 1B """ + from vllm import LLM, SamplingParams # Sample prompts. prompts = [ diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py index 48de1692134eff0f30e54de79fcabe8b3e4dc52d..c6765a7895271a97bb061c6b222c67b28606fdc4 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py @@ -15,13 +15,20 @@ # limitations under the License. 
# ============================================================================ """test mf qwen.""" + +# type: ignore +# isort: skip_file + import os -import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -from tests.st.python import set_env -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py index aeb62ef7af753cda7509f7ef6b96da8c91d2379c..28b142489378643ce87a2f3c686a70bc964c14d5 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py @@ -15,13 +15,20 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" + +# type: ignore +# isort: skip_file + import os -import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -from tests.st.python import set_env -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py index 684974212ef9582def3521efbf35dab547d47c22..f60bbd75add4ed5aedfc6e3c16c389fa2eab21bd 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -55,8 +63,10 @@ def test_mf_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py index b5738848b7de75b680179a8611691c393e525d8e..a9928ac349472238fb78111718a00aee3914f724 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test mf qwen chunk prefill.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -47,26 +55,39 @@ def test_mf_qwen_7b_chunk_prefill(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " - "a unique and fascinating destination. In short, Beijing is a city"}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " + "a unique and fascinating destination. In short, Beijing is a city" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, - enable_chunked_prefill=True) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + enable_chunked_prefill=True) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for batch_data in batch_datas: @@ -76,7 +97,9 @@ def test_mf_qwen_7b_chunk_prefill(): # Print the outputs. 
for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py index 4d8a32d8d57cac741cc3f440ffe649bc2a118007..6011edb3ef3b57dd217cef206d54cd5a1dcd201d 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen chunk prefill.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -46,25 +54,38 @@ def test_mf_qwen_7b_chunk_prefill(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " - "a unique and fascinating destination. In short, Beijing is a city"}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " + "a unique and fascinating destination. In short, Beijing is a city" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. 
- llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.85, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for batch_data in batch_datas: @@ -74,7 +95,9 @@ def test_mf_qwen_7b_chunk_prefill(): # Print the outputs. for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py index 35605d7d9c7426d4de608c1980c1d9b4b3bea87d..c84157cf972ed28242fff325a87845a46e955ffe 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen chunk prefill, prefix cache, mss.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -47,25 +55,40 @@ def test_mf_qwen_7b_cp_pc_mss(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": ""}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + "" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. 
sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, - enable_chunked_prefill=True, enable_prefix_caching=True, num_scheduler_steps=8) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + enable_chunked_prefill=True, + enable_prefix_caching=True, + num_scheduler_steps=8) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for _ in range(3): @@ -76,7 +99,9 @@ def test_mf_qwen_7b_cp_pc_mss(): # Print the outputs. for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py index 43d863f85b555bca6bdf1b6d1e71bb97492ab275..888b61a026ad5e83257c551de61251face8ed94b 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen mss.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -55,9 +63,14 @@ def test_mf_qwen_7b_mss(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_batched_tokens=8192, - block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_batched_tokens=8192, + block_size=32, + gpu_memory_utilization=0.9, + num_scheduler_steps=8, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py index 17df205386ef6a09bf5e0c5b1d46093aabc9f5fb..1ee84a069a2b36f48d6f7501fc6b7a65fecfeecf 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py @@ -14,13 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ - """test mf qwen prefix caching.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -56,9 +63,13 @@ def test_mf_qwen_7b_prefix_caching(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, block_size=16, enable_prefix_caching=True, - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + block_size=16, + enable_prefix_caching=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -67,11 +78,15 @@ def test_mf_qwen_7b_prefix_caching(): second_except_list = [' in Beijing, but I have to say that the'] for i, (output, second_output) in enumerate(zip(outputs, second_outputs)): generated_text = output.outputs[i].text - print(f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}") + print( + f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}" + ) assert generated_text == except_list[i] second_generated_text = second_output.outputs[i].text - print(f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}") + print( + f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}" + ) assert second_generated_text == second_except_list[i] env_manager.unset_all() diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py index ffde9d4684eb0bd4d661d2bdc8168dde50c09088..b45cce75b3fb7bb8b5ccacc25fa4b5b76151272a 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py @@ -14,13 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ - """test mf qwen prefix caching.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -55,8 +62,11 @@ def test_mf_qwen_7b_prefix_caching(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. 
- llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, block_size=16, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + block_size=16, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -65,11 +75,15 @@ def test_mf_qwen_7b_prefix_caching(): second_except_list = [' in Beijing, but I have to say that the'] for i, (output, second_output) in enumerate(zip(outputs, second_outputs)): generated_text = output.outputs[i].text - print(f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}") + print( + f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}" + ) assert generated_text == except_list[i] second_generated_text = second_output.outputs[i].text - print(f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}") + print( + f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}" + ) assert second_generated_text == second_except_list[i] env_manager.unset_all() diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py index 6963da0d8d8a014bb42bebf7b171816fa78bf03f..99aed9ac3171f63d9736757feb861dd6ee0c17f4 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py @@ -15,11 +15,20 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -54,8 +63,10 @@ def test_mf_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -78,26 +89,29 @@ def test_mf_qwen_batch(): """ # Sample prompts. prompts = [ - "北京烤鸭是", - "请介绍一下华为,华为是", - "今年似乎大模型之间的内卷已经有些偃旗息鼓了,各大技术公司逐渐聪单纯追求模型参数量的竞赛中抽身," - "转向更加注重模型的实际>应用效果和效率", - ] * 2 + "北京烤鸭是", + "请介绍一下华为,华为是", + "今年似乎大模型之间的内卷已经有些偃旗息鼓了,各大技术公司逐渐聪单纯追求模型参数量的竞赛中抽身," + "转向更加注重模型的实际>应用效果和效率", + ] * 2 # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", block_size=32, - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. 
The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) except_list = [ - "享誉世界的中华美食,其制作工艺独特,", - "做什么的? 华为是一家中国公司,", - "。 \n在这一背景下,阿里云发布了通", - ] * 2 + "享誉世界的中华美食,其制作工艺独特,", + "做什么的? 华为是一家中国公司,", + "。 \n在这一背景下,阿里云发布了通", + ] * 2 # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py index d776c8d93dacc2fc6a3fc28453083c2de9ba320c..caa2b4053c443a6c92d568773fa40c8846665d1c 100644 --- a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py @@ -15,14 +15,23 @@ # limitations under the License. # ============================================================================ """test mf qwen2.5 vl 7B.""" + +# type: ignore +# isort: skip_file + import os from PIL import Image -from tests.st.python import set_env +from tests.st.python import utils from tests.st.python.cases_parallel.similarity import compare_distance -env_manager = set_env.EnvVarManager() + +def teardown_function(): + utils.cleanup_subprocesses() + + +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b.py b/tests/st/python/cases_parallel/vllm_qwen_7b.py index b3f6b080aab8d042f7ae7c464f911b8dce4b4d22..3aba6a1fb4f07ab1b1694ef80cccaad1d2192f25 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test vllm qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -53,8 +61,10 @@ def test_vllm_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py index 8672855f2b60ad81485b416aa21a414cfcf52158..8b2300e798f4d0139875eccfde6e20669fbeb509 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test vllm qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -53,8 +61,10 @@ def test_vllm_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py index 3724e45fd922aee494b1f6a74edaff4fc6087d03..85a4de80df52936d0b1daadcf050625ae7d00402 100644 --- a/tests/st/python/test_cases_parallel.py +++ b/tests/st/python/test_cases_parallel.py @@ -22,6 +22,12 @@ from multiprocessing.pool import Pool import pytest +from .utils import cleanup_subprocesses, tasks_resource_alloc + + +def teardown_function(): + cleanup_subprocesses() + def run_command(command_info): cmd, log_path = command_info @@ -48,27 +54,19 @@ def test_cases_parallel_part0(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > vllm_mf_qwen_7b_test_mf_qwen.log", + cases = [ + (2, "cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen", "vllm_mf_qwen_7b_test_mf_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill", "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 &&" - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill", "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/multilora_inference.py::test_multilora_inference " - "> multilora_inference_test_multilora_inference.log", + (2, "cases_parallel/multilora_inference.py::test_multilora_inference", "multilora_inference_test_multilora_inference.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -84,28 +82,20 @@ def test_cases_parallel_part1(): Description: test 
cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " - "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", + cases = [ + (2, "cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss", "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching", "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching", "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log" ), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", + (2, "cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen", "vllm_mf_qwen_7b_v1_test_mf_qwen.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -121,23 +111,13 @@ def test_cases_parallel_part2(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen " - "> vllm_qwen_7b_test_vllm_qwen.log", - "vllm_qwen_7b_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen " - "> vllm_qwen_7b_v1_test_vllm_qwen.log", - "vllm_qwen_7b_v1_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " - "> shm_broadcast_test_shm_broadcast.log", - "shm_broadcast_test_shm_broadcast.log") - ] + cases = [(2, "cases_parallel/vllm_qwen_7b.py::test_vllm_qwen", + "vllm_qwen_7b_test_vllm_qwen.log"), + (2, "cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen", + "vllm_qwen_7b_v1_test_vllm_qwen.log"), + (4, "cases_parallel/shm_broadcast.py::test_shm_broadcast", + "shm_broadcast_test_shm_broadcast.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -153,23 +133,17 @@ def test_cases_parallel_part3(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", + cases = [ + (2, "cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16", "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + (2, + "cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16", "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4 " - "> vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log", + (4, + "cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4", "vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -185,28 +159,15 @@ def test_cases_parallel_part4(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss " - "> vllm_deepseek_osl_test_deepseek_r1_mss.log", - "vllm_deepseek_osl_test_deepseek_r1_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " - "> vllm_deepseek_part_test_deepseek_r1.log", - "vllm_deepseek_part_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " - "> vllm_deepseek_part_test_deepseek_mtp.log", - "vllm_deepseek_part_test_deepseek_mtp.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " - "> vllm_deepseek_part_v1_test_deepseek_r1.log", - "vllm_deepseek_part_v1_test_deepseek_r1.log") - ] + cases = [(2, "cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss", + "vllm_deepseek_osl_test_deepseek_r1_mss.log"), + (2, "cases_parallel/vllm_deepseek_part.py::test_deepseek_r1", + "vllm_deepseek_part_test_deepseek_r1.log"), + (2, "cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp", + "vllm_deepseek_part_test_deepseek_mtp.log"), + (2, "cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1", + "vllm_deepseek_part_v1_test_deepseek_r1.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -222,28 +183,15 @@ def test_cases_parallel_part5(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen3_8b.py::test_mf_qwen3 " - "> vllm_mf_qwen3_8b_test_mf_qwen3.log", - "vllm_mf_qwen3_8b_test_mf_qwen3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen3_8b_v1.py::test_mf_qwen3 " - "> vllm_mf_qwen3_8b_v1_test_mf_qwen3.log", - "vllm_mf_qwen3_8b_v1_test_mf_qwen3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_llama3.py::test_vllm_llama3_8b " - "> vllm_llama3_8b_test_vllm_llama3.log", - "vllm_llama3_8b_test_vllm_llama3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_llama3.py::test_vllm_llama3_1b " - "> vllm_llama3_1b_test_vllm_llama3.log", - "vllm_llama3_1b_test_vllm_llama3.log"), - ] + cases = [(2, "cases_parallel/vllm_mf_qwen3_8b.py::test_mf_qwen3", + "vllm_mf_qwen3_8b_test_mf_qwen3.log"), + (2, "cases_parallel/vllm_mf_qwen3_8b_v1.py::test_mf_qwen3", + "vllm_mf_qwen3_8b_v1_test_mf_qwen3.log"), + (1, "cases_parallel/vllm_llama3.py::test_vllm_llama3_8b", + "vllm_llama3_8b_test_vllm_llama3.log"), + (1, "cases_parallel/vllm_llama3.py::test_vllm_llama3_1b", + "vllm_llama3_1b_test_vllm_llama3.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -259,12 +207,11 @@ def test_cases_parallel_part6(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_qwen2_5_vl_7b_v1.py::test_qwen2_5_vl_7b_v1 " - "> vllm_qwen2_5_vl_7b_v1.log", "vllm_qwen2_5_vl_7b_v1.log"), + cases = [ + (2, "cases_parallel/vllm_qwen2_5_vl_7b_v1.py::test_qwen2_5_vl_7b_v1", + "vllm_qwen2_5_vl_7b_v1.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -280,28 +227,19 @@ def test_cases_parallel_level1_part0(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", + cases = [ + (2, + "cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss", "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " - "> vllm_deepseek_osl_test_deepseek_r1.log", + (2, "cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1", "vllm_deepseek_osl_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " - "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + (2, "cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1", "vllm_deepseek_smoothquant_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " - "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + (2, + "cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss", "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) diff --git a/tests/st/python/test_custom_advstepflash.py b/tests/st/python/test_custom_advstepflash.py index dd523f5d311c1d1cd682e5717c0b6acd60e123ed..8a8430ef408c2391b9380265510085bd17274759 100644 --- a/tests/st/python/test_custom_advstepflash.py +++ b/tests/st/python/test_custom_advstepflash.py @@ -15,23 +15,25 @@ # limitations under the License. 
# ============================================================================ """test case for custom op adv_step_flash""" - import time -import pytest -from vllm_mindspore import npu_ops -import numpy as np + import mindspore as ms +import numpy as np +import pytest import torch +from vllm_mindspore import npu_ops + +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + -def benchmark_advance_step_op(sampled_token_ids, - input_tokens, - input_positions, - seq_lens_tensor, - num_queries, - block_size, - block_tables, - slot_mapping): +def benchmark_advance_step_op(sampled_token_ids, input_tokens, input_positions, + seq_lens_tensor, num_queries, block_size, + block_tables, slot_mapping): # update input_tokens sampled_token_ids_list = sampled_token_ids[:num_queries].squeeze(-1) input_tokens[:num_queries] = sampled_token_ids_list @@ -48,7 +50,8 @@ def benchmark_advance_step_op(sampled_token_ids, block_idx = next_input_pos // block_size block_offset = next_input_pos % block_size - current_block_table = block_tables.gather(1, block_idx.unsqueeze(-1)).squeeze(-1) + current_block_table = block_tables.gather( + 1, block_idx.unsqueeze(-1)).squeeze(-1) slot_num = current_block_table * block_size + block_offset # update slot_mapping @@ -58,12 +61,21 @@ def benchmark_advance_step_op(sampled_token_ids, def gendata(seed, num_seqs, block_size, block_num, make_tensor): """generate inputs""" np.random.seed(seed) - sampled_token_ids = np.random.randint(65536, size=(num_seqs,), dtype=np.int64) - input_tokens = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out - input_positions = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out - seq_lens_tensor = np.random.randint(block_size * block_num - 1, size=(num_seqs,), dtype=np.int64) # inplace - block_tables = np.random.randint(1024, size=(num_seqs, block_num), dtype=np.int64) - slot_mapping = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out + sampled_token_ids = np.random.randint(65536, + size=(num_seqs, ), + dtype=np.int64) + input_tokens = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out + input_positions = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out + seq_lens_tensor = np.random.randint(block_size * block_num - 1, + size=(num_seqs, ), + dtype=np.int64) # inplace + block_tables = np.random.randint(1024, + size=(num_seqs, block_num), + dtype=np.int64) + slot_mapping = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out return (make_tensor(sampled_token_ids), \ make_tensor(input_tokens), \ make_tensor(input_positions), \ @@ -87,14 +99,9 @@ def test_advstepflash(): print("test seed:", seed, flush=True) sampled_token_ids1, input_tokens1, input_positions1, seq_lens_tensor1, block_tables1, slot_mapping1 = \ gendata(seed, num_seqs, block_size, block_num, torch.Tensor) - benchmark_advance_step_op(sampled_token_ids1, - input_tokens1, - input_positions1, - seq_lens_tensor1, - num_queries, - block_size, - block_tables1, - slot_mapping1) + benchmark_advance_step_op(sampled_token_ids1, input_tokens1, + input_positions1, seq_lens_tensor1, num_queries, + block_size, block_tables1, slot_mapping1) sampled_token_ids2, input_tokens2, input_positions2, seq_lens_tensor2, block_tables2, slot_mapping2 = \ gendata(seed, num_seqs, block_size, block_num, ms.Tensor) diff --git a/tests/st/python/test_sampler.py b/tests/st/python/test_sampler.py index 8066748f49f92ed27bd3c6b83ccbb4361be5ff57..79bfee55602540f5fba2961009432caf276b2ba9 100644 --- 
a/tests/st/python/test_sampler.py +++ b/tests/st/python/test_sampler.py @@ -14,25 +14,35 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -import vllm_mindspore + +# type: ignore +# isort: skip_file + import itertools import random from dataclasses import dataclass from typing import Dict, List, Optional, Tuple from unittest.mock import Mock, patch -from mindspore import mint +import vllm_mindspore import pytest import torch -from transformers import GenerationConfig, GenerationMixin - import vllm.envs as envs -from vllm_mindspore.model_executor.layers.sampler import Sampler +from transformers import GenerationConfig, GenerationMixin from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_random_seed -from vllm_mindspore.sequence import SamplingParams, SequenceData, SequenceGroupMetadata from vllm.utils import Counter, is_pin_memory_available +from vllm_mindspore.model_executor.layers.sampler import Sampler +from vllm_mindspore.sequence import (SamplingParams, SequenceData, + SequenceGroupMetadata) + +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + class MockLogitsSampler(Sampler): @@ -88,6 +98,7 @@ def _do_sample( pin_memory=is_pin_memory_available()) return sampler(logits=input_tensor, sampling_metadata=sampling_metadata) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -106,6 +117,7 @@ def test_sampler_all_greedy(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == expected[i].item() + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -130,6 +142,7 @@ def test_sampler_all_random(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == i + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -154,6 +167,7 @@ def test_sampler_all_random_seed(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == i + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -176,6 +190,7 @@ def test_sampler_all_random_seed_deterministic(seed: int, device: str): assert first_sampler_output == second_sampler_output + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -463,6 +478,7 @@ def test_sampler_min_tokens_penalty(seed: int, device: str): for test_case in test_cases: run_test_case(**test_case) + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -566,6 +582,7 @@ def test_sampler_mixed(seed: int, device: str): # the corresponding sample in the pre-shuffled batch test_sampling() + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -648,6 +665,7 @@ def test_sampler_top_k_top_p(seed: int, device: str): torch.testing.assert_close(hf_probs, sample_probs, rtol=0.0, atol=1e-5) assert torch.equal(hf_probs.eq(0), sample_probs.eq(0)) + @pytest.mark.skip(reason="Not implemented yet") 
@pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -679,6 +697,7 @@ def test_flashinfer_fallback(seed: int, device: str): assert sampler_output == fallback_sampler_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -750,6 +769,7 @@ def test_sampler_repetition_penalty_mixed(device: str): assert tokens1[0] == tokens2[1] assert tokens1[1] == tokens2[0] + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("device", CUDA_DEVICES) def test_sampler_include_gpu_probs_tensor(device: str): diff --git a/tests/st/python/test_sampler_v1.py b/tests/st/python/test_sampler_v1.py index 34954d0ccfb7d30a6359df6de82c0eab4c8a8df9..e7e614700f2d310b4c1055d527e82148dd7954e7 100644 --- a/tests/st/python/test_sampler_v1.py +++ b/tests/st/python/test_sampler_v1.py @@ -15,6 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ + +# type: ignore +# isort: skip_file + from typing import Optional import numpy as np @@ -26,11 +30,16 @@ from vllm.utils import make_tensor_with_pad from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.sample.sampler import Sampler +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + + VOCAB_SIZE = 1024 NUM_OUTPUT_TOKENS = 20 -CUDA_DEVICES = [ - f"cuda:{0}" -] +CUDA_DEVICES = [f"cuda:{0}"] MAX_NUM_PROMPT_TOKENS = 64 @@ -239,6 +248,7 @@ def _create_weighted_output_token_list( output_token_ids.append(output_token_ids_for_batch) return output_token_ids, sorted_token_ids_in_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -270,6 +280,7 @@ def test_sampler_min_tokens_penalty(device: str, batch_size: int): else: assert logits[batch_idx][token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -316,6 +327,7 @@ def test_sampler_presence_penalty(device: str, batch_size: int, assert non_penalized_token_id in output_token_ids[batch_idx] assert penalized_token_id not in output_token_ids[batch_idx] + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -370,6 +382,7 @@ def test_sampler_frequency_penalty(device: str, batch_size: int, assert non_penalized_token_id == most_frequent_token_id assert penalized_token_id not in distinct_sorted_token_ids_in_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -417,6 +430,7 @@ def test_sampler_repetition_penalty(device: str, batch_size: int, assert (non_penalized_token_id in prompt_tokens or non_penalized_token_id in output_tokens) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -458,6 +472,7 @@ def test_sampler_min_p(device: str, batch_size: int, min_p: float): # No masking when min_p is 0 assert logits[batch_idx][token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -488,11 +503,12 @@ def test_sampler_logit_bias(device: str, batch_size: int, bias_value: float): biased_index = min(batch_idx, VOCAB_SIZE - 1) for token_id in range(VOCAB_SIZE): if biased_index == token_id: - assert logits_for_req[token_id].item() == pytest.approx(bias_value + - 1e-2) + assert logits_for_req[token_id].item() == pytest.approx( + bias_value + 1e-2) 
else: assert logits_for_req[token_id].item() == pytest.approx(1e-2) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -535,6 +551,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int, else: assert logits_for_req[token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index eadecd8cc5b3573c14908b32d32ec22edb66c592..54c21a3548871e4a53259c1967d671cdd6f6ccab 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -15,16 +15,26 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -import tempfile import re - -from . import set_env +import tempfile from multiprocessing import Process, Queue -env_manager = set_env.EnvVarManager() +import pytest + +from . import utils +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -41,13 +51,14 @@ env_vars = { "LCAL_COMM_ID": "127.0.0.1:10068" } env_manager.setup_ai_environment(env_vars) + import vllm_mindspore from vllm import LLM, SamplingParams from vllm.utils import get_open_port -def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port, - GPUs_per_dp_rank, prompts, except_list, result_q): +def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, + dp_master_port, GPUs_per_dp_rank, prompts, except_list, result_q): os.environ["VLLM_DP_RANK"] = str(global_dp_rank) os.environ["VLLM_DP_LOCAL"] = str(local_dp_rank) os.environ["VLLM_DP_SIZE"] = str(dp_size) @@ -70,14 +81,15 @@ def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port max_tokens=3) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - tensor_parallel_size=GPUs_per_dp_rank, - max_model_len = 4096, - max_num_batched_tokens=8, - max_num_seqs=8, - trust_remote_code=True, - enforce_eager=True, - enable_expert_parallel=True) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + tensor_parallel_size=GPUs_per_dp_rank, + max_model_len=4096, + max_num_batched_tokens=8, + max_num_seqs=8, + trust_remote_code=True, + enforce_eager=True, + enable_expert_parallel=True) outputs = llm.generate(prompts, sampling_params) # Print the outputs. 
for i, output in enumerate(outputs): @@ -88,7 +100,8 @@ def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port result_q.put(generated_text == except_list[i]) -def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list): +def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list): file = open('./config/predict_deepseek_r1_671b_w8a8.yaml', 'r') content = file.read() file.close() @@ -114,14 +127,14 @@ def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, excep dp_per_node = dp_size // node_size - result_q = Queue() + result_q = Queue() # type: Queue[bool] procs = [] for local_dp_rank, global_dp_rank in enumerate( range(node_rank * dp_per_node, (node_rank + 1) * dp_per_node)): proc = Process(target=dp_func, - args=(dp_size, local_dp_rank, - global_dp_rank, dp_master_ip, dp_master_port, - tp_size, prompts, except_list, result_q)) + args=(dp_size, local_dp_rank, global_dp_rank, + dp_master_ip, dp_master_port, tp_size, + prompts, except_list, result_q)) proc.start() procs.append(proc) exit_code = 0 @@ -165,14 +178,20 @@ def exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list): f.write(content) env_manager.set_env_var("MINDFORMERS_MODEL_CONFIG", new_yaml_path) - # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, max_tokens=3, top_k=1, top_p=1.0, + sampling_params = SamplingParams(temperature=0.0, + max_tokens=3, + top_k=1, + top_p=1.0, repetition_penalty=1.0) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=8, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -187,7 +206,6 @@ def exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list): env_manager.unset_all() - @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.allcards @@ -197,7 +215,9 @@ def test_deepseek_r1_dp4_tp2_ep4(): """ new_yaml = "dp4_tp2_ep4.yaml" - replaced_pattern = ['data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 4' + ] dp_size = 4 tp_size = 2 # Sample prompts. @@ -207,17 +227,23 @@ def test_deepseek_r1_dp4_tp2_ep4(): ] * 4 except_list = ['ugs611ాలు'] * 4 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) -@pytest.mark.skip(reason="Currently does not support relevant communication fusion operators in 910b") +@pytest.mark.skip( + reason= + "Currently does not support relevant communication fusion operators in 910b" +) def test_deepseek_r1_dp8_tp1_ep8(): """ test case deepseek r1 w8a8 Dp8 tp1 ep8 """ new_yaml = "dp8_tp1_ep8.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 8' + ] dp_size = 8 tp_size = 1 # Sample prompts. 
@@ -227,7 +253,8 @@ def test_deepseek_r1_dp8_tp1_ep8(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -239,7 +266,9 @@ def test_deepseek_r1_dp2_tp4_ep1(): """ new_yaml = "dp2_tp4_ep1.yaml" - replaced_pattern = ['data_parallel: 2', 'model_parallel: 4', 'expert_parallel: 1'] + replaced_pattern = [ + 'data_parallel: 2', 'model_parallel: 4', 'expert_parallel: 1' + ] dp_size = 2 tp_size = 4 # Sample prompts. @@ -249,17 +278,23 @@ def test_deepseek_r1_dp2_tp4_ep1(): ] * 2 except_list = ['ugs611ాలు'] * 2 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) -@pytest.mark.skip(reason="Currently does not support relevant communication fusion operators in 910b") +@pytest.mark.skip( + reason= + "Currently does not support relevant communication fusion operators in 910b" +) def test_deepseek_r1_dp4_tp2_ep8(): """ test case deepseek r1 w8a8 dp4 tp2 ep8 """ new_yaml = "dp4_tp2_ep8.yaml" - replaced_pattern = ['data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 8' + ] dp_size = 4 tp_size = 2 # Sample prompts. @@ -269,7 +304,8 @@ def test_deepseek_r1_dp4_tp2_ep8(): ] * 4 except_list = ['ugs611ాలు'] * 4 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -281,7 +317,9 @@ def test_deepseek_r1_dp8_tp1_ep1(): """ new_yaml = "dp8_tp1_ep1.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 1'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 1' + ] dp_size = 8 tp_size = 1 # Sample prompts. @@ -291,7 +329,8 @@ def test_deepseek_r1_dp8_tp1_ep1(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -303,7 +342,9 @@ def test_deepseek_r1_dp8_tp1_ep4(): """ new_yaml = "dp8_tp1_ep4.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 4' + ] dp_size = 8 tp_size = 1 # Sample prompts. @@ -313,7 +354,8 @@ def test_deepseek_r1_dp8_tp1_ep4(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -325,14 +367,16 @@ def test_deepseek_r1_tp8_ep8(): """ new_yaml = "tp8_ep8.yaml" - replaced_pattern = ['data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 8' + ] # Sample prompts. 
prompts = [ "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 " "\n情感:<|Assistant|>\n", ] - except_list=['ugs611ాలు'] + except_list = ['ugs611ాలు'] exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list) @@ -345,12 +389,14 @@ def test_deepseek_r1_tp8_ep4(): """ new_yaml = "tp8_ep4.yaml" - replaced_pattern = ['data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 4' + ] # Sample prompts. prompts = [ "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 " "\n情感:<|Assistant|>\n", ] - except_list=['ugs611ాలు'] + except_list = ['ugs611ాలు'] exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list) diff --git a/tests/st/python/set_env.py b/tests/st/python/utils.py similarity index 33% rename from tests/st/python/set_env.py rename to tests/st/python/utils.py index b7aa8b685229c9147793589ea3de4d78b88e4d6b..ae0fe1d1cb498b5e633f46843106cbc01207e32f 100644 --- a/tests/st/python/set_env.py +++ b/tests/st/python/utils.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 -# encoding: utf-8 # Copyright 2025 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,40 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================ + +import contextlib import os +import signal import sys -from typing import Dict, Optional - -mindformers_path = "/home/jenkins/mindspore/testcases/testcases/tests/mindformers" +from typing import Dict, List, Optional, Tuple, Union -if mindformers_path not in sys.path: - sys.path.insert(0, mindformers_path) +import psutil -current_pythonpath = os.environ.get("PYTHONPATH", "") -if current_pythonpath: - os.environ["PYTHONPATH"] = f"{mindformers_path}:{current_pythonpath}" -else: - os.environ["PYTHONPATH"] = mindformers_path class EnvVarManager: + def __init__(self): self._original_env: Dict[str, Optional[str]] = {} self._managed_vars: Dict[str, str] = {} def set_env_var(self, var_name: str, value: str) -> None: - """设置环境变量并记录原始值(如果存在)""" + """Set environment variable and record original value.""" + # Record original values corresponding to var_name, None if not exist. 
         if var_name not in self._original_env:
-            # 保存原始值,即使它不存在(保存为None)
             self._original_env[var_name] = os.environ.get(var_name)
 
         os.environ[var_name] = value
         self._managed_vars[var_name] = value
 
     def unset_env_var(self, var_name: str) -> None:
-        """取消设置之前设置的环境变量,恢复原始值"""
+        """Unset an environment variable, restoring its original value."""
         if var_name not in self._original_env:
-            raise ValueError(f"Variable {var_name} was not set by this manager")
+            raise ValueError(
+                f"Variable {var_name} was not set by this manager")
 
         original_value = self._original_env[var_name]
         if original_value is not None:
@@ -59,15 +53,97 @@ class EnvVarManager:
         del self._managed_vars[var_name]
 
     def unset_all(self) -> None:
-        """取消设置所有由该管理器设置的环境变量"""
+        """Unset all managed environment variables, restoring originals."""
         for var_name in list(self._managed_vars.keys()):
             self.unset_env_var(var_name)
 
     def get_managed_vars(self) -> Dict[str, str]:
-        """获取当前由该管理器管理的所有环境变量 """
+        """Get all variables currently managed by this manager."""
         return self._managed_vars.copy()
 
     def setup_ai_environment(self, env_vars: Dict[str, str]) -> None:
-        """设置AI相关的环境变量,使用传入的参数"""
+        """Set up the AI environment with the given variables."""
+        # Insert mindformers into PYTHONPATH.
+        mindformers_path = "/home/jenkins/mindspore/testcases/testcases/tests/mindformers"
+
+        if mindformers_path not in sys.path:
+            sys.path.insert(0, mindformers_path)
+
+        current_pythonpath = os.environ.get("PYTHONPATH", "")
+        if current_pythonpath:
+            os.environ[
+                "PYTHONPATH"] = f"{mindformers_path}:{current_pythonpath}"
+        else:
+            os.environ["PYTHONPATH"] = mindformers_path
+
+        # Update the managed environment variables.
         for var_name, value in env_vars.items():
             self.set_env_var(var_name, value)
+
+
+def cleanup_subprocesses() -> None:
+    """Clean up all subprocesses spawned by the main test process."""
+    cur_proc = psutil.Process(os.getpid())
+    children = cur_proc.children(recursive=True)
+    for child in children:
+        with contextlib.suppress(ProcessLookupError):
+            os.killpg(child.pid, signal.SIGKILL)
+
+
+def tasks_resource_alloc(tasks: List[Tuple[int, str, str]]) -> List[Tuple[str, str]]:
+    """
+    Allocate devices, an LCCL port and an HCCL base port to each task according to its device requirement.
+
+    For example:
+        [(2, "cases_parallel/vllm_task.py::test_1", "test_1.log")]
+        ==> [("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && "
+              "export HCCL_IF_BASE_PORT=61000 && "
+              "pytest -s -v cases_parallel/vllm_task.py::test_1 > test_1.log",
+              "test_1.log")]
+
+    Args:
+        tasks (List[Tuple[int, str, str]]): List of tasks. Each task contains 3 elements.
+            1. device_req (int): Number of devices required. The task occupies device_req devices,
+               device_req ports for LCCL and device_req ports for HCCL.
+            2. case_desc (str): The case description, such as "path_to_case/case.py::target_case".
+            3. log_file (str): The logging file path.
+
+    Returns:
+        List[Tuple[str, str]]: (command, log_file) pairs with the resource exports prepended.
+    """
+    device_limit = 8
+    device_base = 0
+    lccl_base_port = 10068
+    hccl_base_port = 61000
+
+    out_tasks: List[Tuple[str, str]] = []
+    for task in tasks:
+        assert len(task) == 3
+        resource_req, task_case, log_file = task
+        if not isinstance(resource_req, int):
+            raise TypeError(
+                "First element of task should be an int, "
+                f"but got {type(resource_req)}!")
+
+        device_str = ",".join(
+            [str(d) for d in range(device_base, device_base + resource_req)])
+        lccl_str = f"127.0.0.1:{lccl_base_port}"
+
+        commands = [
+            f"export ASCEND_RT_VISIBLE_DEVICES={device_str}",
+            f"export LCAL_COMM_ID={lccl_str}",
+            f"export HCCL_IF_BASE_PORT={hccl_base_port}"
+        ]
+        device_base += resource_req
+        lccl_base_port += resource_req
+        hccl_base_port += resource_req
+
+        commands.append(f"pytest -s -v {task_case} > {log_file}")
+        out_tasks.append((" && ".join(commands), log_file))
+
+    if device_base > device_limit:
+        raise ValueError(
+            f"Total requested devices ({device_base}) exceed the "
+            f"resource limit ({device_limit})!")
+
+    return out_tasks
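
Appended for reviewers, not part of the patch: a minimal sketch of how a parallel entry point is expected to consume the new helper. The case paths here are placeholders in the same style as the docstring example; the expansion shown in the comments follows directly from the allocation rules above (devices, the LCCL port and the HCCL base port each advance by the previous task's device requirement).

```python
from tests.st.python.utils import tasks_resource_alloc

# Placeholder cases: (device requirement, pytest target, log file).
cases = [
    (2, "cases_parallel/vllm_task.py::test_1", "test_1.log"),
    (4, "cases_parallel/vllm_task.py::test_2", "test_2.log"),
]

for command, log_file in tasks_resource_alloc(cases):
    print(log_file, "->", command)

# Expected allocation:
#   test_1.log -> export ASCEND_RT_VISIBLE_DEVICES=0,1 &&
#                 export LCAL_COMM_ID=127.0.0.1:10068 &&
#                 export HCCL_IF_BASE_PORT=61000 &&
#                 pytest -s -v cases_parallel/vllm_task.py::test_1 > test_1.log
#   test_2.log -> export ASCEND_RT_VISIBLE_DEVICES=2,3,4,5 &&
#                 export LCAL_COMM_ID=127.0.0.1:10070 &&
#                 export HCCL_IF_BASE_PORT=61002 && ...
```

In the refactored entry points, the resulting (command, log_file) pairs are handed to the existing `run_command` helper through `Pool(len(commands))`, exactly as the unchanged context lines in the hunks above show.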