diff --git a/tests/st/python/cases_parallel/multilora_inference.py b/tests/st/python/cases_parallel/multilora_inference.py index 7e2129a195dabdf6e6dba315571fcf4a04883d88..c4799934e0517698139fa5670eb951c23035f277 100644 --- a/tests/st/python/cases_parallel/multilora_inference.py +++ b/tests/st/python/cases_parallel/multilora_inference.py @@ -18,11 +18,19 @@ This example shows how to use the multi-LoRA functionality for offline inference. """ -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -59,7 +67,7 @@ def create_test_prompts( def process_requests(engine: LLMEngine, test_prompts: List[Tuple[str, SamplingParams, - Optional[LoRARequest]]]): + Optional[LoRARequest]]]): """Continuously process a list of prompts and handle the outputs.""" request_id = 0 diff --git a/tests/st/python/cases_parallel/shm_broadcast.py b/tests/st/python/cases_parallel/shm_broadcast.py index d4d98fd237c021b32139ed3610cedcaccc099020..7dbb7b5c8be3c1e25469b99485c13ce671730235 100644 --- a/tests/st/python/cases_parallel/shm_broadcast.py +++ b/tests/st/python/cases_parallel/shm_broadcast.py @@ -15,7 +15,10 @@ # limitations under the License. # ============================================================================ """test cpu communicator and share memory""" -import pytest + +# type: ignore +# isort: skip_file + import multiprocessing import random import time @@ -27,8 +30,12 @@ import torch.distributed as dist import vllm_mindspore from vllm.distributed.device_communicators.shm_broadcast import MessageQueue -from vllm.distributed.utils import StatelessProcessGroup -from vllm.utils import get_ip, get_open_port, update_environment_variables, get_distributed_init_method +from vllm.utils import get_ip, get_open_port, get_distributed_init_method +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() def get_arrays(n: int, seed: int = 0) -> List[np.ndarray]: @@ -47,7 +54,9 @@ def distributed_run(fn, world_size): distributed_init_method = get_distributed_init_method("127.0.0.1", port) for i in range(number_of_processes): - p = multiprocessing.Process(target=fn, args=(distributed_init_method, i, world_size)) + p = multiprocessing.Process(target=fn, + args=(distributed_init_method, i, + world_size)) processes.append(p) p.start() diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py index 6c29cc4c9fd50d8d91b20fe4af7bb1529c88a3ab..a2d3dcaf60bdade1b351fb93a094800a5ba0bd20 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b.yaml", @@ -54,8 +62,12 @@ def test_deepseek_r1_bf16(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-bf16", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py index 4d4fb5c0f9782e296da5553f5bc3037ee67ed3dc..c08930c3ddde8350c1ae1c6bd9b98dde7b0f1eec 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_bf16_part_v1.py @@ -16,11 +16,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b.yaml", diff --git a/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py index 968f805ba77d022abf2aa547a98116d96e1dc9ad..0cff9cd16469beb2b668a85a44ec9adcfc292c31 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_gptq_a16w4.py @@ -16,12 +16,20 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1 gptq int4 quantization.""" + +# type: ignore +# isort: skip_file + import os import yaml -import pytest -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_a16w4.yaml", diff --git a/tests/st/python/cases_parallel/vllm_deepseek_osl.py b/tests/st/python/cases_parallel/vllm_deepseek_osl.py index fc782b9e3169b0bd59c784c5a4cd1e31257847fa..f6d61e6e29b3eba159f2b9b7b6d579b4fa6f8c90 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_osl.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_osl.py @@ -18,15 +18,23 @@ test mf deepseek r1 osl. 
isort:skip_file """ -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": - "./config/predict_deepseek_r1_671b_w8a8_osl.yaml", + "./config/predict_deepseek_r1_671b_w8a8_osl.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -55,14 +63,11 @@ def test_deepseek_r1(): ] # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, - max_tokens=10, - top_k=1) + sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. llm = LLM( - model= - "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, @@ -92,14 +97,11 @@ def test_deepseek_r1_mss(): ] # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, - max_tokens=10, - top_k=1) + sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. llm = LLM( - model= - "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-osl", trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part.py b/tests/st/python/cases_parallel/vllm_deepseek_part.py index 7ef3e8901bca7157ff051bf94a764d4ee8a983ef..e3062ba8d7a7529920adcf27d5066e86e9badb32 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -env_manager = set_env.EnvVarManager() + +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", @@ -54,12 +62,18 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt @@ -86,12 +100,17 @@ def test_deepseek_mtp(): # Create an LLM. 
llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-MTP", - trust_remote_code=True, gpu_memory_utilization=0.7, tensor_parallel_size=2, max_model_len=4096, + trust_remote_code=True, + gpu_memory_utilization=0.7, + tensor_parallel_size=2, + max_model_len=4096, speculative_config={"num_speculative_tokens": 1}) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py index e5eb917a6a203ae81964f50da993c285ee2df2c5..b5fc5e112266387c91efbb7d6f073facd21ca4ab 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_part_v1.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + -env_manager = set_env.EnvVarManager() +def teardown_function(): + utils.cleanup_subprocesses() + + +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", @@ -53,12 +61,18 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) - except_list = ['ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid'] + except_list = [ + 'ugs611ాలు哒ాలు mahassisemaSTE的道德', 'ugs611ాలు哒ాలు mah战区rollerOVERlaid' + ] # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py index 48d2441adf2e5459ad80b95c518cf9529b58a122..5938b0f4edf39948e18cd0ae16cbf014a3fa7161 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant.py @@ -15,14 +15,23 @@ # limitations under the License. 
# ============================================================================ """test mf deepseek r1 smoothquant.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { - "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", + "MINDFORMERS_MODEL_CONFIG": + "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -54,8 +63,13 @@ def test_deepseek_r1(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, max_model_len=4096) + llm = LLM( + model= + "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py index 111c91e4bcdd4a6467ce0db0faec88599d6ee7f0..236c9a23f20a787ee7180282c54b98ef34c1e178 100644 --- a/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py +++ b/tests/st/python/cases_parallel/vllm_deepseek_smoothquant_mss.py @@ -15,14 +15,23 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1 smoothquant.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { - "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", + "MINDFORMERS_MODEL_CONFIG": + "./config/predict_deepseek_r1_671b_w8a8_smoothquant.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), "vLLM_MODEL_BACKEND": "MindFormers", "MS_ENABLE_LCCL": "off", @@ -54,9 +63,14 @@ def test_deepseek_r1_mss(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=2, num_scheduler_steps=8, - max_model_len=4096) + llm = LLM( + model= + "/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8-smoothquant-newconfig", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + num_scheduler_steps=8, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. 
outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_llama3.py b/tests/st/python/cases_parallel/vllm_llama3.py index 656c744d960bbe1c497719de341f9ca7e4907db7..463910fa0c5b0ab53b3991948d66c0f397e3e8ae 100644 --- a/tests/st/python/cases_parallel/vllm_llama3.py +++ b/tests/st/python/cases_parallel/vllm_llama3.py @@ -14,39 +14,45 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ - -# isort:skip_file """test vllm llama3.""" import os -import pytest - -from tests.st.python import set_env - -env_manager = set_env.EnvVarManager() -# def env -env_vars = { - "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), - "MS_ENABLE_LCCL": "off", - "HCCL_OP_EXPANSION_MODE": "AIV", - "MS_ALLOC_CONF": "enable_vmm:True", - "LCCL_DETERMINISTIC": "1", - "HCCL_DETERMINISTIC": "true", - "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", - "ATB_LLM_LCOC_ENABLE": "0", - "VLLM_USE_V1": "1", - "HCCL_IF_BASE_PORT": "60000" -} -# set env -env_manager.setup_ai_environment(env_vars) -import vllm_mindspore -from vllm import LLM, SamplingParams +from tests.st.python import utils + +env_manager = utils.EnvVarManager() + + +def setup_function(): + # def env + env_vars = { + "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), + "MS_ENABLE_LCCL": "off", + "HCCL_OP_EXPANSION_MODE": "AIV", + "MS_ALLOC_CONF": "enable_vmm:True", + "LCCL_DETERMINISTIC": "1", + "HCCL_DETERMINISTIC": "true", + "ATB_MATMUL_SHUFFLE_K_ENABLE": "0", + "ATB_LLM_LCOC_ENABLE": "0", + "VLLM_USE_V1": "1", + "HCCL_IF_BASE_PORT": "60000" + } + # set env + env_manager.setup_ai_environment(env_vars) + # Enable vllm-mindsproe. + import vllm_mindspore + + +def teardown_function(): + # unset env + env_manager.unset_all() + utils.cleanup_subprocesses() def test_vllm_llama3_8b(): """ test case llama3.1 8B """ + from vllm import LLM, SamplingParams # Sample prompts. prompts = [ @@ -74,14 +80,12 @@ def test_vllm_llama3_8b(): print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") assert generated_text == except_list[i] - # unset env - env_manager.unset_all() - def test_vllm_llama3_1b(): """ test case llama3.2 1B """ + from vllm import LLM, SamplingParams # Sample prompts. prompts = [ diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py index 48de1692134eff0f30e54de79fcabe8b3e4dc52d..c6765a7895271a97bb061c6b222c67b28606fdc4 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b.py @@ -15,13 +15,20 @@ # limitations under the License. 
# ============================================================================ """test mf qwen.""" + +# type: ignore +# isort: skip_file + import os -import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -from tests.st.python import set_env -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py index aeb62ef7af753cda7509f7ef6b96da8c91d2379c..28b142489378643ce87a2f3c686a70bc964c14d5 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen3_8b_v1.py @@ -15,13 +15,20 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" + +# type: ignore +# isort: skip_file + import os -import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() -from tests.st.python import set_env -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py index 684974212ef9582def3521efbf35dab547d47c22..f60bbd75add4ed5aedfc6e3c16c389fa2eab21bd 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -55,8 +63,10 @@ def test_mf_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py index b5738848b7de75b680179a8611691c393e525d8e..a9928ac349472238fb78111718a00aee3914f724 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test mf qwen chunk prefill.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -47,26 +55,39 @@ def test_mf_qwen_7b_chunk_prefill(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " - "a unique and fascinating destination. In short, Beijing is a city"}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " + "a unique and fascinating destination. In short, Beijing is a city" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, - enable_chunked_prefill=True) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + enable_chunked_prefill=True) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for batch_data in batch_datas: @@ -76,7 +97,9 @@ def test_mf_qwen_7b_chunk_prefill(): # Print the outputs. 
for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py index 4d8a32d8d57cac741cc3f440ffe649bc2a118007..6011edb3ef3b57dd217cef206d54cd5a1dcd201d 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen chunk prefill.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -46,25 +54,38 @@ def test_mf_qwen_7b_chunk_prefill(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " - "a unique and fascinating destination. In short, Beijing is a city"}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + " The city's blend of traditional and modern architecture, bustling markets, and vibrant street life make it " + "a unique and fascinating destination. In short, Beijing is a city" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. 
- llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.85, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.85, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for batch_data in batch_datas: @@ -74,7 +95,9 @@ def test_mf_qwen_7b_chunk_prefill(): # Print the outputs. for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py index 35605d7d9c7426d4de608c1980c1d9b4b3bea87d..c84157cf972ed28242fff325a87845a46e955ffe 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen chunk prefill, prefix cache, mss.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -47,25 +55,40 @@ def test_mf_qwen_7b_cp_pc_mss(): """ # Sample prompts. - batch_datas = [{ - "prompt": "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " - "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " - "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " - "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " - "strategic genius and resilience of ancient China.", - "answer": ""}, - {"prompt": "I love Beijing, because", - "answer": " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a"}, + batch_datas = [ + { + "prompt": + "I love Beijing, because it is a city with a long history and profound cultural heritage. Walking through " + "its ancient hutongs, one can almost feel the whispers of the past. The Forbidden City, an architectural " + "marvel that once housed emperors, stands as a testament to the city's imperial past. Meanwhile, the Great " + "Wall, though not within the city limits, is easily accessible from Beijing and offers a glimpse into the " + "strategic genius and resilience of ancient China.", + "answer": + "" + }, + { + "prompt": + "I love Beijing, because", + "answer": + " it is a city with a long history. Which of the following options correctly expresses this sentence?\nA. I love Beijing, because it is a city with a" + }, ] # Create a sampling params object. 
sampling_params = SamplingParams(temperature=0.0, max_tokens=32, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_seqs=16, max_num_batched_tokens=32, - block_size=32, gpu_memory_utilization=0.9, tensor_parallel_size=2, - enable_chunked_prefill=True, enable_prefix_caching=True, num_scheduler_steps=8) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_seqs=16, + max_num_batched_tokens=32, + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2, + enable_chunked_prefill=True, + enable_prefix_caching=True, + num_scheduler_steps=8) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. for _ in range(3): @@ -76,7 +99,9 @@ def test_mf_qwen_7b_cp_pc_mss(): # Print the outputs. for i, output in enumerate(outputs): generated_text = output.outputs[0].text - print(f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}") + print( + f"Prompt: {output.prompt!r}, Generated text: {generated_text!r}" + ) assert generated_text == answer # unset env diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py index 43d863f85b555bca6bdf1b6d1e71bb97492ab275..888b61a026ad5e83257c551de61251face8ed94b 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_mss.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test mf qwen mss.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -55,9 +63,14 @@ def test_mf_qwen_7b_mss(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, max_num_batched_tokens=8192, - block_size=32, gpu_memory_utilization=0.9, num_scheduler_steps=8, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + max_num_batched_tokens=8192, + block_size=32, + gpu_memory_utilization=0.9, + num_scheduler_steps=8, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py index 17df205386ef6a09bf5e0c5b1d46093aabc9f5fb..1ee84a069a2b36f48d6f7501fc6b7a65fecfeecf 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching.py @@ -14,13 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ - """test mf qwen prefix caching.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -56,9 +63,13 @@ def test_mf_qwen_7b_prefix_caching(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, block_size=16, enable_prefix_caching=True, - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + block_size=16, + enable_prefix_caching=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -67,11 +78,15 @@ def test_mf_qwen_7b_prefix_caching(): second_except_list = [' in Beijing, but I have to say that the'] for i, (output, second_output) in enumerate(zip(outputs, second_outputs)): generated_text = output.outputs[i].text - print(f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}") + print( + f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}" + ) assert generated_text == except_list[i] second_generated_text = second_output.outputs[i].text - print(f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}") + print( + f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}" + ) assert second_generated_text == second_except_list[i] env_manager.unset_all() diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py index ffde9d4684eb0bd4d661d2bdc8168dde50c09088..b45cce75b3fb7bb8b5ccacc25fa4b5b76151272a 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py @@ -14,13 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ - """test mf qwen prefix caching.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -55,8 +62,11 @@ def test_mf_qwen_7b_prefix_caching(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. 
- llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - max_model_len=8192, block_size=16, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + max_model_len=8192, + block_size=16, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -65,11 +75,15 @@ def test_mf_qwen_7b_prefix_caching(): second_except_list = [' in Beijing, but I have to say that the'] for i, (output, second_output) in enumerate(zip(outputs, second_outputs)): generated_text = output.outputs[i].text - print(f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}") + print( + f"Output1 - Prompt: {prompts[i]!r}, Generated text: {generated_text!r}" + ) assert generated_text == except_list[i] second_generated_text = second_output.outputs[i].text - print(f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}") + print( + f"Output2 - Prompt: {second_prompts[i]!r}, Generated text: {second_generated_text!r}" + ) assert second_generated_text == second_except_list[i] env_manager.unset_all() diff --git a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py index 6963da0d8d8a014bb42bebf7b171816fa78bf03f..99aed9ac3171f63d9736757feb861dd6ee0c17f4 100644 --- a/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_mf_qwen_7b_v1.py @@ -15,11 +15,20 @@ # limitations under the License. # ============================================================================ """test mf qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +import pytest +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_qwen2_5_7b_instruct.yaml", @@ -54,8 +63,10 @@ def test_mf_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -78,26 +89,29 @@ def test_mf_qwen_batch(): """ # Sample prompts. prompts = [ - "北京烤鸭是", - "请介绍一下华为,华为是", - "今年似乎大模型之间的内卷已经有些偃旗息鼓了,各大技术公司逐渐聪单纯追求模型参数量的竞赛中抽身," - "转向更加注重模型的实际>应用效果和效率", - ] * 2 + "北京烤鸭是", + "请介绍一下华为,华为是", + "今年似乎大模型之间的内卷已经有些偃旗息鼓了,各大技术公司逐渐聪单纯追求模型参数量的竞赛中抽身," + "转向更加注重模型的实际>应用效果和效率", + ] * 2 # Create a sampling params object. sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", block_size=32, - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + block_size=32, + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. 
The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) except_list = [ - "享誉世界的中华美食,其制作工艺独特,", - "做什么的? 华为是一家中国公司,", - "。 \n在这一背景下,阿里云发布了通", - ] * 2 + "享誉世界的中华美食,其制作工艺独特,", + "做什么的? 华为是一家中国公司,", + "。 \n在这一背景下,阿里云发布了通", + ] * 2 # Print the outputs. for i, output in enumerate(outputs): prompt = output.prompt diff --git a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py index d776c8d93dacc2fc6a3fc28453083c2de9ba320c..caa2b4053c443a6c92d568773fa40c8846665d1c 100644 --- a/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen2_5_vl_7b_v1.py @@ -15,14 +15,23 @@ # limitations under the License. # ============================================================================ """test mf qwen2.5 vl 7B.""" + +# type: ignore +# isort: skip_file + import os from PIL import Image -from tests.st.python import set_env +from tests.st.python import utils from tests.st.python.cases_parallel.similarity import compare_distance -env_manager = set_env.EnvVarManager() + +def teardown_function(): + utils.cleanup_subprocesses() + + +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b.py b/tests/st/python/cases_parallel/vllm_qwen_7b.py index b3f6b080aab8d042f7ae7c464f911b8dce4b4d22..3aba6a1fb4f07ab1b1694ef80cccaad1d2192f25 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b.py @@ -15,11 +15,19 @@ # limitations under the License. # ============================================================================ """test vllm qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -53,8 +61,10 @@ def test_vllm_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py index 8672855f2b60ad81485b416aa21a414cfcf52158..8b2300e798f4d0139875eccfde6e20669fbeb509 100644 --- a/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py +++ b/tests/st/python/cases_parallel/vllm_qwen_7b_v1.py @@ -15,11 +15,19 @@ # limitations under the License. 
# ============================================================================ """test vllm qwen.""" -import pytest + +# type: ignore +# isort: skip_file + import os -from tests.st.python import set_env +from tests.st.python import utils + + +def teardown_function(): + utils.cleanup_subprocesses() + -env_manager = set_env.EnvVarManager() +env_manager = utils.EnvVarManager() # def env env_vars = { "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -53,8 +61,10 @@ def test_vllm_qwen(): sampling_params = SamplingParams(temperature=0.0, max_tokens=10, top_k=1) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", - gpu_memory_utilization=0.9, tensor_parallel_size=2) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/Qwen2.5-7B-Instruct", + gpu_memory_utilization=0.9, + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/st/python/test_cases_parallel.py b/tests/st/python/test_cases_parallel.py index 3724e45fd922aee494b1f6a74edaff4fc6087d03..85a4de80df52936d0b1daadcf050625ae7d00402 100644 --- a/tests/st/python/test_cases_parallel.py +++ b/tests/st/python/test_cases_parallel.py @@ -22,6 +22,12 @@ from multiprocessing.pool import Pool import pytest +from .utils import cleanup_subprocesses, tasks_resource_alloc + + +def teardown_function(): + cleanup_subprocesses() + def run_command(command_info): cmd, log_path = command_info @@ -48,27 +54,19 @@ def test_cases_parallel_part0(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen > vllm_mf_qwen_7b_test_mf_qwen.log", + cases = [ + (2, "cases_parallel/vllm_mf_qwen_7b.py::test_mf_qwen", "vllm_mf_qwen_7b_test_mf_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_chunk_prefill.py::test_mf_qwen_7b_chunk_prefill", "vllm_mf_qwen_7b_chunk_prefill_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 &&" - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill " - "> vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_chunk_prefill_v1.py::test_mf_qwen_7b_chunk_prefill", "vllm_mf_qwen_7b_chunk_prefill_v1_test_mf_qwen_7b_chunk_prefill.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/multilora_inference.py::test_multilora_inference " - "> multilora_inference_test_multilora_inference.log", + (2, "cases_parallel/multilora_inference.py::test_multilora_inference", "multilora_inference_test_multilora_inference.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -84,28 +82,20 @@ def test_cases_parallel_part1(): Description: test 
cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss " - "> vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log", + cases = [ + (2, "cases_parallel/vllm_mf_qwen_7b_mss.py::test_mf_qwen_7b_mss", "vllm_mf_qwen_7b_mss_test_mf_qwen_7b_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_prefix_caching.py::test_mf_qwen_7b_prefix_caching", "vllm_mf_qwen_7b_prefix_caching_test_mf_qwen_7b_prefix_caching.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching " - "> vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log", + (2, + "cases_parallel/vllm_mf_qwen_7b_prefix_caching_v1.py::test_mf_qwen_7b_prefix_caching", "vllm_mf_qwen_7b_prefix_caching_v1_test_mf_qwen_7b_prefix_caching.log" ), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen > vllm_mf_qwen_7b_v1_test_mf_qwen.log", + (2, "cases_parallel/vllm_mf_qwen_7b_v1.py::test_mf_qwen", "vllm_mf_qwen_7b_v1_test_mf_qwen.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -121,23 +111,13 @@ def test_cases_parallel_part2(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_qwen_7b.py::test_vllm_qwen " - "> vllm_qwen_7b_test_vllm_qwen.log", - "vllm_qwen_7b_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen " - "> vllm_qwen_7b_v1_test_vllm_qwen.log", - "vllm_qwen_7b_v1_test_vllm_qwen.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/shm_broadcast.py::test_shm_broadcast " - "> shm_broadcast_test_shm_broadcast.log", - "shm_broadcast_test_shm_broadcast.log") - ] + cases = [(2, "cases_parallel/vllm_qwen_7b.py::test_vllm_qwen", + "vllm_qwen_7b_test_vllm_qwen.log"), + (2, "cases_parallel/vllm_qwen_7b_v1.py::test_vllm_qwen", + "vllm_qwen_7b_v1_test_vllm_qwen.log"), + (4, "cases_parallel/shm_broadcast.py::test_shm_broadcast", + "shm_broadcast_test_shm_broadcast.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -153,23 +133,17 @@ def test_cases_parallel_part3(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log", + cases = [ + (2, "cases_parallel/vllm_deepseek_bf16_part.py::test_deepseek_r1_bf16", "vllm_deepseek_bf16_part_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16 " - "> vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log", + (2, + "cases_parallel/vllm_deepseek_bf16_part_v1.py::test_deepseek_r1_bf16", "vllm_deepseek_bf16_part_v1_test_deepseek_r1_bf16.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4 " - "> vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log", + (4, + "cases_parallel/vllm_deepseek_gptq_a16w4.py::test_deepseek_r1_gptq_a16w4", "vllm_deepseek_gptq_a16w4_test_deepseek_r1_gptq_a16w4.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -185,28 +159,15 @@ def test_cases_parallel_part4(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss " - "> vllm_deepseek_osl_test_deepseek_r1_mss.log", - "vllm_deepseek_osl_test_deepseek_r1_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_r1 " - "> vllm_deepseek_part_test_deepseek_r1.log", - "vllm_deepseek_part_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp " - "> vllm_deepseek_part_test_deepseek_mtp.log", - "vllm_deepseek_part_test_deepseek_mtp.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1 " - "> vllm_deepseek_part_v1_test_deepseek_r1.log", - "vllm_deepseek_part_v1_test_deepseek_r1.log") - ] + cases = [(2, "cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1_mss", + "vllm_deepseek_osl_test_deepseek_r1_mss.log"), + (2, "cases_parallel/vllm_deepseek_part.py::test_deepseek_r1", + "vllm_deepseek_part_test_deepseek_r1.log"), + (2, "cases_parallel/vllm_deepseek_part.py::test_deepseek_mtp", + "vllm_deepseek_part_test_deepseek_mtp.log"), + (2, "cases_parallel/vllm_deepseek_part_v1.py::test_deepseek_r1", + "vllm_deepseek_part_v1_test_deepseek_r1.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -222,28 +183,15 @@ def test_cases_parallel_part5(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen3_8b.py::test_mf_qwen3 " - "> vllm_mf_qwen3_8b_test_mf_qwen3.log", - "vllm_mf_qwen3_8b_test_mf_qwen3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_mf_qwen3_8b_v1.py::test_mf_qwen3 " - "> vllm_mf_qwen3_8b_v1_test_mf_qwen3.log", - "vllm_mf_qwen3_8b_v1_test_mf_qwen3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_llama3.py::test_vllm_llama3_8b " - "> vllm_llama3_8b_test_vllm_llama3.log", - "vllm_llama3_8b_test_vllm_llama3.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=5 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_llama3.py::test_vllm_llama3_1b " - "> vllm_llama3_1b_test_vllm_llama3.log", - "vllm_llama3_1b_test_vllm_llama3.log"), - ] + cases = [(2, "cases_parallel/vllm_mf_qwen3_8b.py::test_mf_qwen3", + "vllm_mf_qwen3_8b_test_mf_qwen3.log"), + (2, "cases_parallel/vllm_mf_qwen3_8b_v1.py::test_mf_qwen3", + "vllm_mf_qwen3_8b_v1_test_mf_qwen3.log"), + (1, "cases_parallel/vllm_llama3.py::test_vllm_llama3_8b", + "vllm_llama3_8b_test_vllm_llama3.log"), + (1, "cases_parallel/vllm_llama3.py::test_vllm_llama3_1b", + "vllm_llama3_1b_test_vllm_llama3.log")] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -259,12 +207,11 @@ def test_cases_parallel_part6(): Description: test cases parallel. Expectation: Pass. """ - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_qwen2_5_vl_7b_v1.py::test_qwen2_5_vl_7b_v1 " - "> vllm_qwen2_5_vl_7b_v1.log", "vllm_qwen2_5_vl_7b_v1.log"), + cases = [ + (2, "cases_parallel/vllm_qwen2_5_vl_7b_v1.py::test_qwen2_5_vl_7b_v1", + "vllm_qwen2_5_vl_7b_v1.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) @@ -280,28 +227,19 @@ def test_cases_parallel_level1_part0(): Description: test cases parallel. Expectation: Pass. 
""" - commands = [ - ("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && " - "export HCCL_IF_BASE_PORT=61000 && " - "pytest -s -v cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss " - "> vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log", + cases = [ + (2, + "cases_parallel/vllm_mf_qwen_7b_cp_pc_mss.py::test_mf_qwen_7b_cp_pc_mss", "vllm_mf_qwen_7b_cp_pc_mss_test_mf_qwen_7b_cp_pc_mss.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=2,3 && export LCAL_COMM_ID=127.0.0.1:10069 && " - "export HCCL_IF_BASE_PORT=61002 && " - "pytest -s -v cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1 " - "> vllm_deepseek_osl_test_deepseek_r1.log", + (2, "cases_parallel/vllm_deepseek_osl.py::test_deepseek_r1", "vllm_deepseek_osl_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=4,5 && export LCAL_COMM_ID=127.0.0.1:10070 && " - "export HCCL_IF_BASE_PORT=61004 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1 " - "> vllm_deepseek_smoothquant_test_deepseek_r1.log", + (2, "cases_parallel/vllm_deepseek_smoothquant.py::test_deepseek_r1", "vllm_deepseek_smoothquant_test_deepseek_r1.log"), - ("export ASCEND_RT_VISIBLE_DEVICES=6,7 && export LCAL_COMM_ID=127.0.0.1:10071 && " - "export HCCL_IF_BASE_PORT=61006 && " - "pytest -s -v cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss " - "> vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log", + (2, + "cases_parallel/vllm_deepseek_smoothquant_mss.py::test_deepseek_r1_mss", "vllm_deepseek_smoothquant_mss_test_deepseek_r1_mss.log") ] + commands = tasks_resource_alloc(cases) with Pool(len(commands)) as pool: results = list(pool.imap(run_command, commands)) diff --git a/tests/st/python/test_custom_advstepflash.py b/tests/st/python/test_custom_advstepflash.py index dd523f5d311c1d1cd682e5717c0b6acd60e123ed..8a8430ef408c2391b9380265510085bd17274759 100644 --- a/tests/st/python/test_custom_advstepflash.py +++ b/tests/st/python/test_custom_advstepflash.py @@ -15,23 +15,25 @@ # limitations under the License. 
# ============================================================================ """test case for custom op adv_step_flash""" - import time -import pytest -from vllm_mindspore import npu_ops -import numpy as np + import mindspore as ms +import numpy as np +import pytest import torch +from vllm_mindspore import npu_ops + +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + -def benchmark_advance_step_op(sampled_token_ids, - input_tokens, - input_positions, - seq_lens_tensor, - num_queries, - block_size, - block_tables, - slot_mapping): +def benchmark_advance_step_op(sampled_token_ids, input_tokens, input_positions, + seq_lens_tensor, num_queries, block_size, + block_tables, slot_mapping): # update input_tokens sampled_token_ids_list = sampled_token_ids[:num_queries].squeeze(-1) input_tokens[:num_queries] = sampled_token_ids_list @@ -48,7 +50,8 @@ def benchmark_advance_step_op(sampled_token_ids, block_idx = next_input_pos // block_size block_offset = next_input_pos % block_size - current_block_table = block_tables.gather(1, block_idx.unsqueeze(-1)).squeeze(-1) + current_block_table = block_tables.gather( + 1, block_idx.unsqueeze(-1)).squeeze(-1) slot_num = current_block_table * block_size + block_offset # update slot_mapping @@ -58,12 +61,21 @@ def benchmark_advance_step_op(sampled_token_ids, def gendata(seed, num_seqs, block_size, block_num, make_tensor): """generate inputs""" np.random.seed(seed) - sampled_token_ids = np.random.randint(65536, size=(num_seqs,), dtype=np.int64) - input_tokens = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out - input_positions = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out - seq_lens_tensor = np.random.randint(block_size * block_num - 1, size=(num_seqs,), dtype=np.int64) # inplace - block_tables = np.random.randint(1024, size=(num_seqs, block_num), dtype=np.int64) - slot_mapping = np.random.randint(100, size=(num_seqs,), dtype=np.int64) # out + sampled_token_ids = np.random.randint(65536, + size=(num_seqs, ), + dtype=np.int64) + input_tokens = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out + input_positions = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out + seq_lens_tensor = np.random.randint(block_size * block_num - 1, + size=(num_seqs, ), + dtype=np.int64) # inplace + block_tables = np.random.randint(1024, + size=(num_seqs, block_num), + dtype=np.int64) + slot_mapping = np.random.randint(100, size=(num_seqs, ), + dtype=np.int64) # out return (make_tensor(sampled_token_ids), \ make_tensor(input_tokens), \ make_tensor(input_positions), \ @@ -87,14 +99,9 @@ def test_advstepflash(): print("test seed:", seed, flush=True) sampled_token_ids1, input_tokens1, input_positions1, seq_lens_tensor1, block_tables1, slot_mapping1 = \ gendata(seed, num_seqs, block_size, block_num, torch.Tensor) - benchmark_advance_step_op(sampled_token_ids1, - input_tokens1, - input_positions1, - seq_lens_tensor1, - num_queries, - block_size, - block_tables1, - slot_mapping1) + benchmark_advance_step_op(sampled_token_ids1, input_tokens1, + input_positions1, seq_lens_tensor1, num_queries, + block_size, block_tables1, slot_mapping1) sampled_token_ids2, input_tokens2, input_positions2, seq_lens_tensor2, block_tables2, slot_mapping2 = \ gendata(seed, num_seqs, block_size, block_num, ms.Tensor) diff --git a/tests/st/python/test_sampler.py b/tests/st/python/test_sampler.py index 8066748f49f92ed27bd3c6b83ccbb4361be5ff57..79bfee55602540f5fba2961009432caf276b2ba9 100644 --- 
a/tests/st/python/test_sampler.py +++ b/tests/st/python/test_sampler.py @@ -14,25 +14,35 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -import vllm_mindspore + +# type: ignore +# isort: skip_file + import itertools import random from dataclasses import dataclass from typing import Dict, List, Optional, Tuple from unittest.mock import Mock, patch -from mindspore import mint +import vllm_mindspore import pytest import torch -from transformers import GenerationConfig, GenerationMixin - import vllm.envs as envs -from vllm_mindspore.model_executor.layers.sampler import Sampler +from transformers import GenerationConfig, GenerationMixin from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_random_seed -from vllm_mindspore.sequence import SamplingParams, SequenceData, SequenceGroupMetadata from vllm.utils import Counter, is_pin_memory_available +from vllm_mindspore.model_executor.layers.sampler import Sampler +from vllm_mindspore.sequence import (SamplingParams, SequenceData, + SequenceGroupMetadata) + +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + class MockLogitsSampler(Sampler): @@ -88,6 +98,7 @@ def _do_sample( pin_memory=is_pin_memory_available()) return sampler(logits=input_tensor, sampling_metadata=sampling_metadata) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -106,6 +117,7 @@ def test_sampler_all_greedy(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == expected[i].item() + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -130,6 +142,7 @@ def test_sampler_all_random(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == i + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -154,6 +167,7 @@ def test_sampler_all_random_seed(seed: int, device: str): for nth_output in sequence_output.samples: assert nth_output.output_token == i + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -176,6 +190,7 @@ def test_sampler_all_random_seed_deterministic(seed: int, device: str): assert first_sampler_output == second_sampler_output + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -463,6 +478,7 @@ def test_sampler_min_tokens_penalty(seed: int, device: str): for test_case in test_cases: run_test_case(**test_case) + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -566,6 +582,7 @@ def test_sampler_mixed(seed: int, device: str): # the corresponding sample in the pre-shuffled batch test_sampling() + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -648,6 +665,7 @@ def test_sampler_top_k_top_p(seed: int, device: str): torch.testing.assert_close(hf_probs, sample_probs, rtol=0.0, atol=1e-5) assert torch.equal(hf_probs.eq(0), sample_probs.eq(0)) + @pytest.mark.skip(reason="Not implemented yet") 
@pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -679,6 +697,7 @@ def test_flashinfer_fallback(seed: int, device: str): assert sampler_output == fallback_sampler_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -750,6 +769,7 @@ def test_sampler_repetition_penalty_mixed(device: str): assert tokens1[0] == tokens2[1] assert tokens1[1] == tokens2[0] + @pytest.mark.skip(reason="Not implemented yet") @pytest.mark.parametrize("device", CUDA_DEVICES) def test_sampler_include_gpu_probs_tensor(device: str): diff --git a/tests/st/python/test_sampler_v1.py b/tests/st/python/test_sampler_v1.py index 34954d0ccfb7d30a6359df6de82c0eab4c8a8df9..e7e614700f2d310b4c1055d527e82148dd7954e7 100644 --- a/tests/st/python/test_sampler_v1.py +++ b/tests/st/python/test_sampler_v1.py @@ -15,6 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ + +# type: ignore +# isort: skip_file + from typing import Optional import numpy as np @@ -26,11 +30,16 @@ from vllm.utils import make_tensor_with_pad from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.sample.sampler import Sampler +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + + VOCAB_SIZE = 1024 NUM_OUTPUT_TOKENS = 20 -CUDA_DEVICES = [ - f"cuda:{0}" -] +CUDA_DEVICES = [f"cuda:{0}"] MAX_NUM_PROMPT_TOKENS = 64 @@ -239,6 +248,7 @@ def _create_weighted_output_token_list( output_token_ids.append(output_token_ids_for_batch) return output_token_ids, sorted_token_ids_in_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -270,6 +280,7 @@ def test_sampler_min_tokens_penalty(device: str, batch_size: int): else: assert logits[batch_idx][token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -316,6 +327,7 @@ def test_sampler_presence_penalty(device: str, batch_size: int, assert non_penalized_token_id in output_token_ids[batch_idx] assert penalized_token_id not in output_token_ids[batch_idx] + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -370,6 +382,7 @@ def test_sampler_frequency_penalty(device: str, batch_size: int, assert non_penalized_token_id == most_frequent_token_id assert penalized_token_id not in distinct_sorted_token_ids_in_output + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -417,6 +430,7 @@ def test_sampler_repetition_penalty(device: str, batch_size: int, assert (non_penalized_token_id in prompt_tokens or non_penalized_token_id in output_tokens) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -458,6 +472,7 @@ def test_sampler_min_p(device: str, batch_size: int, min_p: float): # No masking when min_p is 0 assert logits[batch_idx][token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -488,11 +503,12 @@ def test_sampler_logit_bias(device: str, batch_size: int, bias_value: float): biased_index = min(batch_idx, VOCAB_SIZE - 1) for token_id in range(VOCAB_SIZE): if biased_index == token_id: - assert logits_for_req[token_id].item() == pytest.approx(bias_value + - 1e-2) + assert logits_for_req[token_id].item() == pytest.approx( + bias_value + 1e-2) 
else: assert logits_for_req[token_id].item() == pytest.approx(1e-2) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @@ -535,6 +551,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int, else: assert logits_for_req[token_id] != -float("inf") + @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard diff --git a/tests/st/python/test_vllm_deepseek_mix_parallel.py b/tests/st/python/test_vllm_deepseek_mix_parallel.py index eadecd8cc5b3573c14908b32d32ec22edb66c592..54c21a3548871e4a53259c1967d671cdd6f6ccab 100644 --- a/tests/st/python/test_vllm_deepseek_mix_parallel.py +++ b/tests/st/python/test_vllm_deepseek_mix_parallel.py @@ -15,16 +15,26 @@ # limitations under the License. # ============================================================================ """test mf deepseek r1.""" -import pytest + +# type: ignore +# isort: skip_file + import os -import tempfile import re - -from . import set_env +import tempfile from multiprocessing import Process, Queue -env_manager = set_env.EnvVarManager() +import pytest + +from . import utils +from .utils import cleanup_subprocesses + + +def teardown_function(): + cleanup_subprocesses() + +env_manager = utils.EnvVarManager() env_vars = { "MINDFORMERS_MODEL_CONFIG": "./config/predict_deepseek_r1_671b_w8a8.yaml", "ASCEND_CUSTOM_PATH": os.path.expandvars("$ASCEND_HOME_PATH/../"), @@ -41,13 +51,14 @@ env_vars = { "LCAL_COMM_ID": "127.0.0.1:10068" } env_manager.setup_ai_environment(env_vars) + import vllm_mindspore from vllm import LLM, SamplingParams from vllm.utils import get_open_port -def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port, - GPUs_per_dp_rank, prompts, except_list, result_q): +def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, + dp_master_port, GPUs_per_dp_rank, prompts, except_list, result_q): os.environ["VLLM_DP_RANK"] = str(global_dp_rank) os.environ["VLLM_DP_LOCAL"] = str(local_dp_rank) os.environ["VLLM_DP_SIZE"] = str(dp_size) @@ -70,14 +81,15 @@ def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port max_tokens=3) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - tensor_parallel_size=GPUs_per_dp_rank, - max_model_len = 4096, - max_num_batched_tokens=8, - max_num_seqs=8, - trust_remote_code=True, - enforce_eager=True, - enable_expert_parallel=True) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + tensor_parallel_size=GPUs_per_dp_rank, + max_model_len=4096, + max_num_batched_tokens=8, + max_num_seqs=8, + trust_remote_code=True, + enforce_eager=True, + enable_expert_parallel=True) outputs = llm.generate(prompts, sampling_params) # Print the outputs. 
for i, output in enumerate(outputs): @@ -88,7 +100,8 @@ def dp_func(dp_size, local_dp_rank, global_dp_rank, dp_master_ip, dp_master_port result_q.put(generated_text == except_list[i]) -def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list): +def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list): file = open('./config/predict_deepseek_r1_671b_w8a8.yaml', 'r') content = file.read() file.close() @@ -114,14 +127,14 @@ def exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, excep dp_per_node = dp_size // node_size - result_q = Queue() + result_q = Queue() # type: Queue[bool] procs = [] for local_dp_rank, global_dp_rank in enumerate( range(node_rank * dp_per_node, (node_rank + 1) * dp_per_node)): proc = Process(target=dp_func, - args=(dp_size, local_dp_rank, - global_dp_rank, dp_master_ip, dp_master_port, - tp_size, prompts, except_list, result_q)) + args=(dp_size, local_dp_rank, global_dp_rank, + dp_master_ip, dp_master_port, tp_size, + prompts, except_list, result_q)) proc.start() procs.append(proc) exit_code = 0 @@ -165,14 +178,20 @@ def exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list): f.write(content) env_manager.set_env_var("MINDFORMERS_MODEL_CONFIG", new_yaml_path) - # Create a sampling params object. - sampling_params = SamplingParams(temperature=0.0, max_tokens=3, top_k=1, top_p=1.0, + sampling_params = SamplingParams(temperature=0.0, + max_tokens=3, + top_k=1, + top_p=1.0, repetition_penalty=1.0) # Create an LLM. - llm = LLM(model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", - trust_remote_code=True, gpu_memory_utilization=0.9, tensor_parallel_size=8, max_model_len=4096) + llm = LLM( + model="/home/workspace/mindspore_dataset/weight/DeepSeek-R1-W8A8", + trust_remote_code=True, + gpu_memory_utilization=0.9, + tensor_parallel_size=8, + max_model_len=4096) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) @@ -187,7 +206,6 @@ def exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list): env_manager.unset_all() - @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.allcards @@ -197,7 +215,9 @@ def test_deepseek_r1_dp4_tp2_ep4(): """ new_yaml = "dp4_tp2_ep4.yaml" - replaced_pattern = ['data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 4' + ] dp_size = 4 tp_size = 2 # Sample prompts. @@ -207,17 +227,23 @@ def test_deepseek_r1_dp4_tp2_ep4(): ] * 4 except_list = ['ugs611ాలు'] * 4 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) -@pytest.mark.skip(reason="Currently does not support relevant communication fusion operators in 910b") +@pytest.mark.skip( + reason= + "Currently does not support relevant communication fusion operators in 910b" +) def test_deepseek_r1_dp8_tp1_ep8(): """ test case deepseek r1 w8a8 Dp8 tp1 ep8 """ new_yaml = "dp8_tp1_ep8.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 8' + ] dp_size = 8 tp_size = 1 # Sample prompts. 
@@ -227,7 +253,8 @@ def test_deepseek_r1_dp8_tp1_ep8(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -239,7 +266,9 @@ def test_deepseek_r1_dp2_tp4_ep1(): """ new_yaml = "dp2_tp4_ep1.yaml" - replaced_pattern = ['data_parallel: 2', 'model_parallel: 4', 'expert_parallel: 1'] + replaced_pattern = [ + 'data_parallel: 2', 'model_parallel: 4', 'expert_parallel: 1' + ] dp_size = 2 tp_size = 4 # Sample prompts. @@ -249,17 +278,23 @@ def test_deepseek_r1_dp2_tp4_ep1(): ] * 2 except_list = ['ugs611ాలు'] * 2 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) -@pytest.mark.skip(reason="Currently does not support relevant communication fusion operators in 910b") +@pytest.mark.skip( + reason= + "Currently does not support relevant communication fusion operators in 910b" +) def test_deepseek_r1_dp4_tp2_ep8(): """ test case deepseek r1 w8a8 dp4 tp2 ep8 """ new_yaml = "dp4_tp2_ep8.yaml" - replaced_pattern = ['data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 4', 'model_parallel: 2', 'expert_parallel: 8' + ] dp_size = 4 tp_size = 2 # Sample prompts. @@ -269,7 +304,8 @@ def test_deepseek_r1_dp4_tp2_ep8(): ] * 4 except_list = ['ugs611ాలు'] * 4 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -281,7 +317,9 @@ def test_deepseek_r1_dp8_tp1_ep1(): """ new_yaml = "dp8_tp1_ep1.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 1'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 1' + ] dp_size = 8 tp_size = 1 # Sample prompts. @@ -291,7 +329,8 @@ def test_deepseek_r1_dp8_tp1_ep1(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -303,7 +342,9 @@ def test_deepseek_r1_dp8_tp1_ep4(): """ new_yaml = "dp8_tp1_ep4.yaml" - replaced_pattern = ['data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 8', 'model_parallel: 1', 'expert_parallel: 4' + ] dp_size = 8 tp_size = 1 # Sample prompts. @@ -313,7 +354,8 @@ def test_deepseek_r1_dp8_tp1_ep4(): ] * 8 except_list = ['ugs611ాలు'] * 8 - exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, except_list) + exec_ds_with_dp(new_yaml, replaced_pattern, dp_size, tp_size, prompts, + except_list) @pytest.mark.level1 @@ -325,14 +367,16 @@ def test_deepseek_r1_tp8_ep8(): """ new_yaml = "tp8_ep8.yaml" - replaced_pattern = ['data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 8'] + replaced_pattern = [ + 'data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 8' + ] # Sample prompts. 
prompts = [ "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 " "\n情感:<|Assistant|>\n", ] - except_list=['ugs611ాలు'] + except_list = ['ugs611ాలు'] exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list) @@ -345,12 +389,14 @@ def test_deepseek_r1_tp8_ep4(): """ new_yaml = "tp8_ep4.yaml" - replaced_pattern = ['data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 4'] + replaced_pattern = [ + 'data_parallel: 1', 'model_parallel: 8', 'expert_parallel: 4' + ] # Sample prompts. prompts = [ "You are a helpful assistant.<|User|>将文本分类为中性、负面或正面。 \n文本:我认为这次假期还可以。 " "\n情感:<|Assistant|>\n", ] - except_list=['ugs611ాలు'] + except_list = ['ugs611ాలు'] exec_ds_without_dp(new_yaml, replaced_pattern, prompts, except_list) diff --git a/tests/st/python/set_env.py b/tests/st/python/utils.py similarity index 33% rename from tests/st/python/set_env.py rename to tests/st/python/utils.py index b7aa8b685229c9147793589ea3de4d78b88e4d6b..ae0fe1d1cb498b5e633f46843106cbc01207e32f 100644 --- a/tests/st/python/set_env.py +++ b/tests/st/python/utils.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 -# encoding: utf-8 # Copyright 2025 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,40 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================ + +import contextlib import os +import signal import sys -from typing import Dict, Optional - -mindformers_path = "/home/jenkins/mindspore/testcases/testcases/tests/mindformers" +from typing import Dict, List, Optional, Tuple, Union -if mindformers_path not in sys.path: - sys.path.insert(0, mindformers_path) +import psutil -current_pythonpath = os.environ.get("PYTHONPATH", "") -if current_pythonpath: - os.environ["PYTHONPATH"] = f"{mindformers_path}:{current_pythonpath}" -else: - os.environ["PYTHONPATH"] = mindformers_path class EnvVarManager: + def __init__(self): self._original_env: Dict[str, Optional[str]] = {} self._managed_vars: Dict[str, str] = {} def set_env_var(self, var_name: str, value: str) -> None: - """设置环境变量并记录原始值(如果存在)""" + """Set environment variable and record original value.""" + # Record original values corresponding to var_name, None if not exist. 
         if var_name not in self._original_env:
-            # 保存原始值,即使它不存在(保存为None)
             self._original_env[var_name] = os.environ.get(var_name)
 
         os.environ[var_name] = value
         self._managed_vars[var_name] = value
 
     def unset_env_var(self, var_name: str) -> None:
-        """取消设置之前设置的环境变量,恢复原始值"""
+        """Unset an environment variable, restoring its original value."""
         if var_name not in self._original_env:
-            raise ValueError(f"Variable {var_name} was not set by this manager")
+            raise ValueError(
+                f"Variable {var_name} was not set by this manager")
 
         original_value = self._original_env[var_name]
         if original_value is not None:
@@ -59,15 +53,97 @@ class EnvVarManager:
         del self._managed_vars[var_name]
 
     def unset_all(self) -> None:
-        """取消设置所有由该管理器设置的环境变量"""
+        """Unset all managed environment variables, restoring originals."""
         for var_name in list(self._managed_vars.keys()):
             self.unset_env_var(var_name)
 
     def get_managed_vars(self) -> Dict[str, str]:
-        """获取当前由该管理器管理的所有环境变量 """
+        """Get all variables currently managed by this manager."""
         return self._managed_vars.copy()
 
     def setup_ai_environment(self, env_vars: Dict[str, str]) -> None:
-        """设置AI相关的环境变量,使用传入的参数"""
+        """Set up the AI environment with the given variables."""
+        # Insert mindformers into PYTHONPATH.
+        mindformers_path = "/home/jenkins/mindspore/testcases/testcases/tests/mindformers"
+
+        if mindformers_path not in sys.path:
+            sys.path.insert(0, mindformers_path)
+
+        current_pythonpath = os.environ.get("PYTHONPATH", "")
+        if current_pythonpath:
+            os.environ[
+                "PYTHONPATH"] = f"{mindformers_path}:{current_pythonpath}"
+        else:
+            os.environ["PYTHONPATH"] = mindformers_path
+
+        # Update the managed environment variables.
         for var_name, value in env_vars.items():
             self.set_env_var(var_name, value)
+
+
+def cleanup_subprocesses() -> None:
+    """Clean up all subprocesses spawned by the main test process."""
+    cur_proc = psutil.Process(os.getpid())
+    children = cur_proc.children(recursive=True)
+    for child in children:
+        with contextlib.suppress(ProcessLookupError):
+            os.killpg(child.pid, signal.SIGKILL)
+
+
+def tasks_resource_alloc(tasks: List[Tuple[int, str, str]]) -> List[Tuple[str, str]]:
+    """
+    Allocate devices, an LCCL port and an HCCL base port to each task according to its device requirement.
+
+    For example:
+        [(2, "cases_parallel/vllm_task.py::test_1", "test_1.log")]
+        ==> [("export ASCEND_RT_VISIBLE_DEVICES=0,1 && export LCAL_COMM_ID=127.0.0.1:10068 && "
+              "export HCCL_IF_BASE_PORT=61000 && "
+              "pytest -s -v cases_parallel/vllm_task.py::test_1 > test_1.log",
+              "test_1.log")]
+
+    Args:
+        tasks (List[Tuple[int, str, str]]): List of tasks. Each task contains 3 elements.
+            1. device_req (int): Number of devices required. The task occupies device_req devices,
+               device_req ports for LCCL and device_req ports for HCCL.
+            2. case_desc (str): The case description, such as "path_to_case/case.py::target_case".
+            3. log_file (str): The logging file path.
+
+    Returns:
+        List[Tuple[str, str]]: (command, log_file) pairs with the resource exports prepended.
+    """
+    device_limit = 8
+    device_base = 0
+    lccl_base_port = 10068
+    hccl_base_port = 61000
+
+    out_tasks: List[Tuple[str, str]] = []
+    for task in tasks:
+        assert len(task) == 3
+        resource_req, task_case, log_file = task
+        if not isinstance(resource_req, int):
+            raise TypeError(
+                "First element of task should be an int, "
+                f"but got {type(resource_req)}!")
+
+        device_str = ",".join(
+            [str(d) for d in range(device_base, device_base + resource_req)])
+        lccl_str = f"127.0.0.1:{lccl_base_port}"
+
+        commands = [
+            f"export ASCEND_RT_VISIBLE_DEVICES={device_str}",
+            f"export LCAL_COMM_ID={lccl_str}",
+            f"export HCCL_IF_BASE_PORT={hccl_base_port}"
+        ]
+        device_base += resource_req
+        lccl_base_port += resource_req
+        hccl_base_port += resource_req
+
+        commands.append(f"pytest -s -v {task_case} > {log_file}")
+        out_tasks.append((" && ".join(commands), log_file))
+
+    if device_base > device_limit:
+        raise ValueError(
+            f"Total requested devices ({device_base}) exceed the "
+            f"resource limit ({device_limit})!")
+
+    return out_tasks
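
Appended for reviewers, not part of the patch: a minimal sketch of how a parallel entry point is expected to consume the new helper. The case paths here are placeholders in the same style as the docstring example; the expansion shown in the comments follows directly from the allocation rules above (devices, the LCCL port and the HCCL base port each advance by the previous task's device requirement).

```python
from tests.st.python.utils import tasks_resource_alloc

# Placeholder cases: (device requirement, pytest target, log file).
cases = [
    (2, "cases_parallel/vllm_task.py::test_1", "test_1.log"),
    (4, "cases_parallel/vllm_task.py::test_2", "test_2.log"),
]

for command, log_file in tasks_resource_alloc(cases):
    print(log_file, "->", command)

# Expected allocation:
#   test_1.log -> export ASCEND_RT_VISIBLE_DEVICES=0,1 &&
#                 export LCAL_COMM_ID=127.0.0.1:10068 &&
#                 export HCCL_IF_BASE_PORT=61000 &&
#                 pytest -s -v cases_parallel/vllm_task.py::test_1 > test_1.log
#   test_2.log -> export ASCEND_RT_VISIBLE_DEVICES=2,3,4,5 &&
#                 export LCAL_COMM_ID=127.0.0.1:10070 &&
#                 export HCCL_IF_BASE_PORT=61002 && ...
```

In the refactored entry points, the resulting (command, log_file) pairs are handed to the existing `run_command` helper through `Pool(len(commands))`, exactly as the unchanged context lines in the hunks above show.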