From b754bef1e0961c084f13928412f12b4360f5a20e Mon Sep 17 00:00:00 2001
From: hangangqiang
Date: Thu, 10 Apr 2025 12:23:46 +0800
Subject: [PATCH] update pkg

---
 .jenkins/test/config/dependent_packages.yaml  |  4 +-
 tests/daily_test/deepseek-r1/daily_test_ds.sh |  2 +-
 .../daily_test/llama2-13b/daily_test_llama.sh |  4 +-
 ...q_llama_runner.py => ptq_llama2_runner.py} |  0
 tests/st/ptq/ptq/ptq_network_runner.py        | 67 +----------------
 ...t_ptq_llama.py => test_llama2_accuracy.py} | 16 ++--
 tests/st/ptq/ptq/test_ptq.py                  | 74 +++++++++----------
 .../st/ptq/smooth_quant/test_smooth_quant.py  |  1 +
 8 files changed, 51 insertions(+), 117 deletions(-)
 rename tests/st/ptq/ptq/{ptq_llama_runner.py => ptq_llama2_runner.py} (100%)
 rename tests/st/ptq/ptq/{test_ptq_llama.py => test_llama2_accuracy.py} (91%)

diff --git a/.jenkins/test/config/dependent_packages.yaml b/.jenkins/test/config/dependent_packages.yaml
index e99ebbcc..d3d1a021 100644
--- a/.jenkins/test/config/dependent_packages.yaml
+++ b/.jenkins/test/config/dependent_packages.yaml
@@ -1,4 +1,4 @@
 mindspore:
-  '/mindspore/mindspore/version/202504/20250409/master_20250409114852_1abfba19d45084a81a4b7422ff3dd9207a54b9cb_newest/'
+  '/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/'
 mindformers:
-  '/mindspore/mindformers/version/202504/20250410/dev_20250410010018_434454a0d9762726825c12529a56da2b417ec4c1_newest/'
+  '/mindspore/mindformers/version/202505/20250507/dev_20250507031508_52aaafb134332b1e2118d91e943f0ffabbad0312_newest/'
diff --git a/tests/daily_test/deepseek-r1/daily_test_ds.sh b/tests/daily_test/deepseek-r1/daily_test_ds.sh
index c2e686d2..9b9fba37 100644
--- a/tests/daily_test/deepseek-r1/daily_test_ds.sh
+++ b/tests/daily_test/deepseek-r1/daily_test_ds.sh
@@ -23,7 +23,7 @@ echo "Make sure vocab_file is settled in all yaml."
 echo "Make sure load_checkpoint is settled in predict_deepseek_r1_671b_qckpt.yaml"
 echo "Make sure following config is good for you."
 # config
-MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202504/20250408/br_infer_deepseek_os_20250408004507_7e391e0536245cd8b314fe60adbb2a7206c38fd2_newest/unified/aarch64/mindspore-2.6.0-cp311-cp311-linux_aarch64.whl"
+MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
 export GSLOG=1
 export MS_ENABLE_LCCL=off
diff --git a/tests/daily_test/llama2-13b/daily_test_llama.sh b/tests/daily_test/llama2-13b/daily_test_llama.sh
index 44311461..a96d7ee0 100644
--- a/tests/daily_test/llama2-13b/daily_test_llama.sh
+++ b/tests/daily_test/llama2-13b/daily_test_llama.sh
@@ -23,8 +23,8 @@ echo "Make sure vocab_file is settled in all yaml."
 echo "Make sure load_checkpoint is settled in predict_llama2_13b_qckpt.yaml"
 echo "Make sure following config is good for you."
 # config
-MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202504/20250409/master_20250409114852_1abfba19d45084a81a4b7422ff3dd9207a54b9cb_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
-MF_PKG_LINK="https://repo.mindspore.cn/mindspore/mindformers/version/202504/20250410/dev_20250410010018_434454a0d9762726825c12529a56da2b417ec4c1_newest/any/mindformers-1.4.0b1-py3-none-any.whl"
+MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
+MF_PKG_LINK="https://repo.mindspore.cn/mindspore/mindformers/version/202505/20250507/dev_20250507031508_52aaafb134332b1e2118d91e943f0ffabbad0312_newest/any/mindformers-1.6.0-py3-none-any.whl"
 ds_type="boolq"
 dataset="${BASEPATH}/ws/gs/tests/data/boolq-dataset/dev.jsonl"
 eval_script="eval_boolq.py"
diff --git a/tests/st/ptq/ptq/ptq_llama_runner.py b/tests/st/ptq/ptq/ptq_llama2_runner.py
similarity index 100%
rename from tests/st/ptq/ptq/ptq_llama_runner.py
rename to tests/st/ptq/ptq/ptq_llama2_runner.py
diff --git a/tests/st/ptq/ptq/ptq_network_runner.py b/tests/st/ptq/ptq/ptq_network_runner.py
index f7b54e6a..ea61d229 100644
--- a/tests/st/ptq/ptq/ptq_network_runner.py
+++ b/tests/st/ptq/ptq/ptq_network_runner.py
@@ -270,6 +270,7 @@ def eval_llama2(input_, is_quant, config_path_, ckpt_path_, quant_algo_):
     helper = MFParallelLlama2Helper(config_path_)
     helper.mf_config.load_checkpoint = "" #os.path.join(cur_dir_, ckpt_path_)
     helper.mf_config.processor.tokenizer.vocab_file = vocab_file
+    helper.mf_config.context.mode = 0
     device_id = int(os.environ.get('DEVICE_ID', '0'))
     helper.mf_config.context.device_id = device_id
@@ -382,64 +383,6 @@ def ptq_llama2_predict_2stage(config_path_, fp16_ckpt_path_, quant_ckpt_path_, output_dir_, model_parallel_, quant_algo_):
     return ret
 
 
-def fp16_llama2_infer(config_path_, ckpt_path, output_dir_, example, quant_algo_):
-    """infer original float point llama2"""
-    os.environ['MS_ENABLE_INTERNAL_KERNELS'] = "on"
-    ascend_path = os.environ.get("ASCEND_HOME_PATH", "")
-    if not ascend_path:
-        os.environ['ASCEND_HOME_PATH'] = "/usr/local/Ascend/latest"
-    cur_dir_ = os.path.dirname(os.path.abspath(__file__))
-    config_path_ = os.path.join(cur_dir_, config_path_)
-    vocab_file = os.path.join(cur_dir_, "../../../data/llama2-tokenizer.model")
-
-    helper = MFParallelLlama2Helper(config_path_)
-    helper.mf_config.load_checkpoint = os.path.join(cur_dir_, ckpt_path)
-    helper.mf_config.output_dir = os.path.join(cur_dir_, output_dir_)
-    helper.mf_config.processor.tokenizer.vocab_file = vocab_file
-    device_id = int(os.environ.get('DEVICE_ID', '0'))
-    helper.mf_config.context.device_id = device_id
-    config = helper.mf_config
-
-    network = helper.create_network()
-    tokenizer = helper.create_tokenizer()
-
-    def generate_(net, tokenizer_, input_):
-        seq_len = 100
-        input_ids = tokenizer_(input_)['input_ids']
-        outputs = net.generate(input_ids, do_sample=False, max_length=seq_len, top_p=1, top_k=3)
-        return outputs
-    foutput = generate_(network, tokenizer, example)
-    ms.ms_memory_recycle()
-    file_path = f'./foutput-{quant_algo_}-{config.parallel_config.model_parallel}.npy'
-    if os.path.exists(file_path):
-        os.remove(file_path)
-    np.save(file_path, np.array(foutput))
-
-
-def ptq_llama2_predict_2stage_c8(config_path_, fp16_ckpt_path_, output_dir_, model_parallel_, quant_algo_):
-    """ptq_llama2_predict_2stage_c8"""
-    example = "Hello"
-    fp16_llama2_infer(config_path_, fp16_ckpt_path_, output_dir_,
-                      example, quant_algo_)
-    foutput = np.load(f'./foutput-{quant_algo_}-{model_parallel_}.npy')
-    qoutput, _ = eval_llama2(input_=example, is_quant=True,
-                             config_path_=config_path_, ckpt_path_=fp16_ckpt_path_,
-                             quant_algo_=quant_algo_)
-    qoutput = np.array(qoutput)
-    if model_parallel_ == 1:
-        if quant_algo_ == 'C8_Dynamic':
-            ret = np.allclose(qoutput[:, :3], foutput[:, :3], 0, 0)
-        else:
-            assert False
-    else:
-        if quant_algo_ == 'C8_Dynamic':
-            ret = np.allclose(qoutput[:, :3], foutput[:, :3], 0, 0)
-        else:
-            assert False
-    if not ret:
-        print_output(qoutput, foutput)
-    return ret
-
-
 def get_args():
     """init user options"""
     parser = argparse.ArgumentParser()
@@ -463,9 +406,7 @@ if __name__ == "__main__":
         quant_ckpt_path = f"../../../data/test_llama2/parallelLlama2-quant-1decoder-1p-{quant_algo}/rank_0/quant.ckpt"
         quant_ckpt_path = os.path.join(cur_dir, quant_ckpt_path)
         output_dir = os.path.join(cur_dir, f"../../../data/test_llama2/parallelLlama2-quant-1decoder-1p-{quant_algo}")
-        if quant_algo == "C8_Dynamic":
-            assert ptq_llama2_predict_2stage_c8(config_path, fp16_ckpt_path, output_dir, model_parallel, quant_algo)
-        elif quant_algo == "A16W4_GPTQ_per_group":
+        if quant_algo == "A16W4_GPTQ_per_group":
             assert ptq_llama2_predict_2stage(config_path_per_group, fp16_ckpt_path, quant_ckpt_path, output_dir,
                                              model_parallel, quant_algo)
         else:
@@ -479,9 +420,7 @@ if __name__ == "__main__":
         quant_ckpt_path = os.path.join(cur_dir,
                                        f"../../../data/test_llama2/parallelLlama2-quant-1decoder-2p-{quant_algo}")
         output_dir = os.path.join(cur_dir, f"../../../data/test_llama2/parallelLlama2-quant-1decoder-2p-{quant_algo}")
-        if quant_algo == "C8_Dynamic":
-            assert ptq_llama2_predict_2stage_c8(config_path, fp16_ckpt_path, output_dir, model_parallel, quant_algo)
-        elif quant_algo == "A16W4_GPTQ_per_group":
+        if quant_algo == "A16W4_GPTQ_per_group":
             assert ptq_llama2_predict_2stage(config_path_per_group, fp16_ckpt_path, quant_ckpt_path, output_dir,
                                              model_parallel, quant_algo)
         else:
diff --git a/tests/st/ptq/ptq/test_ptq_llama.py b/tests/st/ptq/ptq/test_llama2_accuracy.py
similarity index 91%
rename from tests/st/ptq/ptq/test_ptq_llama.py
rename to tests/st/ptq/ptq/test_llama2_accuracy.py
index fa599f5f..c41ea9b3 100644
--- a/tests/st/ptq/ptq/test_ptq_llama.py
+++ b/tests/st/ptq/ptq/test_llama2_accuracy.py
@@ -26,7 +26,7 @@ def ptq_predict_2stage_2p_run(quant_algo):
     Expectation: accuracy is good.
     """
     os.environ['quant_algo'] = f"{quant_algo}"
-    run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_llama_runner.py")
+    run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_llama2_runner.py")
     port = get_available_port()
     os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')")
     time.sleep(1.0)
@@ -49,7 +49,7 @@ def ptq_predict_2stage_2p_run(quant_algo):
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8c8():
+def test_ptq_llama2_a8w8c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8C8 on llama2 and check score.
@@ -60,7 +60,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a16w8c8():
+def test_ptq_llama2_a16w8c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A16W8C8 on llama2 and check score.
@@ -71,7 +71,7 @@ def test_ptq_llama2_predict_2stage_2p_run_a16w8c8():
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_c8():
+def test_ptq_llama2_c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply C8 on llama2 and check score.
@@ -82,7 +82,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8():
+def test_ptq_llama2_a8w8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8 on llama2 and check score.
@@ -93,7 +93,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a16w8():
+def test_ptq_llama2_a16w8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A16W8 on llama2 and check score.
@@ -104,7 +104,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8_dynamic():
+def test_ptq_llama2_a8dynw8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8_Dynamic on llama2 and check score.
@@ -115,7 +115,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_c8_dynamic():
+def test_ptq_llama2_c8dyn_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply C8_Dynamic on llama2 and check score.
diff --git a/tests/st/ptq/ptq/test_ptq.py b/tests/st/ptq/ptq/test_ptq.py
index 51f46fe8..d38e6745 100644
--- a/tests/st/ptq/ptq/test_ptq.py
+++ b/tests/st/ptq/ptq/test_ptq.py
@@ -748,9 +748,9 @@ def test_ptq_simplenet(non_decoder):
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-# 'A8W8', 'A16W8'
-@pytest.mark.parametrize("quant_algo", ['A8W8C8', 'A16W8C8'])
-def test_ptq_llama2_predict_2stage_1p_run_part1(quant_algo):
+# 'Quant_A8W16_Deploy_A8W8_Dynamic'
+@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ', 'A16W4_AWQ', 'A8W4_GPTQ'])
+def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo):
     """
     Feature: test PTQ adjust parameter in two stages with one cards.
     Description: apply OQ on llama2 and check accuracy.
     Expectation: accuracy is good.
@@ -779,25 +779,25 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-# 'A8W8_Dynamic', 'Quant_A8W16_Deploy_A8W8_Dynamic'
-@pytest.mark.parametrize("quant_algo", ['C8', 'C8_Dynamic', 'A16W4_GPTQ', 'A16W4_AWQ', 'A8W4_GPTQ'])
-def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo):
+@pytest.mark.parametrize("quant_algo", ['OSL_A8W8', 'OSPQuant_A8W8', 'A8W4_GPTQ'])
+def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo):
     """
-    Feature: test PTQ adjust parameter in two stages with one cards.
+    Feature: test PTQ adjust parameter in two stages with two cards.
     Description: apply OQ on llama2 and check accuracy.
     Expectation: accuracy is good.
""" + os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " - f"python {run_file} -m 1 -a {quant_algo}" + f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " + f"python {run_file} -m 2 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -809,24 +809,25 @@ def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo): @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ_per_group']) -def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A8W8_FallBack', 'A16W4_GPTQ', 'A16W4_AWQ']) +def test_ptq_llama2_predict_2stage_2p_run_part2(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with one cards. + Feature: test PTQ adjust parameter in two stages with two cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. """ + os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " - f"python {run_file} -m 1 -a {quant_algo}" + f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " + f"python {run_file} -m 2 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -836,29 +837,26 @@ def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -# 'A16W8C8' -@pytest.mark.parametrize("quant_algo", ['A8W8', 'A16W8', 'A8W8C8', 'OSL_A8W8', 'OSPQuant_A8W8', 'A8W4_GPTQ']) -def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ_per_group']) +def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with two cards. + Feature: test PTQ adjust parameter in two stages with one cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. 
""" - os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " - f"python {run_file} -m 2 -a {quant_algo}" + f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " + f"python {run_file} -m 1 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -868,29 +866,26 @@ def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -# 'A8W8_FallBack', 'A16W4_GPTQ', 'A16W4_AWQ' -@pytest.mark.parametrize("quant_algo", ['C8']) -def test_ptq_llama2_predict_2stage_2p_run_part2(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A8W8C8', 'A16W8C8']) +def test_ptq_llama2_predict_2stage_1p_run_part1(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with two cards. + Feature: test PTQ adjust parameter in two stages with one cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. """ - os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " - f"python {run_file} -m 2 -a {quant_algo}" + f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " + f"python {run_file} -m 1 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -930,10 +925,9 @@ def test_ptq_llama2_predict_2stage_2p_run_per_group(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -@pytest.mark.parametrize("quant_algo", ['A8W8_Dynamic', 'C8_Dynamic']) +@pytest.mark.parametrize("quant_algo", ['Quant_A8W16_Deploy_A8W8_Dynamic']) #FIXME: Quant_A8W16_Deploy_A8W8_Dynamic wait test_case for smooth_scale fusion to rmsnorm def test_ptq_dynamic_llama2_predict_2stage_2p_run(quant_algo): """ diff --git a/tests/st/ptq/smooth_quant/test_smooth_quant.py b/tests/st/ptq/smooth_quant/test_smooth_quant.py index faf98100..f0c61b9a 100644 --- a/tests/st/ptq/smooth_quant/test_smooth_quant.py +++ b/tests/st/ptq/smooth_quant/test_smooth_quant.py @@ -681,6 +681,7 @@ def sq_predict_llama2_2stage(): return res +@pytest.mark.skip(reason="SmoothQuant is 
deprecated.") @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard -- Gitee