From b754bef1e0961c084f13928412f12b4360f5a20e Mon Sep 17 00:00:00 2001
From: hangangqiang
Date: Thu, 10 Apr 2025 12:23:46 +0800
Subject: [PATCH] update pkg

---
 .jenkins/test/config/dependent_packages.yaml  |  4 +-
 tests/daily_test/deepseek-r1/daily_test_ds.sh |  2 +-
 .../daily_test/llama2-13b/daily_test_llama.sh |  4 +-
 ...q_llama_runner.py => ptq_llama2_runner.py} |  0
 tests/st/ptq/ptq/ptq_network_runner.py        | 67 +----------------
 ...t_ptq_llama.py => test_llama2_accuracy.py} | 16 ++--
 tests/st/ptq/ptq/test_ptq.py                  | 74 +++++++++----------
 .../st/ptq/smooth_quant/test_smooth_quant.py  |  1 +
 8 files changed, 51 insertions(+), 117 deletions(-)
 rename tests/st/ptq/ptq/{ptq_llama_runner.py => ptq_llama2_runner.py} (100%)
 rename tests/st/ptq/ptq/{test_ptq_llama.py => test_llama2_accuracy.py} (91%)

diff --git a/.jenkins/test/config/dependent_packages.yaml b/.jenkins/test/config/dependent_packages.yaml
index e99ebbcc..d3d1a021 100644
--- a/.jenkins/test/config/dependent_packages.yaml
+++ b/.jenkins/test/config/dependent_packages.yaml
@@ -1,4 +1,4 @@
 mindspore:
-  '/mindspore/mindspore/version/202504/20250409/master_20250409114852_1abfba19d45084a81a4b7422ff3dd9207a54b9cb_newest/'
+  '/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/'
 mindformers:
-  '/mindspore/mindformers/version/202504/20250410/dev_20250410010018_434454a0d9762726825c12529a56da2b417ec4c1_newest/'
+  '/mindspore/mindformers/version/202505/20250507/dev_20250507031508_52aaafb134332b1e2118d91e943f0ffabbad0312_newest/'
diff --git a/tests/daily_test/deepseek-r1/daily_test_ds.sh b/tests/daily_test/deepseek-r1/daily_test_ds.sh
index c2e686d2..9b9fba37 100644
--- a/tests/daily_test/deepseek-r1/daily_test_ds.sh
+++ b/tests/daily_test/deepseek-r1/daily_test_ds.sh
@@ -23,7 +23,7 @@ echo "Make sure vocab_file is settled in all yaml."
 echo "Make sure load_checkpoint is settled in predict_deepseek_r1_671b_qckpt.yaml"
 echo "Make sure following config is good for you."
 # config
-MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202504/20250408/br_infer_deepseek_os_20250408004507_7e391e0536245cd8b314fe60adbb2a7206c38fd2_newest/unified/aarch64/mindspore-2.6.0-cp311-cp311-linux_aarch64.whl"
+MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
 export GSLOG=1
 export MS_ENABLE_LCCL=off
diff --git a/tests/daily_test/llama2-13b/daily_test_llama.sh b/tests/daily_test/llama2-13b/daily_test_llama.sh
index 44311461..a96d7ee0 100644
--- a/tests/daily_test/llama2-13b/daily_test_llama.sh
+++ b/tests/daily_test/llama2-13b/daily_test_llama.sh
@@ -23,8 +23,8 @@ echo "Make sure vocab_file is settled in all yaml."
 echo "Make sure load_checkpoint is settled in predict_llama2_13b_qckpt.yaml"
 echo "Make sure following config is good for you."
 # config
-MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202504/20250409/master_20250409114852_1abfba19d45084a81a4b7422ff3dd9207a54b9cb_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
-MF_PKG_LINK="https://repo.mindspore.cn/mindspore/mindformers/version/202504/20250410/dev_20250410010018_434454a0d9762726825c12529a56da2b417ec4c1_newest/any/mindformers-1.4.0b1-py3-none-any.whl"
+MS_PKG_LINK="https://repo.mindspore.cn/mindspore/mindspore/version/202505/20250507/master_20250507010016_8ed48766ed50bb506b8422a5ece21d9104692196_newest/unified/aarch64/mindspore-2.6.0-cp310-cp310-linux_aarch64.whl"
+MF_PKG_LINK="https://repo.mindspore.cn/mindspore/mindformers/version/202505/20250507/dev_20250507031508_52aaafb134332b1e2118d91e943f0ffabbad0312_newest/any/mindformers-1.6.0-py3-none-any.whl"
 ds_type="boolq"
 dataset="${BASEPATH}/ws/gs/tests/data/boolq-dataset/dev.jsonl"
 eval_script="eval_boolq.py"
diff --git a/tests/st/ptq/ptq/ptq_llama_runner.py b/tests/st/ptq/ptq/ptq_llama2_runner.py
similarity index 100%
rename from tests/st/ptq/ptq/ptq_llama_runner.py
rename to tests/st/ptq/ptq/ptq_llama2_runner.py
diff --git a/tests/st/ptq/ptq/ptq_network_runner.py b/tests/st/ptq/ptq/ptq_network_runner.py
index f7b54e6a..ea61d229 100644
--- a/tests/st/ptq/ptq/ptq_network_runner.py
+++ b/tests/st/ptq/ptq/ptq_network_runner.py
@@ -270,6 +270,7 @@ def eval_llama2(input_, is_quant, config_path_, ckpt_path_, quant_algo_):
     helper = MFParallelLlama2Helper(config_path_)
     helper.mf_config.load_checkpoint = "" #os.path.join(cur_dir_, ckpt_path_)
     helper.mf_config.processor.tokenizer.vocab_file = vocab_file
+    helper.mf_config.context.mode = 0
     device_id = int(os.environ.get('DEVICE_ID', '0'))
     helper.mf_config.context.device_id = device_id
@@ -382,64 +383,6 @@ def ptq_llama2_predict_2stage(config_path_, fp16_ckpt_path_, quant_ckpt_path_, output_dir_, model_parallel_, quant_algo_):
     return ret
 
 
-def fp16_llama2_infer(config_path_, ckpt_path, output_dir_, example, quant_algo_):
-    """infer original float point llama2"""
-    os.environ['MS_ENABLE_INTERNAL_KERNELS'] = "on"
-    ascend_path = os.environ.get("ASCEND_HOME_PATH", "")
-    if not ascend_path:
-        os.environ['ASCEND_HOME_PATH'] = "/usr/local/Ascend/latest"
-    cur_dir_ = os.path.dirname(os.path.abspath(__file__))
-    config_path_ = os.path.join(cur_dir_, config_path_)
-    vocab_file = os.path.join(cur_dir_, "../../../data/llama2-tokenizer.model")
-
-    helper = MFParallelLlama2Helper(config_path_)
-    helper.mf_config.load_checkpoint = os.path.join(cur_dir_, ckpt_path)
-    helper.mf_config.output_dir = os.path.join(cur_dir_, output_dir_)
-    helper.mf_config.processor.tokenizer.vocab_file = vocab_file
-    device_id = int(os.environ.get('DEVICE_ID', '0'))
-    helper.mf_config.context.device_id = device_id
-    config = helper.mf_config
-
-    network = helper.create_network()
-    tokenizer = helper.create_tokenizer()
-
-    def generate_(net, tokenizer_, input_):
-        seq_len = 100
-        input_ids = tokenizer_(input_)['input_ids']
-        outputs = net.generate(input_ids, do_sample=False, max_length=seq_len, top_p=1, top_k=3)
-        return outputs
-    foutput = generate_(network, tokenizer, example)
-    ms.ms_memory_recycle()
-    file_path = f'./foutput-{quant_algo_}-{config.parallel_config.model_parallel}.npy'
-    if os.path.exists(file_path):
-        os.remove(file_path)
-    np.save(file_path, np.array(foutput))
-
-
-def ptq_llama2_predict_2stage_c8(config_path_, fp16_ckpt_path_, output_dir_, model_parallel_, quant_algo_):
-    """ptq_llama2_predict_2stage_c8"""
-    example = "Hello"
-    fp16_llama2_infer(config_path_, fp16_ckpt_path_, output_dir_,
-                      example, quant_algo_)
-    foutput = np.load(f'./foutput-{quant_algo_}-{model_parallel_}.npy')
-    qoutput, _ = eval_llama2(input_=example, is_quant=True,
-                             config_path_=config_path_, ckpt_path_=fp16_ckpt_path_,
-                             quant_algo_=quant_algo_)
-    qoutput = np.array(qoutput)
-    if model_parallel_ == 1:
-        if quant_algo_ == 'C8_Dynamic':
-            ret = np.allclose(qoutput[:, :3], foutput[:, :3], 0, 0)
-        else:
-            assert False
-    else:
-        if quant_algo_ == 'C8_Dynamic':
-            ret = np.allclose(qoutput[:, :3], foutput[:, :3], 0, 0)
-        else:
-            assert False
-    if not ret:
-        print_output(qoutput, foutput)
-    return ret
-
-
 def get_args():
     """init user options"""
     parser = argparse.ArgumentParser()
@@ -463,9 +406,7 @@ if __name__ == "__main__":
         quant_ckpt_path = f"../../../data/test_llama2/parallelLlama2-quant-1decoder-1p-{quant_algo}/rank_0/quant.ckpt"
         quant_ckpt_path = os.path.join(cur_dir, quant_ckpt_path)
         output_dir = os.path.join(cur_dir, f"../../../data/test_llama2/parallelLlama2-quant-1decoder-1p-{quant_algo}")
-        if quant_algo == "C8_Dynamic":
-            assert ptq_llama2_predict_2stage_c8(config_path, fp16_ckpt_path, output_dir, model_parallel, quant_algo)
-        elif quant_algo == "A16W4_GPTQ_per_group":
+        if quant_algo == "A16W4_GPTQ_per_group":
             assert ptq_llama2_predict_2stage(config_path_per_group, fp16_ckpt_path, quant_ckpt_path, output_dir,
                                              model_parallel, quant_algo)
         else:
@@ -479,9 +420,7 @@ if __name__ == "__main__":
         quant_ckpt_path = os.path.join(cur_dir,
                                        f"../../../data/test_llama2/parallelLlama2-quant-1decoder-2p-{quant_algo}")
         output_dir = os.path.join(cur_dir, f"../../../data/test_llama2/parallelLlama2-quant-1decoder-2p-{quant_algo}")
-        if quant_algo == "C8_Dynamic":
-            assert ptq_llama2_predict_2stage_c8(config_path, fp16_ckpt_path, output_dir, model_parallel, quant_algo)
-        elif quant_algo == "A16W4_GPTQ_per_group":
+        if quant_algo == "A16W4_GPTQ_per_group":
             assert ptq_llama2_predict_2stage(config_path_per_group, fp16_ckpt_path, quant_ckpt_path, output_dir,
                                              model_parallel, quant_algo)
         else:
diff --git a/tests/st/ptq/ptq/test_ptq_llama.py b/tests/st/ptq/ptq/test_llama2_accuracy.py
similarity index 91%
rename from tests/st/ptq/ptq/test_ptq_llama.py
rename to tests/st/ptq/ptq/test_llama2_accuracy.py
index fa599f5f..c41ea9b3 100644
--- a/tests/st/ptq/ptq/test_ptq_llama.py
+++ b/tests/st/ptq/ptq/test_llama2_accuracy.py
@@ -26,7 +26,7 @@ def ptq_predict_2stage_2p_run(quant_algo):
     Expectation: accuracy is good.
     """
     os.environ['quant_algo'] = f"{quant_algo}"
-    run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_llama_runner.py")
+    run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_llama2_runner.py")
     port = get_available_port()
     os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')")
     time.sleep(1.0)
@@ -49,7 +49,7 @@ def ptq_predict_2stage_2p_run(quant_algo):
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8c8():
+def test_ptq_llama2_a8w8c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8C8 on llama2 and check score.
@@ -60,7 +60,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a16w8c8():
+def test_ptq_llama2_a16w8c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A16W8C8 on llama2 and check score.
@@ -71,7 +71,7 @@ def test_ptq_llama2_predict_2stage_2p_run_a16w8c8():
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_c8():
+def test_ptq_llama2_c8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply C8 on llama2 and check score.
@@ -82,7 +82,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8():
+def test_ptq_llama2_a8w8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8 on llama2 and check score.
@@ -93,7 +93,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a16w8():
+def test_ptq_llama2_a16w8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A16W8 on llama2 and check score.
@@ -104,7 +104,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_a8w8_dynamic():
+def test_ptq_llama2_a8dynw8_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply A8W8_Dynamic on llama2 and check score.
@@ -115,7 +115,7 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-def test_ptq_llama2_predict_2stage_2p_run_c8_dynamic():
+def test_ptq_llama2_c8dyn_accuracy():
     """
     Feature: test omni quant adjust parameter in two stages with two cards.
     Description: apply C8_Dynamic on llama2 and check score.
diff --git a/tests/st/ptq/ptq/test_ptq.py b/tests/st/ptq/ptq/test_ptq.py
index 51f46fe8..d38e6745 100644
--- a/tests/st/ptq/ptq/test_ptq.py
+++ b/tests/st/ptq/ptq/test_ptq.py
@@ -748,9 +748,9 @@ def test_ptq_simplenet(non_decoder):
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-# 'A8W8', 'A16W8'
-@pytest.mark.parametrize("quant_algo", ['A8W8C8', 'A16W8C8'])
-def test_ptq_llama2_predict_2stage_1p_run_part1(quant_algo):
+# 'Quant_A8W16_Deploy_A8W8_Dynamic'
+@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ', 'A16W4_AWQ', 'A8W4_GPTQ'])
+def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo):
     """
     Feature: test PTQ adjust parameter in two stages with one cards.
     Description: apply OQ on llama2 and check accuracy.
     Expectation: accuracy is good.
@@ -779,25 +779,25 @@
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend910b_training
 @pytest.mark.env_single
-# 'A8W8_Dynamic', 'Quant_A8W16_Deploy_A8W8_Dynamic'
-@pytest.mark.parametrize("quant_algo", ['C8', 'C8_Dynamic', 'A16W4_GPTQ', 'A16W4_AWQ', 'A8W4_GPTQ'])
-def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo):
+@pytest.mark.parametrize("quant_algo", ['OSL_A8W8', 'OSPQuant_A8W8', 'A8W4_GPTQ'])
+def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo):
     """
-    Feature: test PTQ adjust parameter in two stages with one cards.
+    Feature: test PTQ adjust parameter in two stages with two cards.
     Description: apply OQ on llama2 and check accuracy.
     Expectation: accuracy is good.
""" + os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " - f"python {run_file} -m 1 -a {quant_algo}" + f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " + f"python {run_file} -m 2 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -809,24 +809,25 @@ def test_ptq_llama2_predict_2stage_1p_run_part2(quant_algo): @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ_per_group']) -def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A8W8_FallBack', 'A16W4_GPTQ', 'A16W4_AWQ']) +def test_ptq_llama2_predict_2stage_2p_run_part2(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with one cards. + Feature: test PTQ adjust parameter in two stages with two cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. """ + os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " - f"python {run_file} -m 1 -a {quant_algo}" + f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " + f"python {run_file} -m 2 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -836,29 +837,26 @@ def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -# 'A16W8C8' -@pytest.mark.parametrize("quant_algo", ['A8W8', 'A16W8', 'A8W8C8', 'OSL_A8W8', 'OSPQuant_A8W8', 'A8W4_GPTQ']) -def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A16W4_GPTQ_per_group']) +def test_ptq_llama2_predict_2stage_1p_run_per_group(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with two cards. + Feature: test PTQ adjust parameter in two stages with one cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. 
""" - os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " - f"python {run_file} -m 2 -a {quant_algo}" + f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " + f"python {run_file} -m 1 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -868,29 +866,26 @@ def test_ptq_llama2_predict_2stage_2p_run_part1(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -# 'A8W8_FallBack', 'A16W4_GPTQ', 'A16W4_AWQ' -@pytest.mark.parametrize("quant_algo", ['C8']) -def test_ptq_llama2_predict_2stage_2p_run_part2(quant_algo): +@pytest.mark.parametrize("quant_algo", ['A8W8C8', 'A16W8C8']) +def test_ptq_llama2_predict_2stage_1p_run_part1(quant_algo): """ - Feature: test PTQ adjust parameter in two stages with two cards. + Feature: test PTQ adjust parameter in two stages with one cards. Description: apply OQ on llama2 and check accuracy. Expectation: accuracy is good. """ - os.environ['quant_algo'] = f"{quant_algo}" run_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ptq_network_runner.py") port = get_available_port() os.system(f"kill -9 $(lsof -i:{port} | " + "awk '{print $2}')") time.sleep(1.0) return_code = os.system( - f"msrun --worker_num=2 --local_worker_num=2 --master_addr=127.0.0.1 " - f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_2p_logs " - f"python {run_file} -m 2 -a {quant_algo}" + f"msrun --worker_num=1 --local_worker_num=1 --master_addr=127.0.0.1 " + f"--master_port={port} --join=True --log_dir=./test_ptq_{quant_algo}_predict_llama2_1p_logs " + f"python {run_file} -m 1 -a {quant_algo}" ) if return_code != 0: - log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_2p_logs/worker_0.log", "r", encoding="utf-8") + log_file = open(f"./test_ptq_{quant_algo}_predict_llama2_1p_logs/worker_0.log", "r", encoding="utf-8") for line in log_file: print(line, flush=True) log_file.close() @@ -930,10 +925,9 @@ def test_ptq_llama2_predict_2stage_2p_run_per_group(quant_algo): assert return_code == 0 -@pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single -@pytest.mark.parametrize("quant_algo", ['A8W8_Dynamic', 'C8_Dynamic']) +@pytest.mark.parametrize("quant_algo", ['Quant_A8W16_Deploy_A8W8_Dynamic']) #FIXME: Quant_A8W16_Deploy_A8W8_Dynamic wait test_case for smooth_scale fusion to rmsnorm def test_ptq_dynamic_llama2_predict_2stage_2p_run(quant_algo): """ diff --git a/tests/st/ptq/smooth_quant/test_smooth_quant.py b/tests/st/ptq/smooth_quant/test_smooth_quant.py index faf98100..f0c61b9a 100644 --- a/tests/st/ptq/smooth_quant/test_smooth_quant.py +++ b/tests/st/ptq/smooth_quant/test_smooth_quant.py @@ -681,6 +681,7 @@ def sq_predict_llama2_2stage(): return res +@pytest.mark.skip(reason="SmoothQuant is 
deprecated.") @pytest.mark.level0 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard -- Gitee