From fb4ea356a880bd1e1dfe6c1d01f6c06981c664b1 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Wed, 2 Aug 2023 04:09:02 +0000 Subject: [PATCH 01/15] add debug/accuracy_tools/api_accuracy_checker/common/config.py. Signed-off-by: sunyiming --- .../api_accuracy_checker/common/config.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 debug/accuracy_tools/api_accuracy_checker/common/config.py diff --git a/debug/accuracy_tools/api_accuracy_checker/common/config.py b/debug/accuracy_tools/api_accuracy_checker/common/config.py new file mode 100644 index 00000000000..fb06c9d74bb --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/common/config.py @@ -0,0 +1,34 @@ +import yaml +import os + +class Config: + def __init__(self, yaml_file): + if not os.path.exists(yaml_file): + raise ValueError(f"File {yaml_file} does not exist") + with open(yaml_file, 'r') as file: + config = yaml.safe_load(file) + self.dump_path = config['dump_path'] + self.jit_compile = config['jit_compile'] + self.compile_option = config['compile_option'] + self.compare_algorithm = config['compare_algorithm'] + self.real_data = config['real_data'] + self.dump_step = config['dump_step'] + + def __str__(self): + return ( + f"dump_path={self.dump_path}\n" + f"jit_compile={self.jit_compile}\n" + f"compile_option={self.compile_option}\n" + f"compare_algorithm={self.compare_algorithm}\n" + f"real_data={self.real_data}\n" + f"dump_step={self.dump_step}\n" + ) + + def update_config(self, **kwargs): + for key, value in kwargs.items(): + if hasattr(self, key): + setattr(self, key, value) + else: + raise ValueError(f"Invalid key '{key}'") + +msCheckerConfig = Config('./config.yaml') \ No newline at end of file -- Gitee From 3f41d0734e2e420c37e2120f42fd698076bb04d7 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Wed, 2 Aug 2023 04:10:22 +0000 Subject: [PATCH 02/15] update debug/accuracy_tools/api_accuracy_checker/dump/__init__.py. Signed-off-by: sunyiming --- debug/accuracy_tools/api_accuracy_checker/dump/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py index 397b31422b8..b9d6e75a19a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py @@ -2,9 +2,10 @@ from ..hook_module.register_hook import initialize_hook from .dump import pretest_hook from .info_dump import initialize_output_json from .utils import set_dump_switch +from ..common.config import msCheckerConfig initialize_hook(pretest_hook) initialize_output_json() -__all__ = ['set_dump_switch'] \ No newline at end of file +__all__ = ['set_dump_switch', 'msCheckerConfig'] \ No newline at end of file -- Gitee From eb40fb6ca9ea3a7cb1b00d3853a82647d9390852 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Wed, 2 Aug 2023 04:12:02 +0000 Subject: [PATCH 03/15] add debug/accuracy_tools/api_accuracy_checker/config.yaml. Signed-off-by: sunyiming --- debug/accuracy_tools/api_accuracy_checker/config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 debug/accuracy_tools/api_accuracy_checker/config.yaml diff --git a/debug/accuracy_tools/api_accuracy_checker/config.yaml b/debug/accuracy_tools/api_accuracy_checker/config.yaml new file mode 100644 index 00000000000..38a1a3c47b1 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/config.yaml @@ -0,0 +1,6 @@ +dump_path: './api_info' +jit_compile: True +compile_option: -O3 +compare_algorithm: cosine_similarity +real_data: False +dump_step: 1000 \ No newline at end of file -- Gitee From fbdbe2c1f71d6a01c4c5cc453fc5a912a0a4ef5a Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 2 Aug 2023 15:20:31 +0800 Subject: [PATCH 04/15] get from config --- .../api_accuracy_checker/dump/api_info.py | 5 +++-- .../api_accuracy_checker/dump/info_dump.py | 6 +++--- .../api_accuracy_checker/dump/utils.py | 12 ------------ 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index e50e95b46eb..6dd0c8438e9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -1,13 +1,14 @@ # 定义API INFO,保存基本信息,用于后续结构体的落盘,注意考虑random场景及真实数据场景 import inspect import torch -from .utils import DumpUtil, DumpConst, write_npy +from .utils import DumpConst, write_npy from ..common.utils import print_error_log +from ..common.config import msCheckerConfig class APIInfo: def __init__(self, api_name): self.api_name = api_name - self.save_real_data = DumpUtil.save_real_data + self.save_real_data = msCheckerConfig.real_data def analyze_element(self, element): if isinstance(element, (list, tuple)): diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 0f76069a7f9..8e7a5ef2f1b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -5,13 +5,13 @@ import threading import numpy as np from .api_info import ForwardAPIInfo, BackwardAPIInfo -from .utils import DumpUtil from ..common.utils import check_file_or_directory_path +from ..common.config import msCheckerConfig lock = threading.Lock() def write_api_info_json(api_info): - dump_path = DumpUtil.dump_path + dump_path = msCheckerConfig.dump_path initialize_output_json() if isinstance(api_info, ForwardAPIInfo): file_path = os.path.join(dump_path, 'forward_info.json') @@ -48,7 +48,7 @@ def write_json(file_path, data, indent=None): lock.release() def initialize_output_json(): - dump_path = DumpUtil.dump_path + dump_path = msCheckerConfig.dump_path check_file_or_directory_path(dump_path,True) files = ['forward_info.json', 'backward_info.json', 'stack_info.json'] for file in files: diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py index 1aaf7a3d53a..1c1370a841d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py @@ -32,24 +32,12 @@ def set_dump_switch(switch): DumpUtil.set_dump_switch(switch) class DumpUtil(object): - save_real_data = False - dump_path = './random_data_jsons' dump_switch = None - @staticmethod - def set_dump_path(save_path): - DumpUtil.dump_path = save_path - DumpUtil.dump_init_enable = True - @staticmethod def set_dump_switch(switch): DumpUtil.dump_switch = switch - @staticmethod - def get_dump_path(): - if DumpUtil.dump_path: - return DumpUtil.dump_path - @staticmethod def get_dump_switch(): return DumpUtil.dump_switch == "ON" \ No newline at end of file -- Gitee From 389ea329f1149ad0e26e1a0d617f1b99aa3a2b3d Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 2 Aug 2023 15:28:22 +0800 Subject: [PATCH 05/15] bug fix --- .../api_accuracy_checker/dump/info_dump.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 8e7a5ef2f1b..c887132f698 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -12,7 +12,6 @@ lock = threading.Lock() def write_api_info_json(api_info): dump_path = msCheckerConfig.dump_path - initialize_output_json() if isinstance(api_info, ForwardAPIInfo): file_path = os.path.join(dump_path, 'forward_info.json') stack_file_path = os.path.join(dump_path, 'stack_info.json') @@ -26,13 +25,13 @@ def write_api_info_json(api_info): raise ValueError(f"Invalid api_info type {type(api_info)}") def write_json(file_path, data, indent=None): - check_file_or_directory_path(file_path,False) + check_file_or_directory_path(os.path.dirname(file_path),True) with open(file_path, 'w') as f: f.write("{\n}") - try: - lock.acquire() - with open(file_path, 'a+') as f: - fcntl.flock(f, fcntl.LOCK_EX) + lock.acquire() + with open(file_path, 'a+') as f: + fcntl.flock(f, fcntl.LOCK_EX) + try: f.seek(0, os.SEEK_END) f.seek(f.tell() - 1, os.SEEK_SET) f.truncate() @@ -41,11 +40,11 @@ def write_json(file_path, data, indent=None): f.truncate() f.write(',\n') f.write(json.dumps(data, indent=indent)[1:-1] + '\n}') - except Exception as e: - raise ValueError(f"Json save failed:{e}") - finally: - fcntl.flock(f, fcntl.LOCK_UN) - lock.release() + except Exception as e: + raise ValueError(f"Json save failed:{e}") + finally: + fcntl.flock(f, fcntl.LOCK_UN) + lock.release() def initialize_output_json(): dump_path = msCheckerConfig.dump_path -- Gitee From 2d738020cb6ba02166b4562c7e92bc51da40c4ad Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 2 Aug 2023 15:48:04 +0800 Subject: [PATCH 06/15] bug fix --- debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index c887132f698..3440e8b3a89 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -26,8 +26,9 @@ def write_api_info_json(api_info): def write_json(file_path, data, indent=None): check_file_or_directory_path(os.path.dirname(file_path),True) - with open(file_path, 'w') as f: - f.write("{\n}") + if not os.path.exists(file_path): + with open(file_path, 'w') as f: + f.write("{\n}") lock.acquire() with open(file_path, 'a+') as f: fcntl.flock(f, fcntl.LOCK_EX) -- Gitee From 84ca2f7b4f48757e713070ea2442b6581e341c40 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Wed, 2 Aug 2023 08:04:20 +0000 Subject: [PATCH 07/15] update debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py. Signed-off-by: sunyiming --- .../api_accuracy_checker/dump/info_dump.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 3440e8b3a89..386db6eeceb 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -48,10 +48,13 @@ def write_json(file_path, data, indent=None): lock.release() def initialize_output_json(): - dump_path = msCheckerConfig.dump_path + dump_path = DumpUtil.dump_path check_file_or_directory_path(dump_path,True) - files = ['forward_info.json', 'backward_info.json', 'stack_info.json'] - for file in files: - file_path = os.path.join(dump_path, file) - if os.path.exists(file_path): - raise ValueError(f"file {file_path} already exists, please remove it first or use a new dump path") \ No newline at end of file + + files_in_dir = os.listdir(dump_path) + pattern = re.compile(r"(forward|backward|stack)_info_[0-9].json") + match = re.search(pattern, ''.join(files_in_dir)) + if match: + match_file = match.group() + file_path = os.path.join(DumpUtil.dump_path, match_file) + raise ValueError(f"file {file_path} already exists, please remove it first or use a new dump path") \ No newline at end of file -- Gitee From e268320609b7292e3a55610135b446c7714a2836 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 01:24:07 +0000 Subject: [PATCH 08/15] update debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py. Signed-off-by: sunyiming --- debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 386db6eeceb..596bce879d1 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -48,7 +48,7 @@ def write_json(file_path, data, indent=None): lock.release() def initialize_output_json(): - dump_path = DumpUtil.dump_path + dump_path = msCheckerConfig.dump_path check_file_or_directory_path(dump_path,True) files_in_dir = os.listdir(dump_path) -- Gitee From 057312ef54cb217830c0e751963527e905281966 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 01:27:33 +0000 Subject: [PATCH 09/15] update debug/accuracy_tools/api_accuracy_checker/common/config.py. Signed-off-by: sunyiming --- debug/accuracy_tools/api_accuracy_checker/common/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/config.py b/debug/accuracy_tools/api_accuracy_checker/common/config.py index fb06c9d74bb..60f920024ed 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/config.py @@ -31,4 +31,6 @@ class Config: else: raise ValueError(f"Invalid key '{key}'") -msCheckerConfig = Config('./config.yaml') \ No newline at end of file +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "config.yaml") +msCheckerConfig = Config(yaml_path) \ No newline at end of file -- Gitee From a8e1be7fb378f096433802dbf5bfbdf619bb7a4d Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 03:56:06 +0000 Subject: [PATCH 10/15] Revert "Merge branch 'master' of gitee.com:ascend/att into master" This reverts commit ca721df1aae27c3fb49675a4c9a9d0f9c7085b63. --- ...77\347\224\250\346\226\271\346\263\225.md" | 30 ------------------- .../api_accuracy_checker/common/utils.py | 2 +- .../api_accuracy_checker/compare/algorithm.py | 5 ---- .../api_accuracy_checker/compare/compare.py | 4 +-- .../api_accuracy_checker/dump/api_info.py | 7 ++--- .../api_accuracy_checker/dump/dump.py | 4 +-- .../api_accuracy_checker/dump/info_dump.py | 7 ++--- .../api_accuracy_checker/dump/utils.py | 5 ++++ 8 files changed, 15 insertions(+), 49 deletions(-) delete mode 100644 "debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" diff --git "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" deleted file mode 100644 index c2c8f456c23..00000000000 --- "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" +++ /dev/null @@ -1,30 +0,0 @@ -# Ascend模型精度预检工具 - -## 使用方式 - -1. 安装遇见工具 - - 将att仓代码下载到本地,并配置环境变量。假设att仓本地路径为 {att_root},环境变量应配置为 - - ``` - export PYTHONPATH=$PYTHONPATH:{att_root}/debug/accuracy_tools/ - ``` - -2. 使用工具dump模块抓取网络所有API信息 - - ``` - from api_accuracy_checker.dump import set_dump_switch - set_dump_switch("ON") - ``` - -​ dump信息默认会存盘到./api_info/路径下,后缀的数字代表rank id - -3. 将上述信息输入给run_ut模块运行精度检测并比对 - - ``` - cd run_ut - python run_ut.py --forward ./api_info/forward_info_0.json --backward ./api_info/backward_info_0.json - ``` - - forward和backward两个命令行参数根据实际情况配置。比对结果存盘位置会打屏显示,默认是'./',可以在运行run_ut.py时通过 --out_path命令行参数配置。 - diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index e62c9616ca7..dac54a79fc8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -510,7 +510,7 @@ LINUX_FILE_NAME_LENGTH_LIMIT = 200 def check_path_length_valid(path): path = os.path.realpath(path) - return len(os.path.basename(path)) <= LINUX_FILE_NAME_LENGTH_LIMIT + return len(os.path.basename(path) <= LINUX_FILE_NAME_LENGTH_LIMIT) def check_path_pattern_valid(path): diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py index 88df23cacac..9ccdb05baa8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py @@ -39,11 +39,6 @@ def get_max_rel_err(n_value, b_value): return 1, False -def cosine_standard(compare_result): - bool_result = np.array(compare_result) > 0.99 - return np.all(bool_result), bool_result - - def cosine_sim(cpu_output, npu_output): n_value = npu_output.cpu().detach().numpy().reshape(-1) b_value = cpu_output.detach().numpy().reshape(-1) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index ae869e208e2..eb1b2586e9a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -1,6 +1,6 @@ # 进行比对及结果展示 import os -from prettytable import PrettyTable +from prettytable import Prettytable from .algorithm import compare_core, cosine_sim, cosine_standard from ..common.utils import get_json_contents, print_error_log, print_info_log, write_csv from .compare_utils import CompareConst @@ -32,7 +32,7 @@ class Comparator: "forward_and_backward_not_pass": self.test_result_cnt['forward_and_backward_fail_num'], "pass": self.test_result_cnt['success_num'] } - tb = PrettyTable() + tb = Prettytable() tb.add_column("Category", list(res_dict.keys())) tb.add_column("statistics",list(res_dict.values())) info_tb = str(tb) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 7456ce19be2..6dd0c8438e9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -1,14 +1,12 @@ # 定义API INFO,保存基本信息,用于后续结构体的落盘,注意考虑random场景及真实数据场景 import inspect import torch -import torch_npu -from .utils import DumpUtil, DumpConst, write_npy +from .utils import DumpConst, write_npy from ..common.utils import print_error_log from ..common.config import msCheckerConfig class APIInfo: def __init__(self, api_name): - self.rank = torch_npu.npu.current_device() self.api_name = api_name self.save_real_data = msCheckerConfig.real_data @@ -108,10 +106,9 @@ class ForwardAPIInfo(APIInfo): def analyze_api_call_stack(self): stack_str = [] for (_, path, line, func, code, _) in inspect.stack()[3:]: - if not code: continue stack_line = " ".join([ "File", ", ".join([path, " ".join(["line", str(line)]), " ".join(["in", func]), - " ".join(["\n", code[0].strip()])])]) + " ".join(["\n", code[0].strip() if code else code])])]) stack_str.append(stack_line) self.stack_info_struct = {self.api_name: stack_str} diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py index ade72d3ebae..6ffc77578f1 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py @@ -24,7 +24,7 @@ import torch import threading -from .api_info import ForwardAPIInfo, BackwardAPIInfo +from .utils import ForwardAPIInfo, BackwardAPIInfo from .info_dump import write_api_info_json from .utils import DumpConst, DumpUtil from ..common.utils import print_warn_log, print_info_log, print_error_log @@ -35,7 +35,7 @@ def pretest_info_dump(name, out_feat, module, phase): if phase == DumpConst.forward: api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs) elif phase == DumpConst.backward: - api_info = BackwardAPIInfo(name, out_feat) + api_info = BackwardApiInfo(name, out_feat) else: msg = "Unexpected training phase {}.".format(phase) print_error_log(msg) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index e09085e2573..596bce879d1 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -12,15 +12,14 @@ lock = threading.Lock() def write_api_info_json(api_info): dump_path = msCheckerConfig.dump_path - rank = api_info.rank if isinstance(api_info, ForwardAPIInfo): - file_path = os.path.join(dump_path, f'forward_info_{rank}.json') - stack_file_path = os.path.join(dump_path, f'stack_info_{rank}.json') + file_path = os.path.join(dump_path, 'forward_info.json') + stack_file_path = os.path.join(dump_path, 'stack_info.json') write_json(file_path, api_info.api_info_struct) write_json(stack_file_path, api_info.stack_info_struct, indent=4) elif isinstance(api_info, BackwardAPIInfo): - file_path = os.path.join(dump_path, f'backward_info_{rank}.json') + file_path = os.path.join(dump_path, 'backward_info.json') write_json(file_path, api_info.grad_info_struct) else: raise ValueError(f"Invalid api_info type {type(api_info)}") diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py index e8a874764da..1c1370a841d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py @@ -6,6 +6,11 @@ import numpy as np from ..common.utils import print_error_log, CompareException, DumpException, Const, get_time, print_info_log, \ check_mode_valid, get_api_name_from_matcher +from ..common.version import __version__ + +dump_count = 0 +range_begin_flag, range_end_flag = False, False + class DumpConst: delimiter = '*' forward = 'forward' -- Gitee From f94b7611e62f690aa186f18d7d89259525f1e49e Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 03:57:26 +0000 Subject: [PATCH 11/15] Revert "update debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py." This reverts commit 84ca2f7b4f48757e713070ea2442b6581e341c40. --- .../api_accuracy_checker/dump/info_dump.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 596bce879d1..3440e8b3a89 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -50,11 +50,8 @@ def write_json(file_path, data, indent=None): def initialize_output_json(): dump_path = msCheckerConfig.dump_path check_file_or_directory_path(dump_path,True) - - files_in_dir = os.listdir(dump_path) - pattern = re.compile(r"(forward|backward|stack)_info_[0-9].json") - match = re.search(pattern, ''.join(files_in_dir)) - if match: - match_file = match.group() - file_path = os.path.join(DumpUtil.dump_path, match_file) - raise ValueError(f"file {file_path} already exists, please remove it first or use a new dump path") \ No newline at end of file + files = ['forward_info.json', 'backward_info.json', 'stack_info.json'] + for file in files: + file_path = os.path.join(dump_path, file) + if os.path.exists(file_path): + raise ValueError(f"file {file_path} already exists, please remove it first or use a new dump path") \ No newline at end of file -- Gitee From a7acda5fb06267def52fd941b88ad480473e753b Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 03:58:56 +0000 Subject: [PATCH 12/15] Revert "Revert "Merge branch 'master' of gitee.com:ascend/att into master"" This reverts commit a8e1be7fb378f096433802dbf5bfbdf619bb7a4d. --- ...77\347\224\250\346\226\271\346\263\225.md" | 30 +++++++++++++++++++ .../api_accuracy_checker/common/utils.py | 2 +- .../api_accuracy_checker/compare/algorithm.py | 5 ++++ .../api_accuracy_checker/compare/compare.py | 4 +-- .../api_accuracy_checker/dump/api_info.py | 7 +++-- .../api_accuracy_checker/dump/dump.py | 4 +-- .../api_accuracy_checker/dump/info_dump.py | 7 +++-- .../api_accuracy_checker/dump/utils.py | 5 ---- 8 files changed, 49 insertions(+), 15 deletions(-) create mode 100644 "debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" diff --git "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" new file mode 100644 index 00000000000..c2c8f456c23 --- /dev/null +++ "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" @@ -0,0 +1,30 @@ +# Ascend模型精度预检工具 + +## 使用方式 + +1. 安装遇见工具 + + 将att仓代码下载到本地,并配置环境变量。假设att仓本地路径为 {att_root},环境变量应配置为 + + ``` + export PYTHONPATH=$PYTHONPATH:{att_root}/debug/accuracy_tools/ + ``` + +2. 使用工具dump模块抓取网络所有API信息 + + ``` + from api_accuracy_checker.dump import set_dump_switch + set_dump_switch("ON") + ``` + +​ dump信息默认会存盘到./api_info/路径下,后缀的数字代表rank id + +3. 将上述信息输入给run_ut模块运行精度检测并比对 + + ``` + cd run_ut + python run_ut.py --forward ./api_info/forward_info_0.json --backward ./api_info/backward_info_0.json + ``` + + forward和backward两个命令行参数根据实际情况配置。比对结果存盘位置会打屏显示,默认是'./',可以在运行run_ut.py时通过 --out_path命令行参数配置。 + diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index dac54a79fc8..e62c9616ca7 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -510,7 +510,7 @@ LINUX_FILE_NAME_LENGTH_LIMIT = 200 def check_path_length_valid(path): path = os.path.realpath(path) - return len(os.path.basename(path) <= LINUX_FILE_NAME_LENGTH_LIMIT) + return len(os.path.basename(path)) <= LINUX_FILE_NAME_LENGTH_LIMIT def check_path_pattern_valid(path): diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py index 9ccdb05baa8..88df23cacac 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py @@ -39,6 +39,11 @@ def get_max_rel_err(n_value, b_value): return 1, False +def cosine_standard(compare_result): + bool_result = np.array(compare_result) > 0.99 + return np.all(bool_result), bool_result + + def cosine_sim(cpu_output, npu_output): n_value = npu_output.cpu().detach().numpy().reshape(-1) b_value = cpu_output.detach().numpy().reshape(-1) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index eb1b2586e9a..ae869e208e2 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -1,6 +1,6 @@ # 进行比对及结果展示 import os -from prettytable import Prettytable +from prettytable import PrettyTable from .algorithm import compare_core, cosine_sim, cosine_standard from ..common.utils import get_json_contents, print_error_log, print_info_log, write_csv from .compare_utils import CompareConst @@ -32,7 +32,7 @@ class Comparator: "forward_and_backward_not_pass": self.test_result_cnt['forward_and_backward_fail_num'], "pass": self.test_result_cnt['success_num'] } - tb = Prettytable() + tb = PrettyTable() tb.add_column("Category", list(res_dict.keys())) tb.add_column("statistics",list(res_dict.values())) info_tb = str(tb) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index 6dd0c8438e9..7456ce19be2 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -1,12 +1,14 @@ # 定义API INFO,保存基本信息,用于后续结构体的落盘,注意考虑random场景及真实数据场景 import inspect import torch -from .utils import DumpConst, write_npy +import torch_npu +from .utils import DumpUtil, DumpConst, write_npy from ..common.utils import print_error_log from ..common.config import msCheckerConfig class APIInfo: def __init__(self, api_name): + self.rank = torch_npu.npu.current_device() self.api_name = api_name self.save_real_data = msCheckerConfig.real_data @@ -106,9 +108,10 @@ class ForwardAPIInfo(APIInfo): def analyze_api_call_stack(self): stack_str = [] for (_, path, line, func, code, _) in inspect.stack()[3:]: + if not code: continue stack_line = " ".join([ "File", ", ".join([path, " ".join(["line", str(line)]), " ".join(["in", func]), - " ".join(["\n", code[0].strip() if code else code])])]) + " ".join(["\n", code[0].strip()])])]) stack_str.append(stack_line) self.stack_info_struct = {self.api_name: stack_str} diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py index 6ffc77578f1..ade72d3ebae 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py @@ -24,7 +24,7 @@ import torch import threading -from .utils import ForwardAPIInfo, BackwardAPIInfo +from .api_info import ForwardAPIInfo, BackwardAPIInfo from .info_dump import write_api_info_json from .utils import DumpConst, DumpUtil from ..common.utils import print_warn_log, print_info_log, print_error_log @@ -35,7 +35,7 @@ def pretest_info_dump(name, out_feat, module, phase): if phase == DumpConst.forward: api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs) elif phase == DumpConst.backward: - api_info = BackwardApiInfo(name, out_feat) + api_info = BackwardAPIInfo(name, out_feat) else: msg = "Unexpected training phase {}.".format(phase) print_error_log(msg) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index 3440e8b3a89..ff7ca90ccd4 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -12,14 +12,15 @@ lock = threading.Lock() def write_api_info_json(api_info): dump_path = msCheckerConfig.dump_path + rank = api_info.rank if isinstance(api_info, ForwardAPIInfo): - file_path = os.path.join(dump_path, 'forward_info.json') - stack_file_path = os.path.join(dump_path, 'stack_info.json') + file_path = os.path.join(dump_path, f'forward_info_{rank}.json') + stack_file_path = os.path.join(dump_path, f'stack_info_{rank}.json') write_json(file_path, api_info.api_info_struct) write_json(stack_file_path, api_info.stack_info_struct, indent=4) elif isinstance(api_info, BackwardAPIInfo): - file_path = os.path.join(dump_path, 'backward_info.json') + file_path = os.path.join(dump_path, f'backward_info_{rank}.json') write_json(file_path, api_info.grad_info_struct) else: raise ValueError(f"Invalid api_info type {type(api_info)}") diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py index 1c1370a841d..e8a874764da 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/utils.py @@ -6,11 +6,6 @@ import numpy as np from ..common.utils import print_error_log, CompareException, DumpException, Const, get_time, print_info_log, \ check_mode_valid, get_api_name_from_matcher -from ..common.version import __version__ - -dump_count = 0 -range_begin_flag, range_end_flag = False, False - class DumpConst: delimiter = '*' forward = 'forward' -- Gitee From 17c890dc44ae1d1fdbafc185585be1e7ac37d8af Mon Sep 17 00:00:00 2001 From: s30048155 Date: Thu, 3 Aug 2023 15:50:16 +0800 Subject: [PATCH 13/15] update code --- .../api_accuracy_checker/common/config.py | 63 ++++++++++++++++--- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/config.py b/debug/accuracy_tools/api_accuracy_checker/common/config.py index 60f920024ed..4f656bdc3d0 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/config.py @@ -1,18 +1,49 @@ import yaml import os +from ..common.utils import check_file_or_directory_path class Config: def __init__(self, yaml_file): - if not os.path.exists(yaml_file): - raise ValueError(f"File {yaml_file} does not exist") + check_file_or_directory_path(yaml_file, False) with open(yaml_file, 'r') as file: config = yaml.safe_load(file) - self.dump_path = config['dump_path'] - self.jit_compile = config['jit_compile'] - self.compile_option = config['compile_option'] - self.compare_algorithm = config['compare_algorithm'] - self.real_data = config['real_data'] - self.dump_step = config['dump_step'] + self.dump_path = self.validate_dump_path(config['dump_path']) + self.jit_compile = self.validate_jit_compile(config['jit_compile']) + self.compile_option = self.validate_compile_option(config['compile_option']) + self.compare_algorithm = self.validate_compare_algorithm(config['compare_algorithm']) + self.real_data = self.validate_real_data(config['real_data']) + self.dump_step = self.validate_dump_step(config['dump_step']) + + def validate_dump_path(self, dump_path): + if not isinstance(dump_path, str): + raise ValueError("dump_path mast be string type") + return dump_path + + def validate_jit_compile(self, jit_compile): + if not isinstance(jit_compile, bool): + raise ValueError("jit_compile mast be bool type") + return jit_compile + + def validate_compile_option(self, compile_option): + if not isinstance(compile_option, str): + raise ValueError("compile_option mast be string type") + return compile_option + + def validate_compare_algorithm(self, compare_algorithm): + if not isinstance(compare_algorithm, str): + raise ValueError("compare_algorithm mast be string type") + return compare_algorithm + + def validate_real_data(self, real_data): + if not isinstance(real_data, bool): + raise ValueError("real_data mast be bool type") + return real_data + + def validate_dump_step(self, dump_step): + if not isinstance(dump_step, int): + raise ValueError("dump_step mast be int type") + return dump_step + def __str__(self): return ( @@ -27,10 +58,24 @@ class Config: def update_config(self, **kwargs): for key, value in kwargs.items(): if hasattr(self, key): + if key == 'dump_path': + self.validate_dump_path(value) + elif key == 'jit_compile': + self.validate_jit_compile(value) + elif key == 'compile_option': + self.validate_compile_option(value) + elif key == 'compare_algorithm': + self.validate_compare_algorithm(value) + elif key == 'real_data': + self.validate_real_data(value) + elif key == 'dump_step': + self.validate_dump_step(value) setattr(self, key, value) else: raise ValueError(f"Invalid key '{key}'") -cur_path = os.path.dirname(os.path.realpath(__file__)) + + +cur_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) yaml_path = os.path.join(cur_path, "config.yaml") msCheckerConfig = Config(yaml_path) \ No newline at end of file -- Gitee From 911ca3630befa140b594ec7f5805f4f0679575e7 Mon Sep 17 00:00:00 2001 From: s30048155 Date: Thu, 3 Aug 2023 15:55:47 +0800 Subject: [PATCH 14/15] realpath --- debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py index ff7ca90ccd4..a0765001dff 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/info_dump.py @@ -49,7 +49,7 @@ def write_json(file_path, data, indent=None): lock.release() def initialize_output_json(): - dump_path = msCheckerConfig.dump_path + dump_path = os.path.realpath(msCheckerConfig.dump_path) check_file_or_directory_path(dump_path,True) files = ['forward_info.json', 'backward_info.json', 'stack_info.json'] for file in files: -- Gitee From 4ba8178ab5178b646d3594bfa4c8bf45230a6864 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Thu, 3 Aug 2023 10:00:06 +0000 Subject: [PATCH 15/15] update debug/accuracy_tools/api_accuracy_checker/common/config.py. Signed-off-by: sunyiming --- debug/accuracy_tools/api_accuracy_checker/common/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/config.py b/debug/accuracy_tools/api_accuracy_checker/common/config.py index 4f656bdc3d0..c931c686318 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/config.py @@ -1,6 +1,6 @@ import yaml import os -from ..common.utils import check_file_or_directory_path +from api_accuracy_checker.common.utils import check_file_or_directory_path class Config: def __init__(self, yaml_file): -- Gitee