diff --git a/debug/accuracy_tools/api_accuracy_checker/README.md b/debug/accuracy_tools/api_accuracy_checker/README.md index 3ee506f042e28c30a7d3baefc3e72539df94a18b..2ec0bd97927c006464350bace31ef931bad098d2 100644 --- a/debug/accuracy_tools/api_accuracy_checker/README.md +++ b/debug/accuracy_tools/api_accuracy_checker/README.md @@ -60,14 +60,13 @@ Ascend模型精度预检工具能在昇腾NPU上扫描用户训练模型中所 ```Python from api_accuracy_checker.dump import msCheckerConfig - msCheckerConfig.update_config(dump_path="my/dump/path", real_data=True, enable_dataloader=True, target_iter=1) + msCheckerConfig.update_config(dump_path="my/dump/path", real_data=True, target_iter=1) ``` | 参数名称 | 说明 | 是否必选 | | ----------------- | ------------------------------------------------------------ | -------- | | dump_path | 设置dump路径,须为已存在目录,默认为当前目录。 | 否 | | real_data | 真实数据模式,可取值True或False,默认为False,配置为True后开启真实数据模式,dump信息增加forward_real_data和backward_real_data目录,目录下保存每个API输入的具体数值。开启真实数据模式目前仅支持单卡,且会存盘较多数据,可能对磁盘空间有较大冲击。 | 否 | - | enable_dataloader | 自动控制开关,可取值True或False,默认为False,配置为True后自动识别dump target_iter参数指定的迭代数据,并在该迭代执行完成后退出训练。 | 否 | | target_iter | 指定dump某个step的数据,默认为1,仅支持dump1个step,须指定为训练脚本中存在的step。 | 否 | 3. 将API信息输入给run_ut模块运行精度检测并比对,运行如下命令: diff --git a/debug/accuracy_tools/api_accuracy_checker/common/config.py b/debug/accuracy_tools/api_accuracy_checker/common/config.py index 821842ddd9be54b824d9ed05710fa1977a85df0a..c47911e21330efc18f9ef3b56a786eaada2fdc38 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/config.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/config.py @@ -17,7 +17,6 @@ class Config: 'real_data': bool, 'dump_step': int, 'error_data_path': str, - 'enable_dataloader': bool, 'target_iter': int, 'precision': int } @@ -35,11 +34,10 @@ class Config: def __str__(self): return '\n'.join(f"{key}={value}" for key, value in self.config.items()) - def update_config(self, dump_path, real_data=False, enable_dataloader=False, target_iter=1): + def update_config(self, dump_path, real_data=False, target_iter=1): args = { "dump_path": dump_path, "real_data": real_data, - "enable_dataloader": enable_dataloader, "target_iter": target_iter } for key, value in args.items(): diff --git a/debug/accuracy_tools/api_accuracy_checker/config.yaml b/debug/accuracy_tools/api_accuracy_checker/config.yaml index 76a31db4258fd8236ebfdb67fa26754e382adfde..22ef99c3a0edf276b8622e657bdbb517e0cadaf8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/config.yaml +++ b/debug/accuracy_tools/api_accuracy_checker/config.yaml @@ -3,7 +3,6 @@ jit_compile: True real_data: False dump_step: 1000 error_data_path: './' -enable_dataloader: True target_iter: 1 precision: 14 \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py index 830ff5e4fcf1548e11119dc0f36bf7641a94143e..2a69e226cda51553345422be903dae514492f0e9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py @@ -45,9 +45,9 @@ class DumpUtil(object): @staticmethod def incr_iter_num_maybe_exit(): - if DumpUtil.call_num == msCheckerConfig.target_iter or not msCheckerConfig.enable_dataloader: + if DumpUtil.call_num == msCheckerConfig.target_iter: set_dump_switch("ON") - elif DumpUtil.call_num > msCheckerConfig.target_iter and msCheckerConfig.enable_dataloader: + elif DumpUtil.call_num > msCheckerConfig.target_iter: raise Exception("Model pretest: exit after iteration {}".format(msCheckerConfig.target_iter)) else: set_dump_switch("OFF") diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py index 03e4e545aafb020091a68dd80ed36e3f8ede4004..40ce2fee5c5f0e8b13ccc28a71ce785c0451f58d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py @@ -11,7 +11,6 @@ from api_accuracy_checker.common.utils import print_info_log, print_warn_log, ge from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, check_file_suffix -NO_GRAD_APIS = ["hardtanh"] init_environment() @@ -78,10 +77,10 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di api_type = api_full_name.split("_")[0] api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] args, kwargs, need_grad = get_api_info(api_info_dict, api_name) - need_backward = api_full_name.replace("forward", "backward") in backward_content and api_name[-1] != "_" + need_backward = api_full_name.replace("forward", "backward") in backward_content need_backward = need_backward and need_grad if not need_grad: - print_warn_log("%s involves in-place operations, skip backward" % api_full_name) + print_warn_log("%s function with out=... arguments don't support automatic differentiation, skip backward." % api_full_name) npu_args, npu_kwargs = generate_npu_params(args, kwargs, need_backward) if kwargs.get("device"): del kwargs["device"] diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index e7f240185f44a492d072ec7a0d4d5996f441027e..9dba68190052437d8b1ff659abfb80ed5790e2dd 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -19,7 +19,6 @@ from api_accuracy_checker.common.config import msCheckerConfig from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen, FileCheckConst, FileChecker, \ change_mode, check_file_suffix -NO_GRAD_APIS = ["hardtanh"] def init_environment(): @@ -151,7 +150,7 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di need_backward = api_full_name in backward_content need_backward = need_backward and need_grad if not need_grad: - print_warn_log("%s involves in-place operations, skip backward" % api_full_name) + print_warn_log("%s function with out=... arguments don't support automatic differentiation, skip backward." % api_full_name) cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward) npu_args, npu_kwargs = generate_npu_params(args, kwargs, need_backward) grad_out, npu_grad_out = None, None @@ -178,8 +177,6 @@ def get_api_info(api_info_dict, api_name): need_grad = True if api_info_dict.get("kwargs") and "out" in api_info_dict.get("kwargs"): need_grad = False - if api_name in NO_GRAD_APIS: - need_grad = False args, kwargs = gen_api_params(api_info_dict, need_grad, convert_type) return args, kwargs, need_grad