From cb185648249e8d0e8ac9d4c1e176a0dbf53b90a8 Mon Sep 17 00:00:00 2001 From: litian_drinksnow Date: Tue, 8 Aug 2023 21:18:05 +0800 Subject: [PATCH 1/4] dump scope control with dataloader --- .../api_accuracy_checker/dump/__init__.py | 2 +- .../api_accuracy_checker/dump/dump.py | 12 ++++++++++++ .../api_accuracy_checker/dump/dump_scope.py | 17 ++++++++++++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py index d5b19ad6847..4ba1b90b5f4 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py @@ -1,4 +1,4 @@ from api_accuracy_checker.dump.dump import set_dump_switch - +import api_accuracy_check.dump.dump_scope __all__ = ['set_dump_switch'] diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py index 8098f25db0b..958c7ce51fb 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py @@ -38,6 +38,8 @@ def set_dump_switch(switch): class DumpUtil(object): dump_switch = None + target_iter = 1 + call_num = 0 @staticmethod def set_dump_switch(switch): @@ -46,6 +48,16 @@ class DumpUtil(object): @staticmethod def get_dump_switch(): return DumpUtil.dump_switch == "ON" + + @staticmethod + def incr_iter_num_maybe_exit(): + if DumpUtil.call_num == DumpUtil.target_iter: + set_dump_switch("ON") + elif DumpUtil.call_num > DumpUtil.target_iter: + raise Exception("Model pretest: exit after iteration {}".format(DumpUtil.target_iter)) + else: + set_dump_switch("OFF") + DumpUtil.call_num += 1 class DumpConst: diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump_scope.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump_scope.py index 51dbd75d9c8..16078173eaa 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump_scope.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump_scope.py @@ -1 +1,16 @@ -# dump范围控制 ———— 李天 \ No newline at end of file +# dump范围控制 +import torch +from torch.utils.data import Dataset, DataLoader +from torch.utils.data.dataloader import _BaseDataLoaderIter +from api_accuracy_checker.dump.dump import DumpUtil + + +def iter_tracer(func): + def func_wrapper(*args, **kwargs): + DumpUtil.dump_switch = "OFF" + result = func(*args, **kwargs) + DumpUtil.incr_iter_num_maybe_exit() + return result + return func_wrapper + +_BaseDataLoaderIter.__next__ = iter_tracer(torch.utils.data.dataloader._BaseDataLoaderIter.__next__) \ No newline at end of file -- Gitee From 0676d049b1e2341de40b7fbaaa3d54e9020ade01 Mon Sep 17 00:00:00 2001 From: litian_drinksnow Date: Tue, 8 Aug 2023 21:22:12 +0800 Subject: [PATCH 2/4] dump scope control with dataloader --- debug/accuracy_tools/api_accuracy_checker/dump/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py index 4ba1b90b5f4..1b19415c372 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py @@ -1,4 +1,4 @@ from api_accuracy_checker.dump.dump import set_dump_switch -import api_accuracy_check.dump.dump_scope +import api_accuracy_checker.dump.dump_scope __all__ = ['set_dump_switch'] -- Gitee From c94d9fbbf73bc697408042da724d87debf1727d0 Mon Sep 17 00:00:00 2001 From: litian_drinksnow Date: Tue, 8 Aug 2023 21:23:43 +0800 Subject: [PATCH 3/4] dump scope control with dataloader --- ...05\267\344\275\277\347\224\250\346\226\271\346\263\225.md" | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" index 74e6ff59ac1..27b22d6d83f 100644 --- "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" +++ "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" @@ -20,7 +20,7 @@ 2. 在工具中加入以下代码使用工具dump模块,启动训练抓取网络所有API信息,目前工具仅支持抓取训练的第一个迭代并且在第一个迭代后会退出训练进程。 ``` - from api_accuracy_checker.dump import set_dump_switch + import api_accuracy_checker.dump ``` ​ dump信息默认会存盘到./路径下,包括前向API信息forward_info_{pid}.json, 反向API信息backward_info_{pid}.json, 调用栈信息stack_info_{pid}.json。真实数据模式下还有forward_real_data和backward_real_data文件夹,里面有每个api输入的具体数值。forward_info与stack_info中的key值一一对应,用户可根据forward_info中API的key在stack_info中查询到其调用栈及代码行位置。 @@ -46,7 +46,7 @@ - + -- Gitee From eaec80de3d7e8ab541ae6d176fac0c80b207da0f Mon Sep 17 00:00:00 2001 From: litian_drinksnow Date: Tue, 8 Aug 2023 21:35:04 +0800 Subject: [PATCH 4/4] =?UTF-8?q?readme=E5=A2=9E=E5=8A=A0=E6=8A=A5=E9=94=99?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...267\344\275\277\347\224\250\346\226\271\346\263\225.md" | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" index 27b22d6d83f..a72e23484c1 100644 --- "a/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" +++ "b/debug/accuracy_tools/api_accuracy_checker/Ascend\346\250\241\345\236\213\347\262\276\345\272\246\351\242\204\346\243\200\345\267\245\345\205\267\344\275\277\347\224\250\346\226\271\346\263\225.md" @@ -17,12 +17,17 @@ export PYTHONPATH=$PYTHONPATH:{att_root}/debug/accuracy_tools/ ``` -2. 在工具中加入以下代码使用工具dump模块,启动训练抓取网络所有API信息,目前工具仅支持抓取训练的第一个迭代并且在第一个迭代后会退出训练进程。 +2. 在工具中加入以下代码使用工具dump模块,启动训练抓取网络所有API信息 ``` import api_accuracy_checker.dump ``` + 目前工具仅支持抓取训练的**第二个迭代**并且在第二个迭代后会报错退出训练进程。报错信息如下,这个报错仅用于停止训练,属于正常现象: + ``` + Exception: Model pretest: exit after iteration 1. + ``` + ​ dump信息默认会存盘到./路径下,包括前向API信息forward_info_{pid}.json, 反向API信息backward_info_{pid}.json, 调用栈信息stack_info_{pid}.json。真实数据模式下还有forward_real_data和backward_real_data文件夹,里面有每个api输入的具体数值。forward_info与stack_info中的key值一一对应,用户可根据forward_info中API的key在stack_info中查询到其调用栈及代码行位置。 有需要的话,用户可以通过msCheckerConfig.update_config来配置dump路径以及启用真实数据模式(默认为关)。注意启用真实数据模式目前仅支持单卡,且会存盘较多数据,可能对磁盘空间有较大冲击。 -- Gitee