From 3e40b2104d1547a2a3e045ae894799c18fb8d825 Mon Sep 17 00:00:00 2001 From: s30048155 Date: Wed, 1 Nov 2023 14:55:59 +0800 Subject: [PATCH 1/3] clearcode --- .../src/python/ptdbg_ascend/__init__.py | 1 + .../src/python/ptdbg_ascend/common/log.py | 1 + .../compare/distributed_compare.py | 3 +- .../debugger/precision_debugger.py | 1 + .../src/python/ptdbg_ascend/dump/dump.py | 10 ++-- .../src/python/ptdbg_ascend/dump/utils.py | 4 +- .../ptdbg_ascend/hook_module/hook_module.py | 2 + .../hook_module/wrap_npu_custom.py | 1 + .../ptdbg_ascend/online_dispatch/dispatch.py | 6 ++- .../online_dispatch/dump_compare.py | 4 +- .../ptdbg_ascend/overflow_check/info_dump.py | 7 +-- .../overflow_check/overflow_check.py | 16 +++--- .../parse_tool/lib/parse_exception.py | 4 +- .../ptdbg_ascend/src/python/setup.py | 49 ++++++++++--------- 14 files changed, 65 insertions(+), 44 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/__init__.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/__init__.py index 920deafde..4c3228c47 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/__init__.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/__init__.py @@ -30,6 +30,7 @@ from .common.utils import seed_all, torch_without_guard_version, print_info_log from .debugger.precision_debugger import PrecisionDebugger seed_all() + def jit_script(obj, optimize=None, _frames_up=0, _rcb=None, example_input=None): print_info_log("The torch_npu earlier than 2.1 does not support torch.jit.script. " "Therefore, to ensure that the dump data of the GPU and NPU is consistent, " diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/log.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/log.py index a7b419866..32c342355 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/log.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/log.py @@ -2,6 +2,7 @@ import os import time import sys + def _print_log(level, msg, end='\n'): current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))) pid = os.getgid() diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/compare/distributed_compare.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/compare/distributed_compare.py index d92b0a145..c58db903d 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/compare/distributed_compare.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/compare/distributed_compare.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -import os, sys +import os +import sys import re from ..common.utils import print_error_log, CompareException, check_compare_param from .acc_compare import compare_core diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py index 79066d6ce..b2cfdaba4 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/debugger/precision_debugger.py @@ -108,6 +108,7 @@ class PrecisionDebugger: PrecisionDebugger.step() PrecisionDebugger.start() + def iter_tracer(func): def func_wrapper(*args, **kwargs): PrecisionDebugger.stop() diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index dcf2a8b2d..5238754cc 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -18,12 +18,12 @@ import inspect import json import os -import numpy as np -import torch import threading - from pathlib import Path +import numpy as np +import torch + try: import torch_npu except ImportError: @@ -47,6 +47,7 @@ pkl_name = "" rank = os.getpid() multi_output_apis = ["_sort_", "npu_flash_attention"] + class DataInfo(object): def __init__(self, data, save_data, summary_data, dtype, shape): self.data = data @@ -183,6 +184,7 @@ def dump_api_tensor(dump_step, in_feat, name_template, out_feat, dump_file): if 'output' in DumpUtil.dump_mode: dump_tensor(out_feat, name_template.format("output"), dump_step, dump_file) + def rename_(): global rank global pkl_name @@ -198,6 +200,7 @@ def rename_(): os.rename(dir_name, new_name) pkl_name = os.path.join(new_name, file_name) + def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): dump_file = DumpUtil.get_dump_path() dump_file = modify_dump_path(dump_file, DumpUtil.dump_switch_mode) @@ -356,5 +359,6 @@ def write_to_disk(): change_mode(pkl_name, FileCheckConst.DATA_FILE_AUTHORITY) api_list = [] + def get_pkl_file_path(): return pkl_name diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py index a7cdedb23..28990c2a6 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py @@ -19,10 +19,12 @@ range_begin_flag, range_end_flag = False, False def check_list_or_acl_mode(name_prefix): global dump_count + result = False for item in DumpUtil.dump_switch_scope: if name_prefix.startswith(item): dump_count = dump_count + 1 - return True + result = True + return result def check_range_mode(name_prefix): diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py index a3cb10bf4..83f7dcace 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py @@ -24,8 +24,10 @@ import torch.utils.hooks as full_hooks g_stop_hook = False + class HOOKModule(nn.Module): module_count = {} + def __init__(self, hook) -> None: super(HOOKModule, self).__init__() self.has_overflow = False diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py index a507805f0..f2e1e8f9d 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py @@ -48,6 +48,7 @@ class NpuOPTemplate(HOOKModule): else: return getattr(torch_npu._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs) + def wrap_npu_op(op_name, hook): def npu_op_template(*args, **kwargs): diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dispatch.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dispatch.py index 53f104d45..1b16502fb 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dispatch.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dispatch.py @@ -1,10 +1,12 @@ import os import time -import yaml -import json from pathlib import Path from multiprocessing import Manager, Pool + +import yaml +import json import torch + from torch.utils._python_dispatch import TorchDispatchMode try: diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py index 03947fc9e..902098b95 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py @@ -44,11 +44,11 @@ class TimeStatistics: def __enter__(self): if self.debug: - self.time = datetime.now() + self.time = datetime.now().astimezone() def __exit__(self, exc_type, exc_val, exc_tb): if self.debug: - cost_time = datetime.now() - self.time + cost_time = datetime.now().astimezone() - self.time time_cost = f'Time[{self.tag}]: Dev[{self.device}], Pid[{os.getpid()}], Fun[{self.fun}], ' \ f'Id[{self.index}], time[{cost_time}]' hot_time_cost = "Hotspot " + time_cost diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py index 5e6e347ff..5a4b0bf3b 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py @@ -1,11 +1,11 @@ import inspect import fcntl -import json import os -import torch import threading +import json import numpy as np +import torch from ..common.utils import print_error_log, get_time from ..common.file_check_util import FileOpen @@ -171,7 +171,8 @@ class ForwardAPIInfo(APIInfo): def analyze_api_call_stack(self): stack_str = [] for (_, path, line, func, code, _) in inspect.stack()[3:]: - if not code: continue + if not code: + continue stack_line = " ".join([ "File", ", ".join([path, " ".join(["line", str(line)]), " ".join(["in", func]), " ".join(["\n", code[0].strip()])])]) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py index e80c9d66d..dc43c2c3b 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/overflow_check.py @@ -1,12 +1,7 @@ import os -import torch from pathlib import Path -from ..common.utils import print_warn_log, get_time, print_info_log -from ..dump.dump import forward_init_status, forward_acl_dump -from .utils import OverFlowUtil, dump_overflow -from ..dump.utils import DumpUtil, Const, get_tensor_rank, create_dirs_if_not_exist -from .info_dump import write_api_info_json, ForwardAPIInfo, BackwardAPIInfo -from ..dump import dump + +import torch try: import torch_npu @@ -15,6 +10,13 @@ except ImportError: else: is_gpu = False +from ..common.utils import print_warn_log, get_time, print_info_log +from ..dump.dump import forward_init_status, forward_acl_dump +from .utils import OverFlowUtil, dump_overflow +from ..dump.utils import DumpUtil, Const, get_tensor_rank, create_dirs_if_not_exist +from .info_dump import write_api_info_json, ForwardAPIInfo, BackwardAPIInfo +from ..dump import dump + backward_init_status = False api_overflow = [] forward_api_info = {} diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_exception.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_exception.py index a82a5106f..380d84cb2 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_exception.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/parse_tool/lib/parse_exception.py @@ -39,12 +39,14 @@ def catch_exception(func): def inner(*args, **kwargs): log = logging.getLogger() line = args[-1] if len(args) == 2 else "" + result = None try: - return func(*args, **kwargs) + result = func(*args, **kwargs) except OSError: log.error("%s: command not found" % line) except ParseException: log.error("Command execution failed") except SystemExit: log.warning("Please enter the correct command") + return result return inner diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py b/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py index fecc86842..02755e8e1 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/setup.py @@ -15,36 +15,37 @@ # limitations under the License. """ -import setuptools -from pathlib import Path -import stat import os +import stat +from pathlib import Path +import setuptools VERSION = '3.0' + def generate_ptdbg_ascend_version(): - ptdbg_ascend_root = Path(__file__).parent - version_path = ptdbg_ascend_root / "ptdbg_ascend" / "common" / "version.py" - if version_path.exists(): - version_path.unlink() - flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL - modes = stat.S_IWUSR | stat.S_IRUSR - with os.fdopen(os.open(version_path, flags, modes), 'w') as f: - f.write("__version__ = '{version}'\n".format(version = VERSION)) + ptdbg_ascend_root = Path(__file__).parent + version_path = ptdbg_ascend_root / "ptdbg_ascend" / "common" / "version.py" + if version_path.exists(): + version_path.unlink() + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + modes = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open(version_path, flags, modes), 'w') as f: + f.write("__version__ = '{version}'\n".format(version = VERSION)) generate_ptdbg_ascend_version() setuptools.setup(name='ptdbg_ascend', - version=VERSION, - description='This is a pytorch precision comparison tools', - long_description='This is a pytorch precision comparison tools, include overflow detect tool', - packages=setuptools.find_packages(), - install_requires = [ - "wheel", - "numpy", - "pandas >= 1.3.5", - "pyyaml" - ], - include_package_data=True, - ext_modules=[], - zip_safe=False) + version=VERSION, + description='This is a pytorch precision comparison tools', + long_description='This is a pytorch precision comparison tools, include overflow detect tool', + packages=setuptools.find_packages(), + install_requires = [ + "wheel", + "numpy", + "pandas >= 1.3.5", + "pyyaml" + ], + include_package_data=True, + ext_modules=[], + zip_safe=False) -- Gitee From 3200826b22d49ecdf6f5b65acc6d0704c792c261 Mon Sep 17 00:00:00 2001 From: sunyiming Date: Wed, 1 Nov 2023 09:20:15 +0000 Subject: [PATCH 2/3] update debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py. Signed-off-by: sunyiming --- .../src/python/ptdbg_ascend/online_dispatch/dump_compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py index 902098b95..03947fc9e 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/online_dispatch/dump_compare.py @@ -44,11 +44,11 @@ class TimeStatistics: def __enter__(self): if self.debug: - self.time = datetime.now().astimezone() + self.time = datetime.now() def __exit__(self, exc_type, exc_val, exc_tb): if self.debug: - cost_time = datetime.now().astimezone() - self.time + cost_time = datetime.now() - self.time time_cost = f'Time[{self.tag}]: Dev[{self.device}], Pid[{os.getpid()}], Fun[{self.fun}], ' \ f'Id[{self.index}], time[{cost_time}]' hot_time_cost = "Hotspot " + time_cost -- Gitee From ae029dd80db35fe42804a4a7f5abe90a053bffaf Mon Sep 17 00:00:00 2001 From: s30048155 Date: Thu, 2 Nov 2023 15:39:55 +0800 Subject: [PATCH 3/3] update --- .../ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py index 28990c2a6..b619d0cf4 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/utils.py @@ -19,12 +19,11 @@ range_begin_flag, range_end_flag = False, False def check_list_or_acl_mode(name_prefix): global dump_count - result = False for item in DumpUtil.dump_switch_scope: if name_prefix.startswith(item): dump_count = dump_count + 1 - result = True - return result + return True + return False def check_range_mode(name_prefix): -- Gitee