From 10470da6824e3ca8cbce0dc8087d055f7676ce3f Mon Sep 17 00:00:00 2001 From: h00613304 Date: Wed, 8 May 2024 22:08:34 +0800 Subject: [PATCH 1/7] api_name rename --- .../api_accuracy_checker/compare/compare.py | 49 ++-- .../api_accuracy_checker/dump/dump.py | 219 +++++++++--------- .../api_accuracy_checker/run_ut/run_ut.py | 10 +- .../src/python/ptdbg_ascend/dump/dump.py | 37 ++- 4 files changed, 178 insertions(+), 137 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index 8400749426..b4e6ac227a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -69,13 +69,15 @@ class Comparator: table_detail.add_column("Statistics") table_detail.add_row("Forward Error", str(self.test_result_cnt.get("forward_fail_num", 0))) table_detail.add_row("Backward Error", str(self.test_result_cnt.get("backward_fail_num", 0))) - table_detail.add_row("Both Forward & Backward Error", str(self.test_result_cnt.get("forward_and_backward_fail_num", 0))) + table_detail.add_row("Both Forward & Backward Error", + str(self.test_result_cnt.get("forward_and_backward_fail_num", 0))) console.print(table_total) console.print(table_detail) def get_statistics_from_result_csv(self): - checklist = [CompareConst.PASS, CompareConst.ERROR, CompareConst.WARNING, CompareConst.SPACE, CompareConst.SKIP, "skip"] + checklist = [CompareConst.PASS, CompareConst.ERROR, CompareConst.WARNING, CompareConst.SPACE, CompareConst.SKIP, + "skip"] self.test_result_cnt = { "success_num": 0, "warning_num": 0, "error_num": 0, "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, @@ -113,7 +115,7 @@ class Comparator: self.test_result_cnt['warning_num'] += 1 def write_csv_title(self): - summary_test_rows = [[self.COLUMN_API_NAME, self.COLUMN_FORWARD_SUCCESS, + summary_test_rows = [[self.COLUMN_API_NAME, self.COLUMN_FORWARD_SUCCESS, self.COLUMN_BACKWARD_SUCCESS, "Message"]] write_csv(summary_test_rows, self.save_path) @@ -143,13 +145,13 @@ class Comparator: if isinstance(fwd_result, list): for i, test_subject in enumerate(fwd_result): subject = subject_prefix + ".forward.output." + str(i) - test_subject = ["{:.{}f}".format(item, msCheckerConfig.precision) + test_subject = ["{:.{}f}".format(item, msCheckerConfig.precision) if isinstance(item, float) else item for item in test_subject] test_rows.append([subject] + list(test_subject)) if isinstance(bwd_result, list): for i, test_subject in enumerate(bwd_result): subject = subject_prefix + ".backward.output." 
+ str(i) - test_subject = ["{:.{}f}".format(item, msCheckerConfig.precision) + test_subject = ["{:.{}f}".format(item, msCheckerConfig.precision) if isinstance(item, float) else item for item in test_subject] test_rows.append([subject] + list(test_subject)) @@ -160,7 +162,7 @@ class Comparator: self.write_detail_csv(args) def compare_output(self, full_api_name, data_info): - _, api_name, _ = full_api_name.split("*") + _, api_name, _ = full_api_name.split(".") bench_output = data_info.bench_output device_output = data_info.device_output bench_grad = data_info.bench_grad @@ -178,11 +180,14 @@ class Comparator: if backward_message: backward_column = CompareColumn() bwd_compare_alg_results = backward_column.to_column_value(CompareConst.SKIP, backward_message) - self.record_results(full_api_name, fwd_success_status, CompareConst.SKIP, fwd_compare_alg_results, [bwd_compare_alg_results]) + self.record_results(full_api_name, fwd_success_status, CompareConst.SKIP, fwd_compare_alg_results, + [bwd_compare_alg_results]) else: - self.record_results(full_api_name, fwd_success_status, bwd_success_status if bwd_compare_alg_results is not None else CompareConst.SPACE, fwd_compare_alg_results, bwd_compare_alg_results) + self.record_results(full_api_name, fwd_success_status, + bwd_success_status if bwd_compare_alg_results is not None else CompareConst.SPACE, + fwd_compare_alg_results, bwd_compare_alg_results) return fwd_success_status == CompareConst.PASS, bwd_success_status == CompareConst.PASS \ - or bwd_success_status == CompareConst.SPACE + or bwd_success_status == CompareConst.SPACE def _compare_core_wrapper(self, api_name, bench_output, device_output): detailed_result_total = [] @@ -224,7 +229,7 @@ class Comparator: if b_keys != n_keys: return CompareConst.ERROR, compare_column, "bench and npu output dict keys are different." else: - status, compare_result, message = self._compare_core(api_name, list(bench_output.values()), + status, compare_result, message = self._compare_core(api_name, list(bench_output.values()), list(device_output.values())) elif isinstance(bench_output, torch.Tensor): copy_bench_out = bench_output.detach().clone() @@ -233,7 +238,7 @@ class Comparator: compare_column.npu_type = str(copy_device_output.dtype) compare_column.shape = tuple(device_output.shape) status, compare_result, message = self._compare_torch_tensor(api_name, copy_bench_out, copy_device_output, - compare_column) + compare_column) elif isinstance(bench_output, (bool, int, float, str)): compare_column.bench_type = str(type(bench_output)) compare_column.npu_type = str(type(device_output)) @@ -241,7 +246,7 @@ class Comparator: elif bench_output is None: return CompareConst.SKIP, compare_column, "Bench output is None, skip this test." else: - return CompareConst.PASS, compare_column, + return CompareConst.PASS, compare_column, "Unexpected output type in compare_core: {}".format(type(bench_output)) return status, compare_result, message @@ -257,24 +262,24 @@ class Comparator: device_output = device_output.cpu().numpy() if cpu_shape != npu_shape: return CompareConst.ERROR, compare_column, f"The shape of bench{str(cpu_shape)} " \ - f"and npu{str(npu_shape)} not equal." + f"and npu{str(npu_shape)} not equal." if not check_dtype_comparable(bench_output, device_output): return CompareConst.ERROR, compare_column, f"Bench out dtype is {bench_output.dtype} but " \ - f"npu output dtype is {device_output.dtype}, cannot compare." + f"npu output dtype is {device_output.dtype}, cannot compare." 
message = "" - if bench_output.dtype in [bool, np.uint8, np.int8, np.int16, np.uint16, np.uint32, np.int32, + if bench_output.dtype in [bool, np.uint8, np.int8, np.int16, np.uint16, np.uint32, np.int32, np.int64, np.uint64]: message += f"Compare algorithm is not supported for {bench_output.dtype} data. " \ - f"Only judged by Error Rate." + f"Only judged by Error Rate." err_rate, status, msg = self._compare_bool_tensor(bench_output, device_output) message += msg + "\n" compare_column.error_rate = err_rate return status, compare_column, message else: - status, compare_column, message = self._compare_float_tensor(api_name, bench_output, device_output, + status, compare_column, message = self._compare_float_tensor(api_name, bench_output, device_output, compare_column, npu_dtype) return status, compare_column, message - + def _compare_float_tensor(self, api_name, bench_output, device_output, compare_column, dtype): message = "" abs_bench, abs_bench_with_eps = get_abs_bench_with_eps(bench_output, dtype) @@ -294,7 +299,8 @@ class Comparator: rel_err = abs_err / abs_bench_with_eps small_value_mask = get_small_value_mask(abs_bench, both_finite_mask, small_value_threshold) normal_value_mask = np.logical_and(both_finite_mask, np.logical_not(small_value_mask)) - compare_column.inf_nan_error_ratio = check_inf_nan_value(inf_nan_mask, bench_output, device_output, dtype, rtol) + compare_column.inf_nan_error_ratio = check_inf_nan_value(inf_nan_mask, bench_output, device_output, + dtype, rtol) compare_column.rel_err_ratio = check_norm_value(normal_value_mask, rel_err, rtol) compare_column.abs_err_ratio = check_small_value(abs_err, small_value_mask, small_value_atol) elif api_name in ULPStandardApi: @@ -311,7 +317,7 @@ class Comparator: else: compare_column.ulp_error_proportion = np.sum(ulp_err > 1) / bench_output.size else: - dtype_config = precision_configs.get(dtype) + dtype_config = precision_configs.get(dtype) small_value_mask = get_small_value_mask(abs_bench, both_finite_mask, dtype_config['small_value'][0]) abs_err_greater_mask = np.greater(abs_err, dtype_config['small_value_atol'][0]) compare_column.small_value_err_ratio = get_small_value_err_ratio(small_value_mask, abs_err_greater_mask) @@ -380,7 +386,6 @@ class Comparator: compare_column.error_rate = 0 return CompareConst.PASS, compare_column, "" - @staticmethod def _compare_bool_tensor(bench_output, device_output): error_nums = (bench_output != device_output).sum() @@ -389,7 +394,7 @@ class Comparator: error_rate = float(error_nums / bench_output.size) result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR return error_rate, result, "" - + @staticmethod def _get_absolute_threshold_attribute(api_name, dtype): small_value_threshold = apis_threshold.get(api_name).get(dtype).get('small_value') diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py index d8b317aa28..25591bb453 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py @@ -1,109 +1,110 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2023-2023. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from api_accuracy_checker.dump.api_info import ForwardAPIInfo, BackwardAPIInfo -from api_accuracy_checker.dump.info_dump import write_api_info_json, initialize_output_json -from api_accuracy_checker.common.utils import print_error_log, CompareException, print_info_log -from api_accuracy_checker.hook_module.register_hook import initialize_hook -from api_accuracy_checker.common.config import msCheckerConfig - - -def set_dump_switch(switch): - if switch not in ["ON", "OFF"]: - print_error_log("Please set switch with 'ON' or 'OFF'.") - raise CompareException(CompareException.INVALID_PARAM_ERROR) - if switch == "ON": - initialize_hook(pretest_hook) - initialize_output_json() - DumpUtil.set_dump_switch(switch) - - -def check_dataloader_status(): - if msCheckerConfig.enable_dataloader: - error_info = ("If you want to use this function, set enable_dataloader " - "in the accuracy_tools/api_accuracy_check/config.yaml " - "to False first") - raise CompareException(CompareException.INVALID_PARAM_ERROR, error_info) - - -def start(): - check_dataloader_status() - if not DumpUtil.get_dump_switch(): - DumpUtil.incr_iter_num_maybe_exit() - - -def stop(): - check_dataloader_status() - DumpUtil.set_dump_switch("OFF") - - -def step(): - check_dataloader_status() - DumpUtil.call_num += 1 - - -class DumpUtil(object): - dump_switch = None - call_num = 0 - - @staticmethod - def set_dump_switch(switch): - DumpUtil.dump_switch = switch - - @staticmethod - def get_dump_switch(): - return DumpUtil.dump_switch == "ON" - - @staticmethod - def incr_iter_num_maybe_exit(): - if DumpUtil.call_num in msCheckerConfig.target_iter: - set_dump_switch("ON") - elif DumpUtil.call_num > max(msCheckerConfig.target_iter): - raise Exception("Model pretest: exit after iteration {}".format(DumpUtil.call_num - 1)) - else: - set_dump_switch("OFF") - - -class DumpConst: - delimiter = '*' - forward = 'forward' - backward = 'backward' - - -def pretest_info_dump(name, out_feat, module, phase): - if not DumpUtil.get_dump_switch(): - return - if phase == DumpConst.forward: - api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs) - elif phase == DumpConst.backward: - api_info = BackwardAPIInfo(name, out_feat) - else: - msg = "Unexpected training phase {}.".format(phase) - print_error_log(msg) - raise NotImplementedError(msg) - print_info_log(f"tools is dumping api: {name}" + " " * 10, end='\r') - write_api_info_json(api_info) - - -def pretest_hook(name, phase): - def pretest_info_dump_hook(module, in_feat, out_feat): - pretest_info_dump(name, out_feat, module, phase) - if hasattr(module, "input_args"): - del module.input_args - if hasattr(module, "input_kwargs"): - del module.input_kwargs - return pretest_info_dump_hook +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2023-2023. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +from api_accuracy_checker.dump.api_info import ForwardAPIInfo, BackwardAPIInfo +from api_accuracy_checker.dump.info_dump import write_api_info_json, initialize_output_json +from api_accuracy_checker.common.utils import print_error_log, CompareException, print_info_log +from api_accuracy_checker.hook_module.register_hook import initialize_hook +from api_accuracy_checker.common.config import msCheckerConfig + + +def set_dump_switch(switch): + if switch not in ["ON", "OFF"]: + print_error_log("Please set switch with 'ON' or 'OFF'.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + if switch == "ON": + initialize_hook(pretest_hook) + initialize_output_json() + DumpUtil.set_dump_switch(switch) + + +def check_dataloader_status(): + if msCheckerConfig.enable_dataloader: + error_info = ("If you want to use this function, set enable_dataloader " + "in the accuracy_tools/api_accuracy_check/config.yaml " + "to False first") + raise CompareException(CompareException.INVALID_PARAM_ERROR, error_info) + + +def start(): + check_dataloader_status() + if not DumpUtil.get_dump_switch(): + DumpUtil.incr_iter_num_maybe_exit() + + +def stop(): + check_dataloader_status() + DumpUtil.set_dump_switch("OFF") + + +def step(): + check_dataloader_status() + DumpUtil.call_num += 1 + + +class DumpUtil(object): + dump_switch = None + call_num = 0 + + @staticmethod + def set_dump_switch(switch): + DumpUtil.dump_switch = switch + + @staticmethod + def get_dump_switch(): + return DumpUtil.dump_switch == "ON" + + @staticmethod + def incr_iter_num_maybe_exit(): + if DumpUtil.call_num in msCheckerConfig.target_iter: + set_dump_switch("ON") + elif DumpUtil.call_num > max(msCheckerConfig.target_iter): + raise Exception("Model pretest: exit after iteration {}".format(DumpUtil.call_num - 1)) + else: + set_dump_switch("OFF") + + +class DumpConst: + delimiter = '*' + forward = 'forward' + backward = 'backward' + + +def pretest_info_dump(name, out_feat, module, phase): + if not DumpUtil.get_dump_switch(): + return + name = name.replace('*', '.') + if phase == DumpConst.forward: + api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs) + elif phase == DumpConst.backward: + api_info = BackwardAPIInfo(name, out_feat) + else: + msg = "Unexpected training phase {}.".format(phase) + print_error_log(msg) + raise NotImplementedError(msg) + print_info_log(f"tools is dumping api: {name}" + " " * 10, end='\r') + write_api_info_json(api_info) + + +def pretest_hook(name, phase): + def pretest_info_dump_hook(module, in_feat, out_feat): + pretest_info_dump(name, out_feat, module, phase) + if hasattr(module, "input_args"): + del module.input_args + if hasattr(module, "input_kwargs"): + del module.input_kwargs + return pretest_info_dump_hook diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 2918809578..05e9e75d1e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -84,7 +84,7 @@ def raise_bench_data_dtype(api_name, arg, 
raise_dtype=None): api_name:api名称 arg:标杆输入 raise_dtype:需要转换的dtype - 输出: + 输出: arg: 转换dtype的标杆输入 ''' if api_name in hf_32_standard_api and arg.dtype == torch.float32: @@ -181,7 +181,7 @@ def run_ut(config): continue try: if msCheckerConfig.white_list: - [_, api_name, _] = api_full_name.split("*") + [_, api_name, _] = api_full_name.split(".") if api_name not in set(msCheckerConfig.white_list): continue data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) @@ -190,7 +190,7 @@ def run_ut(config): if config.save_error_data: do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) except Exception as err: - [_, api_name, _] = api_full_name.split("*") + [_, api_name, _] = api_full_name.split(".") if "expected scalar type Long" in str(err): print_warn_log(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API " f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.") @@ -212,7 +212,7 @@ def run_ut(config): def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success): if not is_fwd_success or not is_bwd_success: - api_full_name = api_full_name.replace("*", ".") + # api_full_name = api_full_name.replace("*", ".") for element in data_info.in_fwd_data_list: UtAPIInfo(api_full_name + '.forward.input', element) UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_output) @@ -225,7 +225,7 @@ def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict): in_fwd_data_list = [] backward_message = '' - [api_type, api_name, _] = api_full_name.split("*") + [api_type, api_name, _] = api_full_name.split(".") args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) in_fwd_data_list.append(args) in_fwd_data_list.append(kwargs) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index b8a8e026aa..a236df771d 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -195,6 +195,41 @@ def thread_dump_data(prefix, data_info): DumpUtil.dump_thread_pool.submit(dump_data, prefix, data_info) +def underscore_replace(prefix): + """ + Replacing symbols to unify the format of ptdbg and pretest + """ + replaced_prefix = [] + consecutive_underscore_count = 0 + three_underscore_time = 0 + + for char in prefix: + if char == '_': + consecutive_underscore_count += 1 + if consecutive_underscore_count == 2: + # Two consecutive underscores, leave them unchanged + replaced_prefix.pop() + replaced_prefix.append('__') + elif consecutive_underscore_count == 3: + # Three consecutive underscores + three_underscore_time += 1 + replaced_prefix.pop() + if three_underscore_time % 2 == 1: + # Even index, replace the first underscore + replaced_prefix.append('.__') + else: + # Odd index, replace the third underscore + replaced_prefix.append('__.') + else: + # Single underscore, replace with a period + replaced_prefix.append('.') + else: + # Not an underscore, reset the count + consecutive_underscore_count = 0 + replaced_prefix.append(char) + return ''.join(replaced_prefix) + + def dump_data_by_rank_count(dump_step, prefix, data_info): print_info_log(f"ptdbg is analyzing rank{rank} api: {prefix}" + " " * 10, end='\r') if DumpUtil.is_single_rank and 
DumpUtil.dump_thread_pool: @@ -310,7 +345,7 @@ def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): print_warn_log("The file does not exist, error: {}".format(e)) name_prefix = name - name_template = f"{name_prefix}" + "_{}" + name_template = f"{underscore_replace(name_prefix)}" + ".{}" if DumpUtil.is_single_rank is None: DumpUtil.is_single_rank = check_single_rank_folder(dump_dir) if DumpUtil.dump_switch_mode in [Const.ALL, Const.API_LIST]: -- Gitee From f0a4258fc8354d75954403235b8dbdc48e7bdf52 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Wed, 8 May 2024 22:40:33 +0800 Subject: [PATCH 2/7] api_name rename --- .../src/python/ptdbg_ascend/dump/dump.py | 37 +------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index a236df771d..b8a8e026aa 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -195,41 +195,6 @@ def thread_dump_data(prefix, data_info): DumpUtil.dump_thread_pool.submit(dump_data, prefix, data_info) -def underscore_replace(prefix): - """ - Replacing symbols to unify the format of ptdbg and pretest - """ - replaced_prefix = [] - consecutive_underscore_count = 0 - three_underscore_time = 0 - - for char in prefix: - if char == '_': - consecutive_underscore_count += 1 - if consecutive_underscore_count == 2: - # Two consecutive underscores, leave them unchanged - replaced_prefix.pop() - replaced_prefix.append('__') - elif consecutive_underscore_count == 3: - # Three consecutive underscores - three_underscore_time += 1 - replaced_prefix.pop() - if three_underscore_time % 2 == 1: - # Even index, replace the first underscore - replaced_prefix.append('.__') - else: - # Odd index, replace the third underscore - replaced_prefix.append('__.') - else: - # Single underscore, replace with a period - replaced_prefix.append('.') - else: - # Not an underscore, reset the count - consecutive_underscore_count = 0 - replaced_prefix.append(char) - return ''.join(replaced_prefix) - - def dump_data_by_rank_count(dump_step, prefix, data_info): print_info_log(f"ptdbg is analyzing rank{rank} api: {prefix}" + " " * 10, end='\r') if DumpUtil.is_single_rank and DumpUtil.dump_thread_pool: @@ -345,7 +310,7 @@ def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): print_warn_log("The file does not exist, error: {}".format(e)) name_prefix = name - name_template = f"{underscore_replace(name_prefix)}" + ".{}" + name_template = f"{name_prefix}" + "_{}" if DumpUtil.is_single_rank is None: DumpUtil.is_single_rank = check_single_rank_folder(dump_dir) if DumpUtil.dump_switch_mode in [Const.ALL, Const.API_LIST]: -- Gitee From 615a8871ae24f77eb94a3540c0d11fc4d369cf8f Mon Sep 17 00:00:00 2001 From: h00613304 Date: Wed, 8 May 2024 23:04:34 +0800 Subject: [PATCH 3/7] rename dump name --- .../ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py | 2 +- .../src/python/ptdbg_ascend/hook_module/hook_module.py | 4 ++-- .../src/python/ptdbg_ascend/hook_module/wrap_aten.py | 2 +- .../src/python/ptdbg_ascend/hook_module/wrap_distributed.py | 2 +- .../src/python/ptdbg_ascend/hook_module/wrap_functional.py | 2 +- .../src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py | 2 +- .../src/python/ptdbg_ascend/hook_module/wrap_tensor.py | 2 +- .../src/python/ptdbg_ascend/hook_module/wrap_torch.py | 2 +- 
.../src/python/ptdbg_ascend/hook_module/wrap_vf.py | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index b8a8e026aa..ce30a036ce 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -310,7 +310,7 @@ def dump_acc_cmp(name, in_feat, out_feat, dump_step, module): print_warn_log("The file does not exist, error: {}".format(e)) name_prefix = name - name_template = f"{name_prefix}" + "_{}" + name_template = f"{name_prefix}" + ".{}" if DumpUtil.is_single_rank is None: DumpUtil.is_single_rank = check_single_rank_folder(dump_dir) if DumpUtil.dump_switch_mode in [Const.ALL, Const.API_LIST]: diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py index 6f23a8d42b..9a6ee24b67 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/hook_module.py @@ -43,10 +43,10 @@ class HOOKModule(nn.Module): if self.prefix not in HOOKModule.module_count: HOOKModule.module_count[self.prefix] = 1 - self.prefix += '0_' + self.prefix += '0.' else: HOOKModule.module_count[self.prefix] += 1 - self.prefix = self.prefix + str(HOOKModule.module_count[self.prefix] - 1) + '_' + self.prefix = self.prefix + str(HOOKModule.module_count[self.prefix] - 1) + '.' self.register_forward_hook(hook(self.prefix + "forward")) self.register_backward_hook(hook(self.prefix + "backward")) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py index 57003eb525..1d06ae6fa6 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py @@ -56,7 +56,7 @@ class AtenOPTemplate(HOOKModule): if not '.' + overload_name in op_name_: op_name_ = op_name_ + '.' + overload_name self.op = op - self.prefix_op_name_ = "Aten_" + str(op_name_) + "_" + self.prefix_op_name_ = "Aten." + str(op_name_) + "." super().__init__(hook) @torch_device_guard diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_distributed.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_distributed.py index 8d5140206a..ea5fa00c12 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_distributed.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_distributed.py @@ -49,7 +49,7 @@ class HOOKDistributedOP(object): class DistributedOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "Distributed_" + str(op_name) + "_" + self.prefix_op_name_ = "Distributed." + str(op_name) + "." 
super().__init__(hook) if self.op_name_ in Const.INPLACE_LIST: self.register_forward_pre_hook(hook(self.prefix + Const.PRE_FORWARD)) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_functional.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_functional.py index 0533f55b54..1a480e3377 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_functional.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_functional.py @@ -84,7 +84,7 @@ class HOOKFunctionalOP(object): class FunctionalOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "Functional_" + str(op_name) + "_" + self.prefix_op_name_ = "Functional." + str(op_name) + "." super().__init__(hook) @torch_device_guard diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py index 2b90a8f5b5..2e37d152a8 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_npu_custom.py @@ -47,7 +47,7 @@ class NpuOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "NPU_" + str(op_name) + "_" + self.prefix_op_name_ = "NPU." + str(op_name) + "." super().__init__(hook) @torch_device_guard diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_tensor.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_tensor.py index cddc99d91b..5649ec28cb 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_tensor.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_tensor.py @@ -47,7 +47,7 @@ class TensorOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "Tensor_" + str(op_name) + "_" + self.prefix_op_name_ = "Tensor." + str(op_name) + "." super().__init__(hook) @torch_device_guard diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_torch.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_torch.py index e3a4af7a85..1d988f16d8 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_torch.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_torch.py @@ -64,7 +64,7 @@ class TorchOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "Torch_" + str(op_name) + "_" + self.prefix_op_name_ = "Torch." + str(op_name) + "." super().__init__(hook) @torch_device_guard diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_vf.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_vf.py index 1d055b090a..d8973a363b 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_vf.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_vf.py @@ -44,7 +44,7 @@ class HOOKVfOP(object): class VfOPTemplate(HOOKModule): def __init__(self, op_name, hook): self.op_name_ = op_name - self.prefix_op_name_ = "VF_" + str(op_name) + "_" + self.prefix_op_name_ = "VF." + str(op_name) + "." 
super().__init__(hook) @torch_device_guard -- Gitee From ed1f8185a81a65d97fbd03e6478a51404b8e876a Mon Sep 17 00:00:00 2001 From: h00613304 Date: Wed, 8 May 2024 23:20:11 +0800 Subject: [PATCH 4/7] rename dump name --- .../ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py index a264f89c77..6b0f8c37e5 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/common/utils.py @@ -455,14 +455,14 @@ def is_starts_with(string, prefix_list): def check_stack_mode(pkl_fp): api_prefix = "" - api_pattern = r'\[\"([0-9a-zA-Z_.]+_(for|back)ward)_(in|out)put(\.[0-9]+)?' + api_pattern = r'\[\"([0-9a-zA-Z_.]+.(for|back)ward).(in|out)put(\.[0-9]+)?' is_stack_mode = False for index, line in enumerate(pkl_fp): if index == 0: api_match = re.search(api_pattern, line) api_prefix = api_match.group(1) elif api_prefix and line.startswith(f'["{api_prefix}'): - if line.startswith(f'["{api_prefix}_stack_info'): + if line.startswith(f'["{api_prefix}.stack_info'): is_stack_mode = True break else: -- Gitee From 5ba117db7074c93c43b3097c6505363216293b11 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Thu, 9 May 2024 09:35:54 +0800 Subject: [PATCH 5/7] fix ut --- .../ptdbg_ascend/test/ut/hook_module/test_wrap_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/ptdbg_ascend/test/ut/hook_module/test_wrap_torch.py b/debug/accuracy_tools/ptdbg_ascend/test/ut/hook_module/test_wrap_torch.py index ef0350ccc0..fecb88cba0 100644 --- a/debug/accuracy_tools/ptdbg_ascend/test/ut/hook_module/test_wrap_torch.py +++ b/debug/accuracy_tools/ptdbg_ascend/test/ut/hook_module/test_wrap_torch.py @@ -18,7 +18,7 @@ class TestWrapTorch(unittest.TestCase): def test_TorchOPTemplate(self): template = TorchOPTemplate(self.op_name, self.hook) self.assertEqual(template.op_name_, self.op_name) - self.assertEqual(template.prefix_op_name_, "Torch_" + str(self.op_name) + "_") + self.assertEqual(template.prefix_op_name_, "Torch." 
+ str(self.op_name) + ".") def test_forward(self): template = TorchOPTemplate(self.op_name, self.hook) -- Gitee From 96c05c88854eb176a12fa95a37378e239e2999b0 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Thu, 9 May 2024 10:08:09 +0800 Subject: [PATCH 6/7] fix bug --- debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 05e9e75d1e..0e0e5054ea 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -212,7 +212,6 @@ def run_ut(config): def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success): if not is_fwd_success or not is_bwd_success: - # api_full_name = api_full_name.replace("*", ".") for element in data_info.in_fwd_data_list: UtAPIInfo(api_full_name + '.forward.input', element) UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_output) @@ -380,7 +379,7 @@ def preprocess_forward_content(forward_content): arg_cache = {} for key, value in forward_content.items(): - base_key = key.rsplit('*', 1)[0] + base_key = key.rsplit('.', 1)[0] if key not in arg_cache: new_args = value['args'] -- Gitee From a2779ae5d247725c2d42f39b09664fff9835a7c0 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Thu, 9 May 2024 10:52:01 +0800 Subject: [PATCH 7/7] fix DT --- .../api_accuracy_checker/test/ut/compare/test_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_compare.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_compare.py index 2c9b13c4d9..a038176843 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_compare.py @@ -70,7 +70,7 @@ class TestCompare(unittest.TestCase): def test_compare_output(self): bench_out, npu_out = torch.randn(100, 100), torch.randn(100, 100) bench_grad, npu_grad = [torch.randn(100, 100)], [torch.randn(100, 100)] - api_name = 'Functional*conv2d*0' + api_name = 'Functional.conv2d.0' data_info = UtDataInfo(bench_grad, npu_grad, bench_out, npu_out, None, None, None) is_fwd_success, is_bwd_success = self.compare.compare_output(api_name, data_info) self.assertFalse(is_fwd_success) -- Gitee
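For reference, the naming scheme this series converges on: full API identifiers now use '.' as the sole delimiter (for example "Functional.conv2d.0", extended to "Functional.conv2d.0.forward" by the hooks), replacing the earlier mix of '*' and '_'. Below is a minimal, self-contained Python sketch of how such a name is built and parsed under that scheme; the helper names build_api_full_name and parse_api_full_name are illustrative only and are not part of the repository, and the sketch assumes the op name itself contains no dots (aten overload names built in wrap_aten.py may contain one, so a plain three-way split does not apply to them).

def build_api_full_name(api_type, op_name, call_index):
    # Mirrors the prefix construction in hook_module.py after the rename:
    # the pieces are joined with '.' instead of '_', e.g. "Torch.matmul.0".
    return "{}.{}.{}".format(api_type, op_name, call_index)


def parse_api_full_name(api_full_name):
    # Mirrors the split used in run_ut.py and compare.py after the rename:
    # api_full_name.split(".") instead of the old split("*").
    api_type, api_name, call_index = api_full_name.split(".")
    return api_type, api_name, call_index


if __name__ == "__main__":
    full_name = build_api_full_name("Functional", "conv2d", 0)
    assert parse_api_full_name(full_name) == ("Functional", "conv2d", "0")
    print(full_name)  # Functional.conv2d.0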