From 385b6d45e62fffa5143c0ebf278b6b49723f70a5 Mon Sep 17 00:00:00 2001
From: s30048155
Date: Thu, 26 Oct 2023 17:33:40 +0800
Subject: [PATCH 1/8] fix ut error

---
 .../api_accuracy_checker/test/ut/common/test_config.py   | 2 +-
 .../api_accuracy_checker/test/ut/dump/test_dump_scopr.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_config.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_config.py
index ed764987d5..a68057dfb4 100644
--- a/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_config.py
+++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/common/test_config.py
@@ -17,5 +17,5 @@ class TestConfig(unittest.TestCase):
 
     def test_update_config(self):
-        self.config.update_config(dump_path='/new/path/to/dump', enable_dataloader=False)
+        self.config.update_config(dump_path='/new/path/to/dump')
         self.assertEqual(self.config.dump_path, '/new/path/to/dump')
 
diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py
index addba38e38..b892a6077a 100644
--- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py
+++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py
@@ -10,12 +10,12 @@ class TestDumpScope(unittest.TestCase):
 
         wrapped_func = iter_tracer(dummy_func)
         result = wrapped_func()
-        self.assertEqual(DumpUtil.dump_switch, "ON")
+        self.assertEqual(DumpUtil.dump_switch, "OFF")
         self.assertEqual(result, "Hello, World!")
 
         def another_dummy_func():
             return 123
         wrapped_func = iter_tracer(another_dummy_func)
         result = wrapped_func()
-        self.assertEqual(DumpUtil.dump_switch, "ON")
+        self.assertEqual(DumpUtil.dump_switch, "OFF")
         self.assertEqual(result, 123)
--
Gitee

From d5bda346704de90f88808d8ba2e7105fa75fd4a0 Mon Sep 17 00:00:00 2001
From: s30048155
Date: Fri, 27 Oct 2023 11:07:22 +0800
Subject: [PATCH 2/8] fix chunk api

---
 .../api_accuracy_checker/run_ut/run_ut.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
index 536d607dd6..228e10b385 100644
--- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
+++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
@@ -185,10 +185,11 @@ def run_backward(api_full_name, args, backward_content, grad_index, npu_args, np
     backward_args = backward_content[api_full_name]
     grad = gen_args(backward_args)[0]
     cpu_grad, _ = generate_cpu_params(grad, {}, False)
-    if grad_index is not None:
+    if isinstance(out, tuple):
+        grad_outputs = tuple(torch.randn_like(out_item) for out_item in out)
+        torch.autograd.backward(out, grad_outputs=grad_outputs)
+    elif grad_index is not None:
         out[grad_index].backward(cpu_grad)
-    elif isinstance(out, (list, tuple)):
-        raise NotImplementedError("Multiple backward is not supported.")
     else:
         out.backward(cpu_grad)
     args_grad = []
@@ -197,7 +198,10 @@
             args_grad.append(arg.grad)
     grad_out = args_grad
     npu_grad = grad.clone().detach().npu()
-    if grad_index is not None:
+    if isinstance(npu_out, tuple):
+        npu_grad_outputs = tuple(torch.randn_like(npu_out_item) for npu_out_item in npu_out)
+        torch.autograd.backward(npu_out, grad_outputs=npu_grad_outputs)
+    elif grad_index is not None:
         npu_out[grad_index].backward(npu_grad)
     else:
         npu_out.backward(npu_grad)
--
Gitee

From d3fa20980bd350aecf8088c6399faf34c5e3e021 Mon Sep 17 00:00:00 2001
From: s30048155
Date: Fri, 27 Oct 2023 11:07:49 +0800
Subject: [PATCH 3/8] save real data to rank folder

---
 .../api_accuracy_checker/common/base_api.py | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py b/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
index 2c3086184c..15ff562459 100644
--- a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
+++ b/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
@@ -54,12 +54,14 @@ class BaseAPIInfo:
         else:
             api_args = self.api_name + '.' + str(self.args_num)
 
+        rank = self.get_tensor_rank(arg)
+        if rank is not None:
+            rank = str(rank)
         if self.is_forward:
-            forward_real_data_path = os.path.join(self.save_path, self.forward_path)
-
+            forward_real_data_path = os.path.join(self.save_path, self.forward_path, rank) if rank else os.path.join(self.save_path, self.forward_path)
             file_path = os.path.join(forward_real_data_path, f'{api_args}.pt')
         else:
-            backward_real_data_path = os.path.join(self.save_path, self.backward_path)
+            backward_real_data_path = os.path.join(self.save_path, self.backward_path, rank) if rank else os.path.join(self.save_path, self.backward_path)
             file_path = os.path.join(backward_real_data_path, f'{api_args}.pt')
         self.args_num += 1
         pt_path = write_pt(file_path, arg.contiguous().cpu().detach())
@@ -67,6 +69,24 @@ class BaseAPIInfo:
         single_arg.update({'datapath' : pt_path})
         single_arg.update({'requires_grad': arg.requires_grad})
         return single_arg
+
+    def get_tensor_rank(self, arg):
+        def get_tensor_rank_single(x):
+            if isinstance(x, (list, tuple)):
+                if len(x) > 0:
+                    return get_tensor_rank_single(x[0])
+                return None
+            elif isinstance(x, torch.Tensor):
+                device = x.device
+                if device.type == 'cpu':
+                    return None
+                else:
+                    return device.index
+            return None
+        rank = get_tensor_rank_single(arg)
+        if rank is None:
+            return None
+        return rank
 
     def analyze_builtin(self, arg):
         single_arg = {}
--
Gitee

From 7b326a7e55ed06858875d1c865ce1bf60e6ec2ed Mon Sep 17 00:00:00 2001
From: s30048155
Date: Sat, 28 Oct 2023 16:27:53 +0800
Subject: [PATCH 4/8] Fix cosine similarity being 0 for int types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index cca521a296..f1724cbdea 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -170,6 +170,8 @@ def compare_core(bench_out, npu_out, alg):
             copy_bench_out = copy_bench_out.to(torch.float32)
             copy_npu_out = copy_npu_out.to(torch.float32)
         compare_result, test_success, msg = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), alg)
+        if copy_bench_out.dtype in [torch.bool, torch.uint8, torch.int8, torch.int16, torch.uint16, torch.uint32, torch.int32, torch.int64, torch.uint64] and alg == cosine_sim:
+            compare_result = 1
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
         bench_dtype = str(type(bench_out))
--
Gitee
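Patch 2 above replaces the NotImplementedError path for multi-output APIs with a backward pass driven through torch.autograd.backward, seeding one gradient per element of the output tuple (the commit title suggests torch.chunk-style APIs, which return a tuple of tensors). The snippet below is a minimal standalone sketch of that pattern for reference, not code from this repository: torch.chunk as the example API is an illustrative assumption, the randn_like seeds mirror the patch, and the sketch passes the seed gradients via torch.autograd.backward's documented grad_tensors parameter.

import torch

# A multi-output API: torch.chunk returns a tuple of tensor views of x.
x = torch.randn(6, requires_grad=True)
out = torch.chunk(x, 3)

# One seed gradient per output, shaped like the corresponding output.
seed_grads = tuple(torch.randn_like(o) for o in out)

# Backward through all outputs at once; gradients accumulate into x.grad.
torch.autograd.backward(out, seed_grads)

print(x.grad.shape)  # torch.Size([6])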
From a21edf533d8b78361e212472a4cb6d7199172f2b Mon Sep 17 00:00:00 2001
From: s30048155
Date: Sat, 28 Oct 2023 16:32:09 +0800
Subject: [PATCH 5/8] change to NA

---
 debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index f1724cbdea..e4ec036289 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -171,7 +171,7 @@ def compare_core(bench_out, npu_out, alg):
             copy_npu_out = copy_npu_out.to(torch.float32)
         compare_result, test_success, msg = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), alg)
         if copy_bench_out.dtype in [torch.bool, torch.uint8, torch.int8, torch.int16, torch.uint16, torch.uint32, torch.int32, torch.int64, torch.uint64] and alg == cosine_sim:
-            compare_result = 1
+            compare_result = CompareConst.NA
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
         bench_dtype = str(type(bench_out))
--
Gitee

From 0d084306a1b522a001a8fede696ba0daa0365b15 Mon Sep 17 00:00:00 2001
From: sunyiming
Date: Sat, 28 Oct 2023 09:28:46 +0000
Subject: [PATCH 6/8] Revert "change to NA"

This reverts commit a21edf533d8b78361e212472a4cb6d7199172f2b.
---
 debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index e4ec036289..f1724cbdea 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -171,7 +171,7 @@ def compare_core(bench_out, npu_out, alg):
             copy_npu_out = copy_npu_out.to(torch.float32)
         compare_result, test_success, msg = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), alg)
         if copy_bench_out.dtype in [torch.bool, torch.uint8, torch.int8, torch.int16, torch.uint16, torch.uint32, torch.int32, torch.int64, torch.uint64] and alg == cosine_sim:
-            compare_result = CompareConst.NA
+            compare_result = 1
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
         bench_dtype = str(type(bench_out))
--
Gitee

From ebfe0982fda24c93aa9097dd0f64c96f733593eb Mon Sep 17 00:00:00 2001
From: sunyiming
Date: Sat, 28 Oct 2023 09:29:02 +0000
Subject: [PATCH 7/8] Revert "Fix cosine similarity being 0 for int types"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 7b326a7e55ed06858875d1c865ce1bf60e6ec2ed.
---
 debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index f1724cbdea..cca521a296 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -170,8 +170,6 @@ def compare_core(bench_out, npu_out, alg):
             copy_bench_out = copy_bench_out.to(torch.float32)
             copy_npu_out = copy_npu_out.to(torch.float32)
         compare_result, test_success, msg = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), alg)
-        if copy_bench_out.dtype in [torch.bool, torch.uint8, torch.int8, torch.int16, torch.uint16, torch.uint32, torch.int32, torch.int64, torch.uint64] and alg == cosine_sim:
-            compare_result = 1
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
         bench_dtype = str(type(bench_out))
--
Gitee

From c15254194416dab0c32fcc19c561ea660ec1b903 Mon Sep 17 00:00:00 2001
From: sunyiming
Date: Tue, 31 Oct 2023 02:33:29 +0000
Subject: [PATCH 8/8] Revert "save real data to rank folder"

This reverts commit d3fa20980bd350aecf8088c6399faf34c5e3e021.
---
 .../api_accuracy_checker/common/base_api.py | 26 +++----------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py b/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
index 15ff562459..2c3086184c 100644
--- a/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
+++ b/debug/accuracy_tools/api_accuracy_checker/common/base_api.py
@@ -54,14 +54,12 @@ class BaseAPIInfo:
         else:
             api_args = self.api_name + '.' + str(self.args_num)
 
-        rank = self.get_tensor_rank(arg)
-        if rank is not None:
-            rank = str(rank)
         if self.is_forward:
-            forward_real_data_path = os.path.join(self.save_path, self.forward_path, rank) if rank else os.path.join(self.save_path, self.forward_path)
+            forward_real_data_path = os.path.join(self.save_path, self.forward_path)
+
             file_path = os.path.join(forward_real_data_path, f'{api_args}.pt')
         else:
-            backward_real_data_path = os.path.join(self.save_path, self.backward_path, rank) if rank else os.path.join(self.save_path, self.backward_path)
+            backward_real_data_path = os.path.join(self.save_path, self.backward_path)
             file_path = os.path.join(backward_real_data_path, f'{api_args}.pt')
         self.args_num += 1
         pt_path = write_pt(file_path, arg.contiguous().cpu().detach())
@@ -69,24 +67,6 @@ class BaseAPIInfo:
         single_arg.update({'datapath' : pt_path})
         single_arg.update({'requires_grad': arg.requires_grad})
         return single_arg
-
-    def get_tensor_rank(self, arg):
-        def get_tensor_rank_single(x):
-            if isinstance(x, (list, tuple)):
-                if len(x) > 0:
-                    return get_tensor_rank_single(x[0])
-                return None
-            elif isinstance(x, torch.Tensor):
-                device = x.device
-                if device.type == 'cpu':
-                    return None
-                else:
-                    return device.index
-            return None
-        rank = get_tensor_rank_single(arg)
-        if rank is None:
-            return None
-        return rank
 
     def analyze_builtin(self, arg):
         single_arg = {}
--
Gitee
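Patch 8 reverts the per-rank dump layout from patch 3, which derived a rank from the device index of the first tensor argument and appended it to the real-data save path. For reference, a minimal standalone sketch of that idea is shown below; it is not the repository's API, and the helper name, the plain-path fallback for CPU tensors, and the example paths are illustrative assumptions.

import os
import torch

def rank_subdir(base_path, arg):
    # Walk into nested lists/tuples and return the device index of the first
    # non-CPU tensor found, or None if there is none.
    def first_device_index(x):
        if isinstance(x, (list, tuple)) and x:
            return first_device_index(x[0])
        if isinstance(x, torch.Tensor) and x.device.type != 'cpu':
            return x.device.index
        return None

    idx = first_device_index(arg)
    # CPU-only data keeps the original path; device data gets a per-rank subfolder.
    return os.path.join(base_path, str(idx)) if idx is not None else base_path

print(rank_subdir('./dump', torch.randn(2)))  # ./dump (CPU tensor, no rank subfolder)
# A tensor on npu:1 or cuda:1 would map './dump' to './dump/1'.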