From ef37302fa75fd583243e5f56bc81351cf414b064 Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 15 Jan 2024 15:43:25 +0800 Subject: [PATCH 1/7] =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=BF=9D=E5=AD=98=E8=B7=AF=E5=BE=84=E6=9B=B4?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_accuracy_checker/common/utils.py | 24 ++++++++++++++ .../api_accuracy_checker/dump/api_info.py | 6 ++-- .../run_ut/data_generate.py | 31 ++++++++++--------- .../api_accuracy_checker/run_ut/run_ut.py | 26 +++++++++------- 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index e5a6b71100..e9b129360e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -630,3 +630,27 @@ def write_pt(file_path, tensor): full_path = os.path.realpath(file_path) file_check_util.change_mode(full_path, FileCheckConst.DATA_FILE_AUTHORITY) return full_path + + +def get_real_data_path(file_path): + targets = ['forward_real_data', 'backward_real_data'] + pattern = re.compile(r'(.*)(step\d+/)(?=({}))'.format('|'.join(targets))) + match = pattern.search(file_path) + if match: + base_path = match.group(1) + match.group(2) + target_path = file_path[len(base_path):] + return target_path + else: + raise DumpException(DumpException.INVALID_PATH_ERROR) + + +def check_real_data_mode(data_path, real_data_path): + if data_path and not real_data_path: + error_log = "The current mode is real data. The root directory of real data must be configured." + raise CompareException(CompareException.INVALID_COMPARE_MODE, error_log) + elif data_path and real_data_path: + data_path = os.path.join(real_data_path, data_path) + return os.path.realpath(data_path) + elif not data_path and real_data_path: + print_warn_log("The current mode is random data. The root directory of real data is not used.") + return data_path diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py index adb0c4b0f3..03de506d62 100644 --- a/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/dump/api_info.py @@ -3,7 +3,8 @@ import os import inspect import torch from api_accuracy_checker.common.config import msCheckerConfig -from api_accuracy_checker.common.utils import print_error_log, write_pt, create_directory, DumpException +from api_accuracy_checker.common.utils import print_error_log, write_pt, create_directory, DumpException, \ + get_real_data_path from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import check_path_before_create @@ -119,8 +120,9 @@ class APIInfo: file_path = os.path.join(self.save_path, f'{api_args}.pt') pt_path = write_pt(file_path, arg.contiguous().cpu().detach()) self.args_num += 1 + real_data_path = get_real_data_path(pt_path) single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'datapath': pt_path}) + single_arg.update({'datapath': real_data_path}) single_arg.update({'requires_grad': arg.requires_grad}) return single_arg diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 5765f980d2..1c0248db5f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -19,8 +19,8 @@ import os import torch import numpy as np -from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, print_error_log, \ - CompareException +from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ + print_error_log, check_real_data_mode, CompareException TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -28,7 +28,7 @@ FLOAT_TYPE = ['torch.float32', 'torch.float', 'torch.float64', 'torch.double', ' 'torch.half', 'torch.bfloat16'] -def gen_data(info, need_grad, convert_type): +def gen_data(info, need_grad, convert_type, real_data_path=None): """ Function Description: Based on arg basic information, generate arg data @@ -40,6 +40,7 @@ def gen_data(info, need_grad, convert_type): check_object_type(info, dict) data_type = info.get('type') data_path = info.get('datapath') + data_path = check_real_data_mode(data_path, real_data_path) if data_type in TENSOR_DATA_LIST: if data_path: data = gen_real_tensor(data_path, convert_type) @@ -157,7 +158,7 @@ def gen_bool_tensor(low, high, shape): return data -def gen_args(args_info, need_grad=True, convert_type=None): +def gen_args(args_info, need_grad=True, convert_type=None, real_data_path=None): """ Function Description: Based on API basic information, generate input parameters: args, for API forward running @@ -165,14 +166,15 @@ def gen_args(args_info, need_grad=True, convert_type=None): api_info: API basic information. List need_grad: set Tensor grad for backward convert_type: convert ori_type to dist_type flag. + real_data_path: the root directory for storing real data. """ check_object_type(args_info, list) args_result = [] for arg in args_info: if isinstance(arg, (list, tuple)): - data = gen_args(arg, need_grad, convert_type) + data = gen_args(arg, need_grad, convert_type, real_data_path) elif isinstance(arg, dict): - data = gen_data(arg, need_grad, convert_type) + data = gen_data(arg, need_grad, convert_type, real_data_path) else: print_warn_log(f'Warning: {arg} is not supported') raise NotImplementedError() @@ -180,21 +182,22 @@ def gen_args(args_info, need_grad=True, convert_type=None): return args_result -def gen_kwargs(api_info, convert_type=None): +def gen_kwargs(api_info, convert_type=None, real_data_path=None): """ Function Description: Based on API basic information, generate input parameters: kwargs, for API forward running Parameter: api_info: API basic information. Dict convert_type: convert ori_type to dist_type flag. + real_data_path: the root directory for storing real data. """ check_object_type(api_info, dict) kwargs_params = api_info.get("kwargs") for key, value in kwargs_params.items(): if isinstance(value, (list, tuple)): - kwargs_params[key] = gen_list_kwargs(value, convert_type) + kwargs_params[key] = gen_list_kwargs(value, convert_type, real_data_path) elif value.get('type') in TENSOR_DATA_LIST: - kwargs_params[key] = gen_data(value, False, convert_type) + kwargs_params[key] = gen_data(value, False, convert_type, real_data_path) elif value.get('type') in TORCH_TYPE: gen_torch_kwargs(kwargs_params, key, value) else: @@ -209,7 +212,7 @@ def gen_torch_kwargs(kwargs_params, key, value): kwargs_params[key] = eval(value.get('value')) -def gen_list_kwargs(kwargs_item_value, convert_type): +def gen_list_kwargs(kwargs_item_value, convert_type, real_data_path=None): """ Function Description: When kwargs value is list, generate the list of kwargs result @@ -220,14 +223,14 @@ def gen_list_kwargs(kwargs_item_value, convert_type): kwargs_item_result = [] for item in kwargs_item_value: if item.get('type') in TENSOR_DATA_LIST: - item_value = gen_data(item, False, convert_type) + item_value = gen_data(item, False, convert_type, real_data_path) else: item_value = item.get('value') kwargs_item_result.append(item_value) return kwargs_item_result -def gen_api_params(api_info, need_grad=True, convert_type=None): +def gen_api_params(api_info, need_grad=True, convert_type=None, real_data_path=None): """ Function Description: Based on API basic information, generate input parameters: args, kwargs, for API forward running @@ -240,9 +243,9 @@ def gen_api_params(api_info, need_grad=True, convert_type=None): if convert_type and convert_type not in Const.CONVERT: error_info = f"convert_type params not support {convert_type}." raise CompareException(CompareException.INVALID_PARAM_ERROR, error_info) - kwargs_params = gen_kwargs(api_info, convert_type) + kwargs_params = gen_kwargs(api_info, convert_type, real_data_path) if api_info.get("args"): - args_params = gen_args(api_info.get("args"), need_grad, convert_type) + args_params = gen_args(api_info.get("args"), need_grad, convert_type, real_data_path) else: print_warn_log(f'Warning: No args in {api_info} ') args_params = [] diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index cf9bfe29bd..7eadcf4414 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -35,7 +35,7 @@ UT_ERROR_DATA_DIR = 'ut_error_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', - 'save_error_data', 'is_continue_run_ut', 'test_result_cnt']) + 'save_error_data', 'is_continue_run_ut', 'real_data_path', 'test_result_cnt']) not_backward_list = ['repeat_interleave'] tqdm_params = { @@ -135,7 +135,6 @@ def generate_cpu_params(input_args, input_kwargs, need_backward): def run_ut(config): print_info_log("start UT test") - api_setting_dict = get_json_contents("torch_ut_setting.json") compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut, config.test_result_cnt) with FileOpen(config.result_csv_path, 'r') as file: @@ -150,7 +149,7 @@ def run_ut(config): [_, api_name, _] = api_full_name.split("*") if api_name not in set(msCheckerConfig.white_list): continue - data_info = run_torch_api(api_full_name, api_setting_dict, config.backward_content, api_info_dict) + data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info.bench_out, data_info.device_out, @@ -183,10 +182,10 @@ def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) UtAPIInfo(api_full_name + '.backward.output.device', data_info.device_grad_out) -def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_dict): +def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict): in_fwd_data_list = [] [api_type, api_name, _] = api_full_name.split("*") - args, kwargs, need_grad = get_api_info(api_info_dict, api_name) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) in_fwd_data_list.append(args) in_fwd_data_list.append(kwargs) need_backward = api_full_name in backward_content @@ -205,6 +204,7 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di grad_out, device_grad_out = None, None out = exec_api(api_type, api_name, cpu_args, cpu_kwargs) device_out = exec_api(api_type, api_name, device_args, device_kwargs) + api_setting_dict = get_json_contents("torch_ut_setting.json") grad_input_index = api_setting_dict.get(api_name) grad_index = None grad = None @@ -213,25 +213,25 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di if need_backward: grad_out, device_grad_out, grad, device_grad = run_backward( - api_full_name, cpu_args, backward_content, grad_index, device_args, device_out, out) + api_full_name, cpu_args, backward_content, grad_index, device_args, device_out, out, real_data_path) if grad_index is not None: return UtDataInfo(grad_out, device_grad_out, device_out[grad_index], out[grad_index], grad, in_fwd_data_list) return UtDataInfo(grad_out, device_grad_out, device_out, out, grad, in_fwd_data_list) -def get_api_info(api_info_dict, api_name): +def get_api_info(api_info_dict, api_name, real_data_path): convert_type, api_info_dict = api_info_preprocess(api_name, api_info_dict) need_grad = True if api_info_dict.get("kwargs") and "out" in api_info_dict.get("kwargs"): need_grad = False - args, kwargs = gen_api_params(api_info_dict, need_grad, convert_type) + args, kwargs = gen_api_params(api_info_dict, need_grad, convert_type, real_data_path) return args, kwargs, need_grad -def run_backward(api_full_name, args, backward_content, grad_index, device_args, device_out, out): +def run_backward(api_full_name, args, backward_content, grad_index, device_args, device_out, out, real_data_path): backward_args = backward_content[api_full_name] - grad = gen_args(backward_args)[0] + grad = gen_args(backward_args, real_data_path=real_data_path)[0] cpu_grad, _ = generate_cpu_params(grad, {}, False) if grad_index is not None: out[grad_index].backward(cpu_grad) @@ -339,6 +339,10 @@ def _run_ut_parser(parser): help=" The path of accuracy_checking_result_{timestamp}.csv, " "when run ut is interrupted, enter the file path to continue run ut.", required=False) + parser.add_argument("-real_data_path", dest="real_data_path", default="", type=str, + help=" In real data mode, the root directory for storing real data " + "must be configured.", + required=False) def _run_ut(): @@ -386,7 +390,7 @@ def _run_ut(): UT_ERROR_DATA_DIR = ut_error_data_dir_path initialize_save_error_data() run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data, - args.result_csv_path, test_result_cnt) + args.result_csv_path, args.real_data_path, test_result_cnt) run_ut(run_ut_config) -- Gitee From 14bbd0aeedba4b4654459c20cc5d35c8cddd928e Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 15 Jan 2024 20:30:07 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_accuracy_checker/common/utils.py | 21 +++++------ .../run_ut/run_overflow_check.py | 17 +++++---- .../api_accuracy_checker/run_ut/run_ut.py | 36 ++++++++----------- .../ptdbg_ascend/overflow_check/info_dump.py | 17 +++++++-- 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index e9b129360e..47eab44c43 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -633,11 +633,11 @@ def write_pt(file_path, tensor): def get_real_data_path(file_path): - targets = ['forward_real_data', 'backward_real_data'] - pattern = re.compile(r'(.*)(step\d+/)(?=({}))'.format('|'.join(targets))) + targets = ['forward_real_data', 'backward_real_data', 'ut_error_data\d+'] + pattern = re.compile(r'(.*?)(?=({}))'.format('|'.join(targets))) match = pattern.search(file_path) if match: - base_path = match.group(1) + match.group(2) + base_path = match.group(1) target_path = file_path[len(base_path):] return target_path else: @@ -645,12 +645,13 @@ def get_real_data_path(file_path): def check_real_data_mode(data_path, real_data_path): - if data_path and not real_data_path: - error_log = "The current mode is real data. The root directory of real data must be configured." - raise CompareException(CompareException.INVALID_COMPARE_MODE, error_log) - elif data_path and real_data_path: - data_path = os.path.join(real_data_path, data_path) - return os.path.realpath(data_path) - elif not data_path and real_data_path: + if not data_path: print_warn_log("The current mode is random data. The root directory of real data is not used.") return data_path + + if not real_data_path: + error_log = "The current mode is real data. The root directory of real data must be configured." + raise CompareException(CompareException.INVALID_COMPARE_MODE, error_log) + + full_data_path = os.path.join(real_data_path, data_path) + return os.path.realpath(full_data_path) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py index 7b23ea5579..3a09eccf19 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py @@ -44,12 +44,12 @@ def check_data_overflow(x): return check_tensor_overflow(x) -def run_overflow_check(forward_file): +def run_overflow_check(forward_file, real_data_path): print_info_log("start UT test") forward_content = get_json_contents(forward_file) for api_full_name, api_info_dict in tqdm(forward_content.items()): try: - run_torch_api(api_full_name, api_info_dict) + run_torch_api(api_full_name, api_info_dict, real_data_path) except Exception as err: api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] if "not implemented for 'Half'" in str(err): @@ -62,13 +62,14 @@ def run_overflow_check(forward_file): print_error_log(f"Run {api_full_name} UT Error: %s" % str(err)) -def run_torch_api(api_full_name, api_info_dict): +def run_torch_api(api_full_name, api_info_dict, real_data_path): torch.npu.clear_npu_overflow_flag() api_type = api_full_name.split("_")[0] api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] - args, kwargs, need_grad = get_api_info(api_info_dict, api_name) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) if not need_grad: - print_warn_log("%s function with out=... arguments don't support automatic differentiation, skip backward." % api_full_name) + print_warn_log("%s function with out=... arguments don't support automatic differentiation, skip backward." + % api_full_name) npu_args, npu_kwargs = generate_device_params(args, kwargs, False) if kwargs.get("device"): del kwargs["device"] @@ -88,6 +89,10 @@ def _run_ut_parser(parser): help=" The api param tool forward result file: generate from api param tool, " "a json file.", required=True) + parser.add_argument("-real_data_path", dest="real_data_path", default="", type=str, + help=" In real data mode, the root directory for storing real data " + "must be configured.", + required=True) parser.add_argument("-j", "--jit_compile", dest="jit_compile", help=" whether to turn on jit compile", default=False, required=False) parser.add_argument("-d", "--device", dest="device_id", type=int, help=" set NPU device id to run ut", @@ -107,7 +112,7 @@ def _run_overflow_check(): except Exception as error: print_error_log(f"Set NPU device id failed. device id is: {args.device_id}") raise NotImplementedError from error - run_overflow_check(forward_file) + run_overflow_check(forward_file, args.real_data_path) if __name__ == '__main__': diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 7eadcf4414..98aaeda39e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -212,12 +212,17 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict grad_index = grad_input_index.get('grad_index') if need_backward: - grad_out, device_grad_out, grad, device_grad = run_backward( - api_full_name, cpu_args, backward_content, grad_index, device_args, device_out, out, real_data_path) + backward_args = backward_content[api_full_name] + grad = gen_args(backward_args, real_data_path=real_data_path)[0] + bench_grad, _ = generate_cpu_params(grad, {}, False) + bench_grad_out = run_backward(cpu_args, bench_grad, grad_index, out) + device_grad = grad.clone().detach().to(current_device) + device_grad_out = run_backward(device_args, device_grad, grad_index, device_out) if grad_index is not None: - return UtDataInfo(grad_out, device_grad_out, device_out[grad_index], out[grad_index], grad, in_fwd_data_list) - return UtDataInfo(grad_out, device_grad_out, device_out, out, grad, in_fwd_data_list) + return UtDataInfo(bench_grad_out, device_grad_out, device_out[grad_index], out[grad_index], bench_grad, + in_fwd_data_list) + return UtDataInfo(bench_grad_out, device_grad_out, device_out, out, bench_grad, in_fwd_data_list) def get_api_info(api_info_dict, api_name, real_data_path): @@ -229,32 +234,21 @@ def get_api_info(api_info_dict, api_name, real_data_path): return args, kwargs, need_grad -def run_backward(api_full_name, args, backward_content, grad_index, device_args, device_out, out, real_data_path): - backward_args = backward_content[api_full_name] - grad = gen_args(backward_args, real_data_path=real_data_path)[0] - cpu_grad, _ = generate_cpu_params(grad, {}, False) +def run_backward(args, grad, grad_index, out): + if grad_index is not None: - out[grad_index].backward(cpu_grad) + out[grad_index].backward(grad) elif isinstance(out, (list, tuple)): raise NotImplementedError("Multiple backward is not supported.") else: - out.backward(cpu_grad) + out.backward(grad) args_grad = [] for arg in args: if isinstance(arg, torch.Tensor): args_grad.append(arg.grad) grad_out = args_grad - device_grad = grad.clone().detach().to(current_device) - if grad_index is not None: - device_out[grad_index].backward(device_grad) - else: - device_out.backward(device_grad) - device_args_grad = [] - for arg in device_args: - if isinstance(arg, torch.Tensor): - device_args_grad.append(arg.grad) - device_grad_out = device_args_grad - return grad_out, device_grad_out, grad, device_grad + + return grad_out def initialize_save_error_data(): diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py index 31a207a3f8..aef120217e 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py @@ -2,7 +2,7 @@ import inspect import fcntl import os import threading - +import re import json import numpy as np import torch @@ -28,6 +28,18 @@ def write_npy(file_path, tensor): return full_path +def get_real_data_path(file_path): + targets = ['forward_real_data(_\d+_\d+)?', 'backward_real_data(_\d+_\d+)?'] + pattern = re.compile(r'(.*?)(?=({}))'.format('|'.join(targets))) + match = pattern.search(file_path) + if match: + base_path = match.group(1) + target_path = file_path[len(base_path):] + return target_path + else: + raise Exception("The save path is incorrect.") + + class APIInfo: def __init__(self, api_name, is_forward, save_real_data=False): self.rank = os.getpid() @@ -93,8 +105,9 @@ class APIInfo: file_path = os.path.join(backward_real_data_path, f'{api_args}.npy') self.args_num += 1 npy_path = write_npy(file_path, arg) + real_data_path = get_real_data_path(npy_path) single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'datapath': npy_path}) + single_arg.update({'datapath': real_data_path}) single_arg.update({'requires_grad': arg.requires_grad}) return single_arg -- Gitee From 1861a72b87c43ab77c6e12a9b4a9157d42a1efe5 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 16 Jan 2024 15:50:55 +0800 Subject: [PATCH 3/7] =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/api_accuracy_checker/common/utils.py | 1 - debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py | 4 ++-- .../api_accuracy_checker/test/ut/dump/test_api_info.py | 5 +++-- .../test/ut/run_ut/test_data_generate.py | 6 +++--- .../api_accuracy_checker/test/ut/run_ut/test_run_ut.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 47eab44c43..5652bf6315 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -646,7 +646,6 @@ def get_real_data_path(file_path): def check_real_data_mode(data_path, real_data_path): if not data_path: - print_warn_log("The current mode is random data. The root directory of real data is not used.") return data_path if not real_data_path: diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 98aaeda39e..19810128f9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -201,13 +201,13 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict del kwargs["device"] cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward) device_args, device_kwargs = generate_device_params(args, kwargs, need_backward) - grad_out, device_grad_out = None, None + bench_grad_out, device_grad_out = None, None out = exec_api(api_type, api_name, cpu_args, cpu_kwargs) device_out = exec_api(api_type, api_name, device_args, device_kwargs) api_setting_dict = get_json_contents("torch_ut_setting.json") grad_input_index = api_setting_dict.get(api_name) grad_index = None - grad = None + grad, bench_grad = None, None if grad_input_index is not None: grad_index = grad_input_index.get('grad_index') diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py index fb0511b8e2..8951d5523a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_api_info.py @@ -11,7 +11,7 @@ class TestAPIInfo(unittest.TestCase): def setUp(self): if os.path.exists('./step-1'): shutil.rmtree('./step-1') - self.api = APIInfo("test_api", APIInfo.get_full_save_path("./", "forward", True), True) + self.api = APIInfo("test_api", APIInfo.get_full_save_path("./", "forward_real_data", True), True) def test_analyze_element(self): element = [1, 2, 3] @@ -24,7 +24,8 @@ class TestAPIInfo(unittest.TestCase): result = self.api._analyze_tensor(tensor) self.assertEqual(result.get('type'), 'torch.Tensor') self.assertTrue(result.get('requires_grad')) - self.assertTrue(os.path.exists(result.get('datapath'))) + datapath = result.get('datapath') + self.assertTrue(datapath.startswith('forward_real_data') or datapath.startswith('backward_real_data')) def test_analyze_builtin(self): arg = slice(1, 10, 2) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py index fff5d6e4bd..50f9131e47 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_data_generate.py @@ -19,7 +19,7 @@ min_value = -5.125 class TestDataGenerateMethods(unittest.TestCase): def test_gen_api_params(self): api_info = copy.deepcopy(api_info_dict) - args_params, kwargs_params = gen_api_params(api_info, True, None) + args_params, kwargs_params = gen_api_params(api_info, True, None, None) max_diff = abs(args_params[0].max() - max_value) min_diff = abs(args_params[0].min() - min_value) self.assertEqual(len(args_params), 1) @@ -30,7 +30,7 @@ class TestDataGenerateMethods(unittest.TestCase): self.assertEqual(kwargs_params, {'inplace': False}) def test_gen_args(self): - args_result = gen_args(api_info_dict.get('args')) + args_result = gen_args(api_info_dict.get('args'), real_data_path=None) max_diff = abs(args_result[0].max() - max_value) min_diff = abs(args_result[0].min() - min_value) self.assertEqual(len(args_result), 1) @@ -40,7 +40,7 @@ class TestDataGenerateMethods(unittest.TestCase): self.assertEqual(args_result[0].shape, torch.Size([2, 2560, 24, 24])) def test_gen_data(self): - data = gen_data(api_info_dict.get('args')[0], True, None) + data = gen_data(api_info_dict.get('args')[0], True, None, None) max_diff = abs(data.max() - max_value) min_diff = abs(data.min() - min_value) self.assertEqual(data.dtype, torch.float32) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_run_ut.py index bca4f0b308..21ec2f0072 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/run_ut/test_run_ut.py @@ -18,7 +18,7 @@ class TestRunUtMethods(unittest.TestCase): def test_exec_api(self): api_info = copy.deepcopy(api_info_dict) [api_type, api_name, _] = api_full_name.split("*") - args, kwargs, need_grad = get_api_info(api_info, api_name) + args, kwargs, need_grad = get_api_info(api_info, api_name, None) cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, True) out = exec_api(api_type, api_name, cpu_args, cpu_kwargs) self.assertEqual(out.dtype, torch.float64) @@ -52,7 +52,7 @@ class TestRunUtMethods(unittest.TestCase): def test_generate_cpu_params(self): api_info = copy.deepcopy(api_info_dict) [api_type, api_name, _] = api_full_name.split("*") - args, kwargs, need_grad = get_api_info(api_info, api_name) + args, kwargs, need_grad = get_api_info(api_info, api_name, None) cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, True) self.assertEqual(len(cpu_args), 1) self.assertEqual(cpu_args[0].dtype, torch.float64) -- Gitee From 61882ec0d081f247f4843caf9b9411914dbf6499 Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 17 Jan 2024 16:11:37 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_accuracy_checker/common/utils.py | 5 ----- .../run_ut/run_overflow_check.py | 14 +++++--------- .../{test_dump_scopr.py => test_dump_scope.py} | 0 .../ptdbg_ascend/overflow_check/info_dump.py | 17 ++--------------- 4 files changed, 7 insertions(+), 29 deletions(-) rename debug/accuracy_tools/api_accuracy_checker/test/ut/dump/{test_dump_scopr.py => test_dump_scope.py} (100%) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 5652bf6315..210bc6b901 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -647,10 +647,5 @@ def get_real_data_path(file_path): def check_real_data_mode(data_path, real_data_path): if not data_path: return data_path - - if not real_data_path: - error_log = "The current mode is real data. The root directory of real data must be configured." - raise CompareException(CompareException.INVALID_COMPARE_MODE, error_log) - full_data_path = os.path.join(real_data_path, data_path) return os.path.realpath(full_data_path) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py index 3a09eccf19..df8d8b153b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_overflow_check.py @@ -44,12 +44,12 @@ def check_data_overflow(x): return check_tensor_overflow(x) -def run_overflow_check(forward_file, real_data_path): +def run_overflow_check(forward_file): print_info_log("start UT test") forward_content = get_json_contents(forward_file) for api_full_name, api_info_dict in tqdm(forward_content.items()): try: - run_torch_api(api_full_name, api_info_dict, real_data_path) + run_torch_api(api_full_name, api_info_dict) except Exception as err: api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] if "not implemented for 'Half'" in str(err): @@ -62,11 +62,11 @@ def run_overflow_check(forward_file, real_data_path): print_error_log(f"Run {api_full_name} UT Error: %s" % str(err)) -def run_torch_api(api_full_name, api_info_dict, real_data_path): +def run_torch_api(api_full_name, api_info_dict): torch.npu.clear_npu_overflow_flag() api_type = api_full_name.split("_")[0] api_name = api_full_name.split("_", 1)[1].rsplit("_", 2)[0] - args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path) + args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path='') if not need_grad: print_warn_log("%s function with out=... arguments don't support automatic differentiation, skip backward." % api_full_name) @@ -89,10 +89,6 @@ def _run_ut_parser(parser): help=" The api param tool forward result file: generate from api param tool, " "a json file.", required=True) - parser.add_argument("-real_data_path", dest="real_data_path", default="", type=str, - help=" In real data mode, the root directory for storing real data " - "must be configured.", - required=True) parser.add_argument("-j", "--jit_compile", dest="jit_compile", help=" whether to turn on jit compile", default=False, required=False) parser.add_argument("-d", "--device", dest="device_id", type=int, help=" set NPU device id to run ut", @@ -112,7 +108,7 @@ def _run_overflow_check(): except Exception as error: print_error_log(f"Set NPU device id failed. device id is: {args.device_id}") raise NotImplementedError from error - run_overflow_check(forward_file, args.real_data_path) + run_overflow_check(forward_file) if __name__ == '__main__': diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py similarity index 100% rename from debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scopr.py rename to debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py index aef120217e..31a207a3f8 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/overflow_check/info_dump.py @@ -2,7 +2,7 @@ import inspect import fcntl import os import threading -import re + import json import numpy as np import torch @@ -28,18 +28,6 @@ def write_npy(file_path, tensor): return full_path -def get_real_data_path(file_path): - targets = ['forward_real_data(_\d+_\d+)?', 'backward_real_data(_\d+_\d+)?'] - pattern = re.compile(r'(.*?)(?=({}))'.format('|'.join(targets))) - match = pattern.search(file_path) - if match: - base_path = match.group(1) - target_path = file_path[len(base_path):] - return target_path - else: - raise Exception("The save path is incorrect.") - - class APIInfo: def __init__(self, api_name, is_forward, save_real_data=False): self.rank = os.getpid() @@ -105,9 +93,8 @@ class APIInfo: file_path = os.path.join(backward_real_data_path, f'{api_args}.npy') self.args_num += 1 npy_path = write_npy(file_path, arg) - real_data_path = get_real_data_path(npy_path) single_arg.update({'type': 'torch.Tensor'}) - single_arg.update({'datapath': real_data_path}) + single_arg.update({'datapath': npy_path}) single_arg.update({'requires_grad': arg.requires_grad}) return single_arg -- Gitee From 3e027eb57cdb3641a918ba26214ee9e2506d42b3 Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 17 Jan 2024 17:24:26 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9real=5Fdata=5Fpath?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 19810128f9..cb016c456d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -333,7 +333,7 @@ def _run_ut_parser(parser): help=" The path of accuracy_checking_result_{timestamp}.csv, " "when run ut is interrupted, enter the file path to continue run ut.", required=False) - parser.add_argument("-real_data_path", dest="real_data_path", default="", type=str, + parser.add_argument("-real_data_path", dest="real_data_path", nargs="?", const="", default="", type=str, help=" In real data mode, the root directory for storing real data " "must be configured.", required=False) -- Gitee From 9a5c214e9311df16f3d7d6a32a955b44f71594e8 Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 18 Jan 2024 17:25:46 +0800 Subject: [PATCH 6/7] fix ut --- .../api_accuracy_checker/test/ut/dump/test_dump_scope.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py index b892a6077a..7712552abe 100644 --- a/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py +++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/dump/test_dump_scope.py @@ -1,10 +1,12 @@ import unittest -from api_accuracy_checker.dump.dump_scope import * +from api_accuracy_checker.dump.dump_scope import iter_tracer from api_accuracy_checker.dump.dump import DumpUtil + class TestDumpScope(unittest.TestCase): def test_iter_tracer(self): DumpUtil.call_num = 0 + def dummy_func(): return "Hello, World!" -- Gitee From a73c707dc033607a0dd903d51121d269124f40bf Mon Sep 17 00:00:00 2001 From: gitee Date: Mon, 22 Jan 2024 15:15:15 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/api_accuracy_checker/common/utils.py | 8 ++++---- .../api_accuracy_checker/run_ut/data_generate.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 210bc6b901..e0c893c06b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -634,17 +634,17 @@ def write_pt(file_path, tensor): def get_real_data_path(file_path): targets = ['forward_real_data', 'backward_real_data', 'ut_error_data\d+'] - pattern = re.compile(r'(.*?)(?=({}))'.format('|'.join(targets))) + pattern = re.compile(r'({})'.format('|'.join(targets))) match = pattern.search(file_path) if match: - base_path = match.group(1) - target_path = file_path[len(base_path):] + target_index = match.start() + target_path = file_path[target_index:] return target_path else: raise DumpException(DumpException.INVALID_PATH_ERROR) -def check_real_data_mode(data_path, real_data_path): +def get_full_data_path(data_path, real_data_path): if not data_path: return data_path full_data_path = os.path.join(real_data_path, data_path) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 1c0248db5f..495a3b7ed0 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -20,7 +20,7 @@ import torch import numpy as np from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ - print_error_log, check_real_data_mode, CompareException + print_error_log, get_full_data_path, CompareException TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -40,7 +40,7 @@ def gen_data(info, need_grad, convert_type, real_data_path=None): check_object_type(info, dict) data_type = info.get('type') data_path = info.get('datapath') - data_path = check_real_data_mode(data_path, real_data_path) + data_path = get_full_data_path(data_path, real_data_path) if data_type in TENSOR_DATA_LIST: if data_path: data = gen_real_tensor(data_path, convert_type) -- Gitee