diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py
index a7922eadfcdd30930cf6d37c95071249885fc6a0..94907f01493272f95641047074c908c96a9449b8 100644
--- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py
+++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py
@@ -79,14 +79,17 @@ class Const:
     API_PATTERN = r"^[A-Za-z0-9]+[_]+([A-Za-z0-9]+[_]*[A-Za-z0-9]+)[_]+[0-9]+[_]+[A-Za-z0-9]+"
     WRITE_FLAGS = os.O_WRONLY | os.O_CREAT
     WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR
-
+
+    RAISE_PRECISION = {
+        "torch.float16" : "torch.float32",
+        "torch.bfloat16" : "torch.float32",
+        "torch.float32" : "torch.float64"
+    }
     CONVERT = {
-        "fp16_to_fp32": ["torch.float16", "torch.float32"],
-        "int32_to_int64": ["torch.int32", "torch.int64"]
+        "int32_to_int64": ["torch.int32", "torch.int64"],
     }
 
     CONVERT_API = {
-        "fp16_to_fp32": ["conv2d", "batch_norm", "relu", "max_pool2d", "interpolate", "group_norm", "layer_norm", "bmm", "tanh", "cross_entropy", "linear", "numel"],
         "int32_to_int64": ["cross_entropy"]
     }
 
diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index 7caf8a7b5224b0194de7b50c8ecb095a1ffb3fc1..b0b1aaf605937f0cbf932d83811e6d2735d5a289 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -150,11 +150,11 @@ def flatten_compare_result(result):
 def compare_core(bench_out, npu_out, alg):
     msg = ""
     if not isinstance(bench_out, type(npu_out)):
-        return CompareConst.NAN, False, "bench and npu output type is different.", CompareConst.NAN, CompareConst.NAN
+        return [(CompareConst.NAN, "bench and npu output type is different.")], False, CompareConst.NA, CompareConst.NA
     if isinstance(bench_out, (list, tuple)):
         compare_result, test_success, bench_dtype, npu_dtype = [], True, [], []
         if len(bench_out) != len(npu_out):
-            return CompareConst.NAN, False, "bench and npu output structure is different", CompareConst.NAN, CompareConst.NAN
+            return [(CompareConst.NAN, "bench and npu output structure is different")], False, CompareConst.NA, CompareConst.NA
         for b_out_i, n_out_i in zip(bench_out, npu_out):
             compare_result_i, test_success_i, bench_dtype_i, npu_dtype_i = compare_core(b_out_i, n_out_i, alg)
             compare_result.append(compare_result_i)
@@ -164,15 +164,14 @@ def compare_core(bench_out, npu_out, alg):
     elif isinstance(bench_out, dict):
         b_keys, n_keys = set(bench_out.keys()), set(npu_out.keys())
         if b_keys != n_keys:
-            compare_result, test_success, msg = CompareConst.NAN, False, "bench and npu output dict keys are different", \
-                CompareConst.NAN, CompareConst.NAN
+            compare_result, test_success, bench_dtype, npu_dtype = [(CompareConst.NAN, "bench and npu output dict keys are different")], False, \
+                CompareConst.NA, CompareConst.NA
         compare_result, test_success, bench_dtype, npu_dtype = compare_core(list(bench_out.values()), list(npu_out.values()), alg)
     elif isinstance(bench_out, torch.Tensor):
         bench_dtype = str(bench_out.dtype)
         npu_dtype = str(npu_out.dtype)
-        if bench_out.dtype == torch.bfloat16:
-            bench_out = bench_out.to(torch.float32)
-            npu_out = npu_out.to(torch.float32)
+        if bench_out.dtype in [torch.float32, torch.float64] and bench_out.dtype != npu_out.dtype:
+            npu_out = npu_out.type(bench_out.dtype)
         compare_result, test_success, msg = compare_torch_tensor(bench_out.detach().numpy(), npu_out.detach().cpu().numpy(), alg)
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
index 813d0cb58618eae048ea4ab2057bdc1f50185500..27efa4bd3dae3ef0e010bfa92e2212038faa401f 100644
--- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
+++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
@@ -7,7 +7,7 @@ import torch
 from tqdm import tqdm
 from api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args
 from api_accuracy_checker.common.utils import print_info_log, print_warn_log, get_json_contents, api_info_preprocess, \
-    print_error_log, check_file_or_directory_path, initialize_save_path
+    print_error_log, check_file_or_directory_path, initialize_save_path, Const
 from api_accuracy_checker.compare.compare import Comparator
 from api_accuracy_checker.hook_module.wrap_tensor import TensorOPTemplate
 from api_accuracy_checker.hook_module.wrap_functional import FunctionalOPTemplate
@@ -44,7 +44,7 @@ def exec_api(api_type, api_name, args, kwargs):
     return out
 
 
-def generate_npu_params(cpu_args, cpu_kwargs, need_backward):
+def generate_npu_params(input_args, input_kwargs, need_backward):
     def recursive_arg_to_npu(arg_in):
         if isinstance(arg_in, (list, tuple)):
             return type(arg_in)(recursive_arg_to_npu(arg) for arg in arg_in)
@@ -60,10 +60,34 @@ def generate_npu_params(cpu_args, cpu_kwargs, need_backward):
         else:
             return arg_in
 
-    npu_args = recursive_arg_to_npu(cpu_args)
-    npu_kwargs = {key: recursive_arg_to_npu(value) for key, value in cpu_kwargs.items()}
+    npu_args = recursive_arg_to_npu(input_args)
+    npu_kwargs = {key: recursive_arg_to_npu(value) for key, value in input_kwargs.items()}
     return npu_args, npu_kwargs
 
+def generate_cpu_params(input_args, input_kwargs, need_backward):
+    def recursive_arg_to_cpu(arg_in):
+        if isinstance(arg_in, (list, tuple)):
+            return type(arg_in)(recursive_arg_to_cpu(arg) for arg in arg_in)
+        elif isinstance(arg_in, torch.Tensor):
+            if need_backward and arg_in.requires_grad:
+                if str(arg_in.dtype) in Const.RAISE_PRECISION.keys():
+                    arg_in = arg_in.clone().type(eval(Const.RAISE_PRECISION[str(arg_in.dtype)])).detach().requires_grad_()
+                else:
+                    arg_in = arg_in.clone().detach().requires_grad_()
+                temp_arg_in = arg_in * 1
+                arg_in = temp_arg_in.type_as(arg_in)
+                arg_in.retain_grad()
+                return arg_in
+            else:
+                if str(arg_in.dtype) in Const.RAISE_PRECISION.keys():
+                    return arg_in.clone().type(eval(Const.RAISE_PRECISION[str(arg_in.dtype)])).detach()
+                return arg_in.clone().detach()
+        else:
+            return arg_in
+
+    cpu_args = recursive_arg_to_cpu(input_args)
+    cpu_kwargs = {key: recursive_arg_to_cpu(value) for key, value in input_kwargs.items()}
+    return cpu_args, cpu_kwargs
 
 def run_ut(forward_file, backward_file, out_path, save_error_data):
     print_info_log("start UT test")
@@ -81,10 +105,7 @@ def run_ut(forward_file, backward_file, out_path, save_error_data):
             do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success)
         except Exception as err:
             [_, api_name, _] = api_full_name.split("*")
-            if "not implemented for 'Half'" in str(err):
-                print_warn_log(f"API {api_name} not support half tensor in CPU, please add {api_name} to CONVERT_API "
-                               f"'fp16_to_fp32' list in accuracy_tools/api_accuracy_check/common/utils.py file.")
-            elif "expected scalar type Long" in str(err):
+            if "expected scalar type Long" in str(err):
                 print_warn_log(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                                f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.")
             else:
@@ -118,11 +139,12 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di
     need_backward = need_backward and need_grad
     if inplace or not need_grad:
         print_warn_log("%s involves in-place operations, skip backward" % api_full_name)
+    cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward)
     npu_args, npu_kwargs = generate_npu_params(args, kwargs, need_backward)
     grad_out, npu_grad_out = None, None
     if kwargs.get("device"):
         del kwargs["device"]
-    out = exec_api(api_type, api_name, args, kwargs)
+    out = exec_api(api_type, api_name, cpu_args, cpu_kwargs)
     npu_out = exec_api(api_type, api_name, npu_args, npu_kwargs)
     grad_input_index = api_setting_dict.get(api_name)
     grad_index = None
@@ -131,7 +153,7 @@ def run_torch_api(api_full_name, api_setting_dict, backward_content, api_info_di
         grad_index = grad_input_index.get('grad_index')
 
     if need_backward:
-        grad_out, npu_grad_out, grad, npu_grad = run_backward(api_full_name, args, backward_content, grad_index, npu_args,
+        grad_out, npu_grad_out, grad, npu_grad = run_backward(api_full_name, cpu_args, backward_content, grad_index, npu_args,
                                                               npu_out, out)
     if grad_index is not None:
         return UtDataInfo(grad_out, npu_grad_out, npu_out[grad_index], out[grad_index], grad, in_fwd_data_list)
@@ -153,12 +175,13 @@ def get_api_info(api_info_dict, api_name):
 def run_backward(api_full_name, args, backward_content, grad_index, npu_args, npu_out, out):
     backward_args = backward_content[api_full_name]
     grad = gen_args(backward_args)[0]
+    cpu_grad, _ = generate_cpu_params(grad, {}, False)
     if grad_index is not None:
-        out[grad_index].backward(grad)
+        out[grad_index].backward(cpu_grad)
     elif isinstance(out, (list, tuple)):
         raise NotImplementedError("Multiple backward is not supported.")
     else:
-        out.backward(grad)
+        out.backward(cpu_grad)
     args_grad = []
     for arg in args:
         if isinstance(arg, torch.Tensor):
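The change replaces the old per-API fp16_to_fp32 whitelist with a general precision-raising step: before the CPU reference pass, generate_cpu_params upcasts inputs according to Const.RAISE_PRECISION (fp16/bf16 to fp32, fp32 to fp64), which is also why the "not implemented for 'Half'" warning branch becomes unnecessary. Below is a minimal standalone sketch of that idea, not the project code verbatim: the function and variable names are illustrative, and the mapping uses torch dtype objects directly instead of eval() on dtype strings.

```python
import torch

# Mirrors the intent of Const.RAISE_PRECISION: raise each floating dtype one level.
RAISE_PRECISION = {
    torch.float16: torch.float32,
    torch.bfloat16: torch.float32,
    torch.float32: torch.float64,
}

def raise_precision(arg, need_backward=False):
    """Recursively clone tensors at the next-higher precision to build a CPU baseline."""
    if isinstance(arg, (list, tuple)):
        return type(arg)(raise_precision(a, need_backward) for a in arg)
    if isinstance(arg, torch.Tensor):
        target = RAISE_PRECISION.get(arg.dtype, arg.dtype)
        out = arg.clone().detach().to(target)
        if need_backward and arg.requires_grad:
            out.requires_grad_()  # keep the leaf trainable so backward can be compared too
        return out
    return arg

# A half-precision input becomes a float32 CPU baseline.
x_half = torch.randn(4, 4, dtype=torch.float16)
x_cpu = raise_precision(x_half)
print(x_cpu.dtype)  # torch.float32
```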
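On the comparison side, compare_core no longer special-cases bfloat16; since the CPU baseline may now be fp32 or fp64, it casts the NPU result to the bench dtype before converting both to numpy. A hedged sketch of that alignment step, with illustrative names only:

```python
import torch

def align_for_compare(bench_out, npu_out):
    # Cast the device result up to the bench dtype when the bench side is fp32/fp64,
    # so the element-wise comparison runs at the baseline's precision.
    if bench_out.dtype in (torch.float32, torch.float64) and bench_out.dtype != npu_out.dtype:
        npu_out = npu_out.type(bench_out.dtype)
    return bench_out.detach().numpy(), npu_out.detach().cpu().numpy()

bench = torch.randn(3, dtype=torch.float64)   # raised-precision CPU baseline
device_out = bench.to(torch.float16)          # stand-in for an NPU result
b_np, n_np = align_for_compare(bench, device_out)
print(b_np.dtype, n_np.dtype)                 # float64 float64
```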