diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index 58e05cf749a9a41b46867cdb939142b696b572d7..c8ecea17b01d62de6d3ceef7c47ce339861a87e3 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -29,7 +29,7 @@ def compare_bool_tensor(cpu_output, npu_output):
     return error_rate, error_rate < 0.001, ""
 
 
-def get_max_rel_err(n_value, b_value):
+def get_msg_and_handle_value(n_value, b_value):
     msg = ""
     if not isinstance(n_value, np.ndarray) or not isinstance(b_value, np.ndarray):
         msg = f"Max rel err only support numpy array! The actual type is {type(n_value)}, {type(b_value)}."
@@ -52,12 +52,35 @@ def get_max_rel_err(n_value, b_value):
     zero_mask = (b_value == 0)
     b_value[zero_mask] += np.finfo(float).eps
     n_value[zero_mask] += np.finfo(float).eps
+    return n_value, b_value, msg
+
+
+def get_max_rel_err(n_value, b_value):
+    n_value, b_value, msg = get_msg_and_handle_value(n_value, b_value)
     rel_err = np.abs((n_value - b_value) / b_value).max()
     bool_result = rel_err < 0.001
-    return rel_err, bool_result, msg
+    return rel_err, bool_result, msg
+
+
+def get_rel_err_ratio_thousandth(n_value, b_value):
+    return get_rel_err_ratio(n_value, b_value, 0.001)
+
+
+def get_rel_err_ratio_ten_thousandth(n_value, b_value):
+    ratio, bool_result, msg = get_rel_err_ratio(n_value, b_value, 0.0001)
+    if b_value.dtype == np.float16:
+        msg = f"This indicator is not used to evaluate {b_value.dtype} data"
+        return ratio, CompareConst.NA, msg
+    return ratio, bool_result, msg
+
+
+def get_rel_err_ratio(n_value, b_value, thresholding):
+    n_value, b_value, msg = get_msg_and_handle_value(n_value, b_value)
+    rel_errs = np.abs((n_value - b_value) / b_value)
+    ratio = np.divide(np.sum(rel_errs < thresholding), np.size(rel_errs))
+    bool_result = ratio > (1 - thresholding)
+    return ratio, bool_result, msg
+
+
 def max_rel_err_standard(max_rel_errs):
     bool_result = np.array(max_rel_errs) < 0.001
     return np.all(bool_result), bool_result
@@ -127,33 +150,50 @@ def flatten_compare_result(result):
 def compare_core(bench_out, npu_out, alg):
     msg = ""
     if not isinstance(bench_out, type(npu_out)):
-        return CompareConst.NAN, False, "bench and npu output type is different."
+        return CompareConst.NAN, False, "bench and npu output type is different.", CompareConst.NAN, CompareConst.NAN
     if isinstance(bench_out, (list, tuple)):
-        compare_result, test_success = [], True
+        compare_result, test_success, bench_dtype, npu_dtype = [], True, [], []
         if len(bench_out) != len(npu_out):
-            return CompareConst.NAN, False, "bench and npu output structure is different"
+            return CompareConst.NAN, False, "bench and npu output structure is different", CompareConst.NAN, CompareConst.NAN
         for b_out_i, n_out_i in zip(bench_out, npu_out):
-            compare_result_i, test_success_i = compare_core(b_out_i, n_out_i, alg)
+            compare_result_i, test_success_i, bench_dtype_i, npu_dtype_i = compare_core(b_out_i, n_out_i, alg)
             compare_result.append(compare_result_i)
             test_success = test_success and test_success_i
+            bench_dtype.append(bench_dtype_i)
+            npu_dtype.append(npu_dtype_i)
     elif isinstance(bench_out, dict):
         b_keys, n_keys = set(bench_out.keys()), set(npu_out.keys())
         if b_keys != n_keys:
-            compare_result, test_success, msg = CompareConst.NAN, False, "bench and npu output dict keys are different"
-        compare_result, test_success = compare_core(list(bench_out.values()), list(npu_out.values()))
+            compare_result, test_success, msg, bench_dtype, npu_dtype = CompareConst.NAN, False, \
+                "bench and npu output dict keys are different", CompareConst.NAN, CompareConst.NAN
+        compare_result, test_success, bench_dtype, npu_dtype = compare_core(list(bench_out.values()), list(npu_out.values()), alg)
     elif isinstance(bench_out, torch.Tensor):
         compare_result, test_success, msg = compare_torch_tensor(bench_out.detach().numpy(),
                                                                  npu_out.detach().cpu().numpy(), alg)
+        bench_dtype = str(bench_out.dtype)
+        npu_dtype = str(npu_out.dtype)
     elif isinstance(bench_out, (bool, int, float, str)):
         compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out)
+        bench_dtype = str(type(bench_out))
+        npu_dtype = str(type(npu_out))
     elif bench_out is None:
         compare_result, test_success, msg = CompareConst.NA, True, "output is None"
+        bench_dtype = CompareConst.NAN
+        npu_dtype = CompareConst.NAN
     else:
         compare_result, test_success, msg = CompareConst.NA, True, "Unexpected output type \
            in compare_core: {}".format(type(bench_out))
+        bench_dtype = CompareConst.NAN
+        npu_dtype = CompareConst.NAN
     if isinstance(compare_result, list):
         compare_result = flatten_compare_result(compare_result)
     else:
         compare_result = [(compare_result, str(test_success), msg)]
-    return compare_result, test_success
+    if isinstance(bench_dtype, list):
+        bench_dtype = flatten_compare_result(bench_dtype)
+        npu_dtype = flatten_compare_result(npu_dtype)
+    else:
+        bench_dtype = [bench_dtype]
+        npu_dtype = [npu_dtype]
+    return compare_result, test_success, bench_dtype, npu_dtype
diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
index 7a1c069e2eff91940d26a7bf4b74bfc54554a04e..dd2d5f3b0f56c7d169897e61f2024b879ad919ba 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
@@ -2,7 +2,7 @@ import os
 from prettytable import PrettyTable
 from api_accuracy_checker.compare.algorithm import compare_core, cosine_sim, cosine_standard, get_max_rel_err, \
-    compare_builtin_type
+    compare_builtin_type, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth
 from api_accuracy_checker.common.utils import get_json_contents, print_error_log, print_info_log, write_csv
 from api_accuracy_checker.compare.compare_utils import CompareConst
@@ -26,11 +26,14 @@ class Comparator:
         self.compare_alg = {}
         self.register_compare_algorithm("Cosine Similarity", cosine_sim, cosine_standard)
         self.register_compare_algorithm("Max Relative Error", get_max_rel_err, None)
+        self.register_compare_algorithm("Thousandth Relative Error Ratio", get_rel_err_ratio_thousandth, None)
+        self.register_compare_algorithm("Ten Thousandth Relative Error Ratio", get_rel_err_ratio_ten_thousandth, None)
         self.register_compare_algorithm("Default: isEqual", compare_builtin_type, None)
         self.test_results = []
         self.test_result_cnt = {
             "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0
         }
+        self.result_save_path = result_save_path
 
     def print_pretest_result(self):
         res_dict = {
@@ -64,8 +67,11 @@ def write_detail_csv(self):
         test_rows = [[
-            "Subject", "Cosine Similarity", "Cosine Similarity Pass", "Cosine Similarity Message",
+            "Subject", "Bench Dtype", "NPU Dtype",
+            "Cosine Similarity", "Cosine Similarity Pass", "Cosine Similarity Message",
             "Max Rel Error", "Max Rel Err Pass", "Max Rel Err Message",
+            "Thousandth Rel Error Ratio", "Thousandth Rel Error Ratio Pass", "Thousandth Rel Error Ratio Message",
+            "Ten Thousandth Rel Error Ratio", "Ten Thousandth Rel Error Ratio Pass", "Ten Thousandth Rel Error Ratio Message",
             "Compare Builtin Type", "Builtin Type Pass", "Builtin Type Message"
         ]]
@@ -112,19 +118,30 @@
             else:
                 self.test_result_cnt['backward_fail_num'] += 1
 
+
     def _compare_core_wrapper(self, bench_out, npu_out):
         detailed_result_total = []
+        bench_dtype_total = []
+        npu_dtype_total = []
         test_success_total = True
         for name in self.compare_alg.keys():
             alg = self.compare_alg[name][0]
-            detailed_result, test_success = compare_core(bench_out, npu_out, alg)
-            if name != "Max Relative Error":
+            detailed_result, test_success, bench_dtype, npu_dtype = compare_core(bench_out, npu_out, alg)
+            bench_dtype_total = bench_dtype
+            npu_dtype_total = npu_dtype
+            if name != "Max Relative Error" and test_success != CompareConst.NA:
                 test_success_total = test_success_total and test_success
             if detailed_result_total:
                 for i in range(len(detailed_result_total)):
                     detailed_result_total[i] += detailed_result[i]
             else:
                 detailed_result_total = detailed_result
+        # Prepend the dtype columns in front of all the metric columns
+        for i in range(len(detailed_result_total)):
+            detailed_result = list(detailed_result_total[i])
+            detailed_result.insert(0, bench_dtype_total[i])
+            detailed_result.insert(1, npu_dtype_total[i])
+            detailed_result_total[i] = tuple(detailed_result)
         return test_success_total, detailed_result_total
 
     @staticmethod
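
Reviewer note on the new metrics: get_rel_err_ratio(n_value, b_value, thresholding) computes the share of elements whose relative error against the bench output stays below the threshold, and the check passes only when that share exceeds 1 - threshold. The sketch below is a minimal, self-contained re-implementation for sanity-checking the math outside the harness; rel_err_ratio is a hypothetical condensed helper (the type validation and the msg return value of get_msg_and_handle_value are omitted), not the module's actual API.

import numpy as np


def rel_err_ratio(n_value, b_value, threshold):
    # Hypothetical condensed rewrite of get_msg_and_handle_value + get_rel_err_ratio.
    n_value = n_value.astype(np.float64).copy()
    b_value = b_value.astype(np.float64).copy()
    # Nudge exact zeros in both arrays by eps so the division below is always
    # defined, mirroring the zero_mask handling in get_msg_and_handle_value.
    zero_mask = (b_value == 0)
    b_value[zero_mask] += np.finfo(float).eps
    n_value[zero_mask] += np.finfo(float).eps
    rel_errs = np.abs((n_value - b_value) / b_value)
    # Share of elements whose relative error is under the threshold; the check
    # passes only when more than (1 - threshold) of them are.
    ratio = np.sum(rel_errs < threshold) / rel_errs.size
    return ratio, ratio > (1 - threshold)


bench = np.array([1.0, 2.0, 0.0, 4.0])
npu = np.array([1.0005, 2.0, 0.0, 4.1])  # last element is ~2.5% off
print(rel_err_ratio(npu, bench, 0.001))  # (0.75, False): only 3 of 4 within 0.1%

This also shows why get_rel_err_ratio_ten_thousandth reports CompareConst.NA for float16 outputs: a 0.0001 relative-error threshold is finer than half-precision resolution (fp16 eps is roughly 0.001), so the patch marks the indicator as not applicable rather than failing every fp16 API.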