diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index dd4ec514c200b0943d3de5df2a4a76a0ed567e57..797f67f32faaec6c9035640fa0ebf1dffddbe650 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -21,7 +21,11 @@ def compare_bool_tensor(cpu_output, npu_output):
         return CompareConst.NAN, False, ""
     error_nums = (cpu_output != npu_output).sum()
     error_rate = float(error_nums / cpu_output.size)
-    return error_rate, error_rate == 0, ""
+    if error_rate == 0:
+        result = 'pass'
+    else:
+        result = 'error'
+    return error_rate, result, ""
 
 def get_msg_and_handle_value(b_value, n_value):
     msg = ""
@@ -58,15 +62,31 @@ def get_max_abs_err(b_value, n_value):
     bool_result = abs_err < 0.001
     return abs_err, bool_result, msg
 
+def get_rel_err_ratio_hundredth(b_value, n_value):
+    ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.01)
+    if n_value.dtype != np.float16:
+        msg = f"This indicator is not used to evaluate {n_value.dtype} data"
+        return ratio, 'pass', msg
+    if bool_result:
+        return ratio, 'pass', msg
+    return ratio, 'error', msg
+
 def get_rel_err_ratio_thousandth(b_value, n_value):
-    return get_rel_err_ratio(b_value, n_value, 0.001)
+    ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.001)
+    if bool_result:
+        return ratio, 'pass', msg
+    if n_value.dtype == np.float16:
+        return ratio, 'warning', msg
+    return ratio, 'error', msg
 
 def get_rel_err_ratio_ten_thousandth(b_value, n_value):
     ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.0001)
     if n_value.dtype == np.float16:
         msg = f"This indicator is not used to evaluate {n_value.dtype} data"
-        return ratio, True, msg
-    return ratio, bool_result, msg
+        return ratio, 'pass', msg
+    if bool_result:
+        return ratio, 'pass', msg
+    return ratio, 'warning', msg
 
 def get_rel_err_ratio(b_value, n_value, thresholding):
     b_value, n_value, msg = get_msg_and_handle_value(b_value, n_value)
@@ -121,10 +141,10 @@ def compare_uint8_data(b_value, n_value):
 
 def compare_builtin_type(bench_out, npu_out):
     if not isinstance(bench_out, (bool, int, float, str)):
-        return CompareConst.NA, True, ""
+        return CompareConst.NA, 'pass', ""
     if bench_out != npu_out:
-        return CompareConst.NAN, False, ""
-    return True, True, ""
+        return CompareConst.NAN, 'error', ""
+    return True, 'pass', ""
 
 def flatten_compare_result(result):
     flatten_result = []
@@ -147,7 +167,15 @@ def compare_core(bench_out, npu_out, alg):
         for b_out_i, n_out_i in zip(bench_out, npu_out):
             compare_result_i, test_success_i, bench_dtype_i, npu_dtype_i, shape_i = compare_core(b_out_i, n_out_i, alg)
             compare_result.append(compare_result_i)
-            test_success = test_success and test_success_i
+            if isinstance(test_success, bool):
+                test_success = test_success and test_success_i
+            else:
+                if test_success_i == 'error':
+                    test_success = 'error'
+                elif test_success_i == 'warning' and test_success != 'error':
+                    test_success = 'warning'
+                elif test_success != 'warning' and test_success != 'error':
+                    test_success = test_success_i
             bench_dtype.append(bench_dtype_i)
             npu_dtype.append(npu_dtype_i)
             shape.append(shape_i)
diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
index 8aec6a707e6d2aea17e629331fac2d9f8d423131..208c9c5f40e934b54f6f954b77c13a637e1fd4c6 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
@@ -2,7 +2,7 @@ import os
 
 from prettytable import PrettyTable
 from api_accuracy_checker.compare.algorithm import compare_core, cosine_sim, cosine_standard, get_max_rel_err, get_max_abs_err, \
-    compare_builtin_type, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth
+    compare_builtin_type, get_rel_err_ratio_hundredth, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth
 from api_accuracy_checker.common.utils import get_json_contents, print_info_log, write_csv
 from api_accuracy_checker.compare.compare_utils import CompareConst
 
@@ -32,6 +32,7 @@ class Comparator:
         self.register_compare_algorithm("Cosine Similarity", cosine_sim, cosine_standard)
         self.register_compare_algorithm("Max Relative Error", get_max_rel_err, None)
         self.register_compare_algorithm("Max Absolute Error", get_max_abs_err, None)
+        self.register_compare_algorithm("Hundredth Relative Error Ratio", get_rel_err_ratio_hundredth, None)
         self.register_compare_algorithm("Thousandth Relative Error Ratio", get_rel_err_ratio_thousandth, None)
         self.register_compare_algorithm("Ten Thousandth Relative Error Ratio", get_rel_err_ratio_ten_thousandth, None)
         self.register_compare_algorithm("Default: isEqual", compare_builtin_type, None)
@@ -66,6 +67,7 @@ class Comparator:
             "Cosine Similarity", "Cosine Similarity Message",
             "Max Rel Error", "Max Rel Err Message",
             "Max Abs Error", "Max Abs Err Message",
+            "Relative Error (dual hundredth)", "Relative Error (dual hundredth) Message",
             "Relative Error (dual thousandth)", "Relative Error (dual thousandth) Message",
             "Relative Error (dual ten thousandth)", "Relative Error (dual ten thousandth) Message",
             "Compare Builtin Type", "Builtin Type Message",
@@ -140,29 +142,49 @@ class Comparator:
         bench_dtype_total = []
         npu_dtype_total = []
         shape_total = []
-        test_success_total = True
+        test_success_total = []
+        max_rel_error_success = False
+        max_cosine_success = False
         for name in self.compare_alg.keys():
             alg = self.compare_alg[name][0]
             detailed_result, test_success, bench_dtype, npu_dtype, shape = compare_core(bench_out, npu_out, alg)
             bench_dtype_total = bench_dtype
             npu_dtype_total = npu_dtype
             shape_total = shape
-            if name != "Max Relative Error" and name != "Max Absolute Error":
-                test_success_total = test_success_total and test_success
+            if name not in ["Cosine Similarity", "Max Relative Error", "Max Absolute Error"]:
+                test_success_total.append(test_success)
+            if name == "Cosine Similarity":
+                max_cosine_success = test_success
+            if name == "Max Relative Error":
+                max_rel_error_success = test_success
             if detailed_result_total:
                 for i in range(len(detailed_result_total)):
                     detailed_result_total[i] += detailed_result[i]
             else:
                 detailed_result_total = detailed_result
+        test_final_result = 'pass'
+        if not max_cosine_success:
+            test_final_result = 'error'
+        elif max_rel_error_success:
+            test_final_result = 'pass'
+        else:
+            if 'error' in test_success_total:
+                test_final_result = 'error'
+            elif 'warning' in test_success_total:
+                test_final_result = 'warning'
         # dtype columns are inserted before all metrics; the pass/fail result is appended after all metrics
         for i in range(len(detailed_result_total)):
             detailed_result = list(detailed_result_total[i])
             detailed_result.insert(0, bench_dtype_total[i])
             detailed_result.insert(1, npu_dtype_total[i])
             detailed_result.insert(2, shape_total[i])
-            detailed_result.append(str(test_success_total))
+            detailed_result.append(str(test_final_result))
             detailed_result_total[i] = tuple(detailed_result)
-        return test_success_total, detailed_result_total
+        if test_final_result == 'pass':
+            test_final_success = True
+        else:
+            test_final_success = False
+        return test_final_success, detailed_result_total
 
     @staticmethod
     def _compare_dropout(bench_out, npu_out):
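Note for reviewers: the merging rule this patch introduces in compare_core treats 'error' as dominant over 'warning', and 'warning' over 'pass'. Below is a minimal standalone sketch of that precedence; the helper name merge_status and the rank table are illustrative only, not part of the patch.

# Illustrative sketch only: mirrors the 'error' > 'warning' > 'pass'
# precedence that compare_core applies when folding per-output statuses.
STATUS_RANK = {'pass': 0, 'warning': 1, 'error': 2}

def merge_status(statuses):
    # Hypothetical helper: return the most severe status seen, 'pass' if none.
    return max(statuses, key=STATUS_RANK.get, default='pass')

assert merge_status(['pass', 'warning', 'pass']) == 'warning'
assert merge_status(['pass', 'error', 'warning']) == 'error'
assert merge_status([]) == 'pass'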