diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
index 84aa576553bb8ec0829e73ca5964e4ea872e3c9d..3a8021a01125b3add1395375c469fd703989494f 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py
@@ -11,7 +11,7 @@ def compare_torch_tensor(cpu_output, npu_output, compare_alg):
         npu output dtype is {npu_output.dtype}, cannot compare."
     if cpu_output.dtype in [bool, np.uint8, np.int8, np.int16, np.uint16, np.uint32, np.int32, np.int64, np.uint64]:
         if compare_alg == cosine_sim:
-            return CompareConst.NA, False, f"Compare algorithm {compare_alg.__name__} is not supported for {cpu_output.dtype} data."
+            return CompareConst.NA, True, f"Compare algorithm {compare_alg.__name__} is not supported for {cpu_output.dtype} data."
         return compare_bool_tensor(cpu_output, npu_output)
     return compare_alg(cpu_output, npu_output)
 
@@ -25,7 +25,8 @@ def compare_bool_tensor(cpu_output, npu_output):
     if cpu_output.size == 0:
         return CompareConst.NAN, False, "There is not cpu calculation result."
     error_rate = float(error_nums / cpu_output.size)
-    return error_rate, error_rate == 0, ""
+    result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR
+    return error_rate, result, ""
 
 
 def get_msg_and_handle_value(b_value, n_value):
@@ -69,16 +70,33 @@ def get_max_abs_err(b_value, n_value):
     return abs_err, bool_result, msg
 
 
+def get_rel_err_ratio_hundredth(b_value, n_value):
+    ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.01)
+    if n_value.dtype != np.float16:
+        msg = f"This indicator is not used to evaluate {n_value.dtype} data"
+        return ratio, CompareConst.PASS, msg
+    if bool_result:
+        return ratio, CompareConst.PASS, msg
+    return ratio, CompareConst.ERROR, msg
+
+
 def get_rel_err_ratio_thousandth(b_value, n_value):
-    return get_rel_err_ratio(b_value, n_value, 0.001)
+    ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.001)
+    if bool_result:
+        return ratio, CompareConst.PASS, msg
+    if n_value.dtype == np.float16:
+        return ratio, CompareConst.WARNING, msg
+    return ratio, CompareConst.ERROR, msg
 
 
 def get_rel_err_ratio_ten_thousandth(b_value, n_value):
     ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.0001)
     if n_value.dtype == np.float16:
         msg = f"This indicator is not used to evaluate {n_value.dtype} data"
-        return ratio, True, msg
-    return ratio, bool_result, msg
+        return ratio, CompareConst.PASS, msg
+    if bool_result:
+        return ratio, CompareConst.PASS, msg
+    return ratio, CompareConst.WARNING, msg
 
 
 def get_rel_err_ratio(b_value, n_value, thresholding):
@@ -139,10 +157,10 @@ def compare_uint8_data(b_value, n_value):
 
 def compare_builtin_type(bench_out, npu_out):
     if not isinstance(bench_out, (bool, int, float, str)):
-        return CompareConst.NA, True, ""
+        return CompareConst.NA, CompareConst.PASS, ""
     if bench_out != npu_out:
-        return CompareConst.NA, False, ""
-    return True, True, ""
+        return CompareConst.NA, CompareConst.ERROR, ""
+    return True, CompareConst.PASS, ""
 
 
 def flatten_compare_result(result):
@@ -161,20 +179,20 @@ def compare_core(bench_out, npu_out, alg):
     if not isinstance(bench_out, type(npu_out)):
         return [(CompareConst.NA, "bench and npu output type is different.")], False, [CompareConst.NA], [CompareConst.NA], [CompareConst.NA]
     if isinstance(bench_out, (list, tuple)):
-        compare_result, test_success, bench_dtype, npu_dtype, shape = [], True, [], [], []
+        compare_result, test_success, bench_dtype, npu_dtype, shape = [], [], [], [], []
         if len(bench_out) != len(npu_out):
             return [(CompareConst.NA, "bench and npu output structure is different")], False, [CompareConst.NA], [CompareConst.NA], [CompareConst.NA]
         for b_out_i, n_out_i in zip(bench_out, npu_out):
             compare_result_i, test_success_i, bench_dtype_i, npu_dtype_i, shape_i = compare_core(b_out_i, n_out_i, alg)
             compare_result.append(compare_result_i)
-            test_success = test_success and test_success_i
+            test_success.append(test_success_i)
             bench_dtype.append(bench_dtype_i)
             npu_dtype.append(npu_dtype_i)
             shape.append(shape_i)
     elif isinstance(bench_out, dict):
         b_keys, n_keys = set(bench_out.keys()), set(npu_out.keys())
         if b_keys != n_keys:
-            compare_result, test_success, bench_dtype, npu_dtype, shape = [(CompareConst.NA, "bench and npu output dict keys are different")], False, \
+            compare_result, test_success, bench_dtype, npu_dtype, shape = [(CompareConst.NA, "bench and npu output dict keys are different")], [False], \
                 [CompareConst.NA], [CompareConst.NA], [CompareConst.NA]
         else:
             compare_result, test_success, bench_dtype, npu_dtype, shape = compare_core(list(bench_out.values()), list(npu_out.values()), alg)
@@ -208,6 +226,10 @@ def compare_core(bench_out, npu_out, alg):
         compare_result = flatten_compare_result(compare_result)
     else:
         compare_result = [(compare_result, msg)]
+    if isinstance(test_success, list):
+        test_success = flatten_compare_result(test_success)
+    else:
+        test_success = [test_success]
     if isinstance(bench_dtype, list):
         bench_dtype = flatten_compare_result(bench_dtype)
         npu_dtype = flatten_compare_result(npu_dtype)
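The algorithm.py hunks above move each metric from a boolean pass/fail to a three-level verdict (CompareConst.PASS / WARNING / ERROR), with softer handling for float16: a missed dual-thousandth threshold downgrades to a warning instead of an error, the new dual-hundredth indicator applies only to float16 (other dtypes report pass with an explanatory message), and the dual-ten-thousandth indicator skips float16 the same way. A minimal standalone sketch of the thousandth rule, assuming only numpy; rel_err_ratio below is a hypothetical stand-in for get_rel_err_ratio, which this diff does not show:

import numpy as np

PASS, WARNING, ERROR = 'pass', 'warning', 'error'  # mirrors CompareConst

def rel_err_ratio(b_value, n_value, threshold):
    # Hypothetical stand-in: fraction of elements whose relative error
    # (measured against the bench value) stays below the threshold.
    b = b_value.astype(np.float64)
    n = n_value.astype(np.float64)
    rel_err = np.abs(n - b) / np.maximum(np.abs(b), np.finfo(np.float64).eps)
    ratio = float(np.mean(rel_err < threshold))
    return ratio, ratio == 1.0

def rel_err_ratio_thousandth(b_value, n_value):
    # Same shape as the patched get_rel_err_ratio_thousandth above.
    ratio, ok = rel_err_ratio(b_value, n_value, 0.001)
    if ok:
        return ratio, PASS
    if n_value.dtype == np.float16:
        return ratio, WARNING  # float16 gets slack: downgrade to a warning
    return ratio, ERROR

b = np.array([1.002, 2.0])
n = np.array([1.0, 2.0])
print(rel_err_ratio_thousandth(b, n))                     # (0.5, 'error')
print(rel_err_ratio_thousandth(b, n.astype(np.float16)))  # (0.5, 'warning')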
diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
index 231610951c449195572b8a5d039da479a2cdeffe..6590869f2101f480f1573e3c1d98f9eb57ba1ab7 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py
@@ -4,7 +4,7 @@ import time
 from rich.table import Table
 from rich.console import Console
 from api_accuracy_checker.compare.algorithm import compare_core, cosine_sim, cosine_standard, get_max_rel_err, get_max_abs_err, \
-    compare_builtin_type, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth
+    compare_builtin_type, get_rel_err_ratio_hundredth, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth
 from api_accuracy_checker.common.utils import get_json_contents, print_info_log, print_error_log, write_csv, CompareException
 from api_accuracy_checker.compare.compare_utils import CompareConst
 from api_accuracy_checker.common.config import msCheckerConfig
@@ -35,6 +35,7 @@ class Comparator:
         self.register_compare_algorithm("Cosine Similarity", cosine_sim, cosine_standard)
         self.register_compare_algorithm("Max Relative Error", get_max_rel_err, None)
         self.register_compare_algorithm("Max Absolute Error", get_max_abs_err, None)
+        self.register_compare_algorithm("Hundredth Relative Error Ratio", get_rel_err_ratio_hundredth, None)
         self.register_compare_algorithm("Thousandth Relative Error Ratio", get_rel_err_ratio_thousandth, None)
         self.register_compare_algorithm("Ten Thousandth Relative Error Ratio", get_rel_err_ratio_ten_thousandth, None)
         self.register_compare_algorithm("Default: isEqual", compare_builtin_type, None)
@@ -85,6 +86,7 @@ class Comparator:
             "Cosine Similarity", "Cosine Similarity Message",
             "Max Rel Error", "Max Rel Err Message",
             "Max Abs Error", "Max Abs Err Message",
+            "Relative Error (dual hundredth)", "Relative Error (dual hundredth) Message",
             "Relative Error (dual thousandth)", "Relative Error (dual thousandth) Message",
             "Relative Error (dual ten thousandth)", "Relative Error (dual ten thousandth) Message",
             "Compare Builtin Type", "Builtin Type Message",
@@ -166,24 +168,38 @@ class Comparator:
         bench_dtype_total = []
         npu_dtype_total = []
         shape_total = []
-        test_success_total = True
-        max_abs_error_success = False
+        test_success_total = []
+        max_abs_error_success = []
+        cosine_success = []
         for name in self.compare_alg.keys():
             alg = self.compare_alg[name][0]
             detailed_result, test_success, bench_dtype, npu_dtype, shape = compare_core(bench_out, npu_out, alg)
             bench_dtype_total = bench_dtype
             npu_dtype_total = npu_dtype
             shape_total = shape
-            if name not in ["Max Relative Error", "Max Absolute Error"]:
-                test_success_total = test_success_total and test_success
-            if name == "Max Absolute Error":
+            if name not in ["Cosine Similarity", "Max Relative Error", "Max Absolute Error"]:
+                test_success_total.append(test_success)
+            if name == "Cosine Similarity":
+                cosine_success = test_success
+            if name == "Max Relative Error":
                 max_abs_error_success = test_success
             if detailed_result_total:
                 for i, detailed_result_item in enumerate(detailed_result):
                     detailed_result_total[i] += detailed_result_item
             else:
                 detailed_result_total = detailed_result
-        test_success_total = test_success_total or max_abs_error_success
+        test_all_result = [CompareConst.PASS for _ in range(len(detailed_result_total))]
+        for i, _ in enumerate(test_all_result):
+            if not cosine_success[i] or CompareConst.ERROR == cosine_success[i]:
+                test_all_result[i] = CompareConst.ERROR
+            elif max_abs_error_success[i] or CompareConst.PASS == max_abs_error_success[i]:
+                test_all_result[i] = CompareConst.PASS
+            else:
+                test_success_column = [test_success_single[i] for test_success_single in test_success_total]
+                if CompareConst.ERROR in test_success_column or False in test_success_column:
+                    test_all_result[i] = CompareConst.ERROR
+                elif CompareConst.WARNING in test_success_column:
+                    test_all_result[i] = CompareConst.WARNING
         # dtype is inserted before all metrics, and the pass/fail verdict is appended after all metrics
         try:
             for i, detailed_tuple in enumerate(detailed_result_total):
@@ -191,12 +207,13 @@
                 detailed_result.insert(0, bench_dtype_total[i])
                 detailed_result.insert(1, npu_dtype_total[i])
                 detailed_result.insert(2, shape_total[i])
-                detailed_result.append(str(test_success_total))
+                detailed_result.append(test_all_result[i])
                 detailed_result_total[i] = tuple(detailed_result)
         except IndexError as error:
             print_error_log(f"There is index error.\n{str(error)}")
             raise CompareException(CompareException.INVALID_DATA_ERROR) from error
-        return test_success_total, detailed_result_total
+        test_final_success = False if CompareConst.ERROR in test_all_result or CompareConst.WARNING in test_all_result else True
+        return test_final_success, detailed_result_total
 
     @staticmethod
     def _compare_dropout(bench_out, npu_out):
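The compare.py hunks collect per-metric verdicts for every output and reduce them: a cosine-similarity failure is always an error; otherwise a passing max-relative-error check forces a pass; otherwise the worst verdict among the ratio indicators wins, and the API as a whole counts as passed only when no output ends up as a warning or an error. A condensed sketch of the per-output rule (aggregate_verdict is an illustrative name, not part of the patch; in the patch the max-relative-error results are plain booleans, which is why truthiness is tested first):

PASS, WARNING, ERROR = 'pass', 'warning', 'error'  # mirrors CompareConst

def aggregate_verdict(cosine, max_rel_err, ratio_verdicts):
    # cosine / max_rel_err: results of the cosine-similarity and
    # max-relative-error checks for one output (booleans or verdict
    # strings, as in the patch); ratio_verdicts: the dual hundredth/
    # thousandth/ten-thousandth verdicts for the same output.
    if not cosine or cosine == ERROR:
        return ERROR          # a cosine failure is always fatal
    if max_rel_err or max_rel_err == PASS:
        return PASS           # a passing max-rel-err overrides the ratio checks
    if ERROR in ratio_verdicts or False in ratio_verdicts:
        return ERROR
    if WARNING in ratio_verdicts:
        return WARNING
    return PASS

print(aggregate_verdict(PASS, False, [PASS, WARNING, PASS]))  # -> 'warning'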
diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py
index 62044f585218cf98e26859d2ed9492289531e0eb..0bb80fbce92331360a5c08a2901b3ba8fd911f3e 100644
--- a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py
+++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py
@@ -5,6 +5,9 @@ import numpy as np
 class CompareConst:
     NAN = np.nan
     NA = "N/A"
+    PASS = 'pass'
+    WARNING = 'warning'
+    ERROR = 'error'
 
 
 def check_dtype_comparable(x, y):
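The three new constants define an implicit severity order (pass < warning < error). A small illustrative helper, not part of the patch, showing how a list of per-metric verdicts collapses to the most severe one, which matches how the final result is derived above:

SEVERITY = {'pass': 0, 'warning': 1, 'error': 2}  # illustrative ordering

def worst_verdict(verdicts):
    # Reduce a list of per-metric verdicts to the most severe one.
    return max(verdicts, key=lambda v: SEVERITY[v])

assert worst_verdict(['pass', 'warning', 'pass']) == 'warning'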
diff --git a/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_algorithm.py b/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_algorithm.py
index 701ea6f7ae132c58801cbcac81be88238217e6b2..54d452ed01a9a6666a534a15f47d9000606b10a4 100644
--- a/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_algorithm.py
+++ b/debug/accuracy_tools/api_accuracy_checker/test/ut/compare/test_algorithm.py
@@ -8,12 +8,12 @@ class TestAlgorithmMethods(unittest.TestCase):
         cpu_output = np.array([1, 2, 3])
         npu_output = np.array([1, 2, 3])
         compare_alg = alg.get_max_rel_err
-        self.assertEqual(alg.compare_torch_tensor(cpu_output, npu_output, compare_alg), (0.0, True, ''))
+        self.assertEqual(alg.compare_torch_tensor(cpu_output, npu_output, compare_alg), (0.0, 'pass', ''))
 
     def test_compare_bool_tensor(self):
         cpu_output = np.array([True, False, True])
         npu_output = np.array([True, False, True])
-        self.assertEqual(alg.compare_bool_tensor(cpu_output, npu_output), (0.0, True, ''))
+        self.assertEqual(alg.compare_bool_tensor(cpu_output, npu_output), (0.0, 'pass', ''))
 
     def test_get_msg_and_handle_value(self):
         b_value = np.array([1.0, 2.0, 3.0])
@@ -33,12 +33,12 @@ class TestAlgorithmMethods(unittest.TestCase):
     def test_get_rel_err_ratio_thousandth(self):
         b_value = np.array([1.0, 2.0, 3.0])
         n_value = np.array([1.0, 2.0, 3.0])
-        self.assertEqual(alg.get_rel_err_ratio_thousandth(b_value, n_value), (1.0, True, ''))
+        self.assertEqual(alg.get_rel_err_ratio_thousandth(b_value, n_value), (1.0, 'pass', ''))
 
     def test_get_rel_err_ratio_ten_thousandth(self):
         b_value = np.array([1.0, 2.0, 3.0])
         n_value = np.array([1.0, 2.0, 3.0])
-        self.assertEqual(alg.get_rel_err_ratio_ten_thousandth(b_value, n_value), (1.0, True, ''))
+        self.assertEqual(alg.get_rel_err_ratio_ten_thousandth(b_value, n_value), (1.0, 'pass', ''))
 
     def test_max_rel_err_standard(self):
         max_rel_errs = [0.0001, 0.0002, 0.0003]
@@ -65,7 +65,7 @@ class TestAlgorithmMethods(unittest.TestCase):
     def test_compare_builtin_type(self):
         bench_out = 1
         npu_out = 1
-        self.assertEqual(alg.compare_builtin_type(bench_out, npu_out), (True, True, ''))
+        self.assertEqual(alg.compare_builtin_type(bench_out, npu_out), (True, 'pass', ''))
 
     def test_flatten_compare_result(self):
         result = [[1, 2], [3, 4]]
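The diff adds no unit test for the new get_rel_err_ratio_hundredth. A test in the style of the existing cases might look like the following untested sketch; float16 inputs are used because, per the new function, other dtypes return the "not used to evaluate" message instead of an empty one:

    def test_get_rel_err_ratio_hundredth(self):
        b_value = np.array([1.0, 2.0, 3.0], dtype=np.float16)
        n_value = np.array([1.0, 2.0, 3.0], dtype=np.float16)
        self.assertEqual(alg.get_rel_err_ratio_hundredth(b_value, n_value), (1.0, 'pass', ''))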