From d43a5c98756dc3f00aa74c2124462acca1fd84d8 Mon Sep 17 00:00:00 2001 From: wangchao Date: Wed, 15 Nov 2023 14:24:24 +0800 Subject: [PATCH] comparison performance optimization --- .../api_accuracy_checker/compare/algorithm.py | 239 ++++++++---------- .../api_accuracy_checker/compare/compare.py | 107 ++------ 2 files changed, 140 insertions(+), 206 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py index 3a8021a011..4ca5887c79 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/algorithm.py @@ -2,113 +2,123 @@ import torch import numpy as np from api_accuracy_checker.compare.compare_utils import CompareConst, check_dtype_comparable -from api_accuracy_checker.common.utils import Const +from api_accuracy_checker.common.utils import Const -def compare_torch_tensor(cpu_output, npu_output, compare_alg): - if not check_dtype_comparable(cpu_output, npu_output): - return CompareConst.NA, False, f"Bench out dtype is {cpu_output.dtype} but\ - npu output dtype is {npu_output.dtype}, cannot compare." - if cpu_output.dtype in [bool, np.uint8, np.int8, np.int16, np.uint16, np.uint32, np.int32, np.int64, np.uint64]: - if compare_alg == cosine_sim: - return CompareConst.NA, True, f"Compare algorithm {compare_alg.__name__} is not supported for {cpu_output.dtype} data." - return compare_bool_tensor(cpu_output, npu_output) - return compare_alg(cpu_output, npu_output) +class CompareColumn: + def __init__(self): + self.bench_type = CompareConst.NA + self.npu_type = CompareConst.NA + self.shape = CompareConst.NA + self.cosine_sim = CompareConst.NA + self.max_abs_err = CompareConst.NA + self.rel_err_hundredth = CompareConst.NA + self.rel_err_thousandth = CompareConst.NA + self.rel_err_ten_thousandth = CompareConst.NA + self.error_rate = CompareConst.NA + def to_column_value(self, is_pass, message): + return [self.bench_type, self.npu_type, self.shape, self.cosine_sim, self.max_abs_err, self.rel_err_hundredth, + self.rel_err_thousandth, self.rel_err_ten_thousandth, self.error_rate, is_pass, message] -def compare_bool_tensor(cpu_output, npu_output): + +def compare_torch_tensor(cpu_output, npu_output, compare_column): cpu_shape = cpu_output.shape npu_shape = npu_output.shape if cpu_shape != npu_shape: - return CompareConst.NA, False, "" + return CompareConst.ERROR, compare_column, f"The shape of bench{str(cpu_shape)} " \ + f"and npu{str(npu_shape)} not equal." + if not check_dtype_comparable(cpu_output, npu_output): + return CompareConst.ERROR, compare_column, f"Bench out dtype is {cpu_output.dtype} but " \ + f"npu output dtype is {npu_output.dtype}, cannot compare." + message = "" + if cpu_output.dtype in [bool, np.uint8, np.int8, np.int16, np.uint16, np.uint32, np.int32, np.int64, np.uint64]: + message += f"Compare algorithm cosine_sim is not supported for {cpu_output.dtype} data. " \ + f"Only judged by Error Rate." + err_rate, status, msg = compare_bool_tensor(cpu_output, npu_output) + message += msg + "\n" + compare_column.error_rate = err_rate + return status, compare_column, message + # cos + cos_res, cos_status, msg = cosine_sim(cpu_output, npu_output) + compare_column.cosine_sim = cos_res + message += msg + "\n" + if not cos_status: + return CompareConst.ERROR, compare_column, message + # abs err + b_value, n_value = get_msg_and_handle_value(cpu_output, npu_output) + abs_err = np.abs(b_value - n_value) + max_abs_res, max_abs_status = get_max_abs_err(abs_err) + compare_column.max_abs_err = max_abs_res + if max_abs_status: + return CompareConst.PASS, compare_column, message + # rel err + rel_err = get_rel_err(abs_err, b_value) + if n_value.dtype in np.float16: + hundred_res, hundred_status = get_rel_err_ratio(rel_err, 0.01) + compare_column.rel_err_hundredth = hundred_res + if not hundred_status: + return CompareConst.ERROR, compare_column, message + thousand_res, thousand_status = get_rel_err_ratio(rel_err, 0.001) + compare_column.rel_err_thousandth = thousand_res + if n_value.dtype == np.float16: + if thousand_status: + return CompareConst.PASS, compare_column, message + return CompareConst.WARNING, compare_column, message + ten_thousand_res, ten_thousand_status = get_rel_err_ratio(rel_err, 0.0001) + compare_column.rel_err_ten_thousandth = ten_thousand_res + if n_value.dtype in [np.float32, np.float64]: + if not thousand_status: + return CompareConst.ERROR, compare_column, message + if not ten_thousand_status: + return CompareConst.WARNING, compare_column, message + return CompareConst.PASS, compare_column, message + + +def compare_bool_tensor(cpu_output, npu_output): error_nums = (cpu_output != npu_output).sum() if cpu_output.size == 0: - return CompareConst.NAN, False, "There is not cpu calculation result." + return CompareConst.NAN, CompareConst.ERROR, "There is not cpu calculation result." error_rate = float(error_nums / cpu_output.size) result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR return error_rate, result, "" def get_msg_and_handle_value(b_value, n_value): - msg = "" - if not isinstance(b_value, np.ndarray) or not isinstance(n_value, np.ndarray): - msg = f"Max rel err only support numpy array! The actual type is {type(b_value)}, {type(n_value)}." - return CompareConst.NA, False, msg - if b_value.shape != n_value.shape: - msg = f"Shape of bench and npu outputs don't match. bench: {b_value.shape}, npu: {n_value.shape}." - return CompareConst.NA, False, msg - if n_value.dtype in Const.FLOAT_TYPE: zero_mask = (n_value == 0) # 给0的地方加上eps防止除0 - n_value[zero_mask] += np.finfo(n_value.dtype).eps + n_value[zero_mask] += np.finfo(n_value.dtype).eps # 根据n_value为0的位置给n_value也加上eps,否则两者都是0的情况下相对误差会是1 - b_value[zero_mask] += np.finfo(n_value.dtype).eps + b_value[zero_mask] += np.finfo(n_value.dtype).eps else: # int type + float eps 会报错,所以这里要强转 b_value, n_value = b_value.astype(float), n_value.astype(float) zero_mask = (n_value == 0) - n_value[zero_mask] += np.finfo(float).eps - b_value[zero_mask] += np.finfo(float).eps - return b_value, n_value, msg - - -def get_max_rel_err(b_value, n_value): - b_value, n_value, msg = get_msg_and_handle_value(b_value, n_value) - rel_err = np.abs((n_value - b_value) / b_value).max() - if n_value.dtype == np.float32: - bool_result = rel_err < 0.0001 - else: - bool_result = rel_err < 0.001 - return rel_err, bool_result, msg - - -def get_max_abs_err(b_value, n_value): - b_value, n_value, msg = get_msg_and_handle_value(b_value, n_value) - abs_err = np.abs(b_value - n_value).max() - bool_result = abs_err < 0.001 - return abs_err, bool_result, msg + n_value[zero_mask] += np.finfo(float).eps + b_value[zero_mask] += np.finfo(float).eps + return b_value, n_value -def get_rel_err_ratio_hundredth(b_value, n_value): - ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.01) - if n_value.dtype != np.float16: - msg = f"This indicator is not used to evaluate {n_value.dtype} data" - return ratio, CompareConst.PASS, msg - if bool_result: - return ratio, CompareConst.PASS, msg - return ratio, CompareConst.ERROR, msg +def get_rel_err(abs_err, b_value): + rel_err = np.abs(abs_err / b_value) + return rel_err -def get_rel_err_ratio_thousandth(b_value, n_value): - ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.001) - if bool_result: - return ratio, CompareConst.PASS, msg - if n_value.dtype == np.float16: - return ratio, CompareConst.WARNING, msg - return ratio, CompareConst.ERROR, msg - - -def get_rel_err_ratio_ten_thousandth(b_value, n_value): - ratio, bool_result, msg = get_rel_err_ratio(b_value, n_value, 0.0001) - if n_value.dtype == np.float16: - msg = f"This indicator is not used to evaluate {n_value.dtype} data" - return ratio, CompareConst.PASS, msg - if bool_result: - return ratio, CompareConst.PASS, msg - return ratio, CompareConst.WARNING, msg +def get_max_abs_err(abs_err): + max_abs_err = abs_err.max() + bool_result = max_abs_err < 0.001 + return max_abs_err, bool_result -def get_rel_err_ratio(b_value, n_value, thresholding): - b_value, n_value, msg = get_msg_and_handle_value(b_value, n_value) - rel_errs = np.abs((n_value - b_value) / b_value) - ratio = np.divide(np.sum(rel_errs < thresholding), np.size(rel_errs)) +def get_rel_err_ratio(rel_err, thresholding): + ratio = np.divide(np.sum(rel_err < thresholding), np.size(rel_err)) bool_result = ratio > (1 - thresholding) - return ratio, bool_result, msg + return ratio, bool_result def max_rel_err_standard(max_rel_errs): - bool_result = np.array(max_rel_errs) < 0.001 + bool_result = np.array(max_rel_errs) < 0.001 return np.all(bool_result), bool_result @@ -128,19 +138,19 @@ def cosine_sim(cpu_output, npu_output): return -1, False, msg if len(n_value) == 1: msg = "All the data in npu dump data is scalar. Please refer to other compare algorithms." - return cos, True, msg + return cos, True, msg n_value_max = np.max(np.abs(n_value)) b_value_max = np.max(np.abs(b_value)) if n_value_max <= np.finfo(float).eps and b_value_max <= np.finfo(float).eps: - return cos, True, msg + return cos, True, msg elif n_value_max <= np.finfo(float).eps: msg = "All the data is zero in npu dump data." - return CompareConst.NA, False, msg + return CompareConst.NA, False, msg elif b_value_max <= np.finfo(float).eps: msg = "All the data is zero in bench dump data." - return CompareConst.NA, False, msg + return CompareConst.NA, False, msg else: - n_value = n_value_max.astype(float) / n_value_max + n_value = n_value_max.astype(float) / n_value_max b_value = b_value_max.astype(float) / b_value_max cos = np.dot(n_value, b_value) / (np.linalg.norm(n_value) * np.linalg.norm(b_value)) if np.isnan(cos): @@ -173,69 +183,44 @@ def flatten_compare_result(result): return flatten_result -# 本函数用alg比对bench_out 和npu_out,返回详细比对结果compare_result和标志比对是否通过的布尔变量test_success -def compare_core(bench_out, npu_out, alg): - msg = "" +def compare_core(bench_out, npu_out): + compare_column = CompareColumn() if not isinstance(bench_out, type(npu_out)): - return [(CompareConst.NA, "bench and npu output type is different.")], False, [CompareConst.NA], [CompareConst.NA], [CompareConst.NA] + return CompareConst.ERROR, compare_column, "bench and npu output type is different." if isinstance(bench_out, (list, tuple)): - compare_result, test_success, bench_dtype, npu_dtype, shape = [], [], [], [], [] + status, compare_result, message = [], [], [] if len(bench_out) != len(npu_out): - return [(CompareConst.NA, "bench and npu output structure is different")], False, [CompareConst.NA], [CompareConst.NA], [CompareConst.NA] + return CompareConst.ERROR, compare_column, "bench and npu output structure is different." for b_out_i, n_out_i in zip(bench_out, npu_out): - compare_result_i, test_success_i, bench_dtype_i, npu_dtype_i, shape_i = compare_core(b_out_i, n_out_i, alg) + status_i, compare_result_i, message_i = compare_core(b_out_i, n_out_i) + status.append(status_i) compare_result.append(compare_result_i) - test_success.append(test_success_i) - bench_dtype.append(bench_dtype_i) - npu_dtype.append(npu_dtype_i) - shape.append(shape_i) + message.append(message_i) elif isinstance(bench_out, dict): b_keys, n_keys = set(bench_out.keys()), set(npu_out.keys()) if b_keys != n_keys: - compare_result, test_success, bench_dtype, npu_dtype, shape = [(CompareConst.NA, "bench and npu output dict keys are different")], [False], \ - [CompareConst.NA], [CompareConst.NA], [CompareConst.NA] + return CompareConst.ERROR, compare_column, "bench and npu output dict keys are different." else: - compare_result, test_success, bench_dtype, npu_dtype, shape = compare_core(list(bench_out.values()), list(npu_out.values()), alg) + status, compare_result, message = compare_core(list(bench_out.values()), list(npu_out.values())) elif isinstance(bench_out, torch.Tensor): copy_bench_out = bench_out.detach().clone() copy_npu_out = npu_out.detach().clone() - bench_dtype = [str(copy_bench_out.dtype)] - npu_dtype = [str(copy_npu_out.dtype)] - shape = [tuple(npu_out.shape)] + compare_column.bench_type = str(copy_bench_out.dtype) + compare_column.npu_type = str(copy_npu_out.dtype) + compare_column.shape = tuple(npu_out.shape) if copy_npu_out.dtype == torch.bfloat16: copy_bench_out = copy_bench_out.to(torch.float32) copy_npu_out = copy_npu_out.to(torch.float32) - compare_result, test_success, msg = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), alg) + status, compare_result, message = compare_torch_tensor(copy_bench_out.numpy(), copy_npu_out.cpu().numpy(), + compare_column) elif isinstance(bench_out, (bool, int, float, str)): - compare_result, test_success, msg = compare_builtin_type(bench_out, npu_out) - bench_dtype = [str(type(bench_out))] - npu_dtype = [str(type(npu_out))] - shape = [str(type(npu_out))] + compare_column.bench_dtype = str(type(bench_out)) + compare_column.npu_dtype = str(type(npu_out)) + compare_column.shape = str(type(npu_out)) + status, compare_result, message = compare_builtin_type(bench_out, npu_out, compare_column) elif bench_out is None: - compare_result, test_success, msg = CompareConst.NA, True, "output is None" - bench_dtype = [CompareConst.NA] - npu_dtype = [CompareConst.NA] - shape = [CompareConst.NA] - else: - compare_result, test_success, msg = CompareConst.NA, True, "Unexpected output type \ - in compare_core: {}".format(type(bench_out)) - bench_dtype = [CompareConst.NA] - npu_dtype = [CompareConst.NA] - shape = [CompareConst.NA] - if isinstance(compare_result, list): - compare_result = flatten_compare_result(compare_result) + return CompareConst.PASS, compare_column, "Output is None." else: - compare_result = [(compare_result, msg)] - if isinstance(test_success, list): - test_success = flatten_compare_result(test_success) - else: - test_success = [test_success] - if isinstance(bench_dtype, list): - bench_dtype = flatten_compare_result(bench_dtype) - npu_dtype = flatten_compare_result(npu_dtype) - shape = flatten_compare_result(shape) - else: - bench_dtype = [bench_dtype] - npu_dtype = [npu_dtype] - shape = [shape] - return compare_result, test_success, bench_dtype, npu_dtype, shape \ No newline at end of file + return CompareConst.PASS, compare_column, "Unexpected output type in compare_core: {}".format(type(bench_out)) + + return status, compare_result, message diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py index d5de8ce021..6260b90ed4 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare.py @@ -3,10 +3,9 @@ import os import time from rich.table import Table from rich.console import Console -from api_accuracy_checker.compare.algorithm import compare_core, cosine_sim, cosine_standard, get_max_rel_err, get_max_abs_err, \ - compare_builtin_type, get_rel_err_ratio_hundredth, get_rel_err_ratio_thousandth, get_rel_err_ratio_ten_thousandth -from api_accuracy_checker.common.utils import get_json_contents, print_info_log, print_error_log, write_csv, CompareException -from api_accuracy_checker.compare.compare_utils import CompareConst +from api_accuracy_checker.compare.algorithm import compare_core +from api_accuracy_checker.common.utils import get_json_contents, write_csv +from api_accuracy_checker.compare.compare_utils import CompareConst from api_accuracy_checker.common.config import msCheckerConfig @@ -14,7 +13,7 @@ class Comparator: TEST_FILE_NAME = "accuracy_checking_result_" + time.strftime("%Y%m%d%H%M%S") + ".csv" DETAIL_TEST_FILE_NAME = "accuracy_checking_details_" + time.strftime("%Y%m%d%H%M%S") + ".csv" - # consts for result csv + # consts for result csv COLUMN_API_NAME = "API name" COLUMN_FORWARD_SUCCESS = "Forward Test Success" COLUMN_BACKWARD_SUCCESS = "Backward Test Success" @@ -31,14 +30,6 @@ class Comparator: self.stack_info = get_json_contents(stack_info_json_path) else: self.stack_info = None - self.compare_alg = {} - self.register_compare_algorithm("Cosine Similarity", cosine_sim, cosine_standard) - self.register_compare_algorithm("Max Relative Error", get_max_rel_err, None) - self.register_compare_algorithm("Max Absolute Error", get_max_abs_err, None) - self.register_compare_algorithm("Hundredth Relative Error Ratio", get_rel_err_ratio_hundredth, None) - self.register_compare_algorithm("Thousandth Relative Error Ratio", get_rel_err_ratio_thousandth, None) - self.register_compare_algorithm("Ten Thousandth Relative Error Ratio", get_rel_err_ratio_ten_thousandth, None) - self.register_compare_algorithm("Default: isEqual", compare_builtin_type, None) self.test_result_cnt = { "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, @@ -83,15 +74,15 @@ class Comparator: detail_test_rows = [[ "Npu Name", "Bench Dtype", "NPU Dtype", "Shape", - "Cosine Similarity", "Cosine Similarity Message", - "Max Rel Error", "Max Rel Err Message", - "Max Abs Error", "Max Abs Err Message", - "Relative Error (hundredth)", "Relative Error (dual hundredth) Message", - "Relative Error (dual thousandth)", "Relative Error (dual thousandth) Message", - "Relative Error (dual ten thousandth)", "Relative Error (dual ten thousandth) Message", - "Compare Builtin Type", "Builtin Type Message", - "Pass" - ]] + "Cosine Similarity", + "Max Abs Error", + "Relative Error (hundredth)", + "Relative Error (dual thousandth)", + "Relative Error (dual ten thousandth)", + "Error Rate", + "Status", + "Message" + ]] write_csv(detail_test_rows, self.detail_save_path) def write_summary_csv(self, test_result): @@ -132,14 +123,10 @@ class Comparator: self.write_summary_csv(args) self.write_detail_csv(args) - - def register_compare_algorithm(self, name, compare_func, standard): - self.compare_alg.update({name: (compare_func, standard)}) - def compare_output(self, api_name, bench_out, npu_out, bench_grad=None, npu_grad=None): self.test_result_cnt["total_num"] += 1 if "dropout" in api_name: - is_fwd_success, fwd_compare_alg_results = self._compare_dropout(bench_out, npu_out) + is_fwd_success, fwd_compare_alg_results = self._compare_dropout(bench_out, npu_out) else: is_fwd_success, fwd_compare_alg_results = self._compare_core_wrapper(bench_out, npu_out) if bench_grad and npu_grad: @@ -167,60 +154,22 @@ class Comparator: self.test_result_cnt['forward_or_backward_fail_num'] += 1 return is_fwd_success, is_bwd_success - - def _compare_core_wrapper(self, bench_out, npu_out): + @staticmethod + def _compare_core_wrapper(bench_out, npu_out): detailed_result_total = [] - bench_dtype_total = [] - npu_dtype_total = [] - shape_total = [] - test_success_total = [] - max_abs_error_success = [] - cosine_success = [] - for name in self.compare_alg.keys(): - alg = self.compare_alg[name][0] - detailed_result, test_success, bench_dtype, npu_dtype, shape = compare_core(bench_out, npu_out, alg) - bench_dtype_total = bench_dtype - npu_dtype_total = npu_dtype - shape_total = shape - if name not in ["Cosine Similarity", "Max Relative Error", "Max Absolute Error"]: - test_success_total.append(test_success) - if name == "Cosine Similarity": - cosine_success = test_success - if name == "Max Absolute Error": - max_abs_error_success = test_success - if detailed_result_total: - for i, detailed_result_item in enumerate(detailed_result): - detailed_result_total[i] += detailed_result_item - else: - detailed_result_total = detailed_result - test_all_result = [CompareConst.PASS for _ in range(len(detailed_result_total))] - for i, _ in enumerate(test_all_result): - if not cosine_success[i] or CompareConst.ERROR == cosine_success[i]: - test_all_result[i] = CompareConst.ERROR - elif max_abs_error_success[i] or CompareConst.PASS == max_abs_error_success[i]: - test_all_result[i] = CompareConst.PASS - else: - test_success_column = [test_success_single[i] for test_success_single in test_success_total] - if CompareConst.ERROR in test_success_column or False in test_success_column: - test_all_result[i] = CompareConst.ERROR - elif CompareConst.WARNING in test_success_column: - test_all_result[i] = CompareConst.WARNING - # dtype加到所有指标的前面, 是否pass放到所有指标的后面 - try: - for i, detailed_tuple in enumerate(detailed_result_total): - detailed_result = list(detailed_tuple) - detailed_result.insert(0, bench_dtype_total[i]) - detailed_result.insert(1, npu_dtype_total[i]) - detailed_result.insert(2, shape_total[i]) - detailed_result.append(test_all_result[i]) - detailed_result_total[i] = tuple(detailed_result) - except IndexError as error: - print_error_log(f"There is index error.\n{str(error)}") - raise CompareException(CompareException.INVALID_DATA_ERROR) from error - test_final_success = False if CompareConst.ERROR in test_all_result or CompareConst.WARNING in test_all_result \ - else True + test_final_success = True + status, compare_result, message = compare_core(bench_out, npu_out) + if not isinstance(status, list): + detailed_result_total.append(compare_result.to_column_value(status, message)) + if status in [CompareConst.ERROR, CompareConst.WARNING]: + test_final_success = False + else: + for i in range(len(status)): + detailed_result_total.append(compare_result[i].to_column_value(status[i], message[i])) + if status[i] in [CompareConst.ERROR, CompareConst.WARNING]: + test_final_success = False return test_final_success, detailed_result_total - + @staticmethod def _compare_dropout(bench_out, npu_out): tensor_num = bench_out.numel() -- Gitee