diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/benchmark_compare.py b/debug/accuracy_tools/api_accuracy_checker/compare/benchmark_compare.py index a85b42b0af12c60eef3933da686a289d0b6a350c..da06a31696626a43e65c647698e108234fdcd670 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/benchmark_compare.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/benchmark_compare.py @@ -10,12 +10,14 @@ from api_accuracy_checker.common.utils import print_info_log, print_warn_log, pr CompareException from api_accuracy_checker.common.config import msCheckerConfig from api_accuracy_checker.compare.compare_utils import CompareConst, BENCHMARK_COMPARE_RESULT_FILE_NAME, \ -BENCHMARK_COMPARE_DETAILS_FILE_NAME, result_mapping, Benchmark_Compare_Support_List, BenchmarkCompareColumn +BENCHMARK_COMPARE_DETAILS_FILE_NAME, result_mapping, Benchmark_Compare_Support_List, Benchmark_Compare_Unsupport_List, \ + BenchmarkCompareColumn from api_accuracy_checker.run_ut.run_ut import get_validated_result_csv_path from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker, change_mode CompareConfig = namedtuple('CompareConfig', ['npu_csv_path', 'gpu_csv_path', 'result_csv_path', 'details_csv_path']) +unsupported_message = 'This data type does not support benchmark compare.' benchmark_algorithms_thresholds = { @@ -156,42 +158,71 @@ def analyse_csv(npu_data, gpu_data, config): part_api_name = row_npu[BenchmarkCompareColumn.API_NAME] row_gpu = gpu_data[gpu_data[BenchmarkCompareColumn.API_NAME] == part_api_name] api_name, direction_status, _, _ = part_api_name.split(".") - check_status = True + binary_consistency_check = False if row_gpu.empty: print_warn_log(f'This API : {part_api_name} does not exist in the GPU data.') continue + if len(row_gpu) > 1: + msg = f'This API : {part_api_name} has multiple records in the GPU data.' + raise CompareException(CompareException.INVALID_DATA_ERROR, msg) + row_gpu = row_gpu.iloc[0] if row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] in Benchmark_Compare_Support_List: - row_gpu = row_gpu.iloc[0] bs = BenchmarkStandard(part_api_name, row_npu, row_gpu) bs.get_result() write_detail_csv(bs.to_column_value(), config.details_csv_path) else: - check_status = False + binary_consistency_check = True - if api_name != last_api_name and last_api_name is not None: - if last_api_dtype in Benchmark_Compare_Support_List: - write_csv([[last_api_name, forward_status, backward_status, message]], config.result_csv_path) + if last_api_name is not None and api_name != last_api_name: + if last_api_dtype in Benchmark_Compare_Unsupport_List: + message = unsupported_message + write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) forward_status, backward_status = CompareConst.NA, CompareConst.NA message = '' else: - message = 'This data type does not support benchmarking.' - write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) - if direction_status == 'forward' and check_status: - forward_status = forward_status and result_mapping.get(bs.final_result) \ - if forward_status != CompareConst.NA else result_mapping.get(bs.final_result) - if direction_status == 'backward' and check_status: - backward_status = backward_status and result_mapping.get(bs.final_result) \ - if backward_status != CompareConst.NA else result_mapping.get(bs.final_result) + write_csv([[last_api_name, forward_status, backward_status, message]], config.result_csv_path) + forward_status, backward_status = CompareConst.NA, CompareConst.NA + message = '' + + is_supported = row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] not in Benchmark_Compare_Unsupport_List last_api_name = api_name - if not pd.isna(row_npu[BenchmarkCompareColumn.DEVICE_DTYPE]): - last_api_dtype = row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] + if pd.isna(row_npu[BenchmarkCompareColumn.DEVICE_DTYPE]): + continue + last_api_dtype = row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] + + if not is_supported: + continue + + if binary_consistency_check: + new_status = check_error_rate(row_npu[BenchmarkCompareColumn.ERROR_RATE], + row_gpu[BenchmarkCompareColumn.ERROR_RATE]) + else: + new_status = result_mapping.get(bs.final_result) + + if direction_status == 'forward': + forward_status = update_status(forward_status, new_status) + elif direction_status == 'backward': + backward_status = update_status(backward_status, new_status) + else: + print_error_log(f"Invalid direction status: {direction_status}") if last_api_name is not None: - if last_api_dtype in Benchmark_Compare_Support_List: - write_csv([[last_api_name, forward_status, backward_status, message]], config.result_csv_path) - else: - message = 'This data type does not support benchmarking.' + if last_api_dtype in Benchmark_Compare_Unsupport_List: + message = unsupported_message write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) + else: + write_csv([[last_api_name, forward_status, backward_status, message]], config.result_csv_path) + + +def check_error_rate(npu_error_rate, gpu_error_rate): + return npu_error_rate == 0 and gpu_error_rate == 0 + + +def update_status(status, new_status): + if status != CompareConst.NA: + return status and new_status + else: + return new_status def check_csv_columns(columns, csv_type): diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py index a09347a7350ba38baeb66f878ca7165eb6b92ede..d96944bcec613009deafc1a5b4128511778e2730 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py @@ -8,6 +8,7 @@ current_time = time.strftime("%Y%m%d%H%M%S") BENCHMARK_COMPARE_RESULT_FILE_NAME = "benchmark_compare_result_" + current_time + ".csv" BENCHMARK_COMPARE_DETAILS_FILE_NAME = "benchmark_compare_details_" + current_time + ".csv" Benchmark_Compare_Support_List = ['torch.float16', 'torch.bfloat16', 'torch.float32'] +Benchmark_Compare_Unsupport_List = ['torch.float64'] result_mapping = { 'pass' : True, 'warning': False, @@ -90,6 +91,7 @@ class BenchmarkCompareColumn: MEAN_REL_ERR_STATUS = '相对误差平均值判定结果' EB_RATIO = '误差均衡性比值' EB_STATUS = '误差均衡性判定结果' + ERROR_RATE = '错误率' FORWWARD_STATUS = 'Forward Test Success' BACKWARD_STATUS = 'Backward Test Success' MESSAGE = 'Message' @@ -98,7 +100,8 @@ class BenchmarkCompareColumn: def to_required_columns(): return [BenchmarkCompareColumn.API_NAME, BenchmarkCompareColumn.DEVICE_DTYPE, BenchmarkCompareColumn.SMALL_VALUE_ERROR_RATE, BenchmarkCompareColumn.RMSE, - BenchmarkCompareColumn.MAX_REL_ERR, BenchmarkCompareColumn.MEAN_REL_ERR, BenchmarkCompareColumn.EB] + BenchmarkCompareColumn.MAX_REL_ERR, BenchmarkCompareColumn.MEAN_REL_ERR, BenchmarkCompareColumn.EB, + BenchmarkCompareColumn.ERROR_RATE] @staticmethod def get_detail_csv_title():