# Copyright (c) 2024 Huawei Technologies Co., Ltd
# All rights reserved.
#
# Licensed under the BSD 3-Clause License  (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Thresholds used by the absolute-threshold precision standard.
#
# Layout: AbsoluteThreshStandard -> <api name> -> <torch dtype string> ->
#   rtol              relative tolerance applied to normal-magnitude values
#   small_value       benchmark magnitudes at or below this are "small values"
#   small_value_atol  absolute tolerance applied to small values
#
# Every API currently shares one threshold table. It is written once, on
# `mul`, under the `&default_thresholds` anchor; the other APIs reference it
# with the `*default_thresholds` alias. To give an API its own thresholds,
# replace its alias with an explicit mapping of the same shape.

AbsoluteThreshStandard:
  mul: &default_thresholds
    torch.float32:
      rtol: 0.000001
      small_value: 0.000001
      small_value_atol: 0.000001
    torch.float16:
      rtol: 0.001
      small_value: 0.001
      small_value_atol: 0.001
    torch.bfloat16:
      rtol: 0.004
      small_value: 0.001
      small_value_atol: 0.001
  mul_: *default_thresholds
  __mul__: *default_thresholds
  __imul__: *default_thresholds
  __rmul__: *default_thresholds
  add: *default_thresholds
  add_: *default_thresholds
  __add__: *default_thresholds
  __iadd__: *default_thresholds
  __radd__: *default_thresholds
  div: *default_thresholds
  div_: *default_thresholds
  __div__: *default_thresholds
  __idiv__: *default_thresholds
  divide: *default_thresholds
  divide_: *default_thresholds
  floor_divide: *default_thresholds
  floor_divide_: *default_thresholds
  true_divide: *default_thresholds
  true_divide_: *default_thresholds
  leaky_relu: *default_thresholds
  leaky_relu_: *default_thresholds
  prelu: *default_thresholds
  reciprocal: *default_thresholds
  reciprocal_: *default_thresholds
  rsqrt: *default_thresholds
  rsqrt_: *default_thresholds
  square: *default_thresholds
  square_: *default_thresholds
  sub: *default_thresholds
  sub_: *default_thresholds
  rsub: *default_thresholds
  __isub__: *default_thresholds
  __sub__: *default_thresholds
def get_abs_bench_with_eps(bench, dtype):
    """Return ``|bench| + eps`` so it can be used as a non-zero denominator.

    Args:
        bench: NumPy array holding the benchmark values.
        dtype: dtype tag of the data being compared; only its equality with
            ``torch.bfloat16`` is inspected.

    Returns:
        Array of the element-wise absolute benchmark values shifted by a
        small epsilon.
    """
    if dtype == torch.bfloat16:
        # NumPy has no native bfloat16, so ``np.finfo`` cannot describe it;
        # a fixed epsilon is used instead.
        epsilon = 2 ** -8
    else:
        epsilon = np.finfo(bench.dtype).eps
    return np.abs(bench) + epsilon
class AbsoluteThresholdStandard:
    """Absolute-threshold comparison of device output against benchmark output.

    Thresholds (``rtol``, ``small_value``, ``small_value_atol``) are read from
    the module-level ``AbsoluteStandardApi`` table, keyed by API name and by
    the string form of the device dtype. Every checker returns a truthy value
    when at least one element violates its tolerance.
    """

    @classmethod
    def _check_inf_nan_value(cls, inf_nan_mask, npu_abs_data, gpu_abs_data, dtype, rtol):
        """Check the elements selected by ``inf_nan_mask``.

        Both arrays are clipped to the finite range of the device dtype, so
        infinities become the largest representable magnitude. An element
        passes when the clipped relative error is within ``rtol`` or when
        both sides are NaN.

        Returns:
            True if any element under ``inf_nan_mask`` does not pass.
        """
        abs_gpu_with_eps = get_abs_bench_with_eps(gpu_abs_data, dtype)
        golden_same_dtype = gpu_abs_data.astype(npu_abs_data.dtype)
        # bfloat16 has no NumPy finfo, so its finite bounds are hard-coded.
        a_min = np.finfo(npu_abs_data.dtype).min if dtype != torch.bfloat16 else -3.3895313892515355e+38
        a_max = np.finfo(npu_abs_data.dtype).max if dtype != torch.bfloat16 else 3.3895313892515355e+38
        golden_clip = np.clip(golden_same_dtype, a_min, a_max)
        npu_clip = np.clip(npu_abs_data, a_min, a_max)
        clipped_abs_ae = np.abs(npu_clip - golden_clip)
        clipped_re = clipped_abs_ae / abs_gpu_with_eps
        pass_mask = np.less_equal(clipped_re, rtol)
        # NaN never satisfies "<=", so matching NaNs are accepted explicitly.
        both_nan_mask = np.logical_and(np.isnan(npu_abs_data), np.isnan(golden_clip))
        pass_mask = np.logical_or(pass_mask, both_nan_mask)
        not_pass_mask = np.logical_not(pass_mask)
        not_pass_mask = np.logical_and(not_pass_mask, inf_nan_mask)

        inf_nan_err_cnt = np.sum(not_pass_mask)
        return inf_nan_err_cnt > 0

    @classmethod
    def _check_small_value(cls, abs_err, small_value_mask, small_value_atol):
        """Return True if any small-value element has ``abs_err > small_value_atol``."""
        greater_mask = np.greater(abs_err, small_value_atol)
        err_mask = np.logical_and(greater_mask, small_value_mask)
        err_cnt = np.sum(err_mask)
        return err_cnt > 0

    @classmethod
    def _check_norm_value(cls, normal_value_mask, rel_err, rtol):
        """Return True if any normal-value element has ``rel_err > rtol``."""
        err_mask = np.greater(rel_err, rtol)
        err_mask = np.logical_and(err_mask, normal_value_mask)
        err_cnt = np.sum(err_mask)
        return err_cnt > 0

    @classmethod
    def _get_thresholds(cls, dedicated_api_name, npu_dtype):
        """Fetch the threshold mapping for one API / dtype pair.

        Raises:
            CompareException: if the API or the dtype has no entry in
                ``AbsoluteStandardApi``. Without this guard a missing dtype
                surfaces as an opaque ``AttributeError`` on ``None``.
        """
        api_standard = AbsoluteStandardApi.get(dedicated_api_name)
        dtype_standard = api_standard.get(str(npu_dtype)) if api_standard else None
        if not dtype_standard:
            msg = f'No absolute threshold standard is defined for API {dedicated_api_name} ' \
                  f'with dtype {npu_dtype}.'
            raise CompareException(CompareException.INVALID_DATA_ERROR, msg)
        return dtype_standard

    @classmethod
    def check(cls, dedicated_api_name, npu_abs_data, gpu_abs_data, npu_dtype):
        """Run the inf/nan, small-value and normal-value checks for one API output.

        Args:
            dedicated_api_name: API name used as key into ``AbsoluteStandardApi``.
            npu_abs_data: NumPy array with the device output.
            gpu_abs_data: NumPy array with the benchmark output.
            npu_dtype: dtype of the device output; ``str(npu_dtype)`` selects
                the threshold row.

        Returns:
            Tuple ``(inf_nan_value_checker, small_value_checker,
            norm_value_checker)``; each item is truthy when that check found
            at least one failing element.

        Raises:
            CompareException: if no thresholds exist for the API / dtype pair.
        """
        # Resolve thresholds first so an unsupported dtype fails before any
        # array work is done.
        thresholds = cls._get_thresholds(dedicated_api_name, npu_dtype)
        small_value_threshold = thresholds.get('small_value')
        small_value_atol = thresholds.get('small_value_atol')
        rtol = thresholds.get('rtol')

        abs_bench = np.abs(gpu_abs_data)
        abs_bench_with_eps = get_abs_bench_with_eps(gpu_abs_data, npu_dtype)
        abs_err = get_abs_err(gpu_abs_data, npu_abs_data)
        rel_err = abs_err / abs_bench_with_eps
        both_finite_mask, inf_nan_mask = get_finite_and_infinite_mask(gpu_abs_data, npu_abs_data)
        small_value_mask = get_small_value_mask(abs_bench, both_finite_mask, small_value_threshold)
        # Finite elements that are not "small" are judged by relative error.
        normal_value_mask = np.logical_and(both_finite_mask, np.logical_not(small_value_mask))

        inf_nan_value_checker = cls._check_inf_nan_value(inf_nan_mask, npu_abs_data, gpu_abs_data, npu_dtype, rtol)
        small_value_checker = cls._check_small_value(abs_err, small_value_mask, small_value_atol)
        norm_value_checker = cls._check_norm_value(normal_value_mask, rel_err, rtol)
        return inf_nan_value_checker, small_value_checker, norm_value_checker
-126,7 +195,7 @@ def write_detail_csv(content, save_path): write_csv(rows, save_path) -def benchmark_compare(config): +def api_precision_compare(config): print_info_log("start benchmark compare task") print_info_log(f"Compare task result will be saved in {config.result_csv_path}") print_info_log(f"Compare task detail will be saved in {config.details_csv_path}") @@ -154,14 +223,16 @@ def benchmark_compare(config): def analyse_csv(npu_data, gpu_data, config): forward_status, backward_status = [], [] - last_api_name = None - last_api_dtype = None + last_api_name, last_api_dtype = None, None + enable_absolute_threshold = True if config.npu_abs_data_path and config.gpu_abs_data_path else False + disable_absolute_threshold_message = f'The comparison method of this api is absolute threshold method. ' + \ + f'Please provide the data path.' for _, row_npu in npu_data.iterrows(): message = '' part_api_name = row_npu[BenchmarkCompareColumn.API_NAME] row_gpu = gpu_data[gpu_data[BenchmarkCompareColumn.API_NAME] == part_api_name] api_name, direction_status, _, _ = part_api_name.split(".") - binary_consistency_check = False + binary_consistency_check, absolute_threshold_check, benchmark_compare_check = False, False, False if row_gpu.empty: print_warn_log(f'This API : {part_api_name} does not exist in the GPU data.') continue @@ -169,19 +240,36 @@ def analyse_csv(npu_data, gpu_data, config): msg = f'This API : {part_api_name} has multiple records in the GPU data.' 
raise CompareException(CompareException.INVALID_DATA_ERROR, msg) row_gpu = row_gpu.iloc[0] - if row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] in Benchmark_Compare_Support_List: + _, dedicated_api_name, _ = api_name.split("*") + if row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] not in Binary_Compare_Unsupport_List: + binary_consistency_check = True + elif dedicated_api_name in AbsoluteStandardApiName: + if pd.isna(row_npu[BenchmarkCompareColumn.DEVICE_DTYPE]): + continue + if enable_absolute_threshold: + npu_abs_data, gpu_abs_data, npu_dtype = load_api_data(part_api_name, config.npu_abs_data_path, + config.gpu_abs_data_path) + inf_nan_value_checker, small_value_checker, norm_value_checker = AbsoluteThresholdStandard.check( + dedicated_api_name, npu_abs_data, gpu_abs_data, npu_dtype) + absolute_threshold_check = True + elif row_npu[BenchmarkCompareColumn.DEVICE_DTYPE] in Benchmark_Compare_Support_List: bs = BenchmarkStandard(part_api_name, row_npu, row_gpu) bs.get_result() write_detail_csv(bs.to_column_value(), config.details_csv_path) - else: - binary_consistency_check = True + benchmark_compare_check = True if last_api_name is not None and api_name != last_api_name: + _, last_dedicated_api_name, _ = last_api_name.split("*") if last_api_dtype in Benchmark_Compare_Unsupport_List: message = unsupported_message write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) forward_status, backward_status = [], [] message = '' + elif last_dedicated_api_name in AbsoluteStandardApiName and not enable_absolute_threshold: + message = disable_absolute_threshold_message + write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) + forward_status, backward_status = [], [] + message = '' else: forward_result = get_api_checker_result(forward_status) backward_result = get_api_checker_result(backward_status) @@ -197,11 +285,13 @@ def analyse_csv(npu_data, gpu_data, config): if not is_supported: continue - + new_status = CompareConst.NA if 
def load_api_data(part_api_name, npu_path, gpu_path):
    """Load the saved NPU and GPU outputs of one API as NumPy arrays.

    The file ``<part_api_name>.pt`` is read from both directories through
    ``gen_real_tensor``.

    Args:
        part_api_name: API record name; ``".pt"`` is appended to form the
            file name.
        npu_path: Directory holding the NPU dump.
        gpu_path: Directory holding the GPU dump.

    Returns:
        Tuple ``(npu_abs_data, gpu_abs_data, npu_dtype)`` where the first two
        are NumPy arrays and ``npu_dtype`` is the torch dtype the NPU tensor
        had before any upcast.
    """
    api_data_name = part_api_name + ".pt"
    npu_abs_data = gen_real_tensor(os.path.join(npu_path, api_data_name), False)
    gpu_abs_data = gen_real_tensor(os.path.join(gpu_path, api_data_name), False)
    npu_dtype = npu_abs_data.dtype

    # Tensor.numpy() cannot convert bfloat16, so such tensors go through
    # float32 first.
    if npu_dtype == torch.bfloat16:
        npu_abs_data = npu_abs_data.to(torch.float32)
    # Upcast the GPU tensor when the NPU side is bfloat16 (as before) and
    # also when only the GPU dump itself is bfloat16, which previously made
    # the .numpy() call below fail.
    if torch.bfloat16 in (npu_dtype, gpu_abs_data.dtype):
        gpu_abs_data = gpu_abs_data.to(torch.float32)

    return npu_abs_data.numpy(), gpu_abs_data.numpy(), npu_dtype
_benchmark_compare_parser(parser) +def _api_precision_compare(): + parser = argparse.ArgumentParser() + _api_precision_compare_parser(parser) args = parser.parse_args(sys.argv[1:]) - _benchmark_compare_command(args) - - -def _benchmark_compare_command(args): npu_csv_path = get_validated_result_csv_path(args.npu_csv_path, 'detail') gpu_csv_path = get_validated_result_csv_path(args.gpu_csv_path, 'detail') out_path = os.path.realpath(args.out_path) if args.out_path else "./" @@ -260,11 +365,14 @@ def _benchmark_compare_command(args): out_path = out_path_checker.common_check() result_csv_path = os.path.join(out_path, BENCHMARK_COMPARE_RESULT_FILE_NAME) details_csv_path = os.path.join(out_path, BENCHMARK_COMPARE_DETAILS_FILE_NAME) - compare_config = CompareConfig(npu_csv_path, gpu_csv_path, result_csv_path, details_csv_path) - benchmark_compare(compare_config) + npu_abs_data_path = os.path.realpath(args.npu_abs_data_path) if args.npu_abs_data_path else None + gpu_abs_data_path = os.path.realpath(args.gpu_abs_data_path) if args.gpu_abs_data_path else None + compare_config = CompareConfig(npu_csv_path, gpu_csv_path, result_csv_path, details_csv_path, npu_abs_data_path, + gpu_abs_data_path) + api_precision_compare(compare_config) -def _benchmark_compare_parser(parser): +def _api_precision_compare_parser(parser): parser.add_argument("-npu", "--npu_csv_path", dest="npu_csv_path", default="", type=str, help=" , Accuracy_checking_details.csv generated on the NPU by using the " "api_accuracy_checker tool.", @@ -272,13 +380,19 @@ def _benchmark_compare_parser(parser): parser.add_argument("-gpu", "--gpu_csv_path", dest="gpu_csv_path", default="", type=str, help=" Accuracy_checking_details.csv generated on the GPU by using the " "api_accuracy_checker tool.", - required=False) + required=True) parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, help=" The benchmark compare task result out path.", required=False) + parser.add_argument("-npu_abs_path", 
"--npu_abs_data_path", dest="npu_abs_data_path", default="", type=str, + help=" , Data of the absolute threshold method API generated on the NPU ", + required=False) + parser.add_argument("-gpu_abs_path", "--gpu_abs_data_path", dest="gpu_abs_data_path", default="", type=str, + help=" Data of the absolute threshold method API generated on the GPU ", + required=False) if __name__ == '__main__': - _benchmark_compare() - print_info_log("Benchmark compare task completed.") + _api_precision_compare() + print_info_log("Api precision compare task completed.") \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py index d96944bcec613009deafc1a5b4128511778e2730..28cc4cf42e40318635d4c7491a54e5e448986f95 100644 --- a/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/compare/compare_utils.py @@ -5,15 +5,11 @@ from api_accuracy_checker.common.utils import Const, print_warn_log current_time = time.strftime("%Y%m%d%H%M%S") -BENCHMARK_COMPARE_RESULT_FILE_NAME = "benchmark_compare_result_" + current_time + ".csv" -BENCHMARK_COMPARE_DETAILS_FILE_NAME = "benchmark_compare_details_" + current_time + ".csv" +BENCHMARK_COMPARE_RESULT_FILE_NAME = "api_precision_compare_result_" + current_time + ".csv" +BENCHMARK_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + ".csv" Benchmark_Compare_Support_List = ['torch.float16', 'torch.bfloat16', 'torch.float32'] Benchmark_Compare_Unsupport_List = ['torch.float64'] -result_mapping = { - 'pass' : True, - 'warning': False, - 'error' : False -} +Binary_Compare_Unsupport_List = ['torch.float16', 'torch.bfloat16', 'torch.float32', 'torch.float64'] DETAIL_TEST_ROWS = [[ diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py index 
ed7ce7e11d7b479b1100fd16f6deba2316430da0..e417650abb84c0c72fd6552d0eefae27c6a33fe8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py @@ -16,6 +16,7 @@ else: current_device = "npu" import torch from tqdm import tqdm +import yaml from api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args from api_accuracy_checker.common.utils import print_info_log, print_warn_log, get_json_contents, api_info_preprocess, \ print_error_log, initialize_save_path, Const, create_directory @@ -33,10 +34,12 @@ from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen current_time = time.strftime("%Y%m%d%H%M%S") UT_ERROR_DATA_DIR = 'ut_error_data' + current_time +ABSOLUTE_STANDARD_API_DATA_DIR = 'absolute_standard_api_data' + current_time RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', - 'save_error_data', 'is_continue_run_ut', 'real_data_path']) + 'save_error_data', 'is_continue_run_ut', 'real_data_path', + 'save_abs_standard_api']) not_backward_list = ['repeat_interleave'] not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'} @@ -56,6 +59,15 @@ tqdm_params = { } +cur_path = os.path.dirname(os.path.realpath(__file__)) +tool_path = os.path.dirname(cur_path) +standard_yaml_path = os.path.join(tool_path, "common/api_precision_standard.yaml") +with FileOpen(standard_yaml_path, 'r') as f: + Apis = yaml.safe_load(f) + AbsoluteStandardApi = Apis.get('AbsoluteThreshStandard') + AbsoluteStandardApiName = list(AbsoluteStandardApi.keys()) + + def exec_api(api_type, api_name, args, kwargs): if api_type == "Functional": functional_api = FunctionalOPTemplate(api_name, str, False) @@ -154,8 +166,8 @@ def run_ut(config): if 
api_full_name in api_name_set: continue try: - if msCheckerConfig.white_list: - [_, api_name, _] = api_full_name.split("*") + [_, api_name, _] = api_full_name.split("*") + if msCheckerConfig.white_list: if api_name not in set(msCheckerConfig.white_list): continue data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict) @@ -164,6 +176,8 @@ def run_ut(config): data_info.device_out, data_info.bench_grad_out, data_info.device_grad_out) + if config.save_abs_standard_api and api_name in AbsoluteStandardApiName: + save_api_with_absolute_standard(api_full_name, data_info) if config.save_error_data: do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) except Exception as err: @@ -189,12 +203,24 @@ def do_save_error_data(api_full_name, data_info, is_fwd_success, is_bwd_success) if not is_fwd_success or not is_bwd_success: api_full_name = api_full_name.replace("*", ".") for element in data_info.in_fwd_data_list: - UtAPIInfo(api_full_name + '.forward.input', element) - UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out) - UtAPIInfo(api_full_name + '.forward.output.device', data_info.device_out) - UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in) - UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out) - UtAPIInfo(api_full_name + '.backward.output.device', data_info.device_grad_out) + UtAPIInfo(api_full_name + '.forward.input', element, msCheckerConfig.error_data_path, UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.forward.output.bench', data_info.bench_out, msCheckerConfig.error_data_path, + UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.forward.output.device', data_info.device_out, msCheckerConfig.error_data_path, + UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.backward.input', data_info.grad_in, msCheckerConfig.error_data_path, + UT_ERROR_DATA_DIR) + UtAPIInfo(api_full_name + '.backward.output.bench', data_info.bench_grad_out, 
def save_api_with_absolute_standard(api_full_name, data_info):
    """Save the device forward and backward outputs of an absolute-standard API.

    One ``UtAPIInfo`` is created for ``<api_full_name>.forward.output``
    (from ``data_info.device_out``) and one for
    ``<api_full_name>.backward.output`` (from ``data_info.device_grad_out``),
    both under ``msCheckerConfig.absolute_standard_api_data_path`` /
    ``ABSOLUTE_STANDARD_API_DATA_DIR``.
    """
    outputs = (
        ('.forward.output', 'device_out'),
        ('.backward.output', 'device_grad_out'),
    )
    for suffix, attr_name in outputs:
        UtAPIInfo(api_full_name + suffix, getattr(data_info, attr_name),
                  msCheckerConfig.absolute_standard_api_data_path, ABSOLUTE_STANDARD_API_DATA_DIR)
class UtAPIInfo(APIInfo):
    """``APIInfo`` that saves one element's data under ``parent_path``/``dir_name``."""

    def __init__(self, api_name, element, parent_path, dir_name):
        """Initialise the base class in save mode, then analyse ``element``.

        Args:
            api_name: Name under which the element is recorded.
            element: Data object handed to ``analyze_element``.
            parent_path: Root directory passed to ``get_full_save_path``.
            dir_name: Sub-directory name passed to ``get_full_save_path``.
        """
        target_path = self.get_full_save_path(parent_path, dir_name)
        super().__init__(api_name, save_path=target_path, is_save_data=True)
        self.analyze_element(element)