From d1745e4f0a71cfa05f63a4d4c37d950c419037e0 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 22 Oct 2024 19:26:43 +0800 Subject: [PATCH 1/8] compare result replace nan with Nan --- .../msprobe/core/compare/npy_compare.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py index c4b84163bf..e0f8ec3c32 100644 --- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py @@ -167,7 +167,7 @@ class GetCosineSimilarity(TensorComparisonBasic): if n_value == CompareConst.SHAPE_UNMATCH: return CompareConst.SHAPE_UNMATCH, '' if n_value == CompareConst.NAN: - return "N/A", '' + return CompareConst.N_A, '' if not n_value.shape: return CompareConst.UNSUPPORTED, '' @@ -191,7 +191,7 @@ class GetCosineSimilarity(TensorComparisonBasic): return CompareConst.NAN, 'Cannot compare by Cosine Similarity, the dump data has NaN.' result = format_value(cos) result = self.correct_data(result) - return 1.0 if float(result) > 0.99999 else result, '' + return result, '' class GetMaxAbsErr(TensorComparisonBasic): @@ -205,10 +205,12 @@ class GetMaxAbsErr(TensorComparisonBasic): if n_value == CompareConst.SHAPE_UNMATCH: return CompareConst.SHAPE_UNMATCH, "" if n_value == CompareConst.NAN: - return "N/A", "" + return CompareConst.N_A, "" temp_res = n_value - b_value max_value = np.max(np.abs(temp_res)) + if np.isnan(max_value): + return CompareConst.NAN, "" return format_value(max_value), "" @@ -235,7 +237,7 @@ class GetMaxRelativeErr(TensorComparisonBasic): if n_value == CompareConst.SHAPE_UNMATCH: return CompareConst.SHAPE_UNMATCH, '' if n_value == CompareConst.NAN: - return "N/A", '' + return CompareConst.N_A, '' if relative_err is None: relative_err = get_relative_err(n_value, b_value) @@ -257,7 +259,7 @@ class GetThousandErrRatio(TensorComparisonBasic): if n_value == CompareConst.SHAPE_UNMATCH: return CompareConst.SHAPE_UNMATCH, "" if n_value == CompareConst.NAN: - return "N/A", "" + return CompareConst.N_A, "" if not n_value.shape: return CompareConst.NAN, "" @@ -279,7 +281,7 @@ class GetFiveThousandErrRatio(TensorComparisonBasic): if n_value == CompareConst.SHAPE_UNMATCH: return CompareConst.SHAPE_UNMATCH, "" if n_value == CompareConst.NAN: - return "N/A", "" + return CompareConst.N_A, "" if not n_value.shape: return CompareConst.NAN, "" -- Gitee From 87cd101fb5f5a5ed4fc1fa813ed6d68ee1785bc0 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 22 Oct 2024 19:34:12 +0800 Subject: [PATCH 2/8] compare result replace nan with Nan --- debug/accuracy_tools/msprobe/core/compare/npy_compare.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py index e0f8ec3c32..7e479b83b2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py @@ -210,7 +210,8 @@ class GetMaxAbsErr(TensorComparisonBasic): temp_res = n_value - b_value max_value = np.max(np.abs(temp_res)) if np.isnan(max_value): - return CompareConst.NAN, "" + message = 'Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data.' + return CompareConst.NAN, message return format_value(max_value), "" @@ -243,7 +244,7 @@ class GetMaxRelativeErr(TensorComparisonBasic): relative_err = get_relative_err(n_value, b_value) max_relative_err = np.max(np.abs(relative_err)) if np.isnan(max_relative_err): - message = 'Cannot compare by MaxRelativeError, the data contains nan in dump data.' + message = 'Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data.' return CompareConst.NAN, message return format_value(max_relative_err), '' -- Gitee From 8f8957d2ab8194e2edc8c00f5c82a81c3a95aed4 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 22 Oct 2024 20:03:09 +0800 Subject: [PATCH 3/8] compare result replace nan with Nan --- debug/accuracy_tools/msprobe/core/compare/npy_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py index 7e479b83b2..0646d24c01 100644 --- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py @@ -191,7 +191,7 @@ class GetCosineSimilarity(TensorComparisonBasic): return CompareConst.NAN, 'Cannot compare by Cosine Similarity, the dump data has NaN.' result = format_value(cos) result = self.correct_data(result) - return result, '' + return 1.0 if float(result) > 0.99999 else result, '' class GetMaxAbsErr(TensorComparisonBasic): -- Gitee From e78120b86968564500052df666dc948cdab43e08 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 24 Oct 2024 14:55:11 +0800 Subject: [PATCH 4/8] compare result replace nan with Nan --- debug/accuracy_tools/msprobe/core/compare/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 4963d40978..91b15012ec 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -15,6 +15,7 @@ import os import re +import math import numpy as np from msprobe.core.common.const import Const, CompareConst from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger @@ -274,9 +275,9 @@ def get_accuracy(result, n_dict, b_dict, summary_compare=False, md5_compare=Fals ] npu_summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] - result_item.extend(npu_summary_data) + result_item.extend(CompareConst.NAN if math.isnan(x) else x for x in npu_summary_data) bench_summary_data = b_dict.get(CompareConst.SUMMARY)[b_start + index] - result_item.extend(bench_summary_data) + result_item.extend(CompareConst.NAN if math.isnan(x) else x for x in bench_summary_data) if summary_compare: start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) -- Gitee From 76bf344b04327ed28bdc63d72a61ccb485d40f18 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 24 Oct 2024 15:12:57 +0800 Subject: [PATCH 5/8] compare result replace nan with Nan --- debug/accuracy_tools/msprobe/core/compare/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 91b15012ec..89fb7014f2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -275,9 +275,9 @@ def get_accuracy(result, n_dict, b_dict, summary_compare=False, md5_compare=Fals ] npu_summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] - result_item.extend(CompareConst.NAN if math.isnan(x) else x for x in npu_summary_data) + result_item.extend([CompareConst.NAN if math.isnan(x) else x for x in npu_summary_data]) bench_summary_data = b_dict.get(CompareConst.SUMMARY)[b_start + index] - result_item.extend(CompareConst.NAN if math.isnan(x) else x for x in bench_summary_data) + result_item.extend([CompareConst.NAN if math.isnan(x) else x for x in bench_summary_data]) if summary_compare: start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) -- Gitee From 28199622a2ad33c15b43cb2178e29c66ac92cbd5 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 24 Oct 2024 15:31:04 +0800 Subject: [PATCH 6/8] compare result replace nan with Nan --- debug/accuracy_tools/msprobe/core/compare/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 89fb7014f2..ca3e8204e2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -275,9 +275,11 @@ def get_accuracy(result, n_dict, b_dict, summary_compare=False, md5_compare=Fals ] npu_summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] - result_item.extend([CompareConst.NAN if math.isnan(x) else x for x in npu_summary_data]) + result_item.extend([CompareConst.NAN if isinstance(x, float) and math.isnan(x) + else x for x in npu_summary_data]) bench_summary_data = b_dict.get(CompareConst.SUMMARY)[b_start + index] - result_item.extend([CompareConst.NAN if math.isnan(x) else x for x in bench_summary_data]) + result_item.extend([CompareConst.NAN if isinstance(x, float) and math.isnan(x) + else x for x in bench_summary_data]) if summary_compare: start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) -- Gitee From 5eea5ffc2bffe0ddcfa19f7525ad274bce2f2a3f Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 25 Oct 2024 18:21:47 +0800 Subject: [PATCH 7/8] compare result replace nan with Nan --- .../msprobe/core/compare/utils.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index ca3e8204e2..0190bcce16 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -278,26 +278,33 @@ def get_accuracy(result, n_dict, b_dict, summary_compare=False, md5_compare=Fals result_item.extend([CompareConst.NAN if isinstance(x, float) and math.isnan(x) else x for x in npu_summary_data]) bench_summary_data = b_dict.get(CompareConst.SUMMARY)[b_start + index] - result_item.extend([CompareConst.NAN if isinstance(x, float) and math.isnan(x) + result_item.extend([CompareConst.NAN if isinstance(x, float) and math.isnan(x) else x for x in bench_summary_data]) if summary_compare: start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) warning_flag = False for i, (npu_val, bench_val) in enumerate(zip(npu_summary_data, bench_summary_data)): - if isinstance(npu_val, (float, int)) and isinstance(bench_val, (float, int)): + if (isinstance(npu_val, (float, int)) and isinstance(bench_val, (float, int)) and + not type(npu_val) is bool and not type(bench_val) is bool): diff = npu_val - bench_val - if bench_val != 0: - relative = str(abs((diff / bench_val) * 100)) + '%' + if math.isnan(diff): + diff = CompareConst.NAN + relative = CompareConst.NAN else: - relative = CompareConst.N_A + if bench_val != 0: + relative = str(abs(diff / bench_val)) + else: + relative = CompareConst.N_A + magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + 1e-10) + if magnitude_diff > 0.5: + warning_flag = True + result_item[start_idx + i] = diff result_item[start_idx + i + 4] = relative - magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + 1e-10) - if magnitude_diff > 0.5: - warning_flag = True else: - result_item[start_idx + i] = CompareConst.NONE + result_item[start_idx + i] = CompareConst.N_A + result_item[start_idx + i + 4] = CompareConst.N_A accuracy_check = CompareConst.WARNING if warning_flag else "" err_msg += "Need double check api accuracy." if warning_flag else "" for i in range(start_idx, len(result_item)): -- Gitee From a2f48a1e28950c83539370fdab12c162f519420c Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Sat, 26 Oct 2024 09:02:24 +0800 Subject: [PATCH 8/8] compare result replace nan with Nan --- .../msprobe/core/compare/acc_compare.py | 24 ++++++++++++------- .../msprobe/core/compare/utils.py | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index d5b5381ef3..75d1dadf0e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -15,6 +15,7 @@ import multiprocessing import os +import math import pandas as pd from tqdm import tqdm from msprobe.core.common.file_utils import load_json @@ -61,19 +62,26 @@ class Comparator: start_idx = CompareConst.SUMMARY_COMPARE_RESULT_HEADER.index(CompareConst.MAX_DIFF) warning_flag = False for i, (npu_val, bench_val) in enumerate(zip(npu_summary_data, bench_summary_data)): - if isinstance(npu_val, (float, int)) and isinstance(bench_val, (float, int)): + if (isinstance(npu_val, (float, int)) and isinstance(bench_val, (float, int)) and + not type(npu_val) is bool and not type(bench_val) is bool): diff = npu_val - bench_val - if bench_val != 0: - relative = str(abs((diff / bench_val) * 100)) + '%' + if math.isnan(diff): + diff = CompareConst.NAN + relative = CompareConst.NAN else: - relative = "N/A" + if bench_val != 0: + relative = str(abs((diff / bench_val) * 100)) + '%' + else: + relative = CompareConst.N_A + magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + 1e-10) + if magnitude_diff > 0.5: + warning_flag = True + result_item[start_idx + i] = diff result_item[start_idx + i + 4] = relative - magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + 1e-10) - if magnitude_diff > 0.5: - warning_flag = True else: - result_item[start_idx + i] = CompareConst.NONE + result_item[start_idx + i] = CompareConst.N_A + result_item[start_idx + i + 4] = CompareConst.N_A accuracy_check = CompareConst.WARNING if warning_flag else "" err_msg += "Need double check api accuracy." if warning_flag else "" for i in range(start_idx, len(result_item)): diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 0190bcce16..b59cf30048 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -293,7 +293,7 @@ def get_accuracy(result, n_dict, b_dict, summary_compare=False, md5_compare=Fals relative = CompareConst.NAN else: if bench_val != 0: - relative = str(abs(diff / bench_val)) + relative = str(abs((diff / bench_val) * 100)) + '%' else: relative = CompareConst.N_A magnitude_diff = abs(diff) / (max(abs(npu_val), abs(bench_val)) + 1e-10) -- Gitee