diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py index cb0e13e383cc603e0d2cf152b35f28831013bb08..19c66e83e98c683db970397c73305c0b8ea530e2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py @@ -157,20 +157,23 @@ class CompareRealData: 用于读取excel中的NPU_Name和Bench_Name,根据映射关系找到npy或pt文件,然后读取文件中的数据进行比较,计算余弦相似度、欧式距离 最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率并生成错误信息 """ - error_file, relative_err, error_flag = None, None, False + relative_err, error_flag, err_msg = None, False, None data_name_pair = op_name_mapping_dict.get(npu_op_name) npu_data_name = data_name_pair[0] bench_data_name = data_name_pair[1] + error_file = data_name_pair + if str(npu_data_name) == CompareConst.NO_REAL_DATA_FLAG: # 没有npu真实数据 - n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True + n_value, b_value, error_flag = CompareConst.NO_REAL_DATA, CompareConst.NO_REAL_DATA, True + err_msg = "NPU does not have data file." elif str(bench_data_name) == CompareConst.NO_REAL_DATA_FLAG: # 没有bench真实数据 - n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True - error_file = 'no_bench_data' + n_value, b_value, error_flag = CompareConst.NO_REAL_DATA, CompareConst.NO_REAL_DATA, True + err_msg = "Bench does not have data file." elif str(bench_data_name) == CompareConst.N_A: # bench没匹配 - n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True - error_file = None + n_value, b_value, error_flag = CompareConst.API_UNMATCH, CompareConst.API_UNMATCH, True + err_msg = "Bench api/module unmatched." 
else: npu_dir = input_param.get(CompareConst.NPU_DUMP_DATA_DIR) bench_dir = input_param.get(CompareConst.BENCH_DUMP_DATA_DIR) @@ -187,8 +190,9 @@ class CompareRealData: error_flag = True # 通过n_value, b_value同时得到错误标志和错误信息 - n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, - error_flag=error_flag, error_file=error_file) + if not err_msg: + n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, error_flag=error_flag, + error_file=error_file) result_list, err_msg = compare_ops_apply(n_value, b_value, error_flag, err_msg) @@ -218,11 +222,16 @@ class CompareRealData: = self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param) if is_print_compare_log: - logger.info( - "[{}] Compare result: cosine {}, max_abs_err {}, max_relative_err {}, {}, \ - one_thousand_err_ratio {}, " - "five_thousand_err_ratio {}".format(npu_op_name, cos_sim, max_abs_err, max_relative_err, - err_msg, one_thousand_err_ratio, five_thousand_err_ratio)) + if "does not have data file" in err_msg: + logger.info(f"[{npu_op_name}] Compare result: {err_msg} ") + elif "Bench api/module unmatched" in err_msg: + logger.info(f"[{npu_op_name}] Compare result: {err_msg} ") + else: + logger.info( + "[{}] Compare result: cosine {}, max_abs_err {}, max_relative_err {}, {}, \ + one_thousand_err_ratio {}, " + "five_thousand_err_ratio {}".format(npu_op_name, cos_sim, max_abs_err, max_relative_err, + err_msg, one_thousand_err_ratio, five_thousand_err_ratio)) cos_result.append(cos_sim) euc_dist_result.append(euc_dist) max_err_result.append(max_abs_err) diff --git a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md index 6727e01fe0c6000c715e64829b6609e1062415a1..7a2eca853c5f456b4c0f69eba9ba1081a4755c72 100644 --- a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md @@ -268,15 +268,18 
@@ MD5 模式: ### 3.5 错误信息提示(Err_message)——真实数据模式、统计数据模式 1. "Need double check api accuracy.":四个统计值中至少 1 个相对误差 > 0.5(统计数据模式); -2. "Fuzzy matching data, the comparison arruracy may be affected.":NPU 或 Bench 的真实数据名没有匹配上(真实数据模式); -3. "Dump file: {} not found or read failed.":NPU 或 Bench 的真实数据不存在或者读取出错(真实数据模式); -4. "No bench data matched.":Bench 的 API 没有匹配上、Bench 真实数据不存在或读取出错(真实数据模式); -5. "This is empty data, can not compare.":读取到的数据为空(真实数据模式); -6. "Shape of NPU and bench Tensor do not match. Skipped.":NPU 和 Bench 的数据结构不一致(真实数据模式); -7. "The Position of inf or nan in NPU and bench Tensor do not match.":NPU 和 Bench 的数据有 nan/inf(真实数据模式); -8. "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'.":NPU 为0维张量(真实数据模式); -9. "Dtype of NPU and bench Tensor do not match.":NPU 和 Bench 数据的数据类型不同(真实数据模式); -10. "":除以上情况的其余情况(真实数据模式、统计数据模式)。 +2. "Fuzzy matching data, the comparison accuracy may be affected.":NPU 或 Bench 的真实数据名没有匹配上(真实数据模式); +3. "Dump file: {} not found or read failed.":NPU 或 Bench 的真实数据读取出错(真实数据模式); +4. "No bench data matched.":Bench 的 API 没有匹配上(真实数据模式、统计数据模式); +5. "NPU does not have data file.":NPU 的真实数据不存在(真实数据模式); +6. "Bench does not have data file.":Bench 的真实数据不存在(真实数据模式); +7. "Bench api/module unmatched.":Bench 的 API 没有匹配上(真实数据模式); +8. "This is empty data, can not compare.":读取到的数据为空(真实数据模式); +9. "Shape of NPU and bench Tensor do not match. Skipped.":NPU 和 Bench 的数据结构不一致(真实数据模式); +10. "The Position of inf or nan in NPU and bench Tensor do not match.":NPU 和 Bench 的数据有 nan/inf(真实数据模式); +11. "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'.":NPU 为0维张量(真实数据模式); +12. "Dtype of NPU and bench Tensor do not match.":NPU 和 Bench 数据的数据类型不同(真实数据模式); +13. 
"":除以上情况的其余情况(真实数据模式、统计数据模式)。 除以上错误信息提示外,异常数据颜色高亮标记的原因叠加呈现于此列。 diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py index c30c0c16de3acd6e638121691df4b08fbfa297b3..afcdd25744bf030a98629902cce4d4efa44a78b8 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py @@ -23,7 +23,7 @@ o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.i 'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 1, 1, 1, 1, 1, 1, 1, 1, - 'None', 'Dump file: None not found or read failed.', ['-1', '-1']]] + 'None', 'NPU does not have data file.', ['-1', '-1']]] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) o_result = pd.DataFrame(o_data, columns=columns) @@ -160,7 +160,7 @@ class TestCompareRealData(unittest.TestCase): input_param = {'npu_dump_data_dir': base_dir, 'bench_dump_data_dir': base_dir} result = compare_real_data.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'unsupported', 'Dump file: None not found or read failed.']) + 'unsupported', 'NPU does not have data file.']) pt_name = 'Functional.linear.0.forward.input.0.pt' op_name_mapping_dict = {'Functional.linear.0.forward.input.0': [pt_name, pt_name]} @@ -186,7 +186,7 @@ class TestCompareRealData(unittest.TestCase): result = compare_real_data.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'unsupported', 'Dump file: None not found or read 
failed.']) + 'unsupported', 'NPU does not have data file.']) def test_compare_ops(self): generate_dump_json(base_dir3) @@ -221,7 +221,7 @@ class TestCompareRealData(unittest.TestCase): o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'Dump file: None not found or read failed.', ['-1', '-1']]] + 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'NPU does not have data file.', ['-1', '-1']]] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) o_result = pd.DataFrame(o_data, columns=columns)