From affdd2edef5f8c5000583d316fe4b93a75a19a35 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Thu, 20 Feb 2025 15:46:26 +0800
Subject: [PATCH 01/25] compare add euclidean distance

---
 .../msprobe/core/common/const.py              |  8 +-
 .../msprobe/core/compare/acc_compare.py       | 11 ++-
 .../msprobe/core/compare/highlight.py         | 13 ++-
 .../core/compare/multiprocessing_compute.py   |  2 +
 .../msprobe/core/compare/npy_compare.py       | 51 ++++++++++++
 .../msprobe/core/compare/utils.py             |  8 +-
 .../compare/test_acc_compare_npy_compare.py   | 80 ++++++++++++++++++-
 7 files changed, 160 insertions(+), 13 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py
index d9623b80712..bf0883667c0 100644
--- a/debug/accuracy_tools/msprobe/core/common/const.py
+++ b/debug/accuracy_tools/msprobe/core/common/const.py
@@ -256,6 +256,7 @@ class CompareConst:
     MEAN_DIFF = "Mean diff"
     NORM_DIFF = "L2norm diff"
     COSINE = "Cosine"
+    EUC_DIST = "EucDist"
     MAX_ABS_ERR = "MaxAbsErr"
     MAX_RELATIVE_ERR = "MaxRelativeErr"
     MIN_RELATIVE_ERR = "MinRelativeErr"
@@ -330,8 +331,8 @@ class CompareConst:
     ULP_ERR_STATUS = "ulp_err_status"
 
     COMPARE_RESULT_HEADER = [
-        NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, MAX_ABS_ERR, MAX_RELATIVE_ERR,
-        ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO,
+        NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, EUC_DIST,
+        MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO,
         NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, ACCURACY, ERROR_MESSAGE
     ]
 
@@ -357,7 +358,8 @@ class CompareConst:
         Const.MD5: MD5_COMPARE_RESULT_HEADER
     }
 
-    ALL_COMPARE_INDEX = [COSINE, MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO]
+    ALL_COMPARE_INDEX = [COSINE, EUC_DIST, MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO,
+                         FIVE_THOUSANDTHS_ERR_RATIO]
     SUMMARY_COMPARE_INDEX = [MAX_DIFF, MIN_DIFF, MEAN_DIFF, NORM_DIFF,
                              MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR]
 
diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
index 55229d72657..0672e32404c 100644
--- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
@@ -456,6 +456,7 @@ class Comparator:
 
     def compare_ops(self, idx, dump_path_dict, result_df, lock, input_param):
         cos_result = []
+        euc_dist_result = []
         max_err_result = []
         max_relative_err_result = []
         err_mess = []
@@ -469,8 +470,8 @@ class Comparator:
             if is_print_compare_log:
                 logger.info("start compare: {}".format(npu_op_name))
 
-            cos_sim, max_abs_err, max_relative_err, one_thousand_err_ratio, five_thousand_err_ratio, err_msg = \
-                self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param, bench_data)
+            cos_sim, euc_dist, max_abs_err, max_relative_err, one_thousand_err_ratio, five_thousand_err_ratio, err_msg \
+                = self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param, bench_data)
 
             if is_print_compare_log:
                 logger.info(
@@ -479,6 +480,7 @@ class Comparator:
                     "five_thousand_err_ratio {}".format(npu_op_name, cos_sim, max_abs_err, max_relative_err,
                                                         err_msg, one_thousand_err_ratio, five_thousand_err_ratio))
             cos_result.append(cos_sim)
+            euc_dist_result.append(euc_dist)
             max_err_result.append(max_abs_err)
             max_relative_err_result.append(max_relative_err)
             err_mess.append(err_msg)
@@ -487,6 +489,7 @@ class Comparator:
 
         cr = ComparisonResult(
             cos_result=cos_result,
+            euc_dist_result=euc_dist_result,
             max_err_result=max_err_result,
             max_relative_err_result=max_relative_err_result,
             err_msgs=err_mess,
@@ -496,9 +499,9 @@ class Comparator:
 
         return _save_cmp_result(idx, cr, result_df, lock)
 
-    def do_multi_process(self, input_parma, result_df):
+    def do_multi_process(self, input_param, result_df):
         try:
-            result_df = _handle_multi_process(self.compare_ops, input_parma, result_df,
+            result_df = _handle_multi_process(self.compare_ops, input_param, result_df,
                                               multiprocessing.Manager().RLock())
             return result_df
         except ValueError as e:
diff --git a/debug/accuracy_tools/msprobe/core/compare/highlight.py b/debug/accuracy_tools/msprobe/core/compare/highlight.py
index cf3e1c4c03e..d95729c6d85 100644
--- a/debug/accuracy_tools/msprobe/core/compare/highlight.py
+++ b/debug/accuracy_tools/msprobe/core/compare/highlight.py
@@ -17,6 +17,7 @@ import abc
 import math
 import multiprocessing
 import re
+from abc import ABC
 from collections import namedtuple
 
 import numpy as np
@@ -97,6 +98,13 @@ class CheckCosineSimilarity(HighlightCheck):
                                    "compared to the input/parameters's")
 
 
+class CheckEuclideanDistance(HighlightCheck):
+    """Highlight check for the Euclidean distance metric; apply() is currently a no-op."""
+
+    def apply(self, info, color_columns, dump_mode):
+        pass  # no-op: color_columns is left untouched for this metric
+
+
 class CheckMaxRelativeDiff(HighlightCheck):
     """检查最大相对差异"""
 
@@ -146,11 +154,14 @@ class HighlightRules:
     }
 
     # 用于比较输入和输出的规则
+    # 真实数据检查规则
     compare_rules = {
         "check_order_magnitude": CheckOrderMagnitude(),
         "check_one_thousand_error": CheckOneThousandErrorRatio(),
-        "check_cosine_similarity": CheckCosineSimilarity()
+        "check_cosine_similarity": CheckCosineSimilarity(),
+        "check_euclidean_distance": CheckEuclideanDistance()
     }
+    # 统计量数据检查规则
     summary_compare_rules = {
         "check_order_magnitude": CheckOrderMagnitude(),
         "check_max_relative_diff": CheckMaxRelativeDiff(),
diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
index c2c1461e452..560a6f603eb 100644
--- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
+++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
@@ -110,6 +110,7 @@ def read_dump_data(result_df):
 @dataclass
 class ComparisonResult:
     cos_result: list
+    euc_dist_result: list
     max_err_result:  list
     max_relative_err_result: list
     err_msgs: list
@@ -135,6 +136,7 @@ def _save_cmp_result(offset, result: ComparisonResult, result_df, lock):
         for i, _ in enumerate(result.cos_result):
             process_index = i + offset
             result_df.loc[process_index, CompareConst.COSINE] = result.cos_result[i]
+            result_df.loc[process_index, CompareConst.EUC_DIST] = result.euc_dist_result[i]
             result_df.loc[process_index, CompareConst.MAX_ABS_ERR] = result.max_err_result[i]
             result_df.loc[process_index, CompareConst.MAX_RELATIVE_ERR] = result.max_relative_err_result[i]
             result_df.loc[process_index, CompareConst.ERROR_MESSAGE] = result.err_msgs[i]
diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index c551985780c..db391726c79 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -168,6 +168,7 @@ def statistics_data_check(result_dict):
 
 class TensorComparisonBasic(abc.ABC):
     """NPU和bench中npy数据的比较模板"""
+
     @abc.abstractmethod
     def apply(self, n_value, b_value, relative_err):
         raise NotImplementedError
@@ -190,6 +191,7 @@ def get_relative_err(n_value, b_value):
 
 class GetCosineSimilarity(TensorComparisonBasic):
     """计算cosine相似度"""
+
     @staticmethod
     def correct_data(result):
         if result == CompareConst.NAN:
@@ -224,8 +226,54 @@ class GetCosineSimilarity(TensorComparisonBasic):
         return result, ""
 
 
+class GetEuclideanDistance(TensorComparisonBasic):
+    """Euclidean (L2) distance between NPU and bench arrays; apply() returns a (value, message) pair."""
+
+    def apply(self, n_value, b_value, relative_err):
+        msg = ''  # stays empty unless a guard below rejects the input or the large-value warning fires
+
+        # Shapes must match exactly; relative_err is accepted for interface parity and is not used here.
+        if n_value.shape != b_value.shape:
+            msg = ("Cannot compare by Euclidean Distance, shapes of tensors do not match: "
+                   f"npu:{n_value.shape} vs bench:{b_value.shape}")
+            return CompareConst.UNSUPPORTED, msg
+
+        # Empty inputs are reported as NaN rather than as a distance.
+        if n_value.size == 0 or b_value.size == 0:
+            msg = ("Cannot compare by Euclidean Distance, sizes of tensors must not be empty: "
+                   f"npu:{n_value.size} vs bench:{b_value.size}")
+            return CompareConst.NAN, msg
+
+        # A NaN or Inf anywhere in either input short-circuits to NaN.
+        if np.any(np.isnan(n_value)) or np.any(np.isnan(b_value)):
+            msg = "Tensor contains NaN values."
+            return CompareConst.NAN, msg
+        if np.any(np.isinf(n_value)) or np.any(np.isinf(b_value)):
+            msg = "Tensor contains Inf values."
+            return CompareConst.NAN, msg
+
+        # Two all-zero inputs are at distance 0; this runs before the scalar check, so zero scalars land here.
+        if np.all(n_value == 0) and np.all(b_value == 0):
+            return 0.0, "Zero tensors"
+
+        # Remaining 0-d (scalar) inputs are rejected.
+        if np.ndim(n_value) == 0 or np.ndim(b_value) == 0:
+            msg = "Cannot compare by Euclidean Distance, input must be a vector, not a scalar."
+            return CompareConst.UNSUPPORTED, msg
+
+        # Magnitudes above 1e10 only attach a warning; the distance is still computed below.
+        if np.any(np.abs(n_value) > 1e10) or np.any(np.abs(b_value) > 1e10):
+            msg = "tensors's values are large, which may cause overflow."
+
+        # Default np.linalg.norm: 2-norm over all elements of the element-wise difference.
+        distance = np.linalg.norm(n_value - b_value)
+
+        return distance, msg
+
+
 class GetMaxAbsErr(TensorComparisonBasic):
     """计算最大绝对误差"""
+
     def apply(self, n_value, b_value, relative_err):
         temp_res = n_value - b_value
         max_value = np.max(np.abs(temp_res))
@@ -237,6 +285,7 @@ class GetMaxAbsErr(TensorComparisonBasic):
 
 class GetMaxRelativeErr(TensorComparisonBasic):
     """计算最大相对误差"""
+
     def apply(self, n_value, b_value, relative_err):
         max_relative_err = np.max(np.abs(relative_err))
         if np.isnan(max_relative_err):
@@ -247,6 +296,7 @@ class GetMaxRelativeErr(TensorComparisonBasic):
 
 class GetErrRatio(TensorComparisonBasic):
     """计算相对误差小于指定阈值(千分之一、千分之五)的比例"""
+
     def __init__(self, threshold):
         self.threshold = threshold
 
@@ -264,6 +314,7 @@ class GetErrRatio(TensorComparisonBasic):
 class CompareOps:
     compare_ops = {
         "cosine_similarity": GetCosineSimilarity(),
+        "euclidean_distance": GetEuclideanDistance(),
         "max_abs_error": GetMaxAbsErr(),
         "max_relative_error": GetMaxRelativeErr(),
         "one_thousand_err_ratio": GetErrRatio(CompareConst.THOUSAND_RATIO_THRESHOLD),
diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py
index a2edf57e5bb..72b75ab254e 100644
--- a/debug/accuracy_tools/msprobe/core/compare/utils.py
+++ b/debug/accuracy_tools/msprobe/core/compare/utils.py
@@ -285,9 +285,9 @@ def result_item_init(n_info, b_info, dump_mode):
             md5_compare_result = CompareConst.PASS if n_info.struct[2] == b_info.struct[2] else CompareConst.DIFF
             result_item.extend([n_info.struct[2], b_info.struct[2], md5_compare_result])
         elif dump_mode == Const.SUMMARY:
-            result_item.extend([" "] * 8)
+            result_item.extend([" "] * 8)  # 8个统计量数据情况的比对指标
         else:
-            result_item.extend([" "] * 5)
+            result_item.extend([" "] * 6)  # 6个真实数据情况的比对指标
     else:
         err_msg = "index out of bounds error will occur in result_item_init, please check!\n" \
                   f"npu_info_struct is {n_info.struct}\n" \
@@ -453,9 +453,9 @@ def get_un_match_accuracy(result, n_dict, dump_mode):
             result.append(result_item)
             continue
         if dump_mode == Const.SUMMARY:
-            result_item.extend([CompareConst.N_A] * 8)
+            result_item.extend([CompareConst.N_A] * 8)  # 8个统计量数据情况的比对指标
         if dump_mode == Const.ALL:
-            result_item.extend([CompareConst.N_A] * 5)
+            result_item.extend([CompareConst.N_A] * 6)  # 6个真实数据情况的比对指标
 
         npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder")
         bench_summary_data = [CompareConst.N_A] * 4
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
index aec6cdc5117..cee6d5565bf 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
@@ -20,7 +20,7 @@ from unittest.mock import patch
 from msprobe.core.common.const import CompareConst
 from msprobe.core.compare.npy_compare import handle_inf_nan, reshape_value, get_error_flag_and_msg, \
     npy_data_check, statistics_data_check, get_relative_err, GetCosineSimilarity, GetMaxAbsErr, GetMaxRelativeErr, \
-    GetErrRatio, error_value_process, compare_ops_apply
+    GetErrRatio, error_value_process, compare_ops_apply, GetEuclideanDistance
 
 
 op_name = 'Functional.conv2d.0.backward.input.0'
@@ -473,3 +473,81 @@ class TestUtilsMethods(unittest.TestCase):
         a, b = compare_ops_apply(n_value, b_value, error_flag, err_msg)
         self.assertEqual(a, [1.0, 0.0, 0.0, 1.0, 1.0])
         self.assertEqual(b, '')
+
+
+class TestGetEuclideanDistance(unittest.TestCase):  # unit tests for GetEuclideanDistance.apply
+
+    def setUp(self):
+        self.euc_distance = GetEuclideanDistance()
+
+    def test_shape_mismatch(self):
+        # Mismatched shapes should yield UNSUPPORTED
+        n_value = np.array([1, 2, 3])
+        b_value = np.array([1, 2])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, CompareConst.UNSUPPORTED)
+        self.assertIn("Cannot compare by Euclidean Distance", msg)
+
+    def test_empty_tensor(self):
+        # 测试当输入的张量为空时，返回 NAN
+        n_value = np.array([])
+        b_value = np.array([1, 2, 3])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, CompareConst.NAN)
+        self.assertIn("sizes of tensors must not be empty", msg)
+
+    def test_nan_in_tensor(self):
+        # A NaN in either tensor should yield NAN
+        n_value = np.array([1, 2, np.nan])
+        b_value = np.array([1, 2, 3])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, CompareConst.NAN)
+        self.assertIn("Tensor contains NaN values", msg)
+
+    def test_inf_in_tensor(self):
+        # An Inf in either tensor should yield NAN
+        n_value = np.array([1, 2, np.inf])
+        b_value = np.array([1, 2, 3])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, CompareConst.NAN)
+        self.assertIn("Tensor contains Inf values", msg)
+
+    def test_zero_tensors(self):
+        # Two all-zero tensors are at Euclidean distance 0
+        n_value = np.array([0, 0, 0])
+        b_value = np.array([0, 0, 0])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, 0.0)
+        self.assertIn("Zero tensors", msg)
+
+    def test_scalars(self):
+        # Scalar (0-d) inputs should yield UNSUPPORTED
+        n_value = np.array(5)
+        b_value = np.array(10)
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertEqual(result, CompareConst.UNSUPPORTED)
+        self.assertIn("input must be a vector, not a scalar", msg)
+
+    def test_large_values(self):
+        # Large values should produce the overflow warning message
+        n_value = np.array([1e11, 1e11, 1e11])
+        b_value = np.array([1e10, 1e10, 1e10])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        self.assertIn("tensors's values are large", msg)
+
+    def test_euclidean_distance(self):
+        # Plain case: distance between two ordinary tensors
+        n_value = np.array([1, 2, 3])
+        b_value = np.array([4, 5, 6])
+
+        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        expected_distance = np.linalg.norm(n_value - b_value)
+        self.assertEqual(result, expected_distance)
+        self.assertEqual(msg, '')
-- 
Gitee


From 820bd952616ff9b6500f436915a1b2b8f03b1334 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 10:58:33 +0800
Subject: [PATCH 02/25] compare add euclidean distance

---
 .../msprobe/core/compare/acc_compare.py       |  9 ++--
 .../core/compare/multiprocessing_compute.py   | 10 ++---
 .../test/core_ut/compare/test_acc_compare.py  | 38 +++++++++-------
 .../compare/test_acc_compare_npy_compare.py   |  4 +-
 .../core_ut/compare/test_acc_compare_utils.py | 44 +++++++++++--------
 .../test_cmp_multiprocessing_compute.py       | 20 +++++----
 6 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
index 0672e32404c..4f4bdd0b105 100644
--- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
@@ -459,9 +459,10 @@ class Comparator:
         euc_dist_result = []
         max_err_result = []
         max_relative_err_result = []
-        err_mess = []
         one_thousand_err_ratio_result = []
         five_thousand_err_ratio_result = []
+        err_mess = []
+
         is_print_compare_log = input_param.get("is_print_compare_log")
         bench_data = load_json(input_param.get("bench_json_path")).get('data')
         for i in range(len(result_df)):
@@ -483,18 +484,18 @@ class Comparator:
             euc_dist_result.append(euc_dist)
             max_err_result.append(max_abs_err)
             max_relative_err_result.append(max_relative_err)
-            err_mess.append(err_msg)
             one_thousand_err_ratio_result.append(one_thousand_err_ratio)
             five_thousand_err_ratio_result.append(five_thousand_err_ratio)
+            err_mess.append(err_msg)
 
         cr = ComparisonResult(
             cos_result=cos_result,
             euc_dist_result=euc_dist_result,
             max_err_result=max_err_result,
             max_relative_err_result=max_relative_err_result,
-            err_msgs=err_mess,
             one_thousand_err_ratio_result=one_thousand_err_ratio_result,
-            five_thousand_err_ratio_result=five_thousand_err_ratio_result
+            five_thousand_err_ratio_result=five_thousand_err_ratio_result,
+            err_msgs=err_mess
         )
 
         return _save_cmp_result(idx, cr, result_df, lock)
diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
index 560a6f603eb..20849afa920 100644
--- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
+++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -113,9 +113,9 @@ class ComparisonResult:
     euc_dist_result: list
     max_err_result:  list
     max_relative_err_result: list
-    err_msgs: list
     one_thousand_err_ratio_result: list
     five_thousand_err_ratio_result: list
+    err_msgs: list
 
 
 def _save_cmp_result(offset, result: ComparisonResult, result_df, lock):
@@ -139,13 +139,13 @@ def _save_cmp_result(offset, result: ComparisonResult, result_df, lock):
             result_df.loc[process_index, CompareConst.EUC_DIST] = result.euc_dist_result[i]
             result_df.loc[process_index, CompareConst.MAX_ABS_ERR] = result.max_err_result[i]
             result_df.loc[process_index, CompareConst.MAX_RELATIVE_ERR] = result.max_relative_err_result[i]
-            result_df.loc[process_index, CompareConst.ERROR_MESSAGE] = result.err_msgs[i]
-            result_df.loc[process_index, CompareConst.ACCURACY] = (
-                check_accuracy(result.cos_result[i], result.max_err_result[i]))
             result_df.loc[process_index, CompareConst.ONE_THOUSANDTH_ERR_RATIO] = (
                 result.one_thousand_err_ratio_result)[i]
             result_df.loc[process_index, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = (
                 result.five_thousand_err_ratio_result)[i]
+            result_df.loc[process_index, CompareConst.ACCURACY] = (
+                check_accuracy(result.cos_result[i], result.max_err_result[i]))
+            result_df.loc[process_index, CompareConst.ERROR_MESSAGE] = result.err_msgs[i]
         return result_df
     except ValueError as e:
         logger.error('result dataframe is not found.')
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py
index b4566fcfe6f..c882e331f55 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py
@@ -191,17 +191,21 @@ summary_line_3 = ['Functional_batch_norm_0_forward.output.2', 'Functional_batch_
                   'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1,
                   'Warning', '']
 line_input = ['Functional.batch.norm.0.forward.input.0', 'Functional.batch.norm.0.forward.input.0', 'torch.float16',
-              'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1,
+              'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 0.5, 1, 1, 0.95, 1,
+              1, 1, 1, 1, 1.01, 1, 1, 1,
               'Yes', '']
 line_1 = ['Functional.batch.norm.0.forward.output.0', 'Functional.batch.norm.0.forward.output.0', 'torch.float16',
-          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1,
-          'Warning', '']
+          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 0.5, 1, 1, 0.59, 1,
+          'nan', 0, 1, 1, 19, 1, 1, 1,
+          'Yes', '']
 line_2 = ['Functional.batch.norm.0.forward.output.1', 'Functional.batch.norm.0.forward.output.1', 'torch.float16',
-          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1,
-          'Warning', '']
+          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 0.5, 1, 1, 0.8, 1,
+          0, 0.12, 0, 1, 1, 0.1, 1, 1,
+          'Yes', '']
 line_3 = ['Functional.batch.norm.0.forward.output.2', 'Functional.batch.norm.0.forward.output.2', 'torch.float16',
-          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1,
-          1, 1, 'Warning', '']
+          'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 0.5, 1.1e+10, 1, 0.85, 1,
+          9, 0.12, 0, 1, 1, 0.1, 1, 1,
+          'Yes', '']
 
 op_data = {
     'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3],
@@ -363,7 +367,7 @@ class TestUtilsMethods(unittest.TestCase):
                            'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '',
                            1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File']]
         result_all = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
-                       'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '',
+                       'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '',
                        1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File', '-1']]
         columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER + ['NPU_Stack_Info']
         result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype=object)
@@ -403,10 +407,10 @@ class TestUtilsMethods(unittest.TestCase):
                            'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '',
                            1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '']]
         result_all_test = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
-                            'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '',
+                            'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '',
                             1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '', '-1']]
         result_all = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
-                       'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '',
+                       'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '',
                        1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']]
         columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER
         result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype='object')
@@ -632,10 +636,10 @@ class TestUtilsMethods(unittest.TestCase):
     def test_do_multi_process(self):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']]
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']]
         o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
-                   'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported',
-                   'unsupported', 'unsupported',
+                   'torch.float32', 'torch.float32', [2, 2], [2, 2],
+                   'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
                    1, 1, 1, 1, 1, 1, 1, 1, 'None', 'No bench data matched.', '-1']]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -669,7 +673,7 @@ class TestUtilsMethods(unittest.TestCase):
         result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {})
 
         self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
-                                  'No bench data matched.'])
+                                  'unsupported', 'No bench data matched.'])
 
     def test_compare_by_op_2(self):
         npu_op_name = 'Functional.linear.0.forward.input.0'
@@ -691,7 +695,7 @@ class TestUtilsMethods(unittest.TestCase):
                                               {'Functional.linear.0.forward': {'input_args': [
                                                   {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}})
         self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
-                                  f'Dump file: {pt_path} not found.'])
+                                  'unsupported', f'Dump file: {pt_path} not found.'])
 
         pt_name = 'Functional.linear.0.forward.input.0.pt'
         pt_path = os.path.join(base_dir, pt_name)
@@ -699,13 +703,13 @@ class TestUtilsMethods(unittest.TestCase):
         input_param = {'npu_dump_data_dir': base_dir, 'bench_dump_data_dir': base_dir}
         result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {})
         self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
-                                  'Bench does not have data file.'])
+                                  'unsupported', 'Bench does not have data file.'])
 
         generate_pt(base_dir)
         result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param,
                                               {'Functional.linear.0.forward': {'input_args': [
                                                   {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}})
-        self.assertEqual(result, [1.0, 0.0, 0.0, 1.0, 1.0, ''])
+        self.assertEqual(result, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, ''])
 
     def test_get_bench_data_name_input(self):
         bench_op_name = "Functional.linear.0.forward.input.0"
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
index cee6d5565bf..5c10d96cb8d 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
@@ -471,7 +471,7 @@ class TestUtilsMethods(unittest.TestCase):
         error_flag = False
         err_msg = ''
         a, b = compare_ops_apply(n_value, b_value, error_flag, err_msg)
-        self.assertEqual(a, [1.0, 0.0, 0.0, 1.0, 1.0])
+        self.assertEqual(a, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0])
         self.assertEqual(b, '')
 
 
@@ -492,7 +492,7 @@ class TestGetEuclideanDistance(unittest.TestCase):
     def test_empty_tensor(self):
         # 测试当输入的张量为空时，返回 NAN
         n_value = np.array([])
-        b_value = np.array([1, 2, 3])
+        b_value = np.array([])
 
         result, msg = self.euc_distance.apply(n_value, b_value, None)
         self.assertEqual(result, CompareConst.NAN)
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py
index ab8703dcd35..2e9a4657266 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py
@@ -221,28 +221,34 @@ o_result_unmatch_2 = [
      'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None']
 ]
 o_result_unmatch_3 = [
-    ['Functional.conv2d.0.forward.input.0', 'N/A', 'torch.float32', 'N/A', [1, 1, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 'N/A', 3.029174327850342, -2.926689624786377, -0.06619918346405029, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+    ['Functional.conv2d.0.forward.input.0', 'N/A', 'torch.float32', 'N/A', [1, 1, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     3.029174327850342, -2.926689624786377, -0.06619918346405029, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
      'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.forward.input.1', 'N/A', 'torch.float32', 'N/A', [16, 1, 5, 5], 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 'N/A', 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+    ['Functional.conv2d.0.forward.input.1', 'N/A', 'torch.float32', 'N/A', [16, 1, 5, 5], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     0.19919930398464203, -0.19974489510059357, 0.006269412115216255, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
      'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.forward.input.2', 'N/A', 'torch.float32', 'N/A', [16], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+    ['Functional.conv2d.0.forward.input.2', 'N/A', 'torch.float32', 'N/A', [16], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     0.19734230637550354, -0.18177609145641327, 0.007903944700956345, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
      'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.forward.parameters.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A',
-     'N/A', 'N/A',
-     'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.forward.parameters.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A',
-     'N/A',
-     'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.forward.output.0', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 'N/A', 2.1166646480560303, -2.190781354904175, -0.003579073818400502, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+    ['Functional.conv2d.0.forward.parameters.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
+    ['Functional.conv2d.0.forward.parameters.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
+    ['Functional.conv2d.0.forward.output.0', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     2.1166646480560303, -2.190781354904175, -0.003579073818400502, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
      'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.parameters_grad.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
-    ['Functional.conv2d.0.parameters_grad.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A',
-     'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1']
+    ['Functional.conv2d.0.parameters_grad.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'],
+    ['Functional.conv2d.0.parameters_grad.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A',
+     'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
+     1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1']
 ]
 
 # test_merge_tensor
@@ -558,7 +564,7 @@ class TestUtilsMethods(unittest.TestCase):
         dump_mode = Const.ALL
         result_item = result_item_init(n_info, b_info, dump_mode)
         self.assertEqual(result_item, ['Tensor.add.0.forward.input.0', 'Tensor.add.0.forward.input.0',
-                                       'torch.float32', 'torch.float32', [96], [96], ' ', ' ', ' ', ' ', ' '])
+                                       'torch.float32', 'torch.float32', [96], [96], ' ', ' ', ' ', ' ', ' ', ' '])
 
         dump_mode = Const.SUMMARY
         result_item = result_item_init(n_info, b_info, dump_mode)
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py
index 9c2dea835fe..3fa16b0d9d4 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py
@@ -16,12 +16,12 @@ from test_acc_compare import generate_dump_json
 
 data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
          'torch.float32', 'torch.float32', [2, 2], [2, 2],
-         '', '', '', '', '',
+         '', '', '', '', '', '',
          1, 1, 1, 1, 1, 1, 1, 1,
          'Yes', '', '-1']]
 o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
            'torch.float32', 'torch.float32', [2, 2], [2, 2],
-           'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
+           'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported',
            1, 1, 1, 1, 1, 1, 1, 1,
            'None', 'No bench data matched.', '-1']]
 columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
@@ -34,9 +34,9 @@ class TestUtilsMethods(unittest.TestCase):
 
     def setUp(self):
         self.result_df = pd.DataFrame(columns=[
-            CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
-            CompareConst.ERROR_MESSAGE, CompareConst.ACCURACY,
-            CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO
+            CompareConst.COSINE, CompareConst.EUC_DIST, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
+            CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
+            CompareConst.ACCURACY, CompareConst.ERROR_MESSAGE
         ])
         os.makedirs(base_dir, mode=0o750, exist_ok=True)
         self.lock = threading.Lock()
@@ -72,9 +72,10 @@ class TestUtilsMethods(unittest.TestCase):
             cos_result=[0.99, 0.98],
             max_err_result=[0.01, 0.02],
             max_relative_err_result=[0.001, 0.002],
-            err_msgs=['', 'Error in comparison'],
+            euc_dist_result=[0.5, 0.49],
             one_thousand_err_ratio_result=[0.1, 0.2],
-            five_thousand_err_ratio_result=[0.05, 0.1]
+            five_thousand_err_ratio_result=[0.05, 0.1],
+            err_msgs=['', 'Error in comparison']
         )
         offset = 0
         updated_df = _save_cmp_result(offset, comparison_result, self.result_df, self.lock)
@@ -88,9 +89,10 @@ class TestUtilsMethods(unittest.TestCase):
             cos_result=[0.99],
             max_err_result=[],
             max_relative_err_result=[0.001],
-            err_msgs=[''],
+            euc_dist_result=[0.5],
             one_thousand_err_ratio_result=[0.1],
-            five_thousand_err_ratio_result=[0.05]
+            five_thousand_err_ratio_result=[0.05],
+            err_msgs=['']
         )
         with self.assertRaises(CompareException) as context:
             _save_cmp_result(0, comparison_result, self.result_df, self.lock)
-- 
Gitee


From 1fc978d5cd78390543e6b092d017b89d1951aaeb Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 11:14:06 +0800
Subject: [PATCH 03/25] compare add euclidean distance

---
 .../msprobe/core/compare/npy_compare.py       |  2 +-
 .../core_ut/compare/test_cmp_highlight.py     | 30 +++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index db391726c79..7147f4d3dba 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py
index f561a3e05ec..3261bce5d6d 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py
@@ -26,7 +26,7 @@ def generate_result_xlsx(base_dir):
     data_path = os.path.join(base_dir, 'target_result.xlsx')
     data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
              'torch.float32', 'torch.float32', [2, 2], [2, 2],
-             '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+             '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
             ]
     columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
     result_df = pd.DataFrame(data, columns=columns)
@@ -101,8 +101,8 @@ class TestUtilsMethods(unittest.TestCase):
         self.assertEqual(result, None)
 
     def test_CheckOneThousandErrorRatio_str(self):
-        api_in = [1, 1, 1, 1, 1, 1, 1, 1, 1, "unsupported"]
-        api_out = [1, 1, 1, 1, 1, 1, 1, 1, 1, "unsupported"]
+        api_in = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, "unsupported"]
+        api_out = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, "unsupported"]
         info = (api_in, api_out, 1)
         color_columns = ()
         dump_mode = Const.ALL
@@ -113,8 +113,8 @@ class TestUtilsMethods(unittest.TestCase):
 
     @patch("msprobe.core.compare.highlight.add_highlight_row_info")
     def test_CheckOneThousandErrorRatio_red(self, mock_add_highlight_row_info):
-        api_in = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-        api_out = [1, 1, 1, 1, 1, 1, 1, 1, 1, 0.5]
+        api_in = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, 1]
+        api_out = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, 0.5]
         info = (api_in, api_out, 1)
         ColorColumns = namedtuple('ColorColumns', ['red', 'yellow'])
         color_columns = ColorColumns(red=[], yellow=[])
@@ -315,7 +315,7 @@ class TestUtilsMethods(unittest.TestCase):
         columns = CompareConst.COMPARE_RESULT_HEADER
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '']
                 ]
         result_df = pd.DataFrame(data, columns=columns)
 
@@ -329,7 +329,7 @@ class TestUtilsMethods(unittest.TestCase):
     def test_highlight_rows_xlsx_red(self):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
                 ]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -342,7 +342,7 @@ class TestUtilsMethods(unittest.TestCase):
     def test_highlight_rows_xlsx_yellow(self):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
                 ]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -356,7 +356,7 @@ class TestUtilsMethods(unittest.TestCase):
     def test_highlight_rows_xlsx_malicious_columns(self, mock_save_book):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
                 ]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['=Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -378,10 +378,10 @@ class TestUtilsMethods(unittest.TestCase):
     def test_highlight_rows_xlsx_malicious_type(self, mock_save_book):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  '=torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'],
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'],
                 ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  '=torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
                 ]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -416,10 +416,10 @@ class TestUtilsMethods(unittest.TestCase):
     def test_update_highlight_err_msg(self):
         data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'],
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'],
                 ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                  'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                 '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
+                 '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']
                 ]
         columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name']
         result_df = pd.DataFrame(data, columns=columns)
@@ -433,10 +433,10 @@ class TestUtilsMethods(unittest.TestCase):
 
         t_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                    'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                   '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'a\nb', '-1'],
+                   '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'a\nb', '-1'],
                   ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0',
                    'torch.float32', 'torch.float32', [2, 2], [2, 2],
-                   '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'd', '-1']
+                   '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'd', '-1']
                   ]
         target_result_df = pd.DataFrame(t_data, columns=columns)
         self.assertTrue(result_df.equals(target_result_df))
-- 
Gitee


From a01d5cb49d528302c01c731c6efc32874aa9d2ac Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 11:26:18 +0800
Subject: [PATCH 04/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/compare/acc_compare.py     | 4 ++--
 debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
index 4f4bdd0b105..f0ac97a0293 100644
--- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py
@@ -311,9 +311,9 @@ class Comparator:
                 ]
 
                 if self.dump_mode == Const.SUMMARY:
-                    result_item = base_result_item + [" "] * 8
+                    result_item = base_result_item + [" "] * 8  # 8个统计量数据情况的比对指标
                 else:
-                    result_item = base_result_item + [" "] * 5
+                    result_item = base_result_item + [" "] * 6  # 6个真实数据情况的比对指标
 
                 npu_summary_data = npu_ops_all.get(ms_op_name).get("summary")
                 result_item.extend(npu_summary_data)
diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py
index 8509a7f38ad..de507e87665 100644
--- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py
+++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py
@@ -125,7 +125,8 @@ class MSComparator(Comparator):
             result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING
             result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.'
         else:
-            fill_cols = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
+            fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST,
+                         CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
                          CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
                          CompareConst.ERROR_MESSAGE]
             result_df.loc[~condition_no_bench, fill_cols] = ''
-- 
Gitee


From 739fe1cad25c7c7d8aeaf53e773a4d07aaeea429 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 15:25:43 +0800
Subject: [PATCH 05/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/common/const.py     |  2 +-
 .../msprobe/mindspore/compare/ms_graph_compare.py     | 11 ++++++-----
 .../mindspore_ut/compare/test_ms_graph_compare.py     |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py
index bf0883667c0..b60d06d10bf 100644
--- a/debug/accuracy_tools/msprobe/core/common/const.py
+++ b/debug/accuracy_tools/msprobe/core/common/const.py
@@ -469,7 +469,7 @@ class CompareConst:
         BENCH_MEAN: None, BENCH_NORM: None, ACCURACY: '', ERROR_MESSAGE: ''
     }
     MS_GRAPH_NPY = {
-        COSINE: None, MAX_ABS_ERR: None, MAX_RELATIVE_ERR: None, ONE_THOUSANDTH_ERR_RATIO: None,
+        COSINE: None, EUC_DIST: None, MAX_ABS_ERR: None, MAX_RELATIVE_ERR: None, ONE_THOUSANDTH_ERR_RATIO: None,
         FIVE_THOUSANDTHS_ERR_RATIO: None
     }
     MS_GRAPH_STATISTIC = {
diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py
index 701988ba483..153f4fd6552 100644
--- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py
+++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py
@@ -195,11 +195,12 @@ class GraphMSComparator:
             if not error_flag:
                 result_list, err_msg = compare_ops_apply(n_value, b_value, False, "")
                 result_dict[CompareConst.COSINE] = result_list[0]
-                result_dict[CompareConst.MAX_ABS_ERR] = result_list[1]
-                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[2]
-                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[3]
-                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[4]
-                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[1])
+                result_dict[CompareConst.EUC_DIST] = result_list[1]
+                result_dict[CompareConst.MAX_ABS_ERR] = result_list[2]
+                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[3]
+                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[4]
+                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[5]
+                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[2])
                 result_dict[CompareConst.ERROR_MESSAGE] = err_msg
 
             return pd.Series(result_dict)
diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py
index e3fd9348efe..c2e7c9368c3 100644
--- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py
+++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py
@@ -78,7 +78,7 @@ class TestMsGraphCompare(unittest.TestCase):
 
         result_correct = (
             f"[['{npu_file_path}', '{bench_file_path}', dtype('float16'), dtype('float16'), (10, 10), (10, 10), "
-            f"44.0, 44.0, 44.0, inf, 44.0, 44.0, 44.0, inf, 'Yes', '', 1.0, 0.0, 0.0, 1.0, 1.0]]")
+            f"44.0, 44.0, 44.0, inf, 44.0, 44.0, 44.0, inf, 'Yes', '', 1.0, 0.0, 0.0, 0.0, 1.0, 1.0]]")
 
         self.assertNotEqual(len(files), 0)
         self.assertEqual(result, result_correct)
-- 
Gitee


From 56fbae42a7d74aa162eb8dac7fe82a30f9d5b5b2 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 19:56:23 +0800
Subject: [PATCH 06/25] compare add euclidean distance

---
 .../core/compare/multiprocessing_compute.py   |  9 ++-
 .../msprobe/core/compare/npy_compare.py       | 36 +----------
 .../compare/test_acc_compare_npy_compare.py   | 62 -------------------
 3 files changed, 8 insertions(+), 99 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
index 20849afa920..f79671827c1 100644
--- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
+++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py
@@ -15,8 +15,11 @@
 
 import multiprocessing
 from dataclasses import dataclass
+from functools import partial
+
 import pandas as pd
 from tqdm import tqdm
+
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import CompareException
 from msprobe.core.common.const import CompareConst
@@ -44,7 +47,7 @@ def _handle_multi_process(func, input_parma, result_df, lock):
 
     progress_bar = tqdm(total=len(result_df), desc="API/Module Item Compare Process", unit="row", ncols=100)
 
-    def update_progress(size, progress_lock):
+    def update_progress(size, progress_lock, extra_param=None):
         with progress_lock:
             progress_bar.update(size)
 
@@ -54,8 +57,10 @@ def _handle_multi_process(func, input_parma, result_df, lock):
         result = pool.apply_async(func,
                                   args=(idx, op_name_mapping_dict, df_chunk, lock, input_parma),
                                   error_callback=err_call,
-                                  callback=update_progress(chunk_size, lock))
+                                  callback=partial(update_progress, chunk_size, lock)
+                                  )
         results.append(result)
+
     final_results = [r.get() for r in results]
     pool.close()
     pool.join()
diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index 7147f4d3dba..d060b4013f0 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -232,41 +232,7 @@ class GetEuclideanDistance(TensorComparisonBasic):
     def apply(self, n_value, b_value, relative_err):
         msg = ''
 
-        # 检查输入维度是否一致
-        if n_value.shape != b_value.shape:
-            msg = f"Cannot compare by Euclidean Distance, shapes of tensors do not match: \
-            npu:{n_value.shape} vs bench:{b_value.shape}"
-            return CompareConst.UNSUPPORTED, msg
-
-        # 检查输入是否为空
-        if n_value.size == 0 or b_value.size == 0:
-            msg = f"Cannot compare by Euclidean Distance, sizes of tensors must not be empty: \
-            npu:{n_value.size} vs bench:{b_value.size}"
-            return CompareConst.NAN, msg
-
-        # 检查是否包含 NaN 或 Inf
-        if np.any(np.isnan(n_value)) or np.any(np.isnan(b_value)):
-            msg = "Tensor contains NaN values."
-            return CompareConst.NAN, msg
-        if np.any(np.isinf(n_value)) or np.any(np.isinf(b_value)):
-            msg = "Tensor contains Inf values."
-            return CompareConst.NAN, msg
-
-        # 处理零向量
-        if np.all(n_value == 0) and np.all(b_value == 0):
-            return 0.0, "Zero tensors"
-
-        # 输入为标量
-        if np.ndim(n_value) == 0 or np.ndim(b_value) == 0:
-            msg = "Cannot compare by Euclidean Distance, input must be a vector, not a scalar."
-            return CompareConst.UNSUPPORTED, msg
-
-        # 大数值溢出
-        if np.any(np.abs(n_value) > 1e10) or np.any(np.abs(b_value) > 1e10):
-            msg = "tensors's values are large, which may cause overflow."
-
-        # 计算欧式距离
-        distance = np.linalg.norm(n_value - b_value)
+        distance = np.linalg.norm(n_value - b_value, ord=2)
 
         return distance, msg
 
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
index 5c10d96cb8d..c9096694a77 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
@@ -480,68 +480,6 @@ class TestGetEuclideanDistance(unittest.TestCase):
     def setUp(self):
         self.euc_distance = GetEuclideanDistance()
 
-    def test_shape_mismatch(self):
-        # 测试当两个张量的形状不匹配时，返回 UNSUPPORTED
-        n_value = np.array([1, 2, 3])
-        b_value = np.array([1, 2])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, CompareConst.UNSUPPORTED)
-        self.assertIn("Cannot compare by Euclidean Distance", msg)
-
-    def test_empty_tensor(self):
-        # 测试当输入的张量为空时，返回 NAN
-        n_value = np.array([])
-        b_value = np.array([])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, CompareConst.NAN)
-        self.assertIn("sizes of tensors must not be empty", msg)
-
-    def test_nan_in_tensor(self):
-        # 测试当张量包含 NaN 值时，返回 NAN
-        n_value = np.array([1, 2, np.nan])
-        b_value = np.array([1, 2, 3])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, CompareConst.NAN)
-        self.assertIn("Tensor contains NaN values", msg)
-
-    def test_inf_in_tensor(self):
-        # 测试当张量包含 Inf 值时，返回 NAN
-        n_value = np.array([1, 2, np.inf])
-        b_value = np.array([1, 2, 3])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, CompareConst.NAN)
-        self.assertIn("Tensor contains Inf values", msg)
-
-    def test_zero_tensors(self):
-        # 测试两个零张量的欧式距离
-        n_value = np.array([0, 0, 0])
-        b_value = np.array([0, 0, 0])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, 0.0)
-        self.assertIn("Zero tensors", msg)
-
-    def test_scalars(self):
-        # 测试当输入是标量时，返回 UNSUPPORTED
-        n_value = np.array(5)
-        b_value = np.array(10)
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertEqual(result, CompareConst.UNSUPPORTED)
-        self.assertIn("input must be a vector, not a scalar", msg)
-
-    def test_large_values(self):
-        # 测试当张量包含大值时，应该返回大数值溢出的警告
-        n_value = np.array([1e11, 1e11, 1e11])
-        b_value = np.array([1e10, 1e10, 1e10])
-
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
-        self.assertIn("tensors's values are large", msg)
-
     def test_euclidean_distance(self):
         # 测试计算两个张量之间的欧式距离
         n_value = np.array([1, 2, 3])
-- 
Gitee


From 02fa286f4b0216080d35f8222ab2a3ef14e971a1 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 19:58:16 +0800
Subject: [PATCH 07/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/compare/highlight.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/highlight.py b/debug/accuracy_tools/msprobe/core/compare/highlight.py
index d95729c6d85..ead0f6bda87 100644
--- a/debug/accuracy_tools/msprobe/core/compare/highlight.py
+++ b/debug/accuracy_tools/msprobe/core/compare/highlight.py
@@ -17,7 +17,6 @@ import abc
 import math
 import multiprocessing
 import re
-from abc import ABC
 from collections import namedtuple
 
 import numpy as np
-- 
Gitee


From da88a94a3195b69aa43d946a764aac8c16a72854 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Mon, 24 Feb 2025 20:05:22 +0800
Subject: [PATCH 08/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/compare/highlight.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/highlight.py b/debug/accuracy_tools/msprobe/core/compare/highlight.py
index ead0f6bda87..1983313249f 100644
--- a/debug/accuracy_tools/msprobe/core/compare/highlight.py
+++ b/debug/accuracy_tools/msprobe/core/compare/highlight.py
@@ -97,13 +97,6 @@ class CheckCosineSimilarity(HighlightCheck):
                                    "compared to the input/parameters's")
 
 
-class CheckEuclideanDistance(HighlightCheck):
-    """检查欧式距离"""
-
-    def apply(self, info, color_columns, dump_mode):
-        pass
-
-
 class CheckMaxRelativeDiff(HighlightCheck):
     """检查最大相对差异"""
 
@@ -157,8 +150,7 @@ class HighlightRules:
     compare_rules = {
         "check_order_magnitude": CheckOrderMagnitude(),
         "check_one_thousand_error": CheckOneThousandErrorRatio(),
-        "check_cosine_similarity": CheckCosineSimilarity(),
-        "check_euclidean_distance": CheckEuclideanDistance()
+        "check_cosine_similarity": CheckCosineSimilarity()
     }
     # 统计量数据检查规则
     summary_compare_rules = {
-- 
Gitee


From b3278e3e41bfd7a38b14e27f1511948979076d47 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Tue, 25 Feb 2025 11:48:50 +0800
Subject: [PATCH 09/25] compare add euclidean distance

---
 .../msprobe/core/compare/npy_compare.py       | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index d060b4013f0..cfd0db3f1fe 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -70,7 +70,7 @@ def get_error_flag_and_msg(n_value, b_value, error_flag=False, error_file=None):
         error_flag = True
         return CompareConst.NONE, CompareConst.NONE, error_flag, err_msg
     if not n_value.shape:  # 判断数据是否为0维张量
-        err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', "
+        err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', {CompareConst.EUC_DIST}"
                    f"'{CompareConst.ONE_THOUSANDTH_ERR_RATIO}' and '{CompareConst.FIVE_THOUSANDTHS_ERR_RATIO}'. ")
         error_flag = False  # 0-d tensor 最大绝对误差、最大相对误差仍然支持计算，因此error_flag设置为False，不做统一处理
         return n_value, b_value, error_flag, err_msg
@@ -170,7 +170,7 @@ class TensorComparisonBasic(abc.ABC):
     """NPU和bench中npy数据的比较模板"""
 
     @abc.abstractmethod
-    def apply(self, n_value, b_value, relative_err):
+    def apply(self, n_value, b_value, relative_err, err_msg):
         raise NotImplementedError
 
 
@@ -200,9 +200,9 @@ class GetCosineSimilarity(TensorComparisonBasic):
             return round(float(result), 6)
         return result
 
-    def apply(self, n_value, b_value, relative_err):
-        if not n_value.shape:
-            return CompareConst.UNSUPPORTED, ""
+    def apply(self, n_value, b_value, relative_err, err_msg):
+        if "This is type of 0-d tensor" in n_value:
+            return CompareConst.UNSUPPORTED, err_msg
 
         with np.errstate(divide="ignore", invalid="ignore"):
             if len(n_value) == 1:
@@ -229,7 +229,7 @@ class GetCosineSimilarity(TensorComparisonBasic):
 class GetEuclideanDistance(TensorComparisonBasic):
     """计算欧式距离"""
 
-    def apply(self, n_value, b_value, relative_err):
+    def apply(self, n_value, b_value, relative_err, err_msg):
         msg = ''
 
         distance = np.linalg.norm(n_value - b_value, ord=2)
@@ -240,7 +240,7 @@ class GetEuclideanDistance(TensorComparisonBasic):
 class GetMaxAbsErr(TensorComparisonBasic):
     """计算最大绝对误差"""
 
-    def apply(self, n_value, b_value, relative_err):
+    def apply(self, n_value, b_value, relative_err, err_msg):
         temp_res = n_value - b_value
         max_value = np.max(np.abs(temp_res))
         if np.isnan(max_value):
@@ -252,7 +252,7 @@ class GetMaxAbsErr(TensorComparisonBasic):
 class GetMaxRelativeErr(TensorComparisonBasic):
     """计算最大相对误差"""
 
-    def apply(self, n_value, b_value, relative_err):
+    def apply(self, n_value, b_value, relative_err, err_msg):
         max_relative_err = np.max(np.abs(relative_err))
         if np.isnan(max_relative_err):
             msg = "Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data."
@@ -266,9 +266,9 @@ class GetErrRatio(TensorComparisonBasic):
     def __init__(self, threshold):
         self.threshold = threshold
 
-    def apply(self, n_value, b_value, relative_err):
-        if not n_value.shape:
-            return CompareConst.UNSUPPORTED, ""
+    def apply(self, n_value, b_value, relative_err, err_msg):
+        if "This is type of 0-d tensor" in n_value:
+            return CompareConst.UNSUPPORTED, err_msg
 
         if not np.size(relative_err):
             return CompareConst.NAN, ""
@@ -312,7 +312,7 @@ def compare_ops_apply(n_value, b_value, error_flag, err_msg):
     n_value, b_value = reshape_value(n_value, b_value)
 
     for op in CompareOps.compare_ops.values():
-        result, msg = op.apply(n_value, b_value, relative_err)
+        result, msg = op.apply(n_value, b_value, relative_err, err_msg)
         result_list.append(result)
         err_msg += msg
     return result_list, err_msg
-- 
Gitee


From fc665daa328e8ef29655485a63a53632b64994d0 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Tue, 25 Feb 2025 14:23:44 +0800
Subject: [PATCH 10/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/compare/npy_compare.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index cfd0db3f1fe..6728b8b35f4 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -201,7 +201,7 @@ class GetCosineSimilarity(TensorComparisonBasic):
         return result
 
     def apply(self, n_value, b_value, relative_err, err_msg):
-        if "This is type of 0-d tensor" in n_value:
+        if "This is type of 0-d tensor" in err_msg:
             return CompareConst.UNSUPPORTED, err_msg
 
         with np.errstate(divide="ignore", invalid="ignore"):
@@ -267,7 +267,7 @@ class GetErrRatio(TensorComparisonBasic):
         self.threshold = threshold
 
     def apply(self, n_value, b_value, relative_err, err_msg):
-        if "This is type of 0-d tensor" in n_value:
+        if "This is type of 0-d tensor" in err_msg:
             return CompareConst.UNSUPPORTED, err_msg
 
         if not np.size(relative_err):
-- 
Gitee


From 23c9871ea5c66595fdcb81c9e228416bb7a3d12b Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Tue, 25 Feb 2025 14:25:27 +0800
Subject: [PATCH 11/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/core/compare/npy_compare.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index 6728b8b35f4..68a2e9bea28 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -230,11 +230,12 @@ class GetEuclideanDistance(TensorComparisonBasic):
     """计算欧式距离"""
 
     def apply(self, n_value, b_value, relative_err, err_msg):
-        msg = ''
+        if "This is type of 0-d tensor" in err_msg:
+            return CompareConst.UNSUPPORTED, err_msg
 
         distance = np.linalg.norm(n_value - b_value, ord=2)
 
-        return distance, msg
+        return distance, ""
 
 
 class GetMaxAbsErr(TensorComparisonBasic):
-- 
Gitee


From f6ec2cddfbdfa8b0c9cb61791c2b0a430e9e5457 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Tue, 25 Feb 2025 15:13:56 +0800
Subject: [PATCH 12/25] compare add euclidean distance

---
 .../msprobe/core/compare/npy_compare.py       |  2 +-
 .../compare/test_acc_compare_npy_compare.py   | 69 +++++++++++++------
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
index 68a2e9bea28..4103d361fec 100644
--- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
+++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py
@@ -70,7 +70,7 @@ def get_error_flag_and_msg(n_value, b_value, error_flag=False, error_file=None):
         error_flag = True
         return CompareConst.NONE, CompareConst.NONE, error_flag, err_msg
     if not n_value.shape:  # 判断数据是否为0维张量
-        err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', {CompareConst.EUC_DIST}"
+        err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', '{CompareConst.EUC_DIST}', "
                    f"'{CompareConst.ONE_THOUSANDTH_ERR_RATIO}' and '{CompareConst.FIVE_THOUSANDTHS_ERR_RATIO}'. ")
         error_flag = False  # 0-d tensor 最大绝对误差、最大相对误差仍然支持计算，因此error_flag设置为False，不做统一处理
         return n_value, b_value, error_flag, err_msg
diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
index c9096694a77..da315b657c8 100644
--- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
+++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py
@@ -113,7 +113,7 @@ class TestUtilsMethods(unittest.TestCase):
         n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, error_flag=error_flag)
 
         self.assertFalse(error_flag)
-        self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', "
+        self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', "
                                   "'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ")
 
     def test_get_error_flag_and_msg_shape_unmatch(self):
@@ -239,15 +239,17 @@ class TestUtilsMethods(unittest.TestCase):
         b_value_1 = np.array(1)
         relative_err = get_relative_err(n_value_1, b_value_1)
         n_value_1, b_value_1 = reshape_value(n_value_1, b_value_1)
-        result, err_msg = op.apply(n_value_1, b_value_1, relative_err)
+        err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. "
+        result, err_msg = op.apply(n_value_1, b_value_1, relative_err, err_msg)
         self.assertEqual(result, CompareConst.UNSUPPORTED)
-        self.assertEqual(err_msg, "")
+        self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ")
 
         n_value_2 = np.array([1, 2])
         b_value_2 = np.array([1, 2])
         relative_err = get_relative_err(n_value_2, b_value_2)
         n_value_2, b_value_2 = reshape_value(n_value_2, b_value_2)
-        result, err_msg = op.apply(n_value_2, b_value_2, relative_err)
+        err_msg = ""
+        result, err_msg = op.apply(n_value_2, b_value_2, relative_err, err_msg)
         self.assertEqual(result, 1.0)
         self.assertEqual(err_msg, "")
 
@@ -255,7 +257,8 @@ class TestUtilsMethods(unittest.TestCase):
         b_value_3 = np.array([0, 0])
         relative_err = get_relative_err(n_value_3, b_value_3)
         n_value_3, b_value_3 = reshape_value(n_value_3, b_value_3)
-        result, err_msg = op.apply(n_value_3, b_value_3, relative_err)
+        err_msg = ""
+        result, err_msg = op.apply(n_value_3, b_value_3, relative_err, err_msg)
         self.assertEqual(result, 1.0)
         self.assertEqual(err_msg, "")
 
@@ -263,7 +266,8 @@ class TestUtilsMethods(unittest.TestCase):
         b_value_4 = np.array([1, 2])
         relative_err = get_relative_err(n_value_4, b_value_4)
         n_value_4, b_value_4 = reshape_value(n_value_4, b_value_4)
-        result, err_msg = op.apply(n_value_4, b_value_4, relative_err)
+        err_msg = ""
+        result, err_msg = op.apply(n_value_4, b_value_4, relative_err, err_msg)
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, 'Cannot compare by Cosine Similarity, All the data is Zero in npu dump data.')
 
@@ -271,7 +275,8 @@ class TestUtilsMethods(unittest.TestCase):
         b_value_5 = np.array([0, 0])
         relative_err = get_relative_err(n_value_5, b_value_5)
         n_value_5, b_value_5 = reshape_value(n_value_5, b_value_5)
-        result, err_msg = op.apply(n_value_5, b_value_5, relative_err)
+        err_msg = ""
+        result, err_msg = op.apply(n_value_5, b_value_5, relative_err, err_msg)
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, 'Cannot compare by Cosine Similarity, All the data is Zero in Bench dump data.')
 
@@ -282,7 +287,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value_1 = np.array([1])
         relative_err = get_relative_err(n_value_1, b_value_1)
         n_value_1, b_value_1 = reshape_value(n_value_1, b_value_1)
-        result, err_msg = op.apply(n_value_1, b_value_1, relative_err)
+        err_msg = ""
+
+        result, err_msg = op.apply(n_value_1, b_value_1, relative_err, err_msg)
         self.assertEqual(result, CompareConst.UNSUPPORTED)
         self.assertEqual(err_msg, "This is a 1-d tensor of length 1.")
 
@@ -294,8 +301,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, "Cannot compare by Cosine Similarity, the dump data has NaN.")
@@ -319,8 +327,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([0, 0])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, 2.0)
         self.assertEqual(err_msg, "")
@@ -333,8 +342,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, "Cannot compare by MaxAbsError, the data contains nan/inf/-inf in dump data.")
@@ -347,8 +357,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, 1.0)
         self.assertEqual(err_msg, "")
@@ -361,8 +372,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, "Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data.")
@@ -375,8 +387,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, 0.5)
         self.assertEqual(err_msg, "")
@@ -387,11 +400,12 @@ class TestUtilsMethods(unittest.TestCase):
         n_value = np.array(1)   # 标量
         b_value = np.array(1)
         relative_err = np.array(0)
+        err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. "
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, CompareConst.UNSUPPORTED)
-        self.assertEqual(err_msg, "")
+        self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ")
 
     def test_GetThousandErrRatio_not_size(self):
         op = GetErrRatio(CompareConst.THOUSAND_RATIO_THRESHOLD)
@@ -399,8 +413,9 @@ class TestUtilsMethods(unittest.TestCase):
         n_value = np.array([1, 2])
         b_value = np.array([1, 2])
         relative_err = np.array([])     # 空数组
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, CompareConst.NAN)
         self.assertEqual(err_msg, "")
@@ -412,8 +427,9 @@ class TestUtilsMethods(unittest.TestCase):
         b_value = np.array([1, 1])
         relative_err = get_relative_err(n_value, b_value)
         n_value, b_value = reshape_value(n_value, b_value)
+        err_msg = ""
 
-        result, err_msg = op.apply(n_value, b_value, relative_err)
+        result, err_msg = op.apply(n_value, b_value, relative_err, err_msg)
 
         self.assertEqual(result, 0.5)
         self.assertEqual(err_msg, "")
@@ -480,12 +496,25 @@ class TestGetEuclideanDistance(unittest.TestCase):
     def setUp(self):
         self.euc_distance = GetEuclideanDistance()
 
-    def test_euclidean_distance(self):
+    def test_euclidean_distance_normal(self):
         # 测试计算两个张量之间的欧式距离
         n_value = np.array([1, 2, 3])
         b_value = np.array([4, 5, 6])
+        relative_err = None
+        err_msg = ""
 
-        result, msg = self.euc_distance.apply(n_value, b_value, None)
+        result, msg = self.euc_distance.apply(n_value, b_value, relative_err, err_msg)
         expected_distance = np.linalg.norm(n_value - b_value)
         self.assertEqual(result, expected_distance)
         self.assertEqual(msg, '')
+
+    def test_euclidean_distance_0d_tensor(self):
+        # 0-d tensor case: Euclidean distance is reported as unsupported and the incoming err_msg is returned unchanged
+        n_value = np.array(1)
+        b_value = np.array(1)
+        relative_err = None
+        err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. "
+
+        result, msg = self.euc_distance.apply(n_value, b_value, relative_err, err_msg)
+        self.assertEqual(result, CompareConst.UNSUPPORTED)
+        self.assertEqual(msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ")
-- 
Gitee


From 1fc646f16ad906f08dab3a6d12501a004974e101 Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Wed, 26 Feb 2025 10:05:25 +0800
Subject: [PATCH 13/25] compare add euclidean distance

---
 .../docs/10.accuracy_compare_PyTorch.md       | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md
index e98478de0e1..e1d521d4472 100644
--- a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md
+++ b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md
@@ -257,11 +257,11 @@ PyTorch 精度比对是以 CPU 或 GPU 的计算结果为标杆，通过计算
 
 统计量有 4 种：最大值（max）、最小值（min）、平均值（mean）和 L2-范数（L2 norm）。
 
-|dump 数据模式|Cosine (tensor 余弦相似度)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandth Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)|Data_Name (NPU 真实数据名)|
-|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
-|真实数据模式|√|√|√|√|√|||√||||√|√|√|√|
-|统计数据模式||||||√|√|√|||√||√|√||
-|MD5 模式|||||||||√|√|√|||√||
+|dump 数据模式|Cosine (tensor 余弦相似度)|EucDist (tensor 欧式距离)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandths Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)|Data_Name (NPU 真实数据名)|
+|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+|真实数据模式|√|√|√|√|√|√|||√||||√|√|√|√|
+|统计数据模式|||||||√|√|√|||√||√|√||
+|MD5 模式||||||||||√|√|√|||√||
 
 上表中NPU_Stack_Info字段需要配置-s参数生成。
 
@@ -320,7 +320,7 @@ MD5 模式：
 5. "This is empty data, can not compare."：读取到的数据为空（真实数据模式）；
 6. "Shape of NPU and bench Tensor do not match. Skipped."：NPU 和 Bench 的数据结构不一致（真实数据模式）；
 7. "The Position of inf or nan in NPU and bench Tensor do not match."：NPU 和 Bench 的数据有 nan/inf（真实数据模式）；
-8. "This is type of 0-d tensor, can not calculate 'Cosine', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'."：NPU 为0维张量（真实数据模式）；
+8. "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'."：NPU 为0维张量（真实数据模式）；
 9.  "Dtype of NPU and bench Tensor do not match."：NPU 和 Bench 数据的数据类型不同（真实数据模式）；
 10. ""：除以上情况的其余情况（真实数据模式、统计数据模式）。
 
@@ -330,13 +330,15 @@ MD5 模式：
 
 1. Cosine：通过计算两个向量的余弦值来判断其相似度，数值越接近于 1 说明计算出的两个张量越相似，实际可接受阈值为大于 0.99。在计算中可能会存在 nan，主要由于可能会出现其中一个向量为 0。
 
-2. MaxAbsErr：当最大绝对误差越接近 0 表示其计算的误差越小，实际可接受阈值为小于 0.001。
+2. EucDist：通过计算两个向量的欧式距离来判断其相似度，定义为多维空间中两个点之间的绝对距离。数值越接近0，张量越相似，数值越大，差异越大。
 
-3. MaxRelativeErr：当最大相对误差越接近 0 表示其计算的误差越小。
+3. MaxAbsErr：当最大绝对误差越接近 0 表示其计算的误差越小，实际可接受阈值为小于 0.001。
+
+4. MaxRelativeErr：当最大相对误差越接近 0 表示其计算的误差越小。
 
    当 dump 数据中存在 0 或 Nan 时，比对结果中最大相对误差则出现 inf 或 Nan 的情况，属于正常现象。
 
-4. One Thousandth Err Ratio（相对误差小于千分之一的元素比例）、Five Thousandths Err Ratio（相对误差小于千分之五的元素比例）精度指标：是指 NPU 的 Tensor 中的元素逐个与对应的标杆数据对比，相对误差小于千分之一、千分之五的比例占总元素个数的比例。该数据仅作为精度下降趋势的参考，并不参与计算精度是否通过的判定。
+5. One Thousandth Err Ratio（相对误差小于千分之一的元素比例）、Five Thousandths Err Ratio（相对误差小于千分之五的元素比例）精度指标：是指 NPU 的 Tensor 中的元素逐个与对应的标杆数据对比，相对误差小于千分之一、千分之五的比例占总元素个数的比例。该数据仅作为精度下降趋势的参考，并不参与计算精度是否通过的判定。
 
 ## 4 多卡比对结果提取汇总通信算子数据
 
-- 
Gitee


From a7137d267ad81c72a35fea1f475290038253f9ba Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Wed, 26 Feb 2025 16:26:49 +0800
Subject: [PATCH 14/25] bug_fix_for_matrix_rank

---
 .../cluster_analyse/analysis/comm_matrix_analysis.py            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/comm_matrix_analysis.py b/profiler/msprof_analyze/cluster_analyse/analysis/comm_matrix_analysis.py
index a87803438ae..2ad5797cc92 100644
--- a/profiler/msprof_analyze/cluster_analyse/analysis/comm_matrix_analysis.py
+++ b/profiler/msprof_analyze/cluster_analyse/analysis/comm_matrix_analysis.py
@@ -100,7 +100,6 @@ class CommMatrixAnalysis(BaseAnalysis):
                 tmp_link[f"{src_rank}-{dst_rank}"] = link_dict
             return tmp_link
 
-        project_local_global_rank_map = dict()
         default_value = {
             Constant.TRANSPORT_TYPE: '',
             Constant.TRANSIT_TIME_MS: 0,
@@ -109,6 +108,7 @@ class CommMatrixAnalysis(BaseAnalysis):
         }
         for op_name, op_dict in step_dict.items():
             link_info = defaultdict(lambda: copy.deepcopy(default_value))
+            project_local_global_rank_map = dict()
             for rank_id, rank_dict in op_dict.items():
                 process_link_key(rank_id, rank_dict)
             step_dict[op_name] = convert_local_to_global_rank()
-- 
Gitee


From c857b5c0ee82bb3c3656b05b3bcac5786d70935c Mon Sep 17 00:00:00 2001
From: lcw <lichangwei4@huawei.com>
Date: Tue, 25 Feb 2025 19:18:33 +0800
Subject: [PATCH 15/25] =?UTF-8?q?=E3=80=90Bugfix=E3=80=91=E8=B5=84?=
 =?UTF-8?q?=E6=96=99=E4=BF=AE=E6=94=B9=EF=BC=8C=E9=99=8D=E4=BD=8E=E6=BA=A2?=
 =?UTF-8?q?=E5=87=BA=E6=A3=80=E6=B5=8B=E5=8A=9F=E8=83=BD=E7=9A=84=E4=BC=98?=
 =?UTF-8?q?=E5=85=88=E7=BA=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 debug/accuracy_tools/msprobe/README.md | 38 +++++++++++++-------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/README.md b/debug/accuracy_tools/msprobe/README.md
index 0e68d1f8d9b..e31490f01e9 100644
--- a/debug/accuracy_tools/msprobe/README.md
+++ b/debug/accuracy_tools/msprobe/README.md
@@ -83,21 +83,21 @@ PyTorch 场景的[离线预检](./docs/07.accuracy_checker_PyTorch.md)和[在线
 
 MindSpore 动态图场景的[离线预检](./docs/09.accuracy_checker_MindSpore.md)
 
-### 3 精度比对
+### 3 分级可视化构图比对
 
-该功能进行 PyTorch 整网 API 粒度的数据 dump、精度比对，进而定位训练场景下的精度问题。
+该功能将msprobe工具dump的精度数据进行解析，还原模型图结构，实现模型各个层级的精度数据比对，方便用户理解模型结构、分析精度问题。
 
-[PyTorch 场景的精度比对](./docs/10.accuracy_compare_PyTorch.md)
+[PyTorch 场景的分级可视化构图比对](./docs/21.visualization_PyTorch.md)
 
-[MindSpore 场景的精度比对](./docs/11.accuracy_compare_MindSpore.md)
+[MindSpore 场景的分级可视化构图比对](./docs/22.visualization_MindSpore.md)
 
-### 4 溢出检测与解析
+### 4 精度比对
 
-溢出检测与解析是在执行精度数据 dump 时，判断是否存在输入正常但输出存在溢出的 API，从而判断是否为正常溢出。对应 config.json 中的 overflow_check。
+该功能进行 PyTorch 整网 API 粒度的数据 dump、精度比对，进而定位训练场景下的精度问题。
 
-[PyTorch 场景的溢出检测与解析](./docs/12.overflow_check_PyTorch.md)
+[PyTorch 场景的精度比对](./docs/10.accuracy_compare_PyTorch.md)
 
-[MindSpore 场景的溢出检测与解析](./docs/13.overflow_check_MindSpore.md)
+[MindSpore 场景的精度比对](./docs/11.accuracy_compare_MindSpore.md)
 
 ### 5 数据解析
 
@@ -129,27 +129,27 @@ MindSpore 动态图场景的[离线预检](./docs/09.accuracy_checker_MindSpore.
 
 [兼容 PyTorch 和 MindSpore 框架的训练状态监控](./docs/19.monitor.md)
 
-### 10 分级可视化构图比对
-
-该功能将msprobe工具dump的精度数据进行解析，还原模型图结构，实现模型各个层级的精度数据比对，方便用户理解模型结构、分析精度问题。
-
-[PyTorch 场景的分级可视化构图比对](./docs/21.visualization_PyTorch.md)
-
-[MindSpore 场景的分级可视化构图比对](./docs/22.visualization_MindSpore.md)
-
-
-### 11 单算子API自动生成脚本
+### 10 单算子API自动生成脚本
 
 该功能将msprobe工具dump的精度数据进行解析，自动生成单API脚本，用于复现整网中出现的算子问题，降低用户复现问题的成本，供开发分析算子问题。
 
 [PyTorch 单算子API自动生成脚本](./docs/23.generate_operator_PyTorch.md)
 
-### 12 数码关联
+### 11 数码关联
 
 该功能只支持 MindSpore 静态图场景，用于将IR图与dump数据进行关联，获取dump数据和代码调用栈的关联关系。
 
 [MindSpore 场景的数码关联](./docs/24.code_mapping_Mindspore.md)
 
+### 12 溢出检测与解析
+
+溢出检测与解析是在执行精度数据 dump 时，判断是否存在输入正常但输出存在溢出的 API，从而判断是否为正常溢出。对应 config.json 中的 overflow_check。 
+推荐直接使用[数据采集](#1-数据采集)功能采集统计量信息检测溢出问题。
+
+[PyTorch 场景的溢出检测与解析](./docs/12.overflow_check_PyTorch.md)
+
+[MindSpore 场景的溢出检测与解析](./docs/13.overflow_check_MindSpore.md)
+
 ## 📑 补充材料
 
 [无标杆比对功能在 PyTorch 场景的性能基线报告](./docs/S02.report_free_benchmarking_validation_performance_baseline.md)
-- 
Gitee


From 44c10ee85be3856cb3d99971f9948746435747b4 Mon Sep 17 00:00:00 2001
From: jiangchao_j <chaojiang_j@163.com>
Date: Wed, 26 Feb 2025 17:47:06 +0800
Subject: [PATCH 16/25] fix l2norm bug in acl dump

---
 .../ccsrc/core/AclDumpDataProcessor.cpp       | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp
index 0fe3443fa1f..72178d6486a 100644
--- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp
+++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp
@@ -56,23 +56,25 @@ constexpr const char* kStatsHeaderShape = "Shape";
 constexpr const char* kStatsHeaderMax = "Max Value";
 constexpr const char* kStatsHeaderMin = "Min Value";
 constexpr const char* kStatsHeaderAvg = "Avg Value";
-constexpr const char* kStatsHeaderL2Norm = "L2 Norm Value";
+constexpr const char* kStatsHeaderL2Norm = "l2norm";
+constexpr const char* kStatsHeaderL2NormInCsv = "L2Norm Value";
 constexpr const char* kStatsHeaderMD5 = "MD5 Value";
 constexpr const char* kStatsHeaderNan = "Nan Count";
+constexpr const char* kStatsHeaderNanInCsv = "NaN Count";
 constexpr const char* kStatsHeaderNegInf = "Negative Inf Count";
 constexpr const char* kStatsHeaderPosInf = "Positive Inf Count";
 constexpr const char* kRankId = "RANK_ID";
 constexpr const char* kDigitalNumbers = "0123456789";
 
-static const std::map<DebuggerSummaryOption, std::string> summaryOptionHeaderStrMap = {
-    {DebuggerSummaryOption::MAX, kStatsHeaderMax},
-    {DebuggerSummaryOption::MIN, kStatsHeaderMin},
-    {DebuggerSummaryOption::MEAN, kStatsHeaderAvg},
-    {DebuggerSummaryOption::L2NORM, kStatsHeaderL2Norm},
-    {DebuggerSummaryOption::NAN_CNT, kStatsHeaderNan},
-    {DebuggerSummaryOption::NEG_INF_CNT, kStatsHeaderNegInf},
-    {DebuggerSummaryOption::POS_INF_CNT, kStatsHeaderPosInf},
-    {DebuggerSummaryOption::MD5, kStatsHeaderMD5},
+static const std::map<DebuggerSummaryOption, std::pair<std::string, std::string>> summaryOptionHeaderStrMap = {
+    {DebuggerSummaryOption::MAX, {kStatsHeaderMax, kStatsHeaderMax}},
+    {DebuggerSummaryOption::MIN, {kStatsHeaderMin, kStatsHeaderMin}},
+    {DebuggerSummaryOption::MEAN, {kStatsHeaderAvg, kStatsHeaderAvg}},
+    {DebuggerSummaryOption::L2NORM, {kStatsHeaderL2Norm, kStatsHeaderL2NormInCsv}},
+    {DebuggerSummaryOption::NAN_CNT, {kStatsHeaderNan, kStatsHeaderNanInCsv}},
+    {DebuggerSummaryOption::NEG_INF_CNT, {kStatsHeaderNegInf, kStatsHeaderNegInf}},
+    {DebuggerSummaryOption::POS_INF_CNT, {kStatsHeaderPosInf, kStatsHeaderPosInf}},
+    {DebuggerSummaryOption::MD5, {kStatsHeaderMD5, kStatsHeaderMD5}},
 };
 
 class AclTensorStats {
@@ -170,7 +172,7 @@ static std::map<uint32_t, DebuggerSummaryOption> ParseTensorSummaryHeaderOrder(c
     for (uint32_t pos = 0; pos < segs.size(); ++pos) {
         const std::string& opt = segs[pos];
         for (auto it = summaryOptionHeaderStrMap.begin(); it != summaryOptionHeaderStrMap.end(); ++it) {
-            if (opt == it->second) {
+            if (opt == it->second.first) {
                 ret[pos] = it->first;
                 break;
             }
@@ -233,7 +235,7 @@ std::string AclTensorStats::GetCsvHeader() const
     ret.append("Op Type,Op Name,Task ID,Stream ID,Timestamp,Input/Output,Slot,Data Size,Data Type,Format,Shape");
     for (auto it = stats.begin(); it != stats.end(); it++) {
         ret.append(",");
-        ret.append(summaryOptionHeaderStrMap.at(it->first));
+        ret.append(summaryOptionHeaderStrMap.at(it->first).second);
     }
     ret.append("\n");
 
-- 
Gitee


From 604447ed375723ced665bce776015eb342461fcc Mon Sep 17 00:00:00 2001
From: Linwei-Ying <lwying007@126.com>
Date: Thu, 27 Feb 2025 15:11:25 +0800
Subject: [PATCH 17/25] compare add euclidean distance

---
 debug/accuracy_tools/msprobe/visualization/utils.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/visualization/utils.py b/debug/accuracy_tools/msprobe/visualization/utils.py
index 20a881e2cdb..acfc5b59124 100644
--- a/debug/accuracy_tools/msprobe/visualization/utils.py
+++ b/debug/accuracy_tools/msprobe/visualization/utils.py
@@ -181,11 +181,8 @@ class GraphConst:
     STR_MAX_LEN = 50
     SMALL_VALUE = 1e-3
     MD5_INDEX_LIST = [CompareConst.RESULT]
-    REAL_DATA_INDEX_LIST = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
-                            CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO]
-    SUMMARY_INDEX_LIST = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF,
-                          CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR,
-                          CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR]
+    REAL_DATA_INDEX_LIST = CompareConst.ALL_COMPARE_INDEX
+    SUMMARY_INDEX_LIST = CompareConst.SUMMARY_COMPARE_INDEX
     VALUE_INDEX_LIST = [Const.MAX, Const.MIN, Const.MEAN, Const.NORM]
     APIS_BETWEEN_MODULES = 'Apis_Between_Modules'
     NULL = 'null'
-- 
Gitee


From eeec5f495efa2c011baca0b79fae223bd25bfc91 Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Thu, 27 Feb 2025 15:37:18 +0800
Subject: [PATCH 18/25] cluster_adapt_msprof_text

---
 .../msprof_step_trace_time_adapter.py         |  56 ++++++++++
 .../analysis/step_trace_time_analysis.py      |  34 +++++-
 .../cluster_analyse/cluster_analysis.py       |  70 ++++++++----
 .../msprof_data_preprocessor.py               |  98 +++++++++++++++++
 .../base_communication_group.py               |   6 +-
 .../communication_json_group.py               |  14 ++-
 .../msprof_communication_matrix_adapter.py    | 102 ++++++++++++++++++
 .../msprof_communication_time_adapter.py      |  38 +++++++
 .../msprof_analyze/prof_common/constant.py    |   2 +
 profiler/msprof_analyze/prof_common/utils.py  |   7 ++
 10 files changed, 396 insertions(+), 31 deletions(-)
 create mode 100644 profiler/msprof_analyze/cluster_analyse/analysis/msprof_step_trace_time_adapter.py
 create mode 100644 profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
 create mode 100644 profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_matrix_adapter.py
 create mode 100644 profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_time_adapter.py

diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/msprof_step_trace_time_adapter.py b/profiler/msprof_analyze/cluster_analyse/analysis/msprof_step_trace_time_adapter.py
new file mode 100644
index 00000000000..5c34a0fb421
--- /dev/null
+++ b/profiler/msprof_analyze/cluster_analyse/analysis/msprof_step_trace_time_adapter.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2025, Huawei Technologies Co., Ltd
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from msprof_analyze.cluster_analyse.prof_bean.step_trace_time_bean import StepTraceTimeBean
+from msprof_analyze.prof_common.utils import convert_to_float
+from msprof_analyze.prof_common.file_manager import FileManager
+
+
+class MsprofStepTraceTimeAdapter:
+    """Build a single step-trace-time record from msprof timeline json events."""
+
+    # Keys of the record handed to StepTraceTimeBean.
+    COMPUTE = "Computing"
+    COMM_NOT_OVERLAP = "Communication(Not Overlapped)"
+    OVERLAPPED = "Overlapped"
+    COMMUNICATION = "Communication"
+    FREE = "Free"
+    STAGE = "Stage"
+    BUBBLE = "Bubble"
+    COMM_NOT_OVERLAP_EXCLUDE_RECEIVE = "Communication(Not Overlapped and Exclude Receive)"
+    PREPARE = "Preparing"
+
+    def __init__(self, file_path_list):
+        """Remember the json files to read and start every figure at zero."""
+        self.file_path_list = file_path_list
+        self._data = dict.fromkeys(
+            (self.COMPUTE, self.COMM_NOT_OVERLAP, self.OVERLAPPED, self.COMMUNICATION, self.FREE,
+             self.STAGE, self.BUBBLE, self.COMM_NOT_OVERLAP_EXCLUDE_RECEIVE, self.PREPARE), 0)
+
+    def generate_step_trace_time_data(self):
+        """Sum event durations by category and return them wrapped in a one-element list.
+
+        Events named exactly Communication / Computing / Free / Communication(Not Overlapped)
+        feed the matching figure; events whose name starts with ``hcom_receive`` feed Bubble.
+        The remaining figures are derived from those sums.
+        """
+        events = []
+        for file_path in self.file_path_list:
+            events.extend(FileManager.read_json_file(file_path))
+
+        summed_names = {self.COMMUNICATION, self.COMPUTE, self.FREE, self.COMM_NOT_OVERLAP}
+        grouped_events = {}
+        receive_events = []
+        for event in events:
+            name = event.get("name", "")
+            if name in summed_names:
+                grouped_events.setdefault(name, []).append(event)
+            elif name.startswith('hcom_receive'):
+                receive_events.append(event)
+
+        figures = self._data
+        for name, event_list in grouped_events.items():
+            figures[name] = sum(convert_to_float(item.get("dur", 0)) for item in event_list)
+        bubble = sum(convert_to_float(item.get("dur", 0)) for item in receive_events)
+        not_overlapped = figures[self.COMM_NOT_OVERLAP]
+        figures[self.BUBBLE] = bubble
+        figures[self.COMM_NOT_OVERLAP_EXCLUDE_RECEIVE] = not_overlapped - bubble
+        figures[self.OVERLAPPED] = figures[self.COMMUNICATION] - not_overlapped
+        # Stage is the end-to-end time (free + compute + exposed communication) without the bubble.
+        figures[self.STAGE] = figures[self.FREE] + figures[self.COMPUTE] + not_overlapped - bubble
+        return [StepTraceTimeBean(self._data)]
diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/msprof_analyze/cluster_analyse/analysis/step_trace_time_analysis.py
index 5168f63aef5..44675137922 100644
--- a/profiler/msprof_analyze/cluster_analyse/analysis/step_trace_time_analysis.py
+++ b/profiler/msprof_analyze/cluster_analyse/analysis/step_trace_time_analysis.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import re
 
 from msprof_analyze.prof_common.db_manager import DBManager
 from msprof_analyze.cluster_analyse.common_func.utils import increase_shared_value
@@ -21,6 +22,7 @@ from msprof_analyze.cluster_analyse.prof_bean.step_trace_time_bean import StepTr
 from msprof_analyze.prof_common.constant import Constant
 from msprof_analyze.prof_common.file_manager import FileManager
 from msprof_analyze.prof_common.logger import get_logger
+from msprof_analyze.cluster_analyse.analysis.msprof_step_trace_time_adapter import MsprofStepTraceTimeAdapter
 
 logger = get_logger()
 
@@ -40,6 +42,7 @@ class StepTraceTimeAnalysis:
         self.step_data_list = []
         self.data_type = param.get(Constant.DATA_TYPE)
         self.distributed_args = None
+        self.is_msprof = param.get(Constant.IS_MSPROF)
 
     @staticmethod
     def get_max_data_row(data_group_list: list):
@@ -50,6 +53,26 @@ class StepTraceTimeAnalysis:
             ret.append(max(item))
         return ret
 
+    @staticmethod
+    def find_msprof_json(path):
+        """Return the msprof timeline json files in ``path`` that carry the newest timestamp.
+
+        Files named msprof_<14 digits>.json take priority; msprof_slice_<digit>_<14 digits>.json
+        files are only used when no unsliced file exists. Returns [] when neither kind is found.
+        """
+        # Buckets are ordered by priority: unsliced exports first, then slices.
+        buckets = [(r'^msprof_\d{14}\.json$', {}), (r'^msprof_slice_\d{1}_\d{14}\.json$', {})]
+        for file_name in os.listdir(path):
+            for pattern, files_by_timestamp in buckets:
+                if re.match(pattern, file_name):
+                    timestamp = re.search(r"\d{14}", file_name).group()
+                    files_by_timestamp.setdefault(timestamp, []).append(os.path.join(path, file_name))
+                    break
+        for _, files_by_timestamp in buckets:
+            if files_by_timestamp:
+                return files_by_timestamp.get(max(files_by_timestamp.keys()))
+        return []
+
     def run(self, completed_processes, lock):
         self.load_step_trace_time_data()
         self.analyze_step_time()
     def run(self, completed_processes, lock):
         self.load_step_trace_time_data()
         self.analyze_step_time()
@@ -132,9 +155,14 @@ class StepTraceTimeAnalysis:
                 metadata = FileManager.read_json_file(metadata_path)
                 self.distributed_args = metadata.get(Constant.DISTRIBUTED_ARGS, None) if metadata else None
             if self.data_type == Constant.TEXT:
-                step_time_file = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.STEP_TIME_CSV)
-                if os.path.exists(step_time_file):
-                    self.step_time_dict[rank_id] = FileManager.read_csv_file(step_time_file, StepTraceTimeBean)
+                if self.is_msprof:
+                    msprof_json = self.find_msprof_json(os.path.join(profiling_dir_path, "mindstudio_profiler_output"))
+                    self.step_time_dict[rank_id] = MsprofStepTraceTimeAdapter(
+                        msprof_json).generate_step_trace_time_data()
+                else:
+                    step_time_file = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.STEP_TIME_CSV)
+                    if os.path.exists(step_time_file):
+                        self.step_time_dict[rank_id] = FileManager.read_csv_file(step_time_file, StepTraceTimeBean)
             else:
                 step_time_file = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT,
                                               Constant.DB_COMMUNICATION_ANALYZER)
diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py
index d7d71908506..1e90d0cad11 100644
--- a/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py
+++ b/profiler/msprof_analyze/cluster_analyse/cluster_analysis.py
@@ -21,6 +21,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
 from msprof_analyze.cluster_analyse.analysis.analysis_facade import AnalysisFacade
 from msprof_analyze.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor
 from msprof_analyze.cluster_analyse.cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor
+from msprof_analyze.cluster_analyse.cluster_data_preprocess.msprof_data_preprocessor import MsprofDataPreprocessor
 from msprof_analyze.cluster_analyse.communication_group.communication_group_generator import CommunicationGroupGenerator
 from msprof_analyze.prof_common.additional_args_manager import AdditionalArgsManager
 from msprof_analyze.prof_common.constant import Constant
@@ -47,6 +48,7 @@ ALL_FEATURE_LIST = COMM_FEATURE_LIST + get_all_recipes()
 class Interface:
     ASCEND_PT = "ascend_pt"
     ASCEND_MS = "ascend_ms"
+    PROF = "PROF_"
 
     def __init__(self, params: dict):
         self.collection_path = PathManager.get_realpath(params.get(Constant.PROFILING_PATH))
@@ -70,27 +72,38 @@ class Interface:
     def allocate_prof_data(self):
         ascend_pt_dirs = []
         ascend_ms_dirs = []
+        prof_dirs = []
         for root, dirs, _ in os.walk(self.collection_path):
             for dir_name in dirs:
                 if dir_name.endswith(self.ASCEND_PT):
                     ascend_pt_dirs.append(os.path.join(root, dir_name))
                 if dir_name.endswith(self.ASCEND_MS):
                     ascend_ms_dirs.append(os.path.join(root, dir_name))
+                if dir_name.startswith(self.PROF):
+                    prof_dirs.append(os.path.join(root, dir_name))
         pytorch_processor = PytorchDataPreprocessor(ascend_pt_dirs)
         pt_data_map = pytorch_processor.get_data_map()
-        data_type = pytorch_processor.get_data_type()
+        pt_data_type = pytorch_processor.get_data_type()
         ms_data_map = MindsporeDataPreprocessor(ascend_ms_dirs).get_data_map()
         if pt_data_map and ms_data_map:
             logger.error("Can not analyze pytorch and mindspore meantime.")
-            return []
-        return (pt_data_map, data_type) if pt_data_map else (ms_data_map, Constant.TEXT)
+            return {}
+        if pt_data_map:
+            return {Constant.DATA_MAP: pt_data_map, Constant.DATA_TYPE: pt_data_type, Constant.IS_MSPROF: False}
+        if ms_data_map:
+            return {Constant.DATA_MAP: ms_data_map, Constant.DATA_TYPE: Constant.TEXT, Constant.IS_MSPROF: False}
+        msprof_processor = MsprofDataPreprocessor(prof_dirs)
+        prof_data_map = msprof_processor.get_data_map()
+        prof_data_type = msprof_processor.get_data_type()
+        return {Constant.DATA_MAP: prof_data_map, Constant.DATA_TYPE: prof_data_type, Constant.IS_MSPROF: True}
 
     def run(self):
         PathManager.check_input_directory_path(self.collection_path)
         PathManager.check_input_directory_path(self.cluster_analysis_output_path)
         PathManager.check_path_owner_consistent([self.collection_path, self.cluster_analysis_output_path])
 
-        data_map, data_type = self.allocate_prof_data()
+        data_dict = self.allocate_prof_data()
+        data_map, data_type = data_dict.get(Constant.DATA_MAP), data_dict.get(Constant.DATA_TYPE)
         if not data_map:
             logger.warning("Can not get rank info or profiling data.")
             return
@@ -100,32 +113,33 @@ class Interface:
 
         params = {
             Constant.COLLECTION_PATH: self.collection_path,
+            Constant.ANALYSIS_MODE: self.analysis_mode,
             Constant.DATA_MAP: data_map,
             Constant.DATA_TYPE: data_type,
-            Constant.ANALYSIS_MODE: self.analysis_mode,
+            Constant.IS_MSPROF: data_dict.get(Constant.IS_MSPROF, False),
             Constant.CLUSTER_ANALYSIS_OUTPUT_PATH: self.cluster_analysis_output_path,
             Constant.DATA_SIMPLIFICATION: self.origin_params.get(Constant.DATA_SIMPLIFICATION, False),
             Constant.FORCE: self.force
         }
 
-        if self.analysis_mode in COMM_FEATURE_LIST:
-            FileManager.create_output_dir(self.cluster_analysis_output_path)
-            PathManager.check_path_writeable(self.cluster_analysis_output_path)
-            logger.info("Begin generate communication data.")
-            comm_data_dict = CommunicationGroupGenerator(params).generate()
-            logger.info("Communication data read completed.")
-            params[Constant.COMM_DATA_DICT] = comm_data_dict
-            AnalysisFacade(params).cluster_analyze()
-            logger.info("The cluster analysis result file has been generated: %s",
-                        self.cluster_analysis_output_path)
-            return
-
-        if data_type != Constant.DB:
-            logger.error("The current analysis node only supports DB as input data. Please check.")
-            return
-        FileManager.create_output_dir(self.cluster_analysis_output_path, is_overwrite=True)
-        self.origin_params.update(params)
-        AnalysisFacade(self.origin_params).recipe_analyze()
+        # Communication features run the same way for every data type; the remaining
+        # (recipe) modes are rejected for text data and dispatched to the recipes otherwise.
+        if self.analysis_mode in COMM_FEATURE_LIST:
+            FileManager.create_output_dir(self.cluster_analysis_output_path)
+            PathManager.check_path_writeable(self.cluster_analysis_output_path)
+            logger.info("Begin generate communication data.")
+            comm_data_dict = CommunicationGroupGenerator(params).generate()
+            logger.info("Communication data read completed.")
+            params[Constant.COMM_DATA_DICT] = comm_data_dict
+            AnalysisFacade(params).cluster_analyze()
+            logger.info("The cluster analysis result file has been generated: %s",
+                        self.cluster_analysis_output_path)
+        elif data_type == Constant.TEXT:
+            logger.error("The current analysis node only supports DB as input data. Please check.")
+        else:
+            FileManager.create_output_dir(self.cluster_analysis_output_path, is_overwrite=True)
+            self.origin_params.update(params)
+            AnalysisFacade(self.origin_params).recipe_analyze()
 
 
 def cluster_analysis_main():
diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
new file mode 100644
index 00000000000..04953b0edfc
--- /dev/null
+++ b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
@@ -0,0 +1,118 @@
+# Copyright (c) 2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import re
+from collections import defaultdict
+
+from msprof_analyze.cluster_analyse.cluster_data_preprocess.data_preprocessor import DataPreprocessor
+from msprof_analyze.prof_common.constant import Constant
+from msprof_analyze.prof_common.logger import get_logger
+from msprof_analyze.prof_common.file_manager import FileManager
+
+logger = get_logger()
+
+
+class MsprofDataPreprocessor(DataPreprocessor):
+    """Map rank ids to msprof profiling directories and record whether they hold db or text data."""
+
+    DEVICE_HEAD = "device_"
+    INFO_JSON_PATTERN = r"^info\.json\.\d{1,2}$"
+    DB_PATTERN = r"^msprof_\d{1,20}\.db$"
+
+    def __init__(self, path_list: list):
+        super().__init__(path_list)
+        # Data kinds (Constant.DB / Constant.TEXT) seen while building the data map.
+        self.data_type = set()
+
+    def get_data_map(self) -> dict:
+        """Build and return the ``rank id -> profiling directory`` mapping.
+
+        Directories without an info.json file, or whose data has not been exported/analysed, are
+        reported and skipped. Directories with a usable ``rank_id`` in info.json are grouped by it.
+        Only when no directory carries a rank id are they grouped by (hostUid, device id) and
+        numbered from 0 in sorted key order. When several directories share a key, the one that
+        sorts first on the second-to-last ``_``-separated field of its path is kept.
+        """
+        prof_data_uid = defaultdict(list)
+        prof_data_rank = defaultdict(list)
+        for dir_name in self.path_list:
+            info_json_file = self._find_info_json_file(dir_name)
+            if not info_json_file:
+                logger.error(f"Profiling data is not completed, please check the info.json file in the path {dir_name}")
+                continue
+
+            if self._check_db_type(dir_name):
+                self.data_type.add(Constant.DB)
+            elif os.path.exists(os.path.join(dir_name, "mindstudio_profiler_output")):
+                if os.path.exists(os.path.join(dir_name, "analyze")):
+                    self.data_type.add(Constant.TEXT)
+                else:
+                    logger.error(f"The profiling data has not been fully parsed.  You can parse it by executing "
+                                 f"the following command: msprof --analyze=on --output={dir_name}")
+                    continue
+            else:
+                logger.error(f"The profiling data has not been fully parsed.  You can parse it by executing "
+                             f"the following command: msprof --export=on --output={dir_name}; "
+                             f"msprof --analyze=on --output={dir_name}")
+                continue
+            info_json = FileManager.read_json_file(info_json_file)
+            rank_id = info_json.get("rank_id")
+            # A missing rank_id key is handled like the invalid marker, so no None rank is created.
+            if rank_id is not None and rank_id != Constant.INVALID_RETURN:
+                prof_data_rank[rank_id].append(dir_name)
+                continue
+            host_id = info_json.get("hostUid")
+            device_id = int(os.path.basename(info_json_file).split(".")[-1])
+            prof_data_uid[(host_id, device_id)].append(dir_name)
+
+        if prof_data_rank:
+            for rank_id, dir_list in prof_data_rank.items():
+                dir_list.sort(key=lambda x: x.split('_')[-2])
+                self.data_map[rank_id] = dir_list[0]
+        else:
+            ordered_keys = sorted(prof_data_uid.keys(), key=lambda x: (x[0], x[1]))
+            rank_id = 0
+            for key in ordered_keys:
+                dir_list = prof_data_uid[key]
+                dir_list.sort(key=lambda x: x.split('_')[-2])
+                self.data_map[rank_id] = dir_list[0]
+                rank_id += 1
+        return self.data_map
+
+    def get_data_type(self):
+        """Return the single data kind recorded by get_data_map, or Constant.INVALID if none or mixed."""
+        if len(self.data_type) == 1:
+            # Peek instead of pop so that repeated calls keep returning the same answer.
+            return next(iter(self.data_type))
+        return Constant.INVALID
+
+    def _find_info_json_file(self, dir_name):
+        """Return the first file matching INFO_JSON_PATTERN one level below dir_name, or None."""
+        for file_name in os.listdir(dir_name):
+            sub_dir = os.path.join(dir_name, file_name)
+            # dir_name may also hold plain files (e.g. the msprof_*.db checked below); listing one raises.
+            if not os.path.isdir(sub_dir):
+                continue
+            for device_file in os.listdir(sub_dir):
+                if re.match(self.INFO_JSON_PATTERN, device_file):
+                    return os.path.join(sub_dir, device_file)
+        return None
+
+    def _check_db_type(self, dir_name):
+        """Return True when dir_name directly contains a file matching DB_PATTERN."""
+        for file_name in os.listdir(dir_name):
+            if re.match(self.DB_PATTERN, file_name):
+                return True
+        return False
diff --git a/profiler/msprof_analyze/cluster_analyse/communication_group/base_communication_group.py b/profiler/msprof_analyze/cluster_analyse/communication_group/base_communication_group.py
index 2c02bfdbf1b..0df5214eb49 100644
--- a/profiler/msprof_analyze/cluster_analyse/communication_group/base_communication_group.py
+++ b/profiler/msprof_analyze/cluster_analyse/communication_group/base_communication_group.py
@@ -39,6 +39,7 @@ class BaseCommunicationGroup:
         self.data_map = params.get(Constant.DATA_MAP)
         self.data_type = params.get(Constant.DATA_TYPE)
         self.analysis_mode = params.get(Constant.ANALYSIS_MODE)
+        self.is_msprof = params.get(Constant.IS_MSPROF)
         self.rank_comm_dir_dict = {}
         self.p2p_link = []
         self.collective_group_dict = defaultdict(set)
@@ -54,8 +55,9 @@ class BaseCommunicationGroup:
         comm_op_dirs = []
         for rank_id, profiling_dir_path in self.data_map.items():
             if self.data_type == Constant.TEXT:
-                comm_dir = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.COMM_JSON)
-                matrix_dir = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.COMM_MATRIX_JSON)
+                output_dir = "analyze" if self.is_msprof else Constant.SINGLE_OUTPUT
+                comm_dir = os.path.join(profiling_dir_path, output_dir, Constant.COMM_JSON)
+                matrix_dir = os.path.join(profiling_dir_path, output_dir, Constant.COMM_MATRIX_JSON)
             else:
                 comm_dir = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.DB_COMMUNICATION_ANALYZER)
                 matrix_dir = comm_dir
diff --git a/profiler/msprof_analyze/cluster_analyse/communication_group/communication_json_group.py b/profiler/msprof_analyze/cluster_analyse/communication_group/communication_json_group.py
index 2975050da07..e6fd3b41eea 100644
--- a/profiler/msprof_analyze/cluster_analyse/communication_group/communication_json_group.py
+++ b/profiler/msprof_analyze/cluster_analyse/communication_group/communication_json_group.py
@@ -15,9 +15,13 @@
 
 import os
 from copy import deepcopy
- 
+
 from msprof_analyze.cluster_analyse.communication_group.base_communication_group import BaseCommunicationGroup
 from msprof_analyze.prof_common.file_manager import FileManager
+from msprof_analyze.cluster_analyse.communication_group.msprof_communication_matrix_adapter import \
+    MsprofCommunicationMatrixAdapter
+from msprof_analyze.cluster_analyse.communication_group.msprof_communication_time_adapter import \
+    MsprofCommunicationTimeAdapter
 
 
 class CommunicationJsonGroup(BaseCommunicationGroup):
@@ -42,7 +46,11 @@ class CommunicationJsonGroup(BaseCommunicationGroup):
         comm_data = {}
         matrix_data = {}
         if os.path.exists(comm_json_path) and self.analysis_mode in ["all", "communication_time"]:
-            comm_data = FileManager.read_json_file(comm_json_path)
+            comm_data = MsprofCommunicationTimeAdapter(
+                comm_json_path).generate_comm_time_data() if self.is_msprof else FileManager.read_json_file(
+                comm_json_path)
         if os.path.exists(matrix_json_path) and self.analysis_mode in ["all", "communication_matrix"]:
-            matrix_data = FileManager.read_json_file(matrix_json_path)
+            matrix_data = MsprofCommunicationMatrixAdapter(
+                matrix_json_path).generate_comm_matrix_data() if self.is_msprof else FileManager.read_json_file(
+                matrix_json_path)
         return rank_id, comm_data, matrix_data
diff --git a/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_matrix_adapter.py b/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_matrix_adapter.py
new file mode 100644
index 00000000000..7f1aef80b96
--- /dev/null
+++ b/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_matrix_adapter.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2025, Huawei Technologies Co., Ltd
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from collections import defaultdict
+
+from msprof_analyze.prof_common.file_manager import FileManager
+from msprof_analyze.prof_common.constant import Constant
+from msprof_analyze.prof_common.logger import get_logger
+
+from msprof_analyze.prof_common.utils import compute_ratio
+
+logger = get_logger()
+
+
+class MsprofCommunicationMatrixAdapter:
+    """Convert an msprof communication matrix json file into per-op-type summary entries."""
+
+    P2P_HCOM = ["hcom_send", "hcom_receive", "hcom_batchsendrecv"]
+    # NOTE(review): alternatives are tried left to right at each position, so e.g. "reducescatter"
+    # is reported as "reduce" and "alltoallv" as "alltoall" - confirm this grouping is intended.
+    HCCL_PATTERN = r"send|reduce|invalid|broadcast|allreduce|" \
+                   r"receive|allgather|reducescatter|scatter|alltoall|alltoallv|alltoallvc|batchsendrecv"
+    # Compiled once at class creation instead of once per communication op.
+    _HCCL_REGEX = re.compile(HCCL_PATTERN)
+    BANDWIDTH_GB_S = "Bandwidth(GB/s)"
+    TRANSPORT_TYPE = "Transport Type"
+    TRANSIT_SIZE_MB = "Transit Size(MB)"
+    TRANSIT_TIME_MS = "Transit Time(ms)"
+
+    def __init__(self, file_path):
+        self.file_path = file_path
+
+    def generate_comm_matrix_data(self):
+        """Read the matrix file and return its p2p and collective summaries under a "step" key.
+
+        Ops whose lower-cased name starts with a P2P_HCOM prefix are treated as p2p, ops starting
+        with Constant.TOTAL are dropped, and everything else is treated as collective.
+        """
+        output_comm_matrix = {"step": {Constant.P2P: {}, Constant.COLLECTIVE: {}}}
+        comm_matrix_data = FileManager.read_json_file(self.file_path)
+        split_comm_dict = {Constant.P2P: {}, Constant.COLLECTIVE: {}}
+        for communication_op, comm_matrix_info in comm_matrix_data.items():
+            lower_op_name = communication_op.lower()
+            if any(lower_op_name.startswith(start_str) for start_str in self.P2P_HCOM):
+                split_comm_dict[Constant.P2P][communication_op] = comm_matrix_info
+            elif lower_op_name.startswith(Constant.TOTAL):
+                continue
+            else:
+                split_comm_dict[Constant.COLLECTIVE][communication_op] = comm_matrix_info
+        output_comm_matrix["step"][Constant.P2P] = self.integrate_matrix_data(
+            self.get_comm_type(split_comm_dict[Constant.P2P]))
+        output_comm_matrix["step"][Constant.COLLECTIVE] = self.integrate_matrix_data(
+            self.get_comm_type(split_comm_dict[Constant.COLLECTIVE]))
+        return output_comm_matrix
+
+    def get_comm_type(self, op_data: dict) -> dict:
+        """Group link records by (op type, text after the last "@", link).
+
+        The op type is the first HCCL_PATTERN match in the lower-cased op name; when nothing
+        matches, the text before "__" is used and a warning is logged. Each link record is
+        tagged in place with an 'Op Name' entry holding the text before the first "@".
+        """
+        new_comm_op_dict = defaultdict(list)
+        for communication_op, communication_info in op_data.items():
+            match_obj = self._HCCL_REGEX.search(communication_op.lower())
+            if match_obj:
+                comm_op_type = match_obj.group()
+            else:
+                comm_op_type = communication_op.split("__")[0]
+                logger.warning(f"Unknown communication op type: {comm_op_type}")
+            name_parts = communication_op.split("@")
+            for link, data in communication_info.items():
+                data['Op Name'] = name_parts[0]
+                new_comm_op_dict[(comm_op_type, name_parts[-1], link)].append(data)
+        return new_comm_op_dict
+
+    def integrate_matrix_data(self, new_comm_op_dict: dict):
+        """Summarise each sample list into top1 / middle / bottom1-3 / total entries.
+
+        Samples are ranked by bandwidth (highest first). The total entry sums transit size and
+        time, takes the transport type of the top sample and recomputes the bandwidth.
+        """
+        comm_op_dict = defaultdict(dict)
+        for new_comm_op_name, data in new_comm_op_dict.items():
+            # A sample without a bandwidth field ranks as 0 instead of raising KeyError,
+            # matching the .get() defaults used for the other fields below.
+            data.sort(key=lambda x: x.get(self.BANDWIDTH_GB_S, 0), reverse=True)
+            t_type = data[0].get(self.TRANSPORT_TYPE, '')
+            t_size = sum(x.get(self.TRANSIT_SIZE_MB, 0) for x in data)
+            t_time = sum(x.get(self.TRANSIT_TIME_MS, 0) for x in data)
+            bandwidth = compute_ratio(t_size, t_time)
+
+            op_type, group_name, link = new_comm_op_name[0], new_comm_op_name[1], new_comm_op_name[2]
+            comm_op_dict[f'{op_type}-top1@{group_name}'].update({link: data[0]})
+            comm_op_dict[f'{op_type}-middle@{group_name}'].update({link: data[len(data) // 2]})
+            comm_op_dict[f'{op_type}-bottom1@{group_name}'].update({link: data[-1]})
+            comm_op_dict[f'{op_type}-total@{group_name}'].update({link: {
+                self.TRANSPORT_TYPE: t_type,
+                self.TRANSIT_SIZE_MB: t_size,
+                self.TRANSIT_TIME_MS: t_time,
+                self.BANDWIDTH_GB_S: bandwidth
+            }})
+            if len(data) >= 2:
+                comm_op_dict[f'{op_type}-bottom2@{group_name}'].update({link: data[-2]})
+            if len(data) >= 3:
+                comm_op_dict[f'{op_type}-bottom3@{group_name}'].update({link: data[-3]})
+        return comm_op_dict
diff --git a/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_time_adapter.py b/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_time_adapter.py
new file mode 100644
index 00000000000..7b63b700f5c
--- /dev/null
+++ b/profiler/msprof_analyze/cluster_analyse/communication_group/msprof_communication_time_adapter.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2025, Huawei Technologies Co., Ltd
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from msprof_analyze.prof_common.file_manager import FileManager
+from msprof_analyze.prof_common.constant import Constant
+
+
+class MsprofCommunicationTimeAdapter:
+    """Regroup an msprof communication json file into p2p and collective ops under a "step" key."""
+
+    P2P_HCOM = ["hcom_send", "hcom_receive", "hcom_batchsendrecv"]
+    TOTAL = "total"
+
+    def __init__(self, file_path):
+        self.file_path = file_path
+
+    def generate_comm_time_data(self):
+        """Read the file and return {"step": {Constant.P2P: {...}, Constant.COLLECTIVE: {...}}}.
+
+        Ops whose lower-cased name starts with a P2P_HCOM prefix go to the p2p group, ops starting
+        with TOTAL are dropped, and all remaining ops go to the collective group.
+        """
+        p2p_ops, collective_ops = {}, {}
+        p2p_prefixes = tuple(self.P2P_HCOM)
+        for op_name, op_info in FileManager.read_json_file(self.file_path).items():
+            normalized = op_name.lower()
+            if normalized.startswith(p2p_prefixes):
+                p2p_ops[op_name] = op_info
+            elif not normalized.startswith(self.TOTAL):
+                collective_ops[op_name] = op_info
+        return {"step": {Constant.P2P: p2p_ops, Constant.COLLECTIVE: collective_ops}}
diff --git a/profiler/msprof_analyze/prof_common/constant.py b/profiler/msprof_analyze/prof_common/constant.py
index 5353fc6d40f..f34aeade895 100644
--- a/profiler/msprof_analyze/prof_common/constant.py
+++ b/profiler/msprof_analyze/prof_common/constant.py
@@ -61,6 +61,7 @@ class Constant(object):
     # communication
     P2P = "p2p"
     COLLECTIVE = "collective"
+    TOTAL = "total"
     STEP_ID = "step_id"
     RANK_ID = "rank_id"
     GROUP_NAME = "group_name"
@@ -97,6 +98,7 @@ class Constant(object):
     TRANSPORT_TYPE = "Transport Type"
     COMM_DATA_DICT = "comm_data_dict"
     DATA_TYPE = "data_type"
+    IS_MSPROF = "is_prof"
 
     # step time
     RANK = "rank"
diff --git a/profiler/msprof_analyze/prof_common/utils.py b/profiler/msprof_analyze/prof_common/utils.py
index 005d8505c9c..5c083256633 100644
--- a/profiler/msprof_analyze/prof_common/utils.py
+++ b/profiler/msprof_analyze/prof_common/utils.py
@@ -91,3 +91,10 @@ def convert_to_int(num):
     except (ValueError, NameError):
         logger.error(f"Can not convert %s to int", num)
     return 0
+
+
+def compute_ratio(dividend: float, divisor: float):
+    """Return dividend / divisor rounded to 4 decimal places, or 0 when divisor is (almost) zero."""
+    # Divisors with magnitude below 1e-15 are treated as zero to avoid dividing by ~0.
+    divisor_is_zero = abs(divisor) < 1e-15
+    return 0 if divisor_is_zero else round(dividend / divisor, 4)
-- 
Gitee


From 572b0b6fa96a8df74c44990b62ad2e0abacdcf9a Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Thu, 27 Feb 2025 17:20:03 +0800
Subject: [PATCH 19/25] mstx_sum support range

---
 .../cluster_analyse/common_func/context.py    |   7 +-
 .../recipes/mstx_sum/mstx_sum.py              | 109 +++++++++++-------
 ...tx_mark_export.py => mstx_event_export.py} |  50 +++++++-
 3 files changed, 119 insertions(+), 47 deletions(-)
 rename profiler/msprof_analyze/prof_exports/{mstx_mark_export.py => mstx_event_export.py} (58%)

diff --git a/profiler/msprof_analyze/cluster_analyse/common_func/context.py b/profiler/msprof_analyze/cluster_analyse/common_func/context.py
index b41972c0d21..cde351508c0 100644
--- a/profiler/msprof_analyze/cluster_analyse/common_func/context.py
+++ b/profiler/msprof_analyze/cluster_analyse/common_func/context.py
@@ -84,7 +84,12 @@ class ConcurrentContext(Context):
 
     def map(self, func, *iterables, **kwargs):
         partial_func = partial(func, **kwargs)
-        return list(self._executor.map(partial_func, *iterables))
+        try:
+            res = list(self._executor.map(partial_func, *iterables))
+        except Exception as err:
+            logger.error(err)
+            return []
+        return res
 
     def wait(self, waitable):
         return waitable
diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/mstx_sum/mstx_sum.py b/profiler/msprof_analyze/cluster_analyse/recipes/mstx_sum/mstx_sum.py
index bfbcc6ffb49..db6aae0de86 100644
--- a/profiler/msprof_analyze/cluster_analyse/recipes/mstx_sum/mstx_sum.py
+++ b/profiler/msprof_analyze/cluster_analyse/recipes/mstx_sum/mstx_sum.py
@@ -21,7 +21,7 @@ from msprof_analyze.cluster_analyse.common_func.utils import describe_duration
 from msprof_analyze.cluster_analyse.recipes.base_recipe_analysis import BaseRecipeAnalysis
 from msprof_analyze.prof_common.constant import Constant
 from msprof_analyze.prof_common.logger import get_logger
-from msprof_analyze.prof_exports.mstx_mark_export import MstxMarkExport
+from msprof_analyze.prof_exports.mstx_event_export import MstxMarkExport, MstxRangeExport
 from msprof_analyze.prof_exports.mstx_step_export import MstxStepExport
 
 logger = get_logger()
@@ -43,16 +43,28 @@ def format_mark_info(df: pd.DataFrame, start_idx, stop_idx, name) -> MarkInfo:
     )
 
 
-def rename_mark_msg_name(mark_stats_df: pd.DataFrame):
+def format_range_info(df: pd.DataFrame, idx, name) -> MarkInfo:
+    range_series = df.iloc[idx]
+    return MarkInfo(
+        name=name,
+        framework_duration=float(0),
+        cann_duration=float(range_series["cann_end_ts"] - range_series["cann_start_ts"]),
+        device_duration=float(range_series["device_end_ts"] - range_series["device_start_ts"]),
+        tid=range_series["tid"],
+        start_ns=range_series["cann_start_ts"]
+    )
+
+
+def rename_mark_msg_name(mstx_stats_df: pd.DataFrame):
     msg_idx_counter = {}
-    for idx, mark_info in enumerate(mark_stats_df.itertuples(index=False)):
+    for idx, mark_info in enumerate(mstx_stats_df.itertuples(index=False)):
         msg_idx_counter.setdefault(mark_info.step_id, {}).setdefault(mark_info.name, []).append(idx)
     for msg_dict in msg_idx_counter.values():
         for msg, idx_list in msg_dict.items():
             if len(idx_list) <= 1:
                 continue
             for i, idx in enumerate(idx_list):
-                mark_stats_df.loc[idx, 'name'] = f"{msg}_{i}"
+                mstx_stats_df.loc[idx, 'name'] = f"{msg}_{i}"
 
 
 def compute_step_id(mark_stat, step_stats_df: pd.DataFrame):
@@ -80,6 +92,45 @@ def format_columns(df: pd.DataFrame):
     return formatted_df[cols]
 
 
+def handle_mark_data(mark_df: pd.DataFrame, rank_id: int) -> list:
+    res = []
+    mark_df["framework_ts"] = mark_df["framework_ts"].astype("int64")
+    mark_info = {}
+    mismatch_msg = []
+    for idx, row in enumerate(mark_df.itertuples(index=False)):
+        if row.msg.endswith(MstxSum.START_SUFFIX):
+            msg = row.msg[:-len(MstxSum.START_SUFFIX)]
+            mark_info.setdefault(row.tid, {}).setdefault(msg, []).append(idx)
+        elif row.msg.endswith(MstxSum.STOP_SUFFIX):
+            msg = row.msg[:-len(MstxSum.STOP_SUFFIX)]
+            idx_list = mark_info.get(row.tid, {}).get(msg, [])
+            if not idx_list:
+                mismatch_msg.append((row.msg, idx))
+                continue
+            start_idx = idx_list.pop()
+            res.append(format_mark_info(mark_df, start_idx, idx, msg))
+
+    # Collect the start marks that were left without a matching stop mark
+    for msg_info in mark_info.values():
+        for msg, idx_list in msg_info.items():
+            if not idx_list:
+                continue
+            mismatch_msg.extend((msg + MstxSum.START_SUFFIX, idx) for idx in idx_list)
+    if mismatch_msg:
+        mismatch_msg.sort(key=lambda msg: msg[1])
+        logger.warning(f"The following mark messages do not match anyone in "
+                       f"rank {rank_id}: {','.join(msg[0] for msg in mismatch_msg)}.")
+
+    return res
+
+
+def handle_range_data(range_df: pd.DataFrame) -> list:
+    res = []
+    for idx, row in enumerate(range_df.itertuples(index=False)):
+        res.append(format_range_info(range_df, idx, row.msg))
+    return res
+
+
 class MstxSum(BaseRecipeAnalysis):
     TABLE_FRAMEWORK_STATS = "MSTXAllFrameworkStats"
     TABLE_CANN_STATS = "MSTXAllCannStats"
@@ -159,40 +210,18 @@ class MstxSum(BaseRecipeAnalysis):
         if step_df is None or step_df.empty:
             step_df = pd.DataFrame({"start_ns": [0], "end_ns": [float("inf")], "step_id": [0]})
         mark_df = MstxMarkExport(profiler_db_path, analysis_class, step_range).read_export_db()
-        if mark_df is None or mark_df.empty:
-            logger.warning(f"There is no mark data in {profiler_db_path}.")
+        range_df = MstxRangeExport(profiler_db_path, analysis_class, step_range).read_export_db()
+        mstx_res = []
+        if not mark_df.empty:
+            mstx_res += handle_mark_data(mark_df, rank_id)
+        if not range_df.empty:
+            mstx_res += handle_range_data(range_df)
+        if not mstx_res:
+            logger.warning(f"There is no mstx data in {profiler_db_path}.")
             return None
-        mark_df["framework_ts"] = mark_df["framework_ts"].astype("int64")
-
-        mark_info = {}
-        mark_res = []
-        mismatch_msg = []
-        for idx, row in enumerate(mark_df.itertuples(index=False)):
-            if row.msg.endswith(MstxSum.START_SUFFIX):
-                msg = row.msg[:-len(MstxSum.START_SUFFIX)]
-                mark_info.setdefault(row.tid, {}).setdefault(msg, []).append(idx)
-            elif row.msg.endswith(MstxSum.STOP_SUFFIX):
-                msg = row.msg[:-len(MstxSum.STOP_SUFFIX)]
-                idx_list = mark_info.get(row.tid, {}).get(msg, [])
-                if not idx_list:
-                    mismatch_msg.append((row.msg, idx))
-                    continue
-                start_idx = idx_list.pop()
-                mark_res.append(format_mark_info(mark_df, start_idx, idx, msg))
-
-        # 统计未匹配上的mark信息
-        for msg_info in mark_info.values():
-            for msg, idx_list in msg_info.items():
-                if not idx_list:
-                    continue
-                mismatch_msg.extend((msg + MstxSum.START_SUFFIX, idx) for idx in idx_list)
-        if mismatch_msg:
-            mismatch_msg.sort(key=lambda msg: msg[1])
-            logger.warning(f"The following mark messages do not match anyone in "
-                           f"rank {rank_id}: {','.join(msg[0] for msg in mismatch_msg)}.")
-
-        mark_stats_df = pd.DataFrame(mark_res).assign(Rank=rank_id)
-        mark_stats_df["step_id"] = mark_stats_df.apply(compute_step_id, axis=1, step_stats_df=step_df)
-        rename_mark_msg_name(mark_stats_df)
-        mark_stats_df = format_columns(mark_stats_df).set_index("Name", drop=True)
-        return mark_stats_df
+
+        mstx_stats_df = pd.DataFrame(mstx_res).assign(Rank=rank_id)
+        mstx_stats_df["step_id"] = mstx_stats_df.apply(compute_step_id, axis=1, step_stats_df=step_df)
+        rename_mark_msg_name(mstx_stats_df)
+        mstx_stats_df = format_columns(mstx_stats_df).set_index("Name", drop=True)
+        return mstx_stats_df
diff --git a/profiler/msprof_analyze/prof_exports/mstx_mark_export.py b/profiler/msprof_analyze/prof_exports/mstx_event_export.py
similarity index 58%
rename from profiler/msprof_analyze/prof_exports/mstx_mark_export.py
rename to profiler/msprof_analyze/prof_exports/mstx_event_export.py
index 6a7f8d0c6d2..97c3813b7eb 100644
--- a/profiler/msprof_analyze/prof_exports/mstx_mark_export.py
+++ b/profiler/msprof_analyze/prof_exports/mstx_event_export.py
@@ -16,7 +16,7 @@
 from msprof_analyze.prof_exports.base_stats_export import BaseStatsExport
 from msprof_analyze.prof_common.constant import Constant
 
-QUERY = """
+MARK_QUERY = """
 WITH
     FRAMEWORK_API AS (
         SELECT
@@ -46,7 +46,8 @@ LEFT JOIN
 LEFT JOIN
     STRING_IDS AS MSG_IDS
     ON MSTX_EVENTS.message == MSG_IDS.id
-{}
+WHERE 
+    MSTX_EVENTS.eventType == 3 {}
 ORDER BY
     MSTX_EVENTS.startNs
     """
@@ -61,9 +62,46 @@ class MstxMarkExport(BaseStatsExport):
     def get_query_statement(self):
         if self._step_range:
             filter_statement_1 = f"WHERE PYTORCH_API.startNs >= {self._step_range.get(Constant.START_NS)} " \
-                                 f"and PYTORCH_API.startNs <= {self._step_range.get(Constant.END_NS)}"
-            filter_statement_2 = f"WHERE MSTX_EVENTS.startNs >= {self._step_range.get(Constant.START_NS)} " \
-                                 f"and MSTX_EVENTS.startNs <= {self._step_range.get(Constant.END_NS)}"
+                                 f"AND PYTORCH_API.startNs <= {self._step_range.get(Constant.END_NS)}"
+            filter_statement_2 = f"AND MSTX_EVENTS.startNs >= {self._step_range.get(Constant.START_NS)} " \
+                                 f"AND MSTX_EVENTS.startNs <= {self._step_range.get(Constant.END_NS)}"
         else:
             filter_statement_1, filter_statement_2 = "", ""
-        return QUERY.format(filter_statement_1, filter_statement_2)
+        return MARK_QUERY.format(filter_statement_1, filter_statement_2)
+
+
+RANGE_QUERY = '''
+SELECT
+    MSG_IDS.value AS "msg",
+    MSTX_EVENTS.startNs AS "cann_start_ts",
+    MSTX_EVENTS.endNs AS "cann_end_ts",
+    TASK.startNs AS "device_start_ts",
+    TASK.endNs AS "device_end_ts",
+    MSTX_EVENTS.globalTid AS "tid"
+FROM
+    MSTX_EVENTS
+LEFT JOIN
+    TASK
+    ON MSTX_EVENTS.connectionId == TASK.connectionId
+LEFT JOIN
+    STRING_IDS AS MSG_IDS
+    ON MSTX_EVENTS.message == MSG_IDS.id
+WHERE
+    MSTX_EVENTS.eventType == 2 {}
+AND
+    MSTX_EVENTS.connectionId != 4294967295
+ORDER BY
+    MSTX_EVENTS.startNs
+    '''
+
+
+class MstxRangeExport(BaseStatsExport):
+
+    def __init__(self, db_path, recipe_name, step_range):
+        super().__init__(db_path, recipe_name, step_range)
+        self._query = self.get_query_statement()
+
+    def get_query_statement(self):
+        filter_statement = f"AND MSTX_EVENTS.startNs >= {self._step_range.get(Constant.START_NS)} AND " \
+                           f"MSTX_EVENTS.startNs <= {self._step_range.get(Constant.END_NS)}" if self._step_range else ""
+        return RANGE_QUERY.format(filter_statement)
-- 
Gitee


From 3944c9dea10f64b9e24c5a3967b566afff16c789 Mon Sep 17 00:00:00 2001
From: curry3 <485078529@qq.com>
Date: Wed, 26 Feb 2025 11:19:57 +0800
Subject: [PATCH 20/25] =?UTF-8?q?=E3=80=90feature=E3=80=91ms=E5=92=8Cpt?=
 =?UTF-8?q?=E8=A1=A5=E5=85=85API=E6=94=AF=E6=8C=81=E5=88=97=E8=A1=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../data_processor/pytorch_processor.py       |   8 +-
 .../dump/hook_cell/support_wrap_ops.yaml      |  50 ++--
 .../pytorch/hook_module/support_wrap_ops.yaml | 242 ++++++++++++------
 3 files changed, 197 insertions(+), 103 deletions(-)

diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
index 64253aa4260..2cd98b12568 100644
--- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
+++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py
@@ -78,14 +78,16 @@ class PytorchDataProcessor(BaseDataProcessor):
     def analyze_device_in_kwargs(element):
         single_arg = {}
         single_arg.update({'type': "torch.device"})
-        if not isinstance(element, str):
+        if isinstance(element, (int, str)):
+            single_arg.update({"value": element})
+        elif isinstance(element, torch.device):
             if hasattr(element, "index"):
                 device_value = element.type + ":" + str(element.index)
             else:
                 device_value = element.type
             single_arg.update({"value": device_value})
         else:
-            single_arg.update({"value": element})
+            logger.debug(f"Device type {type(element)} is not supported.")
         return single_arg
 
     @staticmethod
@@ -311,7 +313,7 @@ class TensorDataProcessor(PytorchDataProcessor):
             saved_tensor = tensor.clone().contiguous().detach()
             save_pt(saved_tensor, file_path)
         return single_arg
-    
+
     def _analyze_numpy(self, ndarray, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         save_pt(torch.tensor(ndarray), file_path)
diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml
index 723b0cbc93f..364062b4647 100644
--- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml
+++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml
@@ -564,15 +564,15 @@ tensor:
   - all
   - amax
   - amin
+  - angle
   - any
   - arccos
   - arccosh
-  - argmax
-  - angle
   - arcsin
   - arcsinh
   - arctan
   - arctanh
+  - argmax
   - argmin
   - argsort
   - asin
@@ -582,19 +582,23 @@ tensor:
   - atanh
   - baddbmm
   - bernoulli
+  - bfloat16
   - bincount
   - bitwise_and
   - bitwise_or
   - bitwise_xor
   - bmm
   - bool
+  - bool astype
   - broadcast_to
+  - byte
   - ceil
-  - cholesky_solve
   - cholesky
+  - cholesky_solve
   - clamp
   - clip
   - conj
+  - copy
   - copysign
   - cos
   - cosh
@@ -606,11 +610,13 @@ tensor:
   - deg2rad
   - diag
   - diagflat
+  - diagonal
   - diff
   - digamma
   - div
   - div_
   - divide
+  - double
   - equal
   - erf
   - erfc
@@ -618,13 +624,16 @@ tensor:
   - exp
   - expand_as
   - expm1
+  - flatten
   - flip
   - fliplr
   - flipud
+  - float
   - float_power
   - floor
   - fmod
   - frac
+  - from_numpy
   - gather_elements
   - ge
   - geqrf
@@ -648,12 +657,12 @@ tensor:
   - inner
   - int
   - inverse
+  - is_complex
+  - is_signed
   - isclose
   - isfinite
   - isinf
   - isnan
-  - is_complex
-  - is_signed
   - isneginf
   - isposinf
   - isreal
@@ -704,28 +713,27 @@ tensor:
   - new_ones
   - new_zeros
   - nextafter
-  - norm
   - nonzero
+  - norm
   - not_equal
   - ormqr
   - permute
   - pow
   - prod
   - qr
+  - rad2deg
   - ravel
   - real
   - reciprocal
   - remainder
   - renorm
-  - rad2deg
-  - tile
   - repeat_interleave
   - reshape
   - reshape
-  - round
+  - resize
   - rot90
+  - round
   - rsqrt
-  - sum_to_size
   - scatter
   - sgn
   - short
@@ -745,7 +753,8 @@ tensor:
   - sub
   - sub_
   - subtract
-  - subtract
+  - sum
+  - sum_to_size
   - svd
   - swapaxes
   - swapdims
@@ -753,13 +762,13 @@ tensor:
   - take
   - tan
   - tanh
-  - trace
-  - swapaxes
+  - tensor_split
   - tile
+  - to
   - topk
-  - tril
-  - tensor_split
+  - trace
   - transpose
+  - tril
   - true_divide
   - trunc
   - unbind
@@ -769,17 +778,6 @@ tensor:
   - view
   - where
   - xlogy
-  - from_numpy
-  - std
-  - take
-  - var
-  - all
-  - any
-  - copy
-  - diagonal
-  - flatten
-  - resize
-  - sum
 
 mint.ops:
   - abs
diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml
index 4bc22f51ceb..91eb016284a 100644
--- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml
+++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml
@@ -149,9 +149,10 @@ tensor:
   - __bool__
   - __div__
   - __eq__
+  - __floordiv__
   - __ge__
-  - __gt__
   - __getitem__
+  - __gt__
   - __iadd__
   - __iand__
   - __idiv__
@@ -160,23 +161,33 @@ tensor:
   - __imod__
   - __imul__
   - __ior__
+  - __ipow__
   - __irshift__
   - __isub__
   - __ixor__
+  - __le__
   - __lshift__
+  - __lt__
   - __matmul__
   - __mod__
   - __mul__
+  - __ne__
   - __nonzero__
   - __or__
+  - __pow__
   - __radd__
+  - __rdiv__
+  - __rmod__
   - __rmul__
+  - __ror__
+  - __rpow__
   - __rshift__
+  - __rsub__
+  - __rxor__
   - __setitem__
   - __sub__
   - __truediv__
   - __xor__
-  - __pow__
   - abs
   - abs_
   - absolute
@@ -199,12 +210,14 @@ tensor:
   - addmv_
   - addr
   - addr_
+  - adjoint
   - align_as
   - align_to
   - all
   - allclose
   - amax
   - amin
+  - aminmax
   - angle
   - any
   - arccos
@@ -216,12 +229,15 @@ tensor:
   - arcsinh
   - arcsinh_
   - arctan
+  - arctan2
+  - arctan2_
   - arctan_
   - arctanh
   - arctanh_
   - argmax
   - argmin
   - argsort
+  - argwhere
   - asin
   - asin_
   - asinh
@@ -236,39 +252,51 @@ tensor:
   - baddbmm_
   - bernoulli
   - bernoulli_
+  - bfloat16
   - bincount
   - bitwise_and
   - bitwise_and_
+  - bitwise_left_shift
+  - bitwise_left_shift_
   - bitwise_not
   - bitwise_not_
   - bitwise_or
   - bitwise_or_
+  - bitwise_right_shift
+  - bitwise_right_shift_
   - bitwise_xor
   - bitwise_xor_
   - bmm
+  - bool
   - broadcast_to
+  - byte
   - cauchy_
   - ceil
   - ceil_
+  - cfloat
+  - char
   - cholesky
+  - cholesky_inverse
+  - cholesky_solve
   - chunk
   - clamp
-  - cholesky_solve
-  - cholesky_inverse
   - clamp_
   - clamp_max
   - clamp_max_
-  - clip
   - clamp_min
   - clamp_min_
+  - clip
   - clip_
+  - conj_physical
   - copysign
   - copysign_
+  - corrcoef
   - cos
   - cos_
   - cosh
   - cosh_
   - count_nonzero
+  - cov
   - cummax
   - cummin
   - cumprod
@@ -282,20 +310,23 @@ tensor:
   - diag_embed
   - diagflat
   - diagonal
+  - diagonal_scatter
   - diff
-  - dist
   - digamma
   - digamma_
+  - dist
   - div
   - div_
   - divide
   - divide_
   - dot
+  - double
+  - dsplit
   - eig
   - eq
   - eq_
-  - erf
   - equal
+  - erf
   - erf_
   - erfc
   - erfc_
@@ -304,18 +335,21 @@ tensor:
   - exp
   - exp2
   - exp2_
-  - expm1
   - exp_
+  - expand
+  - expand_as
+  - expm1
   - expm1_
   - exponential_
   - fill_
-  - fix
   - fill_diagonal_
+  - fix
   - fix_
+  - flatten
   - flip
   - fliplr
-  - flatten
   - flipud
+  - float
   - float_power
   - float_power_
   - floor
@@ -328,6 +362,7 @@ tensor:
   - fmod_
   - frac
   - frac_
+  - frexp
   - gather
   - gcd
   - gcd_
@@ -338,31 +373,37 @@ tensor:
   - ger
   - greater
   - greater_
-  - gt
-  - gt_
   - greater_equal
   - greater_equal_
+  - gt
+  - gt_
+  - half
   - hardshrink
   - heaviside
   - heaviside_
   - histc
+  - histogram
+  - hsplit
   - hypot
   - hypot_
+  - i0
+  - i0_
   - igamma
   - igamma_
   - igammac
   - igammac_
   - index_add
   - index_add_
-  - inverse
   - index_copy
   - index_copy_
   - index_fill
   - index_fill_
   - index_put
   - index_put_
-  - inner
   - index_select
+  - inner
+  - int
+  - inverse
   - isclose
   - isfinite
   - isinf
@@ -380,7 +421,6 @@ tensor:
   - le_
   - lerp
   - lerp_
-  - where
   - less
   - less_
   - less_equal
@@ -397,43 +437,47 @@ tensor:
   - log_
   - log_normal_
   - log_softmax
-  - logcumsumexp
-  - logdet
   - logaddexp
   - logaddexp2
+  - logcumsumexp
+  - logdet
   - logical_and
   - logical_and_
   - logical_not
-  - logit
   - logical_not_
   - logical_or
   - logical_or_
   - logical_xor
   - logical_xor_
+  - logit
   - logit_
   - logsumexp
+  - long
   - lstsq
   - lt
   - lt_
+  - lu
   - lu_solve
   - map2_
   - map_
   - masked_fill
-  - matmul
   - masked_fill_
   - masked_scatter
   - masked_scatter_
   - masked_select
+  - matmul
   - matrix_exp
+  - matrix_power
   - max
   - maximum
   - mean
-  - matrix_power
   - median
   - min
   - minimum
   - mm
   - mode
+  - moveaxis
+  - movedim
   - msort
   - mul
   - mul_
@@ -443,6 +487,11 @@ tensor:
   - mv
   - mvlgamma
   - mvlgamma_
+  - nan_to_num
+  - nan_to_num_
+  - nanmean
+  - nanmedian
+  - nanquantile
   - nansum
   - narrow
   - narrow_copy
@@ -452,20 +501,29 @@ tensor:
   - neg_
   - negative
   - negative_
+  - nextafter
+  - nextafter_
   - nonzero
   - norm
   - normal_
   - not_equal
   - not_equal_
+  - numpy
+  - orgqr
+  - ormqr
+  - outer
   - permute
   - pinverse
   - polygamma
+  - polygamma_
   - pow
   - pow_
-  - polygamma_
   - prelu
   - prod
   - put_
+  - q_zero_point
+  - qr
+  - quantile
   - rad2deg
   - rad2deg_
   - ravel
@@ -474,15 +532,16 @@ tensor:
   - relu
   - relu_
   - remainder
-  - repeat_interleave
-  - reshape
   - remainder_
   - renorm
   - renorm_
   - repeat
+  - repeat_interleave
+  - reshape
   - reshape_as
   - resize_
   - resize_as_
+  - resolve_neg
   - roll
   - rot90
   - round
@@ -496,6 +555,7 @@ tensor:
   - select
   - sgn
   - sgn_
+  - short
   - sigmoid
   - sigmoid_
   - sign
@@ -507,11 +567,13 @@ tensor:
   - sinc_
   - sinh
   - sinh_
+  - slice_scatter
   - slogdet
   - smm
   - softmax
   - solve
   - sort
+  - split
   - split_with_sizes
   - sqrt
   - sqrt_
@@ -521,21 +583,29 @@ tensor:
   - squeeze_
   - sspaddmm
   - std
+  - stft
+  - stride
   - sub
   - sub_
+  - subtract
   - sum
   - sum_to_size
   - svd
+  - swapaxes
+  - swapdims
+  - swapdims_
   - symeig
   - t
   - t_
   - take
+  - take_along_dim
   - tan
   - tan_
   - tanh
   - tanh_
   - tensor_split
   - tile
+  - to
   - topk
   - transpose
   - transpose_
@@ -543,8 +613,8 @@ tensor:
   - tril
   - tril_
   - triu
-  - true_divide
   - triu_
+  - true_divide
   - true_divide_
   - trunc
   - trunc_
@@ -552,37 +622,20 @@ tensor:
   - unbind
   - unflatten
   - unfold
+  - unique
+  - unique_consecutive
   - unsafe_chunk
-  - unsqueeze
   - unsafe_split
   - unsafe_split_with_sizes
+  - unsqueeze
+  - unsqueeze_
   - var
   - vdot
-  - unsqueeze_
   - view_as
+  - vsplit
+  - where
   - xlogy
   - xlogy_
-  - split
-  - stft
-  - nan_to_num
-  - dsplit
-  - orgqr
-  - bitwise_left_shift_
-  - arctan2
-  - histogram
-  - q_zero_point
-  - adjoint
-  - ormqr
-  - bitwise_right_shift_
-  - nanquantile
-  - lu
-  - quantile
-  - arctan2_
-  - qr
-  - diagonal_scatter
-  - corrcoef
-  - vsplit
-  - aminmax
 
 torch:
   - linalg.norm
@@ -642,13 +695,14 @@ torch:
   - addmv
   - addmv_
   - addr
-  - amax
   - affine_grid_generator
   - align_tensors
   - all
   - alpha_dropout
-  - amin
   - alpha_dropout_
+  - amax
+  - amin
+  - aminmax
   - angle
   - any
   - arange
@@ -661,12 +715,14 @@ torch:
   - arcsinh
   - arcsinh_
   - arctan
+  - arctan2
   - arctan_
   - arctanh
   - arctanh_
   - argmax
   - argmin
   - argsort
+  - argwhere
   - asin
   - asin_
   - asinh
@@ -687,13 +743,13 @@ torch:
   - batch_norm_elemt
   - batch_norm_gather_stats
   - batch_norm_gather_stats_with_counts
-  - bernoulli
   - batch_norm_stats
   - batch_norm_update_stats
+  - bernoulli
   - bilinear
+  - binary_cross_entropy_with_logits
   - bincount
   - binomial
-  - binary_cross_entropy_with_logits
   - bitwise_and
   - bitwise_not
   - bitwise_or
@@ -739,9 +795,9 @@ torch:
   - conv_transpose1d
   - conv_transpose2d
   - conv_transpose3d
-  - cos
   - convolution
   - copysign
+  - cos
   - cos_
   - cosh
   - cosh_
@@ -755,14 +811,16 @@ torch:
   - cummin
   - cumprod
   - cumsum
+  - cumulative_trapezoid
   - deg2rad
   - deg2rad_
   - det
   - diag
   - diag_embed
-  - diff
   - diagflat
   - diagonal
+  - diagonal_scatter
+  - diff
   - digamma
   - dist
   - div
@@ -771,12 +829,15 @@ torch:
   - dropout
   - dropout_
   - dsmm
+  - dsplit
   - dstack
   - eig
   - einsum
   - embedding
   - embedding_bag
   - embedding_renorm_
+  - empty
+  - empty_like
   - eq
   - equal
   - erf
@@ -791,12 +852,12 @@ torch:
   - expm1
   - expm1_
   - eye
-  - feature_dropout
   - feature_alpha_dropout
   - feature_alpha_dropout_
+  - feature_dropout
   - feature_dropout_
-  - fix
   - fill_
+  - fix
   - fix_
   - flatten
   - flip
@@ -811,8 +872,9 @@ torch:
   - fmod
   - frac
   - frac_
-  - full
+  - frexp
   - frobenius_norm
+  - full
   - full_like
   - gather
   - gcd
@@ -824,8 +886,8 @@ torch:
   - greater_equal
   - grid_sampler
   - grid_sampler_2d
-  - group_norm
   - grid_sampler_3d
+  - group_norm
   - gru
   - gru_cell
   - gt
@@ -835,23 +897,29 @@ torch:
   - heaviside
   - hinge_embedding_loss
   - histc
+  - histogram
+  - histogramdd
   - hsmm
+  - hsplit
   - hspmm
   - hstack
   - hypot
+  - i0
+  - i0_
   - igamma
   - igammac
   - index_add
   - index_copy
-  - inner
   - index_fill
   - index_put
   - index_put_
   - index_select
+  - inner
   - instance_norm
   - inverse
   - isclose
   - isfinite
+  - isin
   - isinf
   - isnan
   - isneginf
@@ -879,8 +947,8 @@ torch:
   - log1p_
   - log2
   - log2_
-  - log_softmax
   - log_
+  - log_softmax
   - logaddexp
   - logaddexp2
   - logcumsumexp
@@ -899,18 +967,18 @@ torch:
   - lt
   - lu_solve
   - lu_unpack
-  - masked_fill
   - margin_ranking_loss
+  - masked_fill
   - masked_scatter
   - masked_select
-  - matrix_exp
   - matmul
+  - matrix_exp
   - matrix_power
   - matrix_rank
   - max
   - max_pool1d
-  - max_pool2d
   - max_pool1d_with_indices
+  - max_pool2d
   - max_pool3d
   - maximum
   - mean
@@ -929,18 +997,20 @@ torch:
   - mvlgamma
   - nan_to_num
   - nan_to_num_
+  - nanmean
   - nanmedian
+  - nanquantile
   - nansum
   - narrow
+  - narrow_copy
   - native_batch_norm
   - native_group_norm
-  - narrow_copy
   - native_layer_norm
   - native_norm
   - ne
   - neg
-  - negative
   - neg_
+  - negative
   - negative_
   - nextafter
   - nonzero
@@ -972,30 +1042,31 @@ torch:
   - ravel
   - real
   - reciprocal
-  - relu
   - reciprocal_
+  - relu
   - relu_
   - remainder
   - renorm
   - repeat_interleave
   - reshape
   - resize_as_
+  - resolve_neg
   - roll
   - rot90
   - round
   - round_
+  - row_stack
   - rrelu
   - rrelu_
   - rsqrt
-  - row_stack
   - rsqrt_
   - rsub
   - saddmm
   - scalar_tensor
   - scatter
-  - select
   - scatter_add
   - searchsorted
+  - select
   - selu
   - selu_
   - sgn
@@ -1015,12 +1086,12 @@ torch:
   - solve
   - sort
   - sparse_coo_tensor
-  - square
   - split
   - split_with_sizes
   - spmm
   - sqrt
   - sqrt_
+  - square
   - square_
   - squeeze
   - sspaddmm
@@ -1042,8 +1113,8 @@ torch:
   - tan_
   - tanh
   - tanh_
-  - tensordot
   - tensor_split
+  - tensordot
   - threshold
   - threshold_
   - tile
@@ -1059,19 +1130,21 @@ torch:
   - true_divide
   - trunc
   - trunc_
-  - unique_consecutive
-  - xlogy
   - unbind
+  - unflatten
+  - unique_consecutive
   - unsafe_chunk
   - unsafe_split
-  - vander
-  - var
-  - vdot
   - unsafe_split_with_sizes
   - unsqueeze
+  - vander
+  - var
   - var_mean
+  - vdot
+  - vsplit
   - vstack
   - where
+  - xlogy
   - xlogy_
 
 _VF:
@@ -1165,6 +1238,27 @@ torch_npu:
   - npu_moe_finalize_routing
   - npu_moe_gating_top_k_softmax
   - npu_trans_quant_param
+  - npu_gelu
+  - npu_ffn
+  - npu_quant_matmul
+  - npu_format_cast_
+  - npu_dynamic_quant
+  - npu_moe_compute_expert_tokens
+  - npu_weight_quant_batchmatmul
+  - npu_dynamic_quant_asymmetric
+  - npu_grouped_matmul
+  - npu_quant_scatter_
+  - npu_group_quant
+  - npu_fused_infer_attention_score
+  - npu_quantize
+  - npu_fast_gelu
+  - npu_weight_quant_batchmatmul
+  - scatter_update
+  - scatter_update_
+  - npu_moe_init_routing
+  - npu_scatter_nd_update_
+  - npu_scatter_nd_update
+  - npu_prefetch
 
 aten:
   - signbit
-- 
Gitee


From 45afc5e983f6c0d3b19026b16d74c8ff5dac9762 Mon Sep 17 00:00:00 2001
From: qianggee <qjchenb@163.com>
Date: Fri, 28 Feb 2025 02:27:54 +0000
Subject: [PATCH 21/25] fix grad sync bug

---
 debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py
index eea2bdbc2d2..286ec298ba2 100644
--- a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py
+++ b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py
@@ -1052,7 +1052,7 @@ class TrainerMon:
             self.enable_megatron = True
             logger.info("megatron version is > core_r0.8.0 <= core_r0.9.0")
         except ImportError:
-            self.enable_megatron = False
+            self.enable_megatron = False | self.enable_megatron
 
         if not self.enable_megatron:
             self._hook_weights()
-- 
Gitee


From ef229d0fedd79b0750d160019398d7bcdb323fe3 Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Fri, 28 Feb 2025 10:32:22 +0800
Subject: [PATCH 22/25] update_msprof_analyze_whl

---
 profiler/msprof_analyze/README.md   | 1 +
 profiler/msprof_analyze/version.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/profiler/msprof_analyze/README.md b/profiler/msprof_analyze/README.md
index 7e2267a5559..d39aea89a52 100644
--- a/profiler/msprof_analyze/README.md
+++ b/profiler/msprof_analyze/README.md
@@ -117,6 +117,7 @@ Successfully installed msprof-analyze-{version}
 
 | profiler版本 | 发布日期       | 下载链接                                                                                                                                      | 校验码                                                       |
 |------------|------------|-------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------ |
+| 2.0.1      | 2025-02-28 | [msprof_analyze-2.0.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/2.0.1/msprof_analyze-2.0.1-py3-none-any.whl) | 82dfe2c779dbab9015f61d36ea0c32d832b6d182454b3f7db68e6c0ed49c0423 |
 | 2.0.0      | 2025-02-08 | [msprof_analyze-2.0.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/2.0.0/msprof_analyze-2.0.0-py3-none-any.whl) | 8e44e5f3e7681c377bb2657a600ad9841d3bed11061ddd7844c30e8a97242101 |
 | 1.3.4      | 2025-01-20 | [msprof_analyze-1.3.4-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.3.4/msprof_analyze-1.3.4-py3-none-any.whl) | 8de92188d1a97105fb14cadcb0875ccd5f66629ee3bb25f37178da1906f4cce2 |
 | 1.3.3      | 2024-12-26 | [msprof_analyze-1.3.3-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.3.3/msprof_analyze-1.3.3-py3-none-any.whl) | 27676f2eee636bd0c65243f81e292c7f9d30d7f985c772ac9cbaf10b54d3584e |
diff --git a/profiler/msprof_analyze/version.txt b/profiler/msprof_analyze/version.txt
index 359a5b952d4..10bf840ed53 100644
--- a/profiler/msprof_analyze/version.txt
+++ b/profiler/msprof_analyze/version.txt
@@ -1 +1 @@
-2.0.0
\ No newline at end of file
+2.0.1
\ No newline at end of file
-- 
Gitee


From 1291b93f40d6e4c143116f4d63166c8a91920730 Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Fri, 28 Feb 2025 10:36:27 +0800
Subject: [PATCH 23/25] base_recipe_analysis_adapt_msprof

---
 .../recipes/base_recipe_analysis.py           | 47 +++++++++++++++----
 .../msprof_analyze/prof_common/constant.py    |  1 +
 .../prof_exports/base_stats_export.py         |  3 ++
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py b/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py
index a8b50359253..ed60873a1ef 100644
--- a/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py
+++ b/profiler/msprof_analyze/cluster_analyse/recipes/base_recipe_analysis.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 import argparse
 import os
+import re
 import shutil
 import sys
 import traceback
@@ -42,6 +43,7 @@ class BaseRecipeAnalysis(ABC):
         self._recipe_name = params.get(Constant.RECIPE_NAME, "")
         self._parallel_mode = params.get(Constant.PARALLEL_MODE, "")
         self._export_type = params.get(Constant.EXPORT_TYPE, "")
+        self._is_msprof = params.get(Constant.IS_MSPROF)
         self._cluster_analysis_output_path = os.path.join(
             params.get(Constant.CLUSTER_ANALYSIS_OUTPUT_PATH, self._collection_dir), Constant.CLUSTER_ANALYSIS_OUTPUT)
         self._output_path = self._cluster_analysis_output_path if self._export_type == "db" else os.path.join(
@@ -158,16 +160,40 @@ class BaseRecipeAnalysis(ABC):
         db_paths = []
         for rank_id in rank_ids:
             rank_path = self._data_map[rank_id]
-            db_path = os.path.join(rank_path, Constant.SINGLE_OUTPUT, f"ascend_pytorch_profiler_{rank_id}.db")
-            if os.path.exists(db_path):
-                db_paths.append({Constant.RANK_ID: rank_id, Constant.PROFILER_DB_PATH: db_path,
-                                 Constant.STEP_RANGE: self._get_step_range(db_path)})
+            db_path_dict = {Constant.RANK_ID: rank_id, Constant.PROFILER_DB_PATH: "", Constant.ANALYSIS_DB_PATH: "",
+                            Constant.STEP_RANGE: {}}
+            profiler_db_path = self._get_profiler_db_path(rank_id, rank_path)
+            analysis_db_path = os.path.join(rank_path, "analyze", "communication_analyzer.db") if self._is_msprof \
+                else os.path.join(rank_path, Constant.SINGLE_OUTPUT, f"analysis.db")
+            if os.path.exists(profiler_db_path):
+                db_path_dict[Constant.PROFILER_DB_PATH] = profiler_db_path
+                db_path_dict[Constant.STEP_RANGE] = self._get_step_range(profiler_db_path)
             else:
-                logger.warning(f"DB file not found, rank id: {rank_id}, db path: {db_path}.")
+                logger.warning(f"Profiler DB file not found, rank id: {rank_id}, db path: {profiler_db_path}.")
+
+            if os.path.exists(analysis_db_path):
+                db_path_dict[Constant.ANALYSIS_DB_PATH] = analysis_db_path
+            else:
+                logger.warning(f"Analysis DB file not found, rank id: {rank_id}, db path: {analysis_db_path}.")
+            if db_path_dict.get(Constant.PROFILER_DB_PATH):
+                db_paths.append(db_path_dict)
         if invalid_rank_id:
             logger.warning(f"Invalid Rank id: [{','.join(invalid_rank_id)}].")
         return db_paths
 
+    def _get_profiler_db_path(self, rank_id, data_path):
+        if self._is_msprof:
+            msprof_db_pattern = r"^msprof_\d{14}\.db$"
+            msprof_db_list = []
+            for file_name in os.listdir(data_path):
+                if re.match(msprof_db_pattern, file_name):
+                    msprof_db_list.append(file_name)
+            if msprof_db_list:
+                msprof_db_list.sort(key=lambda x: x.split(".")[0].split("_")[-1])
+                return os.path.join(data_path, msprof_db_list[-1])
+            return os.path.join(data_path, "msprof_xx.db")
+        return os.path.join(data_path, Constant.SINGLE_OUTPUT, f"ascend_pytorch_profiler_{rank_id}.db")
+
     def _get_step_range(self, db_path):
         step_range = {}
         if self._step_id == Constant.VOID_STEP:
@@ -204,9 +230,14 @@ class BaseRecipeAnalysis(ABC):
         Extract the profiling data required for cluster analysis from each device, and then aggregate the
         results from each device to be processed by a reduce function.
         Params:
-            data_map: eg. {"RANK_ID": 1,
-                           "profiler_db_path": "xxxx/ascend_pytorch_profiler_1.db",
-                           "step_range": {"id": 2, "startNs": 12345, "endNs": 12443]}
+            data_map: eg1. {"RANK_ID": 1,
+                            "profiler_db_path": "xxx/ASCEND_PROFILER_OUTPUT/ascend_pytorch_profiler_1.db",
+                            "analysis_db_path": "xxx/ASCEND_PROFILER_OUTPUT/analysis.db",
+                            "step_range": {"id": 2, "startNs": 12345, "endNs": 12443}}
+                      eg2. {"RANK_ID": 1,
+                            "profiler_db_path": "xxx/msprof_20250227145123.db",
+                            "analysis_db_path": "xxx/analyze/communication_analyzer.db",
+                            "step_range": {"id": 2, "startNs": 12345, "endNs": 12443}}
             analysis_class: hccl_sum, compute_op_sum, cann_api_sum, mstx_sum……
         """
         pass
diff --git a/profiler/msprof_analyze/prof_common/constant.py b/profiler/msprof_analyze/prof_common/constant.py
index f34aeade895..c04e429321d 100644
--- a/profiler/msprof_analyze/prof_common/constant.py
+++ b/profiler/msprof_analyze/prof_common/constant.py
@@ -423,6 +423,7 @@ class Constant(object):
 
     CONCURRENT_MODE = "concurrent"
     PROFILER_DB_PATH = "profiler_db_path"
+    ANALYSIS_DB_PATH = "analysis_db_path"
     RANK_LIST = "rank_list"
     EXPORT_TYPE = "export_type"
     EXTRA_ARGS = "args"
diff --git a/profiler/msprof_analyze/prof_exports/base_stats_export.py b/profiler/msprof_analyze/prof_exports/base_stats_export.py
index 65ccd69ecde..6e0ff5e211e 100644
--- a/profiler/msprof_analyze/prof_exports/base_stats_export.py
+++ b/profiler/msprof_analyze/prof_exports/base_stats_export.py
@@ -35,6 +35,9 @@ class BaseStatsExport:
 
     def read_export_db(self):
         try:
+            if not self._db_path:
+                logger.error("db path is None.")
+                return None
             query = self.get_query()
             if query is None:
                 logger.error("query is None.")
-- 
Gitee


From d1cf36f94b72be63d241cdb0e9afeddbf011c3b2 Mon Sep 17 00:00:00 2001
From: Mrtutu <zhangwei983@huawei.com>
Date: Thu, 27 Feb 2025 17:48:01 +0800
Subject: [PATCH 24/25] fix dyno param

---
 dynolog_npu/README.md                         | 121 ++++++++++++++----
 .../dynolog_npu/cli/src/commands/nputrace.rs  |   5 +
 dynolog_npu/dynolog_npu/cli/src/main.rs       |   5 +
 3 files changed, 103 insertions(+), 28 deletions(-)

diff --git a/dynolog_npu/README.md b/dynolog_npu/README.md
index 9cc015e66c6..d6ebd6f7ff0 100644
--- a/dynolog_npu/README.md
+++ b/dynolog_npu/README.md
@@ -85,32 +85,67 @@ nputrace子命令支持的参数选项
 
 | 子命令 | 参数类型 | 说明 |
 |-------|-------|-------|
-| record_shapes | action | 是否采集算子的InputShapes和InputTypes，设置参数采集，默认不采集 |
-| profile_memory | action | 是否采集算子内存信息，设置参数采集，默认不采集 |
-| with_stack | action | 是否采集Python调用栈，设置参数采集，默认不采集 |
-| with_flops | action | 是否采集算子flops，设置参数采集，默认不采集 |
-| with_modules | action | 是否采集modules层级的Python调用栈，设置参数采集，默认不采集 |
+| job-id | u64 | 采集任务的job id，默认值0，dynolog原生参数 |
+| pids | String | 采集任务的pid列表，多个pid用逗号分隔，默认值0，dynolog原生参数 |
+| process-limit | u64 | 最大采集进程的数量，默认值3，dynolog原生参数 |
+| profile-start-time | u64 | 用于同步采集的Unix时间戳，单位毫秒，默认值0，dynolog原生参数 |
+| duration-ms | u64 | 采集的周期，单位毫秒，默认值500，dynolog原生参数 |
+| iterations | i64 | 采集总迭代数，默认值-1，dynolog原生参数 |
+| log-file | String | 采集落盘的路径，必选值 |
+| start-step | u64 | 开始采集的迭代数，默认值0 |
+| record-shapes | action | 是否采集算子的InputShapes和InputTypes，设置参数采集，默认不采集 |
+| profile-memory | action | 是否采集算子内存信息，设置参数采集，默认不采集 |
+| with-stack | action | 是否采集Python调用栈，设置参数采集，默认不采集 |
+| with-flops | action | 是否采集算子flops，设置参数采集，默认不采集 |
+| with-modules | action | 是否采集modules层级的Python调用栈，设置参数采集，默认不采集 |
 | analyse | action | 采集后是否自动解析，设置参数解析，默认不解析 |
-| l2_cache | action | 是否采集L2 Cache数据，设置参数采集，默认不采集 |
-| op_attr | action | 是否采集算子属性信息，设置参数采集，默认不采集 |
-| data_simplification | String | 解析完成后是否数据精简，可选值范围[`true`, `false`]，默认值`true` |
+| l2-cache | action | 是否采集L2 Cache数据，设置参数采集，默认不采集 |
+| op-attr | action | 是否采集算子属性信息，设置参数采集，默认不采集 |
+| msprof-tx | action | 是否使能MSTX，设置参数使能，默认不使能 |
+| data-simplification | String | 解析完成后是否数据精简，可选值范围[`true`, `false`]，默认值`true` |
 | activities | String | 控制CPU、NPU事件采集范围，可选值范围[`CPU,NPU`, `NPU,CPU`, `CPU`, `NPU`]，默认值`CPU,NPU` |
-| profiler_level | String | 控制profiler的采集等级，可选值范围[`Level_none`, `Level0`, `Level1`, `Level2`]，默认值`Level0`|
-| aic_metrics | String | AI Core的性能指标采集项，可选值范围[`AiCoreNone`, `PipeUtilization`, `ArithmeticUtilization`, `Memory`, `MemoryL0`, `ResourceConflictRatio`, `MemoryUB`, `L2Cache`, `MemoryAccess`]，默认值`AiCoreNone`|
-| export_type | String | profiler解析导出数据的类型，可选值范围[`Text`, `Db`]，默认值`Text`|
-| gc_detect_threshold | Option<f32> | GC检测阈值，单位ms，只采集超过阈值的GC事件。该参数为可选参数，默认不设置时不开启GC检测 |
+| profiler-level | String | 控制profiler的采集等级，可选值范围[`Level_none`, `Level0`, `Level1`, `Level2`]，默认值`Level0`|
+| aic-metrics | String | AI Core的性能指标采集项，可选值范围[`AiCoreNone`, `PipeUtilization`, `ArithmeticUtilization`, `Memory`, `MemoryL0`, `ResourceConflictRatio`, `MemoryUB`, `L2Cache`, `MemoryAccess`]，默认值`AiCoreNone`|
+| export-type | String | profiler解析导出数据的类型，可选值范围[`Text`, `Db`]，默认值`Text`|
+| gc-detect-threshold | Option<f32> | GC检测阈值，单位ms，只采集超过阈值的GC事件。该参数为可选参数，默认不设置时不开启GC检测 |
 
-- nputrace示例命令
 
+- nputrace使用方法
+
+Step 1：拉起dynolog daemon进程
+```bash
+# 方法1：使用systemd拉起service
+# 修改配置文件/etc/dynolog.gflags, 使能ipc_monitor
+echo "--enable_ipc_monitor" | sudo tee -a /etc/dynolog.gflags
+sudo systemctl start dynolog
+
+# 方法2：命令行执行
+dynolog --enable-ipc-monitor
+
+#dynolog daemon的日志路径为：/var/log/dynolog.log
+```
+
+Step 2：使能dynolog trace dump环境变量
+```bash
+export KINETO_USE_DAEMON=1
+```
+
+Step 3：拉起训练任务
+```bash
+# 训练任务中需要使用pytorch的优化器/继承原生优化器
+bash train.sh
+```
+
+Step 4：使用dyno CLI动态触发trace dump
 ```bash
-# 示例1：采集框架、CANN和device数据，同时采集完后自动解析以及解析完成不做数据精简，落盘路径为/tmp/profile_data
-dyno nputrace --activities CPU,NPU --analyse --data_simplification false --log-file /tmp/profile_data
+# 示例1：从第10个step开始采集，采集2个step，采集框架、CANN和device数据，同时采集完后自动解析以及解析完成不做数据精简，落盘路径为/tmp/profile_data
+dyno nputrace --start-step 10 --iterations 2 --activities CPU,NPU --analyse --data-simplification false --log-file /tmp/profile_data
 
-# 示例2：只采集CANN和device数据，同时采集完后自动解析以及解析完成后开启数据精简，落盘路径为/tmp/profile_data
-dyno nputrace --activities NPU --analyse --data_simplification true --log-file /tmp/profile_data
+# 示例2：从第10个step开始采集，采集2个step，只采集CANN和device数据，同时采集完后自动解析以及解析完成后开启数据精简，落盘路径为/tmp/profile_data
+dyno nputrace --start-step 10 --iterations 2 --activities NPU --analyse --data-simplification true --log-file /tmp/profile_data
 
-# 示例3：只采集CANN和device数据，只采集不解析，落盘路径为/tmp/profile_data
-dyno nputrace --activities NPU --log-file /tmp/profile_data
+# 示例3：从第10个step开始采集，采集2个step，只采集CANN和device数据，只采集不解析，落盘路径为/tmp/profile_data
+dyno nputrace --start-step 10 --iterations 2 --activities NPU --log-file /tmp/profile_data
 ```
 
 ### NPU Monitor功能
@@ -129,20 +164,50 @@ dyno npu-monitor [SUBCOMMANDS]
 npu-monitor子命令支持的参数选项
 | 子命令 | 参数类型 | 说明 |
 |-------|-------|-------|
-| npu_monitor_start | action | 开启性能监控，设置参数开启，默认不采集 |
-| npu_monitor_stop | action | 停止性能监控，设置参数开启，默认不采集 |
-| report_interval_s | int | 性能监控数据上报周期，单位s，需要在启动时设置。默认值60 |
-| mspti_activity_kind | String | 性能监控数据上报数据类型，可以设置单个或多个，多个类型以逗号分隔，需要在启动时设置。可选值范围[`Marker`, `Kernel`, `API`, `Hccl`, `Memory`, `MemSet`, `MemCpy`] , 默认值`Marker`|
+| npu-monitor-start | action | 开启性能监控，设置参数开启，默认不采集 |
+| npu-monitor-stop | action | 停止性能监控，设置参数停止，默认不停止 |
+| report-interval-s | int | 性能监控数据上报周期，单位s，需要在启动时设置。默认值60 |
+| mspti-activity-kind | String | 性能监控数据上报数据类型，可以设置单个或多个，多个类型以逗号分隔，需要在启动时设置。可选值范围[`Marker`, `Kernel`, `API`, `Hccl`, `Memory`, `MemSet`, `MemCpy`] , 默认值`Marker`|
 
-- npu-monitor示例命令
+- npu-monitor使用方法
 
+Step 1：拉起dynolog daemon进程
+```bash
+# 方法1：使用systemd拉起service
+# 修改配置文件/etc/dynolog.gflags, 使能ipc_monitor
+echo "--enable_ipc_monitor" | sudo tee -a /etc/dynolog.gflags
+sudo systemctl start dynolog
+
+# 方法2：命令行执行
+dynolog --enable-ipc-monitor
+
+#dynolog daemon的日志路径为：/var/log/dynolog.log
+```
+
+Step 2：使能dynolog trace dump环境变量
+```bash
+export KINETO_USE_DAEMON=1
+```
+
+Step 3：拉起训练任务
+```bash
+# 训练任务中需要使用pytorch的优化器/继承原生优化器
+bash train.sh
+```
+
+Step 4：使用dyno CLI使能npu-monitor
 ```bash
 # 示例1：开启性能监控，使用默认配置
-dyno npu-monitor --npu_monitor_start
+dyno npu-monitor --npu-monitor-start
 
 # 示例2：暂停性能监控
-dyno npu-monitor --npu_monitor_stop
+dyno npu-monitor --npu-monitor-stop
+
+# 示例3：性能监控过程中修改配置
+# 上报周期30s, 上报数据类型Marker和Kernel
+dyno npu-monitor --report-interval-s 30 --mspti-activity-kind Marker,Kernel
 
-# 示例3：开启性能监控，上报周期30s, 上报数据类型Marker和Kernel
-dyno npu-monitor --npu_monitor_start 30 --mspti_activity_kind Marker,Kernel
+# 示例4：性能监控开启时修改配置
+# 上报周期30s, 上报数据类型Marker和Kernel
+dyno npu-monitor --npu-monitor-start --report-interval-s 30 --mspti-activity-kind Marker,Kernel
 ```
\ No newline at end of file
diff --git a/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs b/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs
index 4bf7132de33..f70923bca4c 100644
--- a/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs
+++ b/dynolog_npu/dynolog_npu/cli/src/commands/nputrace.rs
@@ -55,6 +55,7 @@ pub struct NpuTraceOptions {
     pub aic_metrics: String,
     pub l2_cache: bool,
     pub op_attr: bool,
+    pub msprof_tx: bool,
     pub gc_detect_threshold: Option<f32>,
     pub data_simplification: String,
     pub export_type: String,
@@ -75,6 +76,7 @@ PROFILE_PROFILER_LEVEL={}
 PROFILE_AIC_METRICS={}
 PROFILE_L2_CACHE={}
 PROFILE_OP_ATTR={}
+PROFILE_MSPROF_TX={}
 PROFILE_GC_DETECT_THRESHOLD={}
 PROFILE_DATA_SIMPLIFICATION={}
 PROFILE_EXPORT_TYPE={}"#,
@@ -89,6 +91,7 @@ PROFILE_EXPORT_TYPE={}"#,
             self.aic_metrics,
             self.l2_cache,
             self.op_attr,
+            self.msprof_tx,
             self.gc_detect_threshold.map_or("None".to_string(), |v| v.to_string()),
             self.data_simplification,
             self.export_type
@@ -213,6 +216,7 @@ ACTIVITIES_ITERATIONS=1000"#
                 aic_metrics: "AiCoreNone".to_string(),
                 l2_cache: true,
                 op_attr: true,
+                msprof_tx: true,
                 gc_detect_threshold: 0.1,
                 data_simplification: "true",
                 export_type: "Text".to_string(),
@@ -234,6 +238,7 @@ PROFILE_PROFILER_LEVEL=Level0
 PROFILE_AIC_METRICS=AiCoreNone
 PROFILE_L2_CACHE=true
 PROFILE_OP_ATTR=true
+PROFILE_MSPROF_TX=true
 PROFILE_GC_DETECT_THRESHOLD=0.1
 PROFILE_DATA_SIMPLIFICATION=true
 PROFILE_EXPORT_TYPE=Text"#
diff --git a/dynolog_npu/dynolog_npu/cli/src/main.rs b/dynolog_npu/dynolog_npu/cli/src/main.rs
index 8bc4a2af0e2..9fdea3d1254 100644
--- a/dynolog_npu/dynolog_npu/cli/src/main.rs
+++ b/dynolog_npu/dynolog_npu/cli/src/main.rs
@@ -172,6 +172,9 @@ enum Command {
         /// Whether to collect op attributes.
         #[clap(long, action)]
         op_attr: bool,
+        /// Whether to enable MSTX.
+        #[clap(long, action)]
+        msprof_tx: bool,
         /// GC detect threshold.
         #[clap(long)]
         gc_detect_threshold: Option<f32>,
@@ -290,6 +293,7 @@ fn main() -> Result<()> {
             aic_metrics,
             l2_cache,
             op_attr,
+            msprof_tx,
             gc_detect_threshold,
             data_simplification,
             export_type,
@@ -318,6 +322,7 @@ fn main() -> Result<()> {
                 aic_metrics,
                 l2_cache,
                 op_attr,
+                msprof_tx,
                 gc_detect_threshold,
                 data_simplification,
                 export_type,
-- 
Gitee


From f54ae7594d04d3a42af973ec69fc8e793a2fe566 Mon Sep 17 00:00:00 2001
From: yangxinxian <947098055@qq.com>
Date: Mon, 3 Mar 2025 10:15:32 +0800
Subject: [PATCH 25/25] Update PULL_REQUEST_TEMPLATE.zh-CN.md

---
 .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md
index fc9e09f3503..e9cc1deb82f 100644
--- a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md
+++ b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md
@@ -17,6 +17,11 @@
 
 ---
 
+## 3. 分支合并要求
+- [ ] **代码合并**（请确保将 master 分支的最新代码同步合并至 poc 分支及 pre-research 分支，同时保证 poc 分支的代码也已正确合并到 pre-research 分支。）
+
+---
+
 ## 3. 代码检视
 - **要求：**
   - 合入代码超过 200 行，需三人以上会议检视。
-- 
Gitee